#!/bin/bash
# Load test: detect JSON key ordering mutations in Bifrost's request proxying.
# Sends randomized payloads with different schema shapes and compares the input
# key order against what Bifrost actually sent to the provider (via
# extra_fields.raw_request with json.RawMessage preservation).
#
# Validates:
#   - Tool parameter key ordering at every nesting level (properties, $defs, nested schemas)
#   - tool_choice serialization (key ordering, no extra zero-value fields like "custom"/"allowed_tools")
#   - Multiple tool schemas, deeply nested objects, adversarial property orderings
#
# Each request randomly picks one of 8 different payload shapes to maximize coverage.
#
# This catches both:
#   - Consistent mutations (struct field order overriding client order) — 100% rate
#   - Sporadic mutations (sync.Pool reuse, concurrency bugs) — variable rate
#
# Prerequisites:
#   - Bifrost running with send_back_raw_request: true on the openai provider
#   - OpenAI provider pointed at a mock server (any 200 response works)
#   - curl and python3 available on PATH
#
# Usage: ./tests/load_test_parameter_ordering.sh [rps] [duration]
#   rps      - requests per second (default: 20)
#   duration - how many seconds to run (default: 10)

# Endpoint under test: Bifrost's LiteLLM-compatible chat completions route.
BIFROST_URL="http://localhost:8080/litellm/v1/chat/completions"

# Load shape: RPS requests are launched each second, for DURATION seconds.
RPS="${1:-20}"
DURATION="${2:-10}"

# Both values are expanded inside $(( )) below, where arbitrary text would be
# evaluated as an arithmetic expression, so accept plain decimal digits only.
for load_arg in "$RPS" "$DURATION"; do
  case "$load_arg" in
    '' | *[!0-9]*)
      echo "Usage: $0 [rps] [duration] (both must be non-negative integers)" >&2
      exit 64
      ;;
  esac
done
unset load_arg

# 10# forces base 10 so zero-padded input such as "08" is not read as octal.
RPS=$((10#$RPS))
DURATION=$((10#$DURATION))
NUM_REQUESTS=$((RPS * DURATION))

# ANSI color escapes; they are expanded by `echo -e` at the point of use.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Scratch directory for payloads, responses and result logs; removed on exit.
# Abort if it cannot be created: every path below is built from $TMPDIR, and
# an empty value would turn those paths into files directly under "/".
TMPDIR=$(mktemp -d) || {
  echo "error: mktemp -d failed; cannot create scratch directory" >&2
  exit 1
}
trap 'rm -rf "$TMPDIR"' EXIT

# ---------------------------------------------------------------------------
# Payload 1: the "standard" shape.
#   - properties listed in non-alphabetical order
#   - $defs placed after "required"
#   - tool_choice given as a {type, function} object
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_1.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "structured_response",
        "description": "Generate a structured response",
        "parameters": {
          "type": "object",
          "properties": {
            "reasoning": {
              "type": "string",
              "description": "Step by step reasoning",
              "title": "Reasoning"
            },
            "summary": {
              "type": "string",
              "description": "The final summary",
              "title": "Summary"
            },
            "tags": {
              "description": "Relevant tags",
              "items": {"$ref": "#/$defs/Tag"},
              "title": "Tags",
              "type": "array"
            },
            "confidence": {
              "description": "Confidence score",
              "title": "Confidence",
              "type": "number"
            }
          },
          "required": ["reasoning", "summary", "tags", "confidence"],
          "$defs": {
            "Tag": {
              "type": "object",
              "description": "A tag",
              "required": ["label"],
              "properties": {
                "label": {"description": "The tag label", "title": "Label", "type": "string"},
                "score": {"description": "Relevance score", "title": "Score", "type": "number"}
              },
              "title": "Tag"
            }
          }
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {"name": "structured_response"}
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 2: reverse-alphabetical properties with $defs as the FIRST key of
# "parameters", and tool_choice as the plain string "auto".
# A serializer that sorts keys would turn z_ y_ x_ w_ into w_ x_ y_ z_.
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_2.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "reverse_alpha_tool",
        "parameters": {
          "$defs": {
            "ZItem": {
              "type": "object",
              "properties": {
                "z_name": {"type": "string"},
                "a_value": {"type": "number"}
              },
              "required": ["z_name"]
            }
          },
          "type": "object",
          "properties": {
            "z_output": {"type": "string", "description": "Last alphabetically, first in schema"},
            "y_reasoning": {"type": "string", "description": "Second to last"},
            "x_items": {
              "type": "array",
              "items": {"$ref": "#/$defs/ZItem"},
              "description": "Third to last"
            },
            "w_confidence": {"type": "number", "description": "Fourth to last"}
          },
          "required": ["z_output", "y_reasoning"]
        }
      }
    }
  ],
  "tool_choice": "auto"
}
EOF

# ---------------------------------------------------------------------------
# Payload 3: two tools, objects nested three levels deep, and tool_choice as
# the plain string "required".
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_3.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "deep_nested_tool",
        "description": "Tool with 3-level nesting",
        "parameters": {
          "type": "object",
          "properties": {
            "output": {
              "type": "object",
              "description": "Nested output",
              "properties": {
                "verdict": {"type": "string"},
                "metadata": {
                  "type": "object",
                  "properties": {
                    "timestamp": {"type": "string"},
                    "source": {"type": "string"},
                    "confidence": {"type": "number"},
                    "author": {"type": "string"}
                  }
                },
                "score": {"type": "number"}
              }
            },
            "chain_of_thought": {"type": "string"},
            "answer": {"type": "string"}
          },
          "required": ["output", "answer"]
        }
      }
    },
    {
      "type": "function",
      "function": {
        "name": "secondary_tool",
        "description": "A second tool to verify multi-tool ordering",
        "parameters": {
          "type": "object",
          "properties": {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {"type": "integer", "description": "Limit"},
            "filters": {
              "type": "object",
              "properties": {
                "date_range": {"type": "string"},
                "category": {"type": "string"},
                "active_only": {"type": "boolean"}
              }
            }
          },
          "required": ["query"]
        }
      }
    }
  ],
  "tool_choice": "required"
}
EOF

# ---------------------------------------------------------------------------
# Payload 4: ten properties in zigzag order (names alternate between the end
# and the start of the alphabet), no $defs, tool_choice string "none".
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_4.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0.7,
  "max_tokens": 500,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "zigzag_tool",
        "description": "Properties in zigzag alphabetical order",
        "parameters": {
          "type": "object",
          "properties": {
            "zebra": {"type": "string"},
            "apple": {"type": "string"},
            "yarn": {"type": "number"},
            "banana": {"type": "boolean"},
            "xenon": {"type": "string"},
            "cherry": {"type": "integer"},
            "walnut": {"type": "string"},
            "date": {"type": "array", "items": {"type": "string"}},
            "violet": {"type": "number"},
            "elderberry": {"type": "string"}
          },
          "required": ["zebra", "apple", "yarn"]
        }
      }
    }
  ],
  "tool_choice": "none"
}
EOF

# ---------------------------------------------------------------------------
# Payload 5: pydantic-style schema with two $defs entries, $ref usage from
# both an array item and a direct property, and additionalProperties.
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_5.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "AnswerResponseModel",
        "description": "Realistic pydantic-generated schema",
        "parameters": {
          "$defs": {
            "Citation": {
              "type": "object",
              "properties": {
                "url": {"type": "string", "description": "Source URL"},
                "text": {"type": "string", "description": "Cited text"},
                "page_number": {"type": "integer", "description": "Page"}
              },
              "required": ["url", "text"]
            },
            "Metadata": {
              "type": "object",
              "properties": {
                "model_version": {"type": "string"},
                "latency_ms": {"type": "number"},
                "token_count": {"type": "integer"}
              },
              "required": ["model_version"]
            }
          },
          "type": "object",
          "properties": {
            "answer": {"type": "string", "description": "The answer"},
            "chain_of_thought": {"type": "string", "description": "Reasoning steps"},
            "citations": {
              "type": "array",
              "items": {"$ref": "#/$defs/Citation"},
              "description": "Supporting citations"
            },
            "is_unanswered": {"type": "boolean", "description": "Whether answerable"},
            "metadata": {"$ref": "#/$defs/Metadata"}
          },
          "required": ["answer", "is_unanswered"],
          "additionalProperties": false
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {"name": "AnswerResponseModel"}
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 6: baseline passthrough — one tool with a single property and no
# tool_choice at all. Top-level keys are in a non-standard order ("tools"
# comes before "messages").
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_6.json"
{
  "model": "openai/gpt-4.1",
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "simple_extractor",
        "parameters": {
          "type": "object",
          "properties": {
            "result": {"type": "string", "description": "Extracted result"}
          },
          "required": ["result"]
        }
      }
    }
  ],
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0
}
EOF

# ---------------------------------------------------------------------------
# Payload 7: EXACT reproduction of the reported bug — Issues 1, 2 and 3 in a
# single request.
#   Issue 1: tool_choice {type, function} naming AnswerResponseModel
#   Issue 2: properties answer, chain_of_thought, citations, is_unanswered
#   Issue 3: $defs (Citation) as the FIRST key of the parameters object
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_7.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "AnswerResponseModel",
        "parameters": {
          "$defs": {
            "Citation": {
              "type": "object",
              "properties": {
                "url": {"type": "string"},
                "text": {"type": "string"}
              },
              "required": ["url", "text"]
            }
          },
          "properties": {
            "answer": {"type": "string", "description": "The answer"},
            "chain_of_thought": {"type": "string", "description": "Reasoning"},
            "citations": {
              "type": "array",
              "items": {"$ref": "#/$defs/Citation"}
            },
            "is_unanswered": {"type": "boolean"}
          },
          "required": ["answer", "is_unanswered"],
          "type": "object"
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {
      "name": "AnswerResponseModel"
    }
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 8: 26 properties in exact reverse alphabetical order, to maximize
# the chance of spotting a reorder, with tool_choice as the string "auto" —
# string variants must pass through as strings, not be expanded to structs.
# ---------------------------------------------------------------------------
cat << 'EOF' > "$TMPDIR/payload_8.json"
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "reverse_order_check",
        "parameters": {
          "type": "object",
          "properties": {
            "zulu": {"type": "string"},
            "yankee": {"type": "string"},
            "x_ray": {"type": "string"},
            "whiskey": {"type": "number"},
            "victor": {"type": "boolean"},
            "uniform": {"type": "string"},
            "tango": {"type": "integer"},
            "sierra": {"type": "string"},
            "romeo": {"type": "number"},
            "quebec": {"type": "string"},
            "papa": {"type": "boolean"},
            "oscar": {"type": "string"},
            "november": {"type": "string"},
            "mike": {"type": "number"},
            "lima": {"type": "string"},
            "kilo": {"type": "boolean"},
            "juliet": {"type": "string"},
            "india": {"type": "integer"},
            "hotel": {"type": "string"},
            "golf": {"type": "number"},
            "foxtrot": {"type": "string"},
            "echo_field": {"type": "boolean"},
            "delta": {"type": "string"},
            "charlie": {"type": "number"},
            "bravo": {"type": "string"},
            "alpha": {"type": "string"}
          },
          "required": ["zulu", "alpha"]
        }
      }
    }
  ],
  "tool_choice": "auto"
}
EOF

# Number of payload_<n>.json files written above; requests pick n in 1..NUM_PAYLOADS.
NUM_PAYLOADS=8

# ---------------------------------------------------------------------------
# Python analyzer — compares input vs output key ordering at every nesting level
# ---------------------------------------------------------------------------
cat > "$TMPDIR/analyze.py" << 'PYEOF'
import json
import os
import sys
from collections import OrderedDict


def extract_key_orders(obj, path=""):
    """Map every JSON object found in ``obj`` to the order of its keys.

    Args:
        obj: Parsed JSON value. Anything other than a dict yields ``{}``.
        path: Label used for ``obj`` itself; nested objects extend it with
            ``.key`` for object members and ``[i]`` for array elements.

    Returns:
        dict of ``{path: [keys in document order]}`` with one entry per
        object in the tree, parents inserted before their children.

    Arrays are followed to any depth, so objects inside arrays of arrays
    (e.g. ``"examples": [[{...}]]``) are reported as ``examples[0][0]``.
    """
    if not isinstance(obj, dict):
        return {}
    orders = {}
    _collect_key_orders(obj, path, orders)
    return orders


def _collect_key_orders(node, path, orders):
    """Record the key order of ``node`` and of every object beneath it."""
    if isinstance(node, dict):
        orders[path] = list(node.keys())
        for key, child in node.items():
            # The root may be labelled "", in which case no leading dot is added.
            _collect_key_orders(child, f"{path}.{key}" if path else key, orders)
    elif isinstance(node, list):
        for i, item in enumerate(node):
            _collect_key_orders(item, f"{path}[{i}]", orders)


def get_all_tool_parameters(payload):
    """Collect the function parameter schemas of every tool in a payload.

    Args:
        payload: Parsed chat-completion request (a dict).

    Returns:
        List of ``(tool_name, parameters)`` tuples in payload order. The name
        falls back to ``"unknown"`` when absent. Tools without a
        ``parameters`` value are left out.

    A ``"tools"`` or ``"function"`` value of ``null`` and tool entries that are
    not objects are skipped rather than raising, so a malformed entry does
    not hide the remaining tools.
    """
    result = []
    for tool in payload.get("tools") or []:
        func = tool.get("function") if isinstance(tool, dict) else None
        if not isinstance(func, dict):
            continue
        params = func.get("parameters")
        if params is not None:
            result.append((func.get("name", "unknown"), params))
    return result


def check_tool_choice(input_payload, raw_request):
    """Check that tool_choice was forwarded unchanged.

    Args:
        input_payload: Parsed request that was sent to the proxy.
        raw_request: Parsed request the proxy reports having sent upstream.

    Returns:
        List of ``(description, input, output)`` mutation tuples; empty when
        tool_choice was preserved.

    Catches Issue 1 from the bug report:
      - Zero-value fields injected: "custom":{"name":""},
        "allowed_tools":{"mode":"","tools":null}
      - Key reordering: "type" moving from first to last position
      - String tool_choice ("auto"/"none"/"required") being expanded to struct
    and additionally the reverse case, an object tool_choice forwarded as a
    non-object value.
    """
    mutations = []
    input_tc = input_payload.get("tool_choice")
    output_tc = raw_request.get("tool_choice")

    if input_tc is None and output_tc is None:
        return mutations
    if input_tc is None:
        mutations.append(("tool_choice (injected)", None, output_tc))
        return mutations
    if output_tc is None:
        mutations.append(("tool_choice (dropped)", input_tc, None))
        return mutations

    # Issue 1: string tool_choice must stay as string, not become struct
    if isinstance(input_tc, str):
        if isinstance(output_tc, dict):
            mutations.append(("tool_choice (string->struct)", input_tc, list(output_tc.keys())))
        elif output_tc != input_tc:
            mutations.append(("tool_choice (string)", input_tc, output_tc))
        return mutations

    # An object must stay an object; without this check a struct collapsed
    # to e.g. "auto" would fall through every comparison below unreported.
    if isinstance(input_tc, dict) and not isinstance(output_tc, dict):
        mutations.append(("tool_choice (struct->non-struct)", list(input_tc.keys()), output_tc))
        return mutations

    if isinstance(input_tc, dict) and isinstance(output_tc, dict):
        input_keys = list(input_tc.keys())
        output_keys = list(output_tc.keys())
        added_keys = set(output_keys) - set(input_keys)

        # Issue 1.2: zero-value fields from unused union variants.
        # These are the exact fields reported in the bug:
        zero_value_fields = {"custom", "allowed_tools"}
        injected = zero_value_fields & added_keys
        if injected:
            mutations.append(("tool_choice (zero-value fields injected)", sorted(injected), [
                f'{k}={json.dumps(output_tc[k])}' for k in sorted(injected)
            ]))

        # Any other extra fields (sorted so the report is deterministic)
        other_extra = added_keys - zero_value_fields
        if other_extra:
            mutations.append(("tool_choice (unexpected extra fields)", [], sorted(other_extra)))

        # Issue 1.2: key ordering — "type" should stay first, not move to end
        if input_keys != output_keys:
            mutations.append(("tool_choice (key order)", input_keys, output_keys))

        # Recursively check nested key orders (e.g. function object). The root
        # object was compared just above, so it is skipped here to avoid
        # reporting the same reorder twice.
        input_tc_orders = extract_key_orders(input_tc, "tool_choice")
        output_tc_orders = extract_key_orders(output_tc, "tool_choice")
        for path, inp_keys in input_tc_orders.items():
            if path == "tool_choice":
                continue
            out_keys = output_tc_orders.get(path)
            if out_keys is not None and inp_keys != out_keys:
                mutations.append((path, inp_keys, out_keys))

    return mutations


def check_defs_position(input_params, output_params, tool_idx):
    """Report a schema-definitions key that changed index inside ``parameters``.

    Covers Issue 3 from the bug report ($defs at the top of parameters ends
    up at the bottom after the round-trip). Both ``$defs`` and the older
    ``definitions`` spelling are checked; a keyword is only compared when it
    is present on both sides.

    Args:
        input_params: ``parameters`` object as sent.
        output_params: ``parameters`` object as forwarded.
        tool_idx: Index of the tool, used in the reported path.

    Returns:
        List of ``(description, input, output)`` mutation tuples.
    """
    source_order = list(input_params.keys())
    proxied_order = list(output_params.keys())

    findings = []
    for keyword in ("$defs", "definitions"):
        if keyword not in source_order or keyword not in proxied_order:
            continue
        before = source_order.index(keyword)
        after = proxied_order.index(keyword)
        if before == after:
            continue
        findings.append((
            f"tools[{tool_idx}].parameters ({keyword} position)",
            f"{keyword} at index {before} in {source_order}",
            f"{keyword} at index {after} in {proxied_order}",
        ))
    return findings


# Command line: analyze.py <response_file> <request_index> <payload_file>
resp_file = sys.argv[1]
idx = sys.argv[2]
payload_file = sys.argv[3]


def _analyze_response():
    """Compare the payload that was sent with the raw_request echoed back.

    Prints exactly one status line to stdout (OK, MUTATED, NO_RAW_REQUEST or
    PARSE_ERROR, each followed by the request index); for MUTATED, one DETAIL
    line per mutation goes to stderr.
    """
    # The payload file is the reference for the client's key order.
    with open(payload_file) as handle:
        sent = json.load(handle, object_pairs_hook=OrderedDict)

    sent_tools = get_all_tool_parameters(sent)
    if not sent_tools:
        print(f"PARSE_ERROR:{idx}:no tool parameters in input payload")
        return

    with open(resp_file) as handle:
        resp = json.load(handle, object_pairs_hook=OrderedDict)

    raw_request = resp.get("extra_fields", OrderedDict()).get("raw_request")
    if raw_request is None:
        print(f"NO_RAW_REQUEST:{idx}")
        return

    forwarded_tools = get_all_tool_parameters(raw_request)
    if not forwarded_tools:
        print(f"PARSE_ERROR:{idx}:no tool parameters in raw_request")
        return

    mutations = []

    # Issue 2: per-tool key ordering, tools matched up by position.
    for i, (tool_name, sent_params) in enumerate(sent_tools):
        if i >= len(forwarded_tools):
            mutations.append((f"tool[{i}] missing", tool_name, "MISSING"))
            continue
        forwarded_params = forwarded_tools[i][1]

        root = f"tools[{i}].parameters"
        sent_orders = extract_key_orders(sent_params, root)
        forwarded_orders = extract_key_orders(forwarded_params, root)
        for path, sent_keys in sent_orders.items():
            forwarded_keys = forwarded_orders.get(path)
            # Paths that are absent from the forwarded schema are not compared.
            if forwarded_keys is not None and sent_keys != forwarded_keys:
                mutations.append((path, sent_keys, forwarded_keys))

        # Issue 3: $defs position within the parameters object
        mutations.extend(check_defs_position(sent_params, forwarded_params, i))

    # Issue 1: tool_choice serialization (zero-value fields, key ordering)
    mutations.extend(check_tool_choice(sent, raw_request))

    if not mutations:
        print(f"OK:{idx}")
        return

    payload_num = os.path.basename(payload_file).replace("payload_", "").replace(".json", "")
    print(f"MUTATED:{idx}")
    for path, inp, out in mutations:
        label = path or "parameters"
        print(f" DETAIL:{idx}:P{payload_num}:{label}: input={inp} -> output={out}", file=sys.stderr)


try:
    _analyze_response()
except Exception as e:
    # Unreadable or malformed files are reported as a status line, not a traceback.
    print(f"PARSE_ERROR:{idx}:{e}")
PYEOF

# Startup banner: the run parameters followed by a legend of payload variants.
printf '%b\n' "${CYAN}JSON Serialization Fidelity — Input vs Output Validator${NC}"
cat << BANNER
==========================================================
Target: $BIFROST_URL
RPS: $RPS
Duration: ${DURATION}s
Total: $NUM_REQUESTS requests
Payloads: $NUM_PAYLOADS variants (randomly selected per request)

Validates that Bifrost preserves the client's original JSON
key ordering (tool parameters + tool_choice) and doesn't
inject extra zero-value fields.

Payload variants:
 P1: Standard — non-alpha properties, \$defs after required, function tool_choice
 P2: Reverse-alpha properties, \$defs at TOP, tool_choice "auto"
 P3: Multiple tools, 3-level nested objects, tool_choice "required"
 P4: 10 zigzag-ordered properties, no \$defs, tool_choice "none"
 P5: Multiple \$defs, additionalProperties, pydantic-style schema
 P6: Minimal single-property tool, no tool_choice, non-standard top-level order
 P7: EXACT bug report reproduction — all 3 issues in one payload
 P8: 26 reverse-alpha NATO properties — maximum reorder detection
==========================================================

BANNER

#######################################
# Send one request with a randomly chosen payload and classify the response.
# Globals:   BIFROST_URL, TMPDIR, NUM_PAYLOADS (read)
# Arguments: $1 - request index; names the response file and tags the output
# Outputs:   stdout: "HTTP_ERROR:<idx>:<code>" when the status is not 200,
#            otherwise whatever analyze.py prints for the saved response
#######################################
send_and_check() {
  local idx=$1
  local payload_num=$(((RANDOM % NUM_PAYLOADS) + 1))
  local payload_file="$TMPDIR/payload_${payload_num}.json"
  local outfile="$TMPDIR/resp_${idx}.json"

  local httpcode
  # --data-binary posts the file byte-for-byte. Plain `-d @file` strips
  # newlines and carriage returns, so the body on the wire would differ from
  # the payload file that analyze.py reads as the reference input.
  httpcode=$(curl -s -o "$outfile" -w "%{http_code}" \
    -X POST "$BIFROST_URL" \
    -H "Content-Type: application/json" \
    --data-binary @"$payload_file" \
    --max-time 30 2>/dev/null)

  if [ "$httpcode" != "200" ]; then
    echo "HTTP_ERROR:${idx}:${httpcode}"
    return
  fi

  python3 "$TMPDIR/analyze.py" "$outfile" "$idx" "$payload_file"
}

# Export the worker function and the settings it reads.
export -f send_and_check
export BIFROST_URL TMPDIR NUM_PAYLOADS

# Launch RPS background requests, pause one second, repeat DURATION times.
# Each worker appends its status line to results.txt and its mutation
# details (stderr) to details.log.
idx=0
for ((sec = 1; sec <= DURATION; sec++)); do
  for ((batch_slot = 0; batch_slot < RPS; batch_slot++)); do
    idx=$((idx + 1))
    send_and_check "$idx" >> "$TMPDIR/results.txt" 2>> "$TMPDIR/details.log" &
  done
  echo -e " Second $sec/$DURATION — launched $RPS requests"
  sleep 1
done

# Block until every background request has finished.
wait

# Read back the status lines the workers appended, count each outcome, and
# print a line for every request that was not a clean pass. A missing
# results file simply leaves every counter at zero.
results=$(cat "$TMPDIR/results.txt" 2>/dev/null)

OK=0
MUTATED=0
HTTP_ERRORS=0
PARSE_ERRORS=0
NO_RAW=0

while IFS= read -r line; do
  case "$line" in
    OK:*)
      OK=$((OK + 1))
      ;;
    MUTATED:*)
      MUTATED=$((MUTATED + 1))
      # Status lines are colon-separated: <STATUS>:<idx>[:<extra>]
      IFS=: read -r _ idx _ <<< "$line"
      echo -e "${RED} [MUTATED] Request #${idx}${NC}"
      ;;
    HTTP_ERROR:*)
      HTTP_ERRORS=$((HTTP_ERRORS + 1))
      IFS=: read -r _ idx code _ <<< "$line"
      echo -e "${YELLOW} [HTTP ${code}] Request #${idx}${NC}"
      ;;
    NO_RAW_REQUEST:*)
      NO_RAW=$((NO_RAW + 1))
      echo -e "${YELLOW} [NO RAW REQUEST] Request #$(cut -d: -f2 <<< "$line") - is send_back_raw_request enabled?${NC}"
      ;;
    PARSE_ERROR:*)
      PARSE_ERRORS=$((PARSE_ERRORS + 1))
      echo -e "${YELLOW} [PARSE ERROR] ${line}${NC}"
      ;;
  esac
done <<< "$results"

# Final report and exit status:
#   0 - every launched request completed and kept its key ordering
#   1 - at least one request was mutated (details are printed)
#   2 - responses carried no raw_request, so nothing could be compared
#   3 - requests failed (HTTP/parse errors) or never reported a result, so
#       key ordering could not be confirmed for the whole run
TOTAL=$((OK + MUTATED + HTTP_ERRORS + PARSE_ERRORS + NO_RAW))

echo ""
echo "=========================================================="
echo -e "${CYAN}Results (${TOTAL}/${NUM_REQUESTS} completed):${NC}"
echo -e " ${GREEN}OK (order preserved): $OK${NC}"
echo -e " ${RED}MUTATED (reordered): $MUTATED${NC}"
echo -e " ${YELLOW}HTTP errors: $HTTP_ERRORS${NC}"
echo -e " ${YELLOW}No raw request: $NO_RAW${NC}"
echo -e " ${YELLOW}Parse errors: $PARSE_ERRORS${NC}"
echo "=========================================================="

if ((MUTATED > 0)); then
  RATE=$(python3 -c "print(f'{$MUTATED/$TOTAL*100:.1f}')" 2>/dev/null || echo "?")
  echo ""
  echo -e "${RED}MUTATION RATE: ${RATE}% ($MUTATED / $TOTAL)${NC}"
  echo ""
  echo -e "${CYAN}Key order mutations (input vs output):${NC}"
  # Cap the detail dump so a fully mutated run does not flood the terminal.
  head -50 "$TMPDIR/details.log" 2>/dev/null
  exit 1
elif ((NO_RAW > 0)); then
  echo ""
  echo -e "${YELLOW}WARNING: No raw_request in responses. Enable send_back_raw_request in provider config.${NC}"
  exit 2
elif ((HTTP_ERRORS > 0 || PARSE_ERRORS > 0 || TOTAL < NUM_REQUESTS)); then
  # Without this branch a run in which requests failed outright would be
  # reported as a clean pass even though they were never checked.
  echo ""
  echo -e "${YELLOW}WARNING: only $OK of $NUM_REQUESTS requests were verified ($HTTP_ERRORS HTTP errors, $PARSE_ERRORS parse errors, $((NUM_REQUESTS - TOTAL)) without a result). Key ordering could not be confirmed.${NC}"
  exit 3
else
  echo ""
  echo -e "${GREEN}All $OK requests preserved the original JSON key ordering across all $NUM_PAYLOADS payload variants.${NC}"
  exit 0
fi