first commit
This commit is contained in:
763
tests/load_test_parameter_ordering.sh
Executable file
763
tests/load_test_parameter_ordering.sh
Executable file
@@ -0,0 +1,763 @@
|
||||
#!/bin/bash
# Load test: detect JSON key ordering mutations in Bifrost's request proxying.
# Sends randomized payloads with different schema shapes and compares the input
# key order against what Bifrost actually sent to the provider (via
# extra_fields.raw_request with json.RawMessage preservation).
#
# Validates:
#   - Tool parameter key ordering at every nesting level (properties, $defs, nested schemas)
#   - tool_choice serialization (key ordering, no extra zero-value fields like "custom"/"allowed_tools")
#   - Multiple tool schemas, deeply nested objects, adversarial property orderings
#
# Each request randomly picks from 8 different payload shapes to maximize coverage.
#
# This catches both:
#   - Consistent mutations (struct field order overriding client order) — 100% rate
#   - Sporadic mutations (sync.Pool reuse, concurrency bugs) — variable rate
#
# Prerequisites:
#   - Bifrost running with send_back_raw_request: true on the openai provider
#   - OpenAI provider pointed at a mock server (any 200 response works)
#
# Usage: ./tests/load_test_parameter_ordering.sh [rps] [duration]
#   rps      - requests per second (default: 20)
#   duration - how many seconds to run (default: 10)
#
# Environment:
#   BIFROST_URL - optional endpoint override
#                 (default: http://localhost:8080/litellm/v1/chat/completions)

BIFROST_URL="${BIFROST_URL:-http://localhost:8080/litellm/v1/chat/completions}"
RPS="${1:-20}"
DURATION="${2:-10}"

# Both arguments feed shell arithmetic and loop bounds, so reject anything that
# is not a positive decimal integer (a leading zero would be read as octal).
for _arg in "$RPS" "$DURATION"; do
  if ! [[ "$_arg" =~ ^[1-9][0-9]*$ ]]; then
    printf 'Usage: %s [rps] [duration]  (both must be positive integers)\n' "$0" >&2
    exit 64
  fi
done
unset _arg

NUM_REQUESTS=$((RPS * DURATION))

# ANSI colour escapes; they are interpreted later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Scratch directory for payloads, responses, and result logs; removed on exit.
# NOTE: this reuses the conventional TMPDIR variable name, which the rest of
# the script reads and exports.
TMPDIR=$(mktemp -d) || { printf 'mktemp -d failed\n' >&2; exit 1; }
trap 'rm -rf -- "$TMPDIR"' EXIT

# ---------------------------------------------------------------------------
# Payload 1: Standard — non-alpha properties, $defs after required, function tool_choice
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_1.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "structured_response",
        "description": "Generate a structured response",
        "parameters": {
          "type": "object",
          "properties": {
            "reasoning": {
              "type": "string",
              "description": "Step by step reasoning",
              "title": "Reasoning"
            },
            "summary": {
              "type": "string",
              "description": "The final summary",
              "title": "Summary"
            },
            "tags": {
              "description": "Relevant tags",
              "items": {"$ref": "#/$defs/Tag"},
              "title": "Tags",
              "type": "array"
            },
            "confidence": {
              "description": "Confidence score",
              "title": "Confidence",
              "type": "number"
            }
          },
          "required": ["reasoning", "summary", "tags", "confidence"],
          "$defs": {
            "Tag": {
              "type": "object",
              "description": "A tag",
              "required": ["label"],
              "properties": {
                "label": {"description": "The tag label", "title": "Label", "type": "string"},
                "score": {"description": "Relevance score", "title": "Score", "type": "number"}
              },
              "title": "Tag"
            }
          }
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {"name": "structured_response"}
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 2: Reverse-alpha properties, $defs at TOP, string tool_choice "auto"
# Property names z_ y_ x_ w_ would get reordered to w_ x_ y_ z_ if sorted
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_2.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "reverse_alpha_tool",
        "parameters": {
          "$defs": {
            "ZItem": {
              "type": "object",
              "properties": {
                "z_name": {"type": "string"},
                "a_value": {"type": "number"}
              },
              "required": ["z_name"]
            }
          },
          "type": "object",
          "properties": {
            "z_output": {"type": "string", "description": "Last alphabetically, first in schema"},
            "y_reasoning": {"type": "string", "description": "Second to last"},
            "x_items": {
              "type": "array",
              "items": {"$ref": "#/$defs/ZItem"},
              "description": "Third to last"
            },
            "w_confidence": {"type": "number", "description": "Fourth to last"}
          },
          "required": ["z_output", "y_reasoning"]
        }
      }
    }
  ],
  "tool_choice": "auto"
}
EOF

# ---------------------------------------------------------------------------
# Payload 3: Multiple tools, deeply nested objects, string tool_choice "required"
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_3.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "deep_nested_tool",
        "description": "Tool with 3-level nesting",
        "parameters": {
          "type": "object",
          "properties": {
            "output": {
              "type": "object",
              "description": "Nested output",
              "properties": {
                "verdict": {"type": "string"},
                "metadata": {
                  "type": "object",
                  "properties": {
                    "timestamp": {"type": "string"},
                    "source": {"type": "string"},
                    "confidence": {"type": "number"},
                    "author": {"type": "string"}
                  }
                },
                "score": {"type": "number"}
              }
            },
            "chain_of_thought": {"type": "string"},
            "answer": {"type": "string"}
          },
          "required": ["output", "answer"]
        }
      }
    },
    {
      "type": "function",
      "function": {
        "name": "secondary_tool",
        "description": "A second tool to verify multi-tool ordering",
        "parameters": {
          "type": "object",
          "properties": {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {"type": "integer", "description": "Limit"},
            "filters": {
              "type": "object",
              "properties": {
                "date_range": {"type": "string"},
                "category": {"type": "string"},
                "active_only": {"type": "boolean"}
              }
            }
          },
          "required": ["query"]
        }
      }
    }
  ],
  "tool_choice": "required"
}
EOF

# ---------------------------------------------------------------------------
# Payload 4: Many properties in adversarial order (zigzag), no $defs, tool_choice "none"
# Names deliberately interleave early/late alphabet letters
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_4.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0.7,
  "max_tokens": 500,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "zigzag_tool",
        "description": "Properties in zigzag alphabetical order",
        "parameters": {
          "type": "object",
          "properties": {
            "zebra": {"type": "string"},
            "apple": {"type": "string"},
            "yarn": {"type": "number"},
            "banana": {"type": "boolean"},
            "xenon": {"type": "string"},
            "cherry": {"type": "integer"},
            "walnut": {"type": "string"},
            "date": {"type": "array", "items": {"type": "string"}},
            "violet": {"type": "number"},
            "elderberry": {"type": "string"}
          },
          "required": ["zebra", "apple", "yarn"]
        }
      }
    }
  ],
  "tool_choice": "none"
}
EOF

# ---------------------------------------------------------------------------
# Payload 5: $defs with multiple definitions, additionalProperties, nested $ref
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_5.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "AnswerResponseModel",
        "description": "Realistic pydantic-generated schema",
        "parameters": {
          "$defs": {
            "Citation": {
              "type": "object",
              "properties": {
                "url": {"type": "string", "description": "Source URL"},
                "text": {"type": "string", "description": "Cited text"},
                "page_number": {"type": "integer", "description": "Page"}
              },
              "required": ["url", "text"]
            },
            "Metadata": {
              "type": "object",
              "properties": {
                "model_version": {"type": "string"},
                "latency_ms": {"type": "number"},
                "token_count": {"type": "integer"}
              },
              "required": ["model_version"]
            }
          },
          "type": "object",
          "properties": {
            "answer": {"type": "string", "description": "The answer"},
            "chain_of_thought": {"type": "string", "description": "Reasoning steps"},
            "citations": {
              "type": "array",
              "items": {"$ref": "#/$defs/Citation"},
              "description": "Supporting citations"
            },
            "is_unanswered": {"type": "boolean", "description": "Whether answerable"},
            "metadata": {"$ref": "#/$defs/Metadata"}
          },
          "required": ["answer", "is_unanswered"],
          "additionalProperties": false
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {"name": "AnswerResponseModel"}
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 6: Minimal single-property tool, no tool_choice — tests baseline passthrough
# Also uses top-level keys in non-standard order (tools before messages)
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_6.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "simple_extractor",
        "parameters": {
          "type": "object",
          "properties": {
            "result": {"type": "string", "description": "Extracted result"}
          },
          "required": ["result"]
        }
      }
    }
  ],
  "messages": [{"role": "user", "content": "test"}],
  "temperature": 0
}
EOF

# ---------------------------------------------------------------------------
# Payload 7: EXACT reproduction of reported bug — Issue 1 + 2 + 3 combined
#   tool_choice: {type, function} with AnswerResponseModel (Issue 1)
#   properties: answer, chain_of_thought, citations, is_unanswered (Issue 2)
#   $defs with Citation at TOP of parameters object (Issue 3)
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_7.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "AnswerResponseModel",
        "parameters": {
          "$defs": {
            "Citation": {
              "type": "object",
              "properties": {
                "url": {"type": "string"},
                "text": {"type": "string"}
              },
              "required": ["url", "text"]
            }
          },
          "properties": {
            "answer": {"type": "string", "description": "The answer"},
            "chain_of_thought": {"type": "string", "description": "Reasoning"},
            "citations": {
              "type": "array",
              "items": {"$ref": "#/$defs/Citation"}
            },
            "is_unanswered": {"type": "boolean"}
          },
          "required": ["answer", "is_unanswered"],
          "type": "object"
        }
      }
    }
  ],
  "tool_choice": {
    "type": "function",
    "function": {
      "name": "AnswerResponseModel"
    }
  }
}
EOF

# ---------------------------------------------------------------------------
# Payload 8: tool_choice string variants cycle — ensures "none"/"auto"/"required"
# pass through as strings and don't get expanded to structs
# Also: properties in exact reverse alphabetical to maximize reorder detection
# (Key order inside the heredoc is the test fixture: do not reorder or sort.)
# ---------------------------------------------------------------------------
cat > "$TMPDIR/payload_8.json" << 'EOF'
{
  "model": "openai/gpt-4.1",
  "messages": [{"role": "user", "content": "test"}],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "reverse_order_check",
        "parameters": {
          "type": "object",
          "properties": {
            "zulu": {"type": "string"},
            "yankee": {"type": "string"},
            "x_ray": {"type": "string"},
            "whiskey": {"type": "number"},
            "victor": {"type": "boolean"},
            "uniform": {"type": "string"},
            "tango": {"type": "integer"},
            "sierra": {"type": "string"},
            "romeo": {"type": "number"},
            "quebec": {"type": "string"},
            "papa": {"type": "boolean"},
            "oscar": {"type": "string"},
            "november": {"type": "string"},
            "mike": {"type": "number"},
            "lima": {"type": "string"},
            "kilo": {"type": "boolean"},
            "juliet": {"type": "string"},
            "india": {"type": "integer"},
            "hotel": {"type": "string"},
            "golf": {"type": "number"},
            "foxtrot": {"type": "string"},
            "echo_field": {"type": "boolean"},
            "delta": {"type": "string"},
            "charlie": {"type": "number"},
            "bravo": {"type": "string"},
            "alpha": {"type": "string"}
          },
          "required": ["zulu", "alpha"]
        }
      }
    }
  ],
  "tool_choice": "auto"
}
EOF

# Number of payload_N.json fixtures; send_and_check picks N in 1..NUM_PAYLOADS.
NUM_PAYLOADS=8

# ---------------------------------------------------------------------------
# Python analyzer — compares input vs output key ordering at every nesting level
#
# Invoked as: analyze.py <response_file> <request_idx> <payload_file>
# Prints exactly one status line on stdout, one of:
#   OK:<idx> | MUTATED:<idx> | NO_RAW_REQUEST:<idx> | PARSE_ERROR:<idx>:<reason>
# and, for MUTATED, one DETAIL line per mutation on stderr.
# ---------------------------------------------------------------------------
cat > "$TMPDIR/analyze.py" << 'PYEOF'
import json, sys, os
from collections import OrderedDict


def extract_key_orders(obj, path=""):
    """Recursively extract key orders from all nested dicts.

    Returns a dict of {path: [keys]} for every object in the tree. Child
    paths are built as "parent.key"; dicts that sit directly inside a list
    get "parent.key[i]". Non-dict input yields an empty mapping.
    """
    if not isinstance(obj, dict):
        return {}
    result = {path: list(obj.keys())}
    for key, val in obj.items():
        child_path = f"{path}.{key}" if path else key
        if isinstance(val, dict):
            result.update(extract_key_orders(val, child_path))
        elif isinstance(val, list):
            for i, item in enumerate(val):
                if isinstance(item, dict):
                    result.update(extract_key_orders(item, f"{child_path}[{i}]"))
    return result


def get_all_tool_parameters(payload):
    """Extract tool function parameters from ALL tools in a chat completion payload.

    Returns list of (tool_name, parameters_dict) tuples. Tools without a
    "parameters" entry are skipped. A missing or null "tools"/"function"
    value is treated as empty instead of raising.
    """
    tools = payload.get("tools") or []
    result = []
    for tool in tools:
        func = (tool.get("function") if isinstance(tool, dict) else None) or {}
        name = func.get("name", "unknown")
        params = func.get("parameters")
        if params is not None:
            result.append((name, params))
    return result


def check_tool_choice(input_payload, raw_request):
    """Check tool_choice serialization: key ordering and no extra fields.

    Returns a list of (description, input, output) mutation tuples.

    Catches Issue 1 from the bug report:
      - Zero-value fields injected: "custom":{"name":""}, "allowed_tools":{"mode":"","tools":null}
      - Key reordering: "type" moving from first to last position
      - String tool_choice ("auto"/"none"/"required") being expanded to struct
    Also reports a struct tool_choice that comes back as a non-struct value.
    """
    mutations = []
    input_tc = input_payload.get("tool_choice")
    output_tc = raw_request.get("tool_choice")

    if input_tc is None and output_tc is None:
        return mutations
    if input_tc is None and output_tc is not None:
        mutations.append(("tool_choice (injected)", None, output_tc))
        return mutations
    if input_tc is not None and output_tc is None:
        mutations.append(("tool_choice (dropped)", input_tc, None))
        return mutations

    # Issue 1: string tool_choice must stay as string, not become struct
    if isinstance(input_tc, str):
        if isinstance(output_tc, dict):
            mutations.append(("tool_choice (string->struct)", input_tc, list(output_tc.keys())))
        elif output_tc != input_tc:
            mutations.append(("tool_choice (string)", input_tc, output_tc))
        return mutations

    # The reverse direction: a struct must not collapse into a scalar/list.
    if isinstance(input_tc, dict) and not isinstance(output_tc, dict):
        mutations.append(("tool_choice (struct->non-struct)", list(input_tc.keys()), output_tc))
        return mutations

    if isinstance(input_tc, dict) and isinstance(output_tc, dict):
        input_keys = list(input_tc.keys())
        output_keys = list(output_tc.keys())
        added = set(output_keys) - set(input_keys)

        # Issue 1.2: Check for zero-value fields from unused union variants
        # These are the exact fields reported in the bug:
        zero_value_fields = {"custom", "allowed_tools"}
        injected = zero_value_fields & added
        if injected:
            mutations.append(("tool_choice (zero-value fields injected)", sorted(injected), [
                f'{k}={json.dumps(output_tc[k])}' for k in sorted(injected)
            ]))

        # Any other extra fields (sorted so the report is deterministic)
        other_extra = added - zero_value_fields
        if other_extra:
            mutations.append(("tool_choice (unexpected extra fields)", [], sorted(other_extra)))

        # Issue 1.2: Check key ordering — "type" should stay first, not move to end
        if input_keys != output_keys:
            mutations.append(("tool_choice (key order)", input_keys, output_keys))

        # Recursively check nested key orders (e.g. function object)
        input_tc_orders = extract_key_orders(input_tc, "tool_choice")
        output_tc_orders = extract_key_orders(output_tc, "tool_choice")
        for path, inp_keys in input_tc_orders.items():
            out_keys = output_tc_orders.get(path)
            if out_keys is not None and inp_keys != out_keys:
                mutations.append((path, inp_keys, out_keys))

    return mutations


def check_defs_position(input_params, output_params, tool_idx):
    """Check that $defs stays in its original position within the parameters object.

    Catches Issue 3 from the bug report:
      - $defs at top of parameters moves to bottom after round-trip
    The same check is applied to the older "definitions" keyword.
    """
    mutations = []
    input_keys = list(input_params.keys())
    output_keys = list(output_params.keys())

    for keyword in ("$defs", "definitions"):
        if keyword in input_keys and keyword in output_keys:
            input_pos = input_keys.index(keyword)
            output_pos = output_keys.index(keyword)
            if input_pos != output_pos:
                mutations.append((
                    f"tools[{tool_idx}].parameters ({keyword} position)",
                    f"{keyword} at index {input_pos} in {input_keys}",
                    f"{keyword} at index {output_pos} in {output_keys}"
                ))

    return mutations


# Analyze response
resp_file = sys.argv[1]
idx = sys.argv[2]
payload_file = sys.argv[3]

try:
    # Load input payload (the known-good key order)
    with open(payload_file) as f:
        input_payload = json.load(f, object_pairs_hook=OrderedDict)

    input_tool_params = get_all_tool_parameters(input_payload)
    if not input_tool_params:
        print(f"PARSE_ERROR:{idx}:no tool parameters in input payload")
        sys.exit(0)

    with open(resp_file) as f:
        resp = json.load(f, object_pairs_hook=OrderedDict)

    # `or {}` so that "extra_fields": null is reported as NO_RAW_REQUEST
    # rather than surfacing as an AttributeError parse error.
    raw_request = (resp.get("extra_fields") or {}).get("raw_request")
    if raw_request is None:
        print(f"NO_RAW_REQUEST:{idx}")
        sys.exit(0)

    output_tool_params = get_all_tool_parameters(raw_request)
    if not output_tool_params:
        print(f"PARSE_ERROR:{idx}:no tool parameters in raw_request")
        sys.exit(0)

    mutations = []

    # Compare each tool's parameter key ordering (Issue 2: properties reordering)
    for i, (inp_name, inp_params) in enumerate(input_tool_params):
        if i >= len(output_tool_params):
            mutations.append((f"tool[{i}] missing", inp_name, "MISSING"))
            continue
        out_name, out_params = output_tool_params[i]

        # A different name at the same index means tools were reordered or renamed.
        if inp_name != out_name:
            mutations.append((f"tool[{i}] name", inp_name, out_name))

        input_orders = extract_key_orders(inp_params, f"tools[{i}].parameters")
        output_orders = extract_key_orders(out_params, f"tools[{i}].parameters")

        for path, input_keys in input_orders.items():
            output_keys = output_orders.get(path)
            if output_keys is None:
                # Path absent in output: the parent's key list already differs.
                continue
            if input_keys != output_keys:
                mutations.append((path, input_keys, output_keys))

        # Issue 3: $defs position within parameters object
        if isinstance(inp_params, dict) and isinstance(out_params, dict):
            mutations.extend(check_defs_position(inp_params, out_params, i))

    # Tools present in the outgoing request that the client never sent.
    for j in range(len(input_tool_params), len(output_tool_params)):
        mutations.append((f"tool[{j}] unexpected", "ABSENT", output_tool_params[j][0]))

    # Issue 1: tool_choice serialization (zero-value fields, key ordering)
    mutations.extend(check_tool_choice(input_payload, raw_request))

    if not mutations:
        print(f"OK:{idx}")
    else:
        payload_num = os.path.basename(payload_file).replace("payload_", "").replace(".json", "")
        print(f"MUTATED:{idx}")
        for path, inp, out in mutations:
            label = path if path else "parameters"
            print(f" DETAIL:{idx}:P{payload_num}:{label}: input={inp} -> output={out}", file=sys.stderr)

except Exception as e:
    # sys.exit() raises SystemExit, which is not an Exception, so the early
    # exits above pass through untouched.
    print(f"PARSE_ERROR:{idx}:{e}")
PYEOF

# Print the run configuration and a legend of the payload variants.
echo -e "${CYAN}JSON Serialization Fidelity — Input vs Output Validator${NC}"
echo "=========================================================="
echo "Target: $BIFROST_URL"
echo "RPS: $RPS"
echo "Duration: ${DURATION}s"
echo "Total: $NUM_REQUESTS requests"
echo "Payloads: $NUM_PAYLOADS variants (randomly selected per request)"
echo ""
echo "Validates that Bifrost preserves the client's original JSON"
echo "key ordering (tool parameters + tool_choice) and doesn't"
echo "inject extra zero-value fields."
echo ""
echo "Payload variants:"
echo " P1: Standard — non-alpha properties, \$defs after required, function tool_choice"
echo " P2: Reverse-alpha properties, \$defs at TOP, tool_choice \"auto\""
echo " P3: Multiple tools, 3-level nested objects, tool_choice \"required\""
echo " P4: 10 zigzag-ordered properties, no \$defs, tool_choice \"none\""
echo " P5: Multiple \$defs, additionalProperties, pydantic-style schema"
echo " P6: Minimal single-property tool, no tool_choice, non-standard top-level order"
echo " P7: EXACT bug report reproduction — all 3 issues in one payload"
echo " P8: 26 reverse-alpha NATO properties — maximum reorder detection"
echo "=========================================================="
echo ""

#######################################
# Send a single request with a random payload and analyze the response.
# Globals:   NUM_PAYLOADS, TMPDIR, BIFROST_URL (read)
# Arguments: $1 - request index, used in the response filename and status line
# Outputs:   one status line on stdout: HTTP_ERROR:<idx>:<code> when the HTTP
#            status is not 200, otherwise whatever analyze.py prints
#######################################
send_and_check() {
  local idx=$1
  local payload_num=$(( (RANDOM % NUM_PAYLOADS) + 1 ))
  local payload_file="$TMPDIR/payload_${payload_num}.json"
  local outfile="$TMPDIR/resp_${idx}.json"

  # Declared separately so the assignment below is not masked by `local`.
  local httpcode
  # --data-binary sends the fixture byte-for-byte; plain -d/--data strips
  # carriage returns and newlines when reading from a file.
  httpcode=$(curl -s -o "$outfile" -w "%{http_code}" \
    -X POST "$BIFROST_URL" \
    -H "Content-Type: application/json" \
    --data-binary @"$payload_file" \
    --max-time 30 2>/dev/null)

  if [[ "$httpcode" != "200" ]]; then
    # An empty code means curl produced no output at all; report it as 000.
    echo "HTTP_ERROR:${idx}:${httpcode:-000}"
    return
  fi

  python3 "$TMPDIR/analyze.py" "$outfile" "$idx" "$payload_file"
}

export -f send_and_check
export BIFROST_URL TMPDIR NUM_PAYLOADS

# Send RPS requests per second for DURATION seconds.
# Each request runs as a background job; its status line is appended to
# results.txt and its stderr (DETAIL lines) to details.log.
idx=0
for ((sec = 1; sec <= DURATION; sec++)); do
  for ((slot = 0; slot < RPS; slot++)); do
    # Plain assignment rather than ((idx++)), whose exit status is non-zero
    # when the pre-increment value is 0.
    idx=$((idx + 1))
    send_and_check "$idx" >> "$TMPDIR/results.txt" 2>> "$TMPDIR/details.log" &
  done
  echo -e " Second $sec/$DURATION — launched $RPS requests"
  sleep 1
done

# Wait for all background jobs to finish
wait

#######################################
# Count the status lines produced by send_and_check and echo a line for every
# request that was not OK.
# Globals:   OK, MUTATED, HTTP_ERRORS, PARSE_ERRORS, NO_RAW (reset, then written)
#            RED, YELLOW, NC (read)
# Arguments: $1 - results file, one status line per request
# Outputs:   a line on stdout for each non-OK request
# Returns:   0, including when the file is missing (all counters stay 0)
#######################################
tally_results() {
  local results_file=$1
  local line kind req_idx code

  OK=0
  MUTATED=0
  HTTP_ERRORS=0
  PARSE_ERRORS=0
  NO_RAW=0

  [[ -r "$results_file" ]] || return 0

  # `|| [[ -n $line ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Split "KIND:idx:rest" once with the shell instead of forking cut.
    IFS=: read -r kind req_idx code <<< "$line"
    case "$line" in
      OK:*)
        OK=$((OK + 1))
        ;;
      MUTATED:*)
        MUTATED=$((MUTATED + 1))
        echo -e "${RED} [MUTATED] Request #${req_idx}${NC}"
        ;;
      HTTP_ERROR:*)
        HTTP_ERRORS=$((HTTP_ERRORS + 1))
        echo -e "${YELLOW} [HTTP ${code}] Request #${req_idx}${NC}"
        ;;
      NO_RAW_REQUEST:*)
        NO_RAW=$((NO_RAW + 1))
        echo -e "${YELLOW} [NO RAW REQUEST] Request #${req_idx} - is send_back_raw_request enabled?${NC}"
        ;;
      PARSE_ERROR:*)
        PARSE_ERRORS=$((PARSE_ERRORS + 1))
        echo -e "${YELLOW} [PARSE ERROR] ${line}${NC}"
        ;;
    esac
  done < "$results_file"
}

tally_results "$TMPDIR/results.txt"

TOTAL=$((OK + MUTATED + HTTP_ERRORS + PARSE_ERRORS + NO_RAW))

echo ""
echo "=========================================================="
echo -e "${CYAN}Results (${TOTAL}/${NUM_REQUESTS} completed):${NC}"
echo -e " ${GREEN}OK (order preserved): $OK${NC}"
echo -e " ${RED}MUTATED (reordered): $MUTATED${NC}"
echo -e " ${YELLOW}HTTP errors: $HTTP_ERRORS${NC}"
echo -e " ${YELLOW}No raw request: $NO_RAW${NC}"
echo -e " ${YELLOW}Parse errors: $PARSE_ERRORS${NC}"
echo "=========================================================="

# Final verdict and exit status:
#   1 - at least one request was mutated
#   2 - raw_request missing from responses (provider config problem)
#   3 - inconclusive: HTTP/parse errors occurred or nothing completed OK
#   0 - every request completed and preserved key ordering
if (( MUTATED > 0 )); then
  # TOTAL is at least MUTATED here, so the division is safe.
  RATE=$(awk -v m="$MUTATED" -v t="$TOTAL" 'BEGIN { printf "%.1f", (m / t) * 100 }' 2>/dev/null) || RATE="?"
  echo ""
  echo -e "${RED}MUTATION RATE: ${RATE}% ($MUTATED / $TOTAL)${NC}"
  echo ""
  echo -e "${CYAN}Key order mutations (input vs output):${NC}"
  head -n 50 "$TMPDIR/details.log" 2>/dev/null
  exit 1
elif (( NO_RAW > 0 )); then
  echo ""
  echo -e "${YELLOW}WARNING: No raw_request in responses. Enable send_back_raw_request in provider config.${NC}"
  exit 2
elif (( HTTP_ERRORS > 0 || PARSE_ERRORS > 0 || OK == 0 )); then
  # Without this branch a run where every request failed would still print
  # the success message below and exit 0.
  echo ""
  echo -e "${YELLOW}WARNING: Inconclusive run: $OK OK, $HTTP_ERRORS HTTP errors, $PARSE_ERRORS parse errors. Is Bifrost reachable at the target URL?${NC}"
  exit 3
else
  echo ""
  echo -e "${GREEN}All $OK requests preserved the original JSON key ordering across all $NUM_PAYLOADS payload variants.${NC}"
  exit 0
fi
Reference in New Issue
Block a user