first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/tests/load_test_parameter_ordering.sh
+++ b/tests/load_test_parameter_ordering.sh
@@ -0,0 +1,763 @@
+#!/bin/bash
+# Load test: detect JSON key ordering mutations in Bifrost's request proxying.
+# Sends randomized payloads with different schema shapes and compares the input
+# key order against what Bifrost actually sent to the provider (via
+# extra_fields.raw_request with json.RawMessage preservation).
+#
+# Validates:
+#   - Tool parameter key ordering at every nesting level (properties, $defs, nested schemas)
+#   - tool_choice serialization (key ordering, no extra zero-value fields like "custom"/"allowed_tools")
+#   - Multiple tool schemas, deeply nested objects, adversarial property orderings
+#
+# Each request randomly picks from 8 different payload shapes to maximize coverage.
+#
+# This catches both:
+#   - Consistent mutations (struct field order overriding client order) — 100% rate
+#   - Sporadic mutations (sync.Pool reuse, concurrency bugs) — variable rate
+#
+# Prerequisites:
+#   - Bifrost running with send_back_raw_request: true on the openai provider
+#   - OpenAI provider pointed at a mock server (any 200 response works)
+#
+# Usage: ./tests/load_test_parameter_ordering.sh [rps] [duration]
+#   rps      - requests per second (default: 20)
+#   duration - how many seconds to run (default: 10)
+
+BIFROST_URL="http://localhost:8080/litellm/v1/chat/completions"
+RPS="${1:-20}"
+DURATION="${2:-10}"
+NUM_REQUESTS=$((RPS * DURATION))
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+TMPDIR=$(mktemp -d)
+trap 'rm -rf "$TMPDIR"' EXIT
+
+# ---------------------------------------------------------------------------
+# Payload 1: Standard — non-alpha properties, $defs after required, function tool_choice
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_1.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "temperature": 0,
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "structured_response",
+        "description": "Generate a structured response",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "reasoning": {
+              "type": "string",
+              "description": "Step by step reasoning",
+              "title": "Reasoning"
+            },
+            "summary": {
+              "type": "string",
+              "description": "The final summary",
+              "title": "Summary"
+            },
+            "tags": {
+              "description": "Relevant tags",
+              "items": {"$ref": "#/$defs/Tag"},
+              "title": "Tags",
+              "type": "array"
+            },
+            "confidence": {
+              "description": "Confidence score",
+              "title": "Confidence",
+              "type": "number"
+            }
+          },
+          "required": ["reasoning", "summary", "tags", "confidence"],
+          "$defs": {
+            "Tag": {
+              "type": "object",
+              "description": "A tag",
+              "required": ["label"],
+              "properties": {
+                "label": {"description": "The tag label", "title": "Label", "type": "string"},
+                "score": {"description": "Relevance score", "title": "Score", "type": "number"}
+              },
+              "title": "Tag"
+            }
+          }
+        }
+      }
+    }
+  ],
+  "tool_choice": {
+    "type": "function",
+    "function": {"name": "structured_response"}
+  }
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 2: Reverse-alpha properties, $defs at TOP, string tool_choice "auto"
+# Property names z_ y_ x_ w_ would get reordered to w_ x_ y_ z_ if sorted
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_2.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "reverse_alpha_tool",
+        "parameters": {
+          "$defs": {
+            "ZItem": {
+              "type": "object",
+              "properties": {
+                "z_name": {"type": "string"},
+                "a_value": {"type": "number"}
+              },
+              "required": ["z_name"]
+            }
+          },
+          "type": "object",
+          "properties": {
+            "z_output": {"type": "string", "description": "Last alphabetically, first in schema"},
+            "y_reasoning": {"type": "string", "description": "Second to last"},
+            "x_items": {
+              "type": "array",
+              "items": {"$ref": "#/$defs/ZItem"},
+              "description": "Third to last"
+            },
+            "w_confidence": {"type": "number", "description": "Fourth to last"}
+          },
+          "required": ["z_output", "y_reasoning"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 3: Multiple tools, deeply nested objects, string tool_choice "required"
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_3.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "deep_nested_tool",
+        "description": "Tool with 3-level nesting",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "output": {
+              "type": "object",
+              "description": "Nested output",
+              "properties": {
+                "verdict": {"type": "string"},
+                "metadata": {
+                  "type": "object",
+                  "properties": {
+                    "timestamp": {"type": "string"},
+                    "source": {"type": "string"},
+                    "confidence": {"type": "number"},
+                    "author": {"type": "string"}
+                  }
+                },
+                "score": {"type": "number"}
+              }
+            },
+            "chain_of_thought": {"type": "string"},
+            "answer": {"type": "string"}
+          },
+          "required": ["output", "answer"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "secondary_tool",
+        "description": "A second tool to verify multi-tool ordering",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "query": {"type": "string", "description": "Search query"},
+            "max_results": {"type": "integer", "description": "Limit"},
+            "filters": {
+              "type": "object",
+              "properties": {
+                "date_range": {"type": "string"},
+                "category": {"type": "string"},
+                "active_only": {"type": "boolean"}
+              }
+            }
+          },
+          "required": ["query"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "required"
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 4: Many properties in adversarial order (zigzag), no $defs, tool_choice "none"
+# Names deliberately interleave early/late alphabet letters
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_4.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "temperature": 0.7,
+  "max_tokens": 500,
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "zigzag_tool",
+        "description": "Properties in zigzag alphabetical order",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "zebra": {"type": "string"},
+            "apple": {"type": "string"},
+            "yarn": {"type": "number"},
+            "banana": {"type": "boolean"},
+            "xenon": {"type": "string"},
+            "cherry": {"type": "integer"},
+            "walnut": {"type": "string"},
+            "date": {"type": "array", "items": {"type": "string"}},
+            "violet": {"type": "number"},
+            "elderberry": {"type": "string"}
+          },
+          "required": ["zebra", "apple", "yarn"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "none"
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 5: $defs with multiple definitions, additionalProperties, nested $ref
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_5.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "AnswerResponseModel",
+        "description": "Realistic pydantic-generated schema",
+        "parameters": {
+          "$defs": {
+            "Citation": {
+              "type": "object",
+              "properties": {
+                "url": {"type": "string", "description": "Source URL"},
+                "text": {"type": "string", "description": "Cited text"},
+                "page_number": {"type": "integer", "description": "Page"}
+              },
+              "required": ["url", "text"]
+            },
+            "Metadata": {
+              "type": "object",
+              "properties": {
+                "model_version": {"type": "string"},
+                "latency_ms": {"type": "number"},
+                "token_count": {"type": "integer"}
+              },
+              "required": ["model_version"]
+            }
+          },
+          "type": "object",
+          "properties": {
+            "answer": {"type": "string", "description": "The answer"},
+            "chain_of_thought": {"type": "string", "description": "Reasoning steps"},
+            "citations": {
+              "type": "array",
+              "items": {"$ref": "#/$defs/Citation"},
+              "description": "Supporting citations"
+            },
+            "is_unanswered": {"type": "boolean", "description": "Whether answerable"},
+            "metadata": {"$ref": "#/$defs/Metadata"}
+          },
+          "required": ["answer", "is_unanswered"],
+          "additionalProperties": false
+        }
+      }
+    }
+  ],
+  "tool_choice": {
+    "type": "function",
+    "function": {"name": "AnswerResponseModel"}
+  }
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 6: Minimal single-property tool, no tool_choice — tests baseline passthrough
+# Also uses top-level keys in non-standard order (tools before messages)
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_6.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "simple_extractor",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "result": {"type": "string", "description": "Extracted result"}
+          },
+          "required": ["result"]
+        }
+      }
+    }
+  ],
+  "messages": [{"role": "user", "content": "test"}],
+  "temperature": 0
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 7: EXACT reproduction of reported bug — Issue 1 + 2 + 3 combined
+# tool_choice: {type, function} with AnswerResponseModel (Issue 1)
+# properties: answer, chain_of_thought, citations, is_unanswered (Issue 2)
+# $defs with Citation at TOP of parameters object (Issue 3)
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_7.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "AnswerResponseModel",
+        "parameters": {
+          "$defs": {
+            "Citation": {
+              "type": "object",
+              "properties": {
+                "url": {"type": "string"},
+                "text": {"type": "string"}
+              },
+              "required": ["url", "text"]
+            }
+          },
+          "properties": {
+            "answer": {"type": "string", "description": "The answer"},
+            "chain_of_thought": {"type": "string", "description": "Reasoning"},
+            "citations": {
+              "type": "array",
+              "items": {"$ref": "#/$defs/Citation"}
+            },
+            "is_unanswered": {"type": "boolean"}
+          },
+          "required": ["answer", "is_unanswered"],
+          "type": "object"
+        }
+      }
+    }
+  ],
+  "tool_choice": {
+    "type": "function",
+    "function": {
+      "name": "AnswerResponseModel"
+    }
+  }
+}
+EOF
+
+# ---------------------------------------------------------------------------
+# Payload 8: tool_choice string variants cycle — ensures "none"/"auto"/"required"
+# pass through as strings and don't get expanded to structs
+# Also: properties in exact reverse alphabetical to maximize reorder detection
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/payload_8.json" << 'EOF'
+{
+  "model": "openai/gpt-4.1",
+  "messages": [{"role": "user", "content": "test"}],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "reverse_order_check",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "zulu": {"type": "string"},
+            "yankee": {"type": "string"},
+            "x_ray": {"type": "string"},
+            "whiskey": {"type": "number"},
+            "victor": {"type": "boolean"},
+            "uniform": {"type": "string"},
+            "tango": {"type": "integer"},
+            "sierra": {"type": "string"},
+            "romeo": {"type": "number"},
+            "quebec": {"type": "string"},
+            "papa": {"type": "boolean"},
+            "oscar": {"type": "string"},
+            "november": {"type": "string"},
+            "mike": {"type": "number"},
+            "lima": {"type": "string"},
+            "kilo": {"type": "boolean"},
+            "juliet": {"type": "string"},
+            "india": {"type": "integer"},
+            "hotel": {"type": "string"},
+            "golf": {"type": "number"},
+            "foxtrot": {"type": "string"},
+            "echo_field": {"type": "boolean"},
+            "delta": {"type": "string"},
+            "charlie": {"type": "number"},
+            "bravo": {"type": "string"},
+            "alpha": {"type": "string"}
+          },
+          "required": ["zulu", "alpha"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}
+EOF
+
+NUM_PAYLOADS=8
+
+# ---------------------------------------------------------------------------
+# Python analyzer — compares input vs output key ordering at every nesting level
+# ---------------------------------------------------------------------------
+cat > "$TMPDIR/analyze.py" << 'PYEOF'
+import json, sys, os
+from collections import OrderedDict
+
+def extract_key_orders(obj, path=""):
+    """Recursively extract key orders from all nested dicts.
+    Returns a dict of {path: [keys]} for every object in the tree."""
+    if not isinstance(obj, dict):
+        return {}
+    result = {path: list(obj.keys())}
+    for key, val in obj.items():
+        child_path = f"{path}.{key}" if path else key
+        if isinstance(val, dict):
+            result.update(extract_key_orders(val, child_path))
+        elif isinstance(val, list):
+            for i, item in enumerate(val):
+                if isinstance(item, dict):
+                    result.update(extract_key_orders(item, f"{child_path}[{i}]"))
+    return result
+
+def get_all_tool_parameters(payload):
+    """Extract tool function parameters from ALL tools in a chat completion payload.
+    Returns list of (tool_name, parameters_dict) tuples."""
+    tools = payload.get("tools", [])
+    result = []
+    for tool in tools:
+        func = tool.get("function", {})
+        name = func.get("name", "unknown")
+        params = func.get("parameters")
+        if params is not None:
+            result.append((name, params))
+    return result
+
+def check_tool_choice(input_payload, raw_request):
+    """Check tool_choice serialization: key ordering and no extra fields.
+    Returns a list of (description, input, output) mutation tuples.
+
+    Catches Issue 1 from the bug report:
+      - Zero-value fields injected: "custom":{"name":""}, "allowed_tools":{"mode":"","tools":null}
+      - Key reordering: "type" moving from first to last position
+      - String tool_choice ("auto"/"none"/"required") being expanded to struct
+    """
+    mutations = []
+    input_tc = input_payload.get("tool_choice")
+    output_tc = raw_request.get("tool_choice")
+    if input_tc is None and output_tc is None:
+        return mutations
+    if input_tc is None and output_tc is not None:
+        mutations.append(("tool_choice (injected)", None, output_tc))
+        return mutations
+    if input_tc is not None and output_tc is None:
+        mutations.append(("tool_choice (dropped)", input_tc, None))
+        return mutations
+
+    # Issue 1: string tool_choice must stay as string, not become struct
+    if isinstance(input_tc, str):
+        if isinstance(output_tc, dict):
+            mutations.append(("tool_choice (string->struct)", input_tc, list(output_tc.keys())))
+        elif output_tc != input_tc:
+            mutations.append(("tool_choice (string)", input_tc, output_tc))
+        return mutations
+
+    if isinstance(input_tc, dict) and isinstance(output_tc, dict):
+        input_keys = list(input_tc.keys())
+        output_keys = list(output_tc.keys())
+
+        # Issue 1.2: Check for zero-value fields from unused union variants
+        # These are the exact fields reported in the bug:
+        zero_value_fields = {"custom", "allowed_tools"}
+        injected = zero_value_fields & (set(output_keys) - set(input_keys))
+        if injected:
+            mutations.append(("tool_choice (zero-value fields injected)", sorted(injected), [
+                f'{k}={json.dumps(output_tc[k])}' for k in sorted(injected)
+            ]))
+
+        # Any other extra fields
+        other_extra = set(output_keys) - set(input_keys) - zero_value_fields
+        if other_extra:
+            mutations.append(("tool_choice (unexpected extra fields)", [], list(other_extra)))
+
+        # Issue 1.2: Check key ordering — "type" should stay first, not move to end
+        if input_keys != output_keys:
+            mutations.append(("tool_choice (key order)", input_keys, output_keys))
+
+        # Recursively check nested key orders (e.g. function object)
+        input_tc_orders = extract_key_orders(input_tc, "tool_choice")
+        output_tc_orders = extract_key_orders(output_tc, "tool_choice")
+        for path, inp_keys in input_tc_orders.items():
+            out_keys = output_tc_orders.get(path)
+            if out_keys is not None and inp_keys != out_keys:
+                mutations.append((path, inp_keys, out_keys))
+    return mutations
+
+def check_defs_position(input_params, output_params, tool_idx):
+    """Check that $defs stays in its original position within the parameters object.
+
+    Catches Issue 3 from the bug report:
+      - $defs at top of parameters moves to bottom after round-trip
+    """
+    mutations = []
+    input_keys = list(input_params.keys())
+    output_keys = list(output_params.keys())
+
+    if "$defs" in input_keys and "$defs" in output_keys:
+        input_pos = input_keys.index("$defs")
+        output_pos = output_keys.index("$defs")
+        if input_pos != output_pos:
+            mutations.append((
+                f"tools[{tool_idx}].parameters ($defs position)",
+                f"$defs at index {input_pos} in {input_keys}",
+                f"$defs at index {output_pos} in {output_keys}"
+            ))
+
+    if "definitions" in input_keys and "definitions" in output_keys:
+        input_pos = input_keys.index("definitions")
+        output_pos = output_keys.index("definitions")
+        if input_pos != output_pos:
+            mutations.append((
+                f"tools[{tool_idx}].parameters (definitions position)",
+                f"definitions at index {input_pos} in {input_keys}",
+                f"definitions at index {output_pos} in {output_keys}"
+            ))
+
+    return mutations
+
+# Analyze response
+resp_file = sys.argv[1]
+idx = sys.argv[2]
+payload_file = sys.argv[3]
+
+try:
+    # Load input payload (the known-good key order)
+    with open(payload_file) as f:
+        input_payload = json.load(f, object_pairs_hook=OrderedDict)
+
+    input_tool_params = get_all_tool_parameters(input_payload)
+    if not input_tool_params:
+        print(f"PARSE_ERROR:{idx}:no tool parameters in input payload")
+        sys.exit(0)
+
+    with open(resp_file) as f:
+        resp = json.load(f, object_pairs_hook=OrderedDict)
+
+    raw_request = resp.get("extra_fields", OrderedDict()).get("raw_request")
+    if raw_request is None:
+        print(f"NO_RAW_REQUEST:{idx}")
+        sys.exit(0)
+
+    output_tool_params = get_all_tool_parameters(raw_request)
+    if not output_tool_params:
+        print(f"PARSE_ERROR:{idx}:no tool parameters in raw_request")
+        sys.exit(0)
+
+    mutations = []
+
+    # Compare each tool's parameter key ordering (Issue 2: properties reordering)
+    for i, (inp_name, inp_params) in enumerate(input_tool_params):
+        if i >= len(output_tool_params):
+            mutations.append((f"tool[{i}] missing", inp_name, "MISSING"))
+            continue
+        out_name, out_params = output_tool_params[i]
+
+        input_orders = extract_key_orders(inp_params, f"tools[{i}].parameters")
+        output_orders = extract_key_orders(out_params, f"tools[{i}].parameters")
+
+        for path, input_keys in input_orders.items():
+            output_keys = output_orders.get(path)
+            if output_keys is None:
+                continue
+            if input_keys != output_keys:
+                mutations.append((path, input_keys, output_keys))
+
+        # Issue 3: $defs position within parameters object
+        mutations.extend(check_defs_position(inp_params, out_params, i))
+
+    # Issue 1: tool_choice serialization (zero-value fields, key ordering)
+    mutations.extend(check_tool_choice(input_payload, raw_request))
+
+    if not mutations:
+        print(f"OK:{idx}")
+    else:
+        payload_num = os.path.basename(payload_file).replace("payload_", "").replace(".json", "")
+        print(f"MUTATED:{idx}")
+        for path, inp, out in mutations:
+            label = path if path else "parameters"
+            print(f"  DETAIL:{idx}:P{payload_num}:{label}: input={inp} -> output={out}", file=sys.stderr)
+
+except Exception as e:
+    print(f"PARSE_ERROR:{idx}:{e}")
+PYEOF
+
+echo -e "${CYAN}JSON Serialization Fidelity — Input vs Output Validator${NC}"
+echo "=========================================================="
+echo "Target:      $BIFROST_URL"
+echo "RPS:         $RPS"
+echo "Duration:    ${DURATION}s"
+echo "Total:       $NUM_REQUESTS requests"
+echo "Payloads:    $NUM_PAYLOADS variants (randomly selected per request)"
+echo ""
+echo "Validates that Bifrost preserves the client's original JSON"
+echo "key ordering (tool parameters + tool_choice) and doesn't"
+echo "inject extra zero-value fields."
+echo ""
+echo "Payload variants:"
+echo "  P1: Standard — non-alpha properties, \$defs after required, function tool_choice"
+echo "  P2: Reverse-alpha properties, \$defs at TOP, tool_choice \"auto\""
+echo "  P3: Multiple tools, 3-level nested objects, tool_choice \"required\""
+echo "  P4: 10 zigzag-ordered properties, no \$defs, tool_choice \"none\""
+echo "  P5: Multiple \$defs, additionalProperties, pydantic-style schema"
+echo "  P6: Minimal single-property tool, no tool_choice, non-standard top-level order"
+echo "  P7: EXACT bug report reproduction — all 3 issues in one payload"
+echo "  P8: 26 reverse-alpha NATO properties — maximum reorder detection"
+echo "=========================================================="
+echo ""
+
+# Send a single request with a random payload and analyze
+send_and_check() {
+    local idx=$1
+    local payload_num=$(( (RANDOM % NUM_PAYLOADS) + 1 ))
+    local payload_file="$TMPDIR/payload_${payload_num}.json"
+    local outfile="$TMPDIR/resp_${idx}.json"
+
+    local httpcode
+    httpcode=$(curl -s -o "$outfile" -w "%{http_code}" \
+        -X POST "$BIFROST_URL" \
+        -H "Content-Type: application/json" \
+        -d @"$payload_file" \
+        --max-time 30 2>/dev/null)
+
+    if [ "$httpcode" != "200" ]; then
+        echo "HTTP_ERROR:${idx}:${httpcode}"
+        return
+    fi
+
+    python3 "$TMPDIR/analyze.py" "$outfile" "$idx" "$payload_file"
+}
+
+export -f send_and_check
+export BIFROST_URL TMPDIR NUM_PAYLOADS
+
+# Send RPS requests per second for DURATION seconds
+idx=0
+for sec in $(seq 1 "$DURATION"); do
+    for _ in $(seq 1 "$RPS"); do
+        ((idx++))
+        send_and_check "$idx" >> "$TMPDIR/results.txt" 2>>"$TMPDIR/details.log" &
+    done
+    echo -e "  Second $sec/$DURATION — launched $RPS requests"
+    sleep 1
+done
+
+# Wait for all background jobs to finish
+wait
+
+results=$(cat "$TMPDIR/results.txt" 2>/dev/null)
+
+OK=0
+MUTATED=0
+HTTP_ERRORS=0
+PARSE_ERRORS=0
+NO_RAW=0
+
+while IFS= read -r line; do
+    case "$line" in
+        OK:*)           ((OK++)) ;;
+        MUTATED:*)
+            ((MUTATED++))
+            idx=$(echo "$line" | cut -d: -f2)
+            echo -e "${RED}  [MUTATED] Request #${idx}${NC}"
+            ;;
+        HTTP_ERROR:*)
+            ((HTTP_ERRORS++))
+            idx=$(echo "$line" | cut -d: -f2)
+            code=$(echo "$line" | cut -d: -f3)
+            echo -e "${YELLOW}  [HTTP ${code}] Request #${idx}${NC}"
+            ;;
+        NO_RAW_REQUEST:*)
+            ((NO_RAW++))
+            echo -e "${YELLOW}  [NO RAW REQUEST] Request #$(echo "$line" | cut -d: -f2) - is send_back_raw_request enabled?${NC}"
+            ;;
+        PARSE_ERROR:*)
+            ((PARSE_ERRORS++))
+            echo -e "${YELLOW}  [PARSE ERROR] ${line}${NC}"
+            ;;
+    esac
+done <<< "$results"
+
+TOTAL=$((OK + MUTATED + HTTP_ERRORS + PARSE_ERRORS + NO_RAW))
+
+echo ""
+echo "=========================================================="
+echo -e "${CYAN}Results (${TOTAL}/${NUM_REQUESTS} completed):${NC}"
+echo -e "  ${GREEN}OK (order preserved):  $OK${NC}"
+echo -e "  ${RED}MUTATED (reordered):   $MUTATED${NC}"
+echo -e "  ${YELLOW}HTTP errors:           $HTTP_ERRORS${NC}"
+echo -e "  ${YELLOW}No raw request:        $NO_RAW${NC}"
+echo -e "  ${YELLOW}Parse errors:          $PARSE_ERRORS${NC}"
+echo "=========================================================="
+
+if [ "$MUTATED" -gt 0 ]; then
+    RATE=$(python3 -c "print(f'{$MUTATED/$TOTAL*100:.1f}')" 2>/dev/null || echo "?")
+    echo ""
+    echo -e "${RED}MUTATION RATE: ${RATE}% ($MUTATED / $TOTAL)${NC}"
+    echo ""
+    echo -e "${CYAN}Key order mutations (input vs output):${NC}"
+    cat "$TMPDIR/details.log" 2>/dev/null | head -50
+    exit 1
+elif [ "$NO_RAW" -gt 0 ]; then
+    echo ""
+    echo -e "${YELLOW}WARNING: No raw_request in responses. Enable send_back_raw_request in provider config.${NC}"
+    exit 2
+else
+    echo ""
+    echo -e "${GREEN}All $OK requests preserved the original JSON key ordering across all $NUM_PAYLOADS payload variants.${NC}"
+    exit 0
+fi