#!/bin/bash

# Load Test Script for Bifrost
# Runs a load test against bifrost-http with a mocker provider
# Usage: ./load-test.sh
#
# This script:
# 1. Builds bifrost-http and mocker locally
# 2. Creates a config.json with mocker provider (OpenAI-style)
# 3. Starts mocker with OVERHEAD_MOCKER_LATENCY_MS latency and bifrost-http
# 4. Runs a calibration (Vegeta -> Mocker direct) to measure Vegeta+network baseline
# 5. Runs the overhead test (Vegeta -> Bifrost -> Mocker) to measure total
# 6. Subtracts calibration from test to isolate Bifrost proxy overhead
#    (includes local network hop, JSON parsing/unparsing, plugins, and mocker jitter)
# 7. Restarts mocker and bifrost with STRESS_MOCKER_LATENCY_MS latency for a
#    sustained concurrency stress test (run twice, with an idle period between)
# 8. Asserts overhead < tiered thresholds (per percentile) and stress test has 100% success rate
#
# Required tools: go, git, curl, bc, and one of jq / python3. lsof is used
# opportunistically to free the ports. Vegeta is installed on demand.

set -e

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
# NOTE(review): BIFROST_HTTP_DIR is not referenced anywhere in this script;
# build_bifrost_http builds from TRANSPORTS_DIR. Confirm which one is intended.
BIFROST_HTTP_DIR="${REPO_ROOT}/transports/bifrost-http"
TRANSPORTS_DIR="${REPO_ROOT}/transports"
WORK_DIR="${SCRIPT_DIR}"
BENCHMARKING_DIR="${REPO_ROOT}/../bifrost-benchmarking"
MOCKER_DIR="${BENCHMARKING_DIR}/mocker"
BIFROST_PORT=8080
MOCKER_PORT=8000
RATE=1000
MAX_WORKERS=12000
OVERHEAD_DURATION=30            # overhead measurement duration (seconds)
STRESS_DURATION=30              # stress test duration (seconds)
OVERHEAD_MOCKER_LATENCY_MS=1000 # 1 second latency for overhead measurement
STRESS_MOCKER_LATENCY_MS=1000   # 1 second latency for stress test

# Bifrost sizing written into config.json and echoed in log output, so the
# two can never drift apart.
INITIAL_POOL_SIZE=20000
PROVIDER_CONCURRENCY=20000
PROVIDER_BUFFER_SIZE=40000

# Tiered overhead thresholds (µs) — these cover the full proxy cost:
# local network hop, JSON parsing/unparsing, plugins, and mocker jitter.
# At ${RATE} RPS × ${OVERHEAD_MOCKER_LATENCY_MS}ms latency ≈ 1000 concurrent requests.
MAX_OVERHEAD_MEAN_US=5000  # mean overhead threshold (5ms)
MAX_OVERHEAD_P50_US=5000   # p50 overhead threshold (5ms)
MAX_OVERHEAD_P90_US=10000  # p90 overhead threshold (10ms)
MAX_OVERHEAD_P95_US=20000  # p95 overhead threshold (20ms)
MAX_OVERHEAD_P99_US=100000 # p99 overhead threshold (100ms)

# Results storage for summary table
RESULTS_FILE="${WORK_DIR}/load-test-results.md"
RESULTS_JSON="${WORK_DIR}/load-test-results.json"

# Background process bookkeeping (empty = not running)
BIFROST_PID=""
MOCKER_PID=""

# Process stats monitoring
STATS_PID=""
STATS_FILE="${WORK_DIR}/bifrost-stats.csv"

# Overhead-phase process stats (saved before bifrost restart)
OVERHEAD_STATS_CPU_AVG=""
OVERHEAD_STATS_CPU_PEAK=""
OVERHEAD_STATS_RSS_AVG=""
OVERHEAD_STATS_RSS_PEAK=""

# Calibration results per bucket (Vegeta -> Mocker direct)
CAL_MIN_NS=0
CAL_MEAN_NS=0
CAL_50_NS=0
CAL_90_NS=0
CAL_95_NS=0
CAL_99_NS=0
CAL_MAX_NS=0

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# ------------------------------------------------------------
# Logging helpers: $1 = message, printed to stdout with a colored tag
# ------------------------------------------------------------
log_info() {
  echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
  echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $1"
}

# ------------------------------------------------------------
# Small numeric helpers (all arithmetic on vegeta nanosecond values)
# ------------------------------------------------------------

# Print $1 nanoseconds as microseconds with bc scale=2.
ns_to_us() {
  echo "scale=2; $1 / 1000" | bc
}

# Print $1 nanoseconds as milliseconds with bc scale=2.
ns_to_ms() {
  echo "scale=2; $1 / 1000000" | bc
}

# Print ($1 - $2) nanoseconds as microseconds, formatted with two decimals.
overhead_us() {
  printf "%.2f" "$(echo "scale=4; ($1 - $2) / 1000" | bc)"
}

# Print a 0..1 success ratio ($1) as a percentage with two decimals.
ratio_to_pct() {
  printf "%.2f" "$(echo "scale=4; $1 * 100" | bc)"
}

# Write a single-target Vegeta JSON targets file.
# Arguments: $1 = output file, $2 = URL, $3 = raw JSON payload,
#            $4 = optional Authorization header value
# The body is base64-encoded as required by Vegeta's JSON target format.
# Newlines are stripped because GNU base64 wraps its output at 76 columns,
# which would otherwise put a raw newline inside the JSON string.
write_vegeta_target() {
  local target_file=$1
  local url=$2
  local payload=$3
  local auth=${4:-}
  local body headers

  body=$(printf '%s' "${payload}" | base64 | tr -d '\n')
  headers='"Content-Type": ["application/json"]'
  if [ -n "${auth}" ]; then
    headers="${headers}, \"Authorization\": [\"${auth}\"]"
  fi
  printf '{"method": "POST", "url": "%s", "header": {%s}, "body": "%s"}\n' \
    "${url}" "${headers}" "${body}" > "${target_file}"
}

# Cleanup function to kill background processes.
# Installed as the EXIT trap, so it runs on success and on every failure path.
cleanup() {
  log_info "Cleaning up..."

  if [ -n "$STATS_PID" ] && kill -0 "$STATS_PID" 2>/dev/null; then
    kill "$STATS_PID" 2>/dev/null || true
    wait "$STATS_PID" 2>/dev/null || true
  fi

  if [ -n "$BIFROST_PID" ] && kill -0 "$BIFROST_PID" 2>/dev/null; then
    kill "$BIFROST_PID" 2>/dev/null || true
    wait "$BIFROST_PID" 2>/dev/null || true
  fi

  if [ -n "$MOCKER_PID" ] && kill -0 "$MOCKER_PID" 2>/dev/null; then
    kill "$MOCKER_PID" 2>/dev/null || true
    wait "$MOCKER_PID" 2>/dev/null || true
  fi

  # Clean up temporary files (keep results files for artifact upload)
  rm -f \
    "${WORK_DIR}/config.json" \
    "${WORK_DIR}/logs.db" \
    "${WORK_DIR}/attack.bin" \
    "${WORK_DIR}/calibration.bin" \
    "${WORK_DIR}/stress.bin" \
    "${WORK_DIR}/bifrost.log" \
    "${WORK_DIR}/vegeta-target.json" \
    "${WORK_DIR}/vegeta-target-calibration.json" \
    "${WORK_DIR}/vegeta-target-stress.json" \
    "${WORK_DIR}/vegeta-report.json" \
    "${WORK_DIR}/bifrost-stats.csv" 2>/dev/null || true

  log_info "Cleanup complete"
}

trap cleanup EXIT

# Check for required tools; exits 1 with a message on the first missing one.
check_dependencies() {
  log_info "Checking dependencies..."

  if ! command -v go &> /dev/null; then
    log_error "Go is not installed. Please install Go 1.24.3 or later."
    exit 1
  fi

  if ! command -v git &> /dev/null; then
    log_error "Git is not installed. Please install Git."
    exit 1
  fi

  # curl drives the mocker readiness probe; bc does all latency arithmetic.
  if ! command -v curl &> /dev/null; then
    log_error "curl is not installed. Please install curl."
    exit 1
  fi

  if ! command -v bc &> /dev/null; then
    log_error "bc is not installed. Please install bc."
    exit 1
  fi

  log_success "All dependencies found"
}

# Kill any process listening on a specific port (not processes with connections to it)
# Arguments: $1 = TCP port
kill_port() {
  local port=$1
  local pids
  # lsof exits non-zero when nothing is listening (or lsof is absent); that
  # simply means there is nothing to kill.
  pids=$(lsof -ti "TCP:${port}" -sTCP:LISTEN 2>/dev/null) || true
  if [ -n "$pids" ]; then
    log_warn "Killing existing process(es) listening on port ${port}: ${pids}"
    echo "$pids" | xargs kill -9 2>/dev/null || true
    sleep 1
  fi
}

# Kill processes on required ports before starting
cleanup_ports() {
  log_info "Checking for processes on required ports..."
  kill_port "${MOCKER_PORT}"
  kill_port "${BIFROST_PORT}"
}

# Install Vegeta if not present (via `go install`, adding GOPATH/bin to PATH)
install_vegeta() {
  if ! command -v vegeta &> /dev/null; then
    log_info "Installing Vegeta load testing tool..."
    go install github.com/tsenart/vegeta/v12@latest
    export PATH="$PATH:$(go env GOPATH)/bin"

    if ! command -v vegeta &> /dev/null; then
      log_error "Failed to install Vegeta"
      exit 1
    fi
    log_success "Vegeta installed"
  else
    log_success "Vegeta already installed"
  fi
}

# Build bifrost-http if binary doesn't exist
build_bifrost_http() {
  if [ -f "${REPO_ROOT}/tmp/bifrost-http" ]; then
    log_success "bifrost-http binary already exists at ${REPO_ROOT}/tmp/bifrost-http"
    return 0
  fi

  log_info "Building bifrost-http..."
  mkdir -p "${REPO_ROOT}/tmp"
  cd "${TRANSPORTS_DIR}"

  if go build -o "${REPO_ROOT}/tmp/bifrost-http" .; then
    log_success "bifrost-http built successfully"
  else
    log_error "Failed to build bifrost-http"
    exit 1
  fi

  cd "${WORK_DIR}"
}

# Clone (or update) the bifrost-benchmarking repository that contains mocker.
# The checkout lives at BENCHMARKING_DIR, the same path MOCKER_DIR points into,
# so a fresh clone is found by build_mocker.
setup_mocker() {
  if [ -d "${BENCHMARKING_DIR}" ]; then
    log_info "Updating bifrost-benchmarking repository..."
    cd "${BENCHMARKING_DIR}"
    # Best effort: an offline or dirty checkout should not abort the test.
    git pull --quiet || true
    cd "${WORK_DIR}"
  else
    log_info "Cloning bifrost-benchmarking repository..."
    git clone --depth 1 https://github.com/maximhq/bifrost-benchmarking.git "${BENCHMARKING_DIR}"
  fi

  log_success "Mocker setup complete"
}

# Build mocker binary (avoids go run overhead)
build_mocker() {
  if [ -f "${REPO_ROOT}/tmp/mocker" ]; then
    log_success "mocker binary already exists at ${REPO_ROOT}/tmp/mocker"
    return 0
  fi

  log_info "Building mocker..."
  mkdir -p "${REPO_ROOT}/tmp"
  cd "${MOCKER_DIR}"

  if go build -o "${REPO_ROOT}/tmp/mocker" .; then
    log_success "mocker built successfully"
  else
    log_error "Failed to build mocker"
    exit 1
  fi

  cd "${WORK_DIR}"
}

# Create config.json for bifrost with mocker provider.
# The heredoc is expanded by the shell (so the mocker port and sizing come
# from the configuration above); the literal "$schema" key is escaped.
create_config() {
  log_info "Creating config.json..."

  cat > "${WORK_DIR}/config.json" << EOF
{
  "\$schema": "https://www.getbifrost.ai/schema",
  "client": {
    "enable_logging": false,
    "initial_pool_size": ${INITIAL_POOL_SIZE},
    "drop_excess_requests": false,
    "allow_direct_keys": false
  },
  "config_store": {
    "enabled": false
  },
  "logs_store": {
    "enabled": false
  },
  "providers": {
    "openai": {
      "keys": [
        {
          "name": "mocker-key",
          "value": "Bearer mocker-key",
          "weight": 1
        }
      ],
      "network_config": {
        "base_url": "http://localhost:${MOCKER_PORT}",
        "default_request_timeout_in_seconds": 30
      },
      "concurrency_and_buffer_size": {
        "concurrency": ${PROVIDER_CONCURRENCY},
        "buffer_size": ${PROVIDER_BUFFER_SIZE}
      },
      "custom_provider_config": {
        "base_provider_type": "openai",
        "allowed_requests": {
          "list_models": false,
          "chat_completion": true,
          "chat_completion_stream": true
        }
      }
    }
  }
}
EOF

  log_success "config.json created"
}

# Start mocker with specified latency and wait until it answers a request.
# Arguments: $1 = latency in ms (default 0)
# Sets: MOCKER_PID
start_mocker() {
  local latency_ms=${1:-0}
  log_info "Starting mocker server on port ${MOCKER_PORT} with ${latency_ms}ms latency..."

  "${REPO_ROOT}/tmp/mocker" -port "${MOCKER_PORT}" -host 0.0.0.0 -latency "${latency_ms}" &
  MOCKER_PID=$!

  # Wait for mocker to be ready
  local max_attempts=30
  local attempt=0
  while ! curl -s "http://localhost:${MOCKER_PORT}/v1/chat/completions" -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer mocker-key" \
    -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"test"}]}' > /dev/null 2>&1; do
    sleep 1
    attempt=$((attempt + 1))
    if [ "$attempt" -ge "$max_attempts" ]; then
      log_error "Mocker failed to start within ${max_attempts} seconds"
      exit 1
    fi
  done

  log_success "Mocker server started (PID: ${MOCKER_PID})"
}

# Stop mocker (no-op when it is not running)
stop_mocker() {
  if [ -n "$MOCKER_PID" ] && kill -0 "$MOCKER_PID" 2>/dev/null; then
    log_info "Stopping mocker (PID: ${MOCKER_PID})..."
    kill "$MOCKER_PID" 2>/dev/null || true
    wait "$MOCKER_PID" 2>/dev/null || true
    MOCKER_PID=""
    sleep 1
  fi
}

# Stop bifrost-http server (no-op when it is not running)
stop_bifrost() {
  if [ -n "$BIFROST_PID" ] && kill -0 "$BIFROST_PID" 2>/dev/null; then
    log_info "Stopping bifrost (PID: ${BIFROST_PID})..."
    kill "$BIFROST_PID" 2>/dev/null || true
    wait "$BIFROST_PID" 2>/dev/null || true
    BIFROST_PID=""
    sleep 1
  fi
}

# Start background process stats collection for bifrost
# Samples CPU% and RSS every second, writes to CSV (STATS_FILE, truncated here)
# Sets: STATS_PID
start_stats_monitor() {
  if [ -z "$BIFROST_PID" ] || ! kill -0 "$BIFROST_PID" 2>/dev/null; then
    log_warn "Cannot start stats monitor: bifrost not running"
    return
  fi

  echo "timestamp,cpu_pct,rss_mb" > "${STATS_FILE}"

  (
    while kill -0 "$BIFROST_PID" 2>/dev/null; do
      # ps -o %cpu= -o rss= works on both macOS and Linux
      stats=$(ps -p "$BIFROST_PID" -o %cpu=,rss= 2>/dev/null)
      if [ -n "$stats" ]; then
        cpu=$(echo "$stats" | awk '{print $1}')
        rss_kb=$(echo "$stats" | awk '{print $2}')
        rss_mb=$(echo "scale=1; ${rss_kb} / 1024" | bc)
        echo "$(date +%s),${cpu},${rss_mb}" >> "${STATS_FILE}"
      fi
      sleep 1
    done
  ) &
  STATS_PID=$!
  log_info "Stats monitor started (PID: ${STATS_PID})"
}

# Stop stats monitor and print summary
# Sets: STATS_CPU_AVG, STATS_CPU_PEAK, STATS_RSS_AVG, STATS_RSS_PEAK
#       (left untouched when no samples were collected)
stop_stats_monitor() {
  if [ -n "$STATS_PID" ] && kill -0 "$STATS_PID" 2>/dev/null; then
    kill "$STATS_PID" 2>/dev/null || true
    wait "$STATS_PID" 2>/dev/null || true
    STATS_PID=""
  fi

  # A file with only the header line means no samples. Arithmetic context
  # tolerates the leading blanks some wc implementations emit.
  if [ ! -f "${STATS_FILE}" ] || (( $(wc -l < "${STATS_FILE}") <= 1 )); then
    log_warn "No process stats collected"
    return
  fi

  # Compute peak and average CPU/RSS from CSV (skip header)
  if command -v awk &> /dev/null; then
    local stats_summary
    stats_summary=$(awk -F',' 'NR>1 {
      cpu_sum+=$2; rss_sum+=$3; n++;
      if($2>cpu_max) cpu_max=$2;
      if($3>rss_max) rss_max=$3;
    } END {
      if(n>0) printf "%.1f,%.1f,%.1f,%.1f,%d", cpu_sum/n, cpu_max, rss_sum/n, rss_max, n
    }' "${STATS_FILE}")

    local samples
    IFS=',' read -r STATS_CPU_AVG STATS_CPU_PEAK STATS_RSS_AVG STATS_RSS_PEAK samples \
      <<< "${stats_summary}"

    echo ""
    log_success "Bifrost process stats (single instance, ${samples} samples):"
    log_info "  CPU: avg=${STATS_CPU_AVG}%, peak=${STATS_CPU_PEAK}%"
    log_info "  RSS: avg=${STATS_RSS_AVG}MB, peak=${STATS_RSS_PEAK}MB"
  fi
}

# Start bifrost-http server and wait for its readiness log line.
# Sets: BIFROST_PID
start_bifrost() {
  log_info "Starting bifrost-http on port ${BIFROST_PORT}..."

  cd "${WORK_DIR}"

  local bifrost_log="${WORK_DIR}/bifrost.log"
  # ">" truncates the log, so a readiness line from a previous run cannot
  # satisfy the grep below.
  "${REPO_ROOT}/tmp/bifrost-http" -app-dir "${WORK_DIR}" -port "${BIFROST_PORT}" \
    -host "0.0.0.0" -log-level "info" > "${bifrost_log}" 2>&1 &
  BIFROST_PID=$!

  # Wait for bifrost to be fully ready (look for "successfully started bifrost" message)
  local max_attempts=60
  local attempt=0
  while ! grep -q "successfully started bifrost" "${bifrost_log}" 2>/dev/null; do
    sleep 1
    attempt=$((attempt + 1))
    if [ "$attempt" -ge "$max_attempts" ]; then
      log_error "Bifrost failed to start within ${max_attempts} seconds"
      log_error "Bifrost log output:"
      cat "${bifrost_log}" 2>/dev/null || true
      exit 1
    fi

    # Check if process is still running
    if ! kill -0 "$BIFROST_PID" 2>/dev/null; then
      log_error "Bifrost process died unexpectedly"
      log_error "Bifrost log output:"
      cat "${bifrost_log}" 2>/dev/null || true
      exit 1
    fi
  done

  log_success "Bifrost-http started (PID: ${BIFROST_PID})"
}

# Extract latencies from a vegeta binary results file
# Arguments: $1 = path to .bin file
# Sets: EXTRACTED_MIN_NS, EXTRACTED_MEAN_NS, EXTRACTED_50_NS, EXTRACTED_90_NS,
#       EXTRACTED_95_NS, EXTRACTED_99_NS, EXTRACTED_MAX_NS, EXTRACTED_SUCCESS,
#       EXTRACTED_RATE, EXTRACTED_THROUGHPUT
# Returns: 1 when neither jq nor python3 is available
extract_latencies() {
  local bin_file=$1
  local json_report_file="${WORK_DIR}/vegeta-report.json"

  vegeta report -type=json < "${bin_file}" > "${json_report_file}"

  if command -v jq &> /dev/null; then
    EXTRACTED_MIN_NS=$(jq '.latencies.min // 0' "${json_report_file}")
    EXTRACTED_MEAN_NS=$(jq '.latencies.mean // 0' "${json_report_file}")
    EXTRACTED_50_NS=$(jq '.latencies["50th"] // 0' "${json_report_file}")
    EXTRACTED_90_NS=$(jq '.latencies["90th"] // 0' "${json_report_file}")
    EXTRACTED_95_NS=$(jq '.latencies["95th"] // 0' "${json_report_file}")
    EXTRACTED_99_NS=$(jq '.latencies["99th"] // 0' "${json_report_file}")
    EXTRACTED_MAX_NS=$(jq '.latencies.max // 0' "${json_report_file}")
    EXTRACTED_SUCCESS=$(jq '.success // 0' "${json_report_file}")
    EXTRACTED_RATE=$(jq '.rate // 0' "${json_report_file}")
    EXTRACTED_THROUGHPUT=$(jq '.throughput // 0' "${json_report_file}")
  elif command -v python3 &> /dev/null; then
    # Walks a dotted key path, yielding 0 for any missing key. The file name
    # and path travel as argv so nothing is interpolated into Python source.
    local py_get='
import json, sys
node = json.load(open(sys.argv[1]))
for key in sys.argv[2].split("."):
    node = node.get(key, 0) if isinstance(node, dict) else 0
print(node)
'
    EXTRACTED_MIN_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.min")
    EXTRACTED_MEAN_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.mean")
    EXTRACTED_50_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.50th")
    EXTRACTED_90_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.90th")
    EXTRACTED_95_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.95th")
    EXTRACTED_99_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.99th")
    EXTRACTED_MAX_NS=$(python3 -c "${py_get}" "${json_report_file}" "latencies.max")
    EXTRACTED_SUCCESS=$(python3 -c "${py_get}" "${json_report_file}" "success")
    EXTRACTED_RATE=$(python3 -c "${py_get}" "${json_report_file}" "rate")
    EXTRACTED_THROUGHPUT=$(python3 -c "${py_get}" "${json_report_file}" "throughput")
  else
    log_error "Neither jq nor python3 found. Cannot parse JSON results."
    return 1
  fi

  rm -f "${json_report_file}"
}

# ============================================================
# Phase 1: Overhead measurement (mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms)
# ============================================================

# Calibration: Vegeta -> Mocker direct (with latency)
# Measures: Vegeta HTTP client + localhost network round-trip + mocker response generation
# Sets: CAL_MIN_NS, CAL_MEAN_NS, CAL_50_NS, CAL_90_NS, CAL_95_NS, CAL_99_NS, CAL_MAX_NS
run_calibration() {
  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Calibration: Vegeta -> Mocker (${OVERHEAD_MOCKER_LATENCY_MS}ms, direct) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""

  log_info "Measuring Vegeta + network baseline (mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency)"
  log_info "Duration: ${OVERHEAD_DURATION}s at ${RATE} RPS, ~$(( RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000 )) concurrent"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target-calibration.json"
  local payload='{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'

  write_vegeta_target "${target_file}" \
    "http://localhost:${MOCKER_PORT}/v1/chat/completions" \
    "${payload}" \
    "Bearer mocker-key"

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${OVERHEAD_DURATION}s" \
    -timeout="$((OVERHEAD_MOCKER_LATENCY_MS / 1000 + 5))s" \
    -workers=$((RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000)) \
    -max-workers="${MAX_WORKERS}" > "${WORK_DIR}/calibration.bin"

  echo ""
  log_info "Calibration complete. Results:"
  vegeta report < "${WORK_DIR}/calibration.bin"

  extract_latencies "${WORK_DIR}/calibration.bin"
  log_info "Actual RPS: $(printf "%.0f" "$EXTRACTED_RATE") (configured: ${RATE})"

  CAL_MIN_NS=$EXTRACTED_MIN_NS
  CAL_MEAN_NS=$EXTRACTED_MEAN_NS
  CAL_50_NS=$EXTRACTED_50_NS
  CAL_90_NS=$EXTRACTED_90_NS
  CAL_95_NS=$EXTRACTED_95_NS
  CAL_99_NS=$EXTRACTED_99_NS
  CAL_MAX_NS=$EXTRACTED_MAX_NS

  echo ""
  log_success "Calibration baseline (per bucket):"
  log_info "  Min: $(ns_to_us "$CAL_MIN_NS")µs"
  log_info "  Mean: $(ns_to_us "$CAL_MEAN_NS")µs"
  log_info "  P50: $(ns_to_us "$CAL_50_NS")µs"
  log_info "  P90: $(ns_to_us "$CAL_90_NS")µs"
  log_info "  P95: $(ns_to_us "$CAL_95_NS")µs"
  log_info "  P99: $(ns_to_us "$CAL_99_NS")µs"
  log_info "  Max: $(ns_to_us "$CAL_MAX_NS")µs"
}

# Overhead test: Vegeta -> Bifrost -> Mocker (with latency)
# Same duration/rate as calibration so percentile distributions are comparable
# Writes RESULTS_FILE and RESULTS_JSON; exits 1 when any tiered threshold
# (mean/p50/p90/p95/p99) is exceeded.
run_overhead_test() {
  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Overhead Test: Vegeta -> Bifrost -> Mocker (${OVERHEAD_MOCKER_LATENCY_MS}ms) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""

  local concurrent=$((RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000))

  log_info "Measuring Bifrost overhead (single instance, mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency)"
  log_info "Duration: ${OVERHEAD_DURATION}s at ${RATE} RPS, ~${concurrent} concurrent requests through Bifrost"
  log_info "Overhead consists of: vegetta overhead and mocker timeout jitter"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target.json"
  local payload='{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'

  write_vegeta_target "${target_file}" \
    "http://localhost:${BIFROST_PORT}/v1/chat/completions" \
    "${payload}"

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${OVERHEAD_DURATION}s" \
    -timeout="$((OVERHEAD_MOCKER_LATENCY_MS / 1000 + 5))s" \
    -workers="${concurrent}" \
    -max-workers="${MAX_WORKERS}" > "${WORK_DIR}/attack.bin"

  echo ""
  log_info "Overhead test complete. Results:"
  vegeta report < "${WORK_DIR}/attack.bin"

  echo ""
  log_info "Latency histogram:"
  # Quoted: the brackets would otherwise be a shell glob character class.
  vegeta report -type='hist[0,100us,500us,1ms,5ms,10ms,50ms,100ms]' < "${WORK_DIR}/attack.bin" \
    || log_warn "Histogram generation failed"

  # Extract and compute overhead
  extract_latencies "${WORK_DIR}/attack.bin"
  log_info "  Raw latencies (ns): min=$EXTRACTED_MIN_NS, mean=$EXTRACTED_MEAN_NS, p50=$EXTRACTED_50_NS, p99=$EXTRACTED_99_NS, max=$EXTRACTED_MAX_NS"
  log_info "  Success rate: $EXTRACTED_SUCCESS"
  log_info "  Actual RPS: $(printf "%.0f" "$EXTRACTED_RATE") (configured: ${RATE})"

  if [ -z "$EXTRACTED_MIN_NS" ] || [ "$EXTRACTED_MIN_NS" = "0" ] || [ "$EXTRACTED_MIN_NS" = "null" ]; then
    log_error "Failed to extract latency values from vegeta report"
    exit 1
  fi

  # Subtract calibration per bucket: overhead = through_bifrost - direct_to_mocker
  local us_min us_mean us_50 us_90 us_95 us_99 us_max success_pct actual_rps
  us_min=$(overhead_us "$EXTRACTED_MIN_NS" "$CAL_MIN_NS")
  us_mean=$(overhead_us "$EXTRACTED_MEAN_NS" "$CAL_MEAN_NS")
  us_50=$(overhead_us "$EXTRACTED_50_NS" "$CAL_50_NS")
  us_90=$(overhead_us "$EXTRACTED_90_NS" "$CAL_90_NS")
  us_95=$(overhead_us "$EXTRACTED_95_NS" "$CAL_95_NS")
  us_99=$(overhead_us "$EXTRACTED_99_NS" "$CAL_99_NS")
  us_max=$(overhead_us "$EXTRACTED_MAX_NS" "$CAL_MAX_NS")
  success_pct=$(ratio_to_pct "$EXTRACTED_SUCCESS")

  echo ""
  log_success "Bifrost overhead (per bucket):"
  log_info "  Min: ${us_min}µs"
  log_info "  Mean: ${us_mean}µs"
  log_info "  P50: ${us_50}µs"
  log_info "  P90: ${us_90}µs"
  log_info "  P95: ${us_95}µs"
  log_info "  P99: ${us_99}µs"
  log_info "  Max: ${us_max}µs"

  actual_rps=$(printf "%.0f" "$EXTRACTED_RATE")

  # Write results
  cat > "${RESULTS_FILE}" << EOF
# Bifrost Load Test Results (single instance, ${actual_rps} RPS)

## Bifrost Processing Overhead

| Metric | Actual RPS | Duration | Concurrent | Success Rate | Min | Mean | P50 | P90 | P95 | P99 | Max |
|--------|-----------|----------|------------|--------------|-----|------|-----|-----|-----|-----|-----|
| Overhead | ${actual_rps} | ${OVERHEAD_DURATION}s | ~${concurrent} | ${success_pct}% | ${us_min}µs | ${us_mean}µs | ${us_50}µs | ${us_90}µs | ${us_95}µs | ${us_99}µs | ${us_max}µs |

EOF

  printf '{"overhead": {"configured_rate": %s, "actual_rate": %s, "duration": %s, "concurrent": %s, "success_rate": %s, "latency_us": {"min": %s, "mean": %s, "p50": %s, "p90": %s, "p95": %s, "p99": %s, "max": %s}}, "timestamp": "%s"}\n' \
    "${RATE}" "${actual_rps}" "${OVERHEAD_DURATION}" "${concurrent}" "${success_pct}" \
    "${us_min}" "${us_mean}" "${us_50}" "${us_90}" "${us_95}" "${us_99}" "${us_max}" \
    "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" > "${RESULTS_JSON}"

  # Check tiered thresholds (skip Min/Max — single-point extremes are too noisy)
  local failed=0
  local labels=("Mean" "P50" "P90" "P95" "P99")
  local real_values=("$EXTRACTED_MEAN_NS" "$EXTRACTED_50_NS" "$EXTRACTED_90_NS" "$EXTRACTED_95_NS" "$EXTRACTED_99_NS")
  local cal_values=("$CAL_MEAN_NS" "$CAL_50_NS" "$CAL_90_NS" "$CAL_95_NS" "$CAL_99_NS")
  local thresholds=("$MAX_OVERHEAD_MEAN_US" "$MAX_OVERHEAD_P50_US" "$MAX_OVERHEAD_P90_US" "$MAX_OVERHEAD_P95_US" "$MAX_OVERHEAD_P99_US")
  local extras=()
  local i overhead_whole_us

  for i in "${!real_values[@]}"; do
    # Integer µs is enough for a threshold comparison.
    overhead_whole_us=$(( (real_values[i] - cal_values[i]) / 1000 ))
    if [ "$overhead_whole_us" -gt "${thresholds[i]}" ]; then
      extras+=("${labels[i]}:${overhead_whole_us}:${thresholds[i]}")
      failed=1
    fi
  done

  if [ "$failed" -eq 1 ]; then
    echo ""
    log_error "FAILED: Bifrost overhead exceeded tiered thresholds"
    log_error "Overhead consists of: vegetta overhead and mocker timeout jitter. In real-world the P99 overhead will be approximately 100 microseconds."
    echo ""
    echo -e "${RED}| Bucket | Overhead (µs) | Threshold (µs) |${NC}"
    echo -e "${RED}|--------|---------------|----------------|${NC}"
    local entry bucket overhead threshold
    for entry in "${extras[@]}"; do
      IFS=: read -r bucket overhead threshold <<< "$entry"
      echo -e "${RED}| ${bucket} | ${overhead}µs | ${threshold}µs |${NC}"
    done
    echo ""
    stop_stats_monitor
    exit 1
  fi

  log_success "All overhead buckets within tiered thresholds (mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs)"
}

# ============================================================
# Phase 2: Stress test (mocker at ${STRESS_MOCKER_LATENCY_MS}ms latency)
# ============================================================
# Arguments: $1 = label (e.g. "Stress #1", "Stress #2")
# Appends a results table to RESULTS_FILE; exits 1 unless success rate is 100%.
run_stress_test() {
  local label="${1:-Stress}"
  local bin_file="${WORK_DIR}/stress.bin"
  local concurrent=$((RATE * STRESS_MOCKER_LATENCY_MS / 1000))

  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ ${label}: ${RATE} RPS with ${STRESS_MOCKER_LATENCY_MS}ms mocker latency ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""

  log_info "Testing single Bifrost instance under sustained concurrency"
  log_info "Duration: ${STRESS_DURATION}s at ${RATE} RPS (${STRESS_MOCKER_LATENCY_MS}ms mocker latency)"
  log_info "Expected concurrent requests: ~${concurrent} (provider concurrency: ${PROVIDER_CONCURRENCY}, buffer: ${PROVIDER_BUFFER_SIZE})"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target-stress.json"
  local payload='{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'

  write_vegeta_target "${target_file}" \
    "http://localhost:${BIFROST_PORT}/v1/chat/completions" \
    "${payload}"

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${STRESS_DURATION}s" \
    -timeout="30s" \
    -workers="${concurrent}" \
    -max-workers="${MAX_WORKERS}" > "${bin_file}"

  echo ""
  log_info "${label} complete. Results:"
  vegeta report < "${bin_file}"

  echo ""
  log_info "Latency histogram:"
  # Quoted: the brackets would otherwise be a shell glob character class.
  vegeta report -type='hist[0,1ms,5ms,10ms,50ms,100ms,500ms,1s,5s,10s,15s]' < "${bin_file}" \
    || log_warn "Histogram generation failed"

  # Check success rate
  extract_latencies "${bin_file}"
  local success_pct stress_actual_rps
  success_pct=$(ratio_to_pct "$EXTRACTED_SUCCESS")
  stress_actual_rps=$(printf "%.0f" "$EXTRACTED_RATE")
  log_info "Actual RPS: ${stress_actual_rps} (configured: ${RATE})"

  # Append stress test results to results file
  cat >> "${RESULTS_FILE}" << EOF
## ${label} (${STRESS_MOCKER_LATENCY_MS}ms mocker latency)

| Metric | Actual RPS | Duration | Concurrent | Success Rate | Min | Mean | P50 | P90 | P95 | P99 | Max |
|--------|-----------|----------|------------|--------------|-----|------|-----|-----|-----|-----|-----|
| ${label} | ${stress_actual_rps} | ${STRESS_DURATION}s | ~${concurrent} | ${success_pct}% | $(ns_to_ms "$EXTRACTED_MIN_NS")ms | $(ns_to_ms "$EXTRACTED_MEAN_NS")ms | $(ns_to_ms "$EXTRACTED_50_NS")ms | $(ns_to_ms "$EXTRACTED_90_NS")ms | $(ns_to_ms "$EXTRACTED_95_NS")ms | $(ns_to_ms "$EXTRACTED_99_NS")ms | $(ns_to_ms "$EXTRACTED_MAX_NS")ms |

EOF

  if [ "$success_pct" != "100.00" ]; then
    echo ""
    log_error "FAILED: ${label} success rate is ${success_pct}% (expected 100%)"
    exit 1
  fi

  log_success "${label} passed: ${success_pct}% success rate"
}

# ============================================================
# Finalize
# ============================================================
# Appends process stats, method and notes to RESULTS_FILE, and merges the
# stress/process-stats sections into RESULTS_JSON (requires jq).
finalize_results() {
  # Append process stats if available
  local has_overhead_stats=false
  local has_stress_stats=false
  if [ -n "$OVERHEAD_STATS_CPU_PEAK" ]; then
    has_overhead_stats=true
  fi
  if [ -n "$STATS_CPU_PEAK" ]; then
    has_stress_stats=true
  fi

  if [ "$has_overhead_stats" = true ] || [ "$has_stress_stats" = true ]; then
    cat >> "${RESULTS_FILE}" << 'EOF'
## Bifrost Process Stats (single instance)

| Phase | CPU Avg | CPU Peak | RSS Avg | RSS Peak |
|-------|---------|----------|---------|----------|
EOF
    if [ "$has_overhead_stats" = true ]; then
      echo "| Overhead | ${OVERHEAD_STATS_CPU_AVG}% | ${OVERHEAD_STATS_CPU_PEAK}% | ${OVERHEAD_STATS_RSS_AVG}MB | ${OVERHEAD_STATS_RSS_PEAK}MB |" >> "${RESULTS_FILE}"
    fi
    if [ "$has_stress_stats" = true ]; then
      echo "| Stress | ${STATS_CPU_AVG}% | ${STATS_CPU_PEAK}% | ${STATS_RSS_AVG}MB | ${STATS_RSS_PEAK}MB |" >> "${RESULTS_FILE}"
    fi
  fi

  cat >> "${RESULTS_FILE}" << EOF

## Method

- **Single instance**: All tests run against one bifrost-http process at ${RATE} RPS
- **Overhead measurement**: Mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency, calibration (Vegeta->Mocker) subtracted from test (Vegeta->Bifrost->Mocker)
- **Stress test**: Mocker at ${STRESS_MOCKER_LATENCY_MS}ms latency, verifies 100% success under sustained concurrency

## Notes

- Overhead values are in microseconds (µs), stress test values in milliseconds (ms)
- Overhead ignores the mocker jitter, local network request queuing. In real-world the P99 overhead will be approximately 100 microseconds.
- Tiered overhead thresholds: mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs
- P50/P90/P95/P99 represent percentile latencies

---
*Generated by Bifrost Load Test Script*
EOF

  # Update JSON with stress results and process stats
  if command -v jq &> /dev/null; then
    # Temp file is created only on this path so nothing leaks without jq.
    local tmp_json
    tmp_json=$(mktemp)
    jq --arg sr "$(ratio_to_pct "$EXTRACTED_SUCCESS")" \
      --arg rate "${RATE}" \
      --arg duration "${STRESS_DURATION}" \
      --arg latency_ms "${STRESS_MOCKER_LATENCY_MS}" \
      --arg cpu_avg "${STATS_CPU_AVG:-0}" --arg cpu_peak "${STATS_CPU_PEAK:-0}" \
      --arg rss_avg "${STATS_RSS_AVG:-0}" --arg rss_peak "${STATS_RSS_PEAK:-0}" \
      --arg oh_cpu_avg "${OVERHEAD_STATS_CPU_AVG:-0}" --arg oh_cpu_peak "${OVERHEAD_STATS_CPU_PEAK:-0}" \
      --arg oh_rss_avg "${OVERHEAD_STATS_RSS_AVG:-0}" --arg oh_rss_peak "${OVERHEAD_STATS_RSS_PEAK:-0}" \
      '.stress = {
         "rate": ($rate | tonumber),
         "duration": ($duration | tonumber),
         "mocker_latency_ms": ($latency_ms | tonumber),
         "success_rate": ($sr | tonumber)
       }
       | .process_stats = {
         "overhead": {
           "cpu_avg_pct": ($oh_cpu_avg | tonumber),
           "cpu_peak_pct": ($oh_cpu_peak | tonumber),
           "rss_avg_mb": ($oh_rss_avg | tonumber),
           "rss_peak_mb": ($oh_rss_peak | tonumber)
         },
         "stress": {
           "cpu_avg_pct": ($cpu_avg | tonumber),
           "cpu_peak_pct": ($cpu_peak | tonumber),
           "rss_avg_mb": ($rss_avg | tonumber),
           "rss_peak_mb": ($rss_peak | tonumber)
         }
       }' \
      "${RESULTS_JSON}" > "${tmp_json}"
    mv "${tmp_json}" "${RESULTS_JSON}"
  else
    log_warn "jq not found; stress results and process stats were not added to ${RESULTS_JSON}"
  fi

  log_success "Results saved to:"
  log_info "  - Markdown: ${RESULTS_FILE}"
  log_info "  - JSON: ${RESULTS_JSON}"
}

# Main execution: build, run phase 1 (calibration + overhead), restart both
# servers, run phase 2 (two stress runs), then write and print the summary.
main() {
  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Bifrost Load Test (single instance, ${RATE} RPS) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""

  log_info "Configuration: single bifrost-http instance, ${RATE} RPS"
  log_info "Provider concurrency: ${PROVIDER_CONCURRENCY} (buffer: ${PROVIDER_BUFFER_SIZE})"
  log_info "Overhead thresholds: mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs"
  log_info "Phase 1: Overhead measurement — ${OVERHEAD_MOCKER_LATENCY_MS}ms mocker, ${OVERHEAD_DURATION}s, ~$(( RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000 )) concurrent requests"
  log_info "Phase 2: Stress test — ${STRESS_MOCKER_LATENCY_MS}ms mocker, ${STRESS_DURATION}s, ~$(( RATE * STRESS_MOCKER_LATENCY_MS / 1000 )) concurrent requests"

  check_dependencies
  install_vegeta
  build_bifrost_http
  setup_mocker
  build_mocker
  create_config
  cleanup_ports

  # ── Phase 1: Overhead measurement with ${OVERHEAD_MOCKER_LATENCY_MS}ms mocker ──
  start_mocker "${OVERHEAD_MOCKER_LATENCY_MS}"
  start_bifrost
  start_stats_monitor
  run_calibration
  run_overhead_test

  # ── Collect process stats from overhead phase ──
  stop_stats_monitor
  OVERHEAD_STATS_CPU_AVG="${STATS_CPU_AVG}"
  OVERHEAD_STATS_CPU_PEAK="${STATS_CPU_PEAK}"
  OVERHEAD_STATS_RSS_AVG="${STATS_RSS_AVG}"
  OVERHEAD_STATS_RSS_PEAK="${STATS_RSS_PEAK}"

  # ── Phase 2: Stress test with high-latency mocker ──
  # Restart both mocker and bifrost to ensure a clean fasthttp connection pool.
  # Without restarting bifrost, stale TCP connections from the overhead phase
  # (which used a different mocker process) cause immediate 400s on POST requests
  # because fasthttp does not retry non-idempotent methods on broken connections.
  stop_mocker
  stop_bifrost
  start_mocker "${STRESS_MOCKER_LATENCY_MS}"
  start_bifrost
  start_stats_monitor
  run_stress_test "Stress #1"

  echo ""
  log_info "Waiting 30s before second stress test (idle period)..."
  sleep 30

  run_stress_test "Stress #2"

  # ── Collect process stats from stress phase ──
  stop_stats_monitor

  # ── Finalize ──
  finalize_results
  cleanup_ports

  echo ""
  # Print final summary
  echo "╔══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗"
  echo "║ FINAL RESULTS SUMMARY ║"
  echo "╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝"
  echo ""
  cat "${RESULTS_FILE}"

  echo ""
  log_success "All tests passed!"
}

main "$@"