first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

850
.github/workflows/scripts/load-test.sh vendored Executable file
View File

@@ -0,0 +1,850 @@
#!/bin/bash
# Load Test Script for Bifrost
# Runs a load test against bifrost-http with a mocker provider
# Usage: ./load-test.sh
#
# This script:
# 1. Builds bifrost-http and mocker locally
# 2. Creates a config.json with mocker provider (OpenAI-style)
# 3. Starts mocker with OVERHEAD_MOCKER_LATENCY_MS latency (default 1000ms) and bifrost-http
# 4. Runs a calibration (Vegeta -> Mocker direct) to measure Vegeta+network baseline
# 5. Runs the overhead test (Vegeta -> Bifrost -> Mocker) to measure total
# 6. Subtracts calibration from test to isolate Bifrost proxy overhead
# (includes local network hop, JSON parsing/unparsing, plugins, and mocker jitter)
# 7. Restarts mocker (STRESS_MOCKER_LATENCY_MS latency, default 1000ms) and bifrost-http for a sustained concurrency stress test
# 8. Asserts overhead < tiered thresholds (per percentile) and stress test has 100% success rate
set -e

# ---------------------------------------------------------------------------
# Paths (all derived from the location of this script)
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
TRANSPORTS_DIR="${REPO_ROOT}/transports"
BIFROST_HTTP_DIR="${TRANSPORTS_DIR}/bifrost-http"
MOCKER_DIR="${REPO_ROOT}/../bifrost-benchmarking/mocker"
WORK_DIR="${SCRIPT_DIR}"

# ---------------------------------------------------------------------------
# Network endpoints
# ---------------------------------------------------------------------------
MOCKER_PORT=8000
BIFROST_PORT=8080

# ---------------------------------------------------------------------------
# Load profile
# ---------------------------------------------------------------------------
RATE=1000                        # requests per second handed to Vegeta
MAX_WORKERS=12000                # upper bound for Vegeta workers
OVERHEAD_DURATION=30             # seconds, overhead measurement
STRESS_DURATION=30               # seconds, stress test
OVERHEAD_MOCKER_LATENCY_MS=1000  # mocker latency during overhead measurement
STRESS_MOCKER_LATENCY_MS=1000    # mocker latency during stress test

# ---------------------------------------------------------------------------
# Tiered overhead thresholds, in microseconds. They cover the full proxy
# cost: local network hop, JSON parsing/unparsing, plugins, mocker jitter.
# At ${RATE} RPS x ${OVERHEAD_MOCKER_LATENCY_MS}ms latency there are roughly
# 1000 requests in flight.
# ---------------------------------------------------------------------------
MAX_OVERHEAD_MEAN_US=5000   # 5ms
MAX_OVERHEAD_P50_US=5000    # 5ms
MAX_OVERHEAD_P90_US=10000   # 10ms
MAX_OVERHEAD_P95_US=20000   # 20ms
MAX_OVERHEAD_P99_US=100000  # 100ms

# ---------------------------------------------------------------------------
# Output artifacts (summary table + machine-readable results)
# ---------------------------------------------------------------------------
RESULTS_FILE="${WORK_DIR}/load-test-results.md"
RESULTS_JSON="${WORK_DIR}/load-test-results.json"

# ---------------------------------------------------------------------------
# Process stats sampling state
# ---------------------------------------------------------------------------
STATS_PID=""
STATS_FILE="${WORK_DIR}/bifrost-stats.csv"

# Snapshot of the overhead-phase stats, taken before bifrost is restarted.
OVERHEAD_STATS_CPU_AVG="" OVERHEAD_STATS_CPU_PEAK=""
OVERHEAD_STATS_RSS_AVG="" OVERHEAD_STATS_RSS_PEAK=""

# Calibration latencies per bucket (Vegeta -> Mocker direct), nanoseconds.
CAL_MIN_NS=0 CAL_MEAN_NS=0 CAL_MAX_NS=0
CAL_50_NS=0 CAL_90_NS=0 CAL_95_NS=0 CAL_99_NS=0

# ---------------------------------------------------------------------------
# ANSI colour escapes, interpreted by `echo -e` in the log helpers
# ---------------------------------------------------------------------------
NC='\033[0m' # reset / no colour
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Print an informational message ($1) prefixed with a blue [INFO] tag.
# Backslash escapes in the tag and the message are interpreted by `echo -e`.
log_info() {
  local message=$1
  echo -e "${BLUE}[INFO]${NC} ${message}"
}
# Print a success message ($1) prefixed with a green [SUCCESS] tag.
# Backslash escapes in the tag and the message are interpreted by `echo -e`.
log_success() {
  local message=$1
  echo -e "${GREEN}[SUCCESS]${NC} ${message}"
}
# Print a warning message ($1) prefixed with a yellow [WARN] tag.
# Backslash escapes in the tag and the message are interpreted by `echo -e`.
log_warn() {
  local message=$1
  echo -e "${YELLOW}[WARN]${NC} ${message}"
}
# Print an error message ($1) prefixed with a red [ERROR] tag.
# Backslash escapes in the tag and the message are interpreted by `echo -e`.
log_error() {
  local message=$1
  echo -e "${RED}[ERROR]${NC} ${message}"
}
# Tear-down handler: stops the background processes this script started
# (stats sampler, bifrost, mocker — in that order) and deletes the scratch
# files created in WORK_DIR. The Markdown/JSON result files are kept so
# they can be uploaded as artifacts.
cleanup() {
  log_info "Cleaning up..."

  # Stop each tracked background process that is still alive.
  local pid
  for pid in "$STATS_PID" "$BIFROST_PID" "$MOCKER_PID"; do
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
    fi
  done

  # Remove temporary files; failures are ignored on purpose (best effort).
  local scratch
  for scratch in \
    config.json logs.db attack.bin calibration.bin stress.bin bifrost.log \
    vegeta-target.json vegeta-target-calibration.json \
    vegeta-target-stress.json vegeta-report.json bifrost-stats.csv; do
    rm -f "${WORK_DIR}/${scratch}" 2>/dev/null || true
  done

  log_info "Cleanup complete"
}
trap cleanup EXIT
# Verify that every external tool the script relies on is available before
# any build or load run starts, so a missing tool fails fast instead of
# aborting halfway through a test.
# Exits with status 1 (after logging the reason) when something is missing.
check_dependencies() {
  log_info "Checking dependencies..."
  if ! command -v go &> /dev/null; then
    log_error "Go is not installed. Please install Go 1.24.3 or later."
    exit 1
  fi
  if ! command -v git &> /dev/null; then
    log_error "Git is not installed. Please install Git."
    exit 1
  fi

  # Tools used later by the readiness probe (curl), the latency math (bc),
  # the stats summary (awk) and the Vegeta target bodies (base64).
  local tool
  for tool in curl bc awk base64; do
    if ! command -v "${tool}" &> /dev/null; then
      log_error "${tool} is not installed. Please install ${tool}."
      exit 1
    fi
  done

  # The Vegeta JSON report is parsed with jq, or python3 as a fallback.
  if ! command -v jq &> /dev/null && ! command -v python3 &> /dev/null; then
    log_error "Neither jq nor python3 found. Cannot parse JSON results."
    exit 1
  fi

  log_success "All dependencies found"
}
# Force-kill whatever is LISTENING on the given TCP port. Processes that
# merely hold a connection to the port are not matched.
# Arguments: $1 = port number
kill_port() {
  local port=$1
  local listeners
  # A non-zero lsof status (nothing listening, or lsof unavailable) is
  # deliberately ignored; only the PID list matters.
  listeners=$(lsof -ti "TCP:${port}" -sTCP:LISTEN 2>/dev/null) || true

  if [ -z "$listeners" ]; then
    return 0
  fi

  log_warn "Killing existing process(es) listening on port ${port}: ${listeners}"
  xargs kill -9 <<< "$listeners" 2>/dev/null || true
  # Pause briefly after the kill before the caller continues.
  sleep 1
}
# Free the mocker and bifrost ports (in that order) by killing any process
# currently listening on them.
cleanup_ports() {
  log_info "Checking for processes on required ports..."
  local port
  for port in "${MOCKER_PORT}" "${BIFROST_PORT}"; do
    kill_port "${port}"
  done
}
# Make sure the `vegeta` load-testing CLI is on PATH, installing it with
# `go install` when it is not. Exits with status 1 if it still cannot be
# found after the install.
install_vegeta() {
  if command -v vegeta &> /dev/null; then
    log_success "Vegeta already installed"
    return 0
  fi

  log_info "Installing Vegeta load testing tool..."
  go install github.com/tsenart/vegeta/v12@latest
  # Append GOPATH/bin to PATH so the freshly installed binary can be found.
  export PATH="$PATH:$(go env GOPATH)/bin"

  if ! command -v vegeta &> /dev/null; then
    log_error "Failed to install Vegeta"
    exit 1
  fi
  log_success "Vegeta installed"
}
# Build the bifrost-http binary into ${REPO_ROOT}/tmp unless it is already
# there. An existing binary is reused as-is (no rebuild, no staleness check).
# Exits with status 1 when the build fails; on success the working directory
# is reset to WORK_DIR.
build_bifrost_http() {
  local bifrost_bin="${REPO_ROOT}/tmp/bifrost-http"
  if [ -f "${bifrost_bin}" ]; then
    log_success "bifrost-http binary already exists at ${bifrost_bin}"
    return 0
  fi
  log_info "Building bifrost-http..."
  # Make sure the output directory exists before handing the path to go.
  mkdir -p "${REPO_ROOT}/tmp"
  # NOTE(review): the build runs in TRANSPORTS_DIR while BIFROST_HTTP_DIR is
  # defined but never used — confirm which directory holds the main package.
  cd "${TRANSPORTS_DIR}"
  # The output path is quoted so a checkout path containing spaces works.
  if go build -o "${bifrost_bin}" .; then
    log_success "bifrost-http built successfully"
  else
    log_error "Failed to build bifrost-http"
    exit 1
  fi
  cd "${WORK_DIR}"
}
# Make the bifrost-benchmarking checkout (which contains the mocker source)
# available next to the repository, at ${REPO_ROOT}/../bifrost-benchmarking.
# An existing checkout is refreshed with `git pull`; otherwise the repository
# is cloned to exactly that location, which is the path MOCKER_DIR points
# into. The working directory is WORK_DIR when the function returns.
setup_mocker() {
  local bench_dir="${REPO_ROOT}/../bifrost-benchmarking"
  if [ -d "${bench_dir}" ]; then
    log_info "Updating bifrost-benchmarking repository..."
    cd "${bench_dir}"
    # Best effort: a failed pull still leaves the existing checkout usable.
    git pull --quiet || true
    cd "${WORK_DIR}"
  else
    log_info "Cloning bifrost-benchmarking repository..."
    cd "${WORK_DIR}"
    # Clone to the explicit destination. Cloning into the current directory
    # would put the checkout under WORK_DIR, where neither the existence
    # check above nor MOCKER_DIR would ever find it.
    git clone --depth 1 https://github.com/maximhq/bifrost-benchmarking.git "${bench_dir}"
  fi
  log_success "Mocker setup complete"
}
# Compile the mocker into ${REPO_ROOT}/tmp/mocker (a prebuilt binary avoids
# `go run` overhead). An existing binary is reused without rebuilding.
# Exits with status 1 when the build fails; on success the working directory
# is reset to WORK_DIR.
build_mocker() {
  local mocker_bin="${REPO_ROOT}/tmp/mocker"

  if [ -f "${mocker_bin}" ]; then
    log_success "mocker binary already exists at ${mocker_bin}"
    return 0
  fi

  log_info "Building mocker..."
  cd "${MOCKER_DIR}"
  if ! go build -o "${mocker_bin}" .; then
    log_error "Failed to build mocker"
    exit 1
  fi
  log_success "mocker built successfully"
  cd "${WORK_DIR}"
}
# Write ${WORK_DIR}/config.json for bifrost-http: logging and both stores
# disabled, and a single "openai" provider whose base_url points at the local
# mocker. The mocker port is taken from MOCKER_PORT so the config always
# matches the port the mocker is actually started on.
create_config() {
  log_info "Creating config.json..."
  # Unquoted heredoc so ${MOCKER_PORT} expands; the literal "$schema" key is
  # therefore written with an escaped dollar sign.
  cat > "${WORK_DIR}/config.json" << EOF
{
  "\$schema": "https://www.getbifrost.ai/schema",
  "client": {
    "enable_logging": false,
    "initial_pool_size": 20000,
    "drop_excess_requests": false,
    "allow_direct_keys": false
  },
  "config_store": {
    "enabled": false
  },
  "logs_store": {
    "enabled": false
  },
  "providers": {
    "openai": {
      "keys": [
        {
          "name": "mocker-key",
          "value": "Bearer mocker-key",
          "weight": 1
        }
      ],
      "network_config": {
        "base_url": "http://localhost:${MOCKER_PORT}",
        "default_request_timeout_in_seconds": 30
      },
      "concurrency_and_buffer_size": {
        "concurrency": 20000,
        "buffer_size": 40000
      },
      "custom_provider_config": {
        "base_provider_type": "openai",
        "allowed_requests": {
          "list_models": false,
          "chat_completion": true,
          "chat_completion_stream": true
        }
      }
    }
  }
}
EOF
  log_success "config.json created"
}
# Start the mocker in the background and wait until it answers a chat
# completion request.
# Arguments: $1 = latency in ms (defaults to 0)
# Sets:      MOCKER_PID
# Exits with status 1 if the mocker process dies or does not answer within
# max_attempts probe rounds.
start_mocker() {
  local latency_ms=${1:-0}
  log_info "Starting mocker server on port ${MOCKER_PORT} with ${latency_ms}ms latency..."
  "${REPO_ROOT}/tmp/mocker" -port "${MOCKER_PORT}" -host 0.0.0.0 -latency "${latency_ms}" &
  MOCKER_PID=$!

  # Probe with a real request until one gets a response.
  local max_attempts=30
  local attempt=0
  while ! curl -s "http://localhost:${MOCKER_PORT}/v1/chat/completions" -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer mocker-key" \
    -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"test"}]}' > /dev/null 2>&1; do
    # Fail fast when the process has already exited (same check that
    # start_bifrost performs) instead of waiting out the whole timeout.
    if ! kill -0 "$MOCKER_PID" 2>/dev/null; then
      log_error "Mocker process died unexpectedly"
      exit 1
    fi
    sleep 1
    attempt=$((attempt + 1))
    if [ "$attempt" -ge "$max_attempts" ]; then
      log_error "Mocker failed to start within ${max_attempts} seconds"
      exit 1
    fi
  done
  log_success "Mocker server started (PID: ${MOCKER_PID})"
}
# Terminate the mocker started by this script, if it is still running, and
# clear MOCKER_PID. Does nothing when no live mocker is tracked.
stop_mocker() {
  [ -n "$MOCKER_PID" ] || return 0
  kill -0 "$MOCKER_PID" 2>/dev/null || return 0

  log_info "Stopping mocker (PID: ${MOCKER_PID})..."
  kill "$MOCKER_PID" 2>/dev/null || true
  wait "$MOCKER_PID" 2>/dev/null || true
  MOCKER_PID=""
  # Short pause after the process has been reaped.
  sleep 1
}
# Terminate the bifrost-http server started by this script, if it is still
# running, and clear BIFROST_PID. Does nothing when no live server is tracked.
stop_bifrost() {
  [ -n "$BIFROST_PID" ] || return 0
  kill -0 "$BIFROST_PID" 2>/dev/null || return 0

  log_info "Stopping bifrost (PID: ${BIFROST_PID})..."
  kill "$BIFROST_PID" 2>/dev/null || true
  wait "$BIFROST_PID" 2>/dev/null || true
  BIFROST_PID=""
  # Short pause after the process has been reaped.
  sleep 1
}
# Start background process stats collection for bifrost.
# Samples CPU% and RSS of BIFROST_PID once per second and appends one CSV row
# (timestamp,cpu_pct,rss_mb) per sample to STATS_FILE, which is recreated
# with a header on every start.
# Sets: STATS_PID (PID of the background sampler)
start_stats_monitor() {
  if [ -z "$BIFROST_PID" ] || ! kill -0 "$BIFROST_PID" 2>/dev/null; then
    log_warn "Cannot start stats monitor: bifrost not running"
    return
  fi
  echo "timestamp,cpu_pct,rss_mb" > "${STATS_FILE}"
  (
    while kill -0 "$BIFROST_PID" 2>/dev/null; do
      # Separate -o options with empty headers: the comma-joined form
      # ("%cpu=,rss=") is ambiguous on procps, where the text after the
      # first "=" may be taken as a single column header.
      sample=$(ps -p "$BIFROST_PID" -o %cpu= -o rss= 2>/dev/null) || sample=""
      if [ -n "$sample" ]; then
        # One awk call builds the CSV row and converts RSS (reported by ps
        # in KiB) to MiB with one decimal.
        echo "$sample" \
          | awk -v ts="$(date +%s)" '{ printf "%s,%s,%.1f\n", ts, $1, $2 / 1024 }' \
          >> "${STATS_FILE}"
      fi
      sleep 1
    done
  ) &
  STATS_PID=$!
  log_info "Stats monitor started (PID: ${STATS_PID})"
}
# Stop the stats sampler (if running) and summarise STATS_FILE.
# Sets: STATS_CPU_AVG, STATS_CPU_PEAK, STATS_RSS_AVG, STATS_RSS_PEAK.
#       All four are left empty when no samples were collected, so values
#       from an earlier phase can never be reported for the current one.
stop_stats_monitor() {
  if [ -n "$STATS_PID" ] && kill -0 "$STATS_PID" 2>/dev/null; then
    kill "$STATS_PID" 2>/dev/null || true
    wait "$STATS_PID" 2>/dev/null || true
    STATS_PID=""
  fi

  # Drop any summary left over from a previous call before computing a new one.
  STATS_CPU_AVG=""
  STATS_CPU_PEAK=""
  STATS_RSS_AVG=""
  STATS_RSS_PEAK=""

  # A file with only the header line (or no file at all) means no samples.
  if [ ! -f "${STATS_FILE}" ] || [ $(wc -l < "${STATS_FILE}") -le 1 ]; then
    log_warn "No process stats collected"
    return
  fi

  # Without awk there is nothing to summarise with; skip silently.
  command -v awk &> /dev/null || return 0

  # Compute peak and average CPU/RSS from the CSV (skipping the header).
  local stats_summary
  stats_summary=$(awk -F',' 'NR>1 {
    cpu_sum+=$2; rss_sum+=$3; n++;
    if($2>cpu_max) cpu_max=$2;
    if($3>rss_max) rss_max=$3;
  } END {
    if(n>0) printf "%.1f,%.1f,%.1f,%.1f,%d", cpu_sum/n, cpu_max, rss_sum/n, rss_max, n
  }' "${STATS_FILE}")

  local samples
  IFS=',' read -r STATS_CPU_AVG STATS_CPU_PEAK STATS_RSS_AVG STATS_RSS_PEAK samples <<< "$stats_summary"

  echo ""
  log_success "Bifrost process stats (single instance, ${samples} samples):"
  log_info " CPU: avg=${STATS_CPU_AVG}%, peak=${STATS_CPU_PEAK}%"
  log_info " RSS: avg=${STATS_RSS_AVG}MB, peak=${STATS_RSS_PEAK}MB"
}
# Launch bifrost-http in the background with WORK_DIR as its app dir and
# block until its log contains the "successfully started bifrost" line.
# Sets: BIFROST_PID
# Exits with status 1 (after dumping the log) if the message does not show
# up within the attempt limit or the process exits first.
start_bifrost() {
  log_info "Starting bifrost-http on port ${BIFROST_PORT}..."
  cd "${WORK_DIR}"

  local log_path="${WORK_DIR}/bifrost.log"
  "${REPO_ROOT}/tmp/bifrost-http" -app-dir "${WORK_DIR}" -port "${BIFROST_PORT}" -host "0.0.0.0" -log-level "info" > "${log_path}" 2>&1 &
  BIFROST_PID=$!

  # Poll the log once per second for the readiness message.
  local attempt_limit=60
  local attempts=0
  local failure
  until grep -q "successfully started bifrost" "${log_path}" 2>/dev/null; do
    sleep 1
    attempts=$((attempts + 1))

    # The timeout is checked before liveness.
    failure=""
    if [ "$attempts" -ge "$attempt_limit" ]; then
      failure="Bifrost failed to start within ${attempt_limit} seconds"
    elif ! kill -0 "$BIFROST_PID" 2>/dev/null; then
      failure="Bifrost process died unexpectedly"
    fi

    if [ -n "$failure" ]; then
      log_error "$failure"
      log_error "Bifrost log output:"
      cat "${log_path}" 2>/dev/null || true
      exit 1
    fi
  done

  log_success "Bifrost-http started (PID: ${BIFROST_PID})"
}
# Extract latencies and summary figures from a vegeta binary results file.
# Arguments: $1 = path to .bin file
# Sets: EXTRACTED_MIN_NS, EXTRACTED_MEAN_NS, EXTRACTED_50_NS, EXTRACTED_90_NS,
#       EXTRACTED_95_NS, EXTRACTED_99_NS, EXTRACTED_MAX_NS, EXTRACTED_SUCCESS,
#       EXTRACTED_RATE, EXTRACTED_THROUGHPUT (a missing field becomes 0)
# Returns: 1 when neither jq nor python3 is available to parse the report.
extract_latencies() {
  local bin_file=$1
  local json_report_file="${WORK_DIR}/vegeta-report.json"
  vegeta report -type=json < "${bin_file}" > "${json_report_file}"

  # Pick the JSON parser once: jq preferred, python3 as fallback.
  local parser
  if command -v jq &> /dev/null; then
    parser=jq
  elif command -v python3 &> /dev/null; then
    parser=python3
  else
    # Do not leave the intermediate report behind on the failure path.
    rm -f "${json_report_file}"
    log_error "Neither jq nor python3 found. Cannot parse JSON results."
    return 1
  fi

  # The python fallback receives the file name and the dotted key path as
  # argv, so a quote character in the path cannot break the Python source.
  local py_lookup='import json, sys
value = json.load(open(sys.argv[1]))
for key in sys.argv[2].split("."):
    value = value.get(key, 0) if isinstance(value, dict) else 0
print(value)'

  # Each entry is <suffix of the EXTRACTED_ variable>:<dotted JSON path>.
  local field var_name json_path value
  for field in \
    MIN_NS:latencies.min \
    MEAN_NS:latencies.mean \
    50_NS:latencies.50th \
    90_NS:latencies.90th \
    95_NS:latencies.95th \
    99_NS:latencies.99th \
    MAX_NS:latencies.max \
    SUCCESS:success \
    RATE:rate \
    THROUGHPUT:throughput; do
    var_name="EXTRACTED_${field%%:*}"
    json_path="${field#*:}"
    if [ "${parser}" = "jq" ]; then
      value=$(jq --arg p "${json_path}" 'getpath($p | split(".")) // 0' "${json_report_file}")
    else
      value=$(python3 -c "${py_lookup}" "${json_report_file}" "${json_path}")
    fi
    printf -v "${var_name}" '%s' "${value}"
  done

  rm -f "${json_report_file}"
}
# ============================================================
# Phase 1: Overhead measurement (mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms)
# ============================================================
# Calibration: Vegeta -> Mocker direct (with latency)
# Measures: Vegeta HTTP client + localhost network round-trip + mocker response generation
# Sets: CAL_MIN_NS, CAL_MEAN_NS, CAL_50_NS, CAL_90_NS, CAL_95_NS, CAL_99_NS,
#       CAL_MAX_NS — the baseline later subtracted from the through-Bifrost run.
run_calibration() {
  # Requests in flight at steady state: rate x latency.
  local concurrent=$((RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000))

  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Calibration: Vegeta -> Mocker (${OVERHEAD_MOCKER_LATENCY_MS}ms, direct) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Measuring Vegeta + network baseline (mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency)"
  log_info "Duration: ${OVERHEAD_DURATION}s at ${RATE} RPS, ~${concurrent} concurrent"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target-calibration.json"
  local payload='{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'
  # GNU base64 wraps its output at 76 columns; the JSON target must stay on
  # a single line, so strip any newlines from the encoded body.
  local body_b64
  body_b64=$(printf '%s' "${payload}" | base64 | tr -d '\n')
  cat > "${target_file}" << EOF
{"method": "POST", "url": "http://localhost:${MOCKER_PORT}/v1/chat/completions", "header": {"Content-Type": ["application/json"], "Authorization": ["Bearer mocker-key"]}, "body": "${body_b64}"}
EOF

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${OVERHEAD_DURATION}s" \
    -timeout="$((OVERHEAD_MOCKER_LATENCY_MS / 1000 + 5))s" \
    -workers="${concurrent}" \
    -max-workers="${MAX_WORKERS}" > "${WORK_DIR}/calibration.bin"

  echo ""
  log_info "Calibration complete. Results:"
  vegeta report < "${WORK_DIR}/calibration.bin"

  extract_latencies "${WORK_DIR}/calibration.bin"
  log_info "Actual RPS: $(printf "%.0f" "$EXTRACTED_RATE") (configured: ${RATE})"

  # Keep the per-bucket baseline for the overhead subtraction.
  CAL_MIN_NS=$EXTRACTED_MIN_NS
  CAL_MEAN_NS=$EXTRACTED_MEAN_NS
  CAL_50_NS=$EXTRACTED_50_NS
  CAL_90_NS=$EXTRACTED_90_NS
  CAL_95_NS=$EXTRACTED_95_NS
  CAL_99_NS=$EXTRACTED_99_NS
  CAL_MAX_NS=$EXTRACTED_MAX_NS

  echo ""
  log_success "Calibration baseline (per bucket):"
  log_info " Min: $(echo "scale=2; $CAL_MIN_NS / 1000" | bc)µs"
  log_info " Mean: $(echo "scale=2; $CAL_MEAN_NS / 1000" | bc)µs"
  log_info " P50: $(echo "scale=2; $CAL_50_NS / 1000" | bc)µs"
  log_info " P90: $(echo "scale=2; $CAL_90_NS / 1000" | bc)µs"
  log_info " P95: $(echo "scale=2; $CAL_95_NS / 1000" | bc)µs"
  log_info " P99: $(echo "scale=2; $CAL_99_NS / 1000" | bc)µs"
  log_info " Max: $(echo "scale=2; $CAL_MAX_NS / 1000" | bc)µs"
}
# Overhead test: Vegeta -> Bifrost -> Mocker (with latency)
# Same duration/rate as calibration so percentile distributions are comparable.
# Reads the CAL_*_NS baseline, writes RESULTS_FILE (Markdown) and RESULTS_JSON,
# and exits with status 1 when latencies cannot be extracted or any of the
# Mean/P50/P90/P95/P99 overheads exceeds its MAX_OVERHEAD_*_US threshold.
run_overhead_test() {
  # Requests in flight at steady state: rate x latency.
  local concurrent=$((RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000))

  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Overhead Test: Vegeta -> Bifrost -> Mocker (${OVERHEAD_MOCKER_LATENCY_MS}ms) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Measuring Bifrost overhead (single instance, mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency)"
  log_info "Duration: ${OVERHEAD_DURATION}s at ${RATE} RPS, ~${concurrent} concurrent requests through Bifrost"
  log_info "Overhead consists of: vegetta overhead and mocker timeout jitter"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target.json"
  local payload='{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'
  # GNU base64 wraps its output at 76 columns; the JSON target must stay on
  # a single line, so strip any newlines from the encoded body.
  local body_b64
  body_b64=$(printf '%s' "${payload}" | base64 | tr -d '\n')
  cat > "${target_file}" << EOF
{"method": "POST", "url": "http://localhost:${BIFROST_PORT}/v1/chat/completions", "header": {"Content-Type": ["application/json"]}, "body": "${body_b64}"}
EOF

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${OVERHEAD_DURATION}s" \
    -timeout="$((OVERHEAD_MOCKER_LATENCY_MS / 1000 + 5))s" \
    -workers="${concurrent}" \
    -max-workers="${MAX_WORKERS}" > "${WORK_DIR}/attack.bin"

  echo ""
  log_info "Overhead test complete. Results:"
  vegeta report < "${WORK_DIR}/attack.bin"
  echo ""
  log_info "Latency histogram:"
  # Quoted so the bracket expression is never treated as a glob pattern.
  vegeta report -type='hist[0,100us,500us,1ms,5ms,10ms,50ms,100ms]' < "${WORK_DIR}/attack.bin" || log_warn "Histogram generation failed"

  # Extract and compute overhead
  extract_latencies "${WORK_DIR}/attack.bin"
  log_info " Raw latencies (ns): min=$EXTRACTED_MIN_NS, mean=$EXTRACTED_MEAN_NS, p50=$EXTRACTED_50_NS, p99=$EXTRACTED_99_NS, max=$EXTRACTED_MAX_NS"
  log_info " Success rate: $EXTRACTED_SUCCESS"
  log_info " Actual RPS: $(printf "%.0f" "$EXTRACTED_RATE") (configured: ${RATE})"
  if [ -z "$EXTRACTED_MIN_NS" ] || [ "$EXTRACTED_MIN_NS" = "0" ] || [ "$EXTRACTED_MIN_NS" = "null" ]; then
    log_error "Failed to extract latency values from vegeta report"
    exit 1
  fi

  # Subtract calibration per bucket: overhead = through_bifrost - direct_to_mocker
  local us_min=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_MIN_NS - $CAL_MIN_NS) / 1000" | bc))
  local us_mean=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_MEAN_NS - $CAL_MEAN_NS) / 1000" | bc))
  local us_50=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_50_NS - $CAL_50_NS) / 1000" | bc))
  local us_90=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_90_NS - $CAL_90_NS) / 1000" | bc))
  local us_95=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_95_NS - $CAL_95_NS) / 1000" | bc))
  local us_99=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_99_NS - $CAL_99_NS) / 1000" | bc))
  local us_max=$(printf "%.2f" $(echo "scale=4; ($EXTRACTED_MAX_NS - $CAL_MAX_NS) / 1000" | bc))
  local success_pct=$(printf "%.2f" $(echo "scale=4; $EXTRACTED_SUCCESS * 100" | bc))

  echo ""
  log_success "Bifrost overhead (per bucket):"
  log_info " Min: ${us_min}µs"
  log_info " Mean: ${us_mean}µs"
  log_info " P50: ${us_50}µs"
  log_info " P90: ${us_90}µs"
  log_info " P95: ${us_95}µs"
  log_info " P99: ${us_99}µs"
  log_info " Max: ${us_max}µs"

  local actual_rps=$(printf "%.0f" "$EXTRACTED_RATE")

  # Write results (this creates/overwrites both result files)
  cat > "${RESULTS_FILE}" << EOF
# Bifrost Load Test Results (single instance, ${actual_rps} RPS)
## Bifrost Processing Overhead
| Metric | Actual RPS | Duration | Concurrent | Success Rate | Min | Mean | P50 | P90 | P95 | P99 | Max |
|--------|-----------|----------|------------|--------------|-----|------|-----|-----|-----|-----|-----|
| Overhead | ${actual_rps} | ${OVERHEAD_DURATION}s | ~${concurrent} | ${success_pct}% | ${us_min}µs | ${us_mean}µs | ${us_50}µs | ${us_90}µs | ${us_95}µs | ${us_99}µs | ${us_max}µs |
EOF
  echo '{"overhead": {"configured_rate": '"${RATE}"', "actual_rate": '"${actual_rps}"', "duration": '"${OVERHEAD_DURATION}"', "concurrent": '$((RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000))', "success_rate": '"${success_pct}"', "latency_us": {"min": '"${us_min}"', "mean": '"${us_mean}"', "p50": '"${us_50}"', "p90": '"${us_90}"', "p95": '"${us_95}"', "p99": '"${us_99}"', "max": '"${us_max}"'}}, "timestamp": "'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}' > "${RESULTS_JSON}"

  # Check tiered thresholds (skip Min/Max — single-point extremes are too noisy).
  # The comparison uses whole microseconds (integer division of the ns delta).
  local failed=0
  local labels=("Mean" "P50" "P90" "P95" "P99")
  local real_values=($EXTRACTED_MEAN_NS $EXTRACTED_50_NS $EXTRACTED_90_NS $EXTRACTED_95_NS $EXTRACTED_99_NS)
  local cal_values=($CAL_MEAN_NS $CAL_50_NS $CAL_90_NS $CAL_95_NS $CAL_99_NS)
  local thresholds=($MAX_OVERHEAD_MEAN_US $MAX_OVERHEAD_P50_US $MAX_OVERHEAD_P90_US $MAX_OVERHEAD_P95_US $MAX_OVERHEAD_P99_US)
  local extras=()
  local i overhead_us
  for i in "${!real_values[@]}"; do
    overhead_us=$(( (real_values[i] - cal_values[i]) / 1000 ))
    if [ "$overhead_us" -gt "${thresholds[i]}" ]; then
      extras+=("${labels[i]}:${overhead_us}:${thresholds[i]}")
      failed=1
    fi
  done

  if [ "$failed" -eq 1 ]; then
    echo ""
    log_error "FAILED: Bifrost overhead exceeded tiered thresholds"
    log_error "Overhead consists of: vegetta overhead and mocker timeout jitter. In real-world the P99 overhead will be approximately 100 microseconds."
    echo ""
    echo -e "${RED}| Bucket | Overhead (µs) | Threshold (µs) |${NC}"
    echo -e "${RED}|--------|---------------|----------------|${NC}"
    local entry bucket overhead threshold
    for entry in "${extras[@]}"; do
      IFS=: read -r bucket overhead threshold <<< "$entry"
      echo -e "${RED}| ${bucket} | ${overhead}µs | ${threshold}µs |${NC}"
    done
    echo ""
    # Print the process stats collected so far before bailing out.
    stop_stats_monitor
    exit 1
  fi
  log_success "All overhead buckets within tiered thresholds (mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs)"
}
# ============================================================
# Phase 2: Stress test (mocker at ${STRESS_MOCKER_LATENCY_MS}ms latency)
# ============================================================
# Runs RATE RPS through Bifrost for STRESS_DURATION seconds, appends a result
# row to RESULTS_FILE and exits with status 1 unless every request succeeded.
# Arguments: $1 = label (e.g. "Stress #1", "Stress #2")
run_stress_test() {
  local label="${1:-Stress}"
  local bin_file="${WORK_DIR}/stress.bin"
  # Requests in flight at steady state: rate x latency.
  local concurrent=$((RATE * STRESS_MOCKER_LATENCY_MS / 1000))

  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ ${label}: ${RATE} RPS with ${STRESS_MOCKER_LATENCY_MS}ms mocker latency ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Testing single Bifrost instance under sustained concurrency"
  log_info "Duration: ${STRESS_DURATION}s at ${RATE} RPS (${STRESS_MOCKER_LATENCY_MS}ms mocker latency)"
  # Figures match the concurrency/buffer_size written by create_config.
  log_info "Expected concurrent requests: ~${concurrent} (provider concurrency: 20,000, buffer: 40,000)"
  echo ""

  local target_file="${WORK_DIR}/vegeta-target-stress.json"
  local payload='{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'
  # GNU base64 wraps its output at 76 columns; the JSON target must stay on
  # a single line, so strip any newlines from the encoded body.
  local body_b64
  body_b64=$(printf '%s' "${payload}" | base64 | tr -d '\n')
  cat > "${target_file}" << EOF
{"method": "POST", "url": "http://localhost:${BIFROST_PORT}/v1/chat/completions", "header": {"Content-Type": ["application/json"]}, "body": "${body_b64}"}
EOF

  vegeta attack \
    -format=json \
    -targets="${target_file}" \
    -rate="${RATE}" \
    -duration="${STRESS_DURATION}s" \
    -timeout="30s" \
    -workers="${concurrent}" \
    -max-workers="${MAX_WORKERS}" > "${bin_file}"

  echo ""
  log_info "${label} complete. Results:"
  vegeta report < "${bin_file}"
  echo ""
  log_info "Latency histogram:"
  # Quoted so the bracket expression is never treated as a glob pattern.
  vegeta report -type='hist[0,1ms,5ms,10ms,50ms,100ms,500ms,1s,5s,10s,15s]' < "${bin_file}" || log_warn "Histogram generation failed"

  # Check success rate
  extract_latencies "${bin_file}"
  local success_pct=$(printf "%.2f" $(echo "scale=4; $EXTRACTED_SUCCESS * 100" | bc))
  log_info "Actual RPS: $(printf "%.0f" "$EXTRACTED_RATE") (configured: ${RATE})"
  local stress_actual_rps=$(printf "%.0f" "$EXTRACTED_RATE")

  # Append stress test results to results file
  cat >> "${RESULTS_FILE}" << EOF
## ${label} (${STRESS_MOCKER_LATENCY_MS}ms mocker latency)
| Metric | Actual RPS | Duration | Concurrent | Success Rate | Min | Mean | P50 | P90 | P95 | P99 | Max |
|--------|-----------|----------|------------|--------------|-----|------|-----|-----|-----|-----|-----|
| ${label} | ${stress_actual_rps} | ${STRESS_DURATION}s | ~${concurrent} | ${success_pct}% | $(echo "scale=2; $EXTRACTED_MIN_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_MEAN_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_50_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_90_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_95_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_99_NS / 1000000" | bc)ms | $(echo "scale=2; $EXTRACTED_MAX_NS / 1000000" | bc)ms |
EOF

  # Compare the raw success ratio with 1. The two-decimal percentage above is
  # for display only: it rounds e.g. 29999/30000 up to "100.00" and would let
  # a run with failed requests pass.
  if ! awk -v s="${EXTRACTED_SUCCESS}" 'BEGIN { exit !(s + 0 == 1) }'; then
    echo ""
    log_error "FAILED: ${label} success rate is ${success_pct}% (expected 100%)"
    log_error "Raw success ratio reported by Vegeta: ${EXTRACTED_SUCCESS}"
    exit 1
  fi
  log_success "${label} passed: ${success_pct}% success rate"
}
# ============================================================
# Finalize
# ============================================================
# Completes the report: appends the process-stats table (when stats exist)
# and the method/notes sections to RESULTS_FILE, then merges the stress
# result and process stats into RESULTS_JSON. The JSON merge needs jq and is
# skipped when jq is not installed.
# Returns: 1 if jq is present but fails to update RESULTS_JSON.
finalize_results() {
  # Append process stats if available
  local has_overhead_stats=false
  local has_stress_stats=false
  if [ -n "$OVERHEAD_STATS_CPU_PEAK" ]; then
    has_overhead_stats=true
  fi
  if [ -n "$STATS_CPU_PEAK" ]; then
    has_stress_stats=true
  fi

  if [ "$has_overhead_stats" = true ] || [ "$has_stress_stats" = true ]; then
    cat >> "${RESULTS_FILE}" << 'EOF'
## Bifrost Process Stats (single instance)
| Phase | CPU Avg | CPU Peak | RSS Avg | RSS Peak |
|-------|---------|----------|---------|----------|
EOF
    if [ "$has_overhead_stats" = true ]; then
      echo "| Overhead | ${OVERHEAD_STATS_CPU_AVG}% | ${OVERHEAD_STATS_CPU_PEAK}% | ${OVERHEAD_STATS_RSS_AVG}MB | ${OVERHEAD_STATS_RSS_PEAK}MB |" >> "${RESULTS_FILE}"
    fi
    if [ "$has_stress_stats" = true ]; then
      echo "| Stress | ${STATS_CPU_AVG}% | ${STATS_CPU_PEAK}% | ${STATS_RSS_AVG}MB | ${STATS_RSS_PEAK}MB |" >> "${RESULTS_FILE}"
    fi
  fi

  cat >> "${RESULTS_FILE}" << EOF
## Method
- **Single instance**: All tests run against one bifrost-http process at ${RATE} RPS
- **Overhead measurement**: Mocker at ${OVERHEAD_MOCKER_LATENCY_MS}ms latency, calibration (Vegeta->Mocker) subtracted from test (Vegeta->Bifrost->Mocker)
- **Stress test**: Mocker at ${STRESS_MOCKER_LATENCY_MS}ms latency, verifies 100% success under sustained concurrency
## Notes
- Overhead values are in microseconds (µs), stress test values in milliseconds (ms)
- Overhead ignores the mocker jitter, local network request queuing. In real-world the P99 overhead will be approximately 100 microseconds.
- Tiered overhead thresholds: mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs
- P50/P90/P95/P99 represent percentile latencies
---
*Generated by Bifrost Load Test Script*
EOF

  # Update JSON with stress results and process stats. EXTRACTED_SUCCESS holds
  # whatever the most recent extract_latencies call left behind.
  if command -v jq &> /dev/null; then
    # The temp file is created only when jq will actually use it, and is
    # removed again if jq fails, so no stray file is left behind.
    local tmp_json
    tmp_json=$(mktemp)
    if jq --arg sr "$(printf "%.2f" $(echo "scale=4; $EXTRACTED_SUCCESS * 100" | bc))" \
      --arg cpu_avg "${STATS_CPU_AVG:-0}" --arg cpu_peak "${STATS_CPU_PEAK:-0}" \
      --arg rss_avg "${STATS_RSS_AVG:-0}" --arg rss_peak "${STATS_RSS_PEAK:-0}" \
      --arg oh_cpu_avg "${OVERHEAD_STATS_CPU_AVG:-0}" --arg oh_cpu_peak "${OVERHEAD_STATS_CPU_PEAK:-0}" \
      --arg oh_rss_avg "${OVERHEAD_STATS_RSS_AVG:-0}" --arg oh_rss_peak "${OVERHEAD_STATS_RSS_PEAK:-0}" \
      '.stress = {"rate": '"${RATE}"', "duration": '"${STRESS_DURATION}"', "mocker_latency_ms": '"${STRESS_MOCKER_LATENCY_MS}"', "success_rate": ($sr | tonumber)} | .process_stats = {"overhead": {"cpu_avg_pct": ($oh_cpu_avg | tonumber), "cpu_peak_pct": ($oh_cpu_peak | tonumber), "rss_avg_mb": ($oh_rss_avg | tonumber), "rss_peak_mb": ($oh_rss_peak | tonumber)}, "stress": {"cpu_avg_pct": ($cpu_avg | tonumber), "cpu_peak_pct": ($cpu_peak | tonumber), "rss_avg_mb": ($rss_avg | tonumber), "rss_peak_mb": ($rss_peak | tonumber)}}' \
      "${RESULTS_JSON}" > "${tmp_json}"; then
      mv "${tmp_json}" "${RESULTS_JSON}"
    else
      rm -f "${tmp_json}"
      log_error "Failed to update ${RESULTS_JSON} with stress results"
      return 1
    fi
  fi

  log_success "Results saved to:"
  log_info " - Markdown: ${RESULTS_FILE}"
  log_info " - JSON: ${RESULTS_JSON}"
}
# Main execution: prepares tools and binaries, runs phase 1 (calibration and
# overhead measurement) and phase 2 (two stress runs separated by a 30s idle
# period), then writes and prints the final report. Any failing step aborts
# the script (set -e / explicit exit in the step).
main() {
  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Bifrost Load Test (single instance, ${RATE} RPS) ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Configuration: single bifrost-http instance, ${RATE} RPS"
  # Figures match the concurrency/buffer_size written by create_config.
  log_info "Provider concurrency: 20,000 (buffer: 40,000)"
  log_info "Overhead thresholds: mean<${MAX_OVERHEAD_MEAN_US}µs, p50<${MAX_OVERHEAD_P50_US}µs, p90<${MAX_OVERHEAD_P90_US}µs, p95<${MAX_OVERHEAD_P95_US}µs, p99<${MAX_OVERHEAD_P99_US}µs"
  log_info "Phase 1: Overhead measurement — ${OVERHEAD_MOCKER_LATENCY_MS}ms mocker, ${OVERHEAD_DURATION}s, ~$(( RATE * OVERHEAD_MOCKER_LATENCY_MS / 1000 )) concurrent requests"
  log_info "Phase 2: Stress test — ${STRESS_MOCKER_LATENCY_MS}ms mocker, ${STRESS_DURATION}s, ~$(( RATE * STRESS_MOCKER_LATENCY_MS / 1000 )) concurrent requests"

  check_dependencies
  install_vegeta
  build_bifrost_http
  setup_mocker
  build_mocker
  create_config
  cleanup_ports

  # ── Phase 1: Overhead measurement with ${OVERHEAD_MOCKER_LATENCY_MS}ms mocker ──
  start_mocker "${OVERHEAD_MOCKER_LATENCY_MS}"
  start_bifrost
  run_calibration
  # Start sampling only after calibration: calibration traffic goes straight
  # to the mocker, so sampling Bifrost during it would mix idle samples into
  # the averages reported for the overhead phase.
  start_stats_monitor
  run_overhead_test

  # ── Collect process stats from overhead phase ──
  stop_stats_monitor
  OVERHEAD_STATS_CPU_AVG="${STATS_CPU_AVG}"
  OVERHEAD_STATS_CPU_PEAK="${STATS_CPU_PEAK}"
  OVERHEAD_STATS_RSS_AVG="${STATS_RSS_AVG}"
  OVERHEAD_STATS_RSS_PEAK="${STATS_RSS_PEAK}"

  # ── Phase 2: Stress test with ${STRESS_MOCKER_LATENCY_MS}ms mocker ──
  # Restart both mocker and bifrost to ensure a clean fasthttp connection pool.
  # Without restarting bifrost, stale TCP connections from the overhead phase
  # (which used a different mocker process) cause immediate 400s on POST requests
  # because fasthttp does not retry non-idempotent methods on broken connections.
  stop_mocker
  stop_bifrost
  start_mocker "${STRESS_MOCKER_LATENCY_MS}"
  start_bifrost
  start_stats_monitor
  run_stress_test "Stress #1"
  echo ""
  log_info "Waiting 30s before second stress test (idle period)..."
  sleep 30
  run_stress_test "Stress #2"

  # ── Collect process stats from stress phase ──
  stop_stats_monitor

  # ── Finalize ──
  finalize_results
  cleanup_ports
  echo ""

  # Print final summary
  echo "╔══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗"
  echo "║ FINAL RESULTS SUMMARY ║"
  echo "╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝"
  echo ""
  cat "${RESULTS_FILE}"
  echo ""
  log_success "All tests passed!"
}
main "$@"