first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/deployment-guides/config-json/client.mdx
+++ b/docs/deployment-guides/config-json/client.mdx
@@ -0,0 +1,276 @@
+---
+title: "Client Configuration"
+description: "Configure the Bifrost client in config.json — connection pool, logging, CORS, header filtering, compat shims, and MCP settings"
+icon: "gear"
+---
+
+The `client` block controls how Bifrost manages its internal worker pool, request logging, authentication enforcement, header policies, SDK compatibility shims, and MCP agent behaviour.
+
+---
+
+## Connection Pool
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `initial_pool_size` | integer | `300` | Pre-allocated worker goroutines per provider queue |
+| `drop_excess_requests` | boolean | `false` | Drop requests when queue is full instead of waiting (returns HTTP 429) |
+
+A larger pool reduces latency spikes under burst load at the cost of higher baseline memory. `500–1000` is a common starting point for production workloads with multiple providers.
+
+```json
+{
+  "client": {
+    "initial_pool_size": 1000,
+    "drop_excess_requests": true
+  }
+}
+```
+
+---
+
+## Request & Response Logging
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable_logging` | boolean | — | Log all LLM requests and responses |
+| `disable_content_logging` | boolean | `false` | Strip message content from logs (keeps metadata only) |
+| `log_retention_days` | integer | `365` | Days to retain log entries in the store |
+| `logging_headers` | array of strings | `[]` | HTTP request headers to capture in log metadata |
+
+Set `disable_content_logging: true` for HIPAA / PCI compliance workloads where message content must not be persisted.
+
+```json
+{
+  "client": {
+    "enable_logging": true,
+    "disable_content_logging": true,
+    "log_retention_days": 90,
+    "logging_headers": ["x-request-id", "x-user-id"]
+  }
+}
+```
+
+---
+
+## Security & CORS
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `allowed_origins` | array of strings | `["*"]` | CORS allowed origins (use URIs or `"*"`) |
+| `allow_direct_keys` | boolean | `false` | Allow callers to pass provider keys directly in requests |
+| `enforce_auth_on_inference` | boolean | `false` | Require auth (virtual key, API key, or user token) on `/v1/*` inference routes |
+| `max_request_body_size_mb` | integer | `100` | Maximum allowed request body size in MB |
+| `whitelisted_routes` | array of strings | `[]` | Routes that bypass auth middleware |
+| `allowed_headers` | array of strings | `[]` | Additional headers permitted for CORS and WebSocket |
+
+```json
+{
+  "client": {
+    "allowed_origins": [
+      "https://app.yourcompany.com",
+      "https://admin.yourcompany.com"
+    ],
+    "allow_direct_keys": false,
+    "enforce_auth_on_inference": true,
+    "max_request_body_size_mb": 50,
+    "whitelisted_routes": ["/health", "/metrics"]
+  }
+}
+```
+
+---
+
+## Header Filtering
+
+Controls which `x-bf-eh-*` extra headers are forwarded to upstream LLM providers.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `header_filter_config.allowlist` | array of strings | `[]` | Only these headers are forwarded (whitelist mode) |
+| `header_filter_config.denylist` | array of strings | `[]` | These headers are always blocked |
+| `required_headers` | array of strings | `[]` | Headers that must be present on every request (rejected with 400 if missing) |
+
+When both `allowlist` and `denylist` are empty, all `x-bf-eh-*` headers pass through. Specifying an `allowlist` enables strict whitelist mode — only listed headers are forwarded.
+
+```json
+{
+  "client": {
+    "header_filter_config": {
+      "allowlist": [
+        "x-bf-eh-anthropic-version",
+        "x-bf-eh-openai-beta"
+      ],
+      "denylist": []
+    },
+    "required_headers": ["x-request-id"]
+  }
+}
+```
+
+---
+
+## Compat Shims
+
+Compatibility flags that let Bifrost silently adapt request/response shapes for SDK integrations.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `compat.convert_text_to_chat` | boolean | `false` | Wrap legacy `/v1/completions` text requests as chat messages |
+| `compat.convert_chat_to_responses` | boolean | `false` | Translate chat completions to Responses API format |
+| `compat.should_drop_params` | boolean | `false` | Silently drop unsupported parameters instead of erroring |
+| `compat.should_convert_params` | boolean | `false` | Auto-convert parameter values across provider schemas |
+
+```json
+{
+  "client": {
+    "compat": {
+      "should_drop_params": true,
+      "convert_text_to_chat": true
+    }
+  }
+}
+```
+
+---
+
+## MCP Agent Settings
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `mcp_agent_depth` | integer | `10` | Maximum tool-call recursion depth for MCP agent mode |
+| `mcp_tool_execution_timeout` | integer | `30` | Timeout per MCP tool execution in seconds |
+| `mcp_code_mode_binding_level` | string | — | Code mode binding level: `"server"` or `"tool"` |
+| `mcp_tool_sync_interval` | integer | `10` | Global tool sync interval in minutes (`0` = disabled) |
+| `mcp_disable_auto_tool_inject` | boolean | `false` | When `true`, MCP tools are not automatically injected into requests |
+
+```json
+{
+  "client": {
+    "mcp_agent_depth": 15,
+    "mcp_tool_execution_timeout": 60,
+    "mcp_tool_sync_interval": 10
+  }
+}
+```
+
+---
+
+## Async Jobs & Health Checks
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `async_job_result_ttl` | integer | `3600` | TTL (seconds) for async job results |
+| `disable_db_pings_in_health` | boolean | `false` | Exclude database connectivity from `/health` endpoint checks |
+
+---
+
+## Prometheus Labels
+
+Add custom labels to every Prometheus metric emitted by Bifrost:
+
+```json
+{
+  "client": {
+    "prometheus_labels": ["environment=production", "region=us-east-1"]
+  }
+}
+```
+
+---
+
+## Authentication
+
+`governance.auth_config` protects the Bifrost dashboard and management API with username/password auth.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `is_enabled` | boolean | `false` | Enable username/password auth |
+| `admin_username` | string | — | Admin username |
+| `admin_password` | string | — | Admin password (use `env.` reference) |
+| `disable_auth_on_inference` | boolean | `false` | Skip auth check on `/v1/*` inference routes |
+
+```json
+{
+  "governance": {
+    "auth_config": {
+      "is_enabled": true,
+      "admin_username": "env.BIFROST_ADMIN_USERNAME",
+      "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+      "disable_auth_on_inference": false
+    }
+  }
+}
+```
+
+<Note>
+A top-level `auth_config` is also accepted for backwards compatibility, but `governance.auth_config` is the preferred location.
+</Note>
+
+---
+
+## Encryption Key
+
+```json
+{
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY"
+}
+```
+
+| Notes |
+|-------|
+| Accepts any string; Bifrost derives a 32-byte AES-256 key using Argon2id |
+| Can also be set via the `BIFROST_ENCRYPTION_KEY` environment variable |
+| Once set and the database is populated, the key cannot be changed without clearing the database |
+| Omitting the key stores data in plain text — not recommended for production |
+
+---
+
+## Full Example
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+  "governance": {
+    "auth_config": {
+      "is_enabled": true,
+      "admin_username": "env.BIFROST_ADMIN_USERNAME",
+      "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+      "disable_auth_on_inference": false
+    }
+  },
+
+  "client": {
+    "initial_pool_size": 1000,
+    "drop_excess_requests": true,
+
+    "enable_logging": true,
+    "disable_content_logging": false,
+    "log_retention_days": 90,
+    "logging_headers": ["x-request-id", "x-user-id"],
+
+    "allowed_origins": ["https://app.yourcompany.com"],
+    "allow_direct_keys": false,
+    "enforce_auth_on_inference": true,
+    "max_request_body_size_mb": 100,
+
+    "header_filter_config": {
+      "allowlist": [],
+      "denylist": []
+    },
+    "required_headers": [],
+
+    "compat": {
+      "should_drop_params": false
+    },
+
+    "prometheus_labels": ["environment=production"],
+
+    "mcp_agent_depth": 10,
+    "mcp_tool_execution_timeout": 30,
+
+    "async_job_result_ttl": 3600
+  }
+}
+```
--- a/docs/deployment-guides/config-json/cluster.mdx
+++ b/docs/deployment-guides/config-json/cluster.mdx
@@ -0,0 +1,154 @@
+---
+title: "Cluster"
+description: "Configure enterprise cluster mode in config.json using peers or automatic discovery"
+icon: "circle-nodes"
+---
+
+<Warning>
+`cluster_config` is an enterprise capability. OSS builds ignore this section.
+
+</Warning>
+
+`cluster_config` enables multi-node Bifrost enterprise clustering with gossip-based membership and optional automatic node discovery.
+
+You can form a cluster in two ways:
+
+- Define static `peers` (`host:port`)
+- Enable `discovery` with one of: `kubernetes`, `dns`, `udp`, `consul`, `etcd`, `mdns`
+
+<Tip>
+At least one of `peers` or `discovery.enabled: true` must be configured when `cluster_config.enabled` is true.
+</Tip>
+
+---
+
+## Minimal Runnable Config
+
+```json
+{
+  "cluster_config": {
+    "enabled": true,
+    "discovery": {
+      "enabled": true,
+      "type": "mdns",
+      "service_name": "bifrost-cluster"
+    }
+  }
+}
+```
+
+Use this for local testing. At startup, cluster init requires either:
+
+- non-empty `peers`, or
+- `discovery.enabled: true`
+
+If neither is set, cluster initialization fails.
+
+---
+
+## Static Peers
+
+```json
+{
+  "cluster_config": {
+    "enabled": true,
+    "region": "us-east-1",
+    "peers": [
+      "10.0.1.10:10101",
+      "10.0.1.11:10101"
+    ],
+    "gossip": {
+      "port": 10101,
+      "config": {
+        "timeout_seconds": 10,
+        "success_threshold": 3,
+        "failure_threshold": 3
+      }
+    }
+  }
+}
+```
+
+---
+
+## Discovery Example (etcd)
+
+```json
+{
+  "cluster_config": {
+    "enabled": true,
+    "region": "us-east-1",
+    "gossip": {
+      "port": 10101,
+      "config": {
+        "timeout_seconds": 10,
+        "success_threshold": 3,
+        "failure_threshold": 3
+      }
+    },
+    "discovery": {
+      "enabled": true,
+      "type": "etcd",
+      "service_name": "bifrost-cluster",
+      "etcd_endpoints": [
+        "http://etcd-1:2379",
+        "http://etcd-2:2379"
+      ],
+      "dial_timeout": "10s"
+    }
+  }
+}
+```
+
+---
+
+## Field Reference
+
+### `cluster_config`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `enabled` | boolean | Enables cluster mode |
+| `region` | string | Region label for this node (defaults to `"unknown"` at runtime when omitted) |
+| `peers` | array of strings | Static peer addresses in `host:port` format |
+| `gossip` | object | Gossip/memberlist settings |
+| `discovery` | object | Automatic node discovery settings |
+
+### `cluster_config.gossip`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `port` | integer | Gossip port for this node |
+| `config.timeout_seconds` | integer | Liveness timeout |
+| `config.success_threshold` | integer | Success count before healthy |
+| `config.failure_threshold` | integer | Failure count before unhealthy |
+
+### `cluster_config.discovery`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `enabled` | boolean | Enables discovery process |
+| `type` | string | `kubernetes`, `dns`, `udp`, `consul`, `etcd`, `mdns` |
+| `service_name` | string | Service identifier. Required for `consul`, `etcd`, and `udp`; typically set for `mdns`; optional for `kubernetes` and `dns` |
+| `bind_port` | integer | Port appended to discovered hosts if missing |
+| `dial_timeout` | string | Go duration string (`"5s"`, `"30s"`, `"1m"`) |
+| `allowed_address_space` | array of strings | CIDR filters for discovered nodes |
+| `k8s_namespace` | string | Kubernetes namespace for pod discovery |
+| `k8s_label_selector` | string | Kubernetes label selector |
+| `dns_names` | array of strings | DNS names to resolve |
+| `udp_broadcast_port` | integer | UDP broadcast port (required for `udp`) |
+| `consul_address` | string | Consul address |
+| `etcd_endpoints` | array of strings | etcd endpoint URLs |
+| `mdns_service` | string | Optional mDNS service type override (e.g. `"_bifrost-cluster._tcp"`) |
+
+<Note>
+For `discovery.type: "mdns"`, `service_name` is sufficient for most setups. When `mdns_service` is omitted, Bifrost derives the mDNS service type as `"_<service_name>._tcp"`. If you set `mdns_service`, it **overrides** the derived value and is used for both mDNS registration and browsing.
+</Note>
+
+<Warning>
+For `discovery.type: "udp"`, configure both `udp_broadcast_port` and `allowed_address_space`.
+</Warning>
+
+---
+
+For discovery-method deep dives and deployment patterns, see [Enterprise Clustering](/enterprise/clustering).
--- a/docs/deployment-guides/config-json/governance.mdx
+++ b/docs/deployment-guides/config-json/governance.mdx
@@ -0,0 +1,333 @@
+---
+title: "Governance"
+description: "Seed virtual keys, budgets, rate limits, routing rules, and admin auth in config.json"
+icon: "shield-check"
+---
+
+The `governance` block lets you seed all governance resources directly in `config.json`. On startup, Bifrost loads these into the configuration store. This is the recommended approach for GitOps workflows where governance state is managed as code.
+
+<Note>
+**Governance enforcement is always active** in OSS — you do not need a plugin entry to enable it. To require a virtual key on every inference request, set `client.enforce_auth_on_inference: true`. This setting acts as the global default: a more specific inference-auth flag, such as `governance.auth_config.disable_auth_on_inference`, takes precedence when it is set; otherwise `client.enforce_auth_on_inference` applies.
+</Note>
+
+---
+
+## Admin Authentication
+
+Protect the Bifrost dashboard and management API with username/password auth:
+
+```json
+{
+  "governance": {
+    "auth_config": {
+      "is_enabled": true,
+      "admin_username": "env.BIFROST_ADMIN_USERNAME",
+      "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+      "disable_auth_on_inference": false
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `is_enabled` | `false` | Enable admin username/password auth |
+| `admin_username` | — | Admin username (supports `env.` prefix) |
+| `admin_password` | — | Admin password (supports `env.` prefix) |
+| `disable_auth_on_inference` | `false` | Skip auth check on `/v1/*` inference routes |
+
+---
+
+## Virtual Keys
+
+Virtual keys are issued to clients and act as scoped API tokens. Each key specifies which providers, models, and API keys the bearer is allowed to use.
+
+```json
+{
+  "governance": {
+    "virtual_keys": [
+      {
+        "id": "vk-team-platform",
+        "name": "platform-team",
+        "value": "env.VK_PLATFORM_TEAM",
+        "is_active": true,
+        "provider_configs": [
+          {
+            "provider": "openai",
+            "allowed_models": ["gpt-4o", "gpt-4o-mini"],
+            "key_ids": ["*"],
+            "weight": 1
+          },
+          {
+            "provider": "anthropic",
+            "allowed_models": ["*"],
+            "key_ids": ["*"],
+            "weight": 1
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+### Virtual Key Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique virtual key ID (referenced by budgets via `virtual_key_id`) |
+| `name` | Yes | Human-readable name |
+| `value` | No | The key token sent by clients (use `env.` prefix). Auto-generated if omitted |
+| `is_active` | No | Default `true`. Set `false` to disable without deleting |
+| `team_id` | No | Associate with a team (mutually exclusive with `customer_id`) |
+| `customer_id` | No | Associate with a customer (mutually exclusive with `team_id`) |
+| `rate_limit_id` | No | Attach a rate limit |
+| `calendar_aligned` | No | Snap budget resets to day/week/month/year boundaries |
+| `provider_configs` | No | Allowed provider/model/key combinations (empty = deny all) |
+
+### Provider Config Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `provider` | Yes | Provider name (e.g. `"openai"`) |
+| `allowed_models` | No | Model allow-list. `["*"]` = all models; `[]` = deny all |
+| `key_ids` | No | Provider key names allowed for this VK. `["*"]` = all keys; `[]` = deny all. Use key `name` values (not UUIDs) in `config.json` |
+| `weight` | No | Load-balancing weight when multiple provider configs are present |
+| `rate_limit_id` | No | Attach a per-provider-config rate limit |
+
+---
+
+## Budgets
+
+Budgets cap cumulative spend (in USD) for a virtual key or provider config over a rolling window:
+
+```json
+{
+  "governance": {
+    "budgets": [
+      {
+        "id": "budget-platform-monthly",
+        "max_limit": 500.00,
+        "reset_duration": "1M",
+        "virtual_key_id": "vk-team-platform"
+      }
+    ]
+  }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique budget ID |
+| `max_limit` | Yes | Maximum spend in USD |
+| `reset_duration` | Yes | Window length: `"30s"`, `"5m"`, `"1h"`, `"1d"`, `"1w"`, `"1M"`, `"1Y"` |
+| `virtual_key_id` | No | Attach to a virtual key (mutually exclusive with `provider_config_id`) |
+| `provider_config_id` | No | Attach to a provider config (mutually exclusive with `virtual_key_id`) |
+
+---
+
+## Rate Limits
+
+Rate limits cap requests or tokens over a rolling window:
+
+```json
+{
+  "governance": {
+    "rate_limits": [
+      {
+        "id": "rl-platform-hourly",
+        "request_max_limit": 1000,
+        "request_reset_duration": "1h",
+        "token_max_limit": 1000000,
+        "token_reset_duration": "1h"
+      }
+    ]
+  }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique rate limit ID |
+| `request_max_limit` | No | Maximum requests in window |
+| `request_reset_duration` | No | Window for request counter |
+| `token_max_limit` | No | Maximum tokens (input + output) in window |
+| `token_reset_duration` | No | Window for token counter |
+
+Attach a rate limit to a virtual key via `virtual_keys[].rate_limit_id`, or to a provider config via `virtual_keys[].provider_configs[].rate_limit_id`.
+
+---
+
+## Routing Rules
+
+Routing rules dynamically select the provider and model for each request based on a [CEL](https://cel.dev) expression. They are evaluated in priority order before the request is dispatched.
+
+```json
+{
+  "governance": {
+    "routing_rules": [
+      {
+        "id": "route-gpt4-to-azure",
+        "name": "Redirect GPT-4o to Azure",
+        "cel_expression": "request.model == 'gpt-4o'",
+        "targets": [
+          { "provider": "azure", "model": "gpt-4o", "weight": 1.0 }
+        ]
+      },
+      {
+        "id": "route-cost-split",
+        "name": "Split traffic 70/30 between providers",
+        "cel_expression": "true",
+        "targets": [
+          { "provider": "openai",    "weight": 0.7 },
+          { "provider": "anthropic", "weight": 0.3 }
+        ]
+      }
+    ]
+  }
+}
+```
+
+### Rule Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique rule ID |
+| `name` | Yes | Human-readable name |
+| `cel_expression` | No | CEL expression. `"true"` matches every request |
+| `targets` | Yes | Weighted target list (weights must sum to `1.0`) |
+| `enabled` | No | Default `true` |
+| `priority` | No | Evaluation order within scope — lower numbers run first |
+| `scope` | No | `"global"` (default), `"team"`, `"customer"`, `"virtual_key"` |
+| `scope_id` | Conditional | Required when `scope` is not `"global"` |
+| `chain_rule` | No | If `true`, re-evaluates the chain after this rule matches |
+| `fallbacks` | No | Ordered fallback provider list if primary target fails |
+
+### Target Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `weight` | Yes | Fraction of traffic (all weights in a rule must sum to `1.0`) |
+| `provider` | No | Target provider. Omit to keep the incoming request's provider |
+| `model` | No | Target model. Omit to keep the incoming request's model |
+| `key_id` | No | Pin a specific API key by name |
+
+---
+
+## Customers & Teams
+
+Define organizational entities and attach budgets or rate limits to them:
+
+```json
+{
+  "governance": {
+    "customers": [
+      {
+        "id": "customer-acme",
+        "name": "Acme Corp",
+        "budget_id": "budget-acme-monthly",
+        "rate_limit_id": "rl-acme-hourly"
+      }
+    ],
+    "teams": [
+      {
+        "id": "team-ml",
+        "name": "ML Team",
+        "customer_id": "customer-acme",
+        "budget_id": "budget-team-ml"
+      }
+    ]
+  }
+}
+```
+
+---
+
+## Full Governance Example
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+  "client": {
+    "enforce_auth_on_inference": true
+  },
+
+  "governance": {
+    "auth_config": {
+      "is_enabled": true,
+      "admin_username": "env.BIFROST_ADMIN_USERNAME",
+      "admin_password": "env.BIFROST_ADMIN_PASSWORD"
+    },
+
+    "budgets": [
+      {
+        "id": "budget-platform",
+        "max_limit": 1000.00,
+        "reset_duration": "1M",
+        "virtual_key_id": "vk-platform"
+      }
+    ],
+
+    "rate_limits": [
+      {
+        "id": "rl-platform",
+        "request_max_limit": 5000,
+        "request_reset_duration": "1h",
+        "token_max_limit": 5000000,
+        "token_reset_duration": "1h"
+      }
+    ],
+
+    "virtual_keys": [
+      {
+        "id": "vk-platform",
+        "name": "platform-key",
+        "value": "env.VK_PLATFORM",
+        "is_active": true,
+        "rate_limit_id": "rl-platform",
+        "provider_configs": [
+          {
+            "provider": "openai",
+            "allowed_models": ["*"],
+            "key_ids": ["*"],
+            "weight": 1
+          }
+        ]
+      }
+    ],
+
+    "routing_rules": [
+      {
+        "id": "fallback-to-anthropic",
+        "name": "Fallback on error",
+        "cel_expression": "true",
+        "targets": [{ "provider": "openai", "weight": 1.0 }],
+        "fallbacks": ["anthropic"]
+      }
+    ]
+  },
+
+  "providers": {
+    "openai": {
+      "keys": [{ "name": "openai-primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "anthropic": {
+      "keys": [{ "name": "anthropic-primary", "value": "env.ANTHROPIC_API_KEY", "models": ["*"], "weight": 1.0 }]
+    }
+  },
+
+  "config_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost"
+    }
+  }
+}
+```
--- a/docs/deployment-guides/config-json/guardrails.mdx
+++ b/docs/deployment-guides/config-json/guardrails.mdx
@@ -0,0 +1,291 @@
+---
+title: "Guardrails"
+description: "Configure content moderation and policy enforcement in config.json using guardrails_config"
+icon: "shield-halved"
+---
+
+<Note>
+Guardrails are an **enterprise-only** feature and require the enterprise Bifrost image.
+</Note>
+
+Guardrails are configured under `guardrails_config` in `config.json`. The configuration has two parts:
+
+- **`guardrail_providers`** — the backend that performs the check. Rules link to providers by `id`.
+- **`guardrail_rules`** — CEL expressions that control when and where providers are invoked.
+
+---
+
+## Providers
+
+<Tabs>
+<Tab title="Regex">
+
+Runs entirely in-process with no external dependency. Patterns use RE2 syntax. Supports optional per-pattern flags: `i` (case-insensitive), `m` (multiline), `s` (dot-all).
+
+```json
+{
+  "guardrails_config": {
+    "guardrail_providers": [
+      {
+        "id": 1,
+        "provider_name": "regex",
+        "policy_name": "block-secrets",
+        "enabled": true,
+        "timeout": 5,
+        "config": {
+          "patterns": [
+            { "pattern": "sk-[A-Za-z0-9]{20,}", "description": "OpenAI API key" },
+            { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" },
+            { "pattern": "gh[ps]_[A-Za-z0-9]{36}", "description": "GitHub token", "flags": "i" }
+          ],
+          "mode": "block"
+        }
+      }
+    ]
+  }
+}
+```
+
+</Tab>
+<Tab title="AWS Bedrock">
+
+```json
+{
+  "guardrails_config": {
+    "guardrail_providers": [
+      {
+        "id": 2,
+        "provider_name": "bedrock",
+        "policy_name": "content-filter",
+        "enabled": true,
+        "timeout": 15,
+        "config": {
+          "guardrail_arn": "arn:aws:bedrock:us-east-1:123456789012:guardrail/abc123",
+          "guardrail_version": "DRAFT",
+          "region": "us-east-1",
+          "access_key": "env.AWS_ACCESS_KEY_ID",
+          "secret_key": "env.AWS_SECRET_ACCESS_KEY"
+        }
+      }
+    ]
+  }
+}
+```
+
+</Tab>
+<Tab title="Azure Content Safety">
+
+```json
+{
+  "guardrails_config": {
+    "guardrail_providers": [
+      {
+        "id": 3,
+        "provider_name": "azure",
+        "policy_name": "azure-content-safety",
+        "enabled": true,
+        "timeout": 10,
+        "config": {
+          "endpoint": "https://your-resource.cognitiveservices.azure.com",
+          "api_key": "env.AZURE_CONTENT_SAFETY_KEY",
+          "analyze_enabled": true,
+          "analyze_severity_threshold": "medium",
+          "jailbreak_shield_enabled": true,
+          "indirect_attack_shield_enabled": true,
+          "copyright_enabled": false,
+          "text_blocklist_enabled": false,
+          "blocklist_names": []
+        }
+      }
+    ]
+  }
+}
+```
+
+`analyze_severity_threshold` accepts `"low"`, `"medium"`, or `"high"`.
+
+</Tab>
+<Tab title="Gray Swan">
+
+```json
+{
+  "guardrails_config": {
+    "guardrail_providers": [
+      {
+        "id": 4,
+        "provider_name": "grayswan",
+        "policy_name": "grayswan-jailbreak",
+        "enabled": true,
+        "timeout": 15,
+        "config": {
+          "api_key": "env.GRAYSWAN_API_KEY",
+          "violation_threshold": 0.7,
+          "reasoning_mode": "standard",
+          "policy_id": "",
+          "policy_ids": [],
+          "rules": {}
+        }
+      }
+    ]
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+### Provider Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique integer ID — referenced by rules via `provider_config_ids` |
+| `provider_name` | Yes | Backend: `"regex"`, `"bedrock"`, `"azure"`, `"grayswan"` |
+| `policy_name` | Yes | Human-readable policy label |
+| `enabled` | Yes | `true` to activate |
+| `timeout` | No | Execution timeout in seconds |
+| `config` | No | Provider-specific configuration object |
+
+---
+
+## Rules
+
+Rules are CEL expressions that fire when their condition matches. Available CEL variables:
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `model` | `string` | Model name from the request |
+| `provider` | `string` | Provider name (e.g. `"openai"`) |
+| `headers` | `map<string,string>` | HTTP request headers |
+| `params` | `map<string,string>` | Query parameters |
+| `customer` | `string` | Customer ID |
+| `team` | `string` | Team ID |
+| `user` | `string` | User ID |
+
+```json
+{
+  "guardrails_config": {
+    "guardrail_rules": [
+      {
+        "id": 101,
+        "name": "block-secrets-input",
+        "description": "Block prompts containing credentials",
+        "enabled": true,
+        "cel_expression": "true",
+        "apply_to": "input",
+        "sampling_rate": 100,
+        "timeout": 10,
+        "provider_config_ids": [1]
+      },
+      {
+        "id": 102,
+        "name": "content-safety-gpt4o-output",
+        "enabled": true,
+        "cel_expression": "model == 'gpt-4o'",
+        "apply_to": "output",
+        "sampling_rate": 100,
+        "timeout": 15,
+        "provider_config_ids": [3]
+      },
+      {
+        "id": 103,
+        "name": "grayswan-openai-partial",
+        "enabled": true,
+        "cel_expression": "provider == 'openai'",
+        "apply_to": "input",
+        "sampling_rate": 50,
+        "timeout": 20,
+        "provider_config_ids": [4]
+      }
+    ]
+  }
+}
+```
+
+### Rule Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique integer ID |
+| `name` | Yes | Human-readable name |
+| `description` | No | Optional description |
+| `enabled` | Yes | `true` to activate |
+| `cel_expression` | Yes | CEL boolean expression. `"true"` matches every request |
+| `apply_to` | Yes | `"input"`, `"output"`, or `"both"` |
+| `sampling_rate` | No | `0`–`100`; percentage of requests to evaluate (default: `100`) |
+| `timeout` | No | Rule timeout in seconds |
+| `provider_config_ids` | No | `id` values of providers to invoke when this rule matches. Multiple providers run in parallel |
+
+---
+
+## Full Example
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+  "providers": {
+    "openai": {
+      "keys": [{ "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+    }
+  },
+
+  "guardrails_config": {
+    "guardrail_providers": [
+      {
+        "id": 1,
+        "provider_name": "regex",
+        "policy_name": "block-secrets",
+        "enabled": true,
+        "timeout": 5,
+        "config": {
+          "patterns": [
+            { "pattern": "sk-[A-Za-z0-9]{20,}", "description": "OpenAI API key" },
+            { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" }
+          ],
+          "mode": "block"
+        }
+      },
+      {
+        "id": 2,
+        "provider_name": "azure",
+        "policy_name": "content-safety",
+        "enabled": true,
+        "timeout": 10,
+        "config": {
+          "endpoint": "https://your-resource.cognitiveservices.azure.com",
+          "api_key": "env.AZURE_CONTENT_SAFETY_KEY",
+          "analyze_enabled": true,
+          "analyze_severity_threshold": "medium",
+          "jailbreak_shield_enabled": true,
+          "indirect_attack_shield_enabled": false
+        }
+      }
+    ],
+    "guardrail_rules": [
+      {
+        "id": 101,
+        "name": "block-secrets-input",
+        "description": "Block prompts leaking credentials",
+        "enabled": true,
+        "cel_expression": "true",
+        "apply_to": "input",
+        "sampling_rate": 100,
+        "timeout": 10,
+        "provider_config_ids": [1]
+      },
+      {
+        "id": 102,
+        "name": "content-safety-both",
+        "description": "Azure content safety on all traffic",
+        "enabled": true,
+        "cel_expression": "true",
+        "apply_to": "both",
+        "sampling_rate": 100,
+        "timeout": 15,
+        "provider_config_ids": [2]
+      }
+    ]
+  }
+}
+```
--- a/docs/deployment-guides/config-json/plugins.mdx
+++ b/docs/deployment-guides/config-json/plugins.mdx
@@ -0,0 +1,318 @@
+---
+title: "Plugins"
+description: "Configure Bifrost plugins in config.json — semantic cache, OpenTelemetry, Maxim, Datadog, and custom plugins"
+icon: "puzzle-piece"
+---
+
+<Note>
+**The `plugins` array only controls explicitly opt-in plugins**: `semantic_cache`, `otel`, `maxim`, `datadog` (enterprise), and custom plugins.
+
+**Telemetry, logging, and governance are auto-loaded built-ins** — they load automatically (logging only when `client.enable_logging` is `true` and a `logs_store` is configured) and are configured via the `client` block and dedicated top-level keys, not the `plugins` array.
+</Note>
+
+---
+
+## Auto-Loaded Built-ins
+
+These plugins start automatically. You do **not** add them to the `plugins` array.
+
+| Plugin | Always active? | How to configure |
+|--------|---------------|-----------------|
+| **Telemetry** (Prometheus `/metrics`) | Yes, always | `client.prometheus_labels` for custom labels; push gateway via `plugins` entry once DB-backed mode is running |
+| **Logging** | When `client.enable_logging: true` and `logs_store` is configured | `client.enable_logging`, `client.disable_content_logging`, `client.logging_headers` |
+| **Governance** | Yes, always (OSS) | `client.enforce_auth_on_inference` for VK enforcement; `governance.*` for virtual keys / budgets / routing rules |
+
+See [Client Configuration](/deployment-guides/config-json/client) and [Governance](/deployment-guides/config-json/governance) for full details.
+
+---
+
+## Plugin Array Structure
+
+Every entry in the `plugins` array supports these common fields:
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | Yes | Plugin name |
+| `enabled` | boolean | Yes | Enable or disable this plugin |
+| `config` | object | Varies | Plugin-specific configuration |
+| `path` | string | No | Path to a custom plugin binary or WASM file |
+| `version` | integer | No | 🛑 **DB-Backed Only.** Plugin metadata persisted on `TablePlugin`. In DB-backed sync, higher values trigger replacement/reload. Valid range: `1` to `32767`. |
+| `placement` | string | No | 🛑 **DB-Backed Only.** Execution metadata (`"pre_builtin"`, `"builtin"`, `"post_builtin"`) persisted on `TablePlugin` and used for ordering behavior. |
+| `order` | integer | No | 🛑 **DB-Backed Only.** Execution metadata persisted on `TablePlugin`; within a placement group, lower values run earlier. |
+
+<Note>
+`name`, `enabled`, `path`, and `config` are the core plugin config fields. In DB-backed mode, `version`, `placement`, and `order` are persisted on `TablePlugin` and used during sync/runtime ordering.
+</Note>
+
+---
+
+<Tabs>
+
+<Tab title="Semantic Cache">
+
+### Semantic Cache
+
+Caches LLM responses by semantic similarity. Returns a cached response when an incoming request is semantically close enough to a previous one.
+
+Requires a [vector store](/deployment-guides/config-json/storage#vector_store) to be configured.
+
+| Field | Required | Default | Description |
+|-------|----------|---------|-------------|
+| `config.dimension` | Yes | — | Embedding dimension. Use `1` for hash-based (exact) caching without an embedding provider |
+| `config.provider` | No | — | Provider for generating embeddings (required for semantic mode) |
+| `config.embedding_model` | No | — | Model for embeddings (required when `provider` is set) |
+| `config.threshold` | No | `0.8` | Cosine similarity threshold for a cache hit (0.0–1.0) |
+| `config.ttl` | No | `300` | Cache entry TTL in seconds (or a duration string like `"1h"`) |
+| `config.cache_by_model` | No | `true` | Include model in cache key |
+| `config.cache_by_provider` | No | `true` | Include provider in cache key |
+| `config.exclude_system_prompt` | No | `false` | Exclude system prompt from cache key |
+| `config.conversation_history_threshold` | No | `3` | Skip caching for requests with more messages than this |
+| `config.default_cache_key` | No | — | Default cache key when no `x-bf-cache-key` header is sent |
+
+**Semantic mode** (embedding-based similarity search):
+
+```json
+{
+  "plugins": [
+    {
+      "name": "semantic_cache",
+      "enabled": true,
+      "config": {
+        "provider": "openai",
+        "embedding_model": "text-embedding-3-small",
+        "dimension": 1536,
+        "threshold": 0.85,
+        "ttl": 300,
+        "cache_by_model": true,
+        "cache_by_provider": true
+      }
+    }
+  ]
+}
+```
+
+**Hash mode** (exact-match caching, no embedding provider needed):
+
+```json
+{
+  "plugins": [
+    {
+      "name": "semantic_cache",
+      "enabled": true,
+      "config": {
+        "dimension": 1,
+        "ttl": 1800
+      }
+    }
+  ]
+}
+```
+
+<Note>
+You must also configure a `vector_store` in `config.json`. See [Storage — vector_store](/deployment-guides/config-json/storage#vector_store).
+</Note>
+
+</Tab>
+
+<Tab title="OpenTelemetry">
+
+### OpenTelemetry (OTel)
+
+Exports distributed traces to any OTel-compatible collector (Jaeger, Zipkin, Tempo, Datadog via OTLP, etc.).
+
+| Field | Required | Default | Description |
+|-------|----------|---------|-------------|
+| `config.collector_url` | Yes | — | OTLP collector endpoint |
+| `config.trace_type` | Yes | — | Trace format: `"genai_extension"`, `"vercel"`, or `"open_inference"` |
+| `config.protocol` | Yes | — | `"http"` or `"grpc"` |
+| `config.service_name` | No | `"bifrost"` | Service name reported to the collector |
+| `config.metrics_enabled` | No | `false` | Enable push-based OTLP metrics export |
+| `config.metrics_endpoint` | No | — | OTLP metrics endpoint URL |
+| `config.metrics_push_interval` | No | `15` | Metrics push interval in seconds |
+| `config.headers` | No | — | Custom headers for the collector (supports `env.` prefix) |
+| `config.insecure` | No | `false` | Skip TLS verification |
+| `config.tls_ca_cert` | No | — | Path to TLS CA certificate |
+
+```json
+{
+  "plugins": [
+    {
+      "name": "otel",
+      "enabled": true,
+      "config": {
+        "collector_url": "http://otel-collector:4318",
+        "trace_type": "genai_extension",
+        "protocol": "http",
+        "service_name": "bifrost-gateway"
+      }
+    }
+  ]
+}
+```
+
+**With authentication headers:**
+
+```json
+{
+  "plugins": [
+    {
+      "name": "otel",
+      "enabled": true,
+      "config": {
+        "collector_url": "https://otel.example.com:4318",
+        "trace_type": "open_inference",
+        "protocol": "http",
+        "service_name": "bifrost",
+        "headers": {
+          "Authorization": "env.OTEL_AUTH_HEADER"
+        }
+      }
+    }
+  ]
+}
+```
+
+**With OTLP metrics export:**
+
+```json
+{
+  "plugins": [
+    {
+      "name": "otel",
+      "enabled": true,
+      "config": {
+        "collector_url": "http://otel-collector:4318",
+        "trace_type": "genai_extension",
+        "protocol": "http",
+        "metrics_enabled": true,
+        "metrics_endpoint": "http://otel-collector:4318/v1/metrics",
+        "metrics_push_interval": 30
+      }
+    }
+  ]
+}
+```
+
+</Tab>
+
+<Tab title="Maxim">
+
+### Maxim Observability
+
+Sends request traces to the [Maxim](https://www.getmaxim.ai) observability platform.
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `config.api_key` | Yes | Maxim API key (use `env.` prefix) |
+| `config.log_repo_id` | No | Default Maxim logger repository ID |
+
+```json
+{
+  "plugins": [
+    {
+      "name": "maxim",
+      "enabled": true,
+      "config": {
+        "api_key": "env.MAXIM_API_KEY",
+        "log_repo_id": "your-log-repo-id"
+      }
+    }
+  ]
+}
+```
+
+</Tab>
+
+<Tab title="Datadog">
+
+### Datadog
+
+<Note>
+Datadog is an **enterprise-only** plugin and is silently ignored in OSS builds.
+</Note>
+
+Sends APM traces and metrics to a Datadog Agent.
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `config.agent_addr` | `"localhost:8126"` | Datadog Agent address for APM traces |
+| `config.service_name` | `"bifrost"` | Service name in Datadog |
+| `config.env` | — | Environment tag (e.g. `"production"`, `"staging"`) |
+| `config.version` | — | Service version tag |
+| `config.enable_traces` | `true` | Enable APM trace collection |
+| `config.custom_tags` | `{}` | Additional key/value tags for all traces and metrics |
+
+```json
+{
+  "plugins": [
+    {
+      "name": "datadog",
+      "enabled": true,
+      "config": {
+        "agent_addr": "datadog-agent:8126",
+        "service_name": "bifrost",
+        "env": "production",
+        "enable_traces": true,
+        "custom_tags": {
+          "team": "platform",
+          "region": "us-east-1"
+        }
+      }
+    }
+  ]
+}
+```
+
+</Tab>
+
+</Tabs>
+
+---
+
+## Custom / Dynamic Plugins
+
+Load a custom Go plugin binary or WASM plugin at startup using the `path` field. Custom plugins must implement one of the Bifrost plugin interfaces.
+
+```json
+{
+  "plugins": [
+    {
+      "name": "my-custom-auth",
+      "enabled": true,
+      "path": "/app/plugins/my-custom-auth.so",
+      "config": {
+        "auth_endpoint": "env.AUTH_SERVICE_URL"
+      }
+    }
+  ]
+}
+```
+
+**WASM plugin:**
+
+```json
+{
+  "plugins": [
+    {
+      "name": "my-wasm-plugin",
+      "enabled": true,
+      "path": "/app/plugins/my-plugin.wasm",
+      "config": {}
+    }
+  ]
+}
+```
+
+See [Writing Go Plugins](/plugins/writing-go-plugin) and [Writing WASM Plugins](/plugins/writing-wasm-plugin) for implementation guides.
+
+**Placement and ordering (DB-backed only):**
+
+In DB-backed mode, plugin metadata such as `version` (`1` to `32767`), `placement`, and `order` can be managed via config sync and DB/UI workflows:
+
+| `placement` | When it runs |
+|-------------|-------------|
+| `pre_builtin` | Before all built-in plugins |
+| `builtin` | Alongside built-in plugins (by `order`) |
+| `post_builtin` | After all built-in plugins (default) |
+
+Within a placement group, lower `order` values run earlier.
--- a/docs/deployment-guides/config-json/providers.mdx
+++ b/docs/deployment-guides/config-json/providers.mdx
@@ -0,0 +1,755 @@
+---
+title: "Provider Setup"
+description: "Configure LLM providers in config.json — API keys, cloud-native auth, per-provider network settings, and self-hosted endpoints"
+icon: "plug"
+---
+
+All providers are configured under `providers` in `config.json`. Each provider entry contains a `keys` array where every key has a `name`, `value`, `models`, and `weight`, plus optional provider-specific config objects.
+
+**Supplying credentials:**
+
+Use the `env.` prefix to reference environment variables — never put API keys directly in `config.json`:
+
+```json
+{
+  "providers": {
+    "openai": {
+      "keys": [
+        {
+          "name": "primary",
+          "value": "env.OPENAI_API_KEY",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ]
+    }
+  }
+}
+```
+
+---
+
+## Common Provider Fields
+
+Every key object supports these fields:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | Unique name for this key (used in logs and for virtual key pinning) |
+| `value` | string | API key value or `env.VAR_NAME` reference |
+| `models` | array | Models this key serves. `["*"]` = all models |
+| `weight` | float | Load balancing weight. Higher = more traffic |
+| `aliases` | object | Map logical name → actual model name for this key |
+| `use_for_batch_api` | boolean | Mark key as eligible for batch API calls |
+
+Per-provider `network_config` options (apply to all standard providers):
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `default_request_timeout_in_seconds` | integer | Per-request timeout |
+| `max_retries` | integer | Retry attempts on transient errors |
+| `retry_backoff_initial` | integer | Initial backoff in milliseconds |
+| `retry_backoff_max` | integer | Maximum backoff in milliseconds |
+| `max_conns_per_host` | integer | Max TCP connections to the provider endpoint (default: 5000) |
+| `extra_headers` | object | Static headers added to every provider request |
+| `stream_idle_timeout_in_seconds` | integer | Idle timeout per stream chunk (default: 60) |
+| `insecure_skip_verify` | boolean | Disable TLS verification (last resort only) |
+| `ca_cert_pem` | string | PEM-encoded CA for self-signed or private CA endpoints |
+
+Concurrency and buffering per provider:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `concurrency_and_buffer_size.concurrency` | integer | Max concurrent requests to this provider |
+| `concurrency_and_buffer_size.buffer_size` | integer | Request queue depth |
+
+---
+
+<Tabs>
+
+<Tab title="OpenAI">
+
+### OpenAI
+
+Supports multiple keys with weighted load balancing. Mark one key with `use_for_batch_api: true` to designate it for the Batch API.
+
+```json
+{
+  "providers": {
+    "openai": {
+      "keys": [
+        {
+          "name": "openai-primary",
+          "value": "env.OPENAI_KEY_1",
+          "models": ["*"],
+          "weight": 2.0
+        },
+        {
+          "name": "openai-secondary",
+          "value": "env.OPENAI_KEY_2",
+          "models": ["gpt-4o-mini"],
+          "weight": 1.0
+        },
+        {
+          "name": "openai-batch",
+          "value": "env.OPENAI_KEY_BATCH",
+          "models": ["*"],
+          "weight": 1.0,
+          "use_for_batch_api": true
+        }
+      ],
+      "network_config": {
+        "default_request_timeout_in_seconds": 120,
+        "max_retries": 3,
+        "retry_backoff_initial": 500,
+        "retry_backoff_max": 5000
+      }
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="Anthropic">
+
+### Anthropic
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "keys": [
+        {
+          "name": "anthropic-primary",
+          "value": "env.ANTHROPIC_KEY_1",
+          "models": ["*"],
+          "weight": 1.0
+        },
+        {
+          "name": "anthropic-secondary",
+          "value": "env.ANTHROPIC_KEY_2",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ],
+      "network_config": {
+        "default_request_timeout_in_seconds": 180
+      }
+    }
+  }
+}
+```
+
+**Override Anthropic beta headers** (optional):
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "keys": [
+        {
+          "name": "primary",
+          "value": "env.ANTHROPIC_API_KEY",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ],
+      "network_config": {
+        "beta_header_overrides": {
+          "redact-thinking-": true
+        }
+      }
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="Azure OpenAI">
+
+### Azure OpenAI
+
+Azure requires `azure_key_config` on every key with `endpoint` and `api_version`. List your Azure deployment names in `models` — Bifrost routes requests using the model name as the deployment name. If your deployment names differ from the model names you use in requests, add an `aliases` map on the key.
+
+<Tabs>
+<Tab title="API Key">
+
+```json
+{
+  "providers": {
+    "azure": {
+      "keys": [
+        {
+          "name": "azure-primary",
+          "value": "env.AZURE_API_KEY",
+          "models": ["gpt-4o", "gpt-4o-mini"],
+          "weight": 1.0,
+          "azure_key_config": {
+            "endpoint": "env.AZURE_ENDPOINT",
+            "api_version": "env.AZURE_API_VERSION"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+Set environment variables:
+
+```bash
+export AZURE_API_KEY="your-azure-api-key"
+export AZURE_ENDPOINT="https://your-resource.openai.azure.com"
+export AZURE_API_VERSION="2024-10-21"
+```
+
+</Tab>
+<Tab title="Managed Identity / DefaultAzureCredential">
+
+When `value` is empty or omitted, Bifrost uses `DefaultAzureCredential` — which resolves credentials from Workload Identity, VM managed identity, or `az login`.
+
+```json
+{
+  "providers": {
+    "azure": {
+      "keys": [
+        {
+          "name": "azure-workload-identity",
+          "value": "",
+          "models": ["gpt-4o"],
+          "weight": 1.0,
+          "azure_key_config": {
+            "endpoint": "env.AZURE_ENDPOINT",
+            "api_version": "env.AZURE_API_VERSION"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+**Deployment name aliases** — when your Azure deployment names differ from the model names in requests, use `aliases`:
+
+```json
+{
+  "providers": {
+    "azure": {
+      "keys": [
+        {
+          "name": "azure-primary",
+          "value": "env.AZURE_API_KEY",
+          "models": ["gpt-4o"],
+          "weight": 1.0,
+          "aliases": {
+            "gpt-4o": "gpt-4o-prod-deployment"
+          },
+          "azure_key_config": {
+            "endpoint": "env.AZURE_ENDPOINT",
+            "api_version": "env.AZURE_API_VERSION"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+**Multi-region failover** (two keys, different regions):
+
+```json
+{
+  "providers": {
+    "azure": {
+      "keys": [
+        {
+          "name": "eastus",
+          "value": "env.AZURE_KEY_EAST",
+          "models": ["gpt-4o"],
+          "weight": 1.0,
+          "azure_key_config": {
+            "endpoint": "env.AZURE_ENDPOINT_EAST",
+            "api_version": "env.AZURE_API_VERSION"
+          }
+        },
+        {
+          "name": "westus",
+          "value": "env.AZURE_KEY_WEST",
+          "models": ["gpt-4o"],
+          "weight": 1.0,
+          "azure_key_config": {
+            "endpoint": "env.AZURE_ENDPOINT_WEST",
+            "api_version": "env.AZURE_API_VERSION"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="AWS Bedrock">
+
+### AWS Bedrock
+
+Bedrock requires `bedrock_key_config` with at minimum a `region`. Three auth modes:
+
+<Tabs>
+<Tab title="Static Credentials">
+
+```json
+{
+  "providers": {
+    "bedrock": {
+      "keys": [
+        {
+          "name": "bedrock-static",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "bedrock_key_config": {
+            "region": "us-east-1",
+            "access_key": "env.AWS_ACCESS_KEY_ID",
+            "secret_key": "env.AWS_SECRET_ACCESS_KEY"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+<Tab title="IAM Role (instance profile / IRSA)">
+
+When only `region` is set, Bifrost inherits credentials from the AWS SDK default chain — IRSA (IAM Roles for Service Accounts), EC2 instance profile, or `AWS_*` env vars.
+
+```json
+{
+  "providers": {
+    "bedrock": {
+      "keys": [
+        {
+          "name": "bedrock-iam",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "bedrock_key_config": {
+            "region": "us-east-1"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+<Tab title="STS AssumeRole">
+
+```json
+{
+  "providers": {
+    "bedrock": {
+      "keys": [
+        {
+          "name": "bedrock-assumerole",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "bedrock_key_config": {
+            "region": "us-west-2",
+            "role_arn": "env.AWS_ROLE_ARN",
+            "external_id": "env.AWS_EXTERNAL_ID",
+            "session_name": "bifrost-session"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+**Model aliases** (map logical names to Bedrock inference profile IDs):
+
+```json
+{
+  "bedrock_key_config": {
+    "region": "us-east-1"
+  },
+  "aliases": {
+    "claude-sonnet": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "claude-haiku":  "us.anthropic.claude-3-5-haiku-20241022-v1:0"
+  }
+}
+```
+
+**Batch API — S3 configuration:**
+
+```json
+{
+  "bedrock_key_config": {
+    "region": "us-east-1",
+    "access_key": "env.AWS_ACCESS_KEY_ID",
+    "secret_key": "env.AWS_SECRET_ACCESS_KEY",
+    "batch_s3_config": {
+      "buckets": [
+        {
+          "bucket_name": "my-bedrock-batch-bucket",
+          "prefix": "batch/",
+          "is_default": true
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="Google Vertex AI">
+
+### Google Vertex AI
+
+Vertex requires `vertex_key_config` with `project_id` and `region`. Two auth modes:
+
+<Tabs>
+<Tab title="Service Account Key">
+
+```json
+{
+  "providers": {
+    "vertex": {
+      "keys": [
+        {
+          "name": "vertex-sa",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "vertex_key_config": {
+            "project_id": "env.VERTEX_PROJECT_ID",
+            "region": "us-central1",
+            "auth_credentials": "env.VERTEX_AUTH_CREDENTIALS"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+`VERTEX_AUTH_CREDENTIALS` should contain the base64-encoded service account JSON.
+
+</Tab>
+<Tab title="GKE Workload Identity / ADC">
+
+When `auth_credentials` is omitted, Bifrost calls `google.FindDefaultCredentials` — which resolves to GKE Workload Identity, GCE metadata server, or `gcloud auth application-default login`.
+
+```json
+{
+  "providers": {
+    "vertex": {
+      "keys": [
+        {
+          "name": "vertex-workload-identity",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "vertex_key_config": {
+            "project_id": "my-gcp-project",
+            "region": "us-central1"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+</Tab>
+
+<Tab title="Groq / Gemini / Mistral / Others">
+
+### Standard API-Key Providers
+
+These providers follow the same simple pattern — one or more keys with weights. Replace the provider name and env var name accordingly.
+
+```json
+{
+  "providers": {
+    "groq": {
+      "keys": [
+        {
+          "name": "groq-primary",
+          "value": "env.GROQ_API_KEY",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ]
+    },
+    "gemini": {
+      "keys": [
+        {
+          "name": "gemini-primary",
+          "value": "env.GEMINI_API_KEY",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ]
+    },
+    "mistral": {
+      "keys": [
+        {
+          "name": "mistral-primary",
+          "value": "env.MISTRAL_API_KEY",
+          "models": ["*"],
+          "weight": 1.0
+        }
+      ]
+    },
+    "cohere": {
+      "keys": [{ "name": "cohere-main", "value": "env.COHERE_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "perplexity": {
+      "keys": [{ "name": "perplexity-main", "value": "env.PERPLEXITY_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "xai": {
+      "keys": [{ "name": "xai-main", "value": "env.XAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "cerebras": {
+      "keys": [{ "name": "cerebras-main", "value": "env.CEREBRAS_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "openrouter": {
+      "keys": [{ "name": "openrouter-main", "value": "env.OPENROUTER_API_KEY", "models": ["*"], "weight": 1.0 }]
+    },
+    "nebius": {
+      "keys": [{ "name": "nebius-main", "value": "env.NEBIUS_API_KEY", "models": ["*"], "weight": 1.0 }]
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="Self-Hosted">
+
+### Self-Hosted Providers
+
+Self-hosted providers point to a URL you operate. No API key is typically required (`"value": ""`).
+
+<Tabs>
+<Tab title="Ollama">
+
+```json
+{
+  "providers": {
+    "ollama": {
+      "keys": [
+        {
+          "name": "ollama-local",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "ollama_key_config": {
+            "url": "http://localhost:11434"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+Using an env var for the URL (useful across environments):
+
+```json
+{
+  "ollama_key_config": {
+    "url": "env.OLLAMA_URL"
+  }
+}
+```
+
+</Tab>
+<Tab title="vLLM">
+
+vLLM instances are model-specific — one key per served model:
+
+```json
+{
+  "providers": {
+    "vllm": {
+      "keys": [
+        {
+          "name": "vllm-llama3-70b",
+          "value": "",
+          "models": ["llama-3-70b"],
+          "weight": 1.0,
+          "vllm_key_config": {
+            "url": "http://vllm-server:8000",
+            "model_name": "meta-llama/Meta-Llama-3-70B-Instruct"
+          }
+        },
+        {
+          "name": "vllm-mistral",
+          "value": "",
+          "models": ["mistral-7b"],
+          "weight": 1.0,
+          "vllm_key_config": {
+            "url": "http://vllm-mistral:8000",
+            "model_name": "mistralai/Mistral-7B-Instruct-v0.3"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+<Tab title="SGLang">
+
+```json
+{
+  "providers": {
+    "sgl": {
+      "keys": [
+        {
+          "name": "sgl-main",
+          "value": "",
+          "models": ["*"],
+          "weight": 1.0,
+          "sgl_key_config": {
+            "url": "http://sgl-router:30000"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+<Tab title="HuggingFace / Replicate">
+
+Unlike the self-hosted runtimes in the other tabs, these are hosted services that require an API key. They use `aliases` to map logical model names to provider-specific IDs:
+
+```json
+{
+  "providers": {
+    "huggingface": {
+      "keys": [
+        {
+          "name": "hf-main",
+          "value": "env.HF_API_KEY",
+          "models": ["llama-3", "mixtral"],
+          "weight": 1.0,
+          "aliases": {
+            "llama-3": "meta-llama/Meta-Llama-3-8B-Instruct",
+            "mixtral": "mistralai/Mixtral-8x7B-Instruct-v0.1"
+          }
+        }
+      ]
+    },
+    "replicate": {
+      "keys": [
+        {
+          "name": "replicate-main",
+          "value": "env.REPLICATE_API_KEY",
+          "models": ["llama-3"],
+          "weight": 1.0,
+          "aliases": {
+            "llama-3": "meta/meta-llama-3-70b-instruct"
+          },
+          "replicate_key_config": {
+            "use_deployments_endpoint": false
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+</Tab>
+
+</Tabs>
+
+---
+
+## Proxy Configuration
+
+Route provider traffic through an HTTP or SOCKS5 proxy:
+
+```json
+{
+  "providers": {
+    "openai": {
+      "keys": [
+        { "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }
+      ],
+      "proxy_config": {
+        "type": "http",
+        "url": "http://proxy.corp.example.com:3128",
+        "username": "env.PROXY_USER",
+        "password": "env.PROXY_PASS"
+      }
+    }
+  }
+}
+```
+
+| Field | Type | Description |
+|-------|------|---------|
+| `proxy_config.type` | string | `"none"`, `"http"`, `"socks5"`, `"environment"` |
+| `proxy_config.url` | string | Proxy server URL |
+| `proxy_config.username` | string | Proxy auth username (`env.` supported) |
+| `proxy_config.password` | string | Proxy auth password (`env.` supported) |
+| `proxy_config.ca_cert_pem` | string | PEM CA for TLS-intercepting proxies |
+
+Use `"type": "environment"` to pick up `HTTP_PROXY` / `HTTPS_PROXY` env vars automatically.
+
+---
+
+## Multi-Provider Example
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "providers": {
+    "openai": {
+      "keys": [
+        { "name": "openai-primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 2.0 }
+      ]
+    },
+    "anthropic": {
+      "keys": [
+        { "name": "anthropic-primary", "value": "env.ANTHROPIC_API_KEY", "models": ["*"], "weight": 1.0 }
+      ]
+    },
+    "groq": {
+      "keys": [
+        { "name": "groq-primary", "value": "env.GROQ_API_KEY", "models": ["*"], "weight": 1.0 }
+      ]
+    }
+  }
+}
+```
+
+With three providers and the weights above, traffic is distributed: 50% OpenAI, 25% Anthropic, 25% Groq. If any provider returns an error, Bifrost automatically retries on the next key or provider.
--- a/docs/deployment-guides/config-json/schema-reference.mdx
+++ b/docs/deployment-guides/config-json/schema-reference.mdx
@@ -0,0 +1,252 @@
+---
+title: "Schema Reference"
+description: "All top-level keys available in config.json, their types, and where each is documented"
+icon: "brackets-curly"
+---
+
+<Note>
+The live schema is published at [`https://www.getbifrost.ai/schema`](https://www.getbifrost.ai/schema). Add `"$schema": "https://www.getbifrost.ai/schema"` to your `config.json` for IDE autocomplete and inline validation.
+</Note>
+
+This page is a concise reference for every top-level key in `config.json`. Click the **Guide** links for full field-by-field documentation.
+
+---
+
+## Top-Level Keys
+
+| Key | Type | Description | Guide |
+|-----|------|-------------|-------|
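+| `$schema` | string | Schema URL for IDE validation. Set to `"https://www.getbifrost.ai/schema"` | — |
+| `version` | integer | Config semantics version; controls how empty allow-list arrays are interpreted (`2` by default) | [version](#version) |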
+| `encryption_key` | string | Optional AES-256 key (derived via Argon2id). Accepts `env.VAR` prefix and is also read from `BIFROST_ENCRYPTION_KEY`. If omitted, data is stored in plaintext. | [Client](/deployment-guides/config-json/client#encryption-key) |
+| `client` | object | Worker pool, logging, CORS, auth enforcement, header filtering, MCP, compat shims | [Client](/deployment-guides/config-json/client) |
+| `providers` | object | LLM provider API keys, network settings, concurrency | [Providers](/deployment-guides/config-json/providers) |
+| `governance` | object | Admin auth, virtual keys, budgets, rate limits, routing rules, customers, teams | [Governance](/deployment-guides/config-json/governance) |
+| `guardrails_config` | object | Content moderation providers and CEL-based rules *(enterprise only)* | [Guardrails](/deployment-guides/config-json/guardrails) |
+| `access_profiles` | array | Access profile templates for enterprise RBAC/governance controls *(enterprise only)* | [Enterprise Governance](/enterprise/advanced-governance) |
+| `cluster_config` | object | Cluster mode settings: gossip, peers, and auto-discovery backends *(enterprise only)* | [Cluster](/deployment-guides/config-json/cluster) |
+| `config_store` | object | Configuration database backend — SQLite, PostgreSQL, or disabled (file-only mode) | [Storage](/deployment-guides/config-json/storage#config_store) |
+| `logs_store` | object | Request/response log database — SQLite, PostgreSQL + optional S3/GCS offload | [Storage](/deployment-guides/config-json/storage#logs_store) |
+| `vector_store` | object | Vector database for semantic cache — Weaviate, Redis, Qdrant, Pinecone, Valkey | [Storage](/deployment-guides/config-json/storage#vector_store) |
+| `plugins` | array | Opt-in plugins: `semantic_cache`, `otel`, `maxim`, `datadog`, custom | [Plugins](/deployment-guides/config-json/plugins) |
+| `framework` | object | Model pricing catalog URL and sync interval | [Framework](#framework) |
+| `mcp` | object | MCP server and tool configuration | — |
+| `websocket` | object | WebSocket / Realtime API connection pool tuning | [WebSocket](#websocket) |
+| `auth_config` | object | **Deprecated** — use `governance.auth_config` | [Client](/deployment-guides/config-json/client#authentication) |
+
+---
+
+## `version`
+
+Controls how empty arrays in allow-list fields (`models`, `allowed_models`, `key_ids`, `tools_to_execute`) are interpreted:
+
+| Value | Behaviour |
+|-------|-----------|
+| `2` *(default, v1.5.0+)* | Empty array = **deny all**; `["*"]` = allow all |
+| `1` *(v1.4.x compat)* | Empty array = **allow all** |
+
+Omitting `version` uses v2 semantics. Set `"version": 1` only if you are migrating from v1.4.x and need the old behaviour temporarily.
+
+---
+
+## `client`
+
+Controls the worker pool, logging pipeline, security, and SDK shims. All fields are optional.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `initial_pool_size` | integer | `300` | Pre-allocated goroutines per provider queue |
+| `drop_excess_requests` | boolean | `false` | Return HTTP 429 when queue is full |
+| `enable_logging` | boolean | `true`* | Persist request/response logs (`*` auto-enabled when `logs_store` is set) |
+| `disable_content_logging` | boolean | `false` | Strip message content from logs |
+| `log_retention_days` | integer | `365` | Days to retain log entries |
+| `logging_headers` | array | `[]` | HTTP headers to capture in log metadata |
+| `enforce_auth_on_inference` | boolean | `false` | Require a virtual key on every `/v1/*` request |
+| `allow_direct_keys` | boolean | `false` | Allow callers to pass provider API keys directly |
+| `allowed_origins` | array | `["*"]` | CORS allowed origins |
+| `max_request_body_size_mb` | integer | `100` | Maximum request body in MB |
+| `whitelisted_routes` | array | `[]` | Routes that bypass auth middleware |
+| `allowed_headers` | array | `[]` | Additional headers permitted for CORS/WebSocket |
+| `required_headers` | array | `[]` | Headers that must be present on every request |
+| `header_filter_config` | object | — | `allowlist` / `denylist` for `x-bf-eh-*` forwarded headers |
+| `prometheus_labels` | array | `[]` | Custom labels for all Prometheus metrics |
+| `compat` | object | — | SDK compatibility shims (`should_drop_params`, `convert_text_to_chat`, etc.) |
+| `mcp_agent_depth` | integer | `10` | Max tool-call recursion depth |
+| `mcp_tool_execution_timeout` | integer | `30` | Per-tool execution timeout in seconds |
+| `mcp_tool_sync_interval` | integer | `10` | Tool sync interval in minutes (`0` = disabled) |
+| `mcp_disable_auto_tool_inject` | boolean | `false` | Disable automatic MCP tool injection |
+| `async_job_result_ttl` | integer | `3600` | TTL for async job results in seconds |
+| `disable_db_pings_in_health` | boolean | `false` | Exclude DB connectivity from `/health` |
+| `routing_chain_max_depth` | integer | `10` | Max routing rule chain evaluation depth |
+
+Full documentation: [Client Configuration](/deployment-guides/config-json/client).
+
+---
+
+## `providers`
+
+Keyed by provider name. Each entry contains a `keys` array and, optionally, `network_config`, `concurrency_and_buffer_size`, and `proxy_config` objects.
+
+Supported provider keys: `openai`, `anthropic`, `azure`, `bedrock`, `vertex`, `gemini`, `mistral`, `groq`, `cohere`, `perplexity`, `xai`, `cerebras`, `openrouter`, `nebius`, `fireworks`, `parasail`, `huggingface`, `replicate`, `ollama`, `vllm`, `sgl`, `elevenlabs`, `runway`.
+
+Full documentation: [Provider Setup](/deployment-guides/config-json/providers).
+
+---
+
+## `governance`
+
+Seeds governance resources at startup. All sub-keys are optional; each is an array of resource definitions except `auth_config`, which is a single object.
+
+| Sub-key | Description |
+|---------|-------------|
+| `auth_config` | Admin username/password auth for the dashboard |
+| `virtual_keys` | Scoped API tokens with provider/model allowlists |
+| `budgets` | Spend caps in USD over a rolling window |
+| `rate_limits` | Request and token rate limits |
+| `customers` | Customer entities (attach budgets/rate limits) |
+| `teams` | Team entities (attach to customers, budgets, rate limits) |
+| `routing_rules` | CEL-based dynamic provider/model routing |
+| `pricing_overrides` | Scoped per-model pricing overrides |
+| `model_configs` | Per-model rate limit and budget configurations |
+
+Full documentation: [Governance](/deployment-guides/config-json/governance).
+
+---
+
+## `guardrails_config`
+
+Enterprise-only. Two sub-keys: `guardrail_providers` (array) and `guardrail_rules` (array).
+
+Full documentation: [Guardrails](/deployment-guides/config-json/guardrails).
+
+---
+
+## `access_profiles`
+
+Enterprise-only. Defines access profile templates that can later be attached to roles/users.
+
+```json
+{
+  "access_profiles": [
+    {
+      "name": "platform-default",
+      "description": "Default platform profile",
+      "is_active": true,
+      "tags": ["platform", "default"],
+      "provider_configs": [
+        {
+          "provider_name": "openai",
+          "all_models_allowed": false,
+          "allowed_models": ["gpt-4o", "gpt-4o-mini"]
+        }
+      ],
+      "mcp_servers": [
+        { "mcp_server_id": "github" }
+      ],
+      "mcp_tool_overrides": [
+        { "mcp_client_id": "github", "tool_name": "create_pull_request", "action": "include" }
+      ]
+    }
+  ]
+}
+```
+
+---
+
+## `cluster_config`
+
+Enterprise-only clustering settings for multi-node deployments.
+
+| Sub-key | Description |
+|---------|-------------|
+| `enabled` | Enables cluster mode |
+| `region` | Region label used by enterprise clustering |
+| `peers` | Static peer list (`host:port`) |
+| `gossip` | Gossip/memberlist port + liveness thresholds |
+| `discovery` | Auto-discovery configuration (`kubernetes`, `dns`, `udp`, `consul`, `etcd`, `mdns`) |
+
+Full documentation: [Cluster](/deployment-guides/config-json/cluster).
+
+---
+
+## `config_store`, `logs_store`, `vector_store`
+
+Storage backends. Each has `enabled` (boolean), `type` (string), and `config` (object).
+
+| Store | Types |
+|-------|-------|
+| `config_store` | `"sqlite"`, `"postgres"` |
+| `logs_store` | `"sqlite"`, `"postgres"` (+ optional `object_storage`) |
+| `vector_store` | `"weaviate"`, `"redis"`, `"qdrant"`, `"pinecone"` (`"redis"` also covers Valkey-compatible endpoints) |
+
+Full documentation: [Storage](/deployment-guides/config-json/storage).
+
+---
+
+## `framework`
+
+Controls model pricing catalog sync:
+
+```json
+{
+  "framework": {
+    "pricing": {
+      "pricing_url": "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
+      "pricing_sync_interval": 86400
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `pricing.pricing_url` | LiteLLM catalog | URL of a model pricing JSON file |
+| `pricing.pricing_sync_interval` | `86400` | Sync interval in seconds (minimum: `3600`) |
+
+---
+
+## `websocket`
+
+Optional tuning for the WebSocket gateway (Responses API WebSocket mode, Realtime API). WebSocket is always enabled.
+
+```json
+{
+  "websocket": {
+    "max_connections_per_user": 100,
+    "transcript_buffer_size": 100,
+    "pool": {
+      "max_idle_per_key": 50,
+      "max_total_connections": 1000,
+      "idle_timeout_seconds": 600,
+      "max_connection_lifetime_seconds": 7200
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `max_connections_per_user` | `100` | Max concurrent WebSocket connections per user |
+| `transcript_buffer_size` | `100` | Transcript entries buffered for Realtime API mid-session fallback |
+| `pool.max_idle_per_key` | `50` | Max idle upstream connections per provider/key |
+| `pool.max_total_connections` | `1000` | Max total idle upstream connections |
+| `pool.idle_timeout_seconds` | `600` | Evict idle connections after this many seconds |
+| `pool.max_connection_lifetime_seconds` | `7200` | Max lifetime of any upstream connection |
+
+---
+
+## Minimal Valid Config
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+  "providers": {
+    "openai": {
+      "keys": [
+        { "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }
+      ]
+    }
+  },
+  "config_store": { "enabled": false }
+}
+```
--- a/docs/deployment-guides/config-json/storage.mdx
+++ b/docs/deployment-guides/config-json/storage.mdx
@@ -0,0 +1,540 @@
+---
+title: "Storage"
+description: "Configure Bifrost storage backends in config.json — config_store, logs_store, vector_store, and object storage for logs"
+icon: "database"
+---
+
+Bifrost persists two types of data — **config** (providers, virtual keys, governance rules) and **logs** (request/response records). Each has its own store. A **vector store** is required for semantic caching.
+
+| Store | Purpose | Backends |
+|-------|---------|---------|
+| `config_store` | Provider configs, virtual keys, governance rules | SQLite, PostgreSQL |
+| `logs_store` | Request/response logs shown in UI | SQLite, PostgreSQL + optional S3/GCS offload |
+| `vector_store` | Semantic response caching | Weaviate, Redis, Valkey, Qdrant, Pinecone |
+
+<Note>
+If you use PostgreSQL for any store, the target database must be **UTF8 encoded**. See [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement).
+</Note>
+
+---
+
+## config_store
+
+<Note>
+When `config_store` is disabled (or absent), all configuration is loaded from `config.json` at startup only — the Web UI is disabled and changes require a restart. See [Two Configuration Modes](/deployment-guides/config-json#two-configuration-modes).
+</Note>
+
+<Tabs>
+
+<Tab title="SQLite">
+
+### SQLite (Default)
+
+Simplest setup — no external database required. Bifrost stores configuration in a local SQLite file.
+
+```json
+{
+  "config_store": {
+    "enabled": true,
+    "type": "sqlite",
+    "config": {
+      "path": "./config.db"
+    }
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `config.path` | Path to the SQLite file (relative to app-dir, or absolute) |
+
+</Tab>
+
+<Tab title="PostgreSQL">
+
+### PostgreSQL
+
+Production-grade storage suitable for high-availability and high-throughput deployments.
+
+```json
+{
+  "config_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require",
+      "max_idle_conns": 5,
+      "max_open_conns": 50
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `host` | — | PostgreSQL host (supports `env.` prefix) |
+| `port` | — | PostgreSQL port (as string) |
+| `user` | — | Database user (supports `env.` prefix) |
+| `password` | — | Database password (supports `env.` prefix). Leave empty for IAM role auth. |
+| `db_name` | — | Database name |
+| `ssl_mode` | — | `"disable"`, `"require"`, `"verify-ca"`, `"verify-full"` |
+| `max_idle_conns` | `5` | Maximum idle connections in the pool |
+| `max_open_conns` | `50` | Maximum open connections to the database |
+
+</Tab>
+
+<Tab title="Disabled">
+
+### Disabled (file-only mode)
+
+Use this when you want Bifrost to read all configuration from `config.json` only — no database, no Web UI.
+
+```json
+{
+  "config_store": {
+    "enabled": false
+  }
+}
+```
+
+This is the recommended setup for [multinode OSS deployments](/deployment-guides/how-to/multinode) where a shared `config.json` is the single source of truth.
+
+</Tab>
+
+</Tabs>
+
+---
+
+## logs_store
+
+<Tabs>
+
+<Tab title="SQLite">
+
+### SQLite
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "sqlite",
+    "config": {
+      "path": "./logs.db"
+    }
+  }
+}
+```
+
+</Tab>
+
+<Tab title="PostgreSQL">
+
+### PostgreSQL
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require",
+      "max_idle_conns": 10,
+      "max_open_conns": 100
+    }
+  }
+}
+```
+
+For high log volumes, increase `max_open_conns` (this example also sets `retention_days`; see [Log Retention](#log-retention)):
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require",
+      "max_idle_conns": 10,
+      "max_open_conns": 200
+    },
+    "retention_days": 90
+  }
+}
+```
+
+</Tab>
+
+<Tab title="Disabled">
+
+```json
+{
+  "logs_store": {
+    "enabled": false
+  }
+}
+```
+
+</Tab>
+
+</Tabs>
+
+### Log Retention
+
+Set `retention_days` to automatically purge old log entries. `0` disables retention-based cleanup.
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": { "...": "..." },
+    "retention_days": 90
+  }
+}
+```
+
+### Object Storage for Logs
+
+Offload large request/response payloads from the database to S3 or GCS. The database retains only lightweight index records; payloads are fetched on demand.
+
+<Tabs>
+<Tab title="AWS S3">
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": { "...": "..." },
+    "object_storage": {
+      "type": "s3",
+      "bucket": "env.S3_BUCKET",
+      "prefix": "bifrost",
+      "compress": true,
+      "region": "us-east-1",
+      "access_key_id": "env.S3_ACCESS_KEY_ID",
+      "secret_access_key": "env.S3_SECRET_ACCESS_KEY"
+    }
+  }
+}
+```
+
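+**IAM role (instance profile / IRSA)** — omit `access_key_id` and `secret_access_key` so the default AWS credential chain is used. Set `role_arn` only if Bifrost should additionally assume a role via STS AssumeRole: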
+
+```json
+{
+  "object_storage": {
+    "type": "s3",
+    "bucket": "bifrost-logs",
+    "region": "us-east-1",
+    "compress": true,
+    "role_arn": "arn:aws:iam::123456789012:role/BifrostS3Role"
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `bucket` | S3 bucket name (supports `env.` prefix) |
+| `prefix` | Key prefix for stored objects (default: `"bifrost"`) |
+| `compress` | Enable gzip compression (default: `false`) |
+| `region` | AWS region |
+| `access_key_id` | AWS access key ID (omit for default credential chain) |
+| `secret_access_key` | AWS secret access key |
+| `session_token` | STS temporary credentials session token |
+| `role_arn` | IAM role ARN for STS AssumeRole |
+| `endpoint` | Custom endpoint for MinIO / Cloudflare R2 |
+| `force_path_style` | Use path-style URLs (required for MinIO, default: `false`) |
+
+</Tab>
+<Tab title="Google Cloud Storage">
+
+```json
+{
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": { "...": "..." },
+    "object_storage": {
+      "type": "gcs",
+      "bucket": "bifrost-logs",
+      "prefix": "bifrost",
+      "compress": true,
+      "project_id": "env.GCP_PROJECT_ID",
+      "credentials_json": "env.GCS_CREDENTIALS_JSON"
+    }
+  }
+}
+```
+
+Omit `credentials_json` to use Application Default Credentials (Workload Identity, GCE metadata, `gcloud auth`).
+
+| Field | Description |
+|-------|-------------|
+| `project_id` | GCP project ID (supports `env.` prefix) |
+| `credentials_json` | Service account JSON or path — omit for ADC |
+
+</Tab>
+<Tab title="MinIO (Self-Hosted)">
+
+```json
+{
+  "object_storage": {
+    "type": "s3",
+    "bucket": "bifrost-logs",
+    "prefix": "bifrost",
+    "compress": false,
+    "region": "us-east-1",
+    "endpoint": "http://minio.internal:9000",
+    "access_key_id": "env.MINIO_ACCESS_KEY",
+    "secret_access_key": "env.MINIO_SECRET_KEY",
+    "force_path_style": true
+  }
+}
+```
+
+</Tab>
+</Tabs>
+
+---
+
+## vector_store
+
+A vector store is required for [semantic caching](/features/semantic-caching). Choose from Weaviate, Redis/Valkey, Qdrant, or Pinecone.
+
+<Tabs>
+
+<Tab title="Weaviate">
+
+```json
+{
+  "vector_store": {
+    "enabled": true,
+    "type": "weaviate",
+    "config": {
+      "scheme": "http",
+      "host": "localhost:8080",
+      "api_key": "env.WEAVIATE_API_KEY",
+      "grpc_config": {
+        "host": "localhost:50051",
+        "secured": false
+      }
+    }
+  }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `scheme` | Yes | `"http"` or `"https"` |
+| `host` | Yes | Weaviate server host and port |
+| `api_key` | No | Weaviate API key (supports `env.` prefix) |
+| `grpc_config.host` | No | gRPC host for faster vector operations |
+| `grpc_config.secured` | No | Use TLS for gRPC connection |
+
+</Tab>
+
+<Tab title="Redis / Valkey">
+
+```json
+{
+  "vector_store": {
+    "enabled": true,
+    "type": "redis",
+    "config": {
+      "addr": "env.REDIS_ADDR",
+      "password": "env.REDIS_PASSWORD",
+      "db": 0,
+      "use_tls": false
+    }
+  }
+}
+```
+
+**AWS MemoryDB (cluster mode):**
+
+```json
+{
+  "vector_store": {
+    "enabled": true,
+    "type": "redis",
+    "config": {
+      "addr": "env.MEMORYDB_ENDPOINT",
+      "password": "env.MEMORYDB_PASSWORD",
+      "use_tls": true,
+      "cluster_mode": true
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `addr` | — | Redis/Valkey address `host:port` (supports `env.` prefix) |
+| `password` | — | Redis AUTH password (supports `env.` prefix) |
+| `db` | `0` | Redis database number |
+| `use_tls` | `false` | Enable TLS |
+| `cluster_mode` | `false` | Enable cluster mode (required for MemoryDB; `db` must be `0`) |
+| `pool_size` | — | Maximum socket connections |
+
+</Tab>
+
+<Tab title="Qdrant">
+
+```json
+{
+  "vector_store": {
+    "enabled": true,
+    "type": "qdrant",
+    "config": {
+      "host": "env.QDRANT_HOST",
+      "port": 6334,
+      "api_key": "env.QDRANT_API_KEY",
+      "use_tls": false
+    }
+  }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `host` | — | Qdrant server host (supports `env.` prefix) |
+| `port` | `6334` | gRPC port |
+| `api_key` | — | API key (supports `env.` prefix) |
+| `use_tls` | `false` | Enable TLS |
+
+</Tab>
+
+<Tab title="Pinecone">
+
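+Pinecone is external-only: it is a hosted service, so Bifrost always connects to an existing Pinecone index rather than one you run alongside the gateway.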
+
+```json
+{
+  "vector_store": {
+    "enabled": true,
+    "type": "pinecone",
+    "config": {
+      "api_key": "env.PINECONE_API_KEY",
+      "index_host": "env.PINECONE_INDEX_HOST"
+    }
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `api_key` | Pinecone API key (supports `env.` prefix) |
+| `index_host` | Index host from Pinecone console (e.g. `your-index.svc.us-east1-gcp.pinecone.io`) |
+
+</Tab>
+
+</Tabs>
+
+---
+
+## Mixed Backend Example
+
+Run the config store on PostgreSQL (for UI) while keeping logs on SQLite (simpler, cheaper for append-heavy workloads):
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+  "config_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require"
+    }
+  },
+
+  "logs_store": {
+    "enabled": true,
+    "type": "sqlite",
+    "config": {
+      "path": "./logs.db"
+    }
+  }
+}
+```
+
+---
+
+## Full Storage Example
+
+```json
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+  "config_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require",
+      "max_idle_conns": 5,
+      "max_open_conns": 50
+    }
+  },
+
+  "logs_store": {
+    "enabled": true,
+    "type": "postgres",
+    "config": {
+      "host": "env.PG_HOST",
+      "port": "5432",
+      "user": "env.PG_USER",
+      "password": "env.PG_PASSWORD",
+      "db_name": "bifrost",
+      "ssl_mode": "require",
+      "max_idle_conns": 10,
+      "max_open_conns": 100
+    },
+    "retention_days": 90,
+    "object_storage": {
+      "type": "s3",
+      "bucket": "env.S3_BUCKET",
+      "region": "us-east-1",
+      "compress": true,
+      "access_key_id": "env.S3_ACCESS_KEY_ID",
+      "secret_access_key": "env.S3_SECRET_ACCESS_KEY"
+    }
+  },
+
+  "vector_store": {
+    "enabled": true,
+    "type": "weaviate",
+    "config": {
+      "scheme": "http",
+      "host": "weaviate:8080"
+    }
+  }
+}
+```