{ "$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://www.getbifrost.ai/schema", "title": "Bifrost Configuration Schema", "description": "Schema for Bifrost HTTP transport configuration", "type": "object", "properties": { "$schema": { "type": "string", "description": "The schema version. This should be set to \"https://www.getbifrost.ai/schema\"", "const": "https://www.getbifrost.ai/schema" }, "version": { "type": "integer", "description": "Controls how empty arrays in allow-list fields (models, allowed_models, key_ids, tools_to_execute) are interpreted. Omit or set to 2 for v1.5.0+ semantics: empty = deny all, [\"*\"] = allow all. Set to 1 to restore v1.4.x semantics: empty = allow all.", "enum": [1, 2], "default": 2 }, "encryption_key": { "type": "string", "description": "You can set the value as env. to use an environment variable. We also read encryption key from BIFROST_ENCRYPTION_KEY environment variable. Note: once set, the encryption key cannot be changed unless you clean up the database. Accepts any string; a secure 32-byte AES-256 key will be derived using Argon2id KDF. If not provided, data will be saved in plain text. Recommended: use a passphrase of at least 16 bytes for better security" }, "auth_config": { "$ref": "#/$defs/auth_config" }, "client": { "type": "object", "description": "Client configuration settings", "properties": { "drop_excess_requests": { "type": "boolean", "description": "Whether to drop excess requests when pool is full" }, "initial_pool_size": { "type": "integer", "minimum": 1, "description": "Initial size of the connection pool", "default": 300 }, "prometheus_labels": { "type": "array", "items": { "type": "string" }, "description": "Labels to use for Prometheus metrics" }, "allowed_origins": { "type": "array", "items": { "anyOf": [ { "type": "string", "const": "*" }, { "type": "string", "format": "uri" } ] }, "description": "CORS allowed origins (supports \"*\" or URI strings)" }, "enable_logging": { "type": "boolean", "description": "Enable request/response logging" }, "disable_content_logging": { "type": "boolean", "description": "Disable logging of sensitive content (inputs, outputs, embeddings, etc.)" }, "disable_db_pings_in_health": { "type": "boolean", "description": "Disable DB pings in health check", "default": false }, "log_retention_days": { "type": "integer", "minimum": 1, "description": "Number of days to retain logs", "default": 365 }, "enforce_governance_header": { "type": "boolean", "description": "Deprecated: use enforce_auth_on_inference" }, "enforce_auth_on_inference": { "type": "boolean", "description": "Require auth (VK, API key, or user token) on inference endpoints" }, "enforce_scim_auth": { "type": "boolean", "description": "Deprecated: use enforce_auth_on_inference" }, "allow_direct_keys": { "type": "boolean", "description": "Allow provider keys" }, "max_request_body_size_mb": { "type": "integer", "minimum": 1, "description": "Maximum request body size in MB" }, "compat": { "type": "object", "description": "Compat plugin configuration for request type conversion, parameter dropping, and parameter value conversion", "properties": { "convert_text_to_chat": { "type": "boolean", "description": "Convert text completion requests to chat for models that only support chat" }, "convert_chat_to_responses": { "type": "boolean", "description": "Convert chat completion requests to responses for models that only support responses" }, "should_drop_params": { "type": "boolean", "description": "Drop unsupported parameters based on model catalog allowlist" }, "should_convert_params": { "type": "boolean", "description": "Converts model parameter values that are not supported by the model.", "default": false } }, "additionalProperties": false }, "header_filter_config": { "type": "object", "description": "Global header filtering configuration for x-bf-eh-* headers forwarded to LLM providers", "properties": { "allowlist": { "type": "array", "items": { "type": "string" }, "description": "If non-empty, only these headers (from x-bf-eh-* prefix) are allowed to be forwarded" }, "denylist": { "type": "array", "items": { "type": "string" }, "description": "Headers to always block from being forwarded" } }, "additionalProperties": false }, "async_job_result_ttl": { "type": "integer", "description": "Default TTL for async job results in seconds (default: 3600 = 1 hour)", "default": 3600, "minimum": 1 }, "required_headers": { "type": "array", "items": { "type": "string" }, "description": "Headers that must be present on every request. Requests missing any of these headers are rejected with 400. Case-insensitive matching." }, "logging_headers": { "type": "array", "items": { "type": "string" }, "description": "Headers to capture in log metadata. Values are extracted from incoming requests and stored in the metadata field of log entries." }, "whitelisted_routes": { "type": "array", "items": { "type": "string" }, "description": "Routes that bypass auth middleware. Requests to these exact paths skip authentication checks." }, "hide_deleted_virtual_keys_in_filters": { "type": "boolean", "description": "When true, deleted virtual keys are omitted from logs and MCP logs filter data.", "default": false }, "allowed_headers": { "type": "array", "items": { "type": "string" }, "description": "Additional allowed headers for CORS and WebSocket" }, "mcp_agent_depth": { "type": "integer", "minimum": 1, "description": "Maximum depth for MCP agent mode tool execution", "default": 10 }, "mcp_tool_execution_timeout": { "type": "integer", "minimum": 1, "description": "Timeout for individual MCP tool execution in seconds", "default": 30 }, "mcp_code_mode_binding_level": { "type": "string", "enum": ["server", "tool"], "description": "Code mode binding level for MCP tools" }, "mcp_tool_sync_interval": { "type": "integer", "minimum": 0, "description": "Global tool sync interval in minutes (0 = disabled)", "default": 10 }, "mcp_disable_auto_tool_inject": { "type": "boolean", "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", "default": false }, "routing_chain_max_depth": { "type": "integer", "minimum": 1, "description": "Maximum depth for routing rule chain evaluation", "default": 10 } }, "additionalProperties": false }, "framework": { "type": "object", "properties": { "pricing": { "$ref": "#/$defs/pricing_config" } }, "additionalProperties": false }, "providers": { "type": "object", "description": "AI provider configurations", "properties": { "openai": { "$ref": "#/$defs/provider" }, "anthropic": { "$ref": "#/$defs/provider" }, "bedrock": { "$ref": "#/$defs/provider_with_bedrock_config" }, "cohere": { "$ref": "#/$defs/provider" }, "azure": { "$ref": "#/$defs/provider_with_azure_config" }, "vertex": { "$ref": "#/$defs/provider_with_vertex_config" }, "mistral": { "$ref": "#/$defs/provider" }, "ollama": { "$ref": "#/$defs/provider_with_ollama_config" }, "groq": { "$ref": "#/$defs/provider" }, "gemini": { "$ref": "#/$defs/provider" }, "openrouter": { "$ref": "#/$defs/provider" }, "sgl": { "$ref": "#/$defs/provider_with_sgl_config" }, "parasail": { "$ref": "#/$defs/provider" }, "perplexity": { "$ref": "#/$defs/provider" }, "replicate": { "$ref": "#/$defs/provider_with_replicate_config" }, "elevenlabs": { "$ref": "#/$defs/provider" }, "cerebras": { "$ref": "#/$defs/provider" }, "vllm": { "$ref": "#/$defs/provider_with_vllm_config" }, "huggingface": { "$ref": "#/$defs/provider" }, "fireworks": { "$ref": "#/$defs/provider" }, "nebius": { "$ref": "#/$defs/provider" }, "xai": { "$ref": "#/$defs/provider" }, "runway": { "$ref": "#/$defs/provider" } }, "additionalProperties": true }, "governance": { "type": "object", "description": "Governance configuration for budgets, rate limits, customers, teams, virtual keys, and routing rules", "properties": { "budgets": { "type": "array", "description": "Budget configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Budget ID" }, "max_limit": { "type": "number", "description": "Maximum budget limit in dollars" }, "reset_duration": { "type": "string", "description": "Budget reset duration (e.g., '30s', '5m', '1h', '1d', '1w', '1M', '1Y')" }, "current_usage": { "type": "number", "description": "Current usage in dollars", "default": 0 }, "last_reset": { "type": "string", "format": "date-time", "description": "Last time budget was reset" }, "virtual_key_id": { "type": "string", "description": "ID of the virtual key this budget belongs to (mutually exclusive with provider_config_id)" }, "provider_config_id": { "type": "integer", "description": "ID of the provider config this budget belongs to (mutually exclusive with virtual_key_id)" }, "team_id": { "type": "string", "description": "ID of the team this budget belongs to (mutually exclusive with virtual_key_id and provider_config_id)" }, "calendar_aligned": { "type": "boolean", "description": "Snap reset windows to clean calendar boundaries (day, week, month, year)", "default": false } }, "required": ["id", "max_limit", "reset_duration"], "additionalProperties": false } }, "rate_limits": { "type": "array", "description": "Rate limit configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Rate limit ID" }, "token_max_limit": { "type": "integer", "description": "Maximum tokens allowed" }, "token_reset_duration": { "type": "string", "description": "Token reset duration (e.g., '30s', '5m', '1h', '1d', '1w', '1M', '1Y')" }, "token_current_usage": { "type": "integer", "description": "Current token usage", "default": 0 }, "token_last_reset": { "type": "string", "format": "date-time", "description": "Last time token counter was reset" }, "request_max_limit": { "type": "integer", "description": "Maximum requests allowed" }, "request_reset_duration": { "type": "string", "description": "Request reset duration (e.g., '30s', '5m', '1h', '1d', '1w', '1M', '1Y')" }, "request_current_usage": { "type": "integer", "description": "Current request usage", "default": 0 }, "request_last_reset": { "type": "string", "format": "date-time", "description": "Last time request counter was reset" }, "calendar_aligned": { "type": "boolean", "description": "Snap reset windows to clean calendar boundaries (day, week, month, year)", "default": false } }, "required": ["id"], "additionalProperties": false } }, "customers": { "type": "array", "description": "Customer configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Customer ID" }, "name": { "type": "string", "description": "Customer name" }, "budget_id": { "type": "string", "description": "Associated budget ID" }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" } }, "required": ["id", "name"], "additionalProperties": false } }, "teams": { "type": "array", "description": "Team configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Team ID" }, "name": { "type": "string", "description": "Team name" }, "customer_id": { "type": "string", "description": "Associated customer ID" }, "budget_id": { "type": "string", "description": "Associated budget ID" }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "profile": { "type": "object", "description": "Team profile data" }, "config": { "type": "object", "description": "Team configuration data" }, "claims": { "type": "object", "description": "Team claims data" }, "virtual_key_count": { "type": "integer", "description": "Computed count of virtual keys associated with this team", "minimum": 0, "readOnly": true } }, "required": ["id", "name"], "additionalProperties": false } }, "virtual_keys": { "type": "array", "description": "Virtual key configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Virtual key ID" }, "name": { "type": "string", "description": "Virtual key name" }, "description": { "type": "string", "description": "Virtual key description" }, "value": { "type": "string", "description": "The virtual key value" }, "is_active": { "type": "boolean", "description": "Whether the virtual key is active", "default": true }, "calendar_aligned": { "type": "boolean", "description": "Snap all budget resets to calendar boundaries (day, week, month, year)", "default": false }, "team_id": { "type": "string", "description": "Associated team ID (mutually exclusive with customer_id)" }, "customer_id": { "type": "string", "description": "Associated customer ID (mutually exclusive with team_id)" }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "provider_configs": { "type": "array", "description": "Provider configurations for this virtual key (empty means no providers allowed, deny-by-default)", "items": { "$ref": "#/$defs/virtual_key_provider_config" } }, "mcp_configs": { "type": "array", "description": "MCP configurations for this virtual key (empty array means no MCP tools allowed, deny-by-default)", "items": { "$ref": "#/$defs/virtual_key_mcp_config" } } }, "required": ["id", "name"], "additionalProperties": false } }, "routing_rules": { "type": "array", "description": "Routing rules for dynamic provider/model selection based on CEL expressions", "items": { "$ref": "#/$defs/routing_rule" } }, "pricing_overrides": { "type": "array", "description": "Scoped pricing overrides applied at runtime by the model catalog", "items": { "$ref": "#/$defs/provider_pricing_override" } }, "auth_config": { "$ref": "#/$defs/auth_config" }, "model_configs": { "type": "array", "description": "Per-model rate limit and budget configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Model config ID" }, "model_name": { "type": "string", "description": "Model name to apply the configuration to" }, "provider": { "type": "string", "description": "Optional provider name to scope this config" }, "budget_id": { "type": "string", "description": "Budget ID to associate with this model" }, "rate_limit_id": { "type": "string", "description": "Rate limit ID to associate with this model" } }, "required": ["id", "model_name"], "additionalProperties": false } }, "providers": { "type": "array", "description": "Provider-level governance configurations", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Provider row ID" }, "name": { "type": "string", "description": "Provider name" }, "description": { "type": "string", "description": "Operator-facing provider description" }, "budget_id": { "type": "string", "description": "Associated budget ID" }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "network_config": { "$ref": "#/$defs/network_config" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "openai_config": { "$ref": "#/$defs/openai_config" } }, "required": ["name"] } } }, "additionalProperties": false }, "mcp": { "type": "object", "description": "Model Context Protocol configuration", "properties": { "client_configs": { "type": "array", "items": { "$ref": "#/$defs/mcp_client_config" }, "description": "MCP client configurations" }, "tool_manager_config": { "$ref": "#/$defs/mcp_tool_manager_config" }, "tool_sync_interval": { "type": "string", "description": "Global default interval for syncing tools from MCP servers (Go duration, e.g. '10m', '1h')" } }, "additionalProperties": false }, "vector_store": { "type": "object", "description": "Vector store configuration for caching", "properties": { "enabled": { "type": "boolean", "description": "Enable vector store" }, "type": { "type": "string", "enum": ["weaviate", "redis", "qdrant", "pinecone"], "description": "Vector store type (use \"redis\" for Redis or Valkey-compatible endpoints)" }, "config": { "anyOf": [ { "if": { "properties": { "type": { "const": "weaviate" } } }, "then": { "$ref": "#/$defs/weaviate_config" } }, { "if": { "properties": { "type": { "const": "redis" } } }, "then": { "$ref": "#/$defs/redis_config" } }, { "if": { "properties": { "type": { "const": "qdrant" } } }, "then": { "$ref": "#/$defs/qdrant_config" } }, { "if": { "properties": { "type": { "const": "pinecone" } } }, "then": { "$ref": "#/$defs/pinecone_config" } } ] } }, "additionalProperties": false }, "config_store": { "type": "object", "description": "Configuration store settings", "properties": { "enabled": { "type": "boolean", "description": "Enable configuration store" }, "type": { "type": "string", "enum": ["sqlite", "postgres"], "description": "Configuration store type" }, "config": { "anyOf": [ { "if": { "properties": { "type": { "const": "sqlite" } } }, "then": { "type": "object", "properties": { "path": { "type": "string", "description": "Database file path" } }, "required": ["path"], "additionalProperties": false } }, { "if": { "properties": { "type": { "const": "postgres" } } }, "then": { "type": "object", "properties": { "host": { "type": "string", "description": "Database host" }, "port": { "type": "string", "description": "Database port" }, "user": { "type": "string", "description": "Database user" }, "password": { "type": "string", "description": "Database password. Leave empty if you want to use IAM role authentication." }, "db_name": { "type": "string", "description": "Database name" }, "ssl_mode": { "type": "string", "description": "Database SSL mode" }, "max_idle_conns": { "type": "integer", "description": "Maximum number of idle connections in the pool (default: 5)", "minimum": 0, "default": 5 }, "max_open_conns": { "type": "integer", "description": "Maximum number of open connections to the database (default: 50)", "minimum": 2, "default": 50 } }, "required": ["host", "port", "user", "password", "db_name", "ssl_mode"], "additionalProperties": false } } ] } }, "additionalProperties": false }, "logs_store": { "type": "object", "description": "Logs store settings", "properties": { "enabled": { "type": "boolean", "description": "Enable logs store" }, "type": { "type": "string", "enum": ["sqlite", "postgres"], "description": "Logs store type" }, "config": { "type": "object", "oneOf": [ { "if": { "properties": { "../type": { "const": "sqlite" } } }, "then": { "properties": { "path": { "type": "string", "description": "Database file path" } }, "required": ["path"], "additionalProperties": false } }, { "if": { "properties": { "../type": { "const": "postgres" } } }, "then": { "properties": { "host": { "type": "string", "description": "Database host" }, "port": { "type": "string", "description": "Database port" }, "user": { "type": "string", "description": "Database user" }, "password": { "type": "string", "description": "Database password. Leave empty if you want to use IAM role authentication." }, "db_name": { "type": "string", "description": "Database name" }, "ssl_mode": { "type": "string", "description": "Database SSL mode" }, "max_idle_conns": { "type": "integer", "description": "Maximum number of idle connections in the pool (default: 5)", "minimum": 0, "default": 5 }, "max_open_conns": { "type": "integer", "description": "Maximum number of open connections to the database (default: 50)", "minimum": 2, "default": 50 } }, "required": ["host", "port", "user", "password", "db_name", "ssl_mode"], "additionalProperties": false } } ] }, "object_storage": { "type": "object", "description": "Optional object storage for offloading log payloads. When configured, large request/response payloads are stored in S3/GCS while the DB keeps only lightweight index data.", "properties": { "type": { "type": "string", "enum": ["s3", "gcs"], "description": "Object storage backend type" }, "bucket": { "type": "string", "minLength": 1, "description": "Bucket name. Supports env var reference (e.g. env.S3_BUCKET)" }, "prefix": { "type": "string", "description": "Key prefix for stored objects (default: bifrost)", "default": "bifrost" }, "compress": { "type": "boolean", "description": "Enable gzip compression for stored objects. Default: false", "default": false } }, "required": ["type", "bucket"], "if": { "properties": { "type": { "const": "s3" } } }, "then": { "properties": { "type": true, "bucket": true, "prefix": true, "region": { "type": "string", "description": "AWS region. Supports env var reference" }, "endpoint": { "type": "string", "description": "Custom S3-compatible endpoint for MinIO/R2. Supports env var reference" }, "access_key_id": { "type": "string", "description": "AWS access key ID. Omit to use default credential chain (instance role, env vars, etc.). Supports env var reference" }, "secret_access_key": { "type": "string", "description": "AWS secret access key. Supports env var reference" }, "session_token": { "type": "string", "description": "AWS session token for STS temporary credentials. Supports env var reference" }, "role_arn": { "type": "string", "description": "AWS IAM role ARN for STS AssumeRole. Works with static creds or instance role. Supports env var reference" }, "force_path_style": { "type": "boolean", "description": "Use path-style URLs for S3 (required for MinIO). Default: false", "default": false }, "compress": true }, "dependentRequired": { "access_key_id": ["secret_access_key"], "secret_access_key": ["access_key_id"], "session_token": ["access_key_id", "secret_access_key"] }, "additionalProperties": false }, "else": { "properties": { "type": true, "bucket": true, "prefix": true, "credentials_json": { "type": "string", "description": "GCP service account credentials JSON or file path. Omit to use Application Default Credentials. Supports env var reference" }, "credentials": { "type": "string", "description": "Deprecated: use credentials_json. Kept for backwards compatibility." }, "project_id": { "type": "string", "description": "GCP project ID override. Supports env var reference" }, "compress": true }, "additionalProperties": false } }, "retention_days": { "type": "integer", "minimum": 0, "description": "Days to retain log entries. 0 disables retention-based cleanup." } }, "additionalProperties": false }, "plugins": { "type": "array", "description": "Plugins configuration", "items": { "type": "object", "required": ["enabled", "name"], "properties": { "enabled": { "type": "boolean", "description": "Enable plugins" }, "name": { "type": "string", "description": "Name of the plugin (built-in: telemetry, prompts, logging, governance, maxim, semantic_cache, otel, or custom plugin name)" }, "config": { "type": "object", "description": "Configuration for the plugin" }, "path": { "type": "string", "description": "Path to the plugin (optional, required for dynamic plugins)", "optional": true }, "version": { "type": "integer", "minimum": 1, "maximum": 32767, "description": "DB-Backed Only. Version metadata persisted on TablePlugin (default: 1). In DB-backed sync, version metadata is considered for plugin replacement/reload decisions.", "optional": true, "default": 1 }, "placement": { "type": "string", "enum": ["pre_builtin", "post_builtin", "builtin"], "description": "DB-Backed Only. Whether this plugin runs before, after, or as a built-in. Default: post_builtin. Ignored in config.json.", "optional": true, "default": "post_builtin" }, "order": { "type": "integer", "description": "DB-Backed Only. Position within placement group. Lower values execute earlier. Default: 0. Ignored in config.json.", "optional": true, "default": 0 } }, "allOf": [ { "if": { "properties": { "name": { "const": "telemetry" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the telemetry plugin (Prometheus metrics)", "properties": { "custom_labels": { "type": "array", "items": { "type": "string" }, "description": "Custom labels to add to Prometheus metrics" }, "push_gateway": { "type": "object", "description": "Configuration for pushing metrics to a Prometheus Push Gateway for multi-node cluster deployments", "properties": { "enabled": { "type": "boolean", "description": "Whether to enable pushing metrics to the Push Gateway", "default": false }, "push_gateway_url": { "type": "string", "description": "URL of the Prometheus Push Gateway (e.g., http://pushgateway:9091)", "format": "uri" }, "job_name": { "type": "string", "description": "Job label for pushed metrics", "default": "bifrost" }, "instance_id": { "type": "string", "description": "Instance identifier for grouping metrics. If not set, hostname is used automatically." }, "push_interval": { "type": "integer", "description": "How often to push metrics in seconds", "default": 15, "minimum": 1, "maximum": 300 }, "basic_auth": { "type": "object", "description": "Basic authentication credentials for the Push Gateway", "properties": { "username": { "type": "string", "description": "Username for basic authentication" }, "password": { "type": "string", "description": "Password for basic authentication" } }, "required": ["username", "password"] } }, "required": ["push_gateway_url"] } }, "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "logging" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the logging plugin", "properties": { "disable_content_logging": { "type": "boolean", "description": "Disable logging of request and response content" }, "logging_headers": { "type": "array", "items": { "type": "string" }, "description": "List of headers to capture in log metadata" } }, "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "governance" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the governance plugin", "properties": { "is_vk_mandatory": { "type": "boolean", "description": "Whether virtual key (x-bf-vk header) is mandatory for all requests" }, "required_headers": { "type": "array", "items": { "type": "string" }, "description": "List of headers that must be present in requests" }, "is_enterprise": { "type": "boolean", "description": "Enable enterprise mode features" } }, "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "maxim" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the Maxim SDK integration plugin", "properties": { "api_key": { "type": "string", "description": "API key for Maxim SDK authentication" }, "log_repo_id": { "type": "string", "description": "Optional default ID for the Maxim logger instance" } }, "required": ["api_key"], "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "semantic_cache" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the semantic cache plugin", "properties": { "provider": { "type": "string", "minLength": 1, "description": "Provider to use for generating embeddings. Required for semantic search; omit it for direct hash mode with dimension: 1.", "enum": [ "openai", "anthropic", "gemini", "bedrock", "azure", "cohere", "mistral", "groq", "ollama", "openrouter", "vertex", "cerebras", "vllm", "parasail", "perplexity", "replicate", "sgl", "huggingface" ] }, "keys": { "type": "array", "description": "API keys for the embedding provider. These are injected at runtime for config-driven setups and are not needed for direct caching with dimension: 1.", "items": { "type": "string" } }, "embedding_model": { "type": "string", "description": "Model to use for generating embeddings in provider-backed semantic caching. Required when provider is set and not allowed in direct-only mode." }, "cleanup_on_shutdown": { "type": "boolean", "description": "Clean up cache on shutdown (default: false)" }, "ttl": { "description": "Time-to-live for cached responses (supports duration strings like '5m', '1h' or seconds as number, default: 5min)", "oneOf": [ { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$" }, { "type": "integer", "minimum": 0 } ] }, "threshold": { "type": "number", "description": "Cosine similarity threshold for semantic matching (default: 0.8)", "minimum": 0, "maximum": 1 }, "vector_store_namespace": { "type": "string", "description": "Namespace for vector store (optional)" }, "dimension": { "type": "integer", "description": "Dimension for vector store embeddings. Use 1 for direct (hash-based) caching without an embedding provider.", "minimum": 1 }, "default_cache_key": { "type": "string", "description": "Default cache key used when no per-request key is provided. When set, all requests without an explicit x-bf-cache-key header will use this value and be cached automatically." }, "conversation_history_threshold": { "type": "integer", "description": "Skip caching for requests with more than this number of messages in conversation history (default: 3)", "minimum": 0 }, "cache_by_model": { "type": "boolean", "description": "Include model in cache key (default: true)" }, "cache_by_provider": { "type": "boolean", "description": "Include provider in cache key (default: true)" }, "exclude_system_prompt": { "type": "boolean", "description": "Exclude system prompt in cache key (default: false)" } }, "required": ["dimension"], "allOf": [ { "if": { "properties": { "provider": { "type": "string", "minLength": 1 } }, "required": ["provider"] }, "then": { "required": ["provider", "embedding_model"], "properties": { "dimension": { "type": "integer", "minimum": 2 } } }, "else": { "not": { "required": ["embedding_model"] }, "properties": { "dimension": { "const": 1 } } } } ], "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "otel" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the OpenTelemetry plugin", "properties": { "service_name": { "type": "string", "description": "Service name to be used for tracing", "default": "bifrost" }, "collector_url": { "type": "string", "description": "URL of the OpenTelemetry collector", "oneOf": [ { "format": "uri" }, { "pattern": "^[^:\\s]+:\\d+$" } ] }, "trace_type": { "type": "string", "description": "Type of trace to use for the OTEL collector", "enum": ["genai_extension", "vercel", "open_inference"] }, "protocol": { "type": "string", "description": "Protocol to use for the OTEL collector", "enum": ["http", "grpc"] }, "metrics_enabled": { "type": "boolean", "description": "Enable push-based metrics export via OTLP. Recommended for multi-node cluster deployments.", "default": false }, "metrics_endpoint": { "type": "string", "description": "OTLP metrics endpoint URL (e.g., http://otel-collector:4318/v1/metrics for HTTP or otel-collector:4317 for gRPC)", "oneOf": [ { "format": "uri" }, { "pattern": "^[^:\\s]+:\\d+$" } ] }, "metrics_push_interval": { "type": "integer", "description": "Metrics push interval in seconds", "default": 15, "minimum": 1, "maximum": 300 }, "headers": { "type": "object", "additionalProperties": { "type": "string" }, "description": "Custom headers for the collector. Supports env.VAR_NAME prefix for environment variable substitution." }, "tls_ca_cert": { "type": "string", "description": "Path to TLS CA certificate file" }, "insecure": { "type": "boolean", "description": "Skip TLS verification (ignored if tls_ca_cert is set)" } }, "required": ["collector_url", "trace_type", "protocol"], "additionalProperties": false } } } }, { "if": { "properties": { "name": { "const": "datadog" } } }, "then": { "required": ["config"], "properties": { "config": { "type": "object", "description": "Configuration for the Datadog APM and metrics plugin", "properties": { "service_name": { "type": "string", "description": "Name of the service to report to Datadog", "default": "bifrost" }, "agent_addr": { "type": "string", "description": "Address of the Datadog Agent for APM traces", "default": "localhost:8126" }, "env": { "type": "string", "description": "Environment tag (e.g., production, staging)" }, "version": { "type": "string", "description": "Service version tag" }, "custom_tags": { "type": "object", "additionalProperties": { "type": "string" }, "description": "Additional tags to add to all traces and metrics" }, "enable_traces": { "type": "boolean", "description": "Enable APM traces (default: true)", "default": true } }, "additionalProperties": false } } } } ], "additionalProperties": false } }, "websocket": { "$ref": "#/$defs/websocket_config" }, "guardrails_config": { "$ref": "#/$defs/guardrails_config" }, "audit_logs": { "$ref": "#/$defs/audit_logs_config" }, "cluster_config": { "$ref": "#/$defs/cluster_config" }, "load_balancer_config": { "$ref": "#/$defs/load_balancer_config" }, "large_payload_optimization": { "$ref": "#/$defs/large_payload_optimization" }, "scim_config": { "$ref": "#/$defs/scim_config" }, "access_profiles": { "$ref": "#/$defs/access_profiles" } }, "additionalProperties": false, "$defs": { "routing_target": { "type": "object", "description": "A single weighted routing target within a rule. All fields except weight are optional; omitting provider or model means use the incoming request value. Weights across all targets in a rule must sum to 1.", "properties": { "provider": { "type": "string", "minLength": 1, "description": "Target provider name (e.g., 'openai', 'azure'). Omit to use the incoming request provider" }, "model": { "type": "string", "minLength": 1, "description": "Target model name. Omit to use the incoming request model" }, "key_id": { "type": "string", "minLength": 1, "description": "Optional API key UUID to pin for this target. Omit for load-balanced key selection" }, "weight": { "type": "number", "description": "Probability weight for this target (must be > 0). All weights in a rule must sum to 1", "exclusiveMinimum": 0, "maximum": 1 } }, "required": ["weight"], "additionalProperties": false }, "routing_rule": { "type": "object", "description": "Routing rule for dynamic provider/model selection", "properties": { "id": { "type": "string", "description": "Unique routing rule ID" }, "name": { "type": "string", "description": "Human-readable rule name" }, "description": { "type": "string", "description": "Optional description of the rule" }, "enabled": { "type": "boolean", "description": "Whether the rule is enabled", "default": true }, "cel_expression": { "type": "string", "description": "CEL (Common Expression Language) expression for rule evaluation" }, "chain_rule": { "type": "boolean", "default": false, "description": "If true, re-evaluates routing chain after this rule matches" }, "targets": { "type": "array", "minItems": 1, "description": "Weighted routing targets. Weights must sum to 1. Omit provider or model to use the incoming request value.", "items": { "$ref": "#/$defs/routing_target" } }, "provider": false, "model": false, "fallbacks": { "type": "array", "description": "Fallback provider chain in order", "items": { "type": "string" } }, "scope": { "type": "string", "enum": ["global", "team", "customer", "virtual_key"], "description": "Rule scope level", "default": "global" }, "scope_id": { "type": "string", "description": "Entity ID for non-global scopes (required for non-global scope)" }, "priority": { "type": "integer", "description": "Evaluation priority within scope (lower = earlier)", "default": 0 }, "query": { "type": "object", "description": "Additional query parameters", "additionalProperties": true } }, "required": ["id", "name", "targets"], "additionalProperties": false, "if": { "properties": { "scope": { "enum": ["team", "customer", "virtual_key"] } }, "required": ["scope"] }, "then": { "required": ["scope_id"], "properties": { "scope_id": { "type": "string", "minLength": 1 } } } }, "virtual_key_provider_config": { "type": "object", "description": "Provider configuration for a virtual key", "properties": { "id": { "type": "integer", "description": "Provider config ID" }, "virtual_key_id": { "type": "string", "description": "Associated virtual key ID" }, "provider": { "type": "string", "description": "Provider name" }, "weight": { "type": ["number", "null"], "description": "Weight for load balancing (null opts out of weighted routing)", "default": null }, "allowed_models": { "type": "array", "description": "Allowed models for this provider config. Use [\"*\"] to allow all models; empty array denies all (deny-by-default).", "items": { "type": "string" } }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "key_ids": { "type": "array", "description": "Key identifiers allowed for this provider config. Use [\"*\"] to allow all keys; empty array denies all (deny-by-default). In config.json, values are key names. Via the API, values are key UUIDs.", "items": { "type": "string" } } }, "required": ["provider"], "additionalProperties": false }, "virtual_key_mcp_config": { "type": "object", "description": "MCP configuration for a virtual key", "properties": { "id": { "type": "integer", "description": "MCP config ID" }, "virtual_key_id": { "type": "string", "description": "Associated virtual key ID" }, "mcp_client_id": { "type": "integer", "description": "Associated MCP client ID (database format)" }, "mcp_client_name": { "type": "string", "description": "MCP client name (config file format \u2014 resolved to mcp_client_id at startup)" }, "tools_to_execute": { "type": "array", "description": "Include-only list of tools this Virtual Key is permitted to execute from this MCP client. ['*'] means all tools allowed, [] means no tools allowed (deny-by-default).", "items": { "type": "string" } } }, "additionalProperties": false }, "auth_config": { "type": "object", "description": "Authentication configuration. Deprecated: Use governance.auth_config instead.", "properties": { "admin_username": { "type": "string", "description": "Admin username" }, "admin_password": { "type": "string", "description": "Admin password" }, "is_enabled": { "type": "boolean", "description": "Whether authentication is enabled" }, "disable_auth_on_inference": { "type": "boolean", "description": "Whether authentication is disabled on inference" } }, "additionalProperties": false }, "pricing_config": { "type": "object", "properties": { "pricing_url": { "type": "string", "description": "Pricing URL", "optional": true, "format": "uri" }, "pricing_sync_interval": { "type": "integer", "description": "Pricing sync interval in seconds. Default is 24 hours. Minimum is 3600 seconds (1 hour).", "default": 86400, "optional": true, "minimum": 3600 } }, "additionalProperties": false }, "network_config": { "type": "object", "properties": { "base_url": { "type": "string", "format": "uri", "description": "Base URL for the provider (optional, required for Ollama)" }, "extra_headers": { "type": "object", "additionalProperties": { "type": "string" }, "description": "Additional headers to send with requests" }, "default_request_timeout_in_seconds": { "type": "integer", "minimum": 1, "description": "Default request timeout in seconds" }, "max_retries": { "type": "integer", "minimum": 0, "description": "Maximum number of retries" }, "retry_backoff_initial": { "type": "integer", "minimum": 0, "description": "Initial retry backoff in milliseconds" }, "retry_backoff_max": { "type": "integer", "minimum": 0, "description": "Maximum retry backoff in milliseconds" }, "enforce_http2": { "type": "boolean", "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" }, "insecure_skip_verify": { "type": "boolean", "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." }, "ca_cert_pem": { "type": "string", "description": "PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)" }, "stream_idle_timeout_in_seconds": { "type": "integer", "minimum": 5, "maximum": 3600, "description": "Idle timeout per stream chunk in seconds. If no data is received for this many seconds, the stream is closed. Default: 60." }, "max_conns_per_host": { "type": "integer", "minimum": 1, "maximum": 10000, "description": "Maximum number of TCP connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000." }, "beta_header_overrides": { "type": "object", "additionalProperties": { "type": "boolean" }, "description": "Override default Anthropic beta header support per provider. Keys are header prefixes (e.g. 'redact-thinking-'), values are true (supported) or false (unsupported). Headers not listed use the built-in defaults." } }, "additionalProperties": false }, "network_config_without_base_url": { "type": "object", "properties": { "extra_headers": { "type": "object", "additionalProperties": { "type": "string" }, "description": "Additional headers to send with requests" }, "default_request_timeout_in_seconds": { "type": "integer", "minimum": 1, "description": "Default request timeout in seconds" }, "max_retries": { "type": "integer", "minimum": 0, "description": "Maximum number of retries" }, "retry_backoff_initial": { "type": "integer", "minimum": 0, "description": "Initial retry backoff in milliseconds" }, "retry_backoff_max": { "type": "integer", "minimum": 0, "description": "Maximum retry backoff in milliseconds" }, "enforce_http2": { "type": "boolean", "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" }, "insecure_skip_verify": { "type": "boolean", "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." }, "ca_cert_pem": { "type": "string", "description": "PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)" }, "stream_idle_timeout_in_seconds": { "type": "integer", "minimum": 5, "maximum": 3600, "description": "Idle timeout per stream chunk in seconds. If no data is received for this many seconds, the stream is closed. Default: 60." }, "max_conns_per_host": { "type": "integer", "minimum": 1, "maximum": 10000, "description": "Maximum number of TCP connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000." }, "beta_header_overrides": { "type": "object", "additionalProperties": { "type": "boolean" }, "description": "Override default Anthropic beta header support per provider. Keys are header prefixes (e.g. 'redact-thinking-'), values are true (supported) or false (unsupported). Headers not listed use the built-in defaults." } }, "additionalProperties": false }, "openai_config": { "type": "object", "description": "OpenAI-specific provider settings", "properties": { "disable_store": { "type": "boolean", "description": "Disable OpenAI Responses API conversation storage." } }, "additionalProperties": false }, "concurrency_and_buffer_size": { "type": "object", "properties": { "concurrency": { "type": "integer", "minimum": 1, "description": "Number of concurrent requests" }, "buffer_size": { "type": "integer", "minimum": 1, "description": "Buffer size for requests" } }, "required": ["concurrency", "buffer_size"], "additionalProperties": false }, "base_key": { "type": "object", "properties": { "name": { "type": "string", "description": "Name of the key" }, "value": { "type": "string", "description": "API key value (can use env. prefix)" }, "models": { "type": "array", "items": { "type": "string" }, "description": "Models this key can access. Use [\"*\"] to allow all models; empty array denies all (deny-by-default)." }, "weight": { "type": "number", "minimum": 0, "description": "Weight for load balancing" }, "use_for_batch_api": { "type": "boolean", "description": "Whether this key can be used for batch API operations (default: false)", "default": false }, "aliases": { "type": "object", "additionalProperties": { "type": "string", "minLength": 1 }, "propertyNames": { "minLength": 1 }, "description": "Model alias mappings: maps a model name to a provider-specific identifier (deployment name, inference profile ID, fine-tuned model ID, etc.)" } }, "required": ["name", "weight"] }, "bedrock_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "bedrock_key_config": { "type": "object", "properties": { "access_key": { "type": "string", "description": "AWS access key (can use env. prefix)" }, "secret_key": { "type": "string", "description": "AWS secret key (can use env. prefix)" }, "session_token": { "type": "string", "description": "AWS session token (can use env. prefix)" }, "region": { "type": "string", "description": "AWS region" }, "arn": { "type": "string", "description": "AWS ARN" }, "role_arn": { "type": "string", "description": "AWS IAM role ARN for AssumeRole (can use env. prefix)" }, "external_id": { "type": "string", "description": "External ID for AssumeRole (can use env. prefix)" }, "session_name": { "type": "string", "description": "Role session name for AssumeRole (can use env. prefix)" }, "deployments": { "type": "object", "additionalProperties": { "type": "string" }, "description": "Model to deployment mappings" }, "batch_s3_config": { "type": "object", "description": "S3 bucket configuration for Bedrock batch operations", "properties": { "buckets": { "type": "array", "description": "List of S3 bucket configurations", "items": { "type": "object", "properties": { "bucket_name": { "type": "string", "description": "S3 bucket name" }, "prefix": { "type": "string", "description": "S3 key prefix for batch files" }, "is_default": { "type": "boolean", "description": "Whether this is the default bucket for batch operations" } }, "required": ["bucket_name"], "additionalProperties": false } } }, "additionalProperties": false } }, "required": ["region"], "additionalProperties": false } } } ] }, "vllm_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "vllm_key_config": { "type": "object", "properties": { "url": { "type": "string", "minLength": 1, "description": "VLLM server base URL (can use env. prefix)" }, "model_name": { "type": "string", "minLength": 1, "description": "Exact model name served on this VLLM instance" } }, "required": ["url", "model_name"], "additionalProperties": false } }, "required": ["vllm_key_config"] } ] }, "replicate_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "replicate_key_config": { "type": "object", "properties": { "use_deployments_endpoint": { "type": "boolean", "description": "Whether to use the deployments endpoint instead of the models endpoint (default: false)" } }, "additionalProperties": false } } } ] }, "ollama_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "ollama_key_config": { "type": "object", "properties": { "url": { "type": "string", "minLength": 1, "description": "Ollama server base URL (can use env. prefix)" } }, "required": ["url"], "additionalProperties": false } }, "required": ["ollama_key_config"] } ] }, "sgl_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "sgl_key_config": { "type": "object", "properties": { "url": { "type": "string", "minLength": 1, "description": "SGLang server base URL (can use env. prefix)" } }, "required": ["url"], "additionalProperties": false } }, "required": ["sgl_key_config"] } ] }, "azure_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "azure_key_config": { "type": "object", "properties": { "endpoint": { "type": "string", "description": "Azure endpoint (can use env. prefix)" }, "api_version": { "type": "string", "description": "Azure API version" } }, "required": ["endpoint", "api_version"], "additionalProperties": false } }, "required": ["azure_key_config"] } ] }, "vertex_key": { "allOf": [ { "$ref": "#/$defs/base_key" }, { "type": "object", "properties": { "vertex_key_config": { "type": "object", "properties": { "project_id": { "type": "string", "description": "Google Cloud project ID (can use env. prefix)" }, "project_number": { "type": "string", "description": "Google Cloud project number" }, "region": { "type": "string", "description": "Google Cloud region" }, "auth_credentials": { "type": "string", "description": "Authentication credentials (can use env. prefix)" } }, "required": ["project_id", "region"], "additionalProperties": false } }, "required": ["vertex_key_config"] } ] }, "provider": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/base_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_bedrock_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/bedrock_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_vllm_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/vllm_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_replicate_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/replicate_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_azure_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/azure_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_vertex_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/vertex_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_ollama_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/ollama_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "provider_with_sgl_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { "$ref": "#/$defs/sgl_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" }, "send_back_raw_request": { "type": "boolean", "description": "Include raw request in BifrostResponse (default: false)" }, "send_back_raw_response": { "type": "boolean", "description": "Include raw response in BifrostResponse (default: false)" }, "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" } }, "required": ["keys"], "additionalProperties": false }, "mcp_client_config": { "type": "object", "properties": { "client_id": { "type": "string", "description": "MCP client identifier" }, "name": { "type": "string", "description": "Name of the MCP client" }, "is_code_mode_client": { "type": "boolean", "description": "Whether this client is a code mode client" }, "connection_type": { "type": "string", "enum": ["stdio", "http", "sse", "inprocess"], "description": "Connection type for MCP client" }, "connection_string": { "type": "string", "description": "HTTP or SSE URL (alternative to http_config sub-object, required for HTTP or SSE connections)" }, "auth_type": { "type": "string", "enum": ["none", "headers", "oauth", "per_user_oauth"], "description": "Authentication type for MCP connection" }, "oauth_config_id": { "type": "string", "description": "OAuth config ID reference (required when auth_type is 'oauth' or 'per_user_oauth')" }, "headers": { "type": "object", "description": "Headers to send with the request (for headers auth type)", "additionalProperties": { "type": "string" } }, "stdio_config": { "type": "object", "properties": { "command": { "type": "string", "description": "Command to execute" }, "args": { "type": "array", "items": { "type": "string" }, "description": "Command arguments" }, "envs": { "type": "array", "items": { "type": "string" }, "description": "Environment variables" } }, "required": ["command"], "additionalProperties": false }, "tools_to_execute": { "type": "array", "items": { "type": "string" }, "description": "Include-only list of tools to execute. ['*'] means all, [] means none." }, "tools_to_auto_execute": { "type": "array", "items": { "type": "string" }, "description": "Auto-execute list of tools. ['*'] means all, [] means none." }, "tool_sync_interval": { "type": "string", "description": "Per-client override for tool sync interval (Go duration, e.g. '10m', '1h', 0 = use global, negative = disabled)" }, "allowed_extra_headers": { "type": "array", "items": { "type": "string" }, "description": "Allowlist of request-level headers that callers may forward to this MCP server at execution time. Use ['*'] to allow all headers." }, "is_ping_available": { "type": "boolean", "description": "Whether the MCP server supports ping for health checks (default: true)", "default": true }, "tool_pricing": { "type": "object", "description": "Tool pricing map (tool name to cost per execution)", "additionalProperties": { "type": "number", "minimum": 0 } }, "allow_on_all_virtual_keys": { "type": "boolean", "description": "When true, this MCP server is accessible to all virtual keys without requiring explicit per-key assignment. All tools are allowed by default. If a virtual key has an explicit MCP config for this server, that config takes precedence and overrides this behaviour.", "default": false } }, "required": ["name", "connection_type"], "additionalProperties": false, "if": { "properties": { "auth_type": { "enum": ["oauth", "per_user_oauth"] } }, "required": ["auth_type"] }, "then": { "required": ["oauth_config_id"] }, "oneOf": [ { "properties": { "connection_type": { "const": "stdio" } }, "required": ["stdio_config"] }, { "properties": { "connection_type": { "const": "websocket" } }, "required": ["websocket_config"] }, { "properties": { "connection_type": { "const": "http" } }, "anyOf": [ { "required": ["http_config"] }, { "required": ["connection_string"] } ] }, { "properties": { "connection_type": { "const": "sse" } }, "required": ["connection_string"] } ] }, "mcp_tool_manager_config": { "type": "object", "properties": { "tool_execution_timeout": { "type": "integer", "description": "Tool execution timeout in seconds", "minimum": 1, "default": 30 }, "max_agent_depth": { "type": "integer", "description": "Max agent depth", "minimum": 1, "default": 10 }, "code_mode_binding_level": { "type": "string", "enum": ["server", "tool"], "description": "How tools are exposed in VFS for code execution" }, "disable_auto_tool_inject": { "type": "boolean", "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", "default": false } } }, "weaviate_config": { "type": "object", "description": "Weaviate configuration for vector store", "properties": { "scheme": { "type": "string", "description": "Weaviate server scheme (http or https) - REQUIRED" }, "host": { "type": "string", "description": "Weaviate server host (host:port) - REQUIRED" }, "api_key": { "type": "string", "description": "API key for Weaviate authentication (optional)" }, "grpc_config": { "type": "object", "properties": { "host": { "type": "string", "description": "Weaviate server host (host:port). If host is without a port number then the 80 port for insecured and 443 port for secured connections will be used." }, "secured": { "type": "boolean", "description": "Secured set it to true if it's a secured connection" } } }, "headers": { "type": "object", "description": "Additional headers to send with requests" }, "timeout": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Weaviate operations (e.g., '5s')" }, "class_name": { "type": "string", "description": "Class name for Weaviate vector store" }, "properties": { "type": "array", "items": { "type": "object" }, "description": "Properties for Weaviate vector store" } }, "required": ["scheme", "host"], "additionalProperties": false }, "redis_config": { "type": "object", "description": "Redis configuration for vector store (also used for Valkey-compatible endpoints)", "properties": { "addr": { "type": "string", "description": "Redis/Valkey server address (host:port) - REQUIRED (can use env. prefix)" }, "username": { "type": "string", "description": "Username for Redis AUTH (optional, can use env. prefix)" }, "password": { "type": "string", "description": "Password for Redis AUTH (optional, can use env. prefix)" }, "db": { "type": "integer", "description": "Redis database number (default: 0)", "default": 0 }, "use_tls": { "type": "boolean", "description": "Use TLS for the Redis/Valkey connection (optional)", "default": false }, "insecure_skip_verify": { "type": "boolean", "description": "Skip TLS certificate verification for Redis/Valkey connections. Use only when a trusted CA cannot be configured.", "default": false }, "ca_cert_pem": { "type": "string", "description": "PEM-encoded CA certificate to trust for Redis/Valkey TLS connections" }, "cluster_mode": { "type": "boolean", "description": "Use Redis Cluster mode. Required for cluster configuration endpoints; when enabled, db must be 0.", "default": false }, "pool_size": { "type": "integer", "description": "Maximum number of socket connections (optional)" }, "max_active_conns": { "type": "integer", "description": "Maximum number of active connections (optional)" }, "min_idle_conns": { "type": "integer", "description": "Minimum number of idle connections (optional)" }, "max_idle_conns": { "type": "integer", "description": "Maximum number of idle connections (optional)" }, "conn_max_lifetime": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum lifetime (e.g., '30m')" }, "conn_max_idle_time": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum idle time (e.g., '5m')" }, "dial_timeout": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket connection (e.g., '5s')" }, "read_timeout": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket reads (e.g., '3s')" }, "write_timeout": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket writes (e.g., '3s')" }, "context_timeout": { "type": "string", "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Redis operations (e.g., '10s')" } }, "required": ["addr"], "additionalProperties": false }, "qdrant_config": { "type": "object", "description": "Qdrant configuration for vector store", "properties": { "host": { "type": "string", "description": "Qdrant server host - REQUIRED (can use env. prefix)" }, "port": { "type": "integer", "description": "Qdrant server port (default: 6334 for gRPC)", "default": 6334 }, "api_key": { "type": "string", "description": "API key for authentication (optional, can use env. prefix)" }, "use_tls": { "type": "boolean", "description": "Use TLS for connection (optional)", "default": false } }, "required": ["host"], "additionalProperties": false }, "pinecone_config": { "type": "object", "description": "Pinecone configuration for vector store", "properties": { "api_key": { "type": "string", "description": "Pinecone API key - REQUIRED (can use env. prefix)" }, "index_host": { "type": "string", "description": "Index host URL from Pinecone console - REQUIRED (e.g., your-index.svc.environment.pinecone.io)" } }, "required": ["api_key", "index_host"], "additionalProperties": false }, "proxy_config": { "type": "object", "description": "Proxy configuration for provider connections", "properties": { "type": { "type": "string", "enum": ["none", "http", "socks5", "environment"], "description": "Type of proxy to use" }, "url": { "type": "string", "format": "uri", "description": "URL of the proxy server" }, "username": { "type": "string", "description": "Username for proxy authentication" }, "password": { "type": "string", "description": "Password for proxy authentication" }, "ca_cert_pem": { "type": "string", "description": "PEM-encoded CA certificate to trust for TLS connections through the proxy (for SSL-intercepting proxies)" } }, "required": ["type"], "additionalProperties": false }, "cluster_config": { "type": "object", "description": "Cluster mode configuration", "properties": { "enabled": { "type": "boolean", "description": "Whether cluster mode is enabled" }, "region": { "type": "string", "description": "Region label for cluster deployment (runtime default: unknown)" }, "peers": { "type": "array", "description": "List of peer addresses", "items": { "type": "string", "description": "Peer address in host:port format" } }, "gossip": { "type": "object", "description": "Gossip protocol configuration", "properties": { "port": { "type": "integer", "minimum": 1, "maximum": 65535, "description": "Port for gossip communication" }, "config": { "type": "object", "description": "Gossip protocol settings", "properties": { "timeout_seconds": { "type": "integer", "minimum": 1, "description": "Timeout for operations in seconds" }, "success_threshold": { "type": "integer", "minimum": 1, "description": "Number of successful probes required" }, "failure_threshold": { "type": "integer", "minimum": 1, "description": "Number of failed probes before marking as failed" } }, "required": ["timeout_seconds", "success_threshold", "failure_threshold"], "additionalProperties": false } }, "required": ["port", "config"], "additionalProperties": false }, "discovery": { "type": "object", "description": "Auto-discovery configuration for cluster nodes", "properties": { "enabled": { "type": "boolean", "description": "Whether auto-discovery is enabled" }, "type": { "type": "string", "enum": ["kubernetes", "dns", "udp", "consul", "etcd", "mdns"], "description": "Discovery mechanism type" }, "service_name": { "type": "string", "description": "Service name used for discovery (used by consul, etcd, UDP, and mDNS)" }, "bind_port": { "type": "integer", "minimum": 1, "maximum": 65535, "description": "Port to bind for cluster communication" }, "dial_timeout": { "type": "string", "description": "Timeout for discovery dial operations as a Go duration string (e.g. '5s', '1m')" }, "allowed_address_space": { "type": "array", "items": { "type": "string" }, "description": "CIDR notation for allowed address spaces (e.g., ['10.0.0.0/8', '192.168.0.0/16'])" }, "k8s_namespace": { "type": "string", "description": "Kubernetes namespace for service discovery" }, "k8s_label_selector": { "type": "string", "description": "Kubernetes label selector for filtering pods" }, "dns_names": { "type": "array", "items": { "type": "string" }, "description": "DNS names to resolve for node discovery" }, "udp_broadcast_port": { "type": "integer", "minimum": 1, "maximum": 65535, "description": "Port for UDP broadcast discovery" }, "consul_address": { "type": "string", "description": "Consul server address for service discovery" }, "etcd_endpoints": { "type": "array", "items": { "type": "string" }, "description": "Etcd endpoints for service discovery" }, "mdns_service": { "type": "string", "description": "mDNS service name for local network discovery" } }, "required": ["type"], "additionalProperties": false } }, "required": ["enabled"], "additionalProperties": false }, "scim_config": { "type": "object", "description": "SAML/SCIM (System for Cross-domain Identity Management) configuration", "properties": { "enabled": { "type": "boolean", "description": "Whether SAML/SCIM authentication is enabled" }, "provider": { "type": "string", "enum": ["okta", "entra"], "description": "SCIM provider type" }, "config": { "type": "object", "description": "Provider-specific configuration" } }, "required": ["enabled"], "additionalProperties": false, "allOf": [ { "if": { "properties": { "provider": { "const": "okta" } } }, "then": { "properties": { "config": { "$ref": "#/$defs/okta_config" } } } }, { "if": { "properties": { "provider": { "const": "entra" } } }, "then": { "properties": { "config": { "$ref": "#/$defs/entra_config" } } } } ] }, "okta_config": { "type": "object", "description": "Okta JWT authentication configuration", "properties": { "issuerUrl": { "type": "string", "format": "uri", "description": "Okta issuer URL (e.g., https://your-domain.okta.com/oauth2/default)" }, "clientId": { "type": "string", "description": "Okta application client ID" }, "clientSecret": { "type": "string", "description": "Okta client secret" }, "apiToken": { "type": "string", "description": "Okta API token for Admin API access" }, "audience": { "type": "string", "description": "JWT audience for validation (optional)" }, "userIdField": { "type": "string", "description": "JWT claim field for user ID (default: 'sub')", "default": "sub" }, "teamIdsField": { "type": "string", "description": "JWT claim field for team IDs (default: 'groups')", "default": "groups" }, "rolesField": { "type": "string", "description": "JWT claim field for roles (default: 'roles')", "default": "roles" } }, "required": ["issuerUrl", "clientId", "clientSecret", "apiToken"], "additionalProperties": false }, "entra_config": { "type": "object", "description": "Microsoft Entra ID (formerly Azure AD) JWT authentication configuration", "properties": { "tenantId": { "type": "string", "description": "Azure tenant ID or 'common' for multi-tenant applications" }, "clientId": { "type": "string", "description": "Application (client) ID from Azure portal" }, "clientSecret": { "type": "string", "description": "Client secret (optional, required for token revocation)" }, "cloud": { "type": "string", "enum": ["commercial", "gcc-high", "dod"], "default": "commercial", "description": "Cloud environment: 'commercial' (default), 'gcc-high' for US Government GCC High, or 'dod' for Department of Defense" }, "audience": { "type": "string", "description": "JWT audience for validation (default: clientId)" }, "appIdUri": { "type": "string", "format": "uri", "description": "App ID URI for v1.0 tokens (e.g., api://{clientId})" }, "userIdField": { "type": "string", "description": "JWT claim field for user ID (default: 'oid')", "default": "oid" }, "teamIdsField": { "type": "string", "description": "JWT claim field for team IDs (default: 'groups')", "default": "groups" }, "rolesField": { "type": "string", "description": "JWT claim field for roles (default: 'roles')", "default": "roles" } }, "required": ["tenantId", "clientId"], "additionalProperties": false }, "load_balancer_config": { "type": "object", "description": "Load balancer configuration for intelligent request routing", "properties": { "enabled": { "type": "boolean", "description": "Whether load balancing is enabled" }, "tracker_config": { "type": "object", "description": "Configuration for tracking route metrics and performance" }, "bootstrap": { "type": "object", "description": "Bootstrap data for initializing load balancer with historical metrics", "properties": { "route_metrics": { "type": "object", "description": "Historical metrics per route" }, "direction_metrics": { "type": "object", "description": "Historical metrics per direction" }, "routes": { "type": "object", "description": "Known routes" } } } }, "required": ["enabled"], "additionalProperties": false }, "guardrails_config": { "type": "object", "description": "Guardrails configuration for content moderation and policy enforcement", "properties": { "guardrail_rules": { "type": "array", "description": "List of guardrail rules", "items": { "type": "object", "properties": { "id": { "type": "integer", "description": "Unique identifier for the rule" }, "name": { "type": "string", "description": "Name of the guardrail rule" }, "description": { "type": "string", "description": "Description of what the rule does" }, "enabled": { "type": "boolean", "description": "Whether this rule is enabled" }, "cel_expression": { "type": "string", "description": "CEL (Common Expression Language) expression for rule evaluation" }, "apply_to": { "type": "string", "enum": ["input", "output", "both"], "description": "When to apply the guardrail (input, output, or both)" }, "sampling_rate": { "type": "integer", "minimum": 0, "maximum": 100, "description": "Percentage of requests to apply this rule to (0-100)" }, "timeout": { "type": "integer", "minimum": 0, "description": "Timeout in milliseconds for rule execution" }, "provider_config_ids": { "type": "array", "items": { "type": "integer" }, "description": "IDs of provider configurations to use with this rule" } }, "required": ["id", "name", "enabled", "cel_expression", "apply_to"], "additionalProperties": false } }, "guardrail_providers": { "type": "array", "description": "List of guardrail provider configurations", "items": { "type": "object", "properties": { "id": { "type": "integer", "description": "Unique identifier for the provider config" }, "provider_name": { "type": "string", "description": "Name of the guardrail provider (e.g., 'bedrock', 'azure')" }, "policy_name": { "type": "string", "description": "Name of the specific policy to use" }, "enabled": { "type": "boolean", "description": "Whether this provider config is enabled" }, "timeout": { "type": "integer", "minimum": 0, "description": "Timeout in milliseconds for provider execution" }, "config": { "type": "object", "description": "Provider-specific configuration" } }, "required": ["id", "provider_name", "policy_name", "enabled"], "additionalProperties": false } } }, "additionalProperties": false }, "access_profiles": { "type": "array", "description": "Enterprise access profile templates for RBAC-driven governance controls", "items": { "$ref": "#/$defs/access_profile" } }, "budget_line": { "type": "object", "properties": { "id": { "type": "string", "description": "Budget ID" }, "max_limit": { "type": "number", "description": "Maximum spend limit in USD" }, "reset_duration": { "type": "string", "description": "Reset window, e.g. \"1M\", \"1h\"" } }, "required": [ "id", "max_limit", "reset_duration" ], "additionalProperties": false }, "rate_limit_line": { "type": "object", "properties": { "id": { "type": "string", "description": "Rate limit ID" }, "token_max_limit": { "type": "integer", "description": "Maximum number of tokens allowed in the reset window" }, "token_reset_duration": { "type": "string", "description": "Token reset window, e.g. \"1M\", \"1h\"" }, "request_max_limit": { "type": "integer", "description": "Maximum number of requests allowed in the reset window" }, "request_reset_duration": { "type": "string", "description": "Request reset window, e.g. \"1M\", \"1h\"" } }, "required": ["id"], "additionalProperties": false }, "access_profile": { "type": "object", "properties": { "name": { "type": "string", "description": "Unique access profile name" }, "description": { "type": "string", "description": "Optional profile description" }, "is_active": { "type": "boolean", "description": "Whether this access profile is active", "default": true }, "tags": { "type": "array", "items": { "type": "string" }, "description": "Optional list of tags for filtering and grouping profiles" }, "budgets": { "type": "array", "items": { "$ref": "#/$defs/budget_line" }, "description": "Profile-level budgets" }, "rate_limit": { "$ref": "#/$defs/rate_limit_line" }, "provider_configs": { "type": "array", "description": "Per-provider restrictions and limits for this profile", "items": { "$ref": "#/$defs/access_profile_provider_config" } }, "mcp_tool_groups": { "type": "array", "description": "MCP tool group associations", "items": { "$ref": "#/$defs/access_profile_mcp_tool_group" } }, "mcp_servers": { "type": "array", "description": "MCP server associations", "items": { "$ref": "#/$defs/access_profile_mcp_server" } }, "mcp_tool_overrides": { "type": "array", "description": "Per-tool include/exclude MCP overrides", "items": { "$ref": "#/$defs/access_profile_mcp_tool_override" } } }, "required": [ "name" ], "additionalProperties": false }, "access_profile_provider_config": { "type": "object", "properties": { "provider_name": { "type": "string", "description": "Provider name" }, "all_models_allowed": { "type": "boolean", "description": "Whether all models under this provider are allowed", "default": false }, "allowed_models": { "type": "array", "items": { "type": "string" }, "description": "Allowed model names (ignored when all_models_allowed is true)" }, "budgets": { "type": "array", "items": { "$ref": "#/$defs/budget_line" } }, "rate_limit": { "$ref": "#/$defs/rate_limit_line" } }, "required": [ "provider_name" ], "additionalProperties": false }, "access_profile_mcp_tool_group": { "type": "object", "properties": { "tool_group_id": { "type": "integer", "minimum": 1, "description": "MCP tool group ID" } }, "required": [ "tool_group_id" ], "additionalProperties": false }, "access_profile_mcp_server": { "type": "object", "properties": { "mcp_server_id": { "type": "string", "description": "MCP server identifier" } }, "required": [ "mcp_server_id" ], "additionalProperties": false }, "access_profile_mcp_tool_override": { "type": "object", "properties": { "mcp_client_id": { "type": "string", "description": "MCP client identifier" }, "tool_name": { "type": "string", "description": "Tool name" }, "action": { "type": "string", "enum": ["include", "exclude"], "description": "Override action" } }, "required": [ "mcp_client_id", "tool_name", "action" ], "additionalProperties": false }, "audit_logs_config": { "type": "object", "description": "Audit logs configuration for CADF-compliant activity logging", "properties": { "disabled": { "type": "boolean", "description": "Whether audit logging is disabled (default: false)", "default": false }, "hmac_key": { "type": "string", "description": "HMAC secret key for signing audit events (minimum 32 bytes). Can use env. prefix for environment variables (e.g., 'env.AUDIT_HMAC_KEY')." }, "retention_days": { "type": "integer", "minimum": 0, "description": "Number of days to retain audit logs (0 means no retention limit)" } }, "additionalProperties": false }, "large_payload_optimization": { "type": "object", "description": "Large payload streaming optimization configuration. Detects large request/response payloads and streams them through without full materialization.", "properties": { "enabled": { "type": "boolean", "description": "Master toggle for large payload optimization (default: false)", "default": false }, "request_threshold_bytes": { "type": "integer", "minimum": 0, "description": "Request body size in bytes above which streaming kicks in (default: 10MB)", "default": 10485760 }, "response_threshold_bytes": { "type": "integer", "minimum": 0, "description": "Response body size in bytes above which streaming kicks in (default: 10MB)", "default": 10485760 }, "prefetch_size_bytes": { "type": "integer", "minimum": 0, "description": "How much of the request to prefetch for metadata extraction in bytes (default: 64KB)", "default": 65536 }, "max_payload_bytes": { "type": "integer", "minimum": 0, "description": "Maximum allowed payload size in bytes; reject above this (default: 500MB)", "default": 524288000 }, "truncated_log_bytes": { "type": "integer", "minimum": 0, "description": "How many bytes to store as truncated preview in logs (default: 1MB)", "default": 1048576 } }, "additionalProperties": false }, "websocket_config": { "type": "object", "description": "Optional tuning for the WebSocket gateway (Responses API WebSocket Mode, Realtime API). WebSocket is always enabled; these fields override the high defaults.", "properties": { "max_connections_per_user": { "type": "integer", "minimum": 1, "description": "Maximum concurrent WebSocket connections per user", "default": 100 }, "transcript_buffer_size": { "type": "integer", "minimum": 1, "description": "Number of transcript entries to buffer for Realtime API mid-session fallback", "default": 100 }, "pool": { "type": "object", "description": "Upstream WebSocket connection pool configuration", "properties": { "max_idle_per_key": { "type": "integer", "minimum": 1, "description": "Maximum idle connections per provider/key combination", "default": 50 }, "max_total_connections": { "type": "integer", "minimum": 1, "description": "Maximum total idle connections across all providers", "default": 1000 }, "idle_timeout_seconds": { "type": "integer", "minimum": 1, "description": "Seconds before an idle connection is evicted", "default": 600 }, "max_connection_lifetime_seconds": { "type": "integer", "minimum": 1, "description": "Maximum lifetime of a connection in seconds", "default": 7200 } }, "additionalProperties": false } }, "additionalProperties": false }, "provider_pricing_override": { "type": "object", "description": "Scoped pricing override applied at runtime by the model catalog", "properties": { "id": { "type": "string", "description": "Unique pricing override ID" }, "name": { "type": "string", "description": "Human-readable name for this override" }, "scope_kind": { "type": "string", "description": "Scope level for this override", "enum": [ "global", "provider", "provider_key", "virtual_key", "virtual_key_provider", "virtual_key_provider_key" ] }, "virtual_key_id": { "type": "string", "description": "Virtual key ID (required for virtual_key* scopes)" }, "provider_id": { "type": "string", "description": "Provider ID (required for provider* scopes)" }, "provider_key_id": { "type": "string", "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" }, "match_type": { "type": "string", "description": "How the pattern is matched against model names", "enum": ["exact", "wildcard"] }, "pattern": { "type": "string", "description": "Model name pattern to match (exact name or wildcard prefix ending with *)" }, "request_types": { "type": "array", "description": "Request types this override applies to. At least one value is required.", "minItems": 1, "items": { "type": "string" } }, "pricing_patch": { "type": "string", "description": "JSON-encoded pricing fields to override (e.g. '{\"input_cost_per_token\":0.000001}')" }, "config_hash": { "type": "string", "description": "Internal hash for change detection (auto-managed)" } }, "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"], "additionalProperties": false }, "pricing_override_match_type": { "type": "string", "enum": ["exact", "wildcard"] }, "pricing_override_request_type": { "type": "string", "enum": [ "chat_completion", "text_completion", "responses", "embedding", "rerank", "speech", "transcription", "image_generation", "image_variation", "image_edit", "video_generation", "video_remix" ] }, "custom_provider_config": { "type": "object", "description": "Custom provider configuration for extending or customizing provider behavior", "properties": { "is_key_less": { "type": "boolean", "description": "Whether the custom provider requires a key" }, "base_provider_type": { "type": "string", "enum": [ "openai", "azure", "anthropic", "bedrock", "cohere", "vertex", "mistral", "ollama", "groq", "sgl", "parasail", "perplexity", "cerebras", "gemini", "openrouter", "elevenlabs", "huggingface", "nebius", "xai", "replicate", "vllm", "runway", "fireworks" ], "description": "Base provider type to extend" }, "request_path_overrides": { "type": "object", "description": "Mapping of request type to custom path overriding the default provider path", "additionalProperties": { "type": "string" } }, "allowed_requests": { "type": "object", "description": "Allowed request types for the custom provider", "properties": { "list_models": { "type": "boolean" }, "text_completion": { "type": "boolean" }, "text_completion_stream": { "type": "boolean" }, "chat_completion": { "type": "boolean" }, "chat_completion_stream": { "type": "boolean" }, "responses": { "type": "boolean" }, "responses_stream": { "type": "boolean" }, "count_tokens": { "type": "boolean" }, "embedding": { "type": "boolean" }, "rerank": { "type": "boolean" }, "ocr": { "type": "boolean" }, "speech": { "type": "boolean" }, "speech_stream": { "type": "boolean" }, "transcription": { "type": "boolean" }, "transcription_stream": { "type": "boolean" }, "image_generation": { "type": "boolean" }, "image_generation_stream": { "type": "boolean" }, "image_edit": { "type": "boolean" }, "image_edit_stream": { "type": "boolean" }, "image_variation": { "type": "boolean" }, "video_generation": { "type": "boolean" }, "video_retrieve": { "type": "boolean" }, "video_download": { "type": "boolean" }, "video_delete": { "type": "boolean" }, "video_list": { "type": "boolean" }, "video_remix": { "type": "boolean" }, "batch_create": { "type": "boolean" }, "batch_list": { "type": "boolean" }, "batch_retrieve": { "type": "boolean" }, "batch_cancel": { "type": "boolean" }, "batch_delete": { "type": "boolean" }, "batch_results": { "type": "boolean" }, "file_upload": { "type": "boolean" }, "file_list": { "type": "boolean" }, "file_retrieve": { "type": "boolean" }, "file_delete": { "type": "boolean" }, "file_content": { "type": "boolean" }, "container_create": { "type": "boolean" }, "container_list": { "type": "boolean" }, "container_retrieve": { "type": "boolean" }, "container_delete": { "type": "boolean" }, "container_file_create": { "type": "boolean" }, "container_file_list": { "type": "boolean" }, "container_file_retrieve": { "type": "boolean" }, "container_file_content": { "type": "boolean" }, "container_file_delete": { "type": "boolean" }, "passthrough": { "type": "boolean" }, "passthrough_stream": { "type": "boolean" }, "websocket_responses": { "type": "boolean" }, "realtime": { "type": "boolean" } }, "additionalProperties": false } }, "required": ["base_provider_type"], "additionalProperties": false } } }