# Default values for Bifrost.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Bifrost application configuration
# Number of pod replicas for the Deployment.
replicaCount: 1
image:
  # Container image repository
  # Default: Docker Hub public image
  # For enterprise customers with private registry, use full URL:
  #   repository: us-west1-docker.pkg.dev/bifrost-enterprise/your-org/bifrost
  #   repository: your-registry.example.com/your-org/bifrost
  #   repository: 123456789.dkr.ecr.us-east-1.amazonaws.com/bifrost
  repository: docker.io/maximhq/bifrost
  pullPolicy: IfNotPresent
  # REQUIRED: Specify the image tag (e.g., v1.5.0, latest)
  # Docker images are tagged with v prefix (e.g., v1.5.0)
  # See available tags at: https://hub.docker.com/r/maximhq/bifrost/tags
  tag: ""
# Image pull secrets and chart name overrides (all empty by default).
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""
# Annotations to add to the deployment metadata
# Useful for tools like Keel (keel.sh) for automatic image updates
# Example:
#   deploymentAnnotations:
#     keel.sh/policy: force
#     keel.sh/trigger: poll
deploymentAnnotations: {}

# Labels to add to the deployment metadata (in addition to default labels)
deploymentLabels: {}

# Annotations and labels to add to the pod template.
podAnnotations: {}
podLabels: {}
# Pod-level security context: run as non-root UID 1000 with fsGroup 1000.
podSecurityContext:
  fsGroup: 1000
  runAsUser: 1000
  runAsNonRoot: true
# Container-level security context: all Linux capabilities dropped, non-root UID 1000.
securityContext:
  capabilities:
    drop:
      - ALL
  readOnlyRootFilesystem: false
  runAsNonRoot: true
  runAsUser: 1000
# Kubernetes Service settings.
service:
  type: ClusterIP
  port: 8080
  annotations: {}
# Ingress settings (disabled by default).
ingress:
  enabled: false
  className: ""
  annotations: {}
  hosts:
    - host: bifrost.local
      paths:
        - path: /
          pathType: Prefix
  tls: []
# CPU/memory requests and limits for the Bifrost container.
resources:
  limits:
    cpu: 2000m
    memory: 2Gi
  requests:
    cpu: 500m
    memory: 512Mi
# Liveness probe: HTTP GET /health on the named port `http`.
livenessProbe:
  httpGet:
    path: /health
    port: http
  initialDelaySeconds: 30
  periodSeconds: 30
  timeoutSeconds: 5
  failureThreshold: 3
# Readiness probe: HTTP GET /health on the named port `http`.
readinessProbe:
  httpGet:
    path: /health
    port: http
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 3
# Horizontal Pod Autoscaler settings (disabled by default).
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 10
  targetCPUUtilizationPercentage: 80
  targetMemoryUtilizationPercentage: 80
  # HPA scaling behavior configuration
  # Controls how quickly the HPA scales up/down to prevent connection disruption
  behavior:
    scaleDown:
      # Stabilization window prevents rapid scale-down oscillation.
      # The HPA will wait this long after the last scale event before scaling down again.
      # Important for long-lived streaming connections (e.g. SSE for LLM inference).
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 1
          periodSeconds: 120
    scaleUp:
      stabilizationWindowSeconds: 30
# Additional volumes on the output Deployment definition.
volumes: []

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []

# Pod scheduling constraints (all empty by default).
nodeSelector: {}

tolerations: []

affinity: {}
# Graceful shutdown configuration for long-lived connections (SSE streaming)
# When a pod is terminated (e.g. during HPA scale-down), active streaming connections
# are severed abruptly. This causes clients to lose their SSE stream mid-response.
# The preStop hook and termination grace period give in-flight requests time to complete.
terminationGracePeriodSeconds: 60
lifecycle:
  preStop:
    exec:
      # Sleep allows the pod to be removed from the Service endpoints and load balancer
      # before the process starts shutting down, preventing new connections from arriving
      # while existing ones drain.
      command: ["sh", "-c", "sleep 15"]
# Bifrost specific configuration
# You can find entire schema at https://getbifrost.ai/schema
bifrost:
  # Application settings
  appDir: /app/data
  port: 8080
  host: 0.0.0.0
  logLevel: info
  logStyle: json

  # Encryption key for sensitive data
  # Can be set as a secret or environment variable
  encryptionKey: ""

  # Use an existing Kubernetes secret for the encryption key.
  # When `name` is set, takes precedence over `encryptionKey`: the chart
  # injects BIFROST_ENCRYPTION_KEY into the pod via secretKeyRef and writes
  # `encryption_key: "env.BIFROST_ENCRYPTION_KEY"` in the rendered config.json.
  encryptionKeySecret:
    name: ""
    key: "encryption-key"

  # Authentication configuration (top-level)
  # This controls authentication for Bifrost API and dashboard
  authConfig:
    adminUsername: ""
    adminPassword: ""
    isEnabled: false
    disableAuthOnInference: false
    # Use existing Kubernetes secret for admin credentials
    existingSecret: ""
    usernameKey: "username"
    passwordKey: "password"

  # Client configuration
  client:
    dropExcessRequests: false
    initialPoolSize: 300
    allowedOrigins:
      - "*"
    enableLogging: true
    disableContentLogging: false
    disableDbPingsInHealth: false
    logRetentionDays: 365
    enforceGovernanceHeader: false
    allowDirectKeys: false
    maxRequestBodySizeMb: 100
    compat:
      convertTextToChat: false
      convertChatToResponses: false
      shouldDropParams: false
      shouldConvertParams: false
    prometheusLabels: []
    # Header filtering configuration for x-bf-eh-* headers forwarded to LLM providers
    headerFilterConfig:
      allowlist: []
      denylist: []
    # asyncJobResultTTL: 3600  # Default TTL for async job results in seconds
    # requiredHeaders: []  # Headers that must be present on every request
    # loggingHeaders: []  # Headers to capture in log metadata
    # allowedHeaders: []  # Additional allowed headers for CORS and WebSocket
    # mcpAgentDepth: 10  # Maximum depth for MCP agent mode tool execution
    # mcpToolExecutionTimeout: 30  # Timeout for individual MCP tool execution in seconds
    # mcpCodeModeBindingLevel: ""  # Code mode binding level (server or tool)
    # mcpToolSyncInterval: 0  # Global tool sync interval in minutes (0 = disabled)
    # hideDeletedVirtualKeysInFilters: false  # Omit deleted virtual keys from logs/MCP filter data
    # whitelistedRoutes: []  # Routes that bypass auth middleware
    # routingChainMaxDepth: 10  # Maximum depth for routing rule chain evaluation

  # Framework configuration
  framework:
    pricing:
      # Custom pricing URL for model cost data
      pricingUrl: ""
      # Sync interval in seconds (default: 86400 = 24 hours, minimum: 3600)
      pricingSyncInterval: 86400

  # Provider configurations (add your provider keys here)
  # You can specify API keys directly or use env.VAR_NAME syntax to reference environment variables
  # When using existingSecret in providerSecrets, the keys will be injected as env vars and
  # you should use env.VAR_NAME syntax in the value field
  # Note: The entire providers block is passed through to the config as-is.
  # See https://getbifrost.ai/schema for the full provider schema.
  providers: {}
  # openai:
  #   keys:
  #     - name: "primary-key"  # Key name (required, must be unique)
  #       value: "sk-..."  # Direct value
  #       weight: 1
  #       models: ["gpt-4o", "gpt-4o-mini"]  # Restrict key to specific models
  #       use_for_batch_api: false  # Whether this key can be used for batch API
  #     - name: "secondary-key"
  #       value: "env.OPENAI_KEY"  # Reference to environment variable
  #       weight: 1
  #   # Network configuration (optional, per-provider)
  #   network_config:
  #     base_url: ""  # Custom base URL (required for Ollama)
  #     extra_headers: {}  # Additional headers to send with requests
  #     default_request_timeout_in_seconds: 300  # Request timeout
  #     max_retries: 3  # Maximum number of retries
  #     retry_backoff_initial_ms: 500  # Initial retry backoff in ms
  #     retry_backoff_max_ms: 5000  # Max retry backoff in ms
  #     stream_idle_timeout_in_seconds: 60  # Max wait for next stream chunk (default: 60)
  #     max_conns_per_host: 5000  # Max TCP connections per host (default: 5000)
  #     beta_header_overrides:  # Override Anthropic beta header support (optional)
  #       redact-thinking-: true  # Enable/disable specific beta headers by prefix
  #   # Concurrency configuration (optional)
  #   concurrency_and_buffer_size:
  #     concurrency: 100  # Number of concurrent requests
  #     buffer_size: 200  # Buffer size for requests
  #   # Proxy configuration (optional)
  #   proxy_config:
  #     type: "none"  # Options: none, http, socks5, environment
  #     url: ""
  #     username: ""
  #     password: ""
  #     ca_cert_pem: ""  # PEM-encoded CA cert for SSL-intercepting proxies
  #   send_back_raw_response: false  # Include raw response in BifrostResponse
  #   store_raw_request_response: false  # Capture raw payloads for plugins only; not returned to client
  #
  # anthropic:
  #   keys:
  #     - name: "anthropic-key"
  #       value: "sk-ant-..."
  #       weight: 1
  #
  # # Azure OpenAI example (requires azure_key_config)
  # azure:
  #   keys:
  #     - name: "azure-key"
  #       value: "..."
  #       weight: 1
  #       azure_key_config:
  #         endpoint: "https://your-resource.openai.azure.com"
  #         api_version: "2024-02-15-preview"
  #         deployments:
  #           gpt-4o: "my-gpt4o-deployment"
  #
  # # Google Vertex AI example (requires vertex_key_config)
  # vertex:
  #   keys:
  #     - name: "vertex-key"
  #       value: ""
  #       weight: 1
  #       vertex_key_config:
  #         project_id: "my-gcp-project"
  #         region: "us-central1"
  #         auth_credentials: "env.GOOGLE_CREDENTIALS"
  #
  # # AWS Bedrock example (requires bedrock_key_config)
  # bedrock:
  #   keys:
  #     - name: "bedrock-key"
  #       value: ""
  #       weight: 1
  #       bedrock_key_config:
  #         region: "us-east-1"
  #         access_key: "env.AWS_ACCESS_KEY_ID"
  #         secret_key: "env.AWS_SECRET_ACCESS_KEY"

  # Provider secrets - use existing Kubernetes secrets for provider API keys
  # These will be injected as environment variables that can be referenced in providers config
  providerSecrets: {}
  # openai:
  #   existingSecret: "my-openai-secret"
  #   key: "api-key"
  #   envVar: "OPENAI_API_KEY"  # Environment variable name to inject
  # anthropic:
  #   existingSecret: "my-anthropic-secret"
  #   key: "api-key"
  #   envVar: "ANTHROPIC_API_KEY"

  # MCP (Model Context Protocol) configuration
  mcp:
    enabled: false
    clientConfigs: []
    # - name: "example-mcp"
    #   connectionType: "stdio"
    #   stdioConfig:
    #     command: "/path/to/mcp/server"
    #     args: []
    #     envs: []
    #   # Optional: source connection_string from a Kubernetes secret.
    #   # When set, chart injects BIFROST_MCP_<NAME>_CONNECTION_STRING
    #   # into the pod and rewrites connection_string in config.json
    #   # to `env.BIFROST_MCP_<NAME>_CONNECTION_STRING`.
    #   secretRef:
    #     name: ""  # k8s secret name
    #     connectionStringKey: "connection-string"  # key within the secret
    # toolSyncInterval: "10m"  # Global tool sync interval (Go duration)
    # Tool manager configuration
    toolManagerConfig:
      toolExecutionTimeout: 30
      maxAgentDepth: 10
      # codeModeBindingLevel: ""  # Code mode binding level (server or tool)

  # Plugins configuration
  # Plugin version must be >= 1 (schema minimum). Use values > 1 to force DB-backed plugin config replacement on upgrade.
  plugins:
    telemetry:
      enabled: false
      version: 1
      config:
        custom_labels: []
        # push_gateway:
        #   enabled: false
        #   push_gateway_url: ""
        #   job_name: "bifrost"
        #   instance_id: ""
        #   push_interval: 15
        #   basic_auth:
        #     username: ""
        #     password: ""

    logging:
      enabled: false
      version: 1
      config:
        disable_content_logging: false
        logging_headers: []

    governance:
      enabled: false
      version: 1
      config:
        is_vk_mandatory: false
        required_headers: []
        is_enterprise: false

    maxim:
      enabled: false
      version: 1
      config:
        api_key: ""
        log_repo_id: ""
      # Use existing Kubernetes secret for API key (takes precedence over config.api_key)
      secretRef:
        name: ""
        key: "api-key"

    semanticCache:
      enabled: false
      version: 1
      config:
        # Semantic caching mode (dimension > 1): requires provider, keys, and embedding_model
        # Direct caching mode (dimension: 1): hash-based exact matching, no embedding provider needed
        provider: "openai"
        keys: []
        embedding_model: "text-embedding-3-small"
        dimension: 1536
        threshold: 0.8
        ttl: "5m"
        conversation_history_threshold: 3
        cache_by_model: true
        cache_by_provider: true
        exclude_system_prompt: false
        cleanup_on_shutdown: false
        vector_store_namespace: ""

    otel:
      enabled: false
      version: 1
      config:
        service_name: "bifrost"
        collector_url: ""
        trace_type: "genai_extension"
        protocol: "grpc"
        # Push-based metrics export via OTLP (recommended for multi-node clusters)
        metrics_enabled: false
        metrics_endpoint: ""  # e.g., http://otel-collector:4318/v1/metrics (HTTP) or otel-collector:4317 (gRPC)
        metrics_push_interval: 15  # Push interval in seconds (1-300)
        # Custom headers for the collector (supports env.VAR_NAME prefix for env var substitution)
        headers: {}
        # TLS configuration
        tls_ca_cert: ""  # Path to TLS CA certificate file
        insecure: false  # Skip TLS verification (ignored if tls_ca_cert is set)

    datadog:
      enabled: false
      version: 1
      config:
        service_name: "bifrost"
        agent_addr: "localhost:8126"
        env: ""
        version: ""
        custom_tags: {}
        enable_traces: true

    # Custom/dynamic plugins
    custom: []
    # - name: "my-custom-plugin"
    #   enabled: true
    #   path: "/plugins/my-plugin.so"
    #   version: 1  # must be >= 1; increase to force DB-backed plugin config replacement
    #   config:
    #     key: value

  # Governance configuration for budgets, rate limits, customers, teams, virtual keys, and routing rules
  governance:
    budgets: []
    # - id: "budget-1"
    #   max_limit: 100
    #   reset_duration: "1M"  # Supports: 30s, 5m, 1h, 1d, 1w, 1M, 1Y
    rateLimits: []
    # - id: "rate-limit-1"
    #   token_max_limit: 100000
    #   token_reset_duration: "1d"
    #   request_max_limit: 1000
    #   request_reset_duration: "1h"
    customers: []
    # - id: "customer-1"
    #   name: "Customer Name"
    #   budget_id: "budget-1"
    #   rate_limit_id: "rate-limit-1"
    teams: []
    # - id: "team-1"
    #   name: "Team Name"
    #   customer_id: "customer-1"
    #   budget_id: "budget-1"
    #   rate_limit_id: "rate-limit-1"
    #   profile: {}  # Team profile data
    #   config: {}  # Team configuration data
    #   claims: {}  # Team claims data
    virtualKeys: []
    # - id: "vk-1"
    #   name: "Virtual Key 1"
    #   description: "Virtual key description"
    #   value: "vk-..."  # Optional - auto-generated if omitted
    #   is_active: true
    #   team_id: "team-1"  # Mutually exclusive with customer_id
    #   customer_id: ""  # Mutually exclusive with team_id
    #   budget_id: "budget-1"
    #   rate_limit_id: "rate-limit-1"
    #   # Provider-specific configurations (empty means all providers allowed)
    #   provider_configs:
    #     - provider: "openai"
    #       weight: 1.0
    #       allowed_models: ["gpt-4o"]
    #       budget_id: ""
    #       rate_limit_id: ""
    #       keys:
    #         - key_id: "uuid-of-key"
    #           name: "my-key"
    #           value: "sk-..."
    #   # MCP configurations for this virtual key
    #   mcp_configs:
    #     - mcp_client_id: 1
    #       tools_to_execute: ["tool1", "tool2"]
    modelConfigs: []
    # - id: "model-config-1"
    #   model_name: "gpt-4o"
    #   provider: "openai"
    #   budget_id: "budget-1"
    #   rate_limit_id: "rate-limit-1"
    providers: []
    # - name: "openai"
    #   budget_id: "budget-1"
    #   rate_limit_id: "rate-limit-1"
    #   send_back_raw_request: false
    #   send_back_raw_response: false
    routingRules: []
    # - id: "route-1"
    #   name: "Route to Azure"
    #   description: "Route GPT requests to Azure"
    #   enabled: true
    #   cel_expression: "model.startsWith('gpt-')"
    #   targets:
    #     - provider: "azure"
    #       model: ""  # Empty means use original model
    #       weight: 1.0
    #   fallbacks: ["openai"]
    #   scope: "global"  # Options: global, team, customer, virtual_key
    #   scope_id: ""  # Required for non-global scopes
    #   priority: 0  # Lower = evaluated first
    # NOTE(review): this second authConfig is placed under `governance` on the
    # assumption that it is distinct from the "(top-level)" bifrost.authConfig
    # above; as a direct child of `bifrost` it would be a duplicate key.
    # TODO confirm against the chart templates.
    authConfig:
      adminUsername: ""
      adminPassword: ""
      isEnabled: false
      disableAuthOnInference: false
      # Use existing Kubernetes secret for admin credentials
      existingSecret: ""
      usernameKey: "username"
      passwordKey: "password"

  # Cluster mode configuration for distributed deployments
  cluster:
    enabled: false
    # region: ""  # Region identifier for cluster
    peers: []
    # - "bifrost-0.bifrost-headless:7946"
    # - "bifrost-1.bifrost-headless:7946"
    gossip:
      port: 7946
      config:
        timeoutSeconds: 10
        successThreshold: 3
        failureThreshold: 3
    discovery:
      enabled: false
      # Discovery type: kubernetes, dns, udp, consul, etcd, mdns
      type: ""
      # Service name used by consul/etcd/udp discovery and as mDNS default
      # This must be explicitly set for consul/etcd/udp discovery.
      serviceName: ""
      allowedAddressSpace: []
      # Kubernetes discovery
      k8sNamespace: ""
      k8sLabelSelector: ""
      # DNS discovery
      dnsNames: []
      # UDP broadcast discovery
      udpBroadcastPort: 0
      # Consul discovery
      consulAddress: ""
      # Etcd discovery
      etcdEndpoints: []
      # mDNS discovery
      mdnsService: ""

  # SCIM/SSO configuration for enterprise SSO
  scim:
    enabled: false
    # Provider: okta, entra
    provider: ""
    config: {}
    # Okta configuration:
    #   issuerUrl: "https://your-domain.okta.com/oauth2/default"
    #   clientId: ""
    #   clientSecret: ""
    #   apiToken: ""
    #   audience: ""
    #   userIdField: "sub"
    #   teamIdsField: "groups"
    #   rolesField: "roles"
    #
    # Entra (Azure AD) configuration:
    #   tenantId: ""
    #   clientId: ""
    #   clientSecret: ""
    #   cloud: "commercial"  # or "gcc-high" or "dod"
    #   audience: ""
    #   appIdUri: ""
    #   userIdField: "oid"
    #   teamIdsField: "groups"
    #   rolesField: "roles"

  # Load balancer configuration for intelligent request routing
  loadBalancer:
    enabled: false
    trackerConfig: {}
    bootstrap: {}

  # Guardrails configuration for content moderation and policy enforcement
  guardrails:
    rules: []
    # - id: 1
    #   name: "Block PII"
    #   description: "Block requests containing PII"
    #   enabled: true
    #   cel_expression: "!contains(request.body, 'SSN')"
    #   apply_to: "input"
    #   sampling_rate: 100
    #   timeout: 1000
    providers: []
    # - id: 1
    #   provider_name: "bedrock"
    #   policy_name: "content-filter"
    #   enabled: true
    #   config: {}

  # Access profiles (enterprise): seed RBAC access profile templates from Helm.
  # This is rendered directly as top-level `access_profiles` in config.json.
  accessProfiles: []
  # - name: "platform-default"
  #   description: "Default platform profile"
  #   is_active: true
  #   tags: ["platform", "default"]
  #   budgets:
  #     - id: "ap-budget-1"
  #       max_limit: 100
  #       reset_duration: "1M"
  #   rate_limit:
  #     id: "ap-rate-limit-1"
  #     token_max_limit: 200000
  #     token_reset_duration: "1h"
  #   provider_configs:
  #     - provider_name: "openai"
  #       all_models_allowed: false
  #       allowed_models: ["gpt-4o", "gpt-4o-mini"]
  #   mcp_tool_groups:
  #     - tool_group_id: 1
  #   mcp_servers:
  #     - mcp_server_id: "github"
  #   mcp_tool_overrides:
  #     - mcp_client_id: "github"
  #       tool_name: "create_pull_request"
  #       action: "include"

  # Audit logs configuration for CADF-compliant activity logging
  auditLogs:
    disabled: false
    hmacKey: ""

  # Large payload optimization - streams large payloads without full materialization
  # largePayloadOptimization:
  #   enabled: false
  #   requestThresholdBytes: 10485760  # 10MB
  #   responseThresholdBytes: 10485760  # 10MB
  #   prefetchSizeBytes: 65536  # 64KB
  #   maxPayloadBytes: 524288000  # 500MB
  #   truncatedLogBytes: 1048576  # 1MB

  # WebSocket gateway configuration (Responses API, Realtime API)
  # websocket:
  #   maxConnectionsPerUser: 100
  #   transcriptBufferSize: 100
  #   pool:
  #     maxIdlePerKey: 50
  #     maxTotalConnections: 1000
  #     idleTimeoutSeconds: 600
  #     maxConnectionLifetimeSeconds: 7200
# Storage configuration
storage:
  # Default storage mode: sqlite or postgres
  # Used as fallback when per-store type is not specified
  mode: sqlite  # Options: sqlite, postgres

  # Persistent volume for SQLite databases (when using sqlite for any store)
  persistence:
    enabled: true
    # storageClass: "-"  # Use default storage class
    accessMode: ReadWriteOnce
    size: 10Gi
    # existingClaim: ""  # Use an existing PVC

  # Configuration store settings
  configStore:
    enabled: true
    # Backend type for config store. Empty string uses storage.mode as default
    type: ""  # Options: sqlite, postgres, or "" (uses storage.mode)
    # PostgreSQL connection pool tuning (only applies when type is postgres)
    # maxIdleConns: 5
    # maxOpenConns: 50

  # Logs store settings
  logsStore:
    enabled: true
    # Backend type for logs store. Empty string uses storage.mode as default
    type: ""  # Options: sqlite, postgres, or "" (uses storage.mode)
    # PostgreSQL connection pool tuning (only applies when type is postgres)
    # maxIdleConns: 5
    # maxOpenConns: 50

    # Object storage for offloading large log payloads (optional)
    # When enabled, request/response payloads are stored in S3/GCS
    # while the DB keeps only lightweight index data for fast analytics.
    # NOTE(review): nested under logsStore because it concerns log payloads;
    # TODO confirm the expected path against the chart templates.
    objectStorage:
      enabled: false
      # type: s3  # Options: s3, gcs
      # bucket: ""  # Bucket name
      # prefix: bifrost  # Key prefix for stored objects
      # compress: false  # Enable gzip compression for stored objects

      # S3 configuration (when type is s3)
      # region: us-east-1
      # endpoint: ""  # Custom endpoint for MinIO/R2
      # accessKeyId: ""  # Leave empty to use default AWS credential chain
      # secretAccessKey: ""  # (instance role, env vars, shared credentials, etc.)
      # sessionToken: ""  # AWS STS session token (optional)
      # roleArn: ""  # AWS IAM role ARN to assume via STS (works with static creds or instance role)
      # forcePathStyle: false  # Set true for MinIO

      # GCS configuration (when type is gcs)
      # projectId: ""
      # credentialsJson: ""  # Service account JSON, omit for default credentials
# PostgreSQL configuration (when any store uses postgres)
postgresql:
  # Deploy PostgreSQL as part of this chart
  enabled: false

  # Use external PostgreSQL instance
  external:
    enabled: false
    host: ""
    port: 5432
    user: bifrost
    password: ""
    database: bifrost
    sslMode: disable
    # Use existing Kubernetes secret for password (takes precedence over password field)
    existingSecret: ""
    passwordKey: "password"

  # PostgreSQL image configuration
  image:
    repository: postgres
    tag: "16-alpine"
    pullPolicy: IfNotPresent

  # PostgreSQL subchart configuration (when postgresql.enabled is true)
  # NOTE(review): the password below is a literal placeholder committed in the
  # defaults; override it before any non-local deployment.
  auth:
    username: bifrost
    password: bifrost_password
    database: bifrost

  primary:
    persistence:
      enabled: true
      size: 8Gi

    resources:
      limits:
        cpu: 1000m
        memory: 1Gi
      requests:
        cpu: 250m
        memory: 256Mi

  metrics:
    enabled: false
# Vector store configuration
vectorStore:
  # Enable vector store for semantic caching
  enabled: false
  type: none  # Options: none, weaviate, redis, qdrant

  # Weaviate configuration
  weaviate:
    # Deploy Weaviate as part of this chart
    enabled: false

    # Use external Weaviate instance
    external:
      enabled: false
      scheme: http
      host: ""
      apiKey: ""
      grpcHost: ""
      grpcSecured: false
      # timeout: "5s"  # Timeout for operations (e.g., "5s", "30s")
      # className: ""  # Class name for vector store
      # Use existing Kubernetes secret for API key (takes precedence over apiKey field)
      existingSecret: ""
      apiKeyKey: "api-key"

    # Weaviate subchart configuration (when weaviate.enabled is true)
    replicas: 1

    image:
      repository: semitechnologies/weaviate
      tag: "1.24.1"

    persistence:
      enabled: true
      size: 10Gi

    resources:
      limits:
        cpu: 1000m
        memory: 2Gi
      requests:
        cpu: 500m
        memory: 1Gi

    env:
      QUERY_DEFAULTS_LIMIT: "25"
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
      PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      DEFAULT_VECTORIZER_MODULE: "none"
      ENABLE_MODULES: ""
      CLUSTER_HOSTNAME: "node1"

  # Redis configuration
  redis:
    # Deploy Redis as part of this chart
    enabled: false

    # Use external Redis instance
    external:
      enabled: false
      host: ""
      port: 6379
      username: ""
      password: ""
      database: 0
      useTls: false  # Enable TLS for Redis connection
      insecureSkipVerify: false  # Skip TLS certificate verification
      caCertPem: ""  # PEM-encoded CA certificate to trust for Redis TLS
      clusterMode: false  # Use Redis Cluster mode (required for AWS MemoryDB)
      # Connection pool tuning (optional)
      # poolSize: 10  # Maximum number of socket connections
      # maxActiveConns: 0  # Maximum number of active connections
      # minIdleConns: 0  # Minimum number of idle connections
      # maxIdleConns: 0  # Maximum number of idle connections
      # connMaxLifetime: ""  # Connection max lifetime (e.g., "30m")
      # connMaxIdleTime: ""  # Connection max idle time (e.g., "5m")
      # dialTimeout: ""  # Socket connection timeout (e.g., "5s")
      # readTimeout: ""  # Socket read timeout (e.g., "3s")
      # writeTimeout: ""  # Socket write timeout (e.g., "3s")
      # contextTimeout: ""  # Redis operation timeout (e.g., "10s")
      # Use existing Kubernetes secret for password (takes precedence over password field)
      existingSecret: ""
      passwordKey: "password"

    # Redis image configuration
    image:
      repository: redis
      tag: "7-alpine"
      pullPolicy: IfNotPresent

    # Redis subchart configuration (when redis.enabled is true)
    # NOTE(review): the password below is a literal placeholder committed in the
    # defaults; override it before any non-local deployment.
    auth:
      enabled: true
      password: "redis_password"

    master:
      persistence:
        enabled: true
        size: 8Gi

      resources:
        limits:
          cpu: 500m
          memory: 512Mi
        requests:
          cpu: 250m
          memory: 256Mi

    metrics:
      enabled: false

  # Qdrant configuration
  qdrant:
    # Deploy Qdrant as part of this chart
    enabled: false

    # Use external Qdrant instance
    external:
      enabled: false
      host: ""
      port: 6334
      apiKey: ""
      useTls: false
      # Use existing Kubernetes secret for API key (takes precedence over apiKey field)
      existingSecret: ""
      apiKeyKey: "api-key"

    # Qdrant image configuration
    image:
      repository: qdrant/qdrant
      tag: "v1.16.0"
      pullPolicy: IfNotPresent

    # Qdrant subchart configuration (when qdrant.enabled is true)
    persistence:
      enabled: true
      size: 10Gi

    resources:
      limits:
        cpu: 1000m
        memory: 2Gi
      requests:
        cpu: 500m
        memory: 1Gi

  # Pinecone configuration (external only, no self-hosted option)
  pinecone:
    external:
      enabled: false
      apiKey: ""
      indexHost: ""  # Index host URL from Pinecone console (e.g., your-index.svc.environment.pinecone.io)
      # Use existing Kubernetes secret for API key (takes precedence over apiKey field)
      existingSecret: ""
      apiKeyKey: "api-key"
# Environment variables
env: []
# - name: CUSTOM_ENV_VAR
#   value: "value"

# Additional environment variables appended after env
extraEnv: {}
# ANOTHER_ENV_VAR: "value"

# Environment variables from secrets/configmaps
envFrom: []
# - secretRef:
#     name: my-secret
# - configMapRef:
#     name: my-configmap

# Init containers to run before the main application container.
# Provide a list of init containers using standard Kubernetes container spec.
initContainers: []