first commit
This commit is contained in:
316
docs/deployment-guides/helm/client.mdx
Normal file
316
docs/deployment-guides/helm/client.mdx
Normal file
@@ -0,0 +1,316 @@
|
||||
---
|
||||
title: "Client Configuration"
|
||||
description: "Configure the Bifrost client: connection pool, logging, CORS, header filtering, compat shims, and MCP settings"
|
||||
icon: "gear"
|
||||
---
|
||||
|
||||
The `bifrost.client` block controls how Bifrost manages its internal worker pool, request logging, authentication enforcement, header policies, SDK compatibility shims, and MCP agent behaviour. All settings map directly to the `client` section of the rendered `config.json`.
|
||||
|
||||
---
|
||||
|
||||
## Connection Pool
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.initialPoolSize` | Pre-allocated worker goroutines per provider queue | `300` |
|
||||
| `bifrost.client.dropExcessRequests` | Drop requests when queue is full instead of waiting | `false` |
|
||||
|
||||
A larger pool reduces latency spikes under burst load at the cost of higher baseline memory. For production workloads with multiple providers, `1000` is a common starting point.
|
||||
|
||||
```yaml
|
||||
# client-pool.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
client:
|
||||
initialPoolSize: 1000
|
||||
dropExcessRequests: true # Return 429 instead of queuing indefinitely
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f client-pool.yaml
|
||||
|
||||
# Or set inline
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set bifrost.client.initialPoolSize=1000 \
|
||||
--set bifrost.client.dropExcessRequests=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request & Response Logging
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.enableLogging` | Log all LLM requests and responses | `true` |
|
||||
| `bifrost.client.disableContentLogging` | Strip message content from logs (keeps metadata) | `false` |
|
||||
| `bifrost.client.logRetentionDays` | Days to retain log entries in the store | `365` |
|
||||
| `bifrost.client.loggingHeaders` | HTTP request headers to capture in log metadata | `[]` |
|
||||
|
||||
Set `disableContentLogging: true` for HIPAA / PCI compliance workloads where message content must not be persisted.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
enableLogging: true
|
||||
disableContentLogging: true # PII / compliance: store metadata only
|
||||
logRetentionDays: 90
|
||||
loggingHeaders:
|
||||
- "x-request-id"
|
||||
- "x-user-id"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set bifrost.client.disableContentLogging=true \
|
||||
--set bifrost.client.logRetentionDays=90
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security & CORS
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.allowedOrigins` | CORS allowed origins | `["*"]` |
|
||||
| `bifrost.client.allowDirectKeys` | Allow callers to pass provider keys directly in requests | `false` |
|
||||
| `bifrost.client.enforceGovernanceHeader` | Require `x-bf-vk` virtual-key header on every request | `false` |
|
||||
| `bifrost.client.maxRequestBodySizeMb` | Maximum allowed request body size, in MB | `100` |
|
||||
| `bifrost.client.whitelistedRoutes` | Routes that bypass auth middleware | `[]` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
allowedOrigins:
|
||||
- "https://app.yourdomain.com"
|
||||
- "https://admin.yourdomain.com"
|
||||
allowDirectKeys: false # Prevent callers from supplying raw provider keys
|
||||
enforceGovernanceHeader: true # Every request must carry a virtual key
|
||||
maxRequestBodySizeMb: 50
|
||||
whitelistedRoutes:
|
||||
- "/health"
|
||||
- "/metrics"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
--set bifrost.client.enforceGovernanceHeader=true \
|
||||
--set bifrost.client.allowDirectKeys=false
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Header Filtering
|
||||
|
||||
Controls which `x-bf-eh-*` headers are forwarded to upstream LLM providers.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.headerFilterConfig.allowlist` | Only these headers are forwarded (strict allowlist mode) | `[]` |
|
||||
| `bifrost.client.headerFilterConfig.denylist` | These headers are always blocked | `[]` |
|
||||
| `bifrost.client.requiredHeaders` | Headers that must be present on every request | `[]` |
|
||||
| `bifrost.client.allowedHeaders` | Additional headers permitted for CORS and WebSocket | `[]` |
|
||||
|
||||
When both lists are empty, all `x-bf-eh-*` headers pass through. Specifying an `allowlist` enables strict allowlist mode — only the listed headers are forwarded.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
headerFilterConfig:
|
||||
allowlist:
|
||||
- "x-bf-eh-anthropic-version"
|
||||
- "x-bf-eh-openai-beta"
|
||||
denylist: []
|
||||
requiredHeaders:
|
||||
- "x-request-id"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.authConfig.isEnabled` | Enable username/password auth for the API and dashboard | `false` |
|
||||
| `bifrost.authConfig.adminUsername` | Admin username (stored in plain text; prefer `existingSecret`) | `""` |
|
||||
| `bifrost.authConfig.adminPassword` | Admin password (stored in plain text; prefer `existingSecret`) | `""` |
|
||||
| `bifrost.authConfig.existingSecret` | Kubernetes Secret name for credentials | `""` |
|
||||
| `bifrost.authConfig.usernameKey` | Key within the secret for username | `"username"` |
|
||||
| `bifrost.authConfig.passwordKey` | Key within the secret for password | `"password"` |
|
||||
| `bifrost.authConfig.disableAuthOnInference` | Skip auth check on `/v1/*` inference routes | `false` |
|
||||
|
||||
```bash
|
||||
# Create secret first
|
||||
kubectl create secret generic bifrost-admin \
|
||||
--from-literal=username='admin' \
|
||||
--from-literal=password='your-secure-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
authConfig:
|
||||
isEnabled: true
|
||||
disableAuthOnInference: false
|
||||
existingSecret: "bifrost-admin"
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
-f auth-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Encryption
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.encryptionKey` | Optional encryption key (plain text — use `encryptionKeySecret` in production). If omitted, data is stored in plaintext. | `""` |
|
||||
| `bifrost.encryptionKeySecret.name` | Kubernetes Secret name containing the key | `""` |
|
||||
| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` |
|
||||
|
||||
Always use a Kubernetes Secret in production:
|
||||
|
||||
```bash
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=encryption-key='your-32-byte-encryption-key-here'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
-f encryption-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Async Jobs & Database Pings
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.disableDbPingsInHealth` | Exclude DB connectivity from `/health` checks | `false` |
|
||||
| `bifrost.client.asyncJobResultTTL` | TTL (seconds) for async job results | `3600` |
|
||||
|
||||
---
|
||||
|
||||
## Compat Shims
|
||||
|
||||
Compatibility flags that let Bifrost silently adapt request/response shapes for SDK integrations:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.compat.convertTextToChat` | Wrap legacy text completions as chat messages | `false` |
|
||||
| `bifrost.client.compat.convertChatToResponses` | Translate chat completions to Responses API format | `false` |
|
||||
| `bifrost.client.compat.shouldDropParams` | Silently drop unsupported parameters instead of erroring | `false` |
|
||||
| `bifrost.client.compat.shouldConvertParams` | Auto-convert parameter names across provider schemas | `false` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
compat:
|
||||
shouldDropParams: true # Useful when proxying mixed SDK traffic
|
||||
convertTextToChat: true # For clients using the legacy /v1/completions endpoint
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Prometheus Labels
|
||||
|
||||
Add custom labels to every Prometheus metric emitted by Bifrost:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
prometheusLabels:
|
||||
- name: "environment"
|
||||
value: "production"
|
||||
- name: "region"
|
||||
value: "us-east-1"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## MCP Agent Settings
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.mcpAgentDepth` | Maximum tool-call recursion depth for MCP agent mode | `10` |
|
||||
| `bifrost.client.mcpToolExecutionTimeout` | Timeout per tool execution in seconds | `30` |
|
||||
| `bifrost.client.mcpCodeModeBindingLevel` | Code mode binding level (`server` or `tool`) | `""` |
|
||||
| `bifrost.client.mcpToolSyncInterval` | Global tool sync interval in minutes (`0` = disabled) | `0` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
mcpAgentDepth: 15
|
||||
mcpToolExecutionTimeout: 60
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Example
|
||||
|
||||
```yaml
|
||||
# client-full.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
authConfig:
|
||||
isEnabled: true
|
||||
disableAuthOnInference: false
|
||||
existingSecret: "bifrost-admin"
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
|
||||
client:
|
||||
initialPoolSize: 1000
|
||||
dropExcessRequests: true
|
||||
allowedOrigins:
|
||||
- "https://app.yourdomain.com"
|
||||
enableLogging: true
|
||||
disableContentLogging: false
|
||||
logRetentionDays: 90
|
||||
enforceGovernanceHeader: true
|
||||
allowDirectKeys: false
|
||||
maxRequestBodySizeMb: 100
|
||||
headerFilterConfig:
|
||||
allowlist: []
|
||||
denylist: []
|
||||
prometheusLabels:
|
||||
- name: "environment"
|
||||
value: "production"
|
||||
mcpAgentDepth: 10
|
||||
mcpToolExecutionTimeout: 30
|
||||
```
|
||||
|
||||
```bash
|
||||
# Create prerequisites
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=encryption-key='your-32-byte-encryption-key-here'
|
||||
|
||||
kubectl create secret generic bifrost-admin \
|
||||
--from-literal=username='admin' \
|
||||
--from-literal=password='your-secure-password'
|
||||
|
||||
# Install
|
||||
helm install bifrost bifrost/bifrost -f client-full.yaml
|
||||
```
|
||||
523
docs/deployment-guides/helm/cluster.mdx
Normal file
523
docs/deployment-guides/helm/cluster.mdx
Normal file
@@ -0,0 +1,523 @@
|
||||
---
|
||||
title: "Cluster Mode & HA"
|
||||
description: "Run Bifrost in a multi-replica cluster with gossip-based peer discovery, distributed state sync, and high-availability configuration"
|
||||
icon: "network-wired"
|
||||
---
|
||||
|
||||
Cluster mode enables multiple Bifrost replicas to share state — rate limits, budget counters, and governance data — across pods. When `bifrost.cluster.enabled` is `false` (the default), each replica operates independently and state is only shared via the database.
|
||||
|
||||
<Note>
|
||||
Cluster mode requires **PostgreSQL** as the storage backend. SQLite is single-node only.
|
||||
</Note>
|
||||
|
||||
<Warning>
|
||||
`bifrost.cluster.*` is an enterprise capability. OSS images accept these values but do not run cluster mode at runtime.
|
||||
</Warning>
|
||||
|
||||
## When to Use Cluster Mode
|
||||
|
||||
| Scenario | Recommendation |
|
||||
|----------|---------------|
|
||||
| Single replica | Not needed |
|
||||
| Multiple replicas, shared DB only | Optional — DB provides eventual consistency |
|
||||
| Multiple replicas with strict per-minute rate limiting | **Enable cluster mode** — in-memory counters are synced via gossip |
|
||||
| Geographic multi-region | Enable cluster mode with DNS or Consul discovery |
|
||||
|
||||
---
|
||||
|
||||
## Basic Cluster Setup
|
||||
|
||||
```yaml
|
||||
# cluster-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
replicaCount: 3
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
external:
|
||||
enabled: true
|
||||
host: "your-postgres-host.example.com"
|
||||
port: 5432
|
||||
user: bifrost
|
||||
database: bifrost
|
||||
sslMode: require
|
||||
existingSecret: "postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
cluster:
|
||||
enabled: true
|
||||
gossip:
|
||||
port: 7946
|
||||
config:
|
||||
timeoutSeconds: 10
|
||||
successThreshold: 3
|
||||
failureThreshold: 3
|
||||
|
||||
# Spread replicas across nodes for true HA
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: bifrost
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
# Conservative scale-down: avoid killing pods mid-stream
|
||||
autoscaling:
|
||||
enabled: true
|
||||
minReplicas: 3
|
||||
maxReplicas: 10
|
||||
targetCPUUtilizationPercentage: 70
|
||||
behavior:
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 300
|
||||
policies:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 120
|
||||
|
||||
# Give in-flight SSE streams time to drain
|
||||
terminationGracePeriodSeconds: 90
|
||||
lifecycle:
|
||||
preStop:
|
||||
exec:
|
||||
command: ["sh", "-c", "sleep 20"]
|
||||
```
|
||||
|
||||
```bash
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-postgres-password'
|
||||
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=encryption-key='your-32-byte-encryption-key'
|
||||
|
||||
helm install bifrost bifrost/bifrost -f cluster-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Peer Discovery
|
||||
|
||||
Bifrost uses a gossip protocol (memberlist) for peer-to-peer state sync. Configure how peers find each other:
|
||||
|
||||
<Note>
|
||||
For `consul`, `etcd`, and `udp` discovery, set `bifrost.cluster.discovery.serviceName` so nodes register/discover under a stable service identity.
|
||||
</Note>
|
||||
|
||||
<Tabs>
|
||||
|
||||
<Tab title="Kubernetes (Recommended)">
|
||||
|
||||
Bifrost queries the Kubernetes API to find other Bifrost pods by label selector. No static peer list needed — works with HPA.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
discovery:
|
||||
enabled: true
|
||||
type: kubernetes
|
||||
k8sNamespace: "default" # namespace where Bifrost runs
|
||||
k8sLabelSelector: "app.kubernetes.io/name=bifrost"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
The service account needs permission to get, list, and watch pods. Have the chart create the service account, then bind a Role to it:
|
||||
|
||||
```yaml
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
```
|
||||
|
||||
```bash
|
||||
# Create a namespaced Role and RoleBinding for pod discovery (apply once)
|
||||
kubectl apply -f - <<'EOF'
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: bifrost-pod-discovery
|
||||
namespace: default
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["list", "get", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: bifrost-pod-discovery
|
||||
namespace: default
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: bifrost
|
||||
namespace: default
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: bifrost-pod-discovery
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
EOF
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f cluster-k8s-discovery-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="DNS">
|
||||
|
||||
Uses a headless service DNS name to resolve peer IPs. Works well with StatefulSets (predictable pod DNS names).
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
discovery:
|
||||
enabled: true
|
||||
type: dns
|
||||
dnsNames:
|
||||
- "bifrost-headless.default.svc.cluster.local"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
The chart automatically creates a headless service (`bifrost-headless`) when cluster mode is enabled with a StatefulSet. For Deployments, create it manually:
|
||||
|
||||
```bash
|
||||
kubectl apply -f - <<'EOF'
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: bifrost-headless
|
||||
spec:
|
||||
clusterIP: None
|
||||
selector:
|
||||
app.kubernetes.io/name: bifrost
|
||||
ports:
|
||||
- name: gossip
|
||||
port: 7946
|
||||
protocol: TCP
|
||||
EOF
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f cluster-dns-discovery-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Static Peers">
|
||||
|
||||
Enumerate peer addresses explicitly. Use when discovery mechanisms are unavailable or you want deterministic membership.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
peers:
|
||||
- "bifrost-0.bifrost-headless.default.svc.cluster.local:7946"
|
||||
- "bifrost-1.bifrost-headless.default.svc.cluster.local:7946"
|
||||
- "bifrost-2.bifrost-headless.default.svc.cluster.local:7946"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
<Note>
|
||||
Static peers require stable pod names, which only a StatefulSet provides. This approach doesn't adapt to HPA-driven scaling — use Kubernetes or DNS discovery for dynamic replica counts.
|
||||
</Note>
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Consul">
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
discovery:
|
||||
enabled: true
|
||||
type: consul
|
||||
serviceName: "bifrost-cluster"
|
||||
consulAddress: "consul.consul.svc.cluster.local:8500"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f cluster-consul-discovery-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="etcd">
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
discovery:
|
||||
enabled: true
|
||||
type: etcd
|
||||
serviceName: "bifrost-cluster"
|
||||
etcdEndpoints:
|
||||
- "http://etcd-0.etcd.default.svc.cluster.local:2379"
|
||||
- "http://etcd-1.etcd.default.svc.cluster.local:2379"
|
||||
- "http://etcd-2.etcd.default.svc.cluster.local:2379"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="mDNS">
|
||||
|
||||
Best for local development or bare-metal clusters where multicast is available.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
discovery:
|
||||
enabled: true
|
||||
type: mdns
|
||||
mdnsService: "_bifrost._tcp"
|
||||
gossip:
|
||||
port: 7946
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Allowed Address Space
|
||||
|
||||
Restrict gossip to specific subnets, given as CIDR ranges (useful in multi-tenant clusters):
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
discovery:
|
||||
enabled: true
|
||||
type: kubernetes
|
||||
k8sNamespace: "default"
|
||||
k8sLabelSelector: "app.kubernetes.io/name=bifrost"
|
||||
allowedAddressSpace:
|
||||
- "10.0.0.0/8"
|
||||
- "172.16.0.0/12"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Region-Aware Routing
|
||||
|
||||
Tag replicas with a region identifier for latency-aware routing:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
cluster:
|
||||
enabled: true
|
||||
region: "us-east-1"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full HA Production Example
|
||||
|
||||
```yaml
|
||||
# ha-production-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
replicaCount: 3
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 1000m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 4000m
|
||||
memory: 4Gi
|
||||
|
||||
autoscaling:
|
||||
enabled: true
|
||||
minReplicas: 3
|
||||
maxReplicas: 15
|
||||
targetCPUUtilizationPercentage: 70
|
||||
targetMemoryUtilizationPercentage: 75
|
||||
behavior:
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 300
|
||||
policies:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 120
|
||||
scaleUp:
|
||||
stabilizationWindowSeconds: 30
|
||||
|
||||
terminationGracePeriodSeconds: 90
|
||||
lifecycle:
|
||||
preStop:
|
||||
exec:
|
||||
command: ["sh", "-c", "sleep 20"]
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "100m"
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
|
||||
hosts:
|
||||
- host: bifrost.yourdomain.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: bifrost-tls
|
||||
hosts:
|
||||
- bifrost.yourdomain.com
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
external:
|
||||
enabled: true
|
||||
host: "rds.us-east-1.amazonaws.com"
|
||||
port: 5432
|
||||
user: bifrost
|
||||
database: bifrost
|
||||
sslMode: require
|
||||
existingSecret: "postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
client:
|
||||
initialPoolSize: 1000
|
||||
dropExcessRequests: true
|
||||
enableLogging: true
|
||||
enforceGovernanceHeader: true
|
||||
|
||||
cluster:
|
||||
enabled: true
|
||||
region: "us-east-1"
|
||||
discovery:
|
||||
enabled: true
|
||||
type: kubernetes
|
||||
k8sNamespace: "default"
|
||||
k8sLabelSelector: "app.kubernetes.io/name=bifrost"
|
||||
gossip:
|
||||
port: 7946
|
||||
config:
|
||||
timeoutSeconds: 10
|
||||
successThreshold: 3
|
||||
failureThreshold: 3
|
||||
|
||||
plugins:
|
||||
telemetry:
|
||||
enabled: true
|
||||
config:
|
||||
push_gateway:
|
||||
enabled: true
|
||||
push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091"
|
||||
push_interval: 15
|
||||
logging:
|
||||
enabled: true
|
||||
governance:
|
||||
enabled: true
|
||||
config:
|
||||
is_vk_mandatory: true
|
||||
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: bifrost
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
```
|
||||
|
||||
```bash
|
||||
# Prerequisites
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-secure-postgres-password'
|
||||
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=encryption-key='your-32-byte-encryption-key'
|
||||
|
||||
# RBAC for Kubernetes pod discovery
|
||||
kubectl apply -f - <<'EOF'
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: bifrost-pod-discovery
|
||||
namespace: default
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["list", "get", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: bifrost-pod-discovery
|
||||
namespace: default
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: bifrost
|
||||
namespace: default
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: bifrost-pod-discovery
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
EOF
|
||||
|
||||
# Install
|
||||
helm install bifrost bifrost/bifrost -f ha-production-values.yaml
|
||||
|
||||
# Verify all peers have found each other (check logs)
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost --tail=50 | grep -i gossip
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verifying Cluster Health
|
||||
|
||||
```bash
|
||||
# Check all pods are running
|
||||
kubectl get pods -l app.kubernetes.io/name=bifrost
|
||||
|
||||
# Check gossip port is reachable between pods
|
||||
kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946
|
||||
|
||||
# Check health endpoint
|
||||
kubectl port-forward svc/bifrost 8080:8080 &
|
||||
curl http://localhost:8080/health
|
||||
|
||||
# View HPA status
|
||||
kubectl get hpa bifrost
|
||||
|
||||
# Scale manually during maintenance (if autoscaling is enabled, the HPA will override this replica count)
|
||||
kubectl scale deployment bifrost --replicas=5
|
||||
```
|
||||
446
docs/deployment-guides/helm/governance.mdx
Normal file
446
docs/deployment-guides/helm/governance.mdx
Normal file
@@ -0,0 +1,446 @@
|
||||
---
|
||||
title: "Governance"
|
||||
description: "Configure Bifrost governance in Helm — budgets, rate limits, virtual keys, routing rules, and admin authentication"
|
||||
icon: "shield"
|
||||
---
|
||||
|
||||
Governance lets you control who can call which providers, how much they can spend, how many requests and tokens they can consume per time window, and how traffic is routed. Everything is declared under `bifrost.governance` in your values file and seeded into the database at startup.
|
||||
|
||||
<Note>
|
||||
The governance **plugin** must also be enabled for enforcement to take effect:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
governance:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
See the [Plugins](/deployment-guides/helm/plugins) page for plugin configuration details.
|
||||
</Note>
|
||||
|
||||
---
|
||||
|
||||
## Admin Authentication
|
||||
|
||||
Protect the Bifrost dashboard and management API with username/password auth.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic bifrost-admin-credentials \
|
||||
--from-literal=username='admin' \
|
||||
--from-literal=password='your-secure-admin-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
authConfig:
|
||||
isEnabled: true
|
||||
disableAuthOnInference: false # keep auth on inference routes
|
||||
existingSecret: "bifrost-admin-credentials"
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-auth-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budgets
|
||||
|
||||
Budgets are spending caps that reset on a configurable period. They are referenced by ID (`budget_id`) from virtual keys, teams, customers, model configs, or providers.
|
||||
|
||||
| Reset duration | Syntax |
|
||||
|----------------|--------|
|
||||
| 30 seconds | `"30s"` |
|
||||
| 5 minutes | `"5m"` |
|
||||
| 1 hour | `"1h"` |
|
||||
| 1 day | `"1d"` |
|
||||
| 1 week | `"1w"` |
|
||||
| 1 month | `"1M"` |
|
||||
| 1 year | `"1Y"` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
budgets:
|
||||
- id: "budget-dev"
|
||||
max_limit: 50 # $50 per month
|
||||
reset_duration: "1M"
|
||||
|
||||
- id: "budget-production"
|
||||
max_limit: 500 # $500 per month
|
||||
reset_duration: "1M"
|
||||
|
||||
- id: "budget-testing"
|
||||
max_limit: 10 # $10 per day
|
||||
reset_duration: "1d"
|
||||
|
||||
- id: "budget-enterprise"
|
||||
max_limit: 5000 # $5000 per month
|
||||
reset_duration: "1M"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rate Limits
|
||||
|
||||
Rate limits are token and request-count caps per time window. They are referenced by ID (`rate_limit_id`) from virtual keys, teams, customers, model configs, or providers.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
rateLimits:
|
||||
- id: "rate-limit-standard"
|
||||
token_max_limit: 100000 # 100K tokens per hour
|
||||
token_reset_duration: "1h"
|
||||
request_max_limit: 1000 # 1000 requests per hour
|
||||
request_reset_duration: "1h"
|
||||
|
||||
- id: "rate-limit-high"
|
||||
token_max_limit: 500000 # 500K tokens per hour
|
||||
token_reset_duration: "1h"
|
||||
request_max_limit: 5000
|
||||
request_reset_duration: "1h"
|
||||
|
||||
- id: "rate-limit-burst"
|
||||
token_max_limit: 50000 # 50K tokens per minute (burst)
|
||||
token_reset_duration: "1m"
|
||||
request_max_limit: 500
|
||||
request_reset_duration: "1m"
|
||||
|
||||
- id: "rate-limit-testing"
|
||||
token_max_limit: 10000
|
||||
token_reset_duration: "1h"
|
||||
request_max_limit: 100
|
||||
request_reset_duration: "1h"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Customers & Teams
|
||||
|
||||
Customers and teams form an optional organizational hierarchy: a team can belong to a customer via `customer_id`, and virtual keys can be assigned to a customer or a team, inheriting its budget and rate limits.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
customers:
|
||||
- id: "customer-acme"
|
||||
name: "Acme Corp"
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
|
||||
- id: "customer-startup"
|
||||
name: "Startup Inc"
|
||||
budget_id: "budget-dev"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
|
||||
teams:
|
||||
- id: "team-platform"
|
||||
name: "Platform Team"
|
||||
customer_id: "customer-acme"
|
||||
budget_id: "budget-enterprise"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
|
||||
- id: "team-ml"
|
||||
name: "ML Team"
|
||||
customer_id: "customer-acme"
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Virtual Keys
|
||||
|
||||
Virtual keys are the primary access tokens issued to callers. They scope which providers, models, and underlying API keys are accessible.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
virtualKeys:
|
||||
# 1. Unrestricted dev key — access to every provider
|
||||
- id: "vk-dev-all"
|
||||
name: "Dev: all providers"
|
||||
value: "vk-dev-all-secret-token"
|
||||
is_active: true
|
||||
budget_id: "budget-dev"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
# No provider_configs → all providers allowed
|
||||
|
||||
# 2. OpenAI only — restricted to two models
|
||||
- id: "vk-openai-prod"
|
||||
name: "OpenAI Production"
|
||||
value: "vk-openai-prod-secret-token"
|
||||
is_active: true
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 1
|
||||
allowed_models: ["gpt-4o", "gpt-4o-mini"]
|
||||
|
||||
# 3. Multi-provider with weighted routing
|
||||
- id: "vk-multi"
|
||||
name: "Multi-provider weighted"
|
||||
value: "vk-multi-secret-token"
|
||||
is_active: true
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 2 # 50%
|
||||
allowed_models: ["*"]
|
||||
- provider: "anthropic"
|
||||
weight: 1 # 25%
|
||||
allowed_models: ["*"]
|
||||
- provider: "groq"
|
||||
weight: 1 # 25%
|
||||
allowed_models: ["*"]
|
||||
|
||||
# 4. Team-scoped key
|
||||
- id: "vk-platform-team"
|
||||
name: "Platform Team Key"
|
||||
value: "vk-platform-team-token"
|
||||
is_active: true
|
||||
team_id: "team-platform" # inherits team budget/rate-limit
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 1
|
||||
allowed_models: ["*"]
|
||||
key_ids: ["openai-primary"] # pin to specific configured key by name
|
||||
|
||||
# 5. Restricted testing key
|
||||
- id: "vk-testing"
|
||||
name: "Testing (gpt-4o-mini only)"
|
||||
value: "vk-testing-token"
|
||||
is_active: true
|
||||
budget_id: "budget-testing"
|
||||
rate_limit_id: "rate-limit-testing"
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 1
|
||||
allowed_models: ["gpt-4o-mini"]
|
||||
|
||||
# 6. Batch API key
|
||||
- id: "vk-batch"
|
||||
name: "Batch API workloads"
|
||||
value: "vk-batch-token"
|
||||
is_active: true
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-burst"
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 1
|
||||
allowed_models: ["*"]
|
||||
key_ids: ["openai-batch"] # only the batch-flagged key
|
||||
```
|
||||
|
||||
`provider_configs[].key_ids` and `provider_configs[].keys` are both supported in Helm values. Prefer `key_ids` for parity with `config.json` (`key_ids` should contain provider key names).
|
||||
|
||||
**Use a virtual key in API calls:**
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions \
|
||||
-H "x-bf-vk: vk-openai-prod-secret-token" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Configs
|
||||
|
||||
Apply budgets and rate limits at the model level, independent of virtual keys:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
modelConfigs:
|
||||
- id: "model-gpt4o"
|
||||
model_name: "gpt-4o"
|
||||
provider: "openai"
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
|
||||
- id: "model-claude"
|
||||
model_name: "claude-3-5-sonnet-20241022"
|
||||
provider: "anthropic"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Provider Governance
|
||||
|
||||
Apply budgets and rate limits at the provider level:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
providers:
|
||||
- name: "openai"
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-high"
|
||||
send_back_raw_request: false
|
||||
send_back_raw_response: false
|
||||
|
||||
- name: "anthropic"
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Routing Rules
|
||||
|
||||
CEL-expression-based routing rules redirect requests to different providers or models based on request attributes.
|
||||
|
||||
| Field | Description |
|
||||
|-------|-------------|
|
||||
| `cel_expression` | CEL expression evaluated against the request; if `true`, rule fires |
|
||||
| `targets` | Provider/model targets with weights |
|
||||
| `fallbacks` | Providers to try if all targets fail |
|
||||
| `scope` | `global`, `team`, `customer`, or `virtual_key` |
|
||||
| `scope_id` | Required for non-global scopes |
|
||||
| `priority` | Lower number = evaluated first |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
governance:
|
||||
routingRules:
|
||||
# Route all GPT requests to Azure
|
||||
- id: "route-gpt-to-azure"
|
||||
name: "GPT → Azure"
|
||||
description: "Route all GPT model requests to Azure OpenAI"
|
||||
enabled: true
|
||||
cel_expression: "model.startsWith('gpt-')"
|
||||
targets:
|
||||
- provider: "azure"
|
||||
model: "" # empty = use original model name
|
||||
weight: 1.0
|
||||
fallbacks: ["openai"]
|
||||
scope: "global"
|
||||
priority: 0
|
||||
|
||||
# Route large GPT-4o requests (max_tokens > 4000) to a cheaper provider
|
||||
- id: "route-heavy-to-groq"
|
||||
name: "Large context → Groq"
|
||||
enabled: true
|
||||
cel_expression: "model == 'gpt-4o' && request_body.max_tokens > 4000"
|
||||
targets:
|
||||
- provider: "groq"
|
||||
model: "llama-3.3-70b-versatile"
|
||||
weight: 1.0
|
||||
fallbacks: ["openai"]
|
||||
scope: "global"
|
||||
priority: 1
|
||||
|
||||
# Team-scoped rule
|
||||
- id: "route-ml-team-bedrock"
|
||||
name: "ML Team → Bedrock"
|
||||
enabled: true
|
||||
cel_expression: "true" # match all requests for this scope
|
||||
targets:
|
||||
- provider: "bedrock"
|
||||
model: ""
|
||||
weight: 1.0
|
||||
fallbacks: ["openai"]
|
||||
scope: "team"
|
||||
scope_id: "team-ml"
|
||||
priority: 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Example
|
||||
|
||||
```yaml
|
||||
# governance-full-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
plugins:
|
||||
governance:
|
||||
enabled: true
|
||||
config:
|
||||
is_vk_mandatory: true
|
||||
|
||||
governance:
|
||||
authConfig:
|
||||
isEnabled: true
|
||||
existingSecret: "bifrost-admin-credentials"
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
|
||||
budgets:
|
||||
- id: "budget-production"
|
||||
max_limit: 500
|
||||
reset_duration: "1M"
|
||||
- id: "budget-dev"
|
||||
max_limit: 50
|
||||
reset_duration: "1M"
|
||||
|
||||
rateLimits:
|
||||
- id: "rate-limit-standard"
|
||||
token_max_limit: 100000
|
||||
token_reset_duration: "1h"
|
||||
request_max_limit: 1000
|
||||
request_reset_duration: "1h"
|
||||
|
||||
virtualKeys:
|
||||
- id: "vk-production"
|
||||
name: "Production"
|
||||
value: "vk-prod-secret-token"
|
||||
is_active: true
|
||||
budget_id: "budget-production"
|
||||
rate_limit_id: "rate-limit-standard"
|
||||
provider_configs:
|
||||
- provider: "openai"
|
||||
weight: 1
|
||||
allowed_models: ["gpt-4o", "gpt-4o-mini"]
|
||||
```
|
||||
|
||||
```bash
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=encryption-key='your-32-byte-key'
|
||||
|
||||
kubectl create secret generic bifrost-admin-credentials \
|
||||
--from-literal=username='admin' \
|
||||
--from-literal=password='secure-admin-password'
|
||||
|
||||
helm install bifrost bifrost/bifrost -f governance-full-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Access Profiles (Enterprise)
|
||||
|
||||
You can seed enterprise `access_profiles` directly from Helm values. The chart renders `bifrost.accessProfiles` into top-level `access_profiles` in `config.json`.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
accessProfiles:
|
||||
- name: "platform-default"
|
||||
description: "Default profile for platform users"
|
||||
is_active: true
|
||||
tags: ["platform", "default"]
|
||||
provider_configs:
|
||||
- provider_name: "openai"
|
||||
all_models_allowed: false
|
||||
allowed_models: ["gpt-4o", "gpt-4o-mini"]
|
||||
mcp_servers:
|
||||
- mcp_server_id: "github"
|
||||
mcp_tool_overrides:
|
||||
- mcp_client_id: "github"
|
||||
tool_name: "create_pull_request"
|
||||
action: "include"
|
||||
```
|
||||
262
docs/deployment-guides/helm/guardrails.mdx
Normal file
262
docs/deployment-guides/helm/guardrails.mdx
Normal file
@@ -0,0 +1,262 @@
|
||||
---
|
||||
title: "Guardrails"
|
||||
description: "Configure guardrails providers and rules in Bifrost Helm deployments"
|
||||
icon: "shield-halved"
|
||||
---
|
||||
|
||||
<Note>
|
||||
Guardrails are an **enterprise-only** feature. They require the enterprise Bifrost image.
|
||||
</Note>
|
||||
|
||||
Guardrails are configured under `bifrost.guardrails` in your values file. The configuration has two parts:
|
||||
|
||||
- **`providers`** — the backends that perform the checks. Rules link to providers by `id` (via `provider_config_ids`).
|
||||
- **`rules`** — CEL expressions that control when and where providers are invoked.
|
||||
|
||||
---
|
||||
|
||||
## Providers
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Regex">
|
||||
|
||||
Runs entirely in-process with no external dependency. Patterns use RE2 syntax. Supports optional per-pattern flags: `i` (case-insensitive), `m` (multiline), `s` (dot-all).
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
guardrails:
|
||||
providers:
|
||||
- id: 1
|
||||
provider_name: "regex"
|
||||
policy_name: "block-secrets"
|
||||
enabled: true
|
||||
timeout: 5
|
||||
config:
|
||||
patterns:
|
||||
- pattern: "sk-[A-Za-z0-9]{20,}"
|
||||
description: "OpenAI API key"
|
||||
- pattern: "AKIA[0-9A-Z]{16}"
|
||||
description: "AWS access key"
|
||||
flags: "i"
|
||||
- pattern: "gh[ps]_[A-Za-z0-9]{36}"
|
||||
description: "GitHub token"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="AWS Bedrock">
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
guardrails:
|
||||
providers:
|
||||
- id: 2
|
||||
provider_name: "bedrock"
|
||||
policy_name: "content-filter"
|
||||
enabled: true
|
||||
timeout: 15
|
||||
config:
|
||||
guardrail_arn: "arn:aws:bedrock:us-east-1::guardrail/abc123"
|
||||
guardrail_version: "DRAFT" # or a published version number
|
||||
region: "us-east-1"
|
||||
access_key: "env.AWS_ACCESS_KEY_ID" # omit to use instance role
|
||||
secret_key: "env.AWS_SECRET_ACCESS_KEY"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Azure Content Safety">
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
guardrails:
|
||||
providers:
|
||||
- id: 3
|
||||
provider_name: "azure"
|
||||
policy_name: "azure-content-safety"
|
||||
enabled: true
|
||||
timeout: 10
|
||||
config:
|
||||
endpoint: "https://your-resource.cognitiveservices.azure.com"
|
||||
api_key: "env.AZURE_CONTENT_SAFETY_KEY"
|
||||
analyze_enabled: true
|
||||
analyze_severity_threshold: "medium" # low | medium | high
|
||||
jailbreak_shield_enabled: true
|
||||
indirect_attack_shield_enabled: true
|
||||
copyright_enabled: false
|
||||
text_blocklist_enabled: false
|
||||
blocklist_names: []
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Gray Swan">
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
guardrails:
|
||||
providers:
|
||||
- id: 4
|
||||
provider_name: "grayswan"
|
||||
policy_name: "grayswan-jailbreak"
|
||||
enabled: true
|
||||
timeout: 15
|
||||
config:
|
||||
api_key: "env.GRAYSWAN_API_KEY"
|
||||
violation_threshold: 0.7 # 0.0–1.0; higher = more permissive
|
||||
reasoning_mode: "standard" # standard | fast
|
||||
policy_id: "" # optional: single policy ID
|
||||
policy_ids: [] # optional: multiple policy IDs
|
||||
rules: {} # optional: inline rule map
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Rules
|
||||
|
||||
Each rule carries a CEL expression (`cel_expression`), and the rule fires when that expression evaluates to `true`. Available CEL variables:
|
||||
|
||||
| Variable | Type | Description |
|
||||
|----------|------|-------------|
|
||||
| `model` | `string` | Model name from the request |
|
||||
| `provider` | `string` | Provider name (e.g. `"openai"`) |
|
||||
| `headers` | `map<string,string>` | HTTP request headers |
|
||||
| `params` | `map<string,string>` | Query parameters |
|
||||
| `customer` | `string` | Customer ID |
|
||||
| `team` | `string` | Team ID |
|
||||
| `user` | `string` | User ID |
|
||||
|
||||
Rule fields:
|
||||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `id` | Yes | Unique integer ID |
|
||||
| `name` | Yes | Human-readable name |
|
||||
| `description` | No | Optional description |
|
||||
| `enabled` | Yes | `true` to activate |
|
||||
| `cel_expression` | Yes | CEL boolean expression; `"true"` matches all requests |
|
||||
| `apply_to` | Yes | `"input"`, `"output"`, or `"both"` |
|
||||
| `sampling_rate` | No | `0`–`100`; percentage of requests to check (default: 100) |
|
||||
| `timeout` | No | Rule timeout in seconds |
|
||||
| `provider_config_ids` | No | Provider `id`s to invoke when this rule matches |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
guardrails:
|
||||
rules:
|
||||
- id: 101
|
||||
name: "block-secrets-input"
|
||||
description: "Block prompts containing API keys"
|
||||
enabled: true
|
||||
cel_expression: "true"
|
||||
apply_to: "input"
|
||||
sampling_rate: 100
|
||||
timeout: 10
|
||||
provider_config_ids: [1]
|
||||
|
||||
- id: 102
|
||||
name: "azure-output-gpt4o"
|
||||
description: "Scan GPT-4o responses"
|
||||
enabled: true
|
||||
cel_expression: "model == 'gpt-4o'"
|
||||
apply_to: "output"
|
||||
sampling_rate: 100
|
||||
timeout: 15
|
||||
provider_config_ids: [3]
|
||||
|
||||
- id: 103
|
||||
name: "grayswan-openai-input"
|
||||
enabled: true
|
||||
cel_expression: "provider == 'openai'"
|
||||
apply_to: "input"
|
||||
sampling_rate: 50
|
||||
timeout: 20
|
||||
provider_config_ids: [4]
|
||||
|
||||
- id: 104
|
||||
name: "strict-team-check"
|
||||
enabled: true
|
||||
cel_expression: "team == 'team-platform'"
|
||||
apply_to: "both"
|
||||
sampling_rate: 100
|
||||
timeout: 30
|
||||
provider_config_ids: [1, 3] # multiple providers run in parallel
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Example
|
||||
|
||||
```yaml
|
||||
# guardrails-values.yaml
|
||||
image:
|
||||
tag: "latest"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
guardrails:
|
||||
providers:
|
||||
- id: 1
|
||||
provider_name: "regex"
|
||||
policy_name: "block-secrets"
|
||||
enabled: true
|
||||
timeout: 5
|
||||
config:
|
||||
patterns:
|
||||
- pattern: "sk-[A-Za-z0-9]{20,}"
|
||||
description: "OpenAI API key"
|
||||
- pattern: "AKIA[0-9A-Z]{16}"
|
||||
description: "AWS access key"
|
||||
- pattern: "gh[ps]_[A-Za-z0-9]{36}"
|
||||
description: "GitHub token"
|
||||
|
||||
- id: 2
|
||||
provider_name: "azure"
|
||||
policy_name: "content-safety"
|
||||
enabled: true
|
||||
timeout: 10
|
||||
config:
|
||||
endpoint: "https://your-resource.cognitiveservices.azure.com"
|
||||
api_key: "env.AZURE_CONTENT_SAFETY_KEY"
|
||||
analyze_enabled: true
|
||||
analyze_severity_threshold: "medium"
|
||||
jailbreak_shield_enabled: true
|
||||
indirect_attack_shield_enabled: false
|
||||
copyright_enabled: false
|
||||
text_blocklist_enabled: false
|
||||
|
||||
rules:
|
||||
- id: 101
|
||||
name: "block-secrets-input"
|
||||
description: "Block prompts leaking credentials"
|
||||
enabled: true
|
||||
cel_expression: "true"
|
||||
apply_to: "input"
|
||||
sampling_rate: 100
|
||||
timeout: 10
|
||||
provider_config_ids: [1]
|
||||
|
||||
- id: 102
|
||||
name: "content-safety-both"
|
||||
description: "Azure content safety on input and output"
|
||||
enabled: true
|
||||
cel_expression: "true"
|
||||
apply_to: "both"
|
||||
sampling_rate: 100
|
||||
timeout: 15
|
||||
provider_config_ids: [2]
|
||||
```
|
||||
|
||||
```bash
|
||||
kubectl create secret generic azure-content-safety \
|
||||
--from-literal=key='your-azure-content-safety-api-key'
|
||||
|
||||
helm install bifrost bifrost/bifrost \
|
||||
-f guardrails-values.yaml \
|
||||
--set env[0].name=AZURE_CONTENT_SAFETY_KEY \
|
||||
--set env[0].valueFrom.secretKeyRef.name=azure-content-safety \
|
||||
--set env[0].valueFrom.secretKeyRef.key=key
|
||||
```
|
||||
549
docs/deployment-guides/helm/plugins.mdx
Normal file
549
docs/deployment-guides/helm/plugins.mdx
Normal file
@@ -0,0 +1,549 @@
|
||||
---
|
||||
title: "Plugins"
|
||||
description: "Configure Bifrost plugins in Helm — telemetry, logging, semantic cache, OpenTelemetry, Datadog, governance, and custom plugins"
|
||||
icon: "puzzle-piece"
|
||||
---
|
||||
|
||||
Plugins are configured under `bifrost.plugins`. Each plugin is independently enabled/disabled. Pre-hooks run in registration order; post-hooks run in reverse order.
|
||||
|
||||
<Note>
|
||||
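**Telemetry, logging, and governance are auto-loaded built-ins** — they do not need to be explicitly enabled through `bifrost.plugins.<name>.enabled`. Their primary configuration lives in `bifrost.client.*` and `bifrost.governance.*`; only optional extras (for example, the telemetry push gateway shown in the Telemetry tab) are set under the `plugins` block.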
|
||||
|
||||
The `plugins` block controls the opt-in plugins: `semanticCache`, `otel`, `datadog`, `maxim`, and custom plugins.
|
||||
</Note>
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
semanticCache:
|
||||
enabled: false
|
||||
otel:
|
||||
enabled: false
|
||||
datadog:
|
||||
enabled: false
|
||||
```
|
||||
|
||||
```bash
|
||||
# Enable an opt-in plugin at install time
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
--set bifrost.plugins.otel.enabled=true
|
||||
|
||||
# Or upgrade to enable a plugin without touching other values
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set bifrost.plugins.semanticCache.enabled=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
<Tabs>
|
||||
|
||||
<Tab title="Telemetry">
|
||||
|
||||
### Telemetry (Prometheus)
|
||||
|
||||
<Note>
|
||||
Telemetry is **always active** — it cannot be disabled. You do not need to set `bifrost.plugins.telemetry.enabled`.
|
||||
</Note>
|
||||
|
||||
Exposes Prometheus metrics at `GET /metrics`. Custom labels are set via `bifrost.client.prometheusLabels`:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
prometheusLabels:
|
||||
- "environment=production"
|
||||
- "region=us-east-1"
|
||||
```
|
||||
|
||||
```bash
|
||||
# Verify metrics are exposed
|
||||
kubectl port-forward svc/bifrost 8080:8080 &
|
||||
curl http://localhost:8080/metrics | head -30
|
||||
```
|
||||
|
||||
**With Prometheus Push Gateway** (recommended for multi-replica / HA setups where pull-based scraping can miss pods):
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
telemetry:
|
||||
enabled: true
|
||||
config:
|
||||
push_gateway:
|
||||
enabled: true
|
||||
push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091"
|
||||
job_name: "bifrost"
|
||||
instance_id: "" # auto-derived from pod name if empty
|
||||
push_interval: 15
|
||||
basic_auth:
|
||||
username: ""
|
||||
password: ""
|
||||
```
|
||||
|
||||
**ServiceMonitor for Prometheus Operator:**
|
||||
|
||||
```yaml
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
namespace: monitoring # namespace where Prometheus is deployed
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Logging">
|
||||
|
||||
### Request/Response Logging
|
||||
|
||||
<Note>
|
||||
Logging is **auto-loaded** when `bifrost.client.enableLogging: true` and a log store is configured. You do not need to set `bifrost.plugins.logging.enabled`.
|
||||
</Note>
|
||||
|
||||
Configure logging via the `client` block:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.enableLogging` | Enable request/response logging | `true` |
|
||||
| `bifrost.client.disableContentLogging` | Strip message body from logs (HIPAA/PCI) | `false` |
|
||||
| `bifrost.client.loggingHeaders` | HTTP headers to capture in log metadata | `[]` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
enableLogging: true
|
||||
disableContentLogging: false # set true for HIPAA/compliance
|
||||
loggingHeaders:
|
||||
- "x-request-id"
|
||||
- "x-user-id"
|
||||
- "x-team-id"
|
||||
```
|
||||
|
||||
```bash
|
||||
# Verify logs are being written
|
||||
kubectl port-forward svc/bifrost 8080:8080 &
|
||||
curl -s "http://localhost:8080/api/logs?limit=5" | jq .
|
||||
```
|
||||
|
||||
See [Client Configuration](/deployment-guides/helm/client) for the full reference.
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Governance">
|
||||
|
||||
### Governance
|
||||
|
||||
<Note>
|
||||
Governance is **always active** for OSS deployments. You do not need to set `bifrost.plugins.governance.enabled`.
|
||||
</Note>
|
||||
|
||||
Virtual key enforcement is controlled by the `client` block:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.client.enforceAuthOnInference` | Require a virtual key (`x-bf-vk`) on every inference request | `false` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
client:
|
||||
enforceAuthOnInference: true # require virtual key on all inference requests
|
||||
```
|
||||
|
||||
Define virtual keys, budgets, rate limits, and routing rules in `bifrost.governance.*`. See the [Governance](/deployment-guides/helm/governance) page.
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Semantic Cache">
|
||||
|
||||
### Semantic Cache
|
||||
|
||||
Caches LLM responses using vector similarity so semantically equivalent prompts return cached answers.
|
||||
|
||||
Two modes:
|
||||
- **Semantic mode** (`dimension > 1`): uses an embedding model + vector store for similarity search
|
||||
- **Direct / hash mode** (`dimension: 1`): exact-match hash-based caching, no embedding model needed
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.plugins.semanticCache.enabled` | Enable semantic caching | `false` |
|
||||
| `bifrost.plugins.semanticCache.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
|
||||
| `bifrost.plugins.semanticCache.config.provider` | Embedding provider | `"openai"` |
|
||||
| `bifrost.plugins.semanticCache.config.embedding_model` | Embedding model name | `"text-embedding-3-small"` |
|
||||
| `bifrost.plugins.semanticCache.config.dimension` | Embedding dimension (`1` = direct/hash mode) | `1536` |
|
||||
| `bifrost.plugins.semanticCache.config.threshold` | Cosine similarity threshold (0–1) | `0.8` |
|
||||
| `bifrost.plugins.semanticCache.config.ttl` | Cache entry TTL (Go duration) | `"5m"` |
|
||||
| `bifrost.plugins.semanticCache.config.conversation_history_threshold` | Number of past messages to include in cache key | `3` |
|
||||
| `bifrost.plugins.semanticCache.config.cache_by_model` | Include model name in cache key | `true` |
|
||||
| `bifrost.plugins.semanticCache.config.cache_by_provider` | Include provider name in cache key | `true` |
|
||||
| `bifrost.plugins.semanticCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` |
|
||||
| `bifrost.plugins.semanticCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` |
|
||||
|
||||
**Semantic mode (with OpenAI embeddings + Weaviate):**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic semantic-cache-secret \
|
||||
--from-literal=openai-key='sk-your-openai-embedding-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# semantic-cache-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: weaviate
|
||||
weaviate:
|
||||
enabled: true
|
||||
persistence:
|
||||
size: 20Gi
|
||||
|
||||
bifrost:
|
||||
plugins:
|
||||
semanticCache:
|
||||
enabled: true
|
||||
config:
|
||||
provider: "openai"
|
||||
keys:
|
||||
- value: "env.SEMANTIC_CACHE_OPENAI_KEY"
|
||||
weight: 1
|
||||
embedding_model: "text-embedding-3-small"
|
||||
dimension: 1536
|
||||
threshold: 0.85
|
||||
ttl: "1h"
|
||||
conversation_history_threshold: 5
|
||||
cache_by_model: true
|
||||
cache_by_provider: true
|
||||
|
||||
providerSecrets:
|
||||
semantic-cache-key:
|
||||
existingSecret: "semantic-cache-secret"
|
||||
key: "openai-key"
|
||||
envVar: "SEMANTIC_CACHE_OPENAI_KEY"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f semantic-cache-values.yaml
|
||||
```
|
||||
|
||||
**Direct / hash mode** (no embedding provider needed):
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
semanticCache:
|
||||
enabled: true
|
||||
config:
|
||||
dimension: 1 # triggers hash-based exact matching
|
||||
ttl: "30m"
|
||||
cache_by_model: true
|
||||
cache_by_provider: true
|
||||
```
|
||||
|
||||
<Note>
|
||||
The vector store (`vectorStore.*`) must be configured and enabled for semantic mode. Direct/hash mode works without a vector store but still requires a storage backend.
|
||||
</Note>
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="OpenTelemetry">
|
||||
|
||||
### OpenTelemetry (OTel)
|
||||
|
||||
Sends distributed traces and push-based metrics to any OTLP-compatible collector (Jaeger, Tempo, Honeycomb, etc.).
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.plugins.otel.enabled` | Enable OTel tracing | `false` |
|
||||
| `bifrost.plugins.otel.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
|
||||
| `bifrost.plugins.otel.config.service_name` | Service name in traces | `"bifrost"` |
|
||||
| `bifrost.plugins.otel.config.collector_url` | OTLP collector endpoint | `""` |
|
||||
| `bifrost.plugins.otel.config.trace_type` | Trace type (`genai_extension`, `vercel`, or `open_inference`) | `"genai_extension"` |
|
||||
| `bifrost.plugins.otel.config.protocol` | Transport protocol (`grpc` or `http`) | `"grpc"` |
|
||||
| `bifrost.plugins.otel.config.metrics_enabled` | Enable OTLP push-based metrics | `false` |
|
||||
| `bifrost.plugins.otel.config.metrics_endpoint` | OTLP metrics endpoint | `""` |
|
||||
| `bifrost.plugins.otel.config.metrics_push_interval` | Push interval in seconds | `15` |
|
||||
| `bifrost.plugins.otel.config.headers` | Custom headers for the collector | `{}` |
|
||||
| `bifrost.plugins.otel.config.insecure` | Skip TLS verification | `false` |
|
||||
| `bifrost.plugins.otel.config.tls_ca_cert` | Path to CA cert for TLS | `""` |
|
||||
|
||||
```yaml
|
||||
# otel-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
plugins:
|
||||
otel:
|
||||
enabled: true
|
||||
config:
|
||||
service_name: "bifrost-production"
|
||||
collector_url: "otel-collector.observability.svc.cluster.local:4317"
|
||||
trace_type: "genai_extension"
|
||||
protocol: "grpc"
|
||||
insecure: true # set false in production with a proper cert
|
||||
metrics_enabled: true
|
||||
metrics_endpoint: "otel-collector.observability.svc.cluster.local:4317"
|
||||
metrics_push_interval: 15
|
||||
headers:
|
||||
x-honeycomb-team: "env.HONEYCOMB_API_KEY"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f otel-values.yaml
|
||||
```
|
||||
|
||||
**With authentication headers from a Kubernetes Secret:**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic otel-credentials \
|
||||
--from-literal=api-key='your-honeycomb-or-grafana-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
otel:
|
||||
enabled: true
|
||||
config:
|
||||
collector_url: "api.honeycomb.io:443"
|
||||
protocol: "grpc"
|
||||
headers:
|
||||
x-honeycomb-team: "env.OTEL_API_KEY"
|
||||
|
||||
providerSecrets:
|
||||
otel-key:
|
||||
existingSecret: "otel-credentials"
|
||||
key: "api-key"
|
||||
envVar: "OTEL_API_KEY"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Datadog">
|
||||
|
||||
### Datadog APM
|
||||
|
||||
Sends traces to a Datadog Agent running in the cluster.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.plugins.datadog.enabled` | Enable Datadog tracing | `false` |
|
||||
| `bifrost.plugins.datadog.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
|
||||
| `bifrost.plugins.datadog.config.service_name` | Service name | `"bifrost"` |
|
||||
| `bifrost.plugins.datadog.config.agent_addr` | Datadog Agent address | `"localhost:8126"` |
|
||||
| `bifrost.plugins.datadog.config.env` | Deployment environment tag | `""` |
|
||||
| `bifrost.plugins.datadog.config.version` | Version tag | `""` |
|
||||
| `bifrost.plugins.datadog.config.enable_traces` | Enable trace collection | `true` |
|
||||
| `bifrost.plugins.datadog.config.custom_tags` | Extra tags on all spans | `{}` |
|
||||
|
||||
The Datadog Agent is typically deployed via the [Datadog Helm chart](https://docs.datadoghq.com/containers/kubernetes/installation/) as a DaemonSet, making it available at the node's hostIP.
|
||||
|
||||
```yaml
|
||||
# datadog-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
plugins:
|
||||
datadog:
|
||||
enabled: true
|
||||
config:
|
||||
service_name: "bifrost"
|
||||
agent_addr: "$(HOST_IP):8126" # uses Datadog DaemonSet pattern
|
||||
env: "production"
|
||||
version: "v1.4.11"
|
||||
enable_traces: true
|
||||
custom_tags:
|
||||
team: "platform"
|
||||
region: "us-east-1"
|
||||
|
||||
# Inject HOST_IP so Bifrost can reach the DaemonSet agent on the same node
|
||||
env:
|
||||
- name: HOST_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.hostIP
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f datadog-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Maxim">
|
||||
|
||||
### Maxim Observability
|
||||
|
||||
Sends LLM request/response data to [Maxim](https://getmaxim.ai) for tracing, evaluation, and observability.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.plugins.maxim.enabled` | Enable Maxim plugin | `false` |
|
||||
| `bifrost.plugins.maxim.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
|
||||
| `bifrost.plugins.maxim.config.api_key` | Maxim API key (plain text, prefer secret) | `""` |
|
||||
| `bifrost.plugins.maxim.config.log_repo_id` | Maxim log repository ID | `""` |
|
||||
| `bifrost.plugins.maxim.secretRef.name` | Kubernetes Secret name for API key | `""` |
|
||||
| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` |
|
||||
|
||||
```bash
|
||||
kubectl create secret generic maxim-credentials \
|
||||
--from-literal=api-key='your-maxim-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# maxim-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
plugins:
|
||||
maxim:
|
||||
enabled: true
|
||||
config:
|
||||
log_repo_id: "your-log-repo-id"
|
||||
secretRef:
|
||||
name: "maxim-credentials"
|
||||
key: "api-key"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f maxim-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Custom Plugin">
|
||||
|
||||
### Custom / Dynamic Plugins
|
||||
|
||||
Load a custom Go plugin (compiled `.so` file) at runtime.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.plugins.custom[].name` | Unique plugin name | `""` |
|
||||
| `bifrost.plugins.custom[].enabled` | Enable custom plugin | `false` |
|
||||
| `bifrost.plugins.custom[].path` | Path to compiled `.so` file in the container | `""` |
|
||||
| `bifrost.plugins.custom[].version` | Plugin config version (`1` to `32767`) | `1` |
|
||||
| `bifrost.plugins.custom[].config` | Arbitrary plugin-specific configuration | `{}` |
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
plugins:
|
||||
custom:
|
||||
- name: "my-custom-plugin"
|
||||
enabled: true
|
||||
path: "/plugins/my-plugin.so"
|
||||
version: 1
|
||||
config:
|
||||
api_endpoint: "https://my-service.example.com"
|
||||
timeout: 5000
|
||||
```
|
||||
|
||||
Mount the `.so` file via a volume:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- name: custom-plugins
|
||||
configMap:
|
||||
name: bifrost-custom-plugins
|
||||
|
||||
volumeMounts:
|
||||
- name: custom-plugins
|
||||
mountPath: /plugins
|
||||
```
|
||||
|
||||
Or use an init container to download the plugin binary:
|
||||
|
||||
```yaml
|
||||
initContainers:
|
||||
- name: download-plugin
|
||||
image: curlimages/curl:8.6.0
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
curl -fsSL https://plugins.example.com/my-plugin.so \
|
||||
-o /plugins/my-plugin.so
|
||||
volumeMounts:
|
||||
- name: plugin-dir
|
||||
mountPath: /plugins
|
||||
|
||||
volumes:
|
||||
- name: plugin-dir
|
||||
emptyDir: {}
|
||||
|
||||
volumeMounts:
|
||||
- name: plugin-dir
|
||||
mountPath: /plugins
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f custom-plugin-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## All Plugins Together
|
||||
|
||||
```yaml
|
||||
# all-plugins-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "encryption-key"
|
||||
|
||||
plugins:
|
||||
telemetry:
|
||||
enabled: true
|
||||
config:
|
||||
custom_labels:
|
||||
- name: "environment"
|
||||
value: "production"
|
||||
|
||||
logging:
|
||||
enabled: true
|
||||
config:
|
||||
disable_content_logging: false
|
||||
logging_headers:
|
||||
- "x-request-id"
|
||||
|
||||
governance:
|
||||
enabled: true
|
||||
config:
|
||||
is_vk_mandatory: true
|
||||
|
||||
semanticCache:
|
||||
enabled: true
|
||||
config:
|
||||
provider: "openai"
|
||||
keys:
|
||||
- value: "env.CACHE_OPENAI_KEY"
|
||||
weight: 1
|
||||
embedding_model: "text-embedding-3-small"
|
||||
dimension: 1536
|
||||
threshold: 0.85
|
||||
ttl: "1h"
|
||||
|
||||
otel:
|
||||
enabled: true
|
||||
config:
|
||||
service_name: "bifrost"
|
||||
collector_url: "otel-collector.observability.svc.cluster.local:4317"
|
||||
protocol: "grpc"
|
||||
insecure: true
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f all-plugins-values.yaml
|
||||
```
|
||||
941
docs/deployment-guides/helm/providers.mdx
Normal file
941
docs/deployment-guides/helm/providers.mdx
Normal file
@@ -0,0 +1,941 @@
|
||||
---
|
||||
title: "Provider Setup"
|
||||
description: "Configure LLM providers in the Bifrost Helm chart — API keys, cloud-native auth, and self-hosted endpoints"
|
||||
icon: "plug"
|
||||
---
|
||||
|
||||
All providers are configured under `bifrost.providers` in your values file. Each provider entry contains a `keys` list where each key has a `name`, `value`, `weight`, and optional provider-specific config.
|
||||
|
||||
**Two ways to supply credentials:**
|
||||
|
||||
- **Direct value** — `value: "sk-..."` (fine for dev; avoid in production)
|
||||
- **Kubernetes Secret + env var** — store the key in a Secret, inject as an env var, and reference it with `value: "env.VAR_NAME"`
|
||||
|
||||
The `providerSecrets` block handles the Secret → env var injection automatically:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "primary"
|
||||
value: "env.OPENAI_API_KEY" # resolved at runtime
|
||||
weight: 1
|
||||
|
||||
providerSecrets:
|
||||
openai:
|
||||
existingSecret: "my-openai-secret"
|
||||
key: "api-key"
|
||||
envVar: "OPENAI_API_KEY" # injected into the pod
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
<Tabs>
|
||||
|
||||
<Tab title="OpenAI">
|
||||
|
||||
### OpenAI
|
||||
|
||||
Supports multiple keys with weighted load balancing. The key with `use_for_batch_api: true` is eligible for the Batch API.
|
||||
|
||||
**Step 1 — Create secret**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic openai-credentials \
|
||||
--from-literal=api-key-1='sk-your-primary-key' \
|
||||
--from-literal=api-key-2='sk-your-secondary-key' \
|
||||
--from-literal=api-key-batch='sk-your-batch-key'
|
||||
```
|
||||
|
||||
**Step 2 — Values file**
|
||||
|
||||
```yaml
|
||||
# openai-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "env.OPENAI_KEY_1"
|
||||
weight: 2 # 50% of traffic
|
||||
models: ["*"]
|
||||
- name: "openai-secondary"
|
||||
value: "env.OPENAI_KEY_2"
|
||||
weight: 1 # 25%
|
||||
models: ["gpt-4o-mini"] # restrict to cheaper model
|
||||
- name: "openai-batch"
|
||||
value: "env.OPENAI_KEY_BATCH"
|
||||
weight: 1 # 25%
|
||||
models: ["*"]
|
||||
use_for_batch_api: true
|
||||
|
||||
providerSecrets:
|
||||
openai-key-1:
|
||||
existingSecret: "openai-credentials"
|
||||
key: "api-key-1"
|
||||
envVar: "OPENAI_KEY_1"
|
||||
openai-key-2:
|
||||
existingSecret: "openai-credentials"
|
||||
key: "api-key-2"
|
||||
envVar: "OPENAI_KEY_2"
|
||||
openai-key-batch:
|
||||
existingSecret: "openai-credentials"
|
||||
key: "api-key-batch"
|
||||
envVar: "OPENAI_KEY_BATCH"
|
||||
```
|
||||
|
||||
**Step 3 — Install**
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f openai-values.yaml
|
||||
```
|
||||
|
||||
**Optional — per-provider network config**
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "primary"
|
||||
value: "env.OPENAI_KEY_1"
|
||||
weight: 1
|
||||
network_config:
|
||||
default_request_timeout_in_seconds: 120
|
||||
max_retries: 3
|
||||
retry_backoff_initial_ms: 500
|
||||
retry_backoff_max_ms: 5000
|
||||
max_conns_per_host: 5000
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Anthropic">
|
||||
|
||||
### Anthropic
|
||||
|
||||
```bash
|
||||
kubectl create secret generic anthropic-credentials \
|
||||
--from-literal=api-key-1='sk-ant-your-primary-key' \
|
||||
--from-literal=api-key-2='sk-ant-your-secondary-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# anthropic-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
anthropic:
|
||||
keys:
|
||||
- name: "anthropic-primary"
|
||||
value: "env.ANTHROPIC_KEY_1"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
- name: "anthropic-secondary"
|
||||
value: "env.ANTHROPIC_KEY_2"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
|
||||
providerSecrets:
|
||||
anthropic-key-1:
|
||||
existingSecret: "anthropic-credentials"
|
||||
key: "api-key-1"
|
||||
envVar: "ANTHROPIC_KEY_1"
|
||||
anthropic-key-2:
|
||||
existingSecret: "anthropic-credentials"
|
||||
key: "api-key-2"
|
||||
envVar: "ANTHROPIC_KEY_2"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f anthropic-values.yaml
|
||||
```
|
||||
|
||||
**Override Anthropic beta headers** (optional):
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
anthropic:
|
||||
keys:
|
||||
- name: "primary"
|
||||
value: "env.ANTHROPIC_KEY_1"
|
||||
weight: 1
|
||||
network_config:
|
||||
beta_header_overrides:
|
||||
redact-thinking-: true
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Azure OpenAI">
|
||||
|
||||
### Azure OpenAI
|
||||
|
||||
Azure requires `azure_key_config` on every key with `endpoint` and `api_version`. Use top-level `aliases` to map logical model names to Azure deployment names.
|
||||
|
||||
Two auth modes are supported:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="API Key">
|
||||
|
||||
**Step 1 — Create secret**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic azure-credentials \
|
||||
--from-literal=api-key='your-azure-openai-api-key' \
|
||||
--from-literal=endpoint='https://your-resource.openai.azure.com'
|
||||
```
|
||||
|
||||
**Step 2 — Values file**
|
||||
|
||||
```yaml
|
||||
# azure-apikey-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
azure:
|
||||
keys:
|
||||
- name: "azure-primary"
|
||||
value: "env.AZURE_API_KEY"
|
||||
weight: 1
|
||||
models: ["gpt-4o", "gpt-4o-mini", "text-embedding-3-small"]
|
||||
azure_key_config:
|
||||
endpoint: "env.AZURE_ENDPOINT"
|
||||
api_version: "2024-10-21"
|
||||
aliases:
|
||||
gpt-4o: "gpt-4o-prod"
|
||||
gpt-4o-mini: "gpt-4o-mini-prod"
|
||||
text-embedding-3-small: "embeddings-prod"
|
||||
|
||||
providerSecrets:
|
||||
azure-api-key:
|
||||
existingSecret: "azure-credentials"
|
||||
key: "api-key"
|
||||
envVar: "AZURE_API_KEY"
|
||||
azure-endpoint:
|
||||
existingSecret: "azure-credentials"
|
||||
key: "endpoint"
|
||||
envVar: "AZURE_ENDPOINT"
|
||||
```
|
||||
|
||||
**Step 3 — Install**
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f azure-apikey-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Managed Identity / Workload Identity">
|
||||
|
||||
When `value` is empty, Bifrost uses `DefaultAzureCredential` — which automatically resolves credentials from:
|
||||
- AKS Workload Identity (recommended for production)
|
||||
- Azure VM managed identity
|
||||
- `az login` (developer machines)
|
||||
|
||||
**Step 1 — Annotate the service account** (AKS Workload Identity)
|
||||
|
||||
```bash
|
||||
# Associate the Kubernetes service account with your Azure managed identity
|
||||
kubectl annotate serviceaccount bifrost \
|
||||
azure.workload.identity/client-id="<MANAGED_IDENTITY_CLIENT_ID>"
|
||||
```
|
||||
|
||||
```yaml
|
||||
serviceAccount:
|
||||
annotations:
|
||||
azure.workload.identity/client-id: "<MANAGED_IDENTITY_CLIENT_ID>"
|
||||
```
|
||||
|
||||
**Step 2 — Create the endpoint secret and values file**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic azure-config \
|
||||
--from-literal=endpoint='https://your-resource.openai.azure.com'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# azure-msi-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
serviceAccount:
|
||||
annotations:
|
||||
azure.workload.identity/client-id: "<MANAGED_IDENTITY_CLIENT_ID>"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
azure:
|
||||
keys:
|
||||
- name: "azure-workload-identity"
|
||||
value: "" # empty = DefaultAzureCredential
|
||||
weight: 1
|
||||
models: ["gpt-4o"]
|
||||
azure_key_config:
|
||||
endpoint: "env.AZURE_ENDPOINT"
|
||||
api_version: "2024-10-21"
|
||||
aliases:
|
||||
gpt-4o: "gpt-4o-prod"
|
||||
|
||||
providerSecrets:
|
||||
azure-endpoint:
|
||||
existingSecret: "azure-config"
|
||||
key: "endpoint"
|
||||
envVar: "AZURE_ENDPOINT"
|
||||
```
|
||||
|
||||
**Step 3 — Install**
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f azure-msi-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
**Multi-region failover** (two deployments, different regions):
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
azure:
|
||||
keys:
|
||||
- name: "eastus"
|
||||
value: "env.AZURE_KEY_EAST"
|
||||
weight: 1
|
||||
azure_key_config:
|
||||
endpoint: "env.AZURE_ENDPOINT_EAST"
|
||||
api_version: "2024-10-21"
|
||||
aliases:
|
||||
gpt-4o: "gpt-4o-eastus"
|
||||
- name: "westus"
|
||||
value: "env.AZURE_KEY_WEST"
|
||||
weight: 1
|
||||
azure_key_config:
|
||||
endpoint: "env.AZURE_ENDPOINT_WEST"
|
||||
api_version: "2024-10-21"
|
||||
aliases:
|
||||
gpt-4o: "gpt-4o-westus"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="AWS Bedrock">
|
||||
|
||||
### AWS Bedrock
|
||||
|
||||
Bedrock requires `bedrock_key_config` with at minimum a `region`. Three auth modes:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Static Credentials">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic aws-credentials \
|
||||
--from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \
|
||||
--from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# bedrock-static-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
bedrock:
|
||||
keys:
|
||||
- name: "bedrock-static"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
bedrock_key_config:
|
||||
region: "us-east-1"
|
||||
access_key: "env.AWS_ACCESS_KEY_ID"
|
||||
secret_key: "env.AWS_SECRET_ACCESS_KEY"
|
||||
deployments:
|
||||
# Logical name -> Bedrock inference profile
|
||||
anthropic.claude-3-5-sonnet: "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
|
||||
providerSecrets:
|
||||
aws-access-key:
|
||||
existingSecret: "aws-credentials"
|
||||
key: "access-key-id"
|
||||
envVar: "AWS_ACCESS_KEY_ID"
|
||||
aws-secret-key:
|
||||
existingSecret: "aws-credentials"
|
||||
key: "secret-access-key"
|
||||
envVar: "AWS_SECRET_ACCESS_KEY"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f bedrock-static-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="IRSA / EKS Pod Identity">
|
||||
|
||||
When only `region` is set, Bifrost inherits credentials from the AWS SDK default chain — IRSA (IAM Roles for Service Accounts), EKS Pod Identity, EC2 instance profile, or `AWS_*` env vars.
|
||||
|
||||
**Step 1 — Annotate the service account with the IAM role**
|
||||
|
||||
```bash
|
||||
kubectl annotate serviceaccount bifrost \
|
||||
eks.amazonaws.com/role-arn="arn:aws:iam::123456789012:role/BifrostBedrockRole"
|
||||
```
|
||||
|
||||
```yaml
|
||||
serviceAccount:
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole"
|
||||
```
|
||||
|
||||
**Step 2 — Values file**
|
||||
|
||||
```yaml
|
||||
# bedrock-irsa-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
serviceAccount:
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
bedrock:
|
||||
keys:
|
||||
- name: "bedrock-irsa"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
bedrock_key_config:
|
||||
region: "us-east-1"
|
||||
# No access_key / secret_key — SDK uses IRSA token automatically
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f bedrock-irsa-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="STS AssumeRole">
|
||||
|
||||
Assumes a cross-account role on top of the default credential chain.
|
||||
|
||||
```yaml
|
||||
# bedrock-assumerole-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
bedrock:
|
||||
keys:
|
||||
- name: "bedrock-assumerole"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
bedrock_key_config:
|
||||
region: "us-west-2"
|
||||
# Source identity from pod's default chain, then assume this role
|
||||
role_arn: "env.AWS_ROLE_ARN"
|
||||
external_id: "env.AWS_EXTERNAL_ID"
|
||||
session_name: "bifrost-session"
|
||||
```
|
||||
|
||||
```bash
|
||||
kubectl create secret generic aws-role-config \
|
||||
--from-literal=role-arn='arn:aws:iam::999999999999:role/CrossAccountBedrockRole' \
|
||||
--from-literal=external-id='your-external-id'
|
||||
```
|
||||
|
||||
```yaml
|
||||
providerSecrets:
|
||||
aws-role-arn:
|
||||
existingSecret: "aws-role-config"
|
||||
key: "role-arn"
|
||||
envVar: "AWS_ROLE_ARN"
|
||||
aws-external-id:
|
||||
existingSecret: "aws-role-config"
|
||||
key: "external-id"
|
||||
envVar: "AWS_EXTERNAL_ID"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f bedrock-assumerole-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
**Batch API — S3 configuration**
|
||||
|
||||
```yaml
|
||||
bedrock_key_config:
|
||||
region: "us-east-1"
|
||||
access_key: "env.AWS_ACCESS_KEY_ID"
|
||||
secret_key: "env.AWS_SECRET_ACCESS_KEY"
|
||||
batch_s3_config:
|
||||
buckets:
|
||||
- bucket_name: "my-bedrock-batch-bucket"
|
||||
prefix: "batch/"
|
||||
is_default: true
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Google Vertex AI">
|
||||
|
||||
### Google Vertex AI
|
||||
|
||||
Vertex requires `vertex_key_config` with `project_id` and `region`. Two auth modes:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Service Account Key">
|
||||
|
||||
```bash
|
||||
# Base64-encode the service account JSON
|
||||
SA_JSON=$(cat service-account-key.json | base64 -w 0)
|
||||
|
||||
kubectl create secret generic gcp-credentials \
|
||||
--from-literal=service-account-json="${SA_JSON}" \
--from-literal=project-id='my-gcp-project'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# vertex-sa-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
vertex:
|
||||
keys:
|
||||
- name: "vertex-sa-key"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
vertex_key_config:
|
||||
project_id: "env.VERTEX_PROJECT_ID"
|
||||
region: "us-central1"
|
||||
auth_credentials: "env.VERTEX_AUTH_CREDENTIALS"
|
||||
|
||||
providerSecrets:
|
||||
vertex-project-id:
|
||||
existingSecret: "gcp-credentials"
|
||||
key: "project-id"
|
||||
envVar: "VERTEX_PROJECT_ID"
|
||||
vertex-sa:
|
||||
existingSecret: "gcp-credentials"
|
||||
key: "service-account-json"
|
||||
envVar: "VERTEX_AUTH_CREDENTIALS"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f vertex-sa-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="GKE Workload Identity / ADC">
|
||||
|
||||
When `auth_credentials` is omitted, Bifrost calls `google.FindDefaultCredentials` — which resolves credentials from:
|
||||
- GKE Workload Identity (recommended)
|
||||
- GCE metadata server (on Compute Engine / Cloud Run)
|
||||
- `GOOGLE_APPLICATION_CREDENTIALS` path
|
||||
- `gcloud auth application-default login` (developer machines)
|
||||
|
||||
**Step 1 — Bind and annotate the service account** (GKE Workload Identity)
|
||||
|
||||
```bash
|
||||
gcloud iam service-accounts add-iam-policy-binding \
|
||||
bifrost-sa@my-project.iam.gserviceaccount.com \
|
||||
--role roles/iam.workloadIdentityUser \
|
||||
--member "serviceAccount:my-project.svc.id.goog[default/bifrost]"
|
||||
```
|
||||
|
||||
```yaml
|
||||
serviceAccount:
|
||||
annotations:
|
||||
iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com"
|
||||
```
|
||||
|
||||
**Step 2 — Values file**
|
||||
|
||||
```yaml
|
||||
# vertex-wli-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
serviceAccount:
|
||||
annotations:
|
||||
iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
vertex:
|
||||
keys:
|
||||
- name: "vertex-workload-identity"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
vertex_key_config:
|
||||
project_id: "my-gcp-project"
|
||||
region: "us-central1"
|
||||
# auth_credentials intentionally omitted → ADC lookup
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f vertex-wli-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Groq / Mistral / Gemini / Others">
|
||||
|
||||
### Standard API-Key Providers
|
||||
|
||||
These providers follow the same simple pattern — one or more keys with weights.
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Groq">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic groq-credentials \
|
||||
--from-literal=api-key='gsk_your_groq_api_key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
groq:
|
||||
keys:
|
||||
- name: "groq-primary"
|
||||
value: "env.GROQ_API_KEY"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
|
||||
providerSecrets:
|
||||
groq-key:
|
||||
existingSecret: "groq-credentials"
|
||||
key: "api-key"
|
||||
envVar: "GROQ_API_KEY"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Gemini">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic gemini-credentials \
|
||||
--from-literal=api-key='your-gemini-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
gemini:
|
||||
keys:
|
||||
- name: "gemini-main"
|
||||
value: "env.GEMINI_API_KEY"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
|
||||
providerSecrets:
|
||||
gemini-key:
|
||||
existingSecret: "gemini-credentials"
|
||||
key: "api-key"
|
||||
envVar: "GEMINI_API_KEY"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Mistral">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic mistral-credentials \
|
||||
--from-literal=api-key='your-mistral-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
mistral:
|
||||
keys:
|
||||
- name: "mistral-main"
|
||||
value: "env.MISTRAL_API_KEY"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
|
||||
providerSecrets:
|
||||
mistral-key:
|
||||
existingSecret: "mistral-credentials"
|
||||
key: "api-key"
|
||||
envVar: "MISTRAL_API_KEY"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Cohere / Perplexity / xAI / Others">
|
||||
|
||||
All standard API-key providers follow the same pattern. Replace the provider name and env var name accordingly:
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
cohere:
|
||||
keys:
|
||||
- name: "cohere-main"
|
||||
value: "env.COHERE_API_KEY"
|
||||
weight: 1
|
||||
perplexity:
|
||||
keys:
|
||||
- name: "perplexity-main"
|
||||
value: "env.PERPLEXITY_API_KEY"
|
||||
weight: 1
|
||||
xai:
|
||||
keys:
|
||||
- name: "xai-main"
|
||||
value: "env.XAI_API_KEY"
|
||||
weight: 1
|
||||
cerebras:
|
||||
keys:
|
||||
- name: "cerebras-main"
|
||||
value: "env.CEREBRAS_API_KEY"
|
||||
weight: 1
|
||||
openrouter:
|
||||
keys:
|
||||
- name: "openrouter-main"
|
||||
value: "env.OPENROUTER_API_KEY"
|
||||
weight: 1
|
||||
nebius:
|
||||
keys:
|
||||
- name: "nebius-main"
|
||||
value: "env.NEBIUS_API_KEY"
|
||||
weight: 1
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
**Install command (any of the above)**
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
-f provider-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Self-Hosted">
|
||||
|
||||
### Self-Hosted Providers
|
||||
|
||||
Self-hosted providers point to a URL you operate. No API key is typically required (`value: ""`).
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Ollama">
|
||||
|
||||
```yaml
|
||||
# ollama-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
ollama:
|
||||
keys:
|
||||
- name: "ollama-local"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
ollama_key_config:
|
||||
url: "http://ollama.default.svc.cluster.local:11434"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f ollama-values.yaml
|
||||
```
|
||||
|
||||
Using an env var for the URL (useful across environments):
|
||||
|
||||
```bash
|
||||
kubectl create secret generic ollama-config \
|
||||
--from-literal=url='http://ollama.default.svc.cluster.local:11434'
|
||||
```
|
||||
|
||||
```yaml
|
||||
ollama_key_config:
|
||||
url: "env.OLLAMA_URL"
|
||||
|
||||
providerSecrets:
|
||||
ollama-url:
|
||||
existingSecret: "ollama-config"
|
||||
key: "url"
|
||||
envVar: "OLLAMA_URL"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="vLLM">
|
||||
|
||||
vLLM instances are model-specific — one key per served model.
|
||||
|
||||
```yaml
|
||||
# vllm-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
vllm:
|
||||
keys:
|
||||
- name: "vllm-llama3-70b"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["llama-3-70b"]
|
||||
vllm_key_config:
|
||||
url: "http://vllm.default.svc.cluster.local:8000"
|
||||
model_name: "meta-llama/Meta-Llama-3-70B-Instruct"
|
||||
- name: "vllm-mistral"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["mistral-7b"]
|
||||
vllm_key_config:
|
||||
url: "http://vllm-mistral.default.svc.cluster.local:8000"
|
||||
model_name: "mistralai/Mistral-7B-Instruct-v0.3"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f vllm-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="SGLang">
|
||||
|
||||
```yaml
|
||||
# sgl-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
sgl:
|
||||
keys:
|
||||
- name: "sgl-main"
|
||||
value: ""
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
sgl_key_config:
|
||||
url: "http://sgl-router.default.svc.cluster.local:30000"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f sgl-values.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="HuggingFace / Replicate">
|
||||
|
||||
These providers use `aliases` to map logical model names to provider-specific IDs.
|
||||
|
||||
```yaml
|
||||
bifrost:
|
||||
providers:
|
||||
huggingface:
|
||||
keys:
|
||||
- name: "hf-main"
|
||||
value: "env.HF_API_KEY"
|
||||
weight: 1
|
||||
models: ["llama-3", "mixtral"]
|
||||
aliases:
|
||||
llama-3: "meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
mixtral: "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
||||
|
||||
replicate:
|
||||
keys:
|
||||
- name: "replicate-main"
|
||||
value: "env.REPLICATE_API_KEY"
|
||||
weight: 1
|
||||
models: ["llama-3"]
|
||||
aliases:
|
||||
llama-3: "meta/meta-llama-3-70b-instruct"
|
||||
replicate_key_config:
|
||||
use_deployments_endpoint: false
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
</Tab>
|
||||
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Multi-Provider Example
|
||||
|
||||
Combine providers in a single values file:
|
||||
|
||||
```yaml
|
||||
# multi-provider-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "env.OPENAI_API_KEY"
|
||||
weight: 2
|
||||
models: ["*"]
|
||||
anthropic:
|
||||
keys:
|
||||
- name: "anthropic-primary"
|
||||
value: "env.ANTHROPIC_API_KEY"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
groq:
|
||||
keys:
|
||||
- name: "groq-primary"
|
||||
value: "env.GROQ_API_KEY"
|
||||
weight: 1
|
||||
models: ["*"]
|
||||
|
||||
providerSecrets:
|
||||
openai-key:
|
||||
existingSecret: "provider-keys"
|
||||
key: "openai"
|
||||
envVar: "OPENAI_API_KEY"
|
||||
anthropic-key:
|
||||
existingSecret: "provider-keys"
|
||||
key: "anthropic"
|
||||
envVar: "ANTHROPIC_API_KEY"
|
||||
groq-key:
|
||||
existingSecret: "provider-keys"
|
||||
key: "groq"
|
||||
envVar: "GROQ_API_KEY"
|
||||
|
||||
plugins:
|
||||
logging:
|
||||
enabled: true
|
||||
governance:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
```bash
|
||||
# Create a single secret with all provider keys
|
||||
kubectl create secret generic provider-keys \
|
||||
--from-literal=openai='sk-your-openai-key' \
|
||||
--from-literal=anthropic='sk-ant-your-anthropic-key' \
|
||||
--from-literal=groq='gsk_your-groq-key'
|
||||
|
||||
helm install bifrost bifrost/bifrost -f multi-provider-values.yaml
|
||||
```
|
||||
550
docs/deployment-guides/helm/storage.mdx
Normal file
550
docs/deployment-guides/helm/storage.mdx
Normal file
@@ -0,0 +1,550 @@
|
||||
---
|
||||
title: "Storage"
|
||||
description: "Configure Bifrost storage backends in Helm — SQLite, PostgreSQL (embedded and external), per-store overrides, and S3/GCS object storage for logs"
|
||||
icon: "database"
|
||||
---
|
||||
|
||||
Bifrost persists two types of data — **config** (providers, virtual keys, governance rules) and **logs** (request/response records). Each has its own store, both defaulting to the top-level `storage.mode`.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `storage.mode` | Default backend for both stores (`sqlite` or `postgres`) | `sqlite` |
|
||||
| `storage.configStore.type` | Override backend for the config store | `""` (inherits `storage.mode`) |
|
||||
| `storage.logsStore.type` | Override backend for the logs store | `""` (inherits `storage.mode`) |
|
||||
|
||||
<Note>
|
||||
When any store uses SQLite, the chart deploys Bifrost as a **StatefulSet** with a PVC. When both stores use PostgreSQL (no SQLite), it deploys a **Deployment** instead. Mixing backends (e.g. config=postgres, logs=sqlite) still requires a StatefulSet.
|
||||
</Note>
|
||||
|
||||
---
|
||||
|
||||
<Tabs>
|
||||
|
||||
<Tab title="SQLite">
|
||||
|
||||
### SQLite (Default)
|
||||
|
||||
Simplest setup — no external database required. Bifrost runs as a StatefulSet with a persistent volume for the SQLite files.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `storage.persistence.enabled` | Create a PVC for SQLite data | `true` |
|
||||
| `storage.persistence.size` | PVC size | `10Gi` |
|
||||
| `storage.persistence.accessMode` | PVC access mode | `ReadWriteOnce` |
|
||||
| `storage.persistence.storageClass` | Storage class (leave empty for cluster default) | `""` |
|
||||
| `storage.persistence.existingClaim` | Reuse an existing PVC | `""` |
|
||||
|
||||
```yaml
|
||||
# sqlite-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: sqlite
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 20Gi
|
||||
# storageClass: "gp3" # uncomment to pin storage class
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-32-byte-encryption-key-here"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f sqlite-values.yaml
|
||||
```
|
||||
|
||||
**Reuse an existing PVC** (e.g. after a StatefulSet migration):
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
persistence:
|
||||
existingClaim: "bifrost-data"
|
||||
```
|
||||
|
||||
<Warning>
|
||||
Switching from SQLite to PostgreSQL requires a data migration — the two backends are not compatible. Plan accordingly before changing `storage.mode` on a running deployment.
|
||||
</Warning>
|
||||
|
||||
#### StatefulSet Migration (chart v2.0.0+)
|
||||
|
||||
Prior to chart v2.0.0, SQLite used a Deployment + manual PVC. Chart v2.0.0 moved SQLite to a StatefulSet. If you are upgrading from an older chart:
|
||||
|
||||
```bash
|
||||
# 1. Scale down the old deployment
|
||||
kubectl scale deployment bifrost --replicas=0
|
||||
|
||||
# 2. Note the existing PVC name
|
||||
kubectl get pvc
|
||||
|
||||
# 3. Upgrade the chart, pointing at the existing claim
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set storage.persistence.existingClaim=<your-old-pvc-name> \
|
||||
--set image.tag=v1.4.11
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Embedded PostgreSQL">
|
||||
|
||||
### Embedded PostgreSQL
|
||||
|
||||
The chart can deploy a PostgreSQL instance alongside Bifrost. Good for simple production setups where you don't have an existing database.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `storage.mode` | Set to `postgres` | `sqlite` |
|
||||
| `postgresql.enabled` | Deploy PostgreSQL as a sub-deployment | `false` |
|
||||
| `postgresql.auth.username` | Database user | `bifrost` |
|
||||
| `postgresql.auth.password` | Database password | `bifrost_password` |
|
||||
| `postgresql.auth.database` | Database name | `bifrost` |
|
||||
| `postgresql.primary.persistence.size` | PVC size for PostgreSQL data | `8Gi` |
|
||||
|
||||
<Note>
|
||||
Ensure the database is created with **UTF8 encoding**. The embedded PostgreSQL deployment handles this automatically. See [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement) for manual setups.
|
||||
</Note>
|
||||
|
||||
```bash
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-secure-postgres-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# embedded-postgres-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
auth:
|
||||
username: bifrost
|
||||
password: "your-secure-postgres-password" # use existingSecret in production
|
||||
database: bifrost
|
||||
primary:
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 50Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 4Gi
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-32-byte-encryption-key-here"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f embedded-postgres-values.yaml
|
||||
```
|
||||
|
||||
**Verify the connection from Bifrost:**
|
||||
|
||||
```bash
|
||||
kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="External PostgreSQL">
|
||||
|
||||
### External PostgreSQL
|
||||
|
||||
Point Bifrost at an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `postgresql.enabled` | Must be `false` | `false` |
|
||||
| `postgresql.external.enabled` | Enable external connection | `false` |
|
||||
| `postgresql.external.host` | Hostname or IP | `""` |
|
||||
| `postgresql.external.port` | Port | `5432` |
|
||||
| `postgresql.external.user` | Username | `bifrost` |
|
||||
| `postgresql.external.database` | Database name | `bifrost` |
|
||||
| `postgresql.external.sslMode` | SSL mode (`disable`, `require`, `verify-ca`, `verify-full`) | `disable` |
|
||||
| `postgresql.external.existingSecret` | Secret name for the password | `""` |
|
||||
| `postgresql.external.passwordKey` | Key within the secret | `"password"` |
|
||||
|
||||
```bash
|
||||
kubectl create secret generic external-postgres-credentials \
|
||||
--from-literal=password='your-external-postgres-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# external-postgres-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "your-rds-endpoint.us-east-1.rds.amazonaws.com"
|
||||
port: 5432
|
||||
user: bifrost
|
||||
database: bifrost
|
||||
sslMode: require
|
||||
existingSecret: "external-postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-32-byte-encryption-key-here"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f external-postgres-values.yaml
|
||||
```
|
||||
|
||||
**Test connectivity before installing:**
|
||||
|
||||
```bash
|
||||
kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \
|
||||
psql "host=your-rds-endpoint.us-east-1.rds.amazonaws.com dbname=bifrost user=bifrost sslmode=require" \
|
||||
-c "SELECT version();"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
<Tab title="Mixed (Config=Postgres, Logs=SQLite)">
|
||||
|
||||
### Mixed Backend
|
||||
|
||||
Run the config store on PostgreSQL (fast lookups, shared across replicas) while keeping logs on SQLite (simpler, cheaper for append-heavy workloads).
|
||||
|
||||
```yaml
|
||||
# mixed-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: sqlite # default fallback
|
||||
configStore:
|
||||
type: postgres # override: config uses postgres
|
||||
logsStore:
|
||||
type: sqlite # explicit: logs use sqlite
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 20Gi # for the SQLite logs store
|
||||
|
||||
postgresql:
|
||||
external:
|
||||
enabled: true
|
||||
host: "your-postgres-host.example.com"
|
||||
port: 5432
|
||||
user: bifrost
|
||||
database: bifrost
|
||||
sslMode: require
|
||||
existingSecret: "postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-32-byte-encryption-key-here"
|
||||
```
|
||||
|
||||
```bash
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-postgres-password'
|
||||
|
||||
helm install bifrost bifrost/bifrost -f mixed-values.yaml
|
||||
```
|
||||
|
||||
<Note>
|
||||
In mixed mode, the chart deploys Bifrost as a StatefulSet (because SQLite is in use) with both a PostgreSQL connection and a local PVC for the SQLite log store.
|
||||
</Note>
|
||||
|
||||
**PostgreSQL connection pool tuning** (high log volume):
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
configStore:
|
||||
type: postgres
|
||||
maxIdleConns: 5
|
||||
maxOpenConns: 50
|
||||
logsStore:
|
||||
type: postgres
|
||||
maxIdleConns: 10
|
||||
maxOpenConns: 100
|
||||
```
|
||||
|
||||
</Tab>
|
||||
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Object Storage for Logs
|
||||
|
||||
Offload large request/response payloads from the database to S3 or GCS. The DB retains only lightweight index records; payloads are fetched on demand.
|
||||
|
||||
<Tabs>
|
||||
<Tab title="AWS S3">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic s3-credentials \
|
||||
--from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \
|
||||
--from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
|
||||
```
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
logsStore:
|
||||
objectStorage:
|
||||
enabled: true
|
||||
type: s3
|
||||
bucket: "bifrost-logs"
|
||||
prefix: "bifrost"
|
||||
compress: true # gzip compression
|
||||
|
||||
# S3 configuration
|
||||
region: us-east-1
|
||||
accessKeyId: "env.S3_ACCESS_KEY_ID"
|
||||
secretAccessKey: "env.S3_SECRET_ACCESS_KEY"
|
||||
# endpoint: "" # Custom endpoint for MinIO / Cloudflare R2
|
||||
# forcePathStyle: false # Set true for MinIO
|
||||
|
||||
bifrost:
|
||||
# inject S3 credentials as env vars
|
||||
providerSecrets:
|
||||
s3-access-key:
|
||||
existingSecret: "s3-credentials"
|
||||
key: "access-key-id"
|
||||
envVar: "S3_ACCESS_KEY_ID"
|
||||
s3-secret-key:
|
||||
existingSecret: "s3-credentials"
|
||||
key: "secret-access-key"
|
||||
envVar: "S3_SECRET_ACCESS_KEY"
|
||||
```
|
||||
|
||||
**Using an IAM role (IRSA / instance profile) instead of static keys:**
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
logsStore:
|
||||
objectStorage:
|
||||
enabled: true
|
||||
type: s3
|
||||
bucket: "bifrost-logs"
|
||||
region: us-east-1
|
||||
# No accessKeyId / secretAccessKey — uses SDK default chain
|
||||
roleArn: "arn:aws:iam::123456789012:role/BifrostS3Role"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Google Cloud Storage">
|
||||
|
||||
```bash
|
||||
kubectl create secret generic gcs-credentials \
|
||||
--from-literal=service-account-json="$(cat service-account-key.json)"
|
||||
```
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
logsStore:
|
||||
objectStorage:
|
||||
enabled: true
|
||||
type: gcs
|
||||
bucket: "bifrost-logs"
|
||||
prefix: "bifrost"
|
||||
compress: true
|
||||
|
||||
# GCS configuration
|
||||
projectId: "my-gcp-project"
|
||||
credentialsJson: "env.GCS_CREDENTIALS_JSON" # omit for Workload Identity
|
||||
|
||||
bifrost:
|
||||
providerSecrets:
|
||||
gcs-creds:
|
||||
existingSecret: "gcs-credentials"
|
||||
key: "service-account-json"
|
||||
envVar: "GCS_CREDENTIALS_JSON"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="MinIO (Self-Hosted)">
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
logsStore:
|
||||
objectStorage:
|
||||
enabled: true
|
||||
type: s3
|
||||
bucket: "bifrost-logs"
|
||||
prefix: "bifrost"
|
||||
compress: false
|
||||
|
||||
region: us-east-1 # can be any value for MinIO
|
||||
endpoint: "http://minio.minio-ns.svc.cluster.local:9000"
|
||||
accessKeyId: "env.MINIO_ACCESS_KEY"
|
||||
secretAccessKey: "env.MINIO_SECRET_KEY"
|
||||
forcePathStyle: true # required for MinIO
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
-f object-storage-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Vector Store
|
||||
|
||||
A vector store is required for [semantic caching](/deployment-guides/helm/plugins). Choose from Weaviate, Redis, or Qdrant (embedded or external), or Pinecone (external only).
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Weaviate">
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: weaviate
|
||||
weaviate:
|
||||
enabled: true # deploy embedded Weaviate
|
||||
replicas: 1
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 20Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 4Gi
|
||||
```
|
||||
|
||||
**External Weaviate:**
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: weaviate
|
||||
weaviate:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
scheme: https
|
||||
host: "weaviate.example.com"
|
||||
apiKey: "env.WEAVIATE_API_KEY"
|
||||
grpcHost: "weaviate-grpc.example.com"
|
||||
grpcSecured: true
|
||||
existingSecret: "weaviate-credentials"
|
||||
apiKeyKey: "api-key"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Redis / Valkey">
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: redis
|
||||
redis:
|
||||
enabled: true # deploy embedded Redis
|
||||
auth:
|
||||
enabled: true
|
||||
password: "redis_password"
|
||||
master:
|
||||
persistence:
|
||||
size: 8Gi
|
||||
```
|
||||
|
||||
**External Redis / AWS MemoryDB:**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic redis-credentials \
|
||||
--from-literal=password='your-redis-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: redis
|
||||
redis:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "your-redis.cache.amazonaws.com"
|
||||
port: 6379
|
||||
useTls: true
|
||||
clusterMode: true # required for AWS MemoryDB
|
||||
existingSecret: "redis-credentials"
|
||||
passwordKey: "password"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Qdrant">
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: qdrant
|
||||
qdrant:
|
||||
enabled: true # deploy embedded Qdrant
|
||||
persistence:
|
||||
size: 10Gi
|
||||
```
|
||||
|
||||
**External Qdrant:**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic qdrant-credentials \
|
||||
--from-literal=api-key='your-qdrant-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: qdrant
|
||||
qdrant:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "qdrant.example.com"
|
||||
port: 6334
|
||||
useTls: true
|
||||
existingSecret: "qdrant-credentials"
|
||||
apiKeyKey: "api-key"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Pinecone">
|
||||
|
||||
Pinecone is external-only.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic pinecone-credentials \
|
||||
--from-literal=api-key='your-pinecone-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: pinecone
|
||||
pinecone:
|
||||
external:
|
||||
enabled: true
|
||||
indexHost: "your-index.svc.us-east1-gcp.pinecone.io"
|
||||
existingSecret: "pinecone-credentials"
|
||||
apiKeyKey: "api-key"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
-f storage-values.yaml
|
||||
```
|
||||
401
docs/deployment-guides/helm/troubleshooting.mdx
Normal file
401
docs/deployment-guides/helm/troubleshooting.mdx
Normal file
@@ -0,0 +1,401 @@
|
||||
---
|
||||
title: "Troubleshooting"
|
||||
description: "Diagnose and fix common issues with Bifrost Helm deployments — pods, database, ingress, secrets, PVCs, and performance"
|
||||
icon: "wrench"
|
||||
---
|
||||
|
||||
This page covers the most common problems encountered when deploying Bifrost with Helm, along with diagnostic commands and fixes.
|
||||
|
||||
---
|
||||
|
||||
## Pod Not Starting
|
||||
|
||||
### Quick diagnostics
|
||||
|
||||
```bash
|
||||
# Show pod status
|
||||
kubectl get pods -l app.kubernetes.io/name=bifrost
|
||||
|
||||
# Show pod events (most useful first step)
|
||||
kubectl describe pod -l app.kubernetes.io/name=bifrost
|
||||
|
||||
# Show pod logs (use --previous if the pod has already crashed)
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost --previous
|
||||
```
|
||||
|
||||
### Image pull errors (`ErrImagePull` / `ImagePullBackOff`)
|
||||
|
||||
```bash
|
||||
# Check which image is being pulled
|
||||
kubectl describe pod -l app.kubernetes.io/name=bifrost | grep "Image:"
|
||||
|
||||
# Verify imagePullSecrets are attached
|
||||
kubectl get pod -l app.kubernetes.io/name=bifrost -o jsonpath='{.items[0].spec.imagePullSecrets}'
|
||||
|
||||
# Test secret manually
|
||||
kubectl get secret <pull-secret-name> -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d | jq .
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- `image.tag` not set — the chart requires it; the pod will not start without it
|
||||
- Pull secret missing or expired (ECR tokens expire after 12 hours)
|
||||
- Incorrect `image.repository` for enterprise registry
|
||||
|
||||
```bash
|
||||
# Fix: set the correct tag
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values --set image.tag=v1.4.11
|
||||
```
|
||||
|
||||
### PVC not binding (`Pending`)
|
||||
|
||||
```bash
|
||||
# Check PVC status
|
||||
kubectl get pvc -l app.kubernetes.io/instance=bifrost
|
||||
|
||||
# Show binding events
|
||||
kubectl describe pvc -l app.kubernetes.io/instance=bifrost
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- No Persistent Volume provisioner in the cluster
|
||||
- `storageClass` set to a class that doesn't exist
|
||||
- `ReadWriteOnce` access mode with multiple replicas (SQLite PVCs are single-node)
|
||||
|
||||
```bash
|
||||
# List available storage classes
|
||||
kubectl get storageclass
|
||||
|
||||
# Fix: pin to a valid storage class
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set storage.persistence.storageClass=standard
|
||||
```
|
||||
|
||||
### ConfigMap / Secret errors
|
||||
|
||||
```bash
|
||||
# View the generated ConfigMap (contains rendered config.json)
|
||||
kubectl get configmap bifrost-config -o yaml
|
||||
|
||||
# View secrets the pod depends on
|
||||
kubectl get secret -l app.kubernetes.io/instance=bifrost
|
||||
|
||||
# Decode a specific secret value
|
||||
kubectl get secret bifrost-encryption -o jsonpath='{.data.key}' | base64 -d
|
||||
```
|
||||
|
||||
### CrashLoopBackOff
|
||||
|
||||
```bash
|
||||
# Get last log lines before the crash
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost --previous --tail=50
|
||||
|
||||
# Common causes shown in logs:
|
||||
# "encryption key is not initialized" → no key provided; optional, but data will be stored in plaintext
|
||||
# "failed to connect to database" → see Database section below
|
||||
# "image.tag is required" → set image.tag in values
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Database Connection Issues
|
||||
|
||||
### Embedded PostgreSQL
|
||||
|
||||
```bash
|
||||
# Check if the PostgreSQL pod is running
|
||||
kubectl get pods -l app.kubernetes.io/name=bifrost-postgresql
|
||||
|
||||
# Connect directly to inspect the database
|
||||
kubectl exec -it deployment/bifrost-postgresql -- psql -U bifrost -d bifrost
|
||||
|
||||
# Test connectivity from the Bifrost pod
|
||||
kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432
|
||||
|
||||
# Check PostgreSQL logs
|
||||
kubectl logs deployment/bifrost-postgresql --tail=50
|
||||
```
|
||||
|
||||
### External PostgreSQL
|
||||
|
||||
```bash
|
||||
# Test connectivity from within the cluster
|
||||
kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \
|
||||
psql "host=your-db-host dbname=bifrost user=bifrost sslmode=require"
|
||||
|
||||
# Verify the secret value is correct
|
||||
kubectl get secret postgres-credentials -o jsonpath='{.data.password}' | base64 -d
|
||||
|
||||
# Check that the external host/port is reachable
|
||||
kubectl exec -it deployment/bifrost -- nc -zv your-db-host 5432
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- `sslMode: disable` when the database requires SSL — set `sslMode: require`
|
||||
- Password in secret doesn't match the database user
|
||||
- Network policy blocking pod → database traffic
|
||||
- Database not UTF8 encoded (see [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement))
|
||||
|
||||
```bash
|
||||
# Fix: update the secret and restart
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='correct-password' \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
kubectl rollout restart deployment/bifrost
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Ingress Not Working
|
||||
|
||||
```bash
|
||||
# Check ingress resource status
|
||||
kubectl describe ingress bifrost
|
||||
|
||||
# Check if the ingress controller is running
|
||||
kubectl get pods -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx
|
||||
|
||||
# View ingress controller logs for routing errors
|
||||
kubectl logs -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx --tail=50
|
||||
|
||||
# Verify DNS resolves to the correct load balancer IP
|
||||
nslookup bifrost.yourdomain.com
|
||||
kubectl get ingress bifrost -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
|
||||
|
||||
# Test without TLS first
|
||||
curl -v http://bifrost.yourdomain.com/health
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- `ingress.className` not set or set to a class not installed in the cluster
|
||||
- TLS certificate not issued yet (cert-manager can take up to 60 seconds)
|
||||
- Service port mismatch — Bifrost listens on `8080` by default
|
||||
|
||||
```bash
|
||||
# Check cert-manager certificate status
|
||||
kubectl get certificate -l app.kubernetes.io/instance=bifrost
|
||||
kubectl describe certificate bifrost-tls
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Secret and Credential Issues
|
||||
|
||||
### Provider API key not resolving
|
||||
|
||||
If Bifrost logs show `env.OPENAI_API_KEY: not set` or similar:
|
||||
|
||||
```bash
|
||||
# Check the env var is present in the running pod
|
||||
kubectl exec -it deployment/bifrost -- env | grep OPENAI
|
||||
|
||||
# Verify the providerSecrets secret exists with the right key
|
||||
kubectl get secret provider-api-keys -o yaml
|
||||
|
||||
# Check the providerSecrets configuration rendered correctly
|
||||
kubectl get configmap bifrost-config -o yaml | grep -A5 providers
|
||||
```
|
||||
|
||||
### Encryption key issues
|
||||
|
||||
```bash
|
||||
# Verify the secret exists and contains the right key name
|
||||
kubectl get secret bifrost-encryption -o yaml
|
||||
|
||||
# Check the exact key name matches encryptionKeySecret.key in values
|
||||
# Default key name is "encryption-key" — if you used "key", set:
|
||||
# bifrost.encryptionKeySecret.key: "key"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## High Memory Usage
|
||||
|
||||
```bash
|
||||
# Check current resource usage
|
||||
kubectl top pods -l app.kubernetes.io/name=bifrost
|
||||
|
||||
# Check if OOM kills are happening
|
||||
kubectl describe pod -l app.kubernetes.io/name=bifrost | grep -A3 "OOMKilled\|Limits"
|
||||
|
||||
# View resource requests/limits on running pods
|
||||
kubectl get pod -l app.kubernetes.io/name=bifrost \
|
||||
-o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].resources}{"\n"}{end}'
|
||||
```
|
||||
|
||||
**Increase resource limits:**
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set resources.limits.memory=4Gi \
|
||||
--set resources.requests.memory=1Gi
|
||||
```
|
||||
|
||||
**Tune Go runtime** (see [Docker Tuning](/deployment-guides/docker-tuning)):
|
||||
|
||||
```yaml
|
||||
env:
|
||||
- name: GOGC
|
||||
value: "200" # run GC less often
|
||||
- name: GOMEMLIMIT
|
||||
value: "3500MiB" # soft memory limit for the Go runtime, set slightly below the container limit
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## High CPU Usage / Latency
|
||||
|
||||
```bash
|
||||
# Check CPU usage
|
||||
kubectl top pods -l app.kubernetes.io/name=bifrost
|
||||
|
||||
# Check if HPA is scaling correctly
|
||||
kubectl get hpa bifrost
|
||||
kubectl describe hpa bifrost
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- `initialPoolSize` too small — requests queue up waiting for worker goroutines; increase to `500`–`1000`
|
||||
- `dropExcessRequests: false` with a small pool — queue depth growing unboundedly
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set bifrost.client.initialPoolSize=1000 \
|
||||
--set bifrost.client.dropExcessRequests=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Autoscaling Issues
|
||||
|
||||
### HPA not scaling
|
||||
|
||||
```bash
|
||||
# Check HPA status and current metrics
|
||||
kubectl describe hpa bifrost
|
||||
|
||||
# Verify metrics server is installed
|
||||
kubectl top nodes
|
||||
kubectl top pods
|
||||
|
||||
# Common cause: metrics server not installed
|
||||
# Install with:
|
||||
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
|
||||
```
|
||||
|
||||
### Pods scaling down too aggressively (drops active SSE streams)
|
||||
|
||||
The default `scaleDown.stabilizationWindowSeconds: 300` and `preStop` sleep of 15 seconds should prevent this. If streams are still being cut:
|
||||
|
||||
```yaml
|
||||
terminationGracePeriodSeconds: 120 # increase if streams run longer than 90s (120s grace period minus the 30s preStop sleep below)
|
||||
|
||||
autoscaling:
|
||||
behavior:
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 600 # wait 10 min before scaling down
|
||||
policies:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 300 # remove at most 1 pod per 5 min
|
||||
|
||||
lifecycle:
|
||||
preStop:
|
||||
exec:
|
||||
command: ["sh", "-c", "sleep 30"] # give load balancer more time to drain
|
||||
```
|
||||
|
||||
```bash
|
||||
helm upgrade bifrost bifrost/bifrost --reuse-values -f graceful-shutdown-values.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## SQLite / PVC Issues
|
||||
|
||||
### StatefulSet migration (upgrading from chart < v2.0.0)
|
||||
|
||||
Older chart versions used a Deployment + manual PVC. Chart v2.0.0 moved SQLite to a StatefulSet. If you are upgrading from an older chart:
|
||||
|
||||
```bash
|
||||
# 1. Scale down the old deployment
|
||||
kubectl scale deployment bifrost --replicas=0
|
||||
|
||||
# 2. Note the existing PVC name
|
||||
kubectl get pvc
|
||||
|
||||
# 3. Upgrade, pointing at the existing claim
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set storage.persistence.existingClaim=<your-old-pvc-name> \
|
||||
--set image.tag=v1.4.11
|
||||
```
|
||||
|
||||
### Data lost after upgrade
|
||||
|
||||
```bash
|
||||
# Check if PVCs still exist (they persist after helm uninstall)
|
||||
kubectl get pvc -l app.kubernetes.io/instance=bifrost
|
||||
|
||||
# Re-attach by setting existingClaim
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set storage.persistence.existingClaim=<pvc-name>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cluster Mode Issues
|
||||
|
||||
### Peers not discovering each other
|
||||
|
||||
```bash
|
||||
# Check gossip port is reachable between pods
|
||||
kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946
|
||||
|
||||
# View gossip-related log lines
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 | grep -i gossip
|
||||
|
||||
# Check the headless service exists
|
||||
kubectl get svc bifrost-headless
|
||||
```
|
||||
|
||||
For Kubernetes-based discovery, verify the service account has pod list permissions:
|
||||
|
||||
```bash
|
||||
kubectl auth can-i list pods --as=system:serviceaccount:default:bifrost
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Useful Diagnostic Commands
|
||||
|
||||
```bash
|
||||
# Full state dump for a support ticket
|
||||
kubectl get all -l app.kubernetes.io/instance=bifrost
|
||||
kubectl describe pod -l app.kubernetes.io/name=bifrost > pod-describe.txt
|
||||
kubectl logs -l app.kubernetes.io/name=bifrost --tail=200 > pod-logs.txt
|
||||
|
||||
# View the full rendered config.json
|
||||
kubectl get configmap bifrost-config -o jsonpath='{.data.config\.json}' | jq .
|
||||
|
||||
# Check current Helm values (shows all overrides)
|
||||
helm get values bifrost
|
||||
|
||||
# Check Helm release status
|
||||
helm status bifrost
|
||||
|
||||
# View Helm release history
|
||||
helm history bifrost
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- [GitHub Issues](https://github.com/maximhq/bifrost/issues) — search existing issues or open a new one
|
||||
- [Enterprise Support](mailto:support@getmaxim.ai) — for enterprise customers with SLA
|
||||
718
docs/deployment-guides/helm/values.mdx
Normal file
718
docs/deployment-guides/helm/values.mdx
Normal file
@@ -0,0 +1,718 @@
|
||||
---
|
||||
title: "Values Reference"
|
||||
description: "Complete reference for Bifrost Helm chart values — key parameters, how to supply them, and links to example files"
|
||||
icon: "sliders"
|
||||
---
|
||||
|
||||
This page covers every top-level parameter group in the Bifrost Helm chart's `values.yaml`, how to supply values via `--set` vs `-f`, and where to find ready-made example files.
|
||||
|
||||
<Note>
|
||||
The full values schema is available at [https://getbifrost.ai/schema](https://getbifrost.ai/schema). All `values.yaml` fields map directly to `config.json` fields generated by the chart.
|
||||
</Note>
|
||||
|
||||
## Supplying Values
|
||||
|
||||
### One-liner with `--set`
|
||||
|
||||
Good for a single field or quick experiments:
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
--set replicaCount=3 \
|
||||
--set bifrost.client.initialPoolSize=500
|
||||
```
|
||||
|
||||
### Values file with `-f`
|
||||
|
||||
Recommended for anything beyond a couple of fields:
|
||||
|
||||
```bash
|
||||
# Create your values file
|
||||
cat > my-values.yaml <<'EOF'
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
replicaCount: 2
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-32-byte-encryption-key-here"
|
||||
client:
|
||||
initialPoolSize: 500
|
||||
enableLogging: true
|
||||
EOF
|
||||
|
||||
# Install
|
||||
helm install bifrost bifrost/bifrost -f my-values.yaml
|
||||
|
||||
# Upgrade later
|
||||
helm upgrade bifrost bifrost/bifrost -f my-values.yaml
|
||||
|
||||
# Upgrade and reuse all previously set values, overriding only one field
|
||||
helm upgrade bifrost bifrost/bifrost \
|
||||
--reuse-values \
|
||||
--set replicaCount=5
|
||||
```
|
||||
|
||||
### Multiple values files
|
||||
|
||||
Later files override earlier ones — useful for a base + environment-specific overlay:
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
-f base-values.yaml \
|
||||
-f production-overrides.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Parameters Reference
|
||||
|
||||
### Image
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `image.repository` | Container image repository | `docker.io/maximhq/bifrost` |
|
||||
| `image.tag` | **Required.** Image version (e.g. `v1.4.11`) | `""` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
| `imagePullSecrets` | List of pull secret names for private registries | `[]` |
|
||||
|
||||
```bash
|
||||
# Always specify the tag — image.tag is required and has no default
|
||||
helm install bifrost bifrost/bifrost --set image.tag=v1.4.11
|
||||
```
|
||||
|
||||
### Replicas & Autoscaling
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `replicaCount` | Static replica count (ignored when HPA is enabled) | `1` |
|
||||
| `autoscaling.enabled` | Enable Horizontal Pod Autoscaler | `false` |
|
||||
| `autoscaling.minReplicas` | Minimum replicas | `1` |
|
||||
| `autoscaling.maxReplicas` | Maximum replicas | `10` |
|
||||
| `autoscaling.targetCPUUtilizationPercentage` | CPU target for scaling | `80` |
|
||||
| `autoscaling.targetMemoryUtilizationPercentage` | Memory target for scaling | `80` |
|
||||
| `autoscaling.behavior.scaleDown.stabilizationWindowSeconds` | Cooldown before scale-down (important for SSE streams) | `300` |
|
||||
| `autoscaling.behavior.scaleDown.policies[0].value` | Max pods removed per period | `1` |
|
||||
|
||||
### Resources
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `resources.requests.cpu` | CPU request | `500m` |
|
||||
| `resources.requests.memory` | Memory request | `512Mi` |
|
||||
| `resources.limits.cpu` | CPU limit | `2000m` |
|
||||
| `resources.limits.memory` | Memory limit | `2Gi` |
|
||||
|
||||
### Service
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `service.type` | `ClusterIP`, `LoadBalancer`, or `NodePort` | `ClusterIP` |
|
||||
| `service.port` | Service port | `8080` |
|
||||
|
||||
### Ingress
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `ingress.enabled` | Enable ingress | `false` |
|
||||
| `ingress.className` | Ingress class (e.g. `nginx`, `traefik`) | `""` |
|
||||
| `ingress.annotations` | Ingress annotations | `{}` |
|
||||
| `ingress.hosts` | Host rules | see values.yaml |
|
||||
| `ingress.tls` | TLS configuration | `[]` |
|
||||
|
||||
```yaml
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "100m"
|
||||
hosts:
|
||||
- host: bifrost.yourdomain.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: bifrost-tls
|
||||
hosts:
|
||||
- bifrost.yourdomain.com
|
||||
```
|
||||
|
||||
### Probes
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `livenessProbe.initialDelaySeconds` | Seconds before first liveness check | `30` |
|
||||
| `livenessProbe.periodSeconds` | Liveness check interval | `30` |
|
||||
| `readinessProbe.initialDelaySeconds` | Seconds before first readiness check | `10` |
|
||||
| `readinessProbe.periodSeconds` | Readiness check interval | `10` |
|
||||
|
||||
Both probes hit `GET /health`.
|
||||
|
||||
### Graceful Shutdown
|
||||
|
||||
Bifrost supports long-lived SSE streaming connections. The default `preStop` hook and termination grace period let in-flight streams finish before the pod is killed:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `terminationGracePeriodSeconds` | Total grace period | `60` |
|
||||
| `lifecycle.preStop.exec.command` | Sleep before SIGTERM so load balancer drains | `["sh", "-c", "sleep 15"]` |
|
||||
|
||||
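Increase `terminationGracePeriodSeconds` if your typical stream responses take longer than 45 seconds — with the defaults, the 15-second `preStop` sleep counts against the 60-second grace period, leaving about 45 seconds for in-flight streams to finish.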
|
||||
|
||||
### Service Account
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `serviceAccount.create` | Create a dedicated service account | `true` |
|
||||
| `serviceAccount.annotations` | Annotations (e.g. for IRSA, Workload Identity) | `{}` |
|
||||
| `serviceAccount.name` | Override the generated name | `""` |
|
||||
|
||||
### Pod Scheduling
|
||||
|
||||
```yaml
|
||||
# Spread replicas across nodes
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: bifrost
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
# Pin to specific node pool
|
||||
nodeSelector:
|
||||
node-type: ai-workload
|
||||
|
||||
# Tolerate GPU taints
|
||||
tolerations:
|
||||
- key: "gpu"
|
||||
operator: "Equal"
|
||||
value: "true"
|
||||
effect: "NoSchedule"
|
||||
```
|
||||
|
||||
### Extra Environment Variables
|
||||
|
||||
Three ways to inject env vars:
|
||||
|
||||
```yaml
|
||||
# Inline key/value pairs
|
||||
env:
|
||||
- name: HTTP_PROXY
|
||||
value: "http://proxy.corp.example.com:3128"
|
||||
|
||||
# Map syntax (appended after env)
|
||||
extraEnv:
|
||||
NO_PROXY: "169.254.169.254,10.0.0.0/8"
|
||||
|
||||
# Bulk-load from existing Secrets or ConfigMaps
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: my-corp-secrets
|
||||
- configMapRef:
|
||||
name: my-app-config
|
||||
```
|
||||
|
||||
### Init Containers
|
||||
|
||||
```yaml
|
||||
initContainers:
|
||||
- name: wait-for-db
|
||||
image: busybox:1.35
|
||||
command: ["sh", "-c", "until nc -z postgres-svc 5432; do sleep 2; done"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Values Examples
|
||||
|
||||
The chart ships ready-made example files under [`helm-charts/bifrost/values-examples/`](https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples):
|
||||
|
||||
| File | Use case |
|
||||
|------|----------|
|
||||
| `sqlite-only.yaml` | Minimal local/dev setup |
|
||||
| `postgres-only.yaml` | Single-store Postgres |
|
||||
| `production-ha.yaml` | HA: 3 replicas, Postgres, Weaviate, HPA, Ingress |
|
||||
| `providers-and-virtual-keys.yaml` | All 23 providers + 7 virtual key patterns |
|
||||
| `secrets-from-k8s.yaml` | All sensitive values from Kubernetes Secrets |
|
||||
| `external-postgres.yaml` | Point at an existing Postgres instance |
|
||||
| `postgres-redis.yaml` | Postgres + Redis vector store |
|
||||
| `postgres-weaviate.yaml` | Postgres + Weaviate vector store |
|
||||
| `postgres-qdrant.yaml` | Postgres + Qdrant vector store |
|
||||
| `semantic-cache-secret-example.yaml` | Semantic cache with secret injection |
|
||||
| `mixed-backend.yaml` | Config store = postgres, logs store = sqlite |
|
||||
|
||||
Install from an example file directly:
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
-f https://raw.githubusercontent.com/maximhq/bifrost/main/helm-charts/bifrost/values-examples/production-ha.yaml \
|
||||
--set image.tag=v1.4.11
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Helm Operations
|
||||
|
||||
### View current values
|
||||
|
||||
```bash
|
||||
helm get values bifrost
|
||||
```
|
||||
|
||||
### Diff before upgrading (requires helm-diff plugin)
|
||||
|
||||
```bash
|
||||
helm diff upgrade bifrost bifrost/bifrost -f my-values.yaml
|
||||
```
|
||||
|
||||
### Rollback
|
||||
|
||||
```bash
|
||||
helm history bifrost
|
||||
helm rollback bifrost # to previous revision
|
||||
helm rollback bifrost 2 # to revision 2
|
||||
```
|
||||
|
||||
### Uninstall
|
||||
|
||||
```bash
|
||||
helm uninstall bifrost
|
||||
|
||||
# Also remove PVCs (deletes all data)
|
||||
kubectl delete pvc -l app.kubernetes.io/instance=bifrost
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## All Key Parameters
|
||||
|
||||
A quick-reference table of the most commonly used top-level parameters:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `image.tag` | **Required.** Bifrost image version (e.g., `v1.4.11`) | `""` |
|
||||
| `replicaCount` | Number of replicas | `1` |
|
||||
| `storage.mode` | Storage backend (`sqlite` or `postgres`) | `sqlite` |
|
||||
| `storage.persistence.size` | PVC size for SQLite | `10Gi` |
|
||||
| `postgresql.enabled` | Deploy embedded PostgreSQL | `false` |
|
||||
| `vectorStore.enabled` | Enable vector store | `false` |
|
||||
| `vectorStore.type` | Vector store type (`none`, `weaviate`, `redis`, `qdrant`) | `none` |
|
||||
| `bifrost.encryptionKey` | Optional encryption key (use `encryptionKeySecret` in production). If omitted, data is stored in plaintext. | `""` |
|
||||
| `ingress.enabled` | Enable ingress | `false` |
|
||||
| `autoscaling.enabled` | Enable HPA | `false` |
|
||||
|
||||
### Secret Reference Parameters
|
||||
|
||||
Use existing Kubernetes Secrets instead of plain-text values. Every sensitive field in the chart has a corresponding `existingSecret` / `secretRef` alternative:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `bifrost.encryptionKeySecret.name` | Secret name for encryption key | `""` |
|
||||
| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` |
|
||||
| `postgresql.external.existingSecret` | Secret name for PostgreSQL password | `""` |
|
||||
| `postgresql.external.passwordKey` | Key within the secret | `"password"` |
|
||||
| `vectorStore.redis.external.existingSecret` | Secret name for Redis password | `""` |
|
||||
| `vectorStore.redis.external.passwordKey` | Key within the secret | `"password"` |
|
||||
| `vectorStore.weaviate.external.existingSecret` | Secret name for Weaviate API key | `""` |
|
||||
| `vectorStore.weaviate.external.apiKeyKey` | Key within the secret | `"api-key"` |
|
||||
| `vectorStore.qdrant.external.existingSecret` | Secret name for Qdrant API key | `""` |
|
||||
| `vectorStore.qdrant.external.apiKeyKey` | Key within the secret | `"api-key"` |
|
||||
| `bifrost.plugins.maxim.secretRef.name` | Secret name for Maxim API key | `""` |
|
||||
| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` |
|
||||
| `bifrost.providerSecrets.<provider>.existingSecret` | Secret name for provider API key | `""` |
|
||||
| `bifrost.providerSecrets.<provider>.key` | Key within the secret | `"api-key"` |
|
||||
| `bifrost.providerSecrets.<provider>.envVar` | Environment variable name to inject | `""` |
|
||||
|
||||
---
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Comprehensive Example
|
||||
|
||||
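A production-oriented values file combining the most common settings (replace the inline PostgreSQL password with a Kubernetes Secret before using it in a real production environment):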
|
||||
|
||||
```yaml
|
||||
# my-values.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
replicaCount: 3
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
auth:
|
||||
password: "secure-password" # use existingSecret in production
|
||||
|
||||
autoscaling:
|
||||
enabled: true
|
||||
minReplicas: 3
|
||||
maxReplicas: 10
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: bifrost.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "key"
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "primary"
|
||||
value: "env.OPENAI_API_KEY"
|
||||
weight: 1
|
||||
providerSecrets:
|
||||
openai:
|
||||
existingSecret: "provider-api-keys"
|
||||
key: "openai-api-key"
|
||||
envVar: "OPENAI_API_KEY"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f my-values.yaml
|
||||
```
|
||||
|
||||
### Node Affinity & Scheduling
|
||||
|
||||
Deploy to specific nodes and spread replicas across hosts:
|
||||
|
||||
```yaml
|
||||
nodeSelector:
|
||||
node-type: ai-workload
|
||||
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: bifrost
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
tolerations:
|
||||
- key: "gpu"
|
||||
operator: "Equal"
|
||||
value: "true"
|
||||
effect: "NoSchedule"
|
||||
```
|
||||
|
||||
### Deployment & Pod Annotations
|
||||
|
||||
Useful for tooling like [Keel](https://keel.sh) for automatic image updates or Datadog Autodiscovery for log collection:
|
||||
|
||||
```yaml
|
||||
deploymentAnnotations:
|
||||
keel.sh/policy: force
|
||||
keel.sh/trigger: poll
|
||||
|
||||
podAnnotations:
|
||||
ad.datadoghq.com/bifrost.logs: '[{"source":"bifrost","service":"bifrost"}]'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Patterns
|
||||
|
||||
Ready-made values files for the most common deployment scenarios. Each pattern builds on the [quickstart](/deployment-guides/helm).
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Development">
|
||||
|
||||
Simple setup for local testing. SQLite, single replica, no autoscaling.
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost \
|
||||
--set image.tag=v1.4.11 \
|
||||
--set 'bifrost.providers.openai.keys[0].name=dev-key' \
|
||||
--set 'bifrost.providers.openai.keys[0].value=sk-your-key' \
|
||||
--set 'bifrost.providers.openai.keys[0].weight=1'
|
||||
```
|
||||
|
||||
```bash
|
||||
# Access
|
||||
kubectl port-forward svc/bifrost 8080:8080
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Multi-Provider">
|
||||
|
||||
Multiple LLM providers with weighted load balancing.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic provider-keys \
|
||||
--from-literal=openai-api-key='sk-...' \
|
||||
--from-literal=anthropic-api-key='sk-ant-...' \
|
||||
--from-literal=gemini-api-key='your-gemini-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# multi-provider.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-encryption-key"
|
||||
|
||||
client:
|
||||
enableLogging: true
|
||||
allowDirectKeys: false
|
||||
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "env.OPENAI_API_KEY"
|
||||
weight: 2 # 50% of traffic
|
||||
anthropic:
|
||||
keys:
|
||||
- name: "anthropic-primary"
|
||||
value: "env.ANTHROPIC_API_KEY"
|
||||
weight: 1 # 25%
|
||||
gemini:
|
||||
keys:
|
||||
- name: "gemini-primary"
|
||||
value: "env.GEMINI_API_KEY"
|
||||
weight: 1 # 25%
|
||||
|
||||
providerSecrets:
|
||||
openai:
|
||||
existingSecret: "provider-keys"
|
||||
key: "openai-api-key"
|
||||
envVar: "OPENAI_API_KEY"
|
||||
anthropic:
|
||||
existingSecret: "provider-keys"
|
||||
key: "anthropic-api-key"
|
||||
envVar: "ANTHROPIC_API_KEY"
|
||||
gemini:
|
||||
existingSecret: "provider-keys"
|
||||
key: "gemini-api-key"
|
||||
envVar: "GEMINI_API_KEY"
|
||||
|
||||
plugins:
|
||||
telemetry:
|
||||
enabled: true
|
||||
logging:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f multi-provider.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="External Database">
|
||||
|
||||
Use an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-external-postgres-password'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# external-db.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "your-rds-endpoint.us-east-1.rds.amazonaws.com"
|
||||
port: 5432
|
||||
user: "bifrost"
|
||||
database: "bifrost"
|
||||
sslMode: "require"
|
||||
existingSecret: "postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
bifrost:
|
||||
encryptionKey: "your-encryption-key"
|
||||
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "sk-..."
|
||||
weight: 1
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f external-db.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="AI Workloads">
|
||||
|
||||
Semantic response caching for high-volume AI inference.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=key='your-32-byte-encryption-key'
|
||||
|
||||
kubectl create secret generic provider-keys \
|
||||
--from-literal=openai-api-key='sk-your-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# ai-workload.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
auth:
|
||||
password: "secure-password"
|
||||
primary:
|
||||
persistence:
|
||||
size: 50Gi
|
||||
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: weaviate
|
||||
weaviate:
|
||||
enabled: true
|
||||
persistence:
|
||||
size: 50Gi
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "key"
|
||||
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "env.OPENAI_API_KEY"
|
||||
weight: 1
|
||||
|
||||
providerSecrets:
|
||||
openai:
|
||||
existingSecret: "provider-keys"
|
||||
key: "openai-api-key"
|
||||
envVar: "OPENAI_API_KEY"
|
||||
|
||||
plugins:
|
||||
semanticCache:
|
||||
enabled: true
|
||||
config:
|
||||
provider: "openai"
|
||||
keys:
|
||||
- value: "env.OPENAI_API_KEY"
|
||||
weight: 1
|
||||
embedding_model: "text-embedding-3-small"
|
||||
dimension: 1536
|
||||
threshold: 0.85
|
||||
ttl: "1h"
|
||||
cache_by_model: true
|
||||
cache_by_provider: true
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f ai-workload.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab title="Kubernetes Secrets Only">
|
||||
|
||||
Zero credentials in values files — all sensitive data in Kubernetes Secrets.
|
||||
|
||||
```bash
|
||||
kubectl create secret generic postgres-credentials \
|
||||
--from-literal=password='your-postgres-password'
|
||||
|
||||
kubectl create secret generic bifrost-encryption \
|
||||
--from-literal=key='your-encryption-key'
|
||||
|
||||
kubectl create secret generic provider-keys \
|
||||
--from-literal=openai-api-key='sk-...' \
|
||||
--from-literal=anthropic-api-key='sk-ant-...'
|
||||
|
||||
kubectl create secret generic qdrant-credentials \
|
||||
--from-literal=api-key='your-qdrant-api-key'
|
||||
```
|
||||
|
||||
```yaml
|
||||
# secrets-only.yaml
|
||||
image:
|
||||
tag: "v1.4.11"
|
||||
|
||||
storage:
|
||||
mode: postgres
|
||||
|
||||
postgresql:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "postgres.example.com"
|
||||
port: 5432
|
||||
user: "bifrost"
|
||||
database: "bifrost"
|
||||
sslMode: "require"
|
||||
existingSecret: "postgres-credentials"
|
||||
passwordKey: "password"
|
||||
|
||||
vectorStore:
|
||||
enabled: true
|
||||
type: qdrant
|
||||
qdrant:
|
||||
enabled: false
|
||||
external:
|
||||
enabled: true
|
||||
host: "qdrant.example.com"
|
||||
port: 6334
|
||||
existingSecret: "qdrant-credentials"
|
||||
apiKeyKey: "api-key"
|
||||
|
||||
bifrost:
|
||||
encryptionKeySecret:
|
||||
name: "bifrost-encryption"
|
||||
key: "key"
|
||||
|
||||
providers:
|
||||
openai:
|
||||
keys:
|
||||
- name: "openai-primary"
|
||||
value: "env.OPENAI_API_KEY"
|
||||
weight: 1
|
||||
anthropic:
|
||||
keys:
|
||||
- name: "anthropic-primary"
|
||||
value: "env.ANTHROPIC_API_KEY"
|
||||
weight: 1
|
||||
|
||||
providerSecrets:
|
||||
openai:
|
||||
existingSecret: "provider-keys"
|
||||
key: "openai-api-key"
|
||||
envVar: "OPENAI_API_KEY"
|
||||
anthropic:
|
||||
existingSecret: "provider-keys"
|
||||
key: "anthropic-api-key"
|
||||
envVar: "ANTHROPIC_API_KEY"
|
||||
```
|
||||
|
||||
```bash
|
||||
helm install bifrost bifrost/bifrost -f secrets-only.yaml
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
Reference in New Issue
Block a user