first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/deployment-guides/helm/governance.mdx
+++ b/docs/deployment-guides/helm/governance.mdx
@@ -0,0 +1,446 @@
+---
+title: "Governance"
+description: "Configure Bifrost governance in Helm — budgets, rate limits, virtual keys, routing rules, and admin authentication"
+icon: "shield"
+---
+
+Governance lets you control who can call which providers, how much they can spend, how fast they can go, and how traffic is routed. Everything is declared under `bifrost.governance` in your values file and seeded into the database at startup.
+
+<Note>
+The governance **plugin** must also be enabled for enforcement to take effect:
+
+```yaml
+bifrost:
+  plugins:
+    governance:
+      enabled: true
+```
+
+See the [Plugins](/deployment-guides/helm/plugins) page for plugin configuration details.
+</Note>
+
+---
+
+## Admin Authentication
+
+Protect the Bifrost dashboard and management API with username/password auth.
+
+```bash
+kubectl create secret generic bifrost-admin-credentials \
+  --from-literal=username='admin' \
+  --from-literal=password='your-secure-admin-password'
+```
+
+```yaml
+# governance-auth-values.yaml
+bifrost:
+  governance:
+    authConfig:
+      isEnabled: true
+      disableAuthOnInference: false   # keep auth on inference routes
+      existingSecret: "bifrost-admin-credentials"
+      usernameKey: "username"
+      passwordKey: "password"
+```
+
+```bash
+helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-auth-values.yaml
+```
+
+---
+
+## Budgets
+
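+Budgets are spending caps that reset on a configurable period. They are referenced by ID from virtual keys, teams, customers, model configs, or providers. The unit letter in a reset duration is case-sensitive: `m` means minutes and `M` means months.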
+
+| Reset duration | Syntax |
+|----------------|--------|
+| 30 seconds | `"30s"` |
+| 5 minutes | `"5m"` |
+| 1 hour | `"1h"` |
+| 1 day | `"1d"` |
+| 1 week | `"1w"` |
+| 1 month | `"1M"` |
+| 1 year | `"1Y"` |
+
+```yaml
+bifrost:
+  governance:
+    budgets:
+      - id: "budget-dev"
+        max_limit: 50          # $50 per month
+        reset_duration: "1M"
+
+      - id: "budget-production"
+        max_limit: 500         # $500 per month
+        reset_duration: "1M"
+
+      - id: "budget-testing"
+        max_limit: 10          # $10 per day
+        reset_duration: "1d"
+
+      - id: "budget-enterprise"
+        max_limit: 5000        # $5000 per month
+        reset_duration: "1M"
+```
+
+---
+
+## Rate Limits
+
+Rate limits are token and request-count caps per time window. They are referenced by ID from virtual keys, teams, customers, model configs, or providers.
+
+```yaml
+bifrost:
+  governance:
+    rateLimits:
+      - id: "rate-limit-standard"
+        token_max_limit: 100000       # 100K tokens per hour
+        token_reset_duration: "1h"
+        request_max_limit: 1000       # 1000 requests per hour
+        request_reset_duration: "1h"
+
+      - id: "rate-limit-high"
+        token_max_limit: 500000       # 500K tokens per hour
+        token_reset_duration: "1h"
+        request_max_limit: 5000
+        request_reset_duration: "1h"
+
+      - id: "rate-limit-burst"
+        token_max_limit: 50000        # 50K tokens per minute (burst)
+        token_reset_duration: "1m"
+        request_max_limit: 500
+        request_reset_duration: "1m"
+
+      - id: "rate-limit-testing"
+        token_max_limit: 10000
+        token_reset_duration: "1h"
+        request_max_limit: 100
+        request_reset_duration: "1h"
+```
+
+---
+
+## Customers & Teams
+
+Optional organizational hierarchy. Virtual keys can be assigned to customers or teams, inheriting their budgets and rate limits.
+
+```yaml
+bifrost:
+  governance:
+    customers:
+      - id: "customer-acme"
+        name: "Acme Corp"
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-high"
+
+      - id: "customer-startup"
+        name: "Startup Inc"
+        budget_id: "budget-dev"
+        rate_limit_id: "rate-limit-standard"
+
+    teams:
+      - id: "team-platform"
+        name: "Platform Team"
+        customer_id: "customer-acme"
+        budget_id: "budget-enterprise"
+        rate_limit_id: "rate-limit-high"
+
+      - id: "team-ml"
+        name: "ML Team"
+        customer_id: "customer-acme"
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-standard"
+```
+
+---
+
+## Virtual Keys
+
+Virtual keys are the primary access tokens issued to callers. They scope which providers, models, and underlying API keys are accessible.
+
+```yaml
+bifrost:
+  governance:
+    virtualKeys:
+      # 1. Unrestricted dev key — access to every provider
+      - id: "vk-dev-all"
+        name: "Dev: all providers"
+        value: "vk-dev-all-secret-token"
+        is_active: true
+        budget_id: "budget-dev"
+        rate_limit_id: "rate-limit-standard"
+        # No provider_configs → all providers allowed
+
+      # 2. OpenAI only — restricted to two models
+      - id: "vk-openai-prod"
+        name: "OpenAI Production"
+        value: "vk-openai-prod-secret-token"
+        is_active: true
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-high"
+        provider_configs:
+          - provider: "openai"
+            weight: 1
+            allowed_models: ["gpt-4o", "gpt-4o-mini"]
+
+      # 3. Multi-provider with weighted routing
+      - id: "vk-multi"
+        name: "Multi-provider weighted"
+        value: "vk-multi-secret-token"
+        is_active: true
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-high"
+        provider_configs:
+          - provider: "openai"
+            weight: 2         # 50%
+            allowed_models: ["*"]
+          - provider: "anthropic"
+            weight: 1         # 25%
+            allowed_models: ["*"]
+          - provider: "groq"
+            weight: 1         # 25%
+            allowed_models: ["*"]
+
+      # 4. Team-scoped key
+      - id: "vk-platform-team"
+        name: "Platform Team Key"
+        value: "vk-platform-team-token"
+        is_active: true
+        team_id: "team-platform"       # inherits team budget/rate-limit
+        provider_configs:
+          - provider: "openai"
+            weight: 1
+            allowed_models: ["*"]
+            key_ids: ["openai-primary"]  # pin to specific configured key by name
+
+      # 5. Restricted testing key
+      - id: "vk-testing"
+        name: "Testing (gpt-4o-mini only)"
+        value: "vk-testing-token"
+        is_active: true
+        budget_id: "budget-testing"
+        rate_limit_id: "rate-limit-testing"
+        provider_configs:
+          - provider: "openai"
+            weight: 1
+            allowed_models: ["gpt-4o-mini"]
+
+      # 6. Batch API key
+      - id: "vk-batch"
+        name: "Batch API workloads"
+        value: "vk-batch-token"
+        is_active: true
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-burst"
+        provider_configs:
+          - provider: "openai"
+            weight: 1
+            allowed_models: ["*"]
+            key_ids: ["openai-batch"]    # only the batch-flagged key
+```
+
+Both `provider_configs[].key_ids` and `provider_configs[].keys` are supported in Helm values. Prefer `key_ids` for parity with `config.json`; each entry should be the name of a configured provider key.
+
+**Use a virtual key in API calls:**
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "x-bf-vk: vk-openai-prod-secret-token" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}'
+```
+
+---
+
+## Model Configs
+
+Apply budgets and rate limits at the model level, independent of virtual keys:
+
+```yaml
+bifrost:
+  governance:
+    modelConfigs:
+      - id: "model-gpt4o"
+        model_name: "gpt-4o"
+        provider: "openai"
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-high"
+
+      - id: "model-claude"
+        model_name: "claude-3-5-sonnet-20241022"
+        provider: "anthropic"
+        rate_limit_id: "rate-limit-standard"
+```
+
+---
+
+## Provider Governance
+
+Apply budgets and rate limits at the provider level:
+
+```yaml
+bifrost:
+  governance:
+    providers:
+      - name: "openai"
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-high"
+        send_back_raw_request: false
+        send_back_raw_response: false
+
+      - name: "anthropic"
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-standard"
+```
+
+---
+
+## Routing Rules
+
+CEL-expression-based routing rules redirect requests to different providers or models based on request attributes.
+
+| Field | Description |
+|-------|-------------|
+| `cel_expression` | CEL expression evaluated against the request; the rule fires when it evaluates to `true` |
+| `targets` | Provider/model targets with weights |
+| `fallbacks` | Providers to try if all targets fail |
+| `scope` | `global`, `team`, `customer`, or `virtual_key` |
+| `scope_id` | Required for non-global scopes |
+| `priority` | Lower number = evaluated first |
+
+```yaml
+bifrost:
+  governance:
+    routingRules:
+      # Route all GPT requests to Azure
+      - id: "route-gpt-to-azure"
+        name: "GPT → Azure"
+        description: "Route all GPT model requests to Azure OpenAI"
+        enabled: true
+        cel_expression: "model.startsWith('gpt-')"
+        targets:
+          - provider: "azure"
+            model: ""        # empty = use original model name
+            weight: 1.0
+        fallbacks: ["openai"]
+        scope: "global"
+        priority: 0
+
+      # Route gpt-4o requests with max_tokens above 4000 to a Llama model on Groq
+      - id: "route-heavy-to-groq"
+        name: "Large context → Groq"
+        enabled: true
+        cel_expression: "model == 'gpt-4o' && request_body.max_tokens > 4000"
+        targets:
+          - provider: "groq"
+            model: "llama-3.3-70b-versatile"
+            weight: 1.0
+        fallbacks: ["openai"]
+        scope: "global"
+        priority: 1
+
+      # Team-scoped rule
+      - id: "route-ml-team-bedrock"
+        name: "ML Team → Bedrock"
+        enabled: true
+        cel_expression: "true"    # match all requests for this scope
+        targets:
+          - provider: "bedrock"
+            model: ""
+            weight: 1.0
+        fallbacks: ["openai"]
+        scope: "team"
+        scope_id: "team-ml"
+        priority: 0
+```
+
+---
+
+## Full Example
+
+```yaml
+# governance-full-values.yaml
+image:
+  tag: "v1.4.11"
+
+bifrost:
+  encryptionKeySecret:
+    name: "bifrost-encryption"
+    key: "encryption-key"
+
+  plugins:
+    governance:
+      enabled: true
+      config:
+        is_vk_mandatory: true
+
+  governance:
+    authConfig:
+      isEnabled: true
+      existingSecret: "bifrost-admin-credentials"
+      usernameKey: "username"
+      passwordKey: "password"
+
+    budgets:
+      - id: "budget-production"
+        max_limit: 500
+        reset_duration: "1M"
+      - id: "budget-dev"
+        max_limit: 50
+        reset_duration: "1M"
+
+    rateLimits:
+      - id: "rate-limit-standard"
+        token_max_limit: 100000
+        token_reset_duration: "1h"
+        request_max_limit: 1000
+        request_reset_duration: "1h"
+
+    virtualKeys:
+      - id: "vk-production"
+        name: "Production"
+        value: "vk-prod-secret-token"
+        is_active: true
+        budget_id: "budget-production"
+        rate_limit_id: "rate-limit-standard"
+        provider_configs:
+          - provider: "openai"
+            weight: 1
+            allowed_models: ["gpt-4o", "gpt-4o-mini"]
+```
+
+```bash
+kubectl create secret generic bifrost-encryption \
+  --from-literal=encryption-key='your-32-byte-key'
+
+kubectl create secret generic bifrost-admin-credentials \
+  --from-literal=username='admin' \
+  --from-literal=password='secure-admin-password'
+
+helm install bifrost bifrost/bifrost -f governance-full-values.yaml
+```
+
+---
+
+## Access Profiles (Enterprise)
+
+You can seed enterprise `access_profiles` directly from Helm values. The chart renders `bifrost.accessProfiles` into top-level `access_profiles` in `config.json`.
+
+```yaml
+bifrost:
+  accessProfiles:
+    - name: "platform-default"
+      description: "Default profile for platform users"
+      is_active: true
+      tags: ["platform", "default"]
+      provider_configs:
+        - provider_name: "openai"
+          all_models_allowed: false
+          allowed_models: ["gpt-4o", "gpt-4o-mini"]
+      mcp_servers:
+        - mcp_server_id: "github"
+      mcp_tool_overrides:
+        - mcp_client_id: "github"
+          tool_name: "create_pull_request"
+          action: "include"
+```