first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/openapi/schemas/management/cache.yaml
+++ b/docs/openapi/schemas/management/cache.yaml
@@ -0,0 +1,9 @@
+# Cache API schemas
+
+ClearCacheResponse:
+  description: Clear cache response
+  type: object
+  properties:
+    message:
+      type: string
+      example: 'Cache cleared successfully'
--- a/docs/openapi/schemas/management/common.yaml
+++ b/docs/openapi/schemas/management/common.yaml
@@ -0,0 +1,35 @@
+# Common schemas used across management APIs
+
+SuccessResponse:
+  description: Generic success response
+  type: object
+  properties:
+    status:
+      type: string
+      example: 'success'
+    message:
+      type: string
+      example: 'Operation completed successfully'
+
+ErrorResponse:
+  description: Error response
+  allOf:  # wrap the reference: OpenAPI 3.0 ignores keys placed beside a bare $ref
+    - $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+MessageResponse:
+  description: Simple message response
+  type: object
+  properties:
+    message:
+      type: string
+
+EnvVar:
+  type: object
+  description: Environment variable configuration
+  properties:
+    value:
+      type: string
+    env_var:
+      type: string
+    from_env:
+      type: boolean
--- a/docs/openapi/schemas/management/config.yaml
+++ b/docs/openapi/schemas/management/config.yaml
@@ -0,0 +1,200 @@
+# Config API schemas
+
+Version:
+  description: Version information
+  type: string
+  example: '1.0.0'
+
+ClientConfig:
+  description: Client configuration
+  type: object
+  properties:
+    drop_excess_requests:
+      description: Whether to drop excess requests when rate limited
+      type: boolean
+    prometheus_labels:
+      description: Custom Prometheus labels
+      type: array
+      items:
+        type: string
+    allowed_origins:
+      description: Allowed CORS origins
+      type: array
+      items:
+        type: string
+    initial_pool_size:
+      description: Initial connection pool size
+      type: integer
+    enable_logging:
+      description: Whether logging is enabled
+      type: boolean
+    disable_content_logging:
+      description: Whether content logging is disabled
+      type: boolean
+    enforce_auth_on_inference:
+      description: Whether to enforce virtual key authentication on inference requests
+      type: boolean
+    enforce_governance_header:
+      description: 'Deprecated: use enforce_auth_on_inference instead'
+      type: boolean
+      deprecated: true
+    allow_direct_keys:
+      description: Whether to allow direct API keys
+      type: boolean
+    max_request_body_size_mb:
+      description: Maximum request body size in MB
+      type: integer
+    compat:
+      description: Compat plugin configuration
+      type: object
+      additionalProperties: false
+      properties:
+        convert_text_to_chat:
+          description: Convert text completion requests to chat
+          type: boolean
+        convert_chat_to_responses:
+          description: Convert chat completion requests to responses
+          type: boolean
+        should_drop_params:
+          description: Drop unsupported parameters based on model catalog
+          type: boolean
+        should_convert_params:
+          description: Converts model parameter values that are not supported by the model
+          type: boolean
+          default: false
+    log_retention_days:
+      description: Number of days to retain logs
+      type: integer
+    header_filter_config:
+      $ref: "#/HeaderFilterConfig"
+    mcp_agent_depth:
+      description: Depth of MCP agent
+      type: integer
+    mcp_tool_execution_timeout:
+      description: Timeout for MCP tool execution in seconds
+      type: integer
+    mcp_code_mode_binding_level:
+      description: Binding level for MCP code mode
+      type: string
+    required_headers:
+      description: Headers that must be present on every request. Requests missing any of these headers are rejected with 400. Case-insensitive matching.
+      type: array
+      items:
+        type: string
+    logging_headers:
+      description: Headers to capture in log metadata. Values are extracted from incoming requests and stored in the metadata field of log entries. Case-insensitive matching. No restart required.
+      type: array
+      items:
+        type: string
+
+FrameworkConfig:
+  description: Framework configuration
+  type: object
+  properties:
+    id:
+      description: Unique identifier for the framework config
+      type: integer
+    pricing_url:
+      description: URL for pricing data
+      type: string
+    pricing_sync_interval:
+      description: Pricing sync interval in seconds
+      type: integer
+      format: int64
+
+AuthConfig:
+  description: Authentication configuration
+  type: object
+  properties:
+    admin_username:
+      type: string
+    admin_password:
+      description: Password (redacted as <redacted> in responses)
+      type: string
+    is_enabled:
+      type: boolean
+    disable_auth_on_inference:
+      type: boolean
+
+HeaderFilterConfig:
+  description: Header filter configuration
+  type: object
+  properties:
+    allowlist:
+      items:
+        type: string
+      type: array
+    denylist:
+      items:
+        type: string
+      type: array
+
+ProxyConfig:
+  description: Global proxy configuration
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    type:
+      type: string
+      enum: ['http', 'socks5', 'tcp']
+    url:
+      type: string
+    username:
+      type: string
+    password:
+      description: Password (redacted as <redacted> in responses)
+      type: string
+    no_proxy:
+      type: string
+    timeout:
+      type: integer
+    skip_tls_verify:
+      type: boolean
+    enable_for_scim:
+      type: boolean
+    enable_for_inference:
+      type: boolean
+    enable_for_api:
+      type: boolean
+
+RestartRequiredConfig:
+  description: Restart required configuration
+  type: object
+  properties:
+    required:
+      type: boolean
+    reason:
+      type: string
+
+GetConfigResponse:
+  description: Configuration response
+  type: object
+  properties:
+    client_config:
+      $ref: "#/ClientConfig"
+    framework_config:
+      $ref: "#/FrameworkConfig"
+    auth_config:
+      $ref: "#/AuthConfig"
+    is_db_connected:
+      type: boolean
+    is_cache_connected:
+      type: boolean
+    is_logs_connected:
+      type: boolean
+    proxy_config:
+      $ref: "#/ProxyConfig"
+    restart_required:
+      $ref: "#/RestartRequiredConfig"
+
+UpdateConfigRequest:
+  description: Update configuration request
+  type: object
+  properties:
+    client_config:
+      $ref: "#/ClientConfig"
+    framework_config:
+      $ref: "#/FrameworkConfig"
+    auth_config:
+      $ref: "#/AuthConfig"
--- a/docs/openapi/schemas/management/governance.yaml
+++ b/docs/openapi/schemas/management/governance.yaml
--- a/docs/openapi/schemas/management/health.yaml
+++ b/docs/openapi/schemas/management/health.yaml
@@ -0,0 +1,15 @@
+# Health API schemas
+
+HealthResponse:
+  description: Health check response
+  type: object
+  properties:
+    status:
+      type: string
+      enum: ['ok']
+      example: 'ok'
+    components:
+      description: Health status of individual components (config_store, log_store, vector_store)
+      type: object
+      additionalProperties:
+        type: string
--- a/docs/openapi/schemas/management/logging.yaml
+++ b/docs/openapi/schemas/management/logging.yaml
@@ -0,0 +1,829 @@
+# Logging API schemas
+
+LogEntry:
+  description: Log entry
+  type: object
+  properties:
+    id:
+      type: string
+    parent_request_id:
+      type: string
+    provider:
+      type: string
+    model:
+      type: string
+    status:
+      type: string
+      enum: [processing, success, error]
+    object:
+      type: string
+    timestamp:
+      type: string
+      format: date-time
+    number_of_retries:
+      type: integer
+    fallback_index:
+      type: integer
+    latency:
+      type: number
+    cost:
+      type: number
+    selected_key_id:
+      type: string
+    selected_key_name:
+      type: string
+    virtual_key_id:
+      type: string
+    virtual_key_name:
+      type: string
+      nullable: true
+    routing_engines_used:
+      description: Array of routing engines used for this request (routing-rule, governance, or loadbalancing)
+      type: array
+      nullable: true
+      items:
+        type: string
+    routing_rule_id:
+      type: string
+      nullable: true
+    routing_rule_name:
+      type: string
+      nullable: true
+    stream:
+      type: boolean
+    raw_request:
+      type: string
+    raw_response:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+    token_usage:
+      $ref: "../../schemas/inference/usage.yaml#/BifrostLLMUsage"
+    error_details:
+      $ref: "../../schemas/inference/common.yaml#/BifrostError"
+    input_history:
+      type: array
+      items:
+        $ref: "../../schemas/inference/chat.yaml#/ChatMessage"
+    responses_input_history:
+      type: array
+      items:
+        $ref: "../../schemas/inference/responses.yaml#/ResponsesMessage"
+    output_message:
+      $ref: "../../schemas/inference/chat.yaml#/ChatMessage"
+    responses_output:
+      type: array
+      items:
+        $ref: "../../schemas/inference/responses.yaml#/ResponsesMessage"
+    embedding_output:
+      type: array
+      items:
+        type: array
+        items:
+          type: number
+    params:
+      type: object
+      additionalProperties: true
+    tools:
+      type: array
+      items:
+        $ref: "../../schemas/inference/chat.yaml#/ChatTool"
+    tool_calls:
+      type: array
+      items:
+        $ref: "../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall"
+    speech_input:
+      type: object
+      additionalProperties: true
+    transcription_input:
+      type: object
+      additionalProperties: true
+    image_generation_input:
+      type: object
+      additionalProperties: true
+    speech_output:
+      type: object
+      additionalProperties: true
+    transcription_output:
+      type: object
+      additionalProperties: true
+    image_generation_output:
+      type: object
+      additionalProperties: true
+    cache_debug:
+      type: object
+      additionalProperties: true
+    metadata:
+      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
+      type: object
+      additionalProperties: true
+    selected_key:
+      type: object
+      additionalProperties: true
+    virtual_key:
+      type: object
+      additionalProperties: true
+    passthrough_request_body:
+      description: Raw passthrough request body (for passthrough integration routes)
+      type: string
+    passthrough_response_body:
+      description: Raw passthrough response body (for passthrough integration routes)
+      type: string
+    routing_engine_logs:
+      description: Detailed logs from the routing engine decision process
+      type: object
+      additionalProperties: true
+    is_large_payload_request:
+      description: Whether the request payload exceeded the large payload threshold
+      type: boolean
+    is_large_payload_response:
+      description: Whether the response payload exceeded the large payload threshold
+      type: boolean
+    rerank_output:
+      description: Rerank operation output
+      type: object
+      additionalProperties: true
+    video_generation_input:
+      description: Video generation request input
+      type: object
+      additionalProperties: true
+    video_generation_output:
+      description: Video generation response output
+      type: object
+      additionalProperties: true
+    video_retrieve_output:
+      description: Video retrieve response output
+      type: object
+      additionalProperties: true
+    video_list_output:
+      description: Video list response output
+      type: object
+      additionalProperties: true
+    video_delete_output:
+      description: Video delete response output
+      type: object
+      additionalProperties: true
+    video_download_output:
+      description: Video download response output
+      type: object
+      additionalProperties: true
+    list_models_output:
+      description: List models response output
+      type: object
+      additionalProperties: true
+
+MCPToolLogEntry:
+  description: MCP tool execution log entry
+  type: object
+  properties:
+    id:
+      description: Unique identifier for the log entry
+      type: string
+    llm_request_id:
+      description: Links to the LLM request that triggered this tool call
+      type: string
+    timestamp:
+      description: When the tool execution started
+      type: string
+      format: date-time
+    tool_name:
+      description: Name of the MCP tool that was executed
+      type: string
+    server_label:
+      description: Label of the MCP server that provided the tool
+      type: string
+    virtual_key_id:
+      description: ID of the virtual key used for this tool execution
+      type: string
+    virtual_key_name:
+      description: Name of the virtual key used for this tool execution
+      type: string
+    arguments:
+      description: Tool execution arguments
+      type: object
+      additionalProperties: true
+    result:
+      description: Tool execution result
+      type: object
+      additionalProperties: true
+    error_details:
+      $ref: "../../schemas/inference/common.yaml#/BifrostError"
+    latency:
+      description: Execution time in milliseconds
+      type: number
+    cost:
+      description: Cost in dollars for this tool execution
+      type: number
+    status:
+      description: Execution status
+      type: string
+      enum: [processing, success, error]
+    metadata:
+      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
+      type: object
+      additionalProperties: true
+    created_at:
+      description: When the log entry was created
+      type: string
+      format: date-time
+    virtual_key:
+      description: Full virtual key object (populated when virtual_key_id is set)
+      type: object
+      additionalProperties: true
+
+MCPToolLogSearchFilters:
+  description: MCP tool log search filters
+  type: object
+  properties:
+    tool_names:
+      description: Filter by tool names
+      type: array
+      items:
+        type: string
+    server_labels:
+      description: Filter by server labels
+      type: array
+      items:
+        type: string
+    status:
+      description: Filter by execution status
+      type: array
+      items:
+        type: string
+    llm_request_ids:
+      description: Filter by linked LLM request IDs
+      type: array
+      items:
+        type: string
+    start_time:
+      description: Filter by start time (RFC3339 format)
+      type: string
+      format: date-time
+    end_time:
+      description: Filter by end time (RFC3339 format)
+      type: string
+      format: date-time
+    min_latency:
+      description: Filter by minimum latency
+      type: number
+    max_latency:
+      description: Filter by maximum latency
+      type: number
+    content_search:
+      description: Search in tool arguments and results
+      type: string
+
+MCPToolLogStats:
+  description: MCP tool log statistics
+  type: object
+  properties:
+    total_executions:
+      description: Total number of tool executions
+      type: integer
+    success_rate:
+      description: Success rate percentage
+      type: number
+    average_latency:
+      description: Average execution latency in milliseconds
+      type: number
+    total_cost:
+      description: Total cost in dollars for all executions
+      type: number
+
+SearchMCPLogsResponse:
+  description: Search MCP logs response
+  type: object
+  properties:
+    logs:
+      type: array
+      items:
+        $ref: "#/MCPToolLogEntry"
+    pagination:
+      type: object
+      required:
+        - total_count
+      properties:
+        limit:
+          type: integer
+        offset:
+          type: integer
+        sort_by:
+          type: string
+        order:
+          type: string
+        total_count:
+          description: Total number of items matching the query
+          type: integer
+          format: int64
+    stats:
+      $ref: "#/MCPToolLogStats"
+    has_logs:
+      description: Whether any logs exist in the system
+      type: boolean
+
+MCPLogsFilterDataResponse:
+  description: Available MCP log filter data
+  type: object
+  properties:
+    tool_names:
+      description: All unique tool names
+      type: array
+      items:
+        type: string
+    server_labels:
+      description: All unique server labels
+      type: array
+      items:
+        type: string
+    virtual_keys:
+      description: All unique virtual keys
+      type: array
+      items:
+        type: object
+        properties:
+          id:
+            description: Virtual key ID
+            type: string
+          name:
+            description: Virtual key name
+            type: string
+          value:
+            description: Virtual key value (redacted if applicable)
+            type: string
+
+DeleteMCPLogsRequest:
+  description: Delete MCP logs request
+  type: object
+  required:
+    - ids
+  properties:
+    ids:
+      description: Array of log IDs to delete
+      type: array
+      items:
+        type: string
+
+SearchFilters:
+  description: Log search filters
+  type: object
+  properties:
+    providers:
+      items:
+        type: string
+      type: array
+    models:
+      items:
+        type: string
+      type: array
+    status:
+      items:
+        type: string
+      type: array
+    objects:
+      items:
+        type: string
+      type: array
+    selected_key_ids:
+      items:
+        type: string
+      type: array
+    virtual_key_ids:
+      items:
+        type: string
+      type: array
+    routing_rule_ids:
+      items:
+        type: string
+      type: array
+    routing_engine_used:
+      description: Filter by routing engine (routing-rule, governance, or loadbalancing)
+      items:
+        type: string
+      type: array
+    start_time:
+      format: date-time
+      type: string
+    end_time:
+      format: date-time
+      type: string
+    min_latency:
+      type: number
+    max_latency:
+      type: number
+    min_tokens:
+      type: integer
+    max_tokens:
+      type: integer
+    min_cost:
+      type: number
+    max_cost:
+      type: number
+    missing_cost_only:
+      type: boolean
+    content_search:
+      type: string
+
+SearchLogsResponse:
+  description: Search logs response
+  type: object
+  properties:
+    logs:
+      type: array
+      items:
+        $ref: "#/LogEntry"
+    pagination:
+      $ref: "#/PaginationOptions"
+    stats:
+      $ref: "#/LogStats"
+    has_logs:
+      description: Whether any logs exist in the system
+      type: boolean
+
+PaginationOptions:
+  description: Pagination metadata for list responses
+  type: object
+  properties:
+    limit:
+      type: integer
+    offset:
+      type: integer
+    sort_by:
+      type: string
+      enum: ['timestamp', 'latency', 'tokens', 'cost']
+    order:
+      type: string
+      enum: ['asc', 'desc']
+    total_count:
+      description: Total number of items matching the query
+      type: integer
+      format: int64
+
+LogStats:
+  description: Log statistics
+  type: object
+  properties:
+    total_requests:
+      type: integer
+    total_tokens:
+      type: integer
+    total_cost:
+      type: number
+    average_latency:
+      type: number
+    success_rate:
+      type: number
+
+DroppedRequestsResponse:
+  description: Dropped requests response
+  type: object
+  properties:
+    dropped_requests:
+      format: int64
+      type: integer
+
+FilterDataResponse:
+  description: Available filter data response
+  type: object
+  properties:
+    models:
+      type: array
+      items:
+        type: string
+    selected_keys:
+      type: array
+      items:
+        $ref: "../../schemas/management/providers.yaml#/Key"
+    virtual_keys:
+      type: array
+      items:
+        $ref: "../../schemas/management/governance.yaml#/VirtualKey"
+    routing_rules:
+      description: Available routing rules for filtering
+      type: array
+      items:
+        $ref: "../../schemas/management/governance.yaml#/RoutingRule"
+    routing_engines:
+      description: Available routing engine types (routing-rule, governance, loadbalancing)
+      type: array
+      items:
+        type: string
+
+DeleteLogsRequest:
+  description: Delete logs request
+  type: object
+  required:
+    - ids
+  properties:
+    ids:
+      items:
+        type: string
+      type: array
+
+RecalculateCostRequest:
+  description: Recalculate cost request
+  type: object
+  properties:
+    filters:
+      $ref: "#/SearchFilters"
+    limit:
+      description: Maximum number of logs to process (default 200, max 1000)
+      type: integer
+
+RecalculateCostResponse:
+  description: Recalculate cost response
+  type: object
+  properties:
+    total_matched:
+      type: integer
+    updated:
+      type: integer
+    skipped:
+      type: integer
+    remaining:
+      type: integer
+
+# Histogram schemas
+
+HistogramBucket:
+  description: Time-bucketed request count
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    count:
+      format: int64
+      type: integer
+    success:
+      format: int64
+      type: integer
+    error:
+      format: int64
+      type: integer
+
+HistogramResult:
+  description: Time-bucketed request count histogram
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/HistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+TokenHistogramBucket:
+  description: Time-bucketed token usage
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    prompt_tokens:
+      format: int64
+      type: integer
+    completion_tokens:
+      format: int64
+      type: integer
+    total_tokens:
+      format: int64
+      type: integer
+
+TokenHistogramResult:
+  description: Time-bucketed token usage histogram
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/TokenHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+CostHistogramBucket:
+  description: Time-bucketed cost data with model breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    total_cost:
+      type: number
+    by_model:
+      description: Cost breakdown by model name
+      type: object
+      additionalProperties:
+        type: number
+
+CostHistogramResult:
+  description: Time-bucketed cost histogram with model breakdown
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/CostHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    models:
+      description: List of models present in the histogram
+      type: array
+      items:
+        type: string
+
+ModelUsageStats:
+  description: Usage statistics for a single model
+  type: object
+  properties:
+    total:
+      format: int64
+      type: integer
+    success:
+      format: int64
+      type: integer
+    error:
+      format: int64
+      type: integer
+
+ModelHistogramBucket:
+  description: Time-bucketed model usage with success/error breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_model:
+      description: Usage breakdown by model name
+      type: object
+      additionalProperties:
+        $ref: "#/ModelUsageStats"
+
+ModelHistogramResult:
+  description: Time-bucketed model usage histogram
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/ModelHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    models:
+      items:
+        type: string
+      type: array
+
+LatencyHistogramBucket:
+  description: Time-bucketed latency percentiles
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    avg_latency:
+      type: number
+    p90_latency:
+      type: number
+    p95_latency:
+      type: number
+    p99_latency:
+      type: number
+    total_requests:
+      format: int64
+      type: integer
+
+LatencyHistogramResult:
+  description: Time-bucketed latency histogram
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/LatencyHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+ProviderCostHistogramBucket:
+  description: Time-bucketed cost data with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    total_cost:
+      type: number
+    by_provider:
+      description: Cost breakdown by provider name
+      type: object
+      additionalProperties:
+        type: number
+
+ProviderCostHistogramResult:
+  description: Time-bucketed cost histogram with provider breakdown
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/ProviderCostHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:
+      items:
+        type: string
+      type: array
+
+ProviderTokenStats:
+  description: Token statistics for a single provider
+  type: object
+  properties:
+    prompt_tokens:
+      format: int64
+      type: integer
+    completion_tokens:
+      format: int64
+      type: integer
+    total_tokens:
+      format: int64
+      type: integer
+
+ProviderTokenHistogramBucket:
+  description: Time-bucketed token usage with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_provider:
+      description: Token usage breakdown by provider name
+      type: object
+      additionalProperties:
+        $ref: "#/ProviderTokenStats"
+
+ProviderTokenHistogramResult:
+  description: Time-bucketed token histogram with provider breakdown
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/ProviderTokenHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:
+      items:
+        type: string
+      type: array
+
+ProviderLatencyStats:
+  description: Latency statistics for a single provider
+  type: object
+  properties:
+    avg_latency:
+      type: number
+    p90_latency:
+      type: number
+    p95_latency:
+      type: number
+    p99_latency:
+      type: number
+    total_requests:
+      format: int64
+      type: integer
+
+ProviderLatencyHistogramBucket:
+  description: Time-bucketed latency data with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_provider:
+      description: Latency breakdown by provider name
+      type: object
+      additionalProperties:
+        $ref: "#/ProviderLatencyStats"
+
+ProviderLatencyHistogramResult:
+  description: Time-bucketed latency histogram with provider breakdown
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: "#/ProviderLatencyHistogramBucket"
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:
+      items:
+        type: string
+      type: array
--- a/docs/openapi/schemas/management/mcp.yaml
+++ b/docs/openapi/schemas/management/mcp.yaml
@@ -0,0 +1,446 @@
+# MCP API schemas
+
+MCPAuthType:  # Also defined in oauth.yaml; keep both definitions in sync
+  type: string
+  enum: [none, headers, oauth, per_user_oauth]
+  description: |
+    Authentication type for MCP connections:
+    - none: No authentication
+    - headers: Header-based authentication (API keys, custom headers, etc.)
+    - oauth: OAuth 2.0 authentication (server-level, admin authenticates once)
+    - per_user_oauth: Per-user OAuth 2.1 authentication (each user authenticates individually)
+
+MCPConnectionType:  # The create-request variants in this file cover http, sse and stdio only
+  description: Connection type for MCP client
+  type: string
+  enum: [http, stdio, sse, inprocess]
+
+MCPConnectionState:  # Referenced by MCPClient.state
+  description: Connection state of an MCP client
+  type: string
+  enum: [connected, disconnected, error]
+
+MCPStdioConfig:  # Launch settings (command, args, envs) referenced by stdio_config
+  description: STDIO configuration for MCP client
+  type: object
+  properties:
+    command:
+      description: Executable command to run
+      type: string
+    args:
+      description: Command line arguments
+      type: array
+      items:
+        type: string
+    envs:
+      description: Environment variables required
+      type: array
+      items:
+        type: string
+
+MCPClientCreateRequest:  # Polymorphic create payload; connection_type selects the variant
+  description: |
+    MCP client configuration for creating a new client (tool_pricing not available at creation).
+    The schema varies based on connection_type:
+    - HTTP/SSE: connection_string is required
+    - STDIO: stdio_config is required
+    - InProcess: server instance must be provided programmatically (Go package only)
+  oneOf:
+    - $ref: "#/MCPClientCreateRequestHTTP"
+    - $ref: "#/MCPClientCreateRequestSSE"
+    - $ref: "#/MCPClientCreateRequestSTDIO"
+  discriminator:
+    propertyName: connection_type
+    mapping:
+      http: "#/MCPClientCreateRequestHTTP"
+      sse: "#/MCPClientCreateRequestSSE"
+      stdio: "#/MCPClientCreateRequestSTDIO"
+
+MCPClientCreateRequestBase:  # Fields shared by the HTTP, SSE and STDIO create variants (pulled in via allOf)
+  type: object
+  required:
+    - name
+    - connection_type
+  properties:
+    client_id:
+      type: string
+      description: Unique identifier for the MCP client (optional, auto-generated if not provided)
+    name:
+      type: string
+      description: Display name for the MCP client
+    is_code_mode_client:
+      type: boolean
+      description: Whether this client is available in code mode
+    is_ping_available:
+      type: boolean
+      default: true
+      description: |
+        Whether the MCP server supports ping for health checks.
+        If true, uses lightweight ping method for health checks.
+        If false, uses listTools method for health checks instead.
+    connection_type:
+      $ref: '#/MCPConnectionType'
+    auth_type:
+      $ref: '#/MCPAuthType'
+      description: Authentication type for the MCP connection
+    oauth_config_id:
+      type: string
+      description: |
+        OAuth config ID for OAuth authentication.
+        Set after OAuth flow is completed. References the oauth_configs table.
+        Only relevant when auth_type is "oauth".
+    headers:
+      type: object
+      additionalProperties:
+        type: string
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+    oauth_config:
+      $ref: '../../schemas/management/oauth.yaml#/OAuthConfigRequest'
+      description: |
+        OAuth configuration for initiating OAuth flow.
+        Only include this when creating a client with auth_type "oauth".
+        This will trigger the OAuth flow and return an authorization URL.
+    tools_to_execute:
+      type: array
+      items:
+        type: string
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+    tools_to_auto_execute:
+      type: array
+      items:
+        type: string
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+    allow_on_all_virtual_keys:
+      type: boolean
+      default: false
+      description: |
+        When true, this MCP client's tools are available to all virtual keys by default,
+        without requiring an explicit virtual key assignment.
+        An explicit virtual key config always overrides this setting for that key.
+MCPClientCreateRequestHTTP:  # Base request narrowed to connection_type "http"
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      required:
+        - connection_string
+      properties:
+        connection_type:
+          type: string
+          enum: [http]
+        connection_string:
+          description: HTTP URL (required for HTTP connection type)
+          type: string
+
+MCPClientCreateRequestSSE:  # Base request narrowed to connection_type "sse"
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      required:
+        - connection_string
+      properties:
+        connection_type:
+          type: string
+          enum: [sse]
+        connection_string:
+          description: SSE URL (required for SSE connection type)
+          type: string
+
+MCPClientCreateRequestSTDIO:  # Base request narrowed to connection_type "stdio"
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      required:
+        - stdio_config
+      properties:
+        connection_type:
+          type: string
+          enum: [stdio]
+        stdio_config:
+          description: STDIO configuration (required for STDIO connection type)
+          $ref: "#/MCPStdioConfig"
+
+MCPClientUpdateRequest:  # Single flat object; unlike the create request it is not split per connection type
+  description: MCP client configuration for updating an existing client (includes tool_pricing)
+  type: object
+  properties:
+    client_id:
+      description: Unique identifier for the MCP client
+      type: string
+    name:
+      description: Display name for the MCP client
+      type: string
+    is_code_mode_client:
+      description: Whether this client is available in code mode
+      type: boolean
+    connection_type:
+      $ref: "#/MCPConnectionType"
+    connection_string:
+      description: HTTP or SSE URL (required for HTTP or SSE connections)
+      type: string
+    stdio_config:
+      $ref: "#/MCPStdioConfig"
+    auth_type:
+      description: Authentication type for the MCP connection
+      $ref: "#/MCPAuthType"
+    oauth_config_id:
+      description: |
+        OAuth config ID for OAuth authentication.
+        References the oauth_configs table.
+        Only relevant when auth_type is "oauth".
+      type: string
+    headers:
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+      type: object
+      additionalProperties:
+        type: string
+    tools_to_execute:
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+      type: array
+      items:
+        type: string
+    tools_to_auto_execute:
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+      type: array
+      items:
+        type: string
+    tool_pricing:
+      description: |
+        Per-tool cost in USD for execution.
+        Key is the tool name, value is the cost per execution.
+        Example: {"read_file": 0.001, "write_file": 0.002}
+        Note: Only available when updating an existing client after tools have been fetched.
+      type: object
+      additionalProperties:
+        type: number
+        format: double
+    allow_on_all_virtual_keys:
+      description: |
+        When true, this MCP client's tools are accessible to all virtual keys without requiring
+        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
+        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
+      type: boolean
+      default: false
+    vk_configs:
+      description: |
+        When provided, replaces all virtual key assignments for this MCP client.
+        Each entry specifies a virtual key and the tools it is allowed to call.
+        To remove all VK access, provide an empty array [].
+        Omit this field to leave existing VK assignments unchanged.
+      type: array
+      items:
+        $ref: "#/MCPVKConfig"
+
+MCPVKConfig:  # Request-side entry of MCPClientUpdateRequest.vk_configs; both fields are required
+  type: object
+  description: Per-virtual-key tool access configuration for an MCP client
+  required:
+    - virtual_key_id
+    - tools_to_execute
+  properties:
+    virtual_key_id:
+      type: string
+      description: ID of the virtual key
+    tools_to_execute:
+      type: array
+      items:
+        type: string
+      description: |
+        Tools this virtual key is allowed to call on this MCP client.
+        ["*"] => all tools allowed
+        ["tool1", "tool2"] => only the specified tools
+
+MCPClientConfig:  # Referenced by MCPClient.config
+  description: Full MCP client configuration (used in responses)
+  type: object
+  properties:
+    client_id:
+      description: Unique identifier for the MCP client
+      type: string
+    name:
+      description: Display name for the MCP client
+      type: string
+    is_code_mode_client:
+      description: Whether this client is available in code mode
+      type: boolean
+    connection_type:
+      $ref: "#/MCPConnectionType"
+    connection_string:
+      description: HTTP or SSE URL (required for HTTP or SSE connections)
+      type: string
+    stdio_config:
+      $ref: "#/MCPStdioConfig"
+    auth_type:
+      description: Authentication type for the MCP connection
+      $ref: "#/MCPAuthType"
+    oauth_config_id:
+      description: |
+        OAuth config ID for OAuth authentication.
+        References the oauth_configs table.
+        Only set when auth_type is "oauth".
+      type: string
+    headers:
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+      type: object
+      additionalProperties:
+        type: string
+    tools_to_execute:
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+      type: array
+      items:
+        type: string
+    tools_to_auto_execute:
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+      type: array
+      items:
+        type: string
+    tool_pricing:
+      description: |
+        Per-tool cost in USD for execution.
+        Key is the tool name, value is the cost per execution.
+        Example: {"read_file": 0.001, "write_file": 0.002}
+      type: object
+      additionalProperties:
+        type: number
+        format: double
+    allow_on_all_virtual_keys:
+      description: |
+        When true, this MCP client's tools are accessible to all virtual keys without requiring
+        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
+        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
+      type: boolean
+      default: false
+
+ChatToolFunction:  # Item type of MCPClient.tools; parameters is a free-form object
+  description: Tool function definition
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      type: string
+    parameters:
+      additionalProperties: true
+      type: object
+    strict:
+      type: boolean
+
+MCPVKConfigResponse:  # Response-side twin of MCPVKConfig with the extra virtual_key_name field
+  description: Per-virtual-key tool access configuration as returned in list/get responses
+  type: object
+  properties:
+    virtual_key_id:
+      description: ID of the virtual key
+      type: string
+    virtual_key_name:
+      description: Display name of the virtual key
+      type: string
+    tools_to_execute:
+      description: |
+        Tools this virtual key is allowed to call on this MCP client.
+        ["*"] => all tools allowed
+        ["tool1", "tool2"] => only the specified tools
+      type: array
+      items:
+        type: string
+
+MCPClient:  # Pairs a client's config with its tools, connection state and virtual key assignments
+  description: Connected MCP client with its tools
+  type: object
+  properties:
+    config:
+      $ref: "#/MCPClientConfig"
+    tools:
+      type: array
+      items:
+        $ref: "#/ChatToolFunction"
+    state:
+      $ref: "#/MCPConnectionState"
+    vk_configs:
+      description: Virtual key assignments for this MCP client
+      type: array
+      items:
+        $ref: "#/MCPVKConfigResponse"
+
+ExecuteToolRequest:  # Variant is chosen by the `format` query parameter; there is no discriminator field
+  description: |
+    MCP tool execution request. The schema depends on the `format` query parameter:
+    - `format=chat` or empty (default): Use `ChatAssistantMessageToolCall` schema
+    - `format=responses`: Use `ResponsesToolMessage` schema
+  oneOf:
+    - $ref: "../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall"
+      title: Chat (Default)
+      description: Chat format - uses ChatAssistantMessageToolCall schema
+    - $ref: "#/ResponsesToolMessage"
+      title: Responses
+      description: Responses format - uses ResponsesToolMessage schema
+
+ExecuteToolResponse:  # Counterpart of ExecuteToolRequest: one variant per format
+  oneOf:
+    - title: Chat (Default)
+      description: Chat format response
+      $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
+    - title: Responses
+      description: Responses format response
+      $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
+  description: |
+    MCP tool execution response, returned in chat format (default) or responses format.
+
+ResponsesToolMessage:  # Referenced by ExecuteToolRequest as its "Responses" variant; only name is required
+  description: Tool message for Responses API format
+  type: object
+  required:
+    - name
+  properties:
+    call_id:
+      description: Common call ID for tool calls and outputs
+      type: string
+    name:
+      description: Tool function name (required for execution)
+      type: string
+    arguments:
+      description: Tool function arguments as JSON string
+      type: string
+    output:
+      description: Tool execution output
+      type: object
+      additionalProperties: true
+    action:
+      description: Tool action configuration
+      type: object
+      additionalProperties: true
+    error:
+      description: Error message if tool execution failed
+      type: string
--- a/docs/openapi/schemas/management/oauth.yaml
+++ b/docs/openapi/schemas/management/oauth.yaml
@@ -0,0 +1,305 @@
+# OAuth API schemas
+
+MCPAuthType:  # Same enum as MCPAuthType in mcp.yaml; keep both definitions in sync
+  description: |
+    Authentication type for MCP connections:
+    - none: No authentication
+    - headers: Header-based authentication (API keys, custom headers, etc.)
+    - oauth: OAuth 2.0 authentication (shared admin token)
+    - per_user_oauth: Per-user OAuth 2.1 (each end-user authenticates individually)
+  type: string
+  enum: [none, headers, oauth, per_user_oauth]
+
+OAuthConfigRequest:  # NOTE(review): descriptions mention server_url, which is not a property here - confirm where it comes from
+  description: OAuth configuration for MCP client creation
+  type: object
+  properties:
+    client_id:
+      description: |
+        OAuth client ID. Optional if client supports dynamic client registration (RFC 7591).
+        If not provided, the server_url must be set for OAuth discovery and dynamic registration.
+      type: string
+    client_secret:
+      description: |
+        OAuth client secret. Optional for public clients using PKCE or clients obtained via dynamic registration.
+      type: string
+    authorize_url:
+      description: |
+        OAuth authorization endpoint URL. Optional - will be discovered from server_url if not provided.
+      type: string
+    token_url:
+      description: |
+        OAuth token endpoint URL. Optional - will be discovered from server_url if not provided.
+      type: string
+    registration_url:
+      description: |
+        Dynamic client registration endpoint URL (RFC 7591). Optional - will be discovered from server_url if not provided.
+      type: string
+    scopes:
+      description: |
+        OAuth scopes requested. Optional - can be discovered from server_url if not provided.
+        Example: ["read", "write"]
+      type: array
+      items:
+        type: string
+
+OAuthFlowInitiation:  # status has the single allowed value pending_oauth
+  description: Response when initiating an OAuth flow
+  type: object
+  properties:
+    status:
+      type: string
+      enum: [pending_oauth]
+    message:
+      type: string
+    oauth_config_id:
+      description: ID of the OAuth config created for this flow
+      type: string
+    authorize_url:
+      description: URL to redirect the user to for authorization
+      type: string
+    expires_at:
+      description: When the OAuth authorization request expires
+      type: string
+      format: date-time
+    mcp_client_id:
+      description: The MCP client ID that initiated this OAuth flow
+      type: string
+
+OAuthConfigStatus:  # The token_* fields are documented as present only once status is authorized
+  description: Status of an OAuth configuration
+  type: object
+  properties:
+    id:
+      description: OAuth config ID
+      type: string
+    status:
+      description: |
+        Current status of the OAuth flow:
+        - pending: User has not yet authorized
+        - authorized: User authorized and token is stored
+        - failed: Authorization failed
+      type: string
+      enum: [pending, authorized, failed]
+    created_at:
+      description: When this OAuth config was created
+      type: string
+      format: date-time
+    expires_at:
+      description: When this OAuth config expires (becomes invalid if not completed)
+      type: string
+      format: date-time
+    token_id:
+      description: ID of the associated OAuth token (only present if status is authorized)
+      type: string
+    token_expires_at:
+      description: When the OAuth access token expires (only present if status is authorized)
+      type: string
+      format: date-time
+    token_scopes:
+      description: Scopes granted in the OAuth token (only present if status is authorized)
+      type: array
+      items:
+        type: string
+
+OAuthToken:  # Carries the raw access_token and refresh_token values
+  description: OAuth access and refresh tokens
+  type: object
+  properties:
+    id:
+      description: Unique token identifier
+      type: string
+    access_token:
+      description: OAuth access token
+      type: string
+    refresh_token:
+      description: OAuth refresh token for obtaining new access tokens
+      type: string
+    token_type:
+      description: Token type (typically "Bearer")
+      type: string
+    expires_at:
+      description: When the access token expires
+      type: string
+      format: date-time
+    scopes:
+      description: Scopes granted in this token
+      type: array
+      items:
+        type: string
+    last_refreshed_at:
+      description: When the token was last refreshed
+      type: string
+      format: date-time
+
+# Per-User OAuth 2.1 Authorization Server schemas
+
+PerUserOAuthClientRegistrationRequest:  # Only redirect_uris is required
+  description: |
+    Dynamic Client Registration request per RFC 7591.
+    MCP clients (Claude Code, Cursor, etc.) call this to obtain a client_id
+    before initiating the authorization flow.
+  type: object
+  required:
+    - redirect_uris
+  properties:
+    client_name:
+      description: Human-readable name of the client application
+      type: string
+      example: Claude Code
+    redirect_uris:
+      description: List of allowed redirect URIs for this client
+      type: array
+      items:
+        type: string
+      example: ["http://localhost:54321/callback"]
+    grant_types:
+      description: Supported grant types. Defaults to ["authorization_code"]
+      type: array
+      items:
+        type: string
+      example: ["authorization_code"]
+    response_types:
+      description: Supported response types
+      type: array
+      items:
+        type: string
+      example: ["code"]
+    token_endpoint_auth_method:
+      description: Token endpoint authentication method. Always "none" (public client)
+      type: string
+      example: none
+    scope:
+      description: Space-separated list of requested scopes
+      type: string
+      example: "mcp:read mcp:write"
+
+PerUserOAuthClientRegistrationResponse:  # Returns the issued client_id together with the registered values
+  description: Dynamic Client Registration response per RFC 7591
+  type: object
+  properties:
+    client_id:
+      description: Issued client identifier
+      type: string
+      example: "550e8400-e29b-41d4-a716-446655440000"
+    client_name:
+      description: Human-readable name of the client application
+      type: string
+    redirect_uris:
+      description: Registered redirect URIs
+      type: array
+      items:
+        type: string
+    grant_types:
+      description: Registered grant types
+      type: array
+      items:
+        type: string
+    response_types:
+      description: Registered response types
+      type: array
+      items:
+        type: string
+    token_endpoint_auth_method:
+      description: Token endpoint authentication method (always "none")
+      type: string
+
+PerUserOAuthTokenResponse:  # Token response; expires_in is expressed in seconds
+  description: OAuth 2.1 token response from the token endpoint
+  type: object
+  properties:
+    access_token:
+      description: Bifrost-issued access token (24h TTL). Use as Bearer token on /mcp requests.
+      type: string
+    token_type:
+      description: Token type, always "Bearer"
+      type: string
+      example: Bearer
+    expires_in:
+      description: Seconds until the access token expires (86400 for 24h)
+      type: integer
+      example: 86400
+    scope:
+      description: Space-separated scopes granted
+      type: string
+
+ProtectedResourceMetadata:  # Discovery document served at /.well-known/oauth-protected-resource
+  description: |
+    OAuth 2.0 Protected Resource Metadata per RFC 9728.
+    Returned by /.well-known/oauth-protected-resource to tell MCP clients
+    which authorization server(s) protect the /mcp endpoint.
+  type: object
+  properties:
+    resource:
+      description: URL of the protected resource (Bifrost's /mcp endpoint)
+      type: string
+      example: "https://your-bifrost-domain.com/mcp"
+    authorization_servers:
+      description: List of authorization server issuer URLs
+      type: array
+      items:
+        type: string
+      example: ["https://your-bifrost-domain.com"]
+    scopes_supported:
+      description: Scopes supported by this resource
+      type: array
+      items:
+        type: string
+      example: ["mcp:read", "mcp:write"]
+    bearer_methods_supported:
+      description: Supported methods for passing Bearer tokens
+      type: array
+      items:
+        type: string
+      example: ["header"]
+
+AuthorizationServerMetadata:  # Discovery document served at /.well-known/oauth-authorization-server
+  description: |
+    OAuth 2.0 Authorization Server Metadata per RFC 8414.
+    Returned by /.well-known/oauth-authorization-server to let MCP clients
+    discover Bifrost's OAuth endpoints and capabilities.
+  type: object
+  properties:
+    issuer:
+      description: Authorization server issuer URL (Bifrost base URL)
+      type: string
+      example: "https://your-bifrost-domain.com"
+    authorization_endpoint:
+      description: Authorization endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/authorize"
+    token_endpoint:
+      description: Token endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/token"
+    registration_endpoint:
+      description: Dynamic client registration endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/register"
+    response_types_supported:
+      type: array
+      items:
+        type: string
+      example: ["code"]
+    grant_types_supported:
+      type: array
+      items:
+        type: string
+      example: ["authorization_code"]
+    code_challenge_methods_supported:
+      description: Supported PKCE methods (only S256)
+      type: array
+      items:
+        type: string
+      example: ["S256"]
+    token_endpoint_auth_methods_supported:
+      description: Supported token endpoint auth methods (public clients only)
+      type: array
+      items:
+        type: string
+      example: ["none"]
+    scopes_supported:
+      type: array
+      items:
+        type: string
+      example: ["mcp:read", "mcp:write"]
--- a/docs/openapi/schemas/management/plugins.yaml
+++ b/docs/openapi/schemas/management/plugins.yaml
@@ -0,0 +1,131 @@
+# Plugins API schemas
+
+PluginStatus:  # Status value, log lines and implemented interface types for one plugin
+  description: Plugin status information
+  type: object
+  properties:
+    name:
+      description: Display name of the plugin
+      type: string
+    status:
+      type: string
+      enum: [active, error, disabled, loading, uninitialized, unloaded, loaded]
+    logs:
+      type: array
+      items:
+        type: string
+    types:
+      description: Plugin types indicating which interfaces the plugin implements
+      type: array
+      items:
+        type: string
+        enum: [llm, mcp, http, observability]
+  example:
+    name: my_custom_plugin
+    status: active
+    logs:
+      - "plugin my_custom_plugin initialized successfully"
+    types:
+      - llm
+      - http
+
+Plugin:  # Mixes camelCase (actualName, isCustom) and snake_case keys; names are part of the API, keep as-is
+  description: Plugin configuration
+  type: object
+  properties:
+    id:
+      description: Plugin ID (auto-generated)
+      type: integer
+    name:
+      description: Display name of the plugin (from config)
+      type: string
+    actualName:
+      description: Actual plugin name from GetName() (used as map key in plugin status). Only populated for active plugins.
+      type: string
+    enabled:
+      type: boolean
+    config:
+      type: object
+      additionalProperties: true
+    isCustom:
+      type: boolean
+    path:
+      type: string
+    status:
+      description: Current plugin status including types array (only populated for active plugins)
+      $ref: "#/PluginStatus"
+    created_at:
+      type: string
+      format: date-time
+    version:
+      type: integer
+      format: int16
+    updated_at:
+      type: string
+      format: date-time
+    config_hash:
+      type: string
+  example:
+    name: my_custom_plugin
+    actualName: MyCustomPlugin
+    enabled: true
+    config:
+      api_key: "xxx"
+    isCustom: true
+    path: "/plugins/my_custom_plugin.so"
+    status:
+      name: my_custom_plugin
+      status: active
+      logs:
+        - "plugin my_custom_plugin initialized successfully"
+      types:
+        - llm
+        - http
+
+ListPluginsResponse:  # Array of Plugin objects plus an integer count
+  description: List plugins response
+  type: object
+  properties:
+    plugins:
+      type: array
+      items:
+        $ref: "#/Plugin"
+    count:
+      type: integer
+
+CreatePluginRequest:  # Only name is required; config is a free-form object
+  description: Create plugin request
+  type: object
+  properties:
+    name:
+      type: string
+    enabled:
+      type: boolean
+    config:
+      type: object
+      additionalProperties: true
+    path:
+      type: string
+  required:
+    - name
+
+UpdatePluginRequest:  # No required fields; there is no name property in this schema
+  description: Update plugin request
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    config:
+      additionalProperties: true
+      type: object
+    path:
+      type: string
+
+PluginResponse:  # A message string plus a Plugin object
+  description: Plugin operation response
+  type: object
+  properties:
+    message:
+      type: string
+    plugin:
+      $ref: "#/Plugin"
--- a/docs/openapi/schemas/management/prompts.yaml
+++ b/docs/openapi/schemas/management/prompts.yaml
@@ -0,0 +1,276 @@
+# Prompt Repository schemas
+
+Folder:  # Note: has a property that is itself named "description" (nullable string)
+  description: Prompt folder
+  type: object
+  properties:
+    id:
+      description: Unique folder ID (UUID)
+      type: string
+    name:
+      type: string
+    description:
+      type: string
+      nullable: true
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    prompts_count:
+      description: Number of prompts in this folder (virtual field)
+      type: integer
+
+Prompt:  # Embeds its folder, versions, sessions and latest_version by reference
+  description: Prompt
+  type: object
+  properties:
+    id:
+      description: Unique prompt ID (UUID)
+      type: string
+    name:
+      type: string
+    folder_id:
+      type: string
+      nullable: true
+    folder:
+      $ref: "#/Folder"
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    versions:
+      type: array
+      items:
+        $ref: "#/PromptVersion"
+    sessions:
+      type: array
+      items:
+        $ref: "#/PromptSession"
+    latest_version:
+      $ref: "#/PromptVersion"
+
+PromptVersion:  # Integer id, whereas the owning prompt_id is a string
+  description: Prompt version (immutable snapshot)
+  type: object
+  properties:
+    id:
+      description: Auto-increment version ID
+      type: integer
+    prompt_id:
+      type: string
+    version_number:
+      type: integer
+    commit_message:
+      type: string
+    model_params:
+      description: Model parameters (flexible JSON object)
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+    is_latest:
+      type: boolean
+    created_at:
+      type: string
+      format: date-time
+    messages:
+      type: array
+      items:
+        $ref: "#/PromptVersionMessage"
+
+PromptVersionMessage:  # Same shape as PromptSessionMessage
+  description: Message within a prompt version
+  type: object
+  properties:
+    id:
+      type: integer
+    order_index:
+      type: integer
+    message:
+      description: Opaque message content (JSON)
+      type: object
+      additionalProperties: true
+
+PromptSession:  # version_id is nullable
+  description: Prompt playground session
+  type: object
+  properties:
+    id:
+      description: Auto-increment session ID
+      type: integer
+    prompt_id:
+      type: string
+    version_id:
+      description: Version this session was forked from
+      type: integer
+      nullable: true
+    name:
+      type: string
+    model_params:
+      description: Model parameters (flexible JSON object)
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    messages:
+      type: array
+      items:
+        $ref: "#/PromptSessionMessage"
+
+PromptSessionMessage:  # Same shape as PromptVersionMessage
+  description: Message within a prompt session
+  type: object
+  properties:
+    id:
+      type: integer
+    order_index:
+      type: integer
+    message:
+      description: Opaque message content (JSON)
+      type: object
+      additionalProperties: true
+
+# Request schemas
+
+CreateFolderRequest:  # Only name is required; description is optional
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      type: string
+  required:
+    - name
+
+UpdateFolderRequest:  # No required fields; description is nullable
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      nullable: true
+      type: string
+
+CreatePromptRequest:  # Only name is required; folder_id is optional
+  type: object
+  properties:
+    name:
+      type: string
+    folder_id:
+      type: string
+  required:
+    - name
+
+UpdatePromptRequest:  # No required fields; folder_id is nullable
+  type: object
+  properties:
+    name:
+      type: string
+    folder_id:
+      nullable: true
+      type: string
+
+CreateVersionRequest:  # Every listed property is required
+  type: object
+  properties:
+    commit_message:
+      type: string
+    messages:
+      description: Array of message objects
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - commit_message
+    - messages
+    - model_params
+    - provider
+    - model
+
+CreateSessionRequest:  # version_id and messages are optional; the other four fields are required
+  type: object
+  properties:
+    name:
+      type: string
+    version_id:
+      description: Fork from this version
+      type: integer
+    messages:
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - name
+    - model_params
+    - provider
+    - model
+
+UpdateSessionRequest:  # Every listed property is required
+  type: object
+  properties:
+    name:
+      type: string
+    messages:
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - name
+    - messages
+    - model_params
+    - provider
+    - model
+
+RenameSessionRequest:  # Single required field: name
+  type: object
+  properties:
+    name:
+      type: string
+  required:
+    - name
+
+CommitSessionRequest:  # Single required field: commit_message
+  type: object
+  properties:
+    commit_message:
+      type: string
+  required:
+    - commit_message
--- a/docs/openapi/schemas/management/providers.yaml
+++ b/docs/openapi/schemas/management/providers.yaml
@@ -0,0 +1,491 @@
+# Providers API schemas
+
+ProviderStatus:  # String enum; referenced by ProviderResponse.provider_status
+  type: string
+  enum: [active, error, deleted]  # the only three accepted values
+  description: Status of the provider
+
+NetworkConfig:  # Every property is optional (no `required` list)
+  type: object
+  description: Network configuration for provider connections
+  properties:
+    base_url:
+      type: string
+      description: Base URL for the provider (optional)
+    extra_headers:
+      type: object
+      additionalProperties:  # arbitrary header names, each mapped to a string value
+        type: string
+      description: Additional headers to include in requests
+    default_request_timeout_in_seconds:
+      type: integer
+      description: Default timeout for requests
+    max_retries:
+      type: integer
+      description: Maximum number of retries
+    retry_backoff_initial:
+      type: integer
+      format: int64  # 64-bit integer; unit is milliseconds per the description
+      description: Initial backoff duration in milliseconds
+    retry_backoff_max:
+      type: integer
+      format: int64  # 64-bit integer; unit is milliseconds per the description
+      description: Maximum backoff duration in milliseconds
+    insecure_skip_verify:
+      type: boolean
+      description: Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments.
+    ca_cert_pem:
+      type: string
+      description: PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)
+
+ConcurrencyAndBufferSize:  # Two optional integers; no bounds are declared in this schema
+  type: object
+  description: Concurrency settings
+  properties:
+    concurrency:
+      type: integer
+      description: Number of concurrent operations
+    buffer_size:
+      type: integer
+      description: Size of the buffer
+
+ProxyConfig:  # Every property is optional (no `required` list)
+  type: object
+  description: Proxy configuration
+  properties:
+    type:
+      type: string
+      enum: [none, http, socks5, environment]  # the only four accepted values
+    url:
+      type: string
+    username:
+      type: string
+    password:
+      type: string
+      format: password  # OpenAPI hint so UIs and doc renderers obscure the value
+    ca_cert_pem:
+      type: string
+
+AzureKeyConfig:  # Every field except scopes reuses the shared EnvVar schema; none is required
+  type: object
+  description: Azure-specific key configuration
+  properties:
+    endpoint:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    api_version:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    client_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    client_secret:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    tenant_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    scopes:  # plain list of strings, unlike the EnvVar-backed fields above
+      type: array
+      items:
+        type: string
+      description: List of scopes to use for authentication
+
+VertexKeyConfig:  # All four fields reuse the shared EnvVar schema; none is required
+  type: object
+  description: Vertex-specific key configuration
+  properties:
+    project_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    project_number:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    region:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    auth_credentials:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+
+BedrockKeyConfig:  # Five EnvVar-backed fields plus an inline batch_s3_config object; none is required
+  type: object
+  description: AWS Bedrock-specific key configuration
+  properties:
+    access_key:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    secret_key:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    session_token:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    region:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    arn:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    batch_s3_config:  # defined inline rather than as a named schema
+      type: object
+      properties:
+        buckets:
+          type: array
+          items:
+            type: object
+            properties:
+              bucket_name:
+                type: string
+              prefix:
+                type: string
+              is_default:  # NOTE(review): schema does not limit how many buckets may set this; confirm server-side rule
+                type: boolean
+
+VllmKeyConfig:  # This is the schema Key.vllm_key_config references; VLLMKeyConfig below declares the same properties
+  type: object
+  description: VLLM-specific key configuration
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    model_name:
+      type: string
+  required:
+    - url  # model_name stays optional
+
+OllamaKeyConfig:  # Single EnvVar-backed field, url, which is required
+  type: object
+  description: Ollama-specific key configuration
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+  required:
+    - url
+
+ReplicateKeyConfig:  # Single optional boolean flag
+  type: object
+  description: Replicate-specific key configuration
+  properties:
+    use_deployments_endpoint:
+      type: boolean
+      description: Whether to use the deployments endpoint instead of the models endpoint
+
+SglKeyConfig:  # Single EnvVar-backed field, url, which is required (same shape as OllamaKeyConfig)
+  type: object
+  description: SGLang-specific key configuration
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+  required:
+    - url
+
+VLLMKeyConfig:  # NOTE(review): same properties as VllmKeyConfig above (the one Key.vllm_key_config references); confirm both are needed
+  type: object
+  description: vLLM-specific key configuration for per-key routing to different vLLM instances
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+      description: vLLM server base URL (required)  # NOTE(review): described as required but not listed under `required`
+    model_name:
+      type: string
+      description: Exact model name served on this vLLM instance
+
+Key:  # Referenced by ListProviderKeysResponse.keys; no property is listed as required
+  type: object
+  description: API key configuration
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the key
+    name:
+      type: string
+      description: Name of the key
+    value:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+      description: API key value (redacted in responses)
+    models:
+      type: array
+      items:
+        type: string
+      description: List of models this key can access (whitelist)
+    blacklisted_models:
+      type: array
+      items:
+        type: string
+      description: List of models this key cannot access (blacklist)
+    weight:
+      type: number  # not restricted to integers; no range is declared here
+      description: Weight for load balancing
+    aliases:
+      type: object
+      propertyNames:
+        minLength: 1  # alias names (the map keys) must be non-empty
+      additionalProperties:
+        type: string
+        minLength: 1  # mapped identifiers (the map values) must be non-empty strings
+      description: Model alias mappings — maps a user-facing model name to a provider-specific identifier (deployment name, inference profile ID, fine-tuned model ID, etc.)
+    azure_key_config:  # the *_key_config fields below each point at a provider-specific schema in this file
+      $ref: '#/AzureKeyConfig'
+    vertex_key_config:
+      $ref: '#/VertexKeyConfig'
+    bedrock_key_config:
+      $ref: '#/BedrockKeyConfig'
+    vllm_key_config:
+      $ref: '#/VllmKeyConfig'  # refers to VllmKeyConfig, not the differently-cased VLLMKeyConfig
+    ollama_key_config:
+      $ref: '#/OllamaKeyConfig'
+    sgl_key_config:
+      $ref: '#/SglKeyConfig'
+    replicate_key_config:
+      $ref: '#/ReplicateKeyConfig'
+    enabled:
+      type: boolean
+      description: Whether the key is active (defaults to true)
+    use_for_batch_api:
+      type: boolean
+      description: Whether this key can be used for batch API operations
+    config_hash:
+      type: string
+      description: Hash of config.json version, used for change detection
+    status:
+      type: string  # free-form string; the values in the description are examples, not an enum
+      description: Status of key (e.g., success, list_models_failed)
+    description:
+      type: string
+      description: Error or status description for the key
+
+AllowedRequests:  # Flat set of optional boolean flags, one per request type; used by CustomProviderConfig.allowed_requests
+  type: object
+  description: Allowed request types for custom providers
+  properties:
+    list_models:
+      type: boolean
+    text_completion:  # most operations below have a separate *_stream flag
+      type: boolean
+    text_completion_stream:
+      type: boolean
+    chat_completion:
+      type: boolean
+    chat_completion_stream:
+      type: boolean
+    responses:
+      type: boolean
+    responses_stream:
+      type: boolean
+    count_tokens:
+      type: boolean
+    embedding:
+      type: boolean
+    speech:
+      type: boolean
+    speech_stream:
+      type: boolean
+    transcription:
+      type: boolean
+    transcription_stream:
+      type: boolean
+    image_generation:
+      type: boolean
+    image_generation_stream:
+      type: boolean
+    batch_create:  # batch_* flags: create, list, retrieve, cancel, results
+      type: boolean
+    batch_list:
+      type: boolean
+    batch_retrieve:
+      type: boolean
+    batch_cancel:
+      type: boolean
+    batch_results:
+      type: boolean
+    file_upload:  # file_* flags: upload, list, retrieve, delete, content
+      type: boolean
+    file_list:
+      type: boolean
+    file_retrieve:
+      type: boolean
+    file_delete:
+      type: boolean
+    file_content:
+      type: boolean
+
+CustomProviderConfig:  # All four properties are optional
+  type: object
+  description: Custom provider configuration
+  properties:
+    is_key_less:
+      type: boolean
+    base_provider_type:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    allowed_requests:
+      $ref: '#/AllowedRequests'
+    request_path_overrides:
+      type: object
+      additionalProperties:  # string-to-string map; presumably request type -> path (TODO confirm key set)
+        type: string
+
+ProviderResponse:  # Item type of ListProvidersResponse.providers; no property is listed as required
+  type: object
+  description: Provider configuration response
+  properties:
+    name:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+    provider_status:  # enum-valued; distinct from the free-form `status` string below
+      $ref: '#/ProviderStatus'
+    status:
+      type: string
+      description: Operational status (e.g., list_models_failed)
+    description:
+      type: string
+      description: Error/status description
+    config_hash:
+      type: string
+      description: Hash of config.json version, used for change detection
+
+ListProvidersResponse:  # Array of ProviderResponse plus an integer total
+  type: object
+  description: List providers response
+  properties:
+    providers:
+      type: array
+      items:
+        $ref: '#/ProviderResponse'
+    total:
+      type: integer
+
+AddProviderRequest:  # Only `provider` is required; every other field is optional
+  type: object
+  description: Add provider request. Keys are managed separately via /api/providers/{provider}/keys.
+  required:
+    - provider
+  properties:
+    provider:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+
+UpdateProviderRequest:  # Same fields as AddProviderRequest minus `provider`; all optional
+  type: object
+  description: Update provider request. Keys are managed separately via /api/providers/{provider}/keys.
+  properties:
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+
+ListProviderKeysResponse:  # Array of Key plus an integer total
+  type: object
+  description: Response for listing keys for a provider
+  properties:
+    keys:
+      type: array
+      items:
+        $ref: '#/Key'
+    total:
+      type: integer
+
+ModelResponse:  # Item type of ListModelsResponse.models
+  type: object
+  description: Model information
+  properties:
+    name:
+      type: string
+    provider:
+      type: string
+    accessible_by_keys:  # list of strings; presumably key IDs or names (TODO confirm which)
+      type: array
+      items:
+        type: string
+
+Architecture:  # Referenced by ModelDetailsResponse.architecture; all fields optional
+  type: object
+  properties:
+    modality:
+      type: string
+    tokenizer:
+      type: string
+    instruct_type:
+      type: string
+    input_modalities:  # free-form strings; no enum of modalities is declared
+      type: array
+      items:
+        type: string
+    output_modalities:
+      type: array
+      items:
+        type: string
+
+ModelDetailsResponse:  # ModelResponse's fields plus three integer limits and an Architecture reference
+  type: object
+  description: Model details with capability metadata
+  properties:
+    name:
+      type: string
+    provider:
+      type: string
+    context_length:
+      type: integer
+    max_input_tokens:
+      type: integer
+    max_output_tokens:
+      type: integer
+    architecture:
+      $ref: '#/Architecture'
+    accessible_by_keys:
+      type: array
+      items:
+        type: string
+
+ListModelsResponse:  # Array of ModelResponse plus an integer total
+  type: object
+  description: List models response
+  properties:
+    models:
+      type: array
+      items:
+        $ref: '#/ModelResponse'
+    total:
+      type: integer
+
+ListModelDetailsResponse:  # Array of ModelDetailsResponse plus an integer total
+  type: object
+  description: List model details response
+  properties:
+    models:
+      type: array
+      items:
+        $ref: '#/ModelDetailsResponse'
+    total:
+      type: integer
--- a/docs/openapi/schemas/management/session.yaml
+++ b/docs/openapi/schemas/management/session.yaml
@@ -0,0 +1,41 @@
+# Session API schemas
+
+LoginRequest:  # Both fields are required
+  type: object
+  description: Login request
+  required:
+    - username
+    - password
+  properties:
+    username:
+      type: string
+    password:
+      type: string
+      format: password  # OpenAPI hint so UIs and doc renderers obscure the value
+
+LoginResponse:  # Message string plus a session token string; neither is listed as required
+  type: object
+  description: Login response
+  properties:
+    message:
+      type: string
+      example: Login successful
+    token:
+      type: string
+      description: Session token
+
+IsAuthEnabledResponse:  # Two boolean flags; neither is listed as required
+  type: object
+  description: Auth enabled status response
+  properties:
+    is_auth_enabled:
+      type: boolean
+    has_valid_token:
+      type: boolean
+
+LogoutResponse:  # Single message string
+  type: object
+  description: Logout response
+  properties:
+    message:
+      type: string
+      example: Logout successful
--- a/docs/openapi/schemas/management/users.yaml
+++ b/docs/openapi/schemas/management/users.yaml
@@ -0,0 +1,295 @@
+UserObject:  # Referenced by UserResponse.user and ListUsersResponse.users; no `required` list
+  type: object
+  properties:
+    id:
+      type: string
+      description: Unique user identifier
+    name:
+      type: string
+      description: User's display name
+    email:
+      type: string
+      format: email
+      description: User's email address
+    role_id:
+      type: integer
+      nullable: true  # null is accepted; presumably means no role assigned (TODO confirm)
+      description: ID of the assigned RBAC role
+    role:  # inline object rather than a named schema; nullable like role_id
+      type: object
+      nullable: true
+      description: RBAC role details
+      properties:
+        id:
+          type: integer
+        name:
+          type: string
+        description:
+          type: string
+        is_system_role:
+          type: boolean
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    teams:
+      type: array
+      description: Teams the user belongs to.
+      items:
+        $ref: '#/UserTeamSummaryEntry'  # the summary shape, not UserTeamEntry
+    access_profile:
+      $ref: '#/AccessProfile'  # AccessProfile is itself declared nullable
+
+CreateUserRequest:  # name and email are required; role_id is optional
+  type: object
+  required:
+    - name
+    - email
+  properties:
+    name:
+      type: string
+      description: User's display name
+    email:
+      type: string
+      format: email
+      pattern: '^[^\s@]+@[^\s@]+\.[^\s@]+$'  # local@domain.tld shape: exactly one '@', no whitespace, a dot after the '@'
+      description: User's email address (must be unique)
+    role_id:
+      type: integer
+      description: Optional RBAC role ID to assign
+
+UserResponse:  # Envelope holding a single UserObject under `user`
+  type: object
+  properties:
+    user:
+      $ref: '#/UserObject'
+
+ListUsersResponse:  # Array of UserObject plus pagination fields (total, page, limit, total_pages, has_more)
+  type: object
+  properties:
+    users:
+      type: array
+      items:
+        $ref: '#/UserObject'
+    total:
+      type: integer
+      description: Total number of users matching the query
+    page:
+      type: integer  # NOTE(review): schema does not say whether numbering starts at 0 or 1
+      description: Current page number
+    limit:
+      type: integer
+      description: Number of users per page
+    total_pages:
+      type: integer
+      description: Total number of pages
+    has_more:
+      type: boolean
+      description: Whether more pages are available
+
+# ---- User Permissions ----
+
+PermissionsResponse:  # Two-level map under `permissions`: outer keys -> inner keys -> boolean
+  type: object
+  properties:
+    permissions:
+      type: object
+      description: >
+        Map of resource names to their permitted operations.
+        When SCIM is disabled, returns full permissions for all resources.
+      additionalProperties:  # outer level: one entry per resource name
+        type: object
+        additionalProperties:  # inner level: one boolean per operation
+          type: boolean
+
+# ---- User Role ----
+
+AssignUserRoleRequest:  # Single required integer field: role_id
+  type: object
+  required:
+    - role_id
+  properties:
+    role_id:
+      type: integer
+      description: ID of the RBAC role to assign
+
+# ---- User Teams ----
+
+UserTeamSummaryEntry:  # Item type of UserObject.teams; carries business-unit fields that UserTeamEntry lacks
+  type: object
+  properties:
+    id:
+      type: string
+      description: Team ID
+    name:
+      type: string
+      description: Team name
+    business_unit_id:
+      type: string
+      nullable: true  # null when the team has no business unit, per the description
+      description: Business unit ID associated with this team (if any)
+    business_unit_name:
+      type: string
+      nullable: true  # null when the team has no business unit, per the description
+      description: Business unit name associated with this team (if any)
+
+UserTeamEntry:  # Item type of UserTeamsResponse.teams; adds `source` to the team id and name
+  type: object
+  properties:
+    id:
+      type: string
+      description: Team ID
+    name:
+      type: string
+      description: Team name
+    source:
+      type: string  # free-form string; the values in the description are examples, not an enum
+      description: How the user was added to this team (e.g. "manual", "scim_sync")
+
+AccessProfile:  # Referenced by UserObject.access_profile
+  type: object
+  nullable: true  # the whole object may be null, i.e. no profile assigned per the description
+  description: Active or fallback user access profile, if assigned.
+  properties:
+    id:
+      type: integer
+    user_id:
+      type: string
+    parent_profile_id:
+      type: integer
+      nullable: true  # null is accepted; presumably means the profile has no parent (TODO confirm)
+    name:
+      type: string
+    is_active:
+      type: boolean
+    expires_at:
+      type: string
+      format: date-time
+      nullable: true  # null is accepted; presumably means no expiry (TODO confirm)
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+
+UserTeamsResponse:  # Envelope holding an array of UserTeamEntry under `teams`
+  type: object
+  properties:
+    teams:
+      type: array
+      items:
+        $ref: '#/UserTeamEntry'
+
+UpdateUserTeamsRequest:  # Single required field: team_ids
+  type: object
+  required:
+    - team_ids
+  properties:
+    team_ids:
+      type: array  # no minItems is declared, so an empty list passes schema validation
+      items:
+        type: string
+      description: List of team IDs to assign (replaces existing manual assignments; synced memberships are preserved)
+
+# ---- Teams ----
+
+TeamObject:  # Item type of ListTeamsResponse.teams; no `required` list
+  type: object
+  properties:
+    id:
+      type: string
+      description: Team ID (derived from name)
+    name:
+      type: string
+      description: Team name
+    member_count:
+      type: integer
+      description: Number of members in the team
+    virtual_key_count:
+      type: integer
+      description: Number of virtual keys assigned to the team
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+
+CreateTeamRequest:  # Single required string field: name
+  type: object
+  required:
+    - name
+  properties:
+    name:
+      type: string
+      description: Team name (must be unique)
+
+UpdateTeamRequest:  # Single optional field; name is not updatable through this schema
+  type: object
+  properties:
+    description:  # NOTE(review): TeamObject declares no `description` property, so the value is not visible in team responses; confirm
+      type: string
+      description: Updated team description
+
+CreateTeamResponse:  # Only id and name; the counts and timestamps of TeamObject are not included
+  type: object
+  properties:
+    id:
+      type: string
+    name:
+      type: string
+
+ListTeamsResponse:  # Array of TeamObject plus the same pagination fields as ListUsersResponse
+  type: object
+  properties:
+    teams:
+      type: array
+      items:
+        $ref: '#/TeamObject'
+    total:
+      type: integer
+      description: Total number of teams matching the query
+    page:
+      type: integer
+      description: Current page number
+    limit:
+      type: integer
+      description: Number of teams per page
+    total_pages:
+      type: integer
+      description: Total number of pages
+    has_more:
+      type: boolean
+      description: Whether more pages are available
+
+# ---- Team Members ----
+
+TeamMemberObject:  # Item type of TeamMembersResponse.members; all fields are plain strings
+  type: object
+  properties:
+    user_id:
+      type: string
+    user_name:
+      type: string
+    user_email:  # plain string here; UserObject.email additionally declares `format: email`
+      type: string
+    source:
+      type: string  # free-form string; the values in the description are examples, not an enum
+      description: How the member was added (e.g. "manual", "scim_sync")
+
+TeamMembersResponse:  # Envelope holding an array of TeamMemberObject under `members`; no pagination fields
+  type: object
+  properties:
+    members:
+      type: array
+      items:
+        $ref: '#/TeamMemberObject'
+
+AddTeamMemberRequest:  # Single required string field: user_id
+  type: object
+  required:
+    - user_id
+  properties:
+    user_id:
+      type: string
+      description: ID of the user to add to the team