first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/openapi/schemas/inference/async.yaml
+++ b/docs/openapi/schemas/inference/async.yaml
@@ -0,0 +1,43 @@
+# Async Job schemas
+
+AsyncJobStatus:  # String enum; referenced by AsyncJobResponse.status
+  type: string
+  description: The status of an async job
+  enum:  # Exactly four values are accepted
+    - pending
+    - processing
+    - completed
+    - failed
+
+AsyncJobResponse:  # Envelope describing one async job
+  type: object
+  description: Response returned when creating or polling an async job
+  required:  # Every property not listed here is optional
+    - id
+    - status
+    - created_at
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the async job
+    status:
+      $ref: '#/AsyncJobStatus'  # Same-file reference to the enum defined above
+    expires_at:
+      type: string
+      format: date-time
+      description: When the job result expires and will be cleaned up
+    created_at:
+      type: string
+      format: date-time
+      description: When the job was created
+    completed_at:
+      type: string
+      format: date-time
+      description: When the job completed (successfully or with failure)
+    status_code:
+      type: integer
+      description: HTTP status code of the completed operation
+    result:  # No `type` keyword, so any JSON value validates here
+      description: The result of the completed operation (shape depends on the request type)
+    error:
+      $ref: './common.yaml#/BifrostError'  # Cross-file reference into common.yaml
--- a/docs/openapi/schemas/inference/batch.yaml
+++ b/docs/openapi/schemas/inference/batch.yaml
@@ -0,0 +1,309 @@
+# Batch API schemas
+
+BatchStatus:  # String enum; referenced by the status field of the batch create/retrieve/cancel responses
+  type: string
+  enum:  # Nine accepted values; no description is declared for this schema
+    - validating
+    - failed
+    - in_progress
+    - finalizing
+    - completed
+    - expired
+    - cancelling
+    - canceled
+    - ended
+
+BatchEndpoint:  # String enum of endpoint paths; referenced by BatchCreateRequest.endpoint
+  type: string
+  enum:  # Values are literal request paths
+    - /v1/chat/completions
+    - /v1/embeddings
+    - /v1/completions
+    - /v1/responses
+    - /v1/messages
+
+BatchCreateRequest:  # Request body schema for creating a batch
+  type: object
+  required:  # Only `model` is mandatory
+    - model
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input_file_id:  # NOTE(review): schema does not require either input_file_id or requests - confirm the handler validates this
+      type: string
+      description: OpenAI-style file ID
+    requests:
+      type: array
+      items:
+        $ref: '#/BatchRequestItem'  # Same-file reference
+      description: Anthropic-style inline requests
+    endpoint:
+      $ref: '#/BatchEndpoint'  # Restricted to the BatchEndpoint enum values
+    completion_window:
+      type: string
+      description: e.g., "24h"
+    metadata:  # Free-form map whose values must be strings
+      type: object
+      additionalProperties:
+        type: string
+
+BatchRequestItem:  # One element of BatchCreateRequest.requests
+  type: object
+  required:  # Only `custom_id` is mandatory
+    - custom_id
+  properties:
+    custom_id:
+      type: string
+    method:
+      type: string
+    url:
+      type: string
+    body:  # Untyped object: no nested properties are declared
+      type: object
+    params:  # Untyped object: no nested properties are declared
+      type: object
+
+BatchCreateResponse:  # Batch creation response; no `required` list, so every property is optional
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    endpoint:  # Plain string here, not a reference to the BatchEndpoint enum
+      type: string
+    input_file_id:
+      type: string
+    completion_window:
+      type: string
+    status:
+      $ref: '#/BatchStatus'  # Same-file enum reference
+    request_counts:
+      $ref: '#/BatchRequestCounts'  # Same-file reference
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+    created_at:  # 64-bit integer; the unit is not stated in this schema
+      type: integer
+      format: int64
+    expires_at:  # 64-bit integer; the unit is not stated in this schema
+      type: integer
+      format: int64
+    output_file_id:
+      type: string
+    error_file_id:
+      type: string
+    processing_status:
+      type: string
+    results_url:
+      type: string
+    operation_name:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+BatchRequestCounts:  # Integer counters; referenced by the request_counts field of the batch responses
+  type: object
+  properties:  # All counters are optional integers without declared bounds
+    total:
+      type: integer
+    completed:
+      type: integer
+    failed:
+      type: integer
+    succeeded:
+      type: integer
+    expired:
+      type: integer
+    canceled:
+      type: integer
+    pending:
+      type: integer
+
+BatchListResponse:  # Paginated list wrapper around BatchRetrieveResponse items
+  type: object
+  properties:
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/BatchRetrieveResponse'  # Each element uses the retrieve-response shape
+    first_id:
+      type: string
+    last_id:
+      type: string
+    has_more:
+      type: boolean
+    next_cursor:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+BatchRetrieveResponse:  # Full batch record; also the item type of BatchListResponse.data
+  type: object
+  properties:  # No `required` list, so every property is optional
+    id:
+      type: string
+    object:
+      type: string
+    endpoint:  # Plain string here, not a reference to the BatchEndpoint enum
+      type: string
+    input_file_id:
+      type: string
+    completion_window:
+      type: string
+    status:
+      $ref: '#/BatchStatus'  # Same-file enum reference
+    request_counts:
+      $ref: '#/BatchRequestCounts'  # Same-file reference
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+    created_at:  # This and the other *_at fields are int64; the unit is not stated in this schema
+      type: integer
+      format: int64
+    expires_at:
+      type: integer
+      format: int64
+    in_progress_at:
+      type: integer
+      format: int64
+    finalizing_at:
+      type: integer
+      format: int64
+    completed_at:
+      type: integer
+      format: int64
+    failed_at:
+      type: integer
+      format: int64
+    expired_at:
+      type: integer
+      format: int64
+    cancelling_at:
+      type: integer
+      format: int64
+    cancelled_at:  # NOTE(review): double-l spelling, unlike the `canceled` value in BatchStatus
+      type: integer
+      format: int64
+    output_file_id:
+      type: string
+    error_file_id:
+      type: string
+    errors:
+      $ref: '#/BatchErrors'  # Same-file reference
+    processing_status:
+      type: string
+    results_url:
+      type: string
+    archived_at:
+      type: integer
+      format: int64
+    operation_name:
+      type: string
+    done:
+      type: boolean
+    progress:  # Integer with no declared minimum or maximum
+      type: integer
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+BatchErrors:  # List wrapper; referenced by BatchRetrieveResponse.errors
+  type: object
+  properties:
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/BatchError'  # Same-file reference
+
+BatchError:  # Single entry of BatchErrors.data; all fields optional
+  type: object
+  properties:
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    line:  # Integer; presumably an input-file line number - TODO confirm
+      type: integer
+
+BatchCancelResponse:  # Batch cancel response; all properties optional
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    status:
+      $ref: '#/BatchStatus'  # Same-file enum reference
+    request_counts:
+      $ref: '#/BatchRequestCounts'  # Same-file reference
+    cancelling_at:
+      type: integer
+      format: int64
+    cancelled_at:  # NOTE(review): double-l spelling, unlike the `canceled` value in BatchStatus
+      type: integer
+      format: int64
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+BatchResultsResponse:  # Paginated list of per-request results for one batch
+  type: object
+  properties:
+    batch_id:
+      type: string
+    results:
+      type: array
+      items:
+        $ref: '#/BatchResultItem'  # Same-file reference
+    has_more:
+      type: boolean
+    next_cursor:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+BatchResultItem:  # One element of BatchResultsResponse.results
+  type: object
+  properties:  # response, result and error are all optional; the schema does not make them mutually exclusive
+    custom_id:
+      type: string
+    response:
+      $ref: '#/BatchResultResponse'  # Same-file reference
+    result:
+      $ref: '#/BatchResultData'  # Same-file reference
+    error:
+      $ref: '#/BatchResultError'  # Same-file reference
+
+BatchResultResponse:  # Referenced by BatchResultItem.response
+  type: object
+  properties:
+    status_code:
+      type: integer
+    request_id:
+      type: string
+    body:  # Untyped object: no nested properties are declared
+      type: object
+
+BatchResultData:  # Referenced by BatchResultItem.result
+  type: object
+  properties:
+    type:  # A property literally named "type", not the schema keyword
+      type: string
+    message:  # Untyped object: no nested properties are declared
+      type: object
+
+BatchResultError:  # Referenced by BatchResultItem.error
+  type: object
+  properties:  # Both fields are optional strings
+    code:
+      type: string
+    message:
+      type: string
--- a/docs/openapi/schemas/inference/chat.yaml
+++ b/docs/openapi/schemas/inference/chat.yaml
@@ -0,0 +1,673 @@
+# Chat Completions API schemas
+
+ChatCompletionRequest:  # Request body schema for chat completions
+  type: object
+  required:  # Everything except model and messages is optional
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format (e.g., openai/gpt-4)
+      example: openai/gpt-4
+    messages:
+      type: array
+      items:
+        $ref: '#/ChatMessage'  # Same-file reference
+      description: List of messages in the conversation
+    fallbacks:  # Array of plain strings
+      type: array
+      items:
+        type: string
+      description: Fallback models in provider/model format
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    frequency_penalty:  # Bounded to [-2.0, 2.0]
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    logit_bias:  # Map with numeric values
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:
+      type: boolean
+    max_completion_tokens:
+      type: integer
+    metadata:  # Map whose values may be of any type
+      type: object
+      additionalProperties: true
+    modalities:
+      type: array
+      items:
+        type: string
+    parallel_tool_calls:
+      type: boolean
+    presence_penalty:  # Bounded to [-2.0, 2.0]
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ChatReasoning'  # Same-file reference
+    response_format:  # Untyped object: no nested properties are declared
+      type: object
+      description: Format for the response
+    safety_identifier:
+      type: string
+    service_tier:  # Free-form string; no enum is declared
+      type: string
+    stream_options:
+      $ref: '#/ChatStreamOptions'  # Same-file reference
+    store:
+      type: boolean
+    temperature:  # Bounded to [0, 2]
+      type: number
+      minimum: 0
+      maximum: 2
+    tool_choice:
+      $ref: '#/ChatToolChoice'  # Same-file reference
+    tools:
+      type: array
+      items:
+        $ref: '#/ChatTool'  # Same-file reference
+    seed:
+      type: integer
+      description: Deterministic sampling seed
+    top_p:  # Bounded to [0, 1]
+      type: number
+      minimum: 0
+      maximum: 1
+      description: Nucleus sampling parameter
+    top_logprobs:  # Bounded to [0, 20]
+      type: integer
+      minimum: 0
+      maximum: 20
+      description: Number of most likely tokens to return at each position
+    stop:  # NOTE(review): description says up to 4, but no maxItems is declared on the array branch
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+      description: Up to 4 sequences where the API will stop generating tokens
+    prediction:
+      $ref: '#/ChatPrediction'  # Same-file reference
+    prompt_cache_retention:
+      type: string
+      enum: [in-memory, 24h]  # Both entries parse as YAML strings
+      description: Prompt cache retention policy
+    web_search_options:
+      $ref: '#/ChatWebSearchOptions'  # Same-file reference
+    truncation:  # Free-form string; no enum is declared
+      type: string
+    user:
+      type: string
+    verbosity:
+      type: string
+      enum: [low, medium, high]
+
+ChatMessage:  # Referenced by ChatCompletionRequest.messages and BifrostResponseChoice.message
+  type: object
+  required:  # Only `role` is mandatory; `content` is optional
+    - role
+  properties:
+    role:
+      $ref: '#/ChatMessageRole'  # Same-file enum reference
+    name:
+      type: string
+    content:
+      $ref: '#/ChatMessageContent'  # String or array of content blocks
+    tool_call_id:
+      type: string
+      description: For tool messages
+    refusal:
+      type: string
+    audio:
+      $ref: '#/ChatAudioMessageAudio'  # Same-file reference
+    reasoning:
+      type: string
+    reasoning_details:
+      type: array
+      items:
+        $ref: '#/ChatReasoningDetails'  # Same-file reference
+    annotations:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageAnnotation'  # Same-file reference
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageToolCall'  # Same-file reference
+
+ChatMessageRole:  # String enum; referenced by ChatMessage.role
+  type: string
+  enum:  # Five accepted roles
+    - assistant
+    - user
+    - system
+    - tool
+    - developer
+
+ChatMessageContent:  # Referenced by ChatMessage.content
+  oneOf:  # The two branches have different JSON types, so at most one can match
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ChatContentBlock'  # Same-file reference
+  description: Message content - can be a string or array of content blocks
+
+ChatContentBlock:  # Array element of ChatMessageContent
+  type: object
+  required:  # Only `type` is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [text, image_url, input_audio, file, refusal]  # Values match the sibling property names below
+    text:
+      type: string
+    refusal:
+      type: string
+    image_url:
+      $ref: '#/ChatInputImage'  # Same-file reference
+    input_audio:
+      $ref: '#/ChatInputAudio'  # Same-file reference
+    file:
+      $ref: '#/ChatInputFile'  # Same-file reference
+    cache_control:
+      $ref: './common.yaml#/CacheControl'  # Cross-file reference into common.yaml
+
+ChatInputImage:  # Referenced by ChatContentBlock.image_url
+  type: object
+  required:  # `url` is mandatory; `detail` is optional
+    - url
+  properties:
+    url:
+      type: string
+    detail:
+      type: string
+      enum: [low, high, auto]
+
+ChatInputAudio:  # Referenced by ChatContentBlock.input_audio
+  type: object
+  required:  # `data` is mandatory; `format` is optional
+    - data
+  properties:
+    data:
+      type: string
+    format:  # A property literally named "format"; free-form string
+      type: string
+
+ChatInputFile:  # Referenced by ChatContentBlock.file
+  type: object
+  properties:  # No `required` list: all four strings are optional
+    file_data:
+      type: string
+    file_id:
+      type: string
+    filename:
+      type: string
+    file_type:
+      type: string
+
+ChatReasoning:  # Referenced by ChatCompletionRequest.reasoning
+  type: object
+  properties:  # Both properties are optional
+    effort:
+      type: string
+      description: Reasoning effort level
+      enum: [none, minimal, low, medium, high, xhigh]  # `none` is a plain string in YAML, not null
+    max_tokens:
+      type: integer
+
+ChatStreamOptions:  # Referenced by ChatCompletionRequest.stream_options
+  type: object
+  properties:  # Two optional boolean flags
+    include_obfuscation:
+      type: boolean
+    include_usage:
+      type: boolean
+
+ChatToolChoice:  # Referenced by ChatCompletionRequest.tool_choice
+  oneOf:  # Either a bare keyword string or a structured object
+    - type: string
+      enum: [none, auto, required]  # `none` is a plain string in YAML, not null
+    - $ref: '#/ChatToolChoiceStruct'  # Same-file reference
+
+ChatToolChoiceStruct:  # Object form of ChatToolChoice
+  type: object
+  required:  # Only `type` is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [none, any, required, function, allowed_tools, custom]  # NOTE(review): `custom` has no matching property in this schema
+    function:
+      $ref: '#/ChatToolChoiceFunction'  # Same-file reference
+    allowed_tools:
+      $ref: '#/ChatToolChoiceAllowedTools'  # Same-file reference
+
+ChatToolChoiceFunction:  # Referenced by ChatToolChoiceStruct.function and ChatToolChoiceAllowedToolsTool.function
+  type: object
+  required:  # `name` is the only property and it is mandatory
+    - name
+  properties:
+    name:
+      type: string
+
+ChatToolChoiceAllowedTools:  # Referenced by ChatToolChoiceStruct.allowed_tools
+  type: object
+  properties:  # Both properties are optional
+    mode:
+      type: string
+      enum: [auto, required]
+    tools:
+      type: array
+      items:
+        $ref: '#/ChatToolChoiceAllowedToolsTool'  # Same-file reference
+
+ChatToolChoiceAllowedToolsTool:  # Array element of ChatToolChoiceAllowedTools.tools
+  type: object
+  required:  # Only `type` is mandatory
+    - type
+  properties:
+    type:  # Free-form string; no enum is declared
+      type: string
+    function:
+      $ref: '#/ChatToolChoiceFunction'  # Same-file reference
+
+ChatTool:  # Array element of ChatCompletionRequest.tools
+  type: object
+  required:  # Only `type` is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [function, custom]  # Values match the two payload property names below
+    function:
+      $ref: '#/ChatToolFunction'  # Same-file reference
+    custom:
+      $ref: '#/ChatToolCustom'  # Same-file reference
+    cache_control:
+      $ref: './common.yaml#/CacheControl'  # Cross-file reference into common.yaml
+
+ChatToolFunction:  # Referenced by ChatTool.function
+  type: object
+  required:  # Only `name` is mandatory
+    - name
+  properties:
+    name:
+      type: string
+    description:  # A property literally named "description", not the schema keyword
+      type: string
+    parameters:
+      $ref: '#/ToolFunctionParameters'  # Same-file reference
+    strict:
+      type: boolean
+
+ToolFunctionParameters:  # Referenced by ChatToolFunction.parameters
+  type: object
+  properties:  # Keys below are property names that happen to match JSON Schema keywords
+    type:
+      type: string
+    description:
+      type: string
+    required:
+      type: array
+      items:
+        type: string
+    properties:  # Map whose values may be of any type
+      type: object
+      additionalProperties: true
+    enum:
+      type: array
+      items:
+        type: string
+    additionalProperties:  # Declares a boolean property named "additionalProperties"
+      type: boolean
+
+ChatToolCustom:  # Referenced by ChatTool.custom
+  type: object
+  properties:  # Single optional property
+    format:
+      $ref: '#/ChatToolCustomFormat'  # Same-file reference
+
+ChatToolCustomFormat:  # Referenced by ChatToolCustom.format
+  type: object
+  required:  # Only `type` is mandatory
+    - type
+  properties:
+    type:  # Free-form string; no enum is declared
+      type: string
+    grammar:
+      $ref: '#/ChatToolCustomGrammarFormat'  # Same-file reference
+
+ChatToolCustomGrammarFormat:  # Referenced by ChatToolCustomFormat.grammar
+  type: object
+  required:  # Both properties are mandatory
+    - definition
+    - syntax
+  properties:
+    definition:
+      type: string
+    syntax:
+      type: string
+      enum: [lark, regex]
+
+ChatReasoningDetails:  # Array element of the reasoning_details fields on ChatMessage and the stream delta
+  type: object
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+    index:
+      type: integer
+    type:
+      type: string
+      enum: [reasoning.summary, reasoning.encrypted, reasoning.text]  # Dotted names are plain strings
+    summary:
+      type: string
+    text:
+      type: string
+    signature:
+      type: string
+    data:
+      type: string
+
+ChatAssistantMessageAnnotation:  # Array element of ChatMessage.annotations
+  type: object
+  properties:  # Both properties are optional
+    type:  # Free-form string; no enum is declared
+      type: string
+    url_citation:
+      $ref: '#/ChatAssistantMessageAnnotationCitation'  # Same-file reference
+
+ChatAssistantMessageAnnotationCitation:  # Referenced by ChatAssistantMessageAnnotation.url_citation
+  type: object
+  properties:  # No `required` list: every property is optional
+    start_index:
+      type: integer
+    end_index:
+      type: integer
+    title:
+      type: string
+    url:
+      type: string
+    sources:  # Untyped object: no nested properties are declared
+      type: object
+    type:
+      type: string
+
+ChatAssistantMessageToolCall:  # Array element of tool_calls on ChatMessage and the stream delta
+  type: object
+  required:  # Only `function` is mandatory; `id` and `type` are optional
+    - function
+  properties:
+    index:
+      type: integer
+    type:
+      type: string
+    id:
+      type: string
+    function:
+      $ref: '#/ChatAssistantMessageToolCallFunction'  # Same-file reference
+
+ChatAssistantMessageToolCallFunction:  # Referenced by ChatAssistantMessageToolCall.function
+  type: object
+  properties:  # Both properties are optional strings
+    name:
+      type: string
+    arguments:  # Declared as a string, not an object
+      type: string
+
+ChatAudioMessageAudio:  # Referenced by the audio field on ChatMessage and the stream delta
+  type: object
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+    data:
+      type: string
+    expires_at:  # Integer with no `format`; the unit is not stated in this schema
+      type: integer
+    transcript:
+      type: string
+
+ChatCompletionResponse:  # Non-streaming chat completion response; all properties optional
+  type: object
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: '#/BifrostResponseChoice'  # Same-file reference
+    created:  # Integer; the unit is not stated in this schema
+      type: integer
+    model:
+      type: string
+    object:
+      type: string
+    service_tier:
+      type: string
+    system_fingerprint:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # Cross-file reference into usage.yaml
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+    search_results:
+      type: array
+      items:
+        $ref: '#/PerplexitySearchResult'  # Same-file reference
+    videos:
+      type: array
+      items:
+        $ref: '#/VideoResult'  # Same-file reference
+    citations:  # Array of plain strings
+      type: array
+      items:
+        type: string
+
+BifrostResponseChoice:  # Array element of `choices` in both the streaming and non-streaming responses
+  type: object
+  properties:  # text, message and delta are all optional; the schema does not make them mutually exclusive
+    index:
+      type: integer
+    finish_reason:  # Free-form string; no enum is declared
+      type: string
+    log_probs:
+      $ref: '#/BifrostLogProbs'  # Same-file reference
+    text:
+      type: string
+      description: For text completions
+    message:
+      $ref: '#/ChatMessage'  # NOTE(review): OpenAPI 3.0 ignores siblings of $ref, so the description below may be dropped - confirm spec version
+      description: For non-streaming chat completions
+    delta:
+      $ref: '#/ChatStreamResponseChoiceDelta'  # NOTE(review): same $ref-sibling caveat as `message`
+      description: For streaming chat completions
+
+BifrostLogProbs:  # Referenced by BifrostResponseChoice.log_probs
+  type: object
+  properties:  # No `required` list: every property is optional
+    content:
+      type: array
+      items:
+        $ref: '#/ContentLogProb'  # Same-file reference
+    refusal:
+      type: array
+      items:
+        $ref: '#/LogProb'  # Same-file reference
+    text_offset:
+      type: array
+      items:
+        type: integer
+    token_logprobs:
+      type: array
+      items:
+        type: number
+    tokens:
+      type: array
+      items:
+        type: string
+    top_logprobs:  # Array of maps with numeric values
+      type: array
+      items:
+        type: object
+        additionalProperties:
+          type: number
+
+ContentLogProb:  # Array element of BifrostLogProbs.content
+  type: object
+  properties:  # Same three fields as LogProb plus a nested top_logprobs list
+    bytes:
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+    top_logprobs:
+      type: array
+      items:
+        $ref: '#/LogProb'  # Same-file reference
+
+LogProb:  # Referenced by BifrostLogProbs.refusal and ContentLogProb.top_logprobs
+  type: object
+  properties:  # No `required` list: every property is optional
+    bytes:  # Array of integers
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+
+ChatStreamResponseChoiceDelta:  # Referenced by BifrostResponseChoice.delta
+  type: object
+  properties:  # No `required` list: every property is optional
+    role:  # Plain string here, whereas ChatMessage.role references the ChatMessageRole enum
+      type: string
+    content:  # Plain string here, whereas ChatMessage.content also allows an array of blocks
+      type: string
+    refusal:
+      type: string
+    audio:
+      $ref: '#/ChatAudioMessageAudio'  # Same-file reference
+    reasoning:
+      type: string
+    reasoning_details:
+      type: array
+      items:
+        $ref: '#/ChatReasoningDetails'  # Same-file reference
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageToolCall'  # Same-file reference
+
+ChatCompletionStreamResponse:  # Streaming counterpart of ChatCompletionResponse with a smaller property set
+  type: object
+  description: Streaming chat completion response (SSE format)
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: '#/BifrostResponseChoice'  # Same choice schema as the non-streaming response
+    created:
+      type: integer
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # Cross-file reference into usage.yaml
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+PerplexitySearchResult:  # Array element of ChatCompletionResponse.search_results
+  type: object
+  description: Search result from Perplexity AI search
+  properties:  # All six properties are optional strings
+    title:
+      type: string
+    url:
+      type: string
+    date:  # Plain string; no date format is declared
+      type: string
+    last_updated:  # Plain string; no date format is declared
+      type: string
+    snippet:
+      type: string
+    source:
+      type: string
+
+VideoResult:  # Array element of ChatCompletionResponse.videos
+  type: object
+  properties:  # No `required` list: every property is optional
+    url:
+      type: string
+    thumbnail_url:
+      type: string
+    thumbnail_width:
+      type: integer
+    thumbnail_height:
+      type: integer
+    duration:  # Number; the unit is not stated in this schema
+      type: number
+
+ChatPrediction:  # Referenced by ChatCompletionRequest.prediction
+  type: object
+  description: Predicted output content for the model to reference (OpenAI only). Can reduce latency.
+  properties:  # Neither property is marked required
+    type:  # Described as always "content", but no enum enforces that
+      type: string
+      description: Always "content"
+    content:
+      description: Predicted content (string or array of content parts)
+      oneOf:
+        - type: string
+        - type: array
+          items:  # Content parts are open objects with arbitrary keys
+            type: object
+            additionalProperties: true
+
+ChatWebSearchOptions:  # Referenced by ChatCompletionRequest.web_search_options
+  type: object
+  description: Web search options for chat completions (OpenAI only)
+  properties:  # Both properties are optional
+    search_context_size:
+      type: string
+      enum: [low, medium, high]
+      description: Amount of search context to include
+    user_location:
+      $ref: '#/ChatWebSearchOptionsUserLocation'  # Same-file reference
+
+ChatWebSearchOptionsUserLocation:  # Referenced by ChatWebSearchOptions.user_location
+  type: object
+  properties:  # Both properties are optional
+    type:  # Free-form string; no enum is declared
+      type: string
+      description: Location type (e.g., "approximate")
+    approximate:
+      $ref: '#/ChatWebSearchOptionsUserLocationApproximate'  # Same-file reference
+
+ChatWebSearchOptionsUserLocationApproximate:  # Referenced by ChatWebSearchOptionsUserLocation.approximate
+  type: object
+  properties:  # All four properties are optional strings
+    city:
+      type: string
+    country:  # The two-letter form is described but not enforced by a pattern or length
+      type: string
+      description: Two-letter ISO country code (e.g., "US")
+    region:
+      type: string
+      description: Region or state (e.g., "California")
+    timezone:
+      type: string
+      description: IANA timezone (e.g., "America/Los_Angeles")
--- a/docs/openapi/schemas/inference/common.yaml
+++ b/docs/openapi/schemas/inference/common.yaml
@@ -0,0 +1,149 @@
+# Common schemas used across the API
+
+ModelProvider:  # String enum; referenced by the `provider` fields in this file and by containers.yaml
+  type: string
+  description: AI model provider identifier
+  enum:  # 23 lowercase identifiers
+    - openai
+    - azure
+    - anthropic
+    - bedrock
+    - cohere
+    - vertex
+    - vllm
+    - mistral
+    - ollama
+    - groq
+    - sgl
+    - parasail
+    - perplexity
+    - replicate
+    - cerebras
+    - gemini
+    - openrouter
+    - elevenlabs
+    - huggingface
+    - nebius
+    - xai
+    - runway
+    - fireworks
+
+Fallback:  # NOTE(review): request schemas visible alongside this file declare `fallbacks` as plain strings, not this object - confirm usage
+  type: object
+  description: Fallback model configuration
+  required:  # Both properties are mandatory
+    - provider
+    - model
+  properties:
+    provider:
+      $ref: '#/ModelProvider'  # Same-file enum reference
+    model:
+      type: string
+      description: Model name
+
+BifrostError:  # Referenced from async.yaml by AsyncJobResponse.error
+  type: object
+  description: Error response from Bifrost
+  properties:  # No `required` list: every property is optional
+    event_id:
+      type: string
+    type:
+      type: string
+    is_bifrost_error:
+      type: boolean
+    status_code:
+      type: integer
+    error:
+      $ref: '#/ErrorField'  # Same-file reference
+    extra_fields:
+      $ref: '#/BifrostErrorExtraFields'  # Error-specific extra fields, not BifrostResponseExtraFields
+
+ErrorField:  # Referenced by BifrostError.error
+  type: object
+  properties:  # All five properties are optional strings
+    type:
+      type: string
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    event_id:  # The same property name also exists on the enclosing BifrostError
+      type: string
+
+BifrostErrorExtraFields:  # Referenced by BifrostError.extra_fields
+  type: object
+  properties:  # No `required` list: every property is optional
+    provider:
+      $ref: '#/ModelProvider'  # Same-file enum reference
+    model_requested:
+      type: string
+    request_type:
+      type: string
+
+BifrostResponseExtraFields:  # Referenced as `extra_fields` by response schemas in the sibling files
+  type: object
+  description: Additional fields included in responses
+  properties:  # No `required` list: every property is optional
+    request_type:
+      type: string
+      description: Type of request that was made
+    provider:
+      $ref: '#/ModelProvider'  # Same-file enum reference
+    model_requested:
+      type: string
+      description: The model that was requested
+    model_deployment:
+      type: string
+      description: The actual model deployment used
+    latency:
+      type: integer
+      format: int64
+      description: Request latency in milliseconds
+    chunk_index:
+      type: integer
+      description: Index of the chunk for streaming responses
+    raw_request:  # Untyped object: no nested properties are declared
+      type: object
+      description: Raw request if enabled
+    raw_response:  # Untyped object: no nested properties are declared
+      type: object
+      description: Raw response if enabled
+    cache_debug:
+      $ref: '#/BifrostCacheDebug'  # Same-file reference
+
+BifrostCacheDebug:  # Referenced by BifrostResponseExtraFields.cache_debug
+  type: object
+  properties:  # No `required` list: every property is optional
+    cache_hit:
+      type: boolean
+    cache_id:
+      type: string
+    hit_type:  # Free-form string; no enum is declared
+      type: string
+    requested_provider:  # Plain string here, not a reference to the ModelProvider enum
+      type: string
+    requested_model:
+      type: string
+    provider_used:  # Plain string here, not a reference to the ModelProvider enum
+      type: string
+    model_used:
+      type: string
+    input_tokens:
+      type: integer
+    threshold:  # Number with no declared bounds
+      type: number
+    similarity:  # Number with no declared bounds
+      type: number
+
+CacheControl:  # Referenced from chat.yaml by ChatContentBlock.cache_control and ChatTool.cache_control
+  type: object
+  description: Cache control settings for content blocks
+  properties:  # Neither property is marked required
+    type:
+      type: string
+      enum: [ephemeral]  # Single accepted value
+    ttl:  # Free-form string; no pattern is declared for the duration syntax
+      type: string
+      description: Time to live (e.g., "1m", "1h")
--- a/docs/openapi/schemas/inference/containers.yaml
+++ b/docs/openapi/schemas/inference/containers.yaml
@@ -0,0 +1,344 @@
+# Containers API schemas
+
+ContainerStatus:  # String enum; referenced by the status field of the container schemas
+  type: string
+  enum:  # Only one value is currently declared
+    - running
+  description: The status of a container
+
+ContainerExpiresAfter:  # Referenced by the expires_after field of the container schemas
+  type: object
+  description: Expiration configuration for a container
+  properties:  # Neither property is marked required
+    anchor:  # Free-form string; no enum is declared
+      type: string
+      description: The anchor point for expiration (e.g., "last_active_at")
+    minutes:  # Integer with no declared minimum
+      type: integer
+      description: Number of minutes after anchor point
+
+ContainerObject:  # Item type of ContainerListResponse.data
+  type: object
+  description: A container object
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the container
+    object:  # Described as always "container", but no enum enforces that
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'  # Same-file enum reference
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'  # Same-file reference
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:  # Free-form string; no pattern is declared
+      type: string
+      description: Memory limit for the container (e.g., "1g", "4g")
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+      description: User-provided metadata
+
+ContainerCreateRequest:  # Request body schema for creating a container
+  type: object
+  required:  # provider and name are mandatory; the rest is optional
+    - provider
+    - name
+  properties:
+    provider:
+      $ref: './common.yaml#/ModelProvider'  # Cross-file enum reference into common.yaml
+    name:
+      type: string
+      description: Name of the container
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'  # Same-file reference
+    file_ids:  # Array of plain strings
+      type: array
+      items:
+        type: string
+      description: IDs of existing files to copy into this container
+    memory_limit:  # Free-form string; no pattern is declared
+      type: string
+      description: Memory limit for the container (e.g., "1g", "4g")
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+      description: User-provided metadata
+
+ContainerCreateResponse:  # Repeats the ContainerObject properties inline and adds extra_fields
+  type: object
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the created container
+    object:  # Described as always "container", but no enum enforces that
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'  # Same-file enum reference
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'  # Same-file reference
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:
+      type: string
+      description: Memory limit for the container
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+      description: User-provided metadata
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerListResponse:  # Paginated list wrapper around ContainerObject items
+  type: object
+  properties:  # No `required` list: every property is optional
+    object:  # Described as always "list", but no enum enforces that
+      type: string
+      description: The object type (always "list")
+    data:
+      type: array
+      items:
+        $ref: '#/ContainerObject'  # Same-file reference
+      description: List of container objects
+    first_id:
+      type: string
+      description: ID of the first container in the list
+    last_id:
+      type: string
+      description: ID of the last container in the list
+    has_more:
+      type: boolean
+      description: Whether there are more containers to fetch
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerRetrieveResponse:  # Repeats the ContainerObject properties inline and adds extra_fields
+  type: object
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the container
+    object:  # Described as always "container", but no enum enforces that
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'  # Same-file enum reference
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'  # Same-file reference
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:
+      type: string
+      description: Memory limit for the container
+    metadata:  # Map with string values
+      type: object
+      additionalProperties:
+        type: string
+      description: User-provided metadata
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerDeleteResponse:  # Container deletion response; all properties optional
+  type: object
+  properties:
+    id:
+      type: string
+      description: The ID of the deleted container
+    object:  # Described as always "container.deleted", but no enum enforces that
+      type: string
+      description: The object type (always "container.deleted")
+    deleted:
+      type: boolean
+      description: Whether the container was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+# =============================================================================
+# CONTAINER FILES SCHEMAS
+# =============================================================================
+
+ContainerFileObject:  # Item type of ContainerFileListResponse.data
+  type: object
+  description: A file object within a container
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the file
+    object:  # Described as always "container.file", but no enum enforces that
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:  # Free-form string; no enum is declared
+      type: string
+      description: The source of the file (e.g., "user_upload", "copied")
+
+ContainerFileCreateMultipartRequest:  # NOTE(review): no `required` list, so `file` is optional per the schema - confirm intent
+  type: object
+  description: Request to create a file in a container via multipart upload
+  properties:
+    file:  # Binary upload part
+      type: string
+      format: binary
+      description: The file content to upload
+    file_path:
+      type: string
+      description: Optional path for the file within the container
+
+ContainerFileCreateJsonRequest:  # JSON alternative to the multipart upload request
+  type: object
+  description: Request to create a file in a container by referencing an existing file
+  required:  # Only `file_id` is mandatory
+    - file_id
+  properties:
+    file_id:
+      type: string
+      description: The ID of an existing file to copy into the container
+    file_path:
+      type: string
+      description: Optional path for the file within the container
+
+ContainerFileCreateResponse:  # Repeats the ContainerFileObject properties inline and adds extra_fields
+  type: object
+  description: Response from creating a file in a container
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the created file
+    object:  # Described as always "container.file", but no enum enforces that
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:
+      type: string
+      description: The source of the file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerFileListResponse:  # Paginated list wrapper around ContainerFileObject items
+  type: object
+  description: Response containing a list of files in a container
+  properties:  # No `required` list: every property is optional
+    object:  # Described as always "list", but no enum enforces that
+      type: string
+      description: The object type (always "list")
+    data:
+      type: array
+      items:
+        $ref: '#/ContainerFileObject'  # Same-file reference
+      description: List of file objects
+    first_id:
+      type: string
+      description: ID of the first file in the list
+    last_id:
+      type: string
+      description: ID of the last file in the list
+    has_more:
+      type: boolean
+      description: Whether there are more files to fetch
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerFileRetrieveResponse:  # Repeats the ContainerFileObject properties inline and adds extra_fields
+  type: object
+  description: Response from retrieving a file from a container
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The unique identifier for the file
+    object:  # Described as always "container.file", but no enum enforces that
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:
+      type: string
+      description: The source of the file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
+
+ContainerFileDeleteResponse:  # Same shape as ContainerDeleteResponse, for container files
+  type: object
+  description: Response from deleting a file from a container
+  properties:  # No `required` list: every property is optional
+    id:
+      type: string
+      description: The ID of the deleted file
+    object:  # Described as always "container.file.deleted", but no enum enforces that
+      type: string
+      description: The object type (always "container.file.deleted")
+    deleted:
+      type: boolean
+      description: Whether the file was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # Cross-file reference into common.yaml
--- a/docs/openapi/schemas/inference/count-tokens.yaml
+++ b/docs/openapi/schemas/inference/count-tokens.yaml
@@ -0,0 +1,53 @@
+# Count Tokens API schemas
+
+CountTokensRequest:  # Request body for the Count Tokens API
+  type: object
+  required:  # a request must name a model and supply messages
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    messages:
+      type: array
+      items:
+        $ref: './responses.yaml#/ResponsesMessage'  # reuses the Responses API message schema
+    fallbacks:  # NOTE(review): presumably provider/model strings as in RerankRequest - confirm
+      type: array
+      items:
+        type: string
+    tools:
+      type: array
+      items:
+        $ref: './responses.yaml#/ResponsesTool'  # reuses the Responses API tool schema
+    instructions:
+      type: string
+    text:  # NOTE(review): plain string here, but ResponsesRequest.text is an object ref - confirm
+      type: string
+
+CountTokensResponse:  # Response body for the Count Tokens API
+  type: object
+  properties:  # no `required` list: every property below is optional
+    object:
+      type: string
+    model:
+      type: string
+    input_tokens:
+      type: integer
+    input_tokens_details:
+      $ref: './responses.yaml#/ResponsesResponseInputTokens'  # schema defined in responses.yaml
+    tokens:  # NOTE(review): presumably token IDs aligned with token_strings by position - confirm
+      type: array
+      items:
+        type: integer
+    token_strings:
+      type: array
+      items:
+        type: string
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
--- a/docs/openapi/schemas/inference/embeddings.yaml
+++ b/docs/openapi/schemas/inference/embeddings.yaml
@@ -0,0 +1,76 @@
+# Embeddings API schemas
+
+EmbeddingRequest:  # Request body for the Embeddings API
+  type: object
+  required:  # model and input are mandatory; the remaining properties are optional
+    - model
+    - input
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      $ref: '#/EmbeddingInput'  # union of text and token-array shapes, defined in this file
+    fallbacks:
+      type: array
+      items:
+        type: string
+    encoding_format:
+      type: string
+      enum: [float, base64]  # only these two values are accepted
+    dimensions:
+      type: integer
+
+EmbeddingInput:  # Accepted shapes for text or pre-tokenized embedding input
+  anyOf:  # not oneOf: an empty array satisfies all three array branches at once
+    - type: string  # a single text
+    - type: array  # a list of texts
+      items:
+        type: string
+    - type: array  # a single token array
+      items:
+        type: integer
+    - type: array  # a list of token arrays
+      items:
+        type: array
+        items:
+          type: integer
+  description: Input for embedding - text or token arrays
+
+EmbeddingResponse:  # Response body for the Embeddings API
+  type: object
+  properties:  # no `required` list: every property below is optional
+    data:
+      type: array
+      items:
+        $ref: '#/EmbeddingData'  # one entry per embedding, defined in this file
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # usage schema shared via usage.yaml
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+EmbeddingData:  # One element of EmbeddingResponse.data
+  type: object
+  properties:  # no `required` list: every property below is optional
+    index:
+      type: integer
+    object:
+      type: string
+    embedding:
+      $ref: '#/EmbeddingStruct'  # string or numeric-array union defined in this file
+
+EmbeddingStruct:  # Embedding payload: a string, a numeric vector, or a list of vectors
+  anyOf:  # not oneOf: an empty array satisfies both array branches at once
+    - type: string  # NOTE(review): presumably the base64 encoding_format output - confirm
+    - type: array  # a single vector of numbers
+      items:
+        type: number
+    - type: array  # a list of vectors
+      items:
+        type: array
+        items:
+          type: number
--- a/docs/openapi/schemas/inference/files.yaml
+++ b/docs/openapi/schemas/inference/files.yaml
@@ -0,0 +1,188 @@
+# Files API schemas
+
+S3StorageConfig:  # S3 location settings; referenced by FileStorageConfig.s3
+  type: object
+  description: AWS S3 storage configuration
+  properties:  # no `required` list: every property below is optional
+    bucket:
+      type: string
+      description: S3 bucket name
+    region:
+      type: string
+      description: AWS region
+    prefix:
+      type: string
+      description: Path prefix for stored files
+
+GCSStorageConfig:  # GCS location settings; referenced by FileStorageConfig.gcs
+  type: object
+  description: Google Cloud Storage configuration
+  properties:  # no `required` list: every property below is optional
+    bucket:
+      type: string
+      description: GCS bucket name
+    project:
+      type: string
+      description: GCP project ID
+    prefix:
+      type: string
+      description: Path prefix for stored files
+
+FileStorageConfig:  # Wrapper holding one optional sub-config per storage backend
+  type: object
+  description: Storage configuration for cloud storage backends
+  properties:  # neither key is required and nothing forbids setting both
+    s3:
+      $ref: '#/S3StorageConfig'
+    gcs:
+      $ref: '#/GCSStorageConfig'
+
+FilePurpose:  # Closed set of values for the `purpose` field of the file schemas
+  type: string
+  enum:  # any other string is rejected by validation
+    - batch
+    - assistants
+    - fine-tune
+    - vision
+    - batch_output
+    - user_data
+    - responses
+    - evals
+
+FileStatus:  # Closed set of values for the `status` field of the file schemas
+  type: string
+  enum:  # any other string is rejected by validation
+    - uploaded
+    - processed
+    - processing
+    - error
+    - deleted
+
+FileUploadRequest:  # Request body for uploading a file
+  type: object
+  required:  # the file content and its purpose are mandatory; provider is optional
+    - file
+    - purpose
+  properties:
+    file:
+      type: string
+      format: binary  # raw file bytes rather than text
+    purpose:
+      $ref: '#/FilePurpose'  # restricted to the FilePurpose enum in this file
+    provider:
+      $ref: './common.yaml#/ModelProvider'  # provider schema defined in common.yaml
+
+FileUploadResponse:  # Response body for a file upload; same property set as FileRetrieveResponse
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+    storage_backend:
+      type: string
+    storage_uri:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+FileListResponse:  # Response body for listing files
+  type: object
+  properties:  # no `required` list: every property below is optional
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/FileObject'  # list entries use the slimmer FileObject schema
+    has_more:
+      type: boolean
+    after:  # NOTE(review): presumably a pagination cursor for the next page - confirm
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+FileObject:  # Entry of FileListResponse.data; omits the storage_* and extra_fields properties
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+
+FileRetrieveResponse:  # Response body for fetching one file; same property set as FileUploadResponse
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer  # integer value, not a date-time string
+      format: int64
+    storage_backend:
+      type: string
+    storage_uri:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+FileDeleteResponse:  # Response body for deleting a file
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+    object:
+      type: string
+    deleted:
+      type: boolean
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
--- a/docs/openapi/schemas/inference/images.yaml
+++ b/docs/openapi/schemas/inference/images.yaml
@@ -0,0 +1,514 @@
+# Image Generation Schemas
+
+ImageGenerationRequest:  # Request body for generating images from a text prompt
+  allOf:  # single-member composition; the object below is the whole schema
+    - type: object
+      required:  # only model and prompt are mandatory
+        - model
+        - prompt
+      properties:
+        model:
+          type: string
+          description: Model identifier in format `provider/model`
+        prompt:
+          type: string
+          description: Text prompt to generate image
+        n:
+          type: integer
+          minimum: 1
+          maximum: 10
+          description: Number of images to generate
+        size:
+          type: string
+          enum:  # same size list as ImageGenerationResponse.size
+            - '256x256'
+            - '512x512'
+            - '1024x1024'
+            - '1792x1024'
+            - '1024x1792'
+            - '1536x1024'
+            - '1024x1536'
+            - 'auto'
+          description: Size of the generated image
+        quality:
+          type: string
+          enum:
+            - 'auto'
+            - 'high'
+            - 'medium'
+            - 'low'
+            - 'hd'
+            - 'standard'
+          description: Quality of the generated image
+        style:
+          type: string
+          enum:
+            - 'natural'
+            - 'vivid'
+          description: Style of the generated image
+        response_format:
+          type: string
+          enum:
+            - 'url'
+            - 'b64_json'
+          default: 'url'
+          description: |
+            Format of the response.
+        background:
+          type: string
+          enum:
+            - 'transparent'
+            - 'opaque'
+            - 'auto'
+          description: Background type for the image
+        moderation:
+          type: string
+          enum:
+            - 'low'
+            - 'auto'
+          description: Content moderation level
+        partial_images:
+          type: integer
+          minimum: 0
+          maximum: 3
+          description: Number of partial images to generate
+        output_compression:
+          type: integer
+          minimum: 0
+          maximum: 100
+          description: Compression level (0-100%)
+        output_format:
+          type: string
+          enum:
+            - 'png'
+            - 'webp'
+            - 'jpeg'
+          description: Output image format
+        user:
+          type: string
+          description: User identifier for tracking
+        seed:
+          type: integer
+          description: Seed for reproducible image generation
+        negative_prompt:
+          type: string
+          description: Negative prompt to guide what to avoid in generation
+        num_inference_steps:
+          type: integer
+          description: Number of inference steps for generation
+        stream:
+          type: boolean
+          default: false
+          description: |
+            Whether to stream the response. When true, images are sent as SSE.
+            When streaming, providers may return base64 chunks (`b64_json`) and/or URLs (`url`) depending on provider and configuration.
+        fallbacks:
+          type: array
+          items:
+            $ref: './common.yaml#/Fallback'  # fallback entry schema from common.yaml
+          description: Fallback models to try if primary model fails
+
+ImageGenerationResponse:  # Non-streaming result of an image generation call
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+      description: Unique identifier for the generation request
+    created:
+      type: integer  # numeric epoch value, not a date-time string
+      format: int64
+      description: Unix timestamp when the image was created
+    model:
+      type: string
+      description: Model used for generation
+    data:
+      type: array
+      items:
+        $ref: '#/ImageData'  # one entry per generated image
+      description: Array of generated images
+    background:
+      type: string  # free-form here; the request side restricts it with an enum
+      description: Background type for the image
+    output_format:
+      type: string
+      enum:
+        - 'png'
+        - 'webp'
+        - 'jpeg'
+      description: Output image format
+    quality:
+      type: string  # free-form here; the request side restricts it with an enum
+      description: Quality of the generated image
+    size:
+      type: string
+      enum:  # same size list as ImageGenerationRequest.size
+        - '256x256'
+        - '512x512'
+        - '1024x1024'
+        - '1792x1024'
+        - '1024x1792'
+        - '1536x1024'
+        - '1024x1536'
+        - 'auto'
+      description: Size of the generated image
+    usage:
+      $ref: '#/ImageUsage'  # token accounting schema defined in this file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+ImageData:  # One generated image; element type of ImageGenerationResponse.data
+  type: object
+  properties:  # url and b64_json are both optional; nothing forces exactly one
+    url:
+      type: string
+      format: uri
+      description: URL of the generated image
+    b64_json:
+      type: string
+      description: Base64-encoded image data
+    revised_prompt:
+      type: string
+      description: Revised prompt used for generation
+    index:
+      type: integer
+      description: Index of this image
+
+ImageGenerationResponseParameters:  # Four generation settings, all typed as free-form strings
+  type: object
+  properties:  # no enums and no `required` list are declared here
+    background:
+      type: string
+    output_format:
+      type: string
+    quality:
+      type: string
+    size:
+      type: string
+
+ImageUsage:  # Token accounting attached to image responses and stream chunks
+  type: object
+  properties:  # no `required` list: every property below is optional
+    input_tokens:
+      type: integer
+      description: Number of input tokens
+    input_tokens_details:
+      $ref: '#/ImageTokenDetails'  # image/text split of the input tokens
+    total_tokens:
+      type: integer
+      description: Total tokens used
+    output_tokens:
+      type: integer
+      description: Number of output tokens
+    output_tokens_details:
+      $ref: '#/ImageTokenDetails'  # same breakdown schema reused for the output side
+
+ImageTokenDetails:  # Image/text token split used by both detail fields of ImageUsage
+  type: object
+  properties:  # no `required` list: every property below is optional
+    image_tokens:
+      type: integer
+      description: Tokens used for images
+    text_tokens:
+      type: integer
+      description: Tokens used for text
+
+ImageGenerationStreamResponse:  # One SSE chunk emitted while an image generation streams
+  type: object
+  description: |
+    Streaming response chunk for image generation.
+    Sent via Server-Sent Events (SSE).
+    Providers may return either b64_json (base64-encoded image data) or url (public URL to the image).
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+      description: Request identifier
+    type:
+      type: string
+      enum:  # partial chunk, final chunk, or error event
+        - 'image_generation.partial_image'
+        - 'image_generation.completed'
+        - 'error'
+      description: Type of stream event
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Sequence number for event ordering within the stream
+    b64_json:
+      type: string
+      description: |
+        Base64-encoded chunk of image data.
+        Optional; either b64_json or url may be present.
+    url:
+      type: string
+      format: uri
+      description: |
+        Optional public URL to the generated image chunk.
+        Used by HuggingFace and other providers that return image URLs instead of base64 data.
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string
+      enum:  # same size list as ImageGenerationRequest.size
+        - '256x256'
+        - '512x512'
+        - '1024x1024'
+        - '1792x1024'
+        - '1024x1792'
+        - '1536x1024'
+        - '1024x1536'
+        - 'auto'
+      description: Size of the generated image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      enum:
+        - 'png'
+        - 'webp'
+        - 'jpeg'
+      description: Output format used
+    revised_prompt:
+      type: string
+      description: Revised prompt
+    usage:  # NOTE(review): OpenAPI 3.0 tooling ignores siblings of $ref - confirm spec version
+      $ref: '#/ImageUsage'
+      description: Token usage
+    error:  # NOTE(review): same $ref-sibling caveat as `usage` above
+      $ref: './common.yaml#/BifrostError'
+      description: Error information if generation failed
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+# Image Edit Schemas (multipart/form-data)
+
+ImageEditRequest:  # Form fields accepted by the image edit call
+  type: object
+  required:  # prompt is not listed; its description explains when it is needed
+    - model
+    - image
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    prompt:
+      type: string
+      description: |
+        Text prompt describing the edit. Required unless `type` is `background_removal`.
+    image:
+      type: string
+      format: binary  # uploaded file content
+      description: |
+        Image file to edit. Use field name `image` for a single file or `image[]` for multiple files.
+    mask:
+      type: string
+      format: binary  # uploaded file content
+      description: Optional mask image for inpainting (transparent areas indicate regions to edit)
+    type:
+      type: string
+      enum:
+        - 'inpainting'
+        - 'outpainting'
+        - 'background_removal'
+      description: Type of edit operation
+    n:
+      type: integer
+      minimum: 1
+      maximum: 10
+      description: Number of images to generate
+    size:
+      type: string
+      enum:  # NOTE(review): lacks the 1792-pixel sizes ImageGenerationRequest allows - confirm
+        - '256x256'
+        - '512x512'
+        - '1024x1024'
+        - '1536x1024'
+        - '1024x1536'
+        - 'auto'
+      description: Size of the output image
+    response_format:
+      type: string
+      enum:
+        - 'url'
+        - 'b64_json'
+      default: 'url'
+      description: Format of the response
+    stream:
+      type: boolean
+      default: false
+      description: When true, stream the response via Server-Sent Events
+    background:
+      type: string
+      enum:
+        - 'transparent'
+        - 'opaque'
+        - 'auto'
+      description: Background type for the image
+    input_fidelity:
+      type: string
+      enum:
+        - 'low'
+        - 'high'
+      description: How closely to follow the original image
+    partial_images:
+      type: integer
+      minimum: 0
+      maximum: 3
+      description: Number of partial images to generate when streaming
+    quality:
+      type: string
+      enum:  # NOTE(review): no `hd` value, unlike ImageGenerationRequest.quality - confirm
+        - 'auto'
+        - 'high'
+        - 'medium'
+        - 'low'
+        - 'standard'
+      description: Quality of the output image
+    output_format:
+      type: string
+      enum:
+        - 'png'
+        - 'webp'
+        - 'jpeg'
+      description: Output image format
+    num_inference_steps:
+      type: integer
+      description: Number of inference steps
+    seed:
+      type: integer
+      description: Seed for reproducible editing
+    output_compression:
+      type: integer
+      minimum: 0
+      maximum: 100
+      description: Compression level (0-100%)
+    negative_prompt:
+      type: string
+      description: What to avoid in the edit
+    user:
+      type: string
+      description: User identifier for tracking
+    fallbacks:
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'  # fallback entry schema from common.yaml
+      description: Fallback models to try if primary model fails
+
+# Image Variation Schemas (multipart/form-data)
+
+ImageVariationRequest:  # Form fields accepted by the image variation call
+  type: object
+  required:  # a model and a source image are mandatory
+    - model
+    - image
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    image:
+      type: string
+      format: binary  # uploaded file content
+      description: |
+        Image file to create variations of. Use field name `image` for a single file or `image[]` for multiple (first image is used).
+    n:
+      type: integer
+      minimum: 1
+      maximum: 10
+      description: Number of variations to generate
+    size:
+      type: string
+      enum:  # same size list as ImageGenerationRequest.size
+        - '256x256'
+        - '512x512'
+        - '1024x1024'
+        - '1792x1024'
+        - '1024x1792'
+        - '1536x1024'
+        - '1024x1536'
+        - 'auto'
+      description: Size of the output images
+    response_format:
+      type: string
+      enum:
+        - 'url'
+        - 'b64_json'
+      default: 'url'
+      description: Format of the response
+    user:
+      type: string
+      description: User identifier for tracking
+    fallbacks:
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'  # fallback entry schema from common.yaml
+      description: Fallback models to try if primary model fails
+
+# Image Edit Streaming (SSE)
+
+ImageEditStreamResponse:  # One SSE chunk emitted while an image edit streams
+  type: object
+  description: |
+    Streaming response chunk for image edit.
+    Sent via Server-Sent Events (SSE) when `stream=true`.
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+      description: Request identifier
+    type:
+      type: string
+      enum:  # partial chunk, final chunk, or error event
+        - 'image_edit.partial_image'
+        - 'image_edit.completed'
+        - 'error'
+      description: Type of stream event
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Sequence number for event ordering within the stream
+    b64_json:
+      type: string
+      description: Base64-encoded chunk of image data; optional
+    url:
+      type: string
+      format: uri
+      description: Optional public URL to the image chunk
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string  # free-form here; ImageGenerationStreamResponse.size uses an enum
+      description: Size of the image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      enum:
+        - 'png'
+        - 'webp'
+        - 'jpeg'
+      description: Output format used
+    revised_prompt:
+      type: string
+      description: Revised prompt
+    usage:  # NOTE(review): OpenAPI 3.0 tooling ignores siblings of $ref - confirm spec version
+      $ref: '#/ImageUsage'
+      description: Token usage
+    error:  # NOTE(review): same $ref-sibling caveat as `usage` above
+      $ref: './common.yaml#/BifrostError'
+      description: Error information if edit failed
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
--- a/docs/openapi/schemas/inference/models.yaml
+++ b/docs/openapi/schemas/inference/models.yaml
@@ -0,0 +1,125 @@
+# Models API schemas
+
+ListModelsResponse:  # Response body for listing models
+  type: object
+  properties:  # no `required` list: every property below is optional
+    data:
+      type: array
+      items:
+        $ref: '#/Model'  # one entry per model, defined in this file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+    next_page_token:  # NOTE(review): presumably absent on the last page - confirm
+      type: string
+
+Model:  # Descriptor of one model; element type of ListModelsResponse.data
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+      description: Model ID in provider/model format
+    canonical_slug:
+      type: string
+    name:
+      type: string
+    deployment:
+      type: string
+    created:
+      type: integer
+      format: int64
+    context_length:
+      type: integer
+    max_input_tokens:
+      type: integer
+    max_output_tokens:
+      type: integer
+    architecture:
+      $ref: '#/Architecture'  # nested object defined in this file
+    pricing:
+      $ref: '#/Pricing'  # nested object defined in this file
+    top_provider:
+      $ref: '#/TopProvider'  # nested object defined in this file
+    per_request_limits:
+      $ref: '#/PerRequestLimits'  # nested object defined in this file
+    supported_parameters:
+      type: array
+      items:
+        type: string
+    default_parameters:
+      $ref: '#/DefaultParameters'  # nested object defined in this file
+    hugging_face_id:
+      type: string
+    description:  # a property literally named "description", not schema documentation
+      type: string
+    owned_by:
+      type: string
+    supported_methods:
+      type: array
+      items:
+        type: string
+
+Architecture:  # Nested object referenced by Model.architecture
+  type: object
+  properties:  # no `required` list: every property below is optional
+    modality:
+      type: string
+    tokenizer:
+      type: string
+    instruct_type:
+      type: string
+    input_modalities:  # free-form strings; no enum restricts the values
+      type: array
+      items:
+        type: string
+    output_modalities:  # free-form strings; no enum restricts the values
+      type: array
+      items:
+        type: string
+
+Pricing:  # Nested object referenced by Model.pricing
+  type: object
+  properties:  # every price field is typed as string, not number
+    prompt:
+      type: string
+    completion:
+      type: string
+    request:
+      type: string
+    image:
+      type: string
+    web_search:
+      type: string
+    internal_reasoning:
+      type: string
+    input_cache_read:
+      type: string
+    input_cache_write:
+      type: string
+
+TopProvider:  # Nested object referenced by Model.top_provider
+  type: object
+  properties:  # no `required` list: every property below is optional
+    is_moderated:
+      type: boolean
+    context_length:
+      type: integer
+    max_completion_tokens:
+      type: integer
+
+PerRequestLimits:  # Nested object referenced by Model.per_request_limits
+  type: object
+  properties:  # no `required` list: both properties are optional
+    prompt_tokens:
+      type: integer
+    completion_tokens:
+      type: integer
+
+DefaultParameters:  # Nested object referenced by Model.default_parameters
+  type: object
+  properties:  # all three are plain numbers with no declared bounds
+    temperature:
+      type: number
+    top_p:
+      type: number
+    frequency_penalty:
+      type: number
--- a/docs/openapi/schemas/inference/rerank.yaml
+++ b/docs/openapi/schemas/inference/rerank.yaml
@@ -0,0 +1,98 @@
+# Rerank API schemas
+
+RerankRequest:  # Request body for the Rerank API
+  type: object
+  required:  # model, query, and documents are mandatory
+    - model
+    - query
+    - documents
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+      example: cohere/rerank-v3.5
+    query:
+      type: string
+      minLength: 1  # rejects an empty query string
+      description: Query used to score and reorder documents
+    documents:
+      type: array
+      description: Documents to rerank
+      minItems: 1  # rejects an empty document list
+      items:
+        $ref: '#/RerankDocument'  # document schema defined in this file
+    fallbacks:
+      type: array
+      items:
+        type: string
+      description: Fallback models in provider/model format
+    top_n:
+      type: integer
+      minimum: 1
+      description: Maximum number of ranked results to return
+    max_tokens_per_doc:
+      type: integer
+      minimum: 1
+      description: Maximum tokens to consider per document (provider-dependent)
+    priority:
+      type: integer
+      description: Request priority hint (provider-dependent)
+    return_documents:
+      type: boolean
+      description: Whether to include document content in each result
+
+RerankDocument:  # One document; used in RerankRequest.documents and RerankResult.document
+  type: object
+  required:  # only the text is mandatory
+    - text
+  properties:
+    text:
+      type: string
+      minLength: 1  # rejects empty text
+      description: Document text content
+    id:
+      type: string
+      minLength: 1  # when present, the id must be non-empty
+      description: Optional document identifier
+    meta:
+      type: object
+      description: Optional document metadata
+      additionalProperties: true  # arbitrary keys and value types are accepted
+
+RerankResponse:  # Response body for the Rerank API
+  type: object
+  required:  # results and model are always present; the rest is optional
+    - results
+    - model
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the rerank response
+    results:
+      type: array
+      description: Ranked results ordered by relevance score descending
+      items:
+        $ref: '#/RerankResult'  # result schema defined in this file
+    model:
+      type: string
+      description: Model used to perform reranking
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # usage schema shared via usage.yaml
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # shared schema from common.yaml
+
+RerankResult:  # One ranked entry; element type of RerankResponse.results
+  type: object
+  required:  # document is optional; index and score are mandatory
+    - index
+    - relevance_score
+  properties:
+    index:
+      type: integer
+      minimum: 0  # negative indices are rejected
+      description: Index into the original documents array
+    relevance_score:
+      type: number  # no minimum or maximum is declared for the score
+      description: Relevance score for this document
+    document:
+      $ref: '#/RerankDocument'  # same schema as the request documents
--- a/docs/openapi/schemas/inference/responses.yaml
+++ b/docs/openapi/schemas/inference/responses.yaml
@@ -0,0 +1,716 @@
+# Responses API schemas
+
+ResponsesRequest:  # Request body for the Responses API
+  type: object
+  required:  # model and input are mandatory; every other property is optional
+    - model
+    - input
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      $ref: '#/ResponsesRequestInput'  # string or message-array union defined in this file
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    background:
+      type: boolean
+    conversation:
+      type: string
+    include:
+      type: array
+      items:
+        type: string
+    instructions:
+      type: string
+    max_output_tokens:
+      type: integer
+    max_tool_calls:
+      type: integer
+    metadata:
+      type: object
+      additionalProperties: true  # arbitrary keys and value types are accepted
+    parallel_tool_calls:
+      type: boolean
+    previous_response_id:
+      type: string
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ResponsesParametersReasoning'
+    safety_identifier:
+      type: string
+    service_tier:
+      type: string
+    stream_options:
+      $ref: '#/ResponsesStreamOptions'
+    store:
+      type: boolean
+    temperature:
+      type: number
+    text:
+      $ref: '#/ResponsesTextConfig'  # object with format and verbosity, not a plain string
+    top_logprobs:
+      type: integer
+    top_p:
+      type: number
+    tool_choice:
+      $ref: '#/ResponsesToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesTool'
+    truncation:
+      type: string
+
+ResponsesRequestInput:  # Union type used by ResponsesRequest.input
+  oneOf:  # branches differ by JSON type, so at most one can match
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ResponsesMessage'
+  description: Input - can be a string or array of messages
+
+ResponsesMessage:  # Single flat schema shared by every Responses item type
+  type: object
+  properties:  # no `required` list: every property below is optional
+    id:
+      type: string
+    type:
+      $ref: '#/ResponsesMessageType'  # enum of item kinds defined in this file
+    status:
+      type: string
+      enum: [in_progress, completed, incomplete, interpreting, failed]
+    role:
+      type: string
+      enum: [assistant, user, system, developer]
+    content:
+      $ref: '#/ResponsesMessageContent'  # string or content-block array
+    call_id:
+      type: string
+    name:
+      type: string
+    arguments:
+      type: string  # typed as a string, not a structured object
+    output:
+      type: object
+    action:
+      type: object
+    error:
+      type: string
+    queries:
+      type: array
+      items:
+        type: string
+    results:
+      type: array
+      items:
+        type: object
+    summary:
+      type: array
+      items:
+        $ref: '#/ResponsesReasoningSummary'  # summary entry schema defined in this file
+    encrypted_content:
+      type: string
+
+ResponsesMessageType:  # Closed set of values for ResponsesMessage.type
+  type: string
+  enum:  # any other string is rejected by validation
+    - message
+    - file_search_call
+    - computer_call
+    - computer_call_output
+    - web_search_call
+    - web_fetch_call
+    - function_call
+    - function_call_output
+    - code_interpreter_call
+    - local_shell_call
+    - local_shell_call_output
+    - mcp_call
+    - custom_tool_call
+    - custom_tool_call_output
+    - image_generation_call
+    - mcp_list_tools
+    - mcp_approval_request
+    - mcp_approval_responses  # NOTE(review): plural, unlike mcp_approval_request - confirm spelling
+    - reasoning
+    - item_reference
+    - refusal
+
+ResponsesMessageContent:  # Union type used by ResponsesMessage.content
+  oneOf:  # branches differ by JSON type, so at most one can match
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ResponsesMessageContentBlock'
+
+ResponsesMessageContentBlock:  # Single flat schema shared by every content block type
+  type: object
+  required:  # only the `type` tag is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [input_text, input_image, input_file, input_audio, output_text, refusal, reasoning_text]
+    file_id:
+      type: string
+    text:
+      type: string
+    signature:
+      type: string
+    image_url:
+      type: string
+    detail:
+      type: string
+    file_data:
+      type: string
+    file_url:
+      type: string
+    filename:
+      type: string
+    file_type:
+      type: string
+    input_audio:
+      $ref: '#/ResponsesInputMessageContentBlockAudio'  # format and data pair
+    annotations:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextAnnotation'
+    logprobs:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextLogProb'
+    refusal:
+      type: string
+    cache_control:
+      $ref: './common.yaml#/CacheControl'  # shared schema from common.yaml
+
+ResponsesInputMessageContentBlockAudio:  # Audio payload for ResponsesMessageContentBlock.input_audio
+  type: object
+  required:  # both properties are mandatory
+    - format
+    - data
+  properties:
+    format:
+      type: string
+      enum: [mp3, wav]  # only these two formats are accepted
+    data:  # NOTE(review): presumably base64-encoded audio bytes - confirm
+      type: string
+
+ResponsesOutputMessageContentTextAnnotation:  # One entry of a content block's `annotations` array
+  type: object
+  properties:  # no `required` list: every property is optional, including `type`
+    type:
+      type: string
+      enum: [file_citation, url_citation, container_file_citation, file_path]
+    index:
+      type: integer
+    file_id:
+      type: string
+    text:
+      type: string
+    start_index:
+      type: integer
+    end_index:
+      type: integer
+    filename:
+      type: string
+    title:
+      type: string
+    url:
+      type: string  # plain string; no `format: uri` is declared
+    container_id:
+      type: string
+
+ResponsesOutputMessageContentTextLogProb:  # One entry of a content block's `logprobs` array
+  type: object
+  properties:  # no `required` list: every property below is optional
+    bytes:
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+    top_logprobs:
+      type: array
+      items:
+        $ref: './chat.yaml#/LogProb'  # reuses the chat API log-probability schema
+
+ResponsesParametersReasoning:  # Settings object used by ResponsesRequest.reasoning
+  type: object
+  properties:  # no `required` list: every property below is optional
+    effort:
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+    generate_summary:  # NOTE(review): presumably superseded by `summary` - confirm
+      type: string
+      deprecated: true
+    summary:
+      type: string
+      enum: [auto, concise, detailed]
+    max_tokens:
+      type: integer
+
+ResponsesStreamOptions:  # Settings object used by ResponsesRequest.stream_options
+  type: object
+  properties:  # a single optional flag
+    include_obfuscation:
+      type: boolean
+
+ResponsesTextConfig:  # Settings object used by ResponsesRequest.text
+  type: object
+  properties:  # no `required` list: both properties are optional
+    format:
+      $ref: '#/ResponsesTextConfigFormat'  # output format descriptor defined in this file
+    verbosity:
+      type: string
+      enum: [low, medium, high]
+
+ResponsesTextConfigFormat:  # Output format descriptor used by ResponsesTextConfig.format
+  type: object
+  required:  # only the `type` tag is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [text, json_schema, json_object]
+    name:  # NOTE(review): name/schema/strict presumably apply to json_schema only - confirm
+      type: string
+    schema:
+      type: object  # unconstrained object; no nested shape is declared
+    strict:
+      type: boolean
+
+ResponsesToolChoice:  # Union type used by ResponsesRequest.tool_choice
+  oneOf:  # a string keyword or an object; the JSON types never overlap
+    - type: string
+      enum: [none, auto, required]
+    - $ref: '#/ResponsesToolChoiceStruct'  # object form defined in this file
+
+ResponsesToolChoiceStruct:  # Object form of ResponsesToolChoice
+  type: object
+  required:  # only the `type` tag is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum:  # includes none/auto/required, which the string form also accepts
+        - none
+        - auto
+        - any
+        - required
+        - function
+        - allowed_tools
+        - file_search
+        - web_search_preview
+        - computer_use_preview
+        - code_interpreter
+        - image_generation
+        - mcp
+        - custom
+    mode:
+      type: string
+    name:
+      type: string
+    server_label:
+      type: string
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesToolChoiceAllowedToolDef'  # entry schema defined in this file
+
+ResponsesToolChoiceAllowedToolDef:  # Element type of ResponsesToolChoiceStruct.tools
+  type: object
+  required:  # only the `type` tag is mandatory
+    - type
+  properties:
+    type:
+      type: string
+      enum: [function, mcp, image_generation]  # narrower than the ResponsesTool type list
+    name:
+      type: string
+    server_label:
+      type: string
+
+ResponsesTool:  # Tool definition; one flat object covers every tool `type`, only `type` is required
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - function
+        - file_search
+        - computer_use_preview
+        - web_search
+        - web_fetch
+        - mcp
+        - code_interpreter
+        - image_generation
+        - local_shell
+        - custom
+        - web_search_preview
+        - memory
+        - tool_search
+    name:  # NOTE(review): which of the fields below apply to which `type` is not expressed in this schema
+      type: string
+    description:
+      type: string
+    cache_control:
+      $ref: './common.yaml#/CacheControl'
+    parameters:
+      $ref: './chat.yaml#/ToolFunctionParameters'
+    strict:
+      type: boolean
+    vector_store_ids:
+      type: array
+      items:
+        type: string
+    filters:  # free-form object
+      type: object
+    max_num_results:
+      type: integer
+    ranking_options:  # free-form object
+      type: object
+    display_height:
+      type: integer
+    display_width:
+      type: integer
+    environment:
+      type: string
+    enable_zoom:
+      type: boolean
+    search_context_size:
+      type: string
+    user_location:  # free-form object
+      type: object
+    server_label:
+      type: string
+    server_url:
+      type: string
+    allowed_tools:  # free-form object
+      type: object
+    authorization:
+      type: string
+    connector_id:
+      type: string
+    headers:  # string-to-string map
+      type: object
+      additionalProperties:
+        type: string
+    require_approval:  # free-form object
+      type: object
+    server_description:
+      type: string
+    container:  # free-form object
+      type: object
+    background:
+      type: string
+    input_fidelity:
+      type: string
+    input_image_mask:  # free-form object
+      type: object
+    moderation:
+      type: string
+    output_compression:
+      type: integer
+    output_format:
+      type: string
+    partial_images:
+      type: integer
+    quality:
+      type: string
+    size:
+      type: string
+    format:  # free-form object
+      type: object
+
+ResponsesReasoningSummary:  # Reasoning summary entry; both `type` and `text` are required
+  type: object
+  required:
+    - type
+    - text
+  properties:
+    type:
+      type: string
+      enum: [summary_text]  # single allowed value
+    text:
+      type: string
+
+ResponsesResponse:  # Responses API response object; no `required` list, so every field is optional
+  type: object
+  properties:
+    id:
+      type: string
+    background:
+      type: boolean
+    conversation:  # free-form object
+      type: object
+    created_at:  # NOTE(review): presumably a Unix timestamp; the unit is not stated here
+      type: integer
+    error:
+      $ref: '#/ResponsesResponseError'
+    include:
+      type: array
+      items:
+        type: string
+    incomplete_details:
+      $ref: '#/ResponsesResponseIncompleteDetails'
+    instructions:  # NOTE(review): typed as object here; confirm plain-string instructions are not returned
+      type: object
+    max_output_tokens:
+      type: integer
+    max_tool_calls:
+      type: integer
+    metadata:  # free-form object
+      type: object
+    model:
+      type: string
+    output:
+      type: array
+      items:
+        $ref: '#/ResponsesMessage'
+    parallel_tool_calls:
+      type: boolean
+    previous_response_id:
+      type: string
+    prompt:  # free-form object
+      type: object
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ResponsesParametersReasoning'
+    safety_identifier:
+      type: string
+    service_tier:
+      type: string
+    status:
+      type: string
+      enum: [completed, failed, in_progress, canceled, queued, incomplete]
+    stop_reason:
+      type: string
+    store:
+      type: boolean
+    temperature:
+      type: number
+    text:
+      $ref: '#/ResponsesTextConfig'
+    top_logprobs:
+      type: integer
+    top_p:
+      type: number
+    tool_choice:
+      $ref: '#/ResponsesToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesTool'
+    truncation:
+      type: string
+    usage:
+      $ref: '#/ResponsesResponseUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+    search_results:  # item schema is shared with chat.yaml
+      type: array
+      items:
+        $ref: './chat.yaml#/PerplexitySearchResult'
+    videos:  # item schema is shared with chat.yaml
+      type: array
+      items:
+        $ref: './chat.yaml#/VideoResult'
+    citations:
+      type: array
+      items:
+        type: string
+
+ResponsesResponseError:  # Error payload of ResponsesResponse.error; both fields are required
+  type: object
+  required:
+    - code
+    - message
+  properties:
+    code:
+      type: string
+    message:
+      type: string
+
+ResponsesResponseIncompleteDetails:  # Payload of ResponsesResponse.incomplete_details
+  type: object
+  required:
+    - reason
+  properties:
+    reason:  # plain string; no enum of reasons is declared
+      type: string
+
+ResponsesResponseUsage:  # Token usage for a Responses API call; every field is optional
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    input_tokens_details:
+      $ref: '#/ResponsesResponseInputTokens'
+    output_tokens:
+      type: integer
+    output_tokens_details:
+      $ref: '#/ResponsesResponseOutputTokens'
+    total_tokens:
+      type: integer
+    cost:
+      $ref: './usage.yaml#/BifrostCost'  # cost breakdown schema lives in usage.yaml
+
+ResponsesResponseInputTokens:  # Breakdown referenced by ResponsesResponseUsage.input_tokens_details
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    cached_read_tokens:
+      type: integer
+      description: >
+        Tokens served from the prompt cache (cache hit), billed at the reduced
+        cache-read rate. Already included in the parent input_tokens total.
+    cached_write_tokens:
+      type: integer
+      description: >
+        Tokens written to the prompt cache on this request, billed at the
+        cache-creation rate. Already included in the parent input_tokens total.
+        Populated for providers that separately report cache write tokens
+        (Anthropic, Bedrock).
+
+ResponsesResponseOutputTokens:  # Breakdown referenced by ResponsesResponseUsage.output_tokens_details
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    accepted_prediction_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    reasoning_tokens:
+      type: integer
+    rejected_prediction_tokens:
+      type: integer
+    citation_tokens:
+      type: integer
+    num_search_queries:  # NOTE(review): by name a query count rather than a token count - confirm
+      type: integer
+
+ResponsesStreamResponse:  # One flat event object for all stream event types; every field is optional
+  type: object
+  description: Streaming responses API response (SSE format)
+  properties:
+    type:  # NOTE(review): which fields accompany which event type is not expressed in this schema
+      $ref: '#/ResponsesStreamResponseType'
+    sequence_number:
+      type: integer
+    response:
+      $ref: '#/ResponsesResponse'
+    output_index:
+      type: integer
+    item:
+      $ref: '#/ResponsesMessage'
+    content_index:
+      type: integer
+    item_id:
+      type: string
+    part:
+      $ref: '#/ResponsesMessageContentBlock'
+    delta:
+      type: string
+    signature:
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextLogProb'
+    text:
+      type: string
+    refusal:
+      type: string
+    arguments:
+      type: string
+    partial_image_b64:  # NOTE(review): presumably base64 image data, going by the name - confirm
+      type: string
+    partial_image_index:
+      type: integer
+    annotation:
+      $ref: '#/ResponsesOutputMessageContentTextAnnotation'
+    annotation_index:
+      type: integer
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+ResponsesStreamResponseType:  # Closed set of event names allowed in ResponsesStreamResponse.type
+  type: string
+  enum:
+    - response.ping
+    - response.created
+    - response.in_progress
+    - response.completed
+    - response.failed
+    - response.incomplete
+    - response.output_item.added
+    - response.output_item.done
+    - response.content_part.added
+    - response.content_part.done
+    - response.output_text.delta
+    - response.output_text.done
+    - response.refusal.delta
+    - response.refusal.done
+    - response.function_call_arguments.delta
+    - response.function_call_arguments.done
+    - response.file_search_call.in_progress
+    - response.file_search_call.searching
+    - response.file_search_call.results.added
+    - response.file_search_call.results.completed
+    - response.web_search_call.searching
+    - response.web_search_call.results.added
+    - response.web_search_call.results.completed
+    - response.web_fetch_call.in_progress
+    - response.web_fetch_call.fetching
+    - response.web_fetch_call.completed
+    - response.reasoning_summary_part.added
+    - response.reasoning_summary_part.done
+    - response.reasoning_summary_text.delta
+    - response.reasoning_summary_text.done
+    - response.image_generation_call.completed
+    - response.image_generation_call.generating
+    - response.image_generation_call.in_progress
+    - response.image_generation_call.partial_image
+    - response.mcp_call_arguments.delta
+    - response.mcp_call_arguments.done
+    - response.mcp_call.completed
+    - response.mcp_call.failed
+    - response.mcp_call.in_progress
+    - response.mcp_list_tools.completed
+    - response.mcp_list_tools.failed
+    - response.mcp_list_tools.in_progress
+    - response.code_interpreter_call.in_progress
+    - response.code_interpreter_call.interpreting
+    - response.code_interpreter_call.completed
+    - response.code_interpreter_call_code.delta
+    - response.code_interpreter_call_code.done
+    - response.output_text.annotation.added
+    - response.output_text.annotation.done
+    - response.queued
+    - response.custom_tool_call_input.delta
+    - response.custom_tool_call_input.done
+    - error  # the only value without the `response.` prefix
--- a/docs/openapi/schemas/inference/speech.yaml
+++ b/docs/openapi/schemas/inference/speech.yaml
@@ -0,0 +1,132 @@
+# Speech API schemas
+
+SpeechRequest:  # Text-to-speech request; model, input and voice are required
+  type: object
+  required:
+    - model
+    - input
+    - voice
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      type: string
+      description: Text to convert to speech
+    fallbacks:  # plain strings here; no item format is declared
+      type: array
+      items:
+        type: string
+    stream_format:
+      type: string
+      enum: [sse]  # single allowed value
+      description: Set to "sse" to enable streaming
+    voice:
+      $ref: '#/SpeechVoiceInput'
+    instructions:
+      type: string
+    response_format:
+      type: string
+      enum: [mp3, opus, aac, flac, wav, pcm]
+    speed:  # bounded to the inclusive range 0.25-4.0
+      type: number
+      minimum: 0.25
+      maximum: 4.0
+    language_code:
+      type: string
+    pronunciation_dictionary_locators:
+      type: array
+      items:
+        $ref: '#/SpeechPronunciationDictionaryLocator'
+    enable_logging:
+      type: boolean
+    optimize_streaming_latency:  # NOTE(review): boolean here; confirm no provider expects a numeric level
+      type: boolean
+    with_timestamps:
+      type: boolean
+
+SpeechVoiceInput:  # Either a single voice string or a list of VoiceConfig entries
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/VoiceConfig'
+
+VoiceConfig:  # Pairs a speaker with a voice; both fields are required
+  type: object
+  required:
+    - speaker
+    - voice
+  properties:
+    speaker:
+      type: string
+    voice:
+      type: string
+
+SpeechPronunciationDictionaryLocator:  # Item of SpeechRequest.pronunciation_dictionary_locators
+  type: object
+  required:
+    - pronunciation_dictionary_id
+  properties:
+    pronunciation_dictionary_id:
+      type: string
+    version_id:  # optional; not in the `required` list
+      type: string
+
+SpeechResponse:  # Speech synthesis result; no `required` list, so every field is optional
+  type: object
+  properties:
+    audio:
+      type: string
+      format: byte
+      description: Audio data (base64-encoded, per the OpenAPI `byte` format)
+    usage:
+      $ref: '#/SpeechUsage'
+    alignment:  # alignment and normalized_alignment share the SpeechAlignment schema
+      $ref: '#/SpeechAlignment'
+    normalized_alignment:
+      $ref: '#/SpeechAlignment'
+    audio_base64:  # NOTE(review): overlaps with `audio`; confirm when each field is populated
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+SpeechUsage:  # Token counts referenced by SpeechResponse and SpeechStreamResponse
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+
+SpeechAlignment:  # Three arrays; presumably parallel, one entry per character - TODO confirm
+  type: object
+  properties:
+    char_start_times_ms:  # milliseconds, going by the `_ms` suffix
+      type: array
+      items:
+        type: number
+    char_end_times_ms:
+      type: array
+      items:
+        type: number
+    characters:
+      type: array
+      items:
+        type: string
+
+SpeechStreamResponse:  # Streaming speech event; every field is optional
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [speech.audio.delta, speech.audio.done]
+    audio:
+      type: string
+      format: byte  # OpenAPI `byte` format = base64-encoded string
+    usage:
+      $ref: '#/SpeechUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
--- a/docs/openapi/schemas/inference/text.yaml
+++ b/docs/openapi/schemas/inference/text.yaml
@@ -0,0 +1,98 @@
+# Text Completions API schemas
+
+TextCompletionRequest:  # Text completion request; model and prompt are required
+  type: object
+  required:
+    - model
+    - prompt
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    prompt:
+      $ref: '#/TextCompletionInput'  # string or array of strings
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    best_of:
+      type: integer
+    echo:
+      type: boolean
+    frequency_penalty:
+      type: number
+    logit_bias:  # map with numeric values; key format is not constrained here
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:
+      type: integer
+    max_tokens:
+      type: integer
+    n:
+      type: integer
+    presence_penalty:
+      type: number
+    seed:
+      type: integer
+    stop:  # NOTE(review): array-only here; confirm a single bare string is not accepted
+      type: array
+      items:
+        type: string
+    suffix:
+      type: string
+    temperature:
+      type: number
+    top_p:
+      type: number
+    user:
+      type: string
+
+TextCompletionInput:  # Type of TextCompletionRequest.prompt
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        type: string
+  description: Prompt input - can be a string or array of strings
+
+TextCompletionResponse:  # Non-streaming text completion result; every field is optional
+  type: object
+  properties:
+    id:
+      type: string
+    choices:  # choice schema is shared with chat.yaml
+      type: array
+      items:
+        $ref: './chat.yaml#/BifrostResponseChoice'
+    model:
+      type: string
+    object:
+      type: string
+    system_fingerprint:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+TextCompletionStreamResponse:  # Same fields as TextCompletionResponse minus system_fingerprint
+  type: object
+  description: Streaming text completion response
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: './chat.yaml#/BifrostResponseChoice'
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
--- a/docs/openapi/schemas/inference/transcription.yaml
+++ b/docs/openapi/schemas/inference/transcription.yaml
@@ -0,0 +1,150 @@
+# Transcription API schemas
+
+TranscriptionRequest:  # Audio transcription request; model and file are required
+  type: object
+  required:
+    - model
+    - file
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    file:
+      type: string
+      format: binary  # raw octets (file upload), unlike the base64 `byte` format
+      description: Audio file to transcribe
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    language:
+      type: string
+    prompt:
+      type: string
+    response_format:
+      type: string
+      enum: [json, text, srt, verbose_json, vtt]
+    file_format:  # NOTE(review): relationship to the uploaded file's type is not described - confirm
+      type: string
+
+TranscriptionResponse:  # Transcription result; no `required` list, so every field is optional
+  type: object
+  properties:
+    duration:  # NOTE(review): unit is not stated; presumably seconds - confirm
+      type: number
+    language:
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/TranscriptionLogProb'
+    segments:
+      type: array
+      items:
+        $ref: '#/TranscriptionSegment'
+    task:
+      type: string
+    text:
+      type: string
+    usage:
+      $ref: '#/TranscriptionUsage'
+    words:
+      type: array
+      items:
+        $ref: '#/TranscriptionWord'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+TranscriptionLogProb:  # Item type of the `logprobs` arrays in the transcription responses
+  type: object
+  properties:
+    token:
+      type: string
+    logprob:
+      type: number
+    bytes:  # array of integers
+      type: array
+      items:
+        type: integer
+
+TranscriptionSegment:  # Item type of TranscriptionResponse.segments; every field is optional
+  type: object
+  properties:
+    id:
+      type: integer
+    seek:
+      type: integer
+    start:  # NOTE(review): start/end units are not stated; presumably seconds - confirm
+      type: number
+    end:
+      type: number
+    text:
+      type: string
+    tokens:
+      type: array
+      items:
+        type: integer
+    temperature:
+      type: number
+    avg_logprob:
+      type: number
+    compression_ratio:
+      type: number
+    no_speech_prob:
+      type: number
+
+TranscriptionWord:  # Item type of TranscriptionResponse.words
+  type: object
+  properties:
+    word:
+      type: string
+    start:  # NOTE(review): start/end units are not stated; presumably seconds - confirm
+      type: number
+    end:
+      type: number
+
+TranscriptionUsage:  # Usage record; `type` selects between token-based and duration-based reporting
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [tokens, duration]
+    input_tokens:
+      type: integer
+    input_token_details:  # singular `token`, unlike `input_tokens_details` in ResponsesResponseUsage
+      $ref: '#/TranscriptionUsageInputTokenDetails'
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+    seconds:  # NOTE(review): integer here; confirm fractional durations are never reported
+      type: integer
+
+TranscriptionUsageInputTokenDetails:  # Breakdown referenced by TranscriptionUsage.input_token_details
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+
+TranscriptionStreamResponse:  # Streaming transcription event; every field is optional
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [transcript.text.delta, transcript.text.done]
+    delta:  # NOTE(review): presumably set on delta events and `text` on done events - confirm
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/TranscriptionLogProb'
+    text:
+      type: string
+    usage:
+      $ref: '#/TranscriptionUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
--- a/docs/openapi/schemas/inference/usage.yaml
+++ b/docs/openapi/schemas/inference/usage.yaml
@@ -0,0 +1,89 @@
+# Usage and cost related schemas
+
+BifrostLLMUsage:  # Referenced from text.yaml as './usage.yaml#/BifrostLLMUsage'
+  type: object
+  description: Token usage information
+  properties:
+    prompt_tokens:
+      type: integer
+      description: >
+        Total input tokens including any prompt-cache tokens (read + write).
+        Subtract prompt_tokens_details.cached_read_tokens and
+        prompt_tokens_details.cached_write_tokens to get the non-cached portion.
+    prompt_tokens_details:
+      $ref: '#/ChatPromptTokensDetails'
+    completion_tokens:
+      type: integer
+      description: Number of output/completion tokens generated.
+    completion_tokens_details:
+      $ref: '#/ChatCompletionTokensDetails'
+    total_tokens:
+      type: integer
+    cost:
+      $ref: '#/BifrostCost'
+
+ChatPromptTokensDetails:  # Breakdown referenced by BifrostLLMUsage.prompt_tokens_details
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    cached_read_tokens:
+      type: integer
+      description: >
+        Tokens served from the prompt cache (cache hit). These tokens are already
+        included in prompt_tokens and are billed at the reduced cache-read rate.
+        Populated for all providers that support prompt caching (Anthropic, Bedrock,
+        OpenAI, Gemini, xAI, etc.).
+    cached_write_tokens:
+      type: integer
+      description: >
+        Tokens written to the prompt cache on this request (cache creation / write).
+        These tokens are already included in prompt_tokens and are billed at the
+        cache-creation rate. Populated for providers that separately report cache
+        write tokens (Anthropic, Bedrock).
+
+ChatCompletionTokensDetails:  # Breakdown referenced by BifrostLLMUsage.completion_tokens_details
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    accepted_prediction_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    citation_tokens:
+      type: integer
+    num_search_queries:  # NOTE(review): by name a query count rather than a token count - confirm
+      type: integer
+    reasoning_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    rejected_prediction_tokens:
+      type: integer
+
+BifrostCost:  # All fields are optional numbers; currency/unit is not stated in this schema
+  type: object
+  description: Cost breakdown for the request
+  properties:
+    input_tokens_cost:
+      type: number
+    output_tokens_cost:
+      type: number
+    reasoning_tokens_cost:
+      type: number
+      description: Cost for reasoning/thinking tokens (reasoning models)
+    citation_tokens_cost:
+      type: number
+      description: Cost for citation tokens
+    search_queries_cost:
+      type: number
+      description: Cost for web search queries
+    request_cost:
+      type: number
+    total_cost:  # NOTE(review): presumably the sum of the components above - confirm
+      type: number
--- a/docs/openapi/schemas/inference/videos.yaml
+++ b/docs/openapi/schemas/inference/videos.yaml
@@ -0,0 +1,254 @@
+# Video Generation Schemas
+
+VideoGenerationRequest:  # Video generation request; model and prompt are required
+  type: object
+  required:
+    - model
+    - prompt
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    prompt:
+      type: string
+      description: Text prompt describing the video to generate
+    input_reference:
+      type: string
+      description: Optional reference image for image-to-video. OpenAI and Gemini require a base64 data URL (e.g., `data:image/png;base64,...`). Runway and Replicate accept both data URLs and plain URLs.
+    seconds:  # typed as string, not a number
+      type: string
+      description: Duration of the video in seconds as a string (e.g., "4")
+    size:
+      type: string
+      description: Resolution of the generated video (e.g., `1280x720`, `720x1280`, `1920x1080`)
+    negative_prompt:
+      type: string
+      description: Text describing what to avoid in the generated video
+    seed:
+      type: integer
+      description: Seed for reproducible generation
+    video_uri:
+      type: string
+      description: Source video URI for video-to-video generation (provider-specific, e.g. GCS URI)
+    audio:
+      type: boolean
+      description: Enable audio generation in the video (supported by select providers/models)
+    fallbacks:  # items use the shared Fallback schema, unlike the plain-string fallbacks in speech/text/transcription
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'
+      description: Fallback models to try if primary model fails
+
+VideoGenerationResponse:  # Video job state plus outputs; no `required` list, so every field is optional
+  type: object
+  properties:
+    id:
+      type: string
+      description: Provider-native job ID. To use in path parameters (retrieve/delete/download), combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
+    object:
+      type: string
+      enum:
+        - "video"
+      description: Object type, always "video"
+    model:
+      type: string
+      description: Model used for video generation
+    status:
+      $ref: '#/VideoStatus'
+    progress:
+      type: number
+      format: float
+      minimum: 0
+      maximum: 100
+      description: Approximate completion percentage (0-100)
+    prompt:
+      type: string
+      description: Prompt used to generate the video
+    remixed_from_video_id:
+      type: string
+      description: Source video ID if this is a remix
+    seconds:
+      type: string
+      description: Duration of the generated video in seconds as a string (e.g., "4")
+    size:
+      $ref: '#/VideoSize'
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job was created
+    completed_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job completed
+    expires_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when downloadable assets expire
+    videos:
+      type: array
+      description: Generated video outputs (only present when status is "completed")
+      items:  # inline item schema; `type` indicates whether `url` or `base64` carries the payload
+        type: object
+        properties:
+          type:
+            type: string
+            enum:
+              - "url"
+              - "base64"
+            description: Output format of this video
+          url:
+            type: string
+            format: uri
+            description: URL to the generated video (present when type is "url")
+          base64:
+            type: string
+            description: Base64-encoded video content (present when type is "base64")
+          content_type:
+            type: string
+            description: MIME type of the video (e.g., "video/mp4")
+    error:
+      $ref: '#/VideoError'
+    content_filter:
+      $ref: '#/VideoContentFilter'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoRemixRequest:  # Remix request body; `prompt` is the only (and required) field
+  type: object
+  required:
+    - prompt
+  properties:
+    prompt:
+      type: string
+      description: Text prompt describing how to remix the video
+
+VideoListResponse:  # Paginated list of VideoObject entries; every field is optional
+  type: object
+  properties:
+    object:
+      type: string
+      enum:
+        - "list"
+      description: Object type, always "list"
+    data:
+      type: array
+      items:
+        $ref: '#/VideoObject'
+      description: Array of video generation jobs
+    first_id:
+      type: string
+      description: ID of the first item in the list
+    last_id:
+      type: string
+      description: ID of the last item in the list
+    has_more:
+      type: boolean
+      description: Whether there are more results available
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoObject:  # Same core fields as VideoGenerationResponse, minus videos/content_filter/extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+      description: Provider-native video ID. To use in path parameters (retrieve/delete/download), combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
+    object:
+      type: string
+      enum:
+        - "video"
+      description: Object type, always "video"
+    model:
+      type: string
+      description: Model used for generation
+    status:
+      $ref: '#/VideoStatus'
+    progress:
+      type: number
+      format: float
+      minimum: 0
+      maximum: 100
+      description: Approximate completion percentage (0-100)
+    prompt:
+      type: string
+      description: Prompt used to generate the video
+    remixed_from_video_id:
+      type: string
+      description: Source video ID if this is a remix
+    seconds:
+      type: string
+      description: Duration of the video in seconds as a string (e.g., "4")
+    size:
+      $ref: '#/VideoSize'
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job was created
+    completed_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job completed
+    expires_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when downloadable assets expire
+    error:
+      $ref: '#/VideoError'
+
+VideoDeleteResponse:  # Result of a video delete call; every field is optional
+  type: object
+  properties:
+    id:
+      type: string
+      description: ID of the deleted video
+    object:
+      type: string
+      enum:
+        - "video.deleted"
+      description: Object type, always "video.deleted"
+    deleted:
+      type: boolean
+      description: Whether the video was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoStatus:  # Shared by VideoGenerationResponse.status and VideoObject.status
+  type: string
+  enum:
+    - "queued"
+    - "in_progress"
+    - "completed"
+    - "failed"
+  description: |
+    Current lifecycle status of the video generation job:
+    - `queued`: Job is waiting to be processed
+    - `in_progress`: Video is currently being generated
+    - `completed`: Video generation completed successfully
+    - `failed`: Video generation failed
+
+VideoSize:  # Plain string; no pattern or enum constrains the format
+  type: string
+  description: Resolution of the generated video (e.g., "1920x1080")
+
+VideoError:  # Error payload for video jobs; both fields are optional
+  type: object
+  properties:
+    code:
+      type: string
+      description: Error code
+    message:
+      type: string
+      description: Human-readable error message
+
+VideoContentFilter:  # Referenced by VideoGenerationResponse.content_filter
+  type: object
+  description: Information about content that was filtered due to safety policies
+  properties:
+    filtered_count:
+      type: integer
+      description: Number of items filtered
+    reasons:
+      type: array
+      items:
+        type: string
+      description: Human-readable reasons for filtering
--- a/docs/openapi/schemas/integrations/anthropic/batch.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/batch.yaml
@@ -0,0 +1,105 @@
+# Anthropic Integration Batch API Schemas
+
+AnthropicBatchCreateRequest:  # Batch creation body; `requests` is required
+  type: object
+  required:
+    - requests
+  properties:
+    requests:
+      type: array
+      items:
+        $ref: '#/AnthropicBatchRequestItem'
+      description: Array of batch request items
+
+AnthropicBatchRequestItem:  # One entry of AnthropicBatchCreateRequest.requests; both fields are required
+  type: object
+  required:
+    - custom_id
+    - params
+  properties:
+    custom_id:
+      type: string
+      description: Unique identifier for this request
+    params:  # free-form object here; the message-request shape is not enforced via $ref
+      type: object
+      description: Request parameters (same as AnthropicMessageRequest)
+
+AnthropicBatchCreateResponse:  # Batch object; reused by the list/retrieve/cancel response schemas
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message_batch
+    processing_status:
+      type: string
+      enum: [in_progress, ended, canceling]
+    request_counts:
+      $ref: '#/AnthropicBatchRequestCounts'
+    ended_at:  # nullable timestamp
+      type: string
+      format: date-time
+      nullable: true
+    created_at:
+      type: string
+      format: date-time
+    expires_at:
+      type: string
+      format: date-time
+    archived_at:  # nullable timestamp
+      type: string
+      format: date-time
+      nullable: true
+    cancel_initiated_at:  # nullable timestamp
+      type: string
+      format: date-time
+      nullable: true
+    results_url:  # nullable
+      type: string
+      nullable: true
+
+AnthropicBatchRequestCounts:  # Per-state request tallies referenced by AnthropicBatchCreateResponse
+  type: object
+  properties:
+    processing:
+      type: integer
+    succeeded:
+      type: integer
+    errored:
+      type: integer
+    canceled:
+      type: integer
+    expired:
+      type: integer
+
+AnthropicBatchListRequest:  # Pagination inputs for listing batches; both are optional
+  type: object
+  properties:
+    page_size:  # NOTE(review): named differently from `limit` in AnthropicFileListRequest - confirm intended
+      type: integer
+      default: 20
+    page_token:
+      type: string
+      description: Cursor for pagination
+
+AnthropicBatchListResponse:  # Page of batch objects plus cursor fields; every field is optional
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicBatchCreateResponse'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+AnthropicBatchRetrieveResponse:  # Alias: same shape as AnthropicBatchCreateResponse
+  $ref: '#/AnthropicBatchCreateResponse'
+
+AnthropicBatchCancelResponse:  # Alias: same shape as AnthropicBatchCreateResponse
+  $ref: '#/AnthropicBatchCreateResponse'
+
--- a/docs/openapi/schemas/integrations/anthropic/common.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/common.yaml
@@ -0,0 +1,53 @@
+# Anthropic Integration Common Types
+
+AnthropicError:  # Error envelope: outer `type` plus a nested `error` object
+  type: object
+  properties:
+    type:
+      type: string
+      default: error
+    error:  # inline object; neither inner field is marked required
+      type: object
+      properties:
+        type:
+          type: string
+          description: Error type (e.g., invalid_request_error, api_error)
+        message:
+          type: string
+          description: Error message
+
+AnthropicModel:  # Item type of AnthropicListModelsResponse.data; every field is optional
+  type: object
+  properties:
+    id:
+      type: string
+      description: Model identifier
+    type:
+      type: string
+      default: model
+    display_name:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+
+AnthropicListModelsResponse:  # Page of AnthropicModel entries plus cursor fields
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicModel'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Anthropic Message Roles
+AnthropicMessageRole:  # String enum with exactly two roles
+  type: string
+  enum:
+    - user
+    - assistant
--- a/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
@@ -0,0 +1,13 @@
+# Anthropic Integration Count Tokens Schemas
+
+AnthropicCountTokensRequest:  # Wraps the message-request schema from messages.yaml via a single-entry allOf
+  # Uses the same format as AnthropicMessageRequest
+  allOf:
+    - $ref: './messages.yaml#/AnthropicMessageRequest'
+
+AnthropicCountTokensResponse:  # Single-field result of a count-tokens call
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens
--- a/docs/openapi/schemas/integrations/anthropic/files.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/files.yaml
@@ -0,0 +1,102 @@
+# Anthropic Integration Files API Schemas
+
+# Upload body: the binary `file` is mandatory; `filename` and `purpose` are optional.
+AnthropicFileUploadRequest:
+  type: object
+  required: [file]
+  properties:
+    file:
+      type: string
+      format: binary
+      description: File to upload (raw file content)
+    filename:
+      type: string
+      description: Original filename
+    purpose:
+      type: string
+      description: Purpose of the file (e.g., "batch")
+
+# File metadata record; also reused by the list and retrieve responses in this file.
+AnthropicFileUploadResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file
+    filename:
+      type: string
+    mime_type:
+      type: string
+      description: MIME type of the file
+    size_bytes:
+      type: integer
+      description: Size of the file in bytes
+    created_at:
+      type: string
+      format: date-time
+    downloadable:
+      type: boolean
+
+# Listing parameters: page size (default 30), pagination cursor and sort order.
+AnthropicFileListRequest:
+  type: object
+  properties:
+    limit:
+      type: integer
+      default: 30
+    after:
+      type: string
+      description: Cursor for pagination (after_id)
+    order:
+      type: string
+      enum:
+        - asc
+        - desc
+
+# Page of file records (AnthropicFileUploadResponse items) with paging fields.
+AnthropicFileListResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicFileUploadResponse'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Retrieve parameters: only the mandatory `file_id`.
+AnthropicFileRetrieveRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
+
+# Alias: the retrieve response reuses AnthropicFileUploadResponse from this file.
+AnthropicFileRetrieveResponse:
+  $ref: '#/AnthropicFileUploadResponse'
+
+# Delete parameters: only the mandatory `file_id`.
+AnthropicFileDeleteRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
+
+# Deletion acknowledgement: the file id plus a `type` defaulting to "file_deleted".
+AnthropicFileDeleteResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file_deleted
+
+# File-content parameters: only the mandatory `file_id`.
+AnthropicFileContentRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/messages.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/messages.yaml
@@ -0,0 +1,403 @@
+# Anthropic Integration Messages API Schemas
+
+# Messages request body. `model`, `max_tokens` and `messages` are mandatory;
+# every other property is optional. `fallbacks` is a Bifrost extension.
+AnthropicMessageRequest:
+  type: object
+  required:
+    - model
+    - max_tokens
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model identifier (e.g., claude-3-opus-20240229)
+      example: claude-3-opus-20240229
+    max_tokens:
+      type: integer
+      description: Maximum tokens to generate
+    messages:
+      type: array
+      items:
+        $ref: '#/AnthropicMessage'
+      description: List of messages in the conversation
+    # NOTE(review): OpenAPI 3.0 tooling ignores keys placed next to `$ref`, so
+    # the `description` siblings below may not be rendered - confirm the target
+    # spec version.
+    system:
+      $ref: '#/AnthropicContent'
+      description: System prompt
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+      description: Automatic caching directives for the whole request
+    metadata:
+      $ref: '#/AnthropicMetadata'
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    tools:
+      type: array
+      items:
+        $ref: '#/AnthropicTool'
+    tool_choice:
+      $ref: '#/AnthropicToolChoice'
+    mcp_servers:
+      type: array
+      items:
+        $ref: '#/AnthropicMCPServer'
+      description: MCP servers configuration (requires beta header)
+    thinking:
+      $ref: '#/AnthropicThinking'
+    output_format:
+      type: object
+      description: Structured output format (requires beta header)
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# One conversation turn: a role (from common.yaml) plus its content; both mandatory.
+AnthropicMessage:
+  type: object
+  required: [role, content]
+  properties:
+    role:
+      $ref: './common.yaml#/AnthropicMessageRole'
+    content:
+      $ref: '#/AnthropicContent'
+
+# Union type: either a plain string or an array of AnthropicContentBlock.
+AnthropicContent:
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/AnthropicContentBlock'
+  description: Content - can be a string or array of content blocks
+
+# Flat content-block schema: `type` is the only mandatory key and selects the
+# block kind; the remaining properties are each documented as applying to
+# particular kinds. The schema itself does not enforce that pairing.
+AnthropicContentBlock:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image
+        - document
+        - tool_use
+        - server_tool_use
+        - tool_result
+        - web_search_result
+        - mcp_tool_use
+        - mcp_tool_result
+        - thinking
+        - redacted_thinking
+    text:
+      type: string
+      description: For text content
+    thinking:
+      type: string
+      description: For thinking content
+    signature:
+      type: string
+      description: For signature content
+    data:
+      type: string
+      description: For data content (encrypted data for redacted thinking)
+    tool_use_id:
+      type: string
+      description: For tool_result content
+    id:
+      type: string
+      description: For tool_use content
+    name:
+      type: string
+      description: For tool_use content
+    input:
+      type: object
+      description: For tool_use content
+    server_name:
+      type: string
+      description: For mcp_tool_use content
+    # `content` points back at AnthropicContent, so blocks can nest.
+    content:
+      $ref: '#/AnthropicContent'
+      description: For tool_result content
+    source:
+      $ref: '#/AnthropicSource'
+      description: For image/document content
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+    citations:
+      $ref: '#/AnthropicCitationsConfig'
+      description: For document content
+    context:
+      type: string
+      description: For document content
+    title:
+      type: string
+      description: For document content
+
+# Source descriptor for image/document blocks. `type` is mandatory and picks
+# one of four source kinds; `data` and `url` are documented for the base64 and
+# url kinds respectively.
+AnthropicSource:
+  type: object
+  required: [type]
+  properties:
+    type:
+      type: string
+      enum:
+        - base64
+        - url
+        - text
+        - content_block
+    media_type:
+      type: string
+      description: MIME type (e.g., image/jpeg, application/pdf)
+    data:
+      type: string
+      description: Base64-encoded data (for base64 type)
+    url:
+      type: string
+      description: URL (for url type)
+
+# Citations switch: a single boolean `enabled` flag.
+AnthropicCitationsConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+
+# Request metadata: currently only a string `user_id`.
+AnthropicMetadata:
+  type: object
+  properties:
+    user_id:
+      type: string
+
+# Thinking configuration: an enabled/disabled switch plus an integer token budget.
+AnthropicThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - enabled
+        - disabled
+    budget_tokens:
+      type: integer
+
+# Tool definition. `type` is either `custom` or one of the versioned built-in
+# tool identifiers listed below; the trailing property groups carry settings
+# for the computer-use and web-search tools. No property is marked required.
+AnthropicTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - custom
+        - bash_20250124
+        - computer_20250124
+        - computer_20251124
+        - code_execution_20250522
+        - code_execution_20250825
+        - code_execution_20260120
+        - text_editor_20250124
+        - text_editor_20250429
+        - text_editor_20250728
+        - web_search_20250305
+        - web_search_20260209
+        - web_fetch_20250910
+        - web_fetch_20260209
+        - web_fetch_20260309
+        - memory_20250818
+        - tool_search_tool_bm25
+        - tool_search_tool_bm25_20251119
+        - tool_search_tool_regex
+        - tool_search_tool_regex_20251119
+    name:
+      type: string
+      description: Tool name (for custom tools)
+    description:
+      type: string
+    input_schema:
+      type: object
+      description: JSON Schema for tool input
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+    # Computer use tool settings
+    display_width_px:
+      type: integer
+    display_height_px:
+      type: integer
+    display_number:
+      type: integer
+    enable_zoom:
+      type: boolean
+    # Web search settings
+    max_uses:
+      type: integer
+    allowed_domains:
+      type: array
+      items:
+        type: string
+    blocked_domains:
+      type: array
+      items:
+        type: string
+    user_location:
+      $ref: '#/AnthropicToolWebSearchUserLocation'
+
+# User location for the web-search tool; `type` only admits `approximate`.
+AnthropicToolWebSearchUserLocation:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - approximate
+    city:
+      type: string
+    country:
+      type: string
+    timezone:
+      type: string
+
+# Tool-choice directive. The oneOf currently has a single object branch, so
+# it validates the same as that object schema on its own.
+AnthropicToolChoice:
+  oneOf:
+    - type: object
+      properties:
+        type:
+          type: string
+          enum: [auto, any, tool, none]
+        name:
+          type: string
+          description: Required when type is 'tool'
+        disable_parallel_tool_use:
+          type: boolean
+
+# MCP server entry: type, name, url, an authorization token and an optional
+# tool configuration (AnthropicMCPToolConfig).
+AnthropicMCPServer:
+  type: object
+  properties:
+    type:
+      type: string
+    name:
+      type: string
+    url:
+      type: string
+    authorization_token:
+      type: string
+      description: Authorization token for the MCP server
+    tool_configuration:
+      $ref: '#/AnthropicMCPToolConfig'
+
+# Per-server tool configuration: an `enabled` flag and a list of allowed tool names.
+AnthropicMCPToolConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    allowed_tools:
+      type: array
+      items:
+        type: string
+
+# Response types
+# Message response: id, object type (default "message"), role (default
+# "assistant"), an array of content blocks, the model name, stop information
+# and token usage. Both `stop_reason` and `stop_sequence` may be null.
+AnthropicMessageResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message
+    role:
+      type: string
+      default: assistant
+    content:
+      type: array
+      items:
+        $ref: '#/AnthropicContentBlock'
+    model:
+      type: string
+    stop_reason:
+      type: string
+      # The enum lists null, but `type: string` on its own still rejects a
+      # null value; `nullable: true` (as on stop_sequence) makes it valid.
+      nullable: true
+      enum: [end_turn, max_tokens, stop_sequence, tool_use, pause_turn, refusal, model_context_window_exceeded, null]
+    stop_sequence:
+      type: string
+      nullable: true
+    usage:
+      $ref: '#/AnthropicUsage'
+
+# Token usage counters: input/output tokens, cache creation/read input tokens
+# and a `cache_creation` breakdown object (AnthropicUsageCacheCreation).
+AnthropicUsage:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    output_tokens:
+      type: integer
+    cache_creation_input_tokens:
+      type: integer
+    cache_read_input_tokens:
+      type: integer
+    cache_creation:
+      $ref: '#/AnthropicUsageCacheCreation'
+
+# Cache-creation breakdown: two integer counters, `ephemeral_5m_input_tokens`
+# and `ephemeral_1h_input_tokens`.
+AnthropicUsageCacheCreation:
+  type: object
+  properties:
+    ephemeral_5m_input_tokens:
+      type: integer
+    ephemeral_1h_input_tokens:
+      type: integer
+
+# Stream event types
+# Single flat schema covering every stream event kind listed in `type`; all
+# payload properties are optional and none is tied to an event kind here.
+AnthropicStreamEvent:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      enum:
+        - message_start
+        - content_block_start
+        - content_block_delta
+        - content_block_stop
+        - message_delta
+        - message_stop
+        - ping
+        - error
+    message:
+      $ref: '#/AnthropicMessageResponse'
+    index:
+      type: integer
+    content_block:
+      $ref: '#/AnthropicContentBlock'
+    delta:
+      $ref: '#/AnthropicStreamDelta'
+    usage:
+      $ref: '#/AnthropicUsage'
+    error:
+      $ref: '#/AnthropicStreamError'
+
+# Incremental payload of a stream event. `type` names the delta kind; the
+# other string properties hold text, partial tool JSON, thinking, signature
+# and stop information.
+AnthropicStreamDelta:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text_delta
+        - input_json_delta
+        - thinking_delta
+        - signature_delta
+    text:
+      type: string
+    partial_json:
+      type: string
+    thinking:
+      type: string
+    signature:
+      type: string
+    stop_reason:
+      type: string
+    stop_sequence:
+      type: string
+
+# Error payload of a stream event: string `type` and `message`.
+AnthropicStreamError:
+  type: object
+  properties:
+    type:
+      type: string
+    message:
+      type: string
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/text.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/text.yaml
@@ -0,0 +1,62 @@
+# Anthropic Integration Text Completions Schemas (Legacy Complete API)
+
+# Legacy text-completion request. `model`, `prompt` and `max_tokens_to_sample`
+# are mandatory; sampling controls are optional. `fallbacks` is a Bifrost
+# extension.
+AnthropicTextRequest:
+  type: object
+  required: [model, prompt, max_tokens_to_sample]
+  properties:
+    model:
+      type: string
+      description: Model identifier
+    prompt:
+      type: string
+      description: The prompt to complete
+    max_tokens_to_sample:
+      type: integer
+      description: Maximum tokens to generate
+    stream:
+      type: boolean
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# Legacy text-completion response: object type (default "completion"), id,
+# the completion text, a nullable stop reason, the model name and token usage.
+AnthropicTextResponse:
+  type: object
+  properties:
+    type:
+      type: string
+      default: completion
+    id:
+      type: string
+    completion:
+      type: string
+    stop_reason:
+      type: string
+      # The enum lists null, but `type: string` on its own still rejects a
+      # null value; `nullable: true` makes it valid.
+      nullable: true
+      enum: [stop_sequence, max_tokens, null]
+    model:
+      type: string
+    usage:
+      type: object
+      properties:
+        input_tokens:
+          type: integer
+          description: Number of input tokens used
+        output_tokens:
+          type: integer
+          description: Number of output tokens generated
--- a/docs/openapi/schemas/integrations/bedrock/batch.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/batch.yaml
@@ -0,0 +1,153 @@
+# AWS Bedrock Batch API Schemas
+
+# Batch job creation body. `roleArn`, `inputDataConfig` and `outputDataConfig`
+# are mandatory; the two data configs each wrap an S3 URI.
+BedrockBatchJobRequest:
+  type: object
+  required: [roleArn, inputDataConfig, outputDataConfig]
+  properties:
+    modelId:
+      type: string
+      description: Model ID for the batch job (optional, can be specified in request)
+    jobName:
+      type: string
+      description: Name for the batch job
+    roleArn:
+      type: string
+      description: IAM role ARN for the job
+    inputDataConfig:
+      type: object
+      properties:
+        s3InputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              type: string
+              description: S3 URI for input data
+    outputDataConfig:
+      type: object
+      properties:
+        s3OutputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              type: string
+              description: S3 URI for output data
+    timeoutDurationInHours:
+      type: integer
+      description: Timeout in hours
+    tags:
+      type: array
+      items:
+        type: object
+        properties:
+          key:
+            type: string
+          value:
+            type: string
+
+# Full batch job record: ARN, status (closed enum), naming/model/role fields,
+# untyped input/output configs, VPC config, timestamps and timeout.
+BedrockBatchJobResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
+      enum: [Submitted, InProgress, Completed, Failed, Stopping, Stopped, PartiallyCompleted, Expired, Validating, Scheduled]
+    jobName:
+      type: string
+    modelId:
+      type: string
+    roleArn:
+      type: string
+    # The data configs are left as free-form objects here, unlike the request schema.
+    inputDataConfig:
+      type: object
+    outputDataConfig:
+      type: object
+    vpcConfig:
+      type: object
+      properties:
+        securityGroupIds:
+          type: array
+          items:
+            type: string
+        subnetIds:
+          type: array
+          items:
+            type: string
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+    clientRequestToken:
+      type: string
+    jobExpirationTime:
+      type: string
+      format: date-time
+    timeoutDurationInHours:
+      type: integer
+
+# Listing parameters: page size, continuation token, and status/name filters.
+BedrockBatchListRequest:
+  type: object
+  properties:
+    maxResults:
+      type: integer
+    nextToken:
+      type: string
+    statusEquals:
+      type: string
+    nameContains:
+      type: string
+
+# Page of BedrockBatchJobSummary entries plus a continuation token.
+BedrockBatchListResponse:
+  type: object
+  properties:
+    invocationJobSummaries:
+      type: array
+      items:
+        $ref: '#/BedrockBatchJobSummary'
+    nextToken:
+      type: string
+
+# Condensed job record used in list responses.
+# NOTE(review): `status` is an open string here while BedrockBatchJobResponse
+# restricts it to an enum - confirm whether the same value set applies.
+BedrockBatchJobSummary:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    jobName:
+      type: string
+    modelId:
+      type: string
+    status:
+      type: string
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+
+# Alias of BedrockBatchJobResponse.
+# NOTE(review): OpenAPI 3.0 tooling ignores keys placed next to `$ref`, so the
+# `description` below may not be rendered - confirm the target spec version.
+BedrockBatchRetrieveResponse:
+  description: Uses same structure as BedrockBatchJobResponse
+  $ref: '#/BedrockBatchJobResponse'
+
+# Cancel acknowledgement: the job ARN and a free-form status string.
+BedrockBatchCancelResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
--- a/docs/openapi/schemas/integrations/bedrock/common.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/common.yaml
@@ -0,0 +1,15 @@
+# AWS Bedrock Integration Common Types
+
+# Error payload: string `message` and `type`.
+BedrockError:
+  type: object
+  properties:
+    message:
+      type: string
+    type:
+      type: string
+
+# Role of a message author: either `user` or `assistant`.
+BedrockMessageRole:
+  type: string
+  enum: [user, assistant]
--- a/docs/openapi/schemas/integrations/bedrock/converse.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/converse.yaml
@@ -0,0 +1,367 @@
+# AWS Bedrock Converse API Schemas
+
+# Converse request body. No property is marked required. Most fields reference
+# schemas defined further down in this file; `fallbacks` is a Bifrost extension.
+BedrockConverseRequest:
+  type: object
+  properties:
+    messages:
+      type: array
+      items:
+        $ref: '#/BedrockMessage'
+      description: Array of messages for the conversation
+    system:
+      type: array
+      items:
+        $ref: '#/BedrockSystemMessage'
+      description: System messages/prompts
+    inferenceConfig:
+      $ref: '#/BedrockInferenceConfig'
+    toolConfig:
+      $ref: '#/BedrockToolConfig'
+    guardrailConfig:
+      $ref: '#/BedrockGuardrailConfig'
+    additionalModelRequestFields:
+      type: object
+      description: Model-specific parameters
+    additionalModelResponseFieldPaths:
+      type: array
+      items:
+        type: string
+    performanceConfig:
+      $ref: '#/BedrockPerformanceConfig'
+    # Map from variable name to BedrockPromptVariable.
+    promptVariables:
+      type: object
+      additionalProperties:
+        $ref: '#/BedrockPromptVariable'
+    # Map of string keys to string values.
+    requestMetadata:
+      type: object
+      additionalProperties:
+        type: string
+    serviceTier:
+      $ref: '#/BedrockServiceTier'
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# One conversation turn: a role (from common.yaml) and an array of content
+# blocks; both mandatory.
+BedrockMessage:
+  type: object
+  required: [role, content]
+  properties:
+    role:
+      $ref: './common.yaml#/BedrockMessageRole'
+    content:
+      type: array
+      items:
+        $ref: '#/BedrockContentBlock'
+
+# System prompt entry: plain text, guard content, or a cache point marker.
+BedrockSystemMessage:
+  type: object
+  properties:
+    text:
+      type: string
+    guardContent:
+      $ref: '#/BedrockGuardContent'
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+# Content block with one optional property per content kind (text, image,
+# document, tool use/result, guard content, reasoning, JSON, cache point).
+# The schema does not restrict how many of them are set at once.
+BedrockContentBlock:
+  type: object
+  properties:
+    text:
+      type: string
+    image:
+      $ref: '#/BedrockImageSource'
+    document:
+      $ref: '#/BedrockDocumentSource'
+    toolUse:
+      $ref: '#/BedrockToolUse'
+    toolResult:
+      $ref: '#/BedrockToolResult'
+    guardContent:
+      $ref: '#/BedrockGuardContent'
+    reasoningContent:
+      $ref: '#/BedrockReasoningContent'
+    json:
+      type: object
+      description: JSON content for tool call results
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+# Image payload: a format from a closed list plus base64-encoded bytes.
+BedrockImageSource:
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - jpeg
+        - png
+        - gif
+        - webp
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+
+# Document payload: a format from a closed list, a name, and a source given
+# either as base64-encoded bytes or as plain text.
+BedrockDocumentSource:
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - pdf
+        - csv
+        - doc
+        - docx
+        - xls
+        - xlsx
+        - html
+        - txt
+        - md
+    name:
+      type: string
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+        text:
+          type: string
+          description: Plain text content (for text-based documents)
+
+# Tool invocation: tool-use id, tool name and a free-form input object.
+BedrockToolUse:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    name:
+      type: string
+    input:
+      type: object
+
+# Tool result: the tool-use id it answers, an array of content blocks and a
+# success/error status.
+BedrockToolResult:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    content:
+      type: array
+      items:
+        $ref: '#/BedrockContentBlock'
+    status:
+      type: string
+      enum:
+        - success
+        - error
+
+# Guard content: a `text` object holding the text itself and a list of
+# string qualifiers.
+BedrockGuardContent:
+  type: object
+  properties:
+    text:
+      type: object
+      properties:
+        text:
+          type: string
+        qualifiers:
+          type: array
+          items:
+            type: string
+
+# Reasoning content: a `reasoningText` object with `text` and `signature` strings.
+BedrockReasoningContent:
+  type: object
+  properties:
+    reasoningText:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+
+# Cache point marker; `type` only admits `default`.
+BedrockCachePoint:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - default
+
+# Sampling controls: max tokens, temperature, top-p and stop sequences.
+BedrockInferenceConfig:
+  type: object
+  properties:
+    maxTokens:
+      type: integer
+    temperature:
+      type: number
+    topP:
+      type: number
+    stopSequences:
+      type: array
+      items:
+        type: string
+
+# Tool configuration: the list of tools plus a tool-choice directive.
+BedrockToolConfig:
+  type: object
+  properties:
+    tools:
+      type: array
+      items:
+        $ref: '#/BedrockTool'
+    toolChoice:
+      $ref: '#/BedrockToolChoice'
+
+# Tool entry: either a `toolSpec` (name, description and an input schema
+# wrapped under `inputSchema.json`) or a cache point marker.
+BedrockTool:
+  type: object
+  properties:
+    toolSpec:
+      type: object
+      properties:
+        name:
+          type: string
+        description:
+          type: string
+        inputSchema:
+          type: object
+          properties:
+            json:
+              type: object
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+# Tool-choice directive expressed as one object-valued key per mode: `auto`,
+# `any`, or `tool` (which names a specific tool).
+BedrockToolChoice:
+  type: object
+  properties:
+    auto:
+      type: object
+    any:
+      type: object
+    tool:
+      type: object
+      properties:
+        name:
+          type: string
+
+# Guardrail selection: identifier, version and an enabled/disabled trace switch.
+BedrockGuardrailConfig:
+  type: object
+  properties:
+    guardrailIdentifier:
+      type: string
+    guardrailVersion:
+      type: string
+    trace:
+      type: string
+      enum:
+        - enabled
+        - disabled
+
+# Performance setting: `latency` is either `standard` or `optimized`.
+BedrockPerformanceConfig:
+  type: object
+  properties:
+    latency:
+      type: string
+      enum:
+        - standard
+        - optimized
+
+# Prompt variable value: a single string `text` field.
+BedrockPromptVariable:
+  type: object
+  properties:
+    text:
+      type: string
+
+# Service tier selection: `type` is one of reserved, priority, default or flex.
+BedrockServiceTier:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - reserved
+        - priority
+        - default
+        - flex
+
+# Response types
+# Converse response: the output message, a stop reason (closed enum), token
+# usage, latency metrics, and pass-through model/trace/config fields.
+BedrockConverseResponse:
+  type: object
+  properties:
+    output:
+      type: object
+      properties:
+        message:
+          $ref: '#/BedrockMessage'
+    stopReason:
+      type: string
+      enum: [end_turn, tool_use, max_tokens, stop_sequence, guardrail_intervened, content_filtered]
+    usage:
+      $ref: '#/BedrockUsage'
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    additionalModelResponseFields:
+      type: object
+    trace:
+      type: object
+    performanceConfig:
+      $ref: '#/BedrockPerformanceConfig'
+    serviceTier:
+      $ref: '#/BedrockServiceTier'
+
+# Token usage counters: input, output, total, and cache read/write input tokens.
+BedrockUsage:
+  type: object
+  properties:
+    inputTokens:
+      type: integer
+    outputTokens:
+      type: integer
+    totalTokens:
+      type: integer
+    cacheReadInputTokens:
+      type: integer
+    cacheWriteInputTokens:
+      type: integer
+
+# Stream event types
+# One flat schema for all stream events: each property is optional and is
+# documented as belonging to a particular event kind. There is no
+# discriminator field.
+BedrockStreamEvent:
+  type: object
+  description: Flat structure for streaming events matching actual Bedrock API response
+  properties:
+    role:
+      type: string
+      description: For messageStart events
+    contentBlockIndex:
+      type: integer
+      description: For content block events
+    delta:
+      $ref: '#/BedrockContentBlockDelta'
+    stopReason:
+      type: string
+      description: For messageStop events
+    start:
+      $ref: '#/BedrockContentBlockStart'
+    usage:
+      $ref: '#/BedrockUsage'
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    trace:
+      type: object
+    additionalModelResponseFields:
+      type: object
+    invokeModelRawChunk:
+      type: string
+      format: byte
+      description: Raw bytes for legacy invoke stream
+
+# Incremental content: text, reasoning content (text/signature), or a
+# tool-use fragment whose `input` is a string.
+BedrockContentBlockDelta:
+  type: object
+  properties:
+    text:
+      type: string
+    reasoningContent:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+    toolUse:
+      type: object
+      properties:
+        input:
+          type: string
+
+# Start-of-block payload: a `toolUse` object with the tool-use id and tool name.
+BedrockContentBlockStart:
+  type: object
+  properties:
+    toolUse:
+      type: object
+      properties:
+        toolUseId:
+          type: string
+        name:
+          type: string
--- a/docs/openapi/schemas/integrations/bedrock/invoke.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/invoke.yaml
@@ -0,0 +1,50 @@
+# AWS Bedrock Invoke API Schemas (Legacy/Raw Model Invocation)
+
+# Raw invoke body. It lists both generic and Anthropic-style spellings of the
+# same controls (max_tokens / max_tokens_to_sample, stop / stop_sequences);
+# no property is marked required.
+BedrockInvokeRequest:
+  type: object
+  description: |
+    Raw model invocation request. The body format depends on the model provider.
+    For Anthropic models, use Anthropic format. For other models, use their native format.
+  properties:
+    prompt:
+      type: string
+      description: Text prompt to complete
+    max_tokens:
+      type: integer
+    max_tokens_to_sample:
+      type: integer
+      description: Anthropic-style max tokens
+    temperature:
+      type: number
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop:
+      type: array
+      items:
+        type: string
+    stop_sequences:
+      type: array
+      items:
+        type: string
+      description: Anthropic-style stop sequences
+    messages:
+      type: array
+      items:
+        type: object
+      description: For Claude 3 models
+    system:
+      description: System prompt (string or array of strings)
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    anthropic_version:
+      type: string
+
+# Open-ended response object: any properties are accepted.
+BedrockInvokeResponse:
+  type: object
+  description: Raw model response. Format depends on the model provider.
+  additionalProperties: true
--- a/docs/openapi/schemas/integrations/cohere/chat.yaml
+++ b/docs/openapi/schemas/integrations/cohere/chat.yaml
@@ -0,0 +1,364 @@
+# Cohere v2 Chat API Schemas
+
+# Chat request body. `model` and `messages` are mandatory; tools, sampling
+# controls, streaming, safety mode, thinking and response format are optional.
+CohereChatRequest:
+  type: object
+  required: [model, messages]
+  properties:
+    model:
+      type: string
+      description: Model to use for chat completion
+      example: command-r-plus
+    messages:
+      type: array
+      items:
+        $ref: '#/CohereMessage'
+      description: Array of message objects
+    tools:
+      type: array
+      items:
+        $ref: '#/CohereTool'
+    tool_choice:
+      $ref: '#/CohereToolChoice'
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    p:
+      type: number
+      description: Top-p sampling
+    k:
+      type: integer
+      description: Top-k sampling
+    max_tokens:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    frequency_penalty:
+      type: number
+    presence_penalty:
+      type: number
+    stream:
+      type: boolean
+    safety_mode:
+      type: string
+      enum:
+        - CONTEXTUAL
+        - STRICT
+        - NONE
+    log_probs:
+      type: boolean
+    strict_tool_choice:
+      type: boolean
+    thinking:
+      $ref: '#/CohereThinking'
+    response_format:
+      $ref: '#/CohereResponseFormat'
+
+# One chat message. Only `role` is mandatory; content, tool calls, the
+# tool-call id and the tool plan are optional.
+CohereMessage:
+  type: object
+  required: [role]
+  properties:
+    role:
+      type: string
+      enum:
+        - system
+        - user
+        - assistant
+        - tool
+    content:
+      $ref: '#/CohereMessageContent'
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/CohereToolCall'
+    tool_call_id:
+      type: string
+    tool_plan:
+      type: string
+      description: Chain-of-thought style reflection (assistant only)
+
+# Union type: either a plain string or an array of CohereContentBlock.
+CohereMessageContent:
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/CohereContentBlock'
+  description: Message content - can be a string or array of content blocks
+
+# Content block. `type` is mandatory and names the block kind; each kind has
+# a same-named payload property (text, image_url, thinking, document).
+CohereContentBlock:
+  type: object
+  required: [type]
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image_url
+        - thinking
+        - document
+    text:
+      type: string
+    image_url:
+      type: object
+      properties:
+        url:
+          type: string
+    thinking:
+      type: string
+    document:
+      type: object
+      properties:
+        data:
+          type: object
+        id:
+          type: string
+
+# Tool definition; `type` only admits `function`, and `function` carries the
+# name, description and a free-form parameters object.
+CohereTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - function
+    function:
+      type: object
+      properties:
+        name:
+          type: string
+        description:
+          type: string
+        parameters:
+          type: object
+
+# Tool choice mode as an upper-case string constant.
+CohereToolChoice:
+  type: string
+  enum:
+    - AUTO
+    - NONE
+    - REQUIRED
+  description: Tool choice mode - AUTO lets the model decide, NONE disables tools, REQUIRED forces tool use
+
+# Tool call: id, a `type` that only admits `function`, and the function name
+# with its arguments given as a string.
+CohereToolCall:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      enum:
+        - function
+    function:
+      type: object
+      properties:
+        name:
+          type: string
+        arguments:
+          type: string
+
+# Thinking configuration: an enabled/disabled switch plus a token budget of at least 1.
+CohereThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - enabled
+        - disabled
+    token_budget:
+      type: integer
+      minimum: 1
+
+# Response format selector: `text` or `json_object`, with an optional JSON schema.
+CohereResponseFormat:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - json_object
+      description: Response format type
+    schema:
+      type: object
+      description: JSON schema for structured output (used with json_object type)
+
+# Response types
+# Chat response: id, finish reason (closed enum), the assistant message
+# (declared inline rather than via CohereMessage), usage and optional logprobs.
+CohereChatResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    finish_reason:
+      type: string
+      enum: [COMPLETE, STOP_SEQUENCE, MAX_TOKENS, TOOL_CALL, ERROR, TIMEOUT]
+    message:
+      type: object
+      properties:
+        role:
+          type: string
+        content:
+          type: array
+          items:
+            $ref: '#/CohereContentBlock'
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/CohereToolCall'
+        tool_plan:
+          type: string
+    usage:
+      $ref: '#/CohereUsage'
+    logprobs:
+      type: array
+      items:
+        $ref: '#/CohereLogProb'
+      description: Log probabilities (if requested)
+
+# Usage summary: billed units, raw token counts and a cached-token counter.
+CohereUsage:
+  type: object
+  properties:
+    billed_units:
+      $ref: '#/CohereBilledUnits'
+    tokens:
+      $ref: '#/CohereTokenUsage'
+    cached_tokens:
+      type: integer
+      description: Cached tokens
+
+# Billed quantities: input/output tokens, search units and classification units.
+CohereBilledUnits:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of billed input tokens
+    output_tokens:
+      type: integer
+      description: Number of billed output tokens
+    search_units:
+      type: integer
+      description: Number of billed search units
+    classifications:
+      type: integer
+      description: Number of billed classification units
+
+# Raw token counts: input tokens used and output tokens produced.
+CohereTokenUsage:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens used
+    output_tokens:
+      type: integer
+      description: Number of output tokens produced
+
+# Log-probability record for one text chunk: token ids, the chunk text and
+# one log probability per token.
+CohereLogProb:
+  type: object
+  properties:
+    token_ids:
+      type: array
+      items:
+        type: integer
+      description: Token IDs of each token in text chunk
+    text:
+      type: string
+      description: Text chunk for log probabilities
+    logprobs:
+      type: array
+      items:
+        type: number
+      description: Log probability of each token
+
+# Stream event types
+# Stream event envelope: the event `type` (closed enum), an optional id and
+# index, and the delta payload (CohereStreamDelta).
+CohereChatStreamEvent:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [message-start, content-start, content-delta, content-end, tool-plan-delta, tool-call-start, tool-call-delta, tool-call-end, citation-start, citation-end, message-end, debug]
+      description: Type of streaming event
+    id:
+      type: string
+      description: Event ID (for message-start)
+    index:
+      type: integer
+      description: Index for indexed events
+    delta:
+      $ref: '#/CohereStreamDelta'
+
+# Delta payload of a stream event: a partial message, a finish reason and usage.
+CohereStreamDelta:
+  type: object
+  properties:
+    message:
+      $ref: '#/CohereStreamMessage'
+    finish_reason:
+      type: string
+      enum:
+        - COMPLETE
+        - STOP_SEQUENCE
+        - MAX_TOKENS
+        - TOOL_CALL
+        - ERROR
+        - TIMEOUT
+    usage:
+      $ref: '#/CohereUsage'
+
+# Partial message inside a stream delta. `content`, `tool_calls` and
+# `citations` each accept either a single item or an array of that item.
+CohereStreamMessage:
+  type: object
+  properties:
+    role:
+      type: string
+      description: Message role (for message-start)
+    content:
+      oneOf:
+        - $ref: '#/CohereStreamContent'
+        - type: array
+          items:
+            $ref: '#/CohereStreamContent'
+      description: Content for content events
+    tool_plan:
+      type: string
+      description: Tool plan content (for tool-plan-delta)
+    tool_calls:
+      oneOf:
+        - $ref: '#/CohereToolCall'
+        - type: array
+          items:
+            $ref: '#/CohereToolCall'
+      description: Tool calls (for tool-call events)
+    citations:
+      oneOf:
+        - $ref: '#/CohereCitation'
+        - type: array
+          items:
+            $ref: '#/CohereCitation'
+      description: Citations (for citation events)
+
+# Streamed content fragment: a block kind plus optional text/thinking strings.
+CohereStreamContent:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image_url
+        - thinking
+        - document
+    text:
+      type: string
+    thinking:
+      type: string
+
+# Citation: start/end positions and text of the cited span, its sources, the
+# index of the content it belongs to, and the citation kind.
+CohereCitation:
+  type: object
+  properties:
+    start:
+      type: integer
+      description: Start position of cited text
+    end:
+      type: integer
+      description: End position of cited text
+    text:
+      type: string
+      description: Cited text
+    sources:
+      type: array
+      items:
+        $ref: '#/CohereSource'
+    content_index:
+      type: integer
+      description: Content index of the citation
+    type:
+      type: string
+      enum:
+        - TEXT_CONTENT
+        - THINKING_CONTENT
+        - PLAN
+      description: Type of citation
+
+# Citation source: either a tool or a document, with a nullable id and a
+# free-form payload object for each source kind.
+CohereSource:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [tool, document]
+      description: Source type
+    id:
+      type: string
+      # The description promises a nullable id; declare it so a null value
+      # actually validates.
+      nullable: true
+      description: Source ID (nullable)
+    tool_output:
+      type: object
+      description: Tool output (for tool sources)
+    document:
+      type: object
+      description: Document data (for document sources)
--- a/docs/openapi/schemas/integrations/cohere/common.yaml
+++ b/docs/openapi/schemas/integrations/cohere/common.yaml
@@ -0,0 +1,14 @@
+# Cohere Integration Common Types
+
+CohereError:  # flat error object: type, message and an optional code
+  type: object
+  properties:
+    type:
+      description: Error type
+      type: string
+    message:
+      description: Error message
+      type: string
+    code:
+      description: Optional error code
+      type: string
--- a/docs/openapi/schemas/integrations/cohere/embed.yaml
+++ b/docs/openapi/schemas/integrations/cohere/embed.yaml
@@ -0,0 +1,172 @@
+# Cohere v2 Embed API Schemas
+
+CohereEmbeddingRequest:  # request body for the Cohere v2 embed schemas in this file
+  type: object
+  properties:
+    model:
+      description: ID of an available embedding model
+      type: string
+      example: embed-english-v3.0
+    input_type:
+      description: Specifies the type of input passed to the model. Required for embedding models v3 and higher.
+      type: string
+    texts:
+      description: Array of strings to embed. Maximum 96 texts per call. At least one of texts, images, or inputs is required.
+      type: array
+      maxItems: 96
+      items:
+        type: string
+    images:
+      description: Array of image data URIs for multimodal embedding. Maximum 1 image per call. Supports JPEG, PNG, WebP, GIF up to 5MB.
+      type: array
+      maxItems: 1
+      items:
+        type: string
+    inputs:
+      description: Array of mixed text/image components for embedding. Maximum 96 per call.
+      type: array
+      maxItems: 96
+      items:
+        $ref: '#/CohereEmbeddingInput'
+    embedding_types:
+      description: Specifies the return format types (float, int8, uint8, binary, ubinary, base64). Defaults to float if unspecified.
+      type: array
+      items:
+        type: string
+    output_dimension:
+      description: Number of dimensions for output embeddings (256, 512, 1024, 1536). Available only for embed-v4 and newer models.
+      type: integer
+    max_tokens:
+      description: Maximum tokens to embed per input before truncation.
+      type: integer
+    truncate:
+      description: Handling for inputs exceeding token limits. Defaults to END.
+      type: string
+  required:
+    - model
+    - input_type
+
+CohereEmbeddingInput:  # item type of the request's `inputs` array
+  type: object
+  properties:
+    content:
+      description: Array of content blocks (reuses chat content blocks)
+      type: array
+      items:
+        $ref: './chat.yaml#/CohereContentBlock'
+
+CohereEmbeddingResponse:  # embeddings plus the original texts/images and metadata
+  type: object
+  properties:
+    id:
+      description: Response ID
+      type: string
+    embeddings:
+      $ref: '#/CohereEmbeddingData'
+    response_type:
+      description: Response type (embeddings_floats, embeddings_by_type)
+      type: string
+    texts:
+      description: Original text entries
+      type: array
+      items:
+        type: string
+    images:
+      description: Original image entries
+      type: array
+      items:
+        $ref: '#/CohereEmbeddingImageInfo'
+    meta:
+      $ref: '#/CohereEmbeddingMeta'
+
+CohereEmbeddingData:
+  description: Embedding data object with different types
+  type: object
+  properties:
+    float:
+      description: Float embeddings
+      type: array
+      items:
+        type: array
+        items:
+          type: number
+    int8:
+      description: Int8 embeddings
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+    uint8:
+      description: Uint8 embeddings
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+    binary:
+      description: Binary embeddings
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+    ubinary:
+      description: Unsigned binary embeddings
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+    base64:
+      description: Base64-encoded embeddings
+      type: array
+      items:
+        type: string
+
+CohereEmbeddingImageInfo:
+  description: Image information in the response
+  type: object
+  properties:
+    width:
+      description: Width in pixels
+      type: integer
+    height:
+      description: Height in pixels
+      type: integer
+    format:
+      description: Image format
+      type: string
+    bit_depth:
+      description: Bit depth
+      type: integer
+
+CohereEmbeddingMeta:
+  description: Metadata in embedding response
+  type: object
+  properties:
+    api_version:
+      $ref: '#/CohereEmbeddingAPIVersion'
+    billed_units:  # shared with the chat schemas
+      $ref: './chat.yaml#/CohereBilledUnits'
+    tokens:  # shared with the chat schemas
+      $ref: './chat.yaml#/CohereTokenUsage'
+    warnings:
+      description: Any warnings
+      type: array
+      items:
+        type: string
+
+CohereEmbeddingAPIVersion:
+  description: API version information
+  type: object
+  properties:
+    version:
+      description: API version
+      type: string
+    is_deprecated:
+      description: Deprecation status
+      type: boolean
+    is_experimental:
+      description: Experimental status
+      type: boolean
--- a/docs/openapi/schemas/integrations/cohere/tokenize.yaml
+++ b/docs/openapi/schemas/integrations/cohere/tokenize.yaml
@@ -0,0 +1,48 @@
+# Cohere Tokenize API Schemas
+
+CohereCountTokensRequest:  # request body for the Cohere tokenize schemas in this file
+  type: object
+  properties:
+    model:
+      description: Model whose tokenizer should be used
+      type: string
+      example: command-r-plus
+    text:
+      description: Text to tokenize (1-65536 characters)
+      type: string
+      minLength: 1
+      maxLength: 65536
+  required:
+    - text
+    - model
+
+CohereCountTokensResponse:  # token IDs, their string forms, and response metadata
+  type: object
+  properties:
+    tokens:
+      description: Token IDs
+      type: array
+      items:
+        type: integer
+    token_strings:
+      description: Token strings
+      type: array
+      items:
+        type: string
+    meta:
+      $ref: '#/CohereTokenizeMeta'
+
+CohereTokenizeMeta:
+  description: Metadata returned by the tokenize endpoint
+  type: object
+  properties:
+    api_version:  # only field declared on this metadata object
+      $ref: '#/CohereTokenizeAPIVersion'
+
+CohereTokenizeAPIVersion:
+  description: API version metadata
+  type: object
+  properties:
+    version:
+      description: API version
+      type: string
--- a/docs/openapi/schemas/integrations/genai/common.yaml
+++ b/docs/openapi/schemas/integrations/genai/common.yaml
@@ -0,0 +1,80 @@
+# Google GenAI (Gemini) Integration Common Types
+
+GeminiError:  # envelope: every error field is nested under a single `error` key
+  type: object
+  properties:
+    error:
+      type: object
+      properties:
+        code:
+          type: integer
+        message:
+          type: string
+        status:
+          type: string
+        details:  # list of GeminiErrorDetails entries (defined in this file)
+          type: array
+          items:
+            $ref: '#/GeminiErrorDetails'
+
+GeminiErrorDetails:  # item type of GeminiError.error.details
+  type: object
+  properties:
+    '@type':
+      description: Type identifier for the error details
+      type: string
+    fieldViolations:
+      type: array
+      items:
+        type: object
+        properties:
+          description:  # a property literally named "description", not schema documentation
+            type: string
+
+GeminiModel:  # one model entry; also the item type of GeminiListModelsResponse.models
+  type: object
+  properties:
+    name:
+      description: Model resource name (e.g., models/gemini-pro)
+      type: string
+    baseModelId:
+      type: string
+    version:
+      type: string
+    displayName:
+      type: string
+    description:
+      type: string
+    inputTokenLimit:
+      type: integer
+    outputTokenLimit:
+      type: integer
+    supportedGenerationMethods:
+      type: array
+      items:
+        type: string
+    thinking:
+      description: Whether the model supports thinking mode
+      type: boolean
+    temperature:
+      description: Default temperature for the model
+      type: number
+    maxTemperature:
+      description: Maximum allowed temperature for the model
+      type: number
+    topP:
+      description: Default nucleus-sampling value
+      type: number
+    topK:
+      description: Default top-k sampling value
+      type: integer
+
+GeminiListModelsResponse:  # list of GeminiModel entries plus an optional page token
+  type: object
+  properties:
+    models:
+      type: array
+      items:
+        $ref: '#/GeminiModel'
+    nextPageToken:  # presumably passed back to fetch the next page — confirm against the path definition
+      type: string
--- a/docs/openapi/schemas/integrations/genai/files.yaml
+++ b/docs/openapi/schemas/integrations/genai/files.yaml
@@ -0,0 +1,94 @@
+# Google GenAI (Gemini) Files API Schemas
+
+GeminiFileUploadRequest:
+  type: object
+  # Literal block scalar (|): a folded scalar (>) would join the bullet list into one run-on line.
+  description: |
+    Multipart upload for Gemini Files API. Send two parts:
+    - "metadata": JSON object {"file": {"displayName": "<optional label>"}}
+    - "file": binary content
+
+    Note: Direct file content download is not supported by Gemini Files API. Use the file.uri field from the response to access the file.
+  required:
+    - file
+  properties:
+    metadata:
+      type: object
+      description: JSON metadata part; see encoding at the path for contentType application/json.
+      properties:
+        file:
+          type: object
+          properties:
+            displayName:
+              type: string
+          additionalProperties: false
+      additionalProperties: false
+    file:
+      type: string
+      format: binary
+
+GeminiFile:  # file resource; reused by the upload, list and retrieve responses below
+  type: object
+  properties:
+    name:
+      description: File resource name (e.g., files/abc123)
+      type: string
+    displayName:
+      type: string
+    mimeType:
+      type: string
+    sizeBytes:
+      description: Size in bytes (returned as string by Gemini API)
+      type: string
+    createTime:
+      format: date-time
+      type: string
+    updateTime:
+      format: date-time
+      type: string
+    expirationTime:
+      format: date-time
+      type: string
+    sha256Hash:
+      type: string
+    uri:
+      description: URI for accessing the file content
+      type: string
+    state:
+      enum: [STATE_UNSPECIFIED, PROCESSING, ACTIVE, FAILED]
+      type: string
+    error:
+      type: object
+      properties:
+        code:
+          type: integer
+        message:
+          type: string
+    videoMetadata:
+      type: object
+      properties:
+        videoDuration:
+          type: string
+
+GeminiFileUploadResponse:  # wraps the GeminiFile under a `file` key
+  type: object
+  properties:
+    file:
+      $ref: '#/GeminiFile'
+
+GeminiFileListResponse:  # list of GeminiFile entries plus an optional page token
+  type: object
+  properties:
+    files:
+      type: array
+      items:
+        $ref: '#/GeminiFile'
+    nextPageToken:  # presumably passed back to fetch the next page — confirm against the path definition
+      type: string
+
+GeminiFileRetrieveResponse:  # alias: the bare GeminiFile, with no wrapper object
+  $ref: '#/GeminiFile'
+
+GeminiFileDeleteResponse:  # declares no properties
+  description: Empty response on successful deletion
+  type: object
--- a/docs/openapi/schemas/integrations/genai/generation.yaml
+++ b/docs/openapi/schemas/integrations/genai/generation.yaml
--- a/docs/openapi/schemas/integrations/openai/audio.yaml
+++ b/docs/openapi/schemas/integrations/openai/audio.yaml
@@ -0,0 +1,90 @@
+# OpenAI Integration Audio Schemas (Speech and Transcription)
+
+# Speech (TTS) Request
+OpenAISpeechRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier (e.g., tts-1, tts-1-hd)
+      type: string
+      example: tts-1
+    input:
+      description: Text to convert to speech
+      type: string
+    voice:
+      description: Voice to use
+      type: string
+      enum: [alloy, echo, fable, onyx, nova, shimmer]
+    response_format:
+      enum: [mp3, opus, aac, flac, wav, pcm]
+      type: string
+    speed:
+      type: number
+      minimum: 0.25
+      maximum: 4.0
+    stream_format:
+      description: Set to 'sse' for streaming
+      type: string
+      enum: [sse]
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - input
+
+# Transcription Request
+OpenAITranscriptionRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier (e.g., whisper-1)
+      type: string
+      example: whisper-1
+    file:
+      description: Audio file to transcribe
+      type: string
+      format: binary
+    language:
+      description: Language of the audio (ISO 639-1)
+      type: string
+    prompt:
+      description: Prompt to guide transcription
+      type: string
+    response_format:
+      enum: [json, text, srt, verbose_json, vtt]
+      type: string
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    timestamp_granularities:
+      type: array
+      items:
+        enum: [word, segment]
+        type: string
+    stream:
+      type: boolean
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - file
+
+# Responses reuse inference schemas
+OpenAISpeechResponse:  # alias of the shared inference SpeechResponse
+  $ref: '../../inference/speech.yaml#/SpeechResponse'
+
+OpenAISpeechStreamResponse:  # alias of the shared inference SpeechStreamResponse
+  $ref: '../../inference/speech.yaml#/SpeechStreamResponse'
+
+OpenAITranscriptionResponse:  # alias of the shared inference TranscriptionResponse
+  $ref: '../../inference/transcription.yaml#/TranscriptionResponse'
+
+OpenAITranscriptionStreamResponse:  # alias of the shared inference TranscriptionStreamResponse
+  $ref: '../../inference/transcription.yaml#/TranscriptionStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/batch.yaml
+++ b/docs/openapi/schemas/integrations/openai/batch.yaml
@@ -0,0 +1,57 @@
+# OpenAI Integration Batch API Schemas
+# Reuses inference batch schemas since OpenAI integration uses Bifrost format
+
+# Batch Create Request - uses Bifrost format with provider field
+OpenAIBatchCreateRequest:  # alias of the shared inference BatchCreateRequest
+  $ref: '../../inference/batch.yaml#/BatchCreateRequest'
+
+OpenAIBatchCreateResponse:  # alias of the shared inference BatchCreateResponse
+  $ref: '../../inference/batch.yaml#/BatchCreateResponse'
+
+OpenAIBatchListRequest:  # every field is optional: no `required` list is declared
+  type: object
+  properties:
+    limit:
+      description: Maximum number of batches to return
+      type: integer
+      default: 30
+    after:
+      description: Cursor for pagination
+      type: string
+    provider:
+      description: Filter by provider
+      type: string
+      example: openai
+
+OpenAIBatchListResponse:  # alias of the shared inference BatchListResponse
+  $ref: '../../inference/batch.yaml#/BatchListResponse'
+
+OpenAIBatchRetrieveRequest:
+  type: object
+  properties:
+    batch_id:
+      description: Batch ID to retrieve
+      type: string
+    provider:
+      description: Provider for the batch
+      type: string
+  required:
+    - batch_id
+
+OpenAIBatchRetrieveResponse:  # alias of the shared inference BatchRetrieveResponse
+  $ref: '../../inference/batch.yaml#/BatchRetrieveResponse'
+
+OpenAIBatchCancelRequest:
+  type: object
+  properties:
+    batch_id:
+      description: Batch ID to cancel
+      type: string
+    provider:
+      description: Provider for the batch
+      type: string
+  required:
+    - batch_id
+
+OpenAIBatchCancelResponse:  # alias of the shared inference BatchCancelResponse
+  $ref: '../../inference/batch.yaml#/BatchCancelResponse'
--- a/docs/openapi/schemas/integrations/openai/chat.yaml
+++ b/docs/openapi/schemas/integrations/openai/chat.yaml
@@ -0,0 +1,121 @@
+# OpenAI Integration Chat Completions Schemas
+# Reuses inference schemas where possible since Bifrost follows OpenAI format
+
+OpenAIChatRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier (e.g., gpt-4, gpt-3.5-turbo)
+      type: string
+      example: gpt-4
+    messages:
+      description: List of messages in the conversation
+      type: array
+      items:
+        $ref: '#/OpenAIMessage'
+    stream:
+      description: Whether to stream the response
+      type: boolean
+    max_tokens:
+      description: Maximum tokens to generate (legacy, use max_completion_tokens)
+      type: integer
+    max_completion_tokens:
+      description: Maximum tokens to generate
+      type: integer
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    top_p:
+      type: number
+    frequency_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    presence_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    logit_bias:
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:
+      type: boolean
+    top_logprobs:
+      type: integer
+    n:
+      type: integer
+    stop:  # a single string or a list of strings
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    seed:
+      type: integer
+    user:
+      type: string
+    tools:
+      type: array
+      items:
+        $ref: '../../inference/chat.yaml#/ChatTool'
+    tool_choice:
+      $ref: '../../inference/chat.yaml#/ChatToolChoice'
+    parallel_tool_calls:
+      type: boolean
+    response_format:
+      description: Format for the response
+      type: object
+    reasoning_effort:
+      description: OpenAI reasoning effort level
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+    service_tier:
+      type: string
+    stream_options:
+      $ref: '../../inference/chat.yaml#/ChatStreamOptions'
+    # Bifrost-specific
+    fallbacks:
+      description: Fallback models
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - messages
+
+OpenAIMessage:  # item type of OpenAIChatRequest.messages
+  type: object
+  properties:
+    role:
+      enum: [system, user, assistant, tool, developer]
+      type: string
+    name:
+      type: string
+    content:
+      $ref: '../../inference/chat.yaml#/ChatMessageContent'
+    tool_call_id:
+      description: For tool messages
+      type: string
+    refusal:
+      type: string
+    reasoning:
+      type: string
+    annotations:
+      type: array
+      items:
+        $ref: '../../inference/chat.yaml#/ChatAssistantMessageAnnotation'
+    tool_calls:
+      type: array
+      items:
+        $ref: '../../inference/chat.yaml#/ChatAssistantMessageToolCall'
+  required:
+    - role
+
+# Response reuses inference schema since format is identical
+OpenAIChatResponse:  # alias of the shared inference ChatCompletionResponse
+  $ref: '../../inference/chat.yaml#/ChatCompletionResponse'
+
+OpenAIChatStreamResponse:  # alias of the shared inference ChatCompletionStreamResponse
+  $ref: '../../inference/chat.yaml#/ChatCompletionStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/common.yaml
+++ b/docs/openapi/schemas/integrations/openai/common.yaml
@@ -0,0 +1,51 @@
+# OpenAI Integration Common Types
+
+OpenAIError:  # envelope: every error field is nested under a single `error` key
+  type: object
+  properties:
+    error:
+      type: object
+      properties:
+        message:
+          type: string
+        type:
+          type: string
+        param:  # may be null
+          type: string
+          nullable: true
+        code:  # may be null
+          type: string
+          nullable: true
+
+# OpenAI uses the same model format but without provider prefix
+OpenAIModel:  # item type of OpenAIListModelsResponse.data
+  type: object
+  properties:
+    id:
+      description: Model identifier
+      type: string
+    object:
+      default: model
+      type: string
+    owned_by:
+      type: string
+    created:
+      format: int64
+      type: integer
+    active:
+      description: GROQ-specific field
+      type: boolean
+    context_window:
+      description: GROQ-specific field
+      type: integer
+
+OpenAIListModelsResponse:  # list envelope: `object` defaults to "list", `data` holds OpenAIModel entries
+  type: object
+  properties:
+    object:
+      type: string
+      default: list
+    data:
+      type: array
+      items:
+        $ref: '#/OpenAIModel'
--- a/docs/openapi/schemas/integrations/openai/embeddings.yaml
+++ b/docs/openapi/schemas/integrations/openai/embeddings.yaml
@@ -0,0 +1,36 @@
+# OpenAI Integration Embeddings Schemas
+
+OpenAIEmbeddingRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: text-embedding-3-small
+    input:  # a single string or a list of strings
+      description: Input text to embed
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    encoding_format:
+      enum: [float, base64]
+      type: string
+    dimensions:
+      description: Number of dimensions for the embedding
+      type: integer
+    user:
+      type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - input
+
+# Response reuses inference schema
+OpenAIEmbeddingResponse:  # alias of the shared inference EmbeddingResponse
+  $ref: '../../inference/embeddings.yaml#/EmbeddingResponse'
--- a/docs/openapi/schemas/integrations/openai/files.yaml
+++ b/docs/openapi/schemas/integrations/openai/files.yaml
@@ -0,0 +1,95 @@
+# OpenAI Integration Files API Schemas
+# Reuses inference files schemas since OpenAI integration uses Bifrost format
+
+OpenAIFileUploadRequest:
+  type: object
+  properties:
+    file:
+      description: File to upload
+      type: string
+      format: binary
+    purpose:
+      description: Purpose of the file
+      type: string
+      enum: [assistants, assistants_output, batch, batch_output, fine-tune, fine-tune-results, vision, user_data, evals]
+    provider:
+      description: Provider for file storage
+      type: string
+    storage_config:
+      $ref: '../../inference/files.yaml#/FileStorageConfig'
+  required:
+    - file
+    - purpose
+
+OpenAIFileUploadResponse:  # alias of the shared inference FileUploadResponse
+  $ref: '../../inference/files.yaml#/FileUploadResponse'
+
+OpenAIFileListRequest:  # every field is optional: no `required` list is declared
+  type: object
+  properties:
+    purpose:
+      description: Filter by purpose
+      type: string
+    limit:
+      description: Maximum files to return
+      type: integer
+    after:
+      description: Cursor for pagination
+      type: string
+    order:
+      enum: [asc, desc]
+      type: string
+    provider:
+      description: Filter by provider
+      type: string
+
+OpenAIFileListResponse:  # alias of the shared inference FileListResponse
+  $ref: '../../inference/files.yaml#/FileListResponse'
+
+OpenAIFileRetrieveRequest:
+  type: object
+  properties:
+    file_id:
+      description: File ID to retrieve
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: '../../inference/files.yaml#/FileStorageConfig'
+  required:
+    - file_id
+
+OpenAIFileRetrieveResponse:  # alias of the shared inference FileRetrieveResponse
+  $ref: '../../inference/files.yaml#/FileRetrieveResponse'
+
+OpenAIFileDeleteRequest:
+  type: object
+  properties:
+    file_id:
+      description: File ID to delete
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: '../../inference/files.yaml#/FileStorageConfig'
+  required:
+    - file_id
+
+OpenAIFileDeleteResponse:  # alias of the shared inference FileDeleteResponse
+  $ref: '../../inference/files.yaml#/FileDeleteResponse'
+
+OpenAIFileContentRequest:
+  type: object
+  properties:
+    file_id:
+      description: File ID to get content for
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: '../../inference/files.yaml#/FileStorageConfig'
+  required:
+    - file_id
--- a/docs/openapi/schemas/integrations/openai/images.yaml
+++ b/docs/openapi/schemas/integrations/openai/images.yaml
@@ -0,0 +1,133 @@
+# OpenAI Integration - Image Generation Schemas
+
+OpenAIImageGenerationRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier
+      type: string
+    prompt:
+      description: Text prompt to generate image
+      type: string
+    n:
+      description: Number of images to generate
+      type: integer
+      minimum: 1
+      maximum: 10
+      default: 1
+    size:
+      description: Size of the generated image
+      type: string
+      enum:
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1792x1024"
+        - "1024x1792"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+    quality:
+      description: Quality of the generated image
+      type: string
+      enum:
+        - "standard"
+        - "hd"
+    style:
+      description: Style of the generated image
+      type: string
+      enum:
+        - "natural"
+        - "vivid"
+    response_format:
+      description: Format of the response. This parameter is not supported for streaming requests.
+      type: string
+      enum:
+        - "url"
+        - "b64_json"
+      default: "url"
+    user:
+      description: User identifier for tracking
+      type: string
+    stream:
+      description: |
+        Whether to stream the response. When true, images are sent as base64 chunks via SSE.
+      type: boolean
+      default: false
+    fallbacks:
+      description: Fallback models to try if primary model fails
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - prompt
+
+OpenAIImageGenerationResponse:  # non-streaming result: generated images plus the settings used
+  type: object
+  properties:
+    created:
+      description: Unix timestamp when the image was created
+      type: integer
+      format: int64
+    data:
+      description: Array of generated images
+      type: array
+      items:
+        $ref: '../../../schemas/inference/images.yaml#/ImageData'
+    background:
+      description: Background type used
+      type: string
+    output_format:
+      description: Output format used
+      type: string
+    quality:
+      description: Quality setting used
+      type: string
+    size:
+      description: Size setting used
+      type: string
+    usage:
+      $ref: '../../../schemas/inference/images.yaml#/ImageUsage'
+
+OpenAIImageStreamResponse:
+  type: object
+  description: |
+    Streaming response chunk for image generation (OpenAI format).
+    Sent via Server-Sent Events (SSE) when stream=true.
+  properties:
+    type:
+      type: string
+      enum:
+        - "image_generation.partial_image"
+        - "image_generation.completed"
+        - "error"
+      description: Type of stream event
+    b64_json:
+      type: string
+      description: Base64-encoded chunk of image data
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Ordering index for stream chunks
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string
+      description: Size of the generated image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      description: Output format used
+    usage:  # allOf wrapper keeps the description alive: OpenAPI 3.0 ignores keywords beside a bare $ref
+      description: Token usage (usually in final chunk)
+      allOf: [{$ref: '../../../schemas/inference/images.yaml#/ImageUsage'}]
--- a/docs/openapi/schemas/integrations/openai/responses.yaml
+++ b/docs/openapi/schemas/integrations/openai/responses.yaml
@@ -0,0 +1,108 @@
+# OpenAI Integration Responses API Schemas
+
+OpenAIResponsesRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: gpt-4
+    input:
+      $ref: '#/OpenAIResponsesInput'
+    stream:
+      type: boolean
+    instructions:
+      description: System instructions for the model
+      type: string
+    max_output_tokens:
+      type: integer
+    metadata:  # free-form object: any additional properties are accepted
+      type: object
+      additionalProperties: true
+    parallel_tool_calls:
+      type: boolean
+    previous_response_id:
+      type: string
+    reasoning:
+      $ref: '#/OpenAIResponsesReasoning'
+    store:
+      type: boolean
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    text:
+      $ref: '#/OpenAIResponsesTextConfig'
+    tool_choice:
+      $ref: '../../inference/responses.yaml#/ResponsesToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '../../inference/responses.yaml#/ResponsesTool'
+    top_p:
+      type: number
+    truncation:
+      enum: [auto, disabled]
+      type: string
+    user:
+      type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - input
+
+OpenAIResponsesInput:
+  description: Input - can be a string or array of messages
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '../../inference/responses.yaml#/ResponsesMessage'
+
+OpenAIResponsesReasoning:  # value of OpenAIResponsesRequest.reasoning
+  type: object
+  properties:
+    effort:
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+    generate_summary:  # NOTE(review): same enum as `summary` below — confirm whether one supersedes the other
+      type: string
+      enum: [auto, concise, detailed]
+    summary:
+      type: string
+      enum: [auto, concise, detailed]
+    max_tokens:
+      type: integer
+
+OpenAIResponsesTextConfig:  # value of OpenAIResponsesRequest.text; wraps a single `format` object
+  type: object
+  properties:
+    format:
+      $ref: '#/OpenAIResponsesTextFormat'
+
+OpenAIResponsesTextFormat:  # value of OpenAIResponsesTextConfig.format
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [text, json_object, json_schema]
+    json_schema:  # presumably only meaningful when type is json_schema — confirm
+      type: object
+      properties:
+        name:
+          type: string
+        schema:
+          type: object
+        strict:
+          type: boolean
+
+# Response reuses inference schema
+OpenAIResponsesResponse:  # alias of the shared inference ResponsesResponse
+  $ref: '../../inference/responses.yaml#/ResponsesResponse'
+
+OpenAIResponsesStreamResponse:  # alias of the shared inference ResponsesStreamResponse
+  $ref: '../../inference/responses.yaml#/ResponsesStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/text.yaml
+++ b/docs/openapi/schemas/integrations/openai/text.yaml
@@ -0,0 +1,74 @@
+# OpenAI Integration Text Completions Schemas (Legacy Completions API)
+
+OpenAITextCompletionRequest:
+  type: object
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: gpt-3.5-turbo-instruct
+    prompt:  # a single string or a list of strings
+      description: The prompt(s) to generate completions for
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    stream:
+      description: Whether to stream the response
+      type: boolean
+    max_tokens:
+      type: integer
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    top_p:
+      type: number
+    frequency_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    presence_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    logit_bias:
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:  # an integer here, unlike the boolean `logprobs` on OpenAIChatRequest
+      type: integer
+    n:
+      type: integer
+    stop:  # a single string or a list of strings
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    suffix:
+      type: string
+    echo:
+      type: boolean
+    best_of:
+      type: integer
+    user:
+      type: string
+    seed:
+      type: integer
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+  required:
+    - model
+    - prompt
+
+# Response reuses inference schema
+OpenAITextCompletionResponse:  # alias of the shared inference TextCompletionResponse
+  $ref: '../../inference/text.yaml#/TextCompletionResponse'
+
+OpenAITextCompletionStreamResponse:  # alias of the shared inference TextCompletionStreamResponse
+  $ref: '../../inference/text.yaml#/TextCompletionStreamResponse'
--- a/docs/openapi/schemas/management/cache.yaml
+++ b/docs/openapi/schemas/management/cache.yaml
@@ -0,0 +1,9 @@
+# Cache API schemas
+
+ClearCacheResponse:
+  description: Clear cache response
+  type: object
+  properties:
+    message:
+      example: Cache cleared successfully
+      type: string
--- a/docs/openapi/schemas/management/common.yaml
+++ b/docs/openapi/schemas/management/common.yaml
@@ -0,0 +1,35 @@
+# Common schemas used across management APIs
+
+SuccessResponse:
+  description: Generic success response
+  type: object
+  properties:
+    status:
+      example: success
+      type: string
+    message:
+      example: Operation completed successfully
+      type: string
+
+ErrorResponse:
+  description: Error response
+  allOf:  # wraps the $ref: OpenAPI 3.0 ignores keywords placed beside a bare $ref
+    - $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+MessageResponse:
+  description: Simple message response
+  type: object
+  properties:
+    message:  # the only field on this response
+      type: string
+
+EnvVar:
+  type: object
+  description: Environment variable configuration
+  properties:  # children indented one level (2 spaces), matching every other schema
+    value:
+      type: string
+    env_var:
+      type: string
+    from_env:
+      type: boolean
--- a/docs/openapi/schemas/management/config.yaml
+++ b/docs/openapi/schemas/management/config.yaml
@@ -0,0 +1,200 @@
+# Config API schemas
+
+Version:
+  description: Version information
+  type: string
+  example: '1.0.0'
+
+ClientConfig:
+  description: Client configuration
+  type: object
+  properties:
+    drop_excess_requests:
+      description: Whether to drop excess requests when rate limited
+      type: boolean
+    prometheus_labels:
+      description: Custom Prometheus labels
+      type: array
+      items:
+        type: string
+    allowed_origins:
+      description: Allowed CORS origins
+      type: array
+      items:
+        type: string
+    initial_pool_size:
+      description: Initial connection pool size
+      type: integer
+    enable_logging:
+      description: Whether logging is enabled
+      type: boolean
+    disable_content_logging:
+      description: Whether content logging is disabled
+      type: boolean
+    enforce_auth_on_inference:
+      description: Whether to enforce virtual key authentication on inference requests
+      type: boolean
+    enforce_governance_header:
+      description: 'Deprecated: use enforce_auth_on_inference instead'
+      type: boolean
+      deprecated: true
+    allow_direct_keys:
+      description: Whether to allow direct API keys
+      type: boolean
+    max_request_body_size_mb:
+      description: Maximum request body size in MB
+      type: integer
+    compat:
+      description: Compat plugin configuration
+      type: object
+      additionalProperties: false
+      properties:
+        convert_text_to_chat:
+          description: Convert text completion requests to chat
+          type: boolean
+        convert_chat_to_responses:
+          description: Convert chat completion requests to responses
+          type: boolean
+        should_drop_params:
+          description: Drop unsupported parameters based on model catalog
+          type: boolean
+        should_convert_params:
+          description: Converts model parameter values that are not supported by the model
+          type: boolean
+          default: false
+    log_retention_days:
+      description: Number of days to retain logs
+      type: integer
+    header_filter_config:
+      $ref: '#/HeaderFilterConfig'
+    mcp_agent_depth:
+      description: Depth of MCP agent
+      type: integer
+    mcp_tool_execution_timeout:
+      description: Timeout for MCP tool execution in seconds
+      type: integer
+    mcp_code_mode_binding_level:
+      description: Binding level for MCP code mode
+      type: string
+    required_headers:
+      description: Headers that must be present on every request. Requests missing any of these headers are rejected with 400. Case-insensitive matching.
+      type: array
+      items:
+        type: string
+    logging_headers:
+      description: Headers to capture in log metadata. Values are extracted from incoming requests and stored in the metadata field of log entries. Case-insensitive matching. No restart required.
+      type: array
+      items:
+        type: string
+
+FrameworkConfig:
+  description: Framework configuration
+  type: object
+  properties:
+    id:
+      description: Unique identifier for the framework config
+      type: integer
+    pricing_url:
+      description: URL for pricing data
+      type: string
+    pricing_sync_interval:
+      description: Pricing sync interval in seconds
+      type: integer
+      format: int64
+
+AuthConfig:
+  type: object
+  description: Authentication configuration
+  properties:
+    admin_username:
+      type: string
+    admin_password:
+      type: string
+      description: 'Password (redacted as `<redacted>` in responses)'  # code span: a bare <redacted> is parsed as an HTML tag by Markdown renderers
+    is_enabled:
+      type: boolean
+    disable_auth_on_inference:
+      type: boolean
+
+HeaderFilterConfig:
+  description: Header filter configuration
+  type: object
+  properties:
+    allowlist:  # array of strings
+      type: array
+      items:
+        type: string
+    denylist:  # array of strings
+      type: array
+      items:
+        type: string
+
+ProxyConfig:
+  type: object
+  description: Global proxy configuration
+  properties:
+    enabled:
+      type: boolean
+    type:
+      type: string
+      enum: [http, socks5, tcp]
+    url:
+      type: string
+    username:
+      type: string
+    password:
+      type: string
+      description: 'Password (redacted as `<redacted>` in responses)'  # code span: a bare <redacted> is parsed as an HTML tag by Markdown renderers
+    no_proxy:
+      type: string
+    timeout:
+      type: integer
+    skip_tls_verify:
+      type: boolean
+    enable_for_scim:
+      type: boolean
+    enable_for_inference:
+      type: boolean
+    enable_for_api:
+      type: boolean
+
+RestartRequiredConfig:
+  description: Restart required configuration
+  type: object
+  properties:
+    required:  # boolean flag
+      type: boolean
+    reason:  # free-form string
+      type: string
+
+GetConfigResponse:
+  description: Configuration response
+  type: object
+  properties:
+    client_config:
+      $ref: '#/ClientConfig'
+    framework_config:
+      $ref: '#/FrameworkConfig'
+    auth_config:
+      $ref: '#/AuthConfig'
+    is_db_connected:  # boolean connection flag
+      type: boolean
+    is_cache_connected:  # boolean connection flag
+      type: boolean
+    is_logs_connected:  # boolean connection flag
+      type: boolean
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    restart_required:
+      $ref: '#/RestartRequiredConfig'
+
+UpdateConfigRequest:
+  description: Update configuration request
+  type: object
+  properties:  # the same three config sections that GetConfigResponse references; none is marked required
+    client_config:
+      $ref: '#/ClientConfig'
+    framework_config:
+      $ref: '#/FrameworkConfig'
+    auth_config:
+      $ref: '#/AuthConfig'
--- a/docs/openapi/schemas/management/governance.yaml
+++ b/docs/openapi/schemas/management/governance.yaml
--- a/docs/openapi/schemas/management/health.yaml
+++ b/docs/openapi/schemas/management/health.yaml
@@ -0,0 +1,15 @@
+# Health API schemas
+
+HealthResponse:
+  description: Health check response
+  type: object
+  properties:
+    status:
+      example: ok
+      type: string
+      enum: [ok]
+    components:
+      description: Health status of individual components (config_store, log_store, vector_store)
+      type: object
+      additionalProperties:
+        type: string
--- a/docs/openapi/schemas/management/logging.yaml
+++ b/docs/openapi/schemas/management/logging.yaml
@@ -0,0 +1,829 @@
+# Logging API schemas
+
+LogEntry:
+  description: Log entry
+  type: object
+  properties:
+    id:
+      type: string
+    parent_request_id:
+      type: string
+    provider:
+      type: string
+    model:
+      type: string
+    status:
+      type: string
+      enum: [processing, success, error]
+    object:
+      type: string
+    timestamp:
+      type: string
+      format: date-time
+    number_of_retries:
+      type: integer
+    fallback_index:
+      type: integer
+    latency:
+      type: number
+    cost:
+      type: number
+    selected_key_id:
+      type: string
+    selected_key_name:
+      type: string
+    virtual_key_id:
+      type: string
+    virtual_key_name:
+      type: string
+      nullable: true
+    routing_engines_used:
+      description: Array of routing engines used for this request (routing-rule, governance, or loadbalancing)
+      type: array
+      nullable: true
+      items:
+        type: string
+    routing_rule_id:
+      type: string
+      nullable: true
+    routing_rule_name:
+      type: string
+      nullable: true
+    stream:
+      type: boolean
+    raw_request:
+      type: string
+    raw_response:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+    token_usage:
+      $ref: '../../schemas/inference/usage.yaml#/BifrostLLMUsage'
+    error_details:
+      $ref: '../../schemas/inference/common.yaml#/BifrostError'
+    input_history:
+      type: array
+      items:
+        $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
+    responses_input_history:
+      type: array
+      items:
+        $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
+    output_message:
+      $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
+    responses_output:
+      type: array
+      items:
+        $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
+    embedding_output:  # array of arrays of numbers
+      type: array
+      items:
+        type: array
+        items:
+          type: number
+    params:
+      type: object
+      additionalProperties: true
+    tools:
+      type: array
+      items:
+        $ref: '../../schemas/inference/chat.yaml#/ChatTool'
+    tool_calls:
+      type: array
+      items:
+        $ref: '../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall'
+    speech_input:
+      type: object
+      additionalProperties: true
+    transcription_input:
+      type: object
+      additionalProperties: true
+    image_generation_input:
+      type: object
+      additionalProperties: true
+    speech_output:
+      type: object
+      additionalProperties: true
+    transcription_output:
+      type: object
+      additionalProperties: true
+    image_generation_output:
+      type: object
+      additionalProperties: true
+    cache_debug:
+      type: object
+      additionalProperties: true
+    metadata:
+      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
+      type: object
+      additionalProperties: true
+    selected_key:
+      type: object
+      additionalProperties: true
+    virtual_key:
+      type: object
+      additionalProperties: true
+    passthrough_request_body:
+      description: Raw passthrough request body (for passthrough integration routes)
+      type: string
+    passthrough_response_body:
+      description: Raw passthrough response body (for passthrough integration routes)
+      type: string
+    routing_engine_logs:
+      description: Detailed logs from the routing engine decision process
+      type: object
+      additionalProperties: true
+    is_large_payload_request:
+      description: Whether the request payload exceeded the large payload threshold
+      type: boolean
+    is_large_payload_response:
+      description: Whether the response payload exceeded the large payload threshold
+      type: boolean
+    rerank_output:
+      description: Rerank operation output
+      type: object
+      additionalProperties: true
+    video_generation_input:
+      description: Video generation request input
+      type: object
+      additionalProperties: true
+    video_generation_output:
+      description: Video generation response output
+      type: object
+      additionalProperties: true
+    video_retrieve_output:
+      description: Video retrieve response output
+      type: object
+      additionalProperties: true
+    video_list_output:
+      description: Video list response output
+      type: object
+      additionalProperties: true
+    video_delete_output:
+      description: Video delete response output
+      type: object
+      additionalProperties: true
+    video_download_output:
+      description: Video download response output
+      type: object
+      additionalProperties: true
+    list_models_output:
+      description: List models response output
+      type: object
+      additionalProperties: true
+
+MCPToolLogEntry:
+  description: MCP tool execution log entry
+  type: object
+  properties:
+    id:
+      description: Unique identifier for the log entry
+      type: string
+    llm_request_id:
+      description: Links to the LLM request that triggered this tool call
+      type: string
+    timestamp:
+      description: When the tool execution started
+      type: string
+      format: date-time
+    tool_name:
+      description: Name of the MCP tool that was executed
+      type: string
+    server_label:
+      description: Label of the MCP server that provided the tool
+      type: string
+    virtual_key_id:
+      description: ID of the virtual key used for this tool execution
+      type: string
+    virtual_key_name:
+      description: Name of the virtual key used for this tool execution
+      type: string
+    arguments:
+      description: Tool execution arguments
+      type: object
+      additionalProperties: true
+    result:
+      description: Tool execution result
+      type: object
+      additionalProperties: true
+    error_details:
+      $ref: '../../schemas/inference/common.yaml#/BifrostError'
+    latency:
+      description: Execution time in milliseconds
+      type: number
+    cost:
+      description: Cost in dollars for this tool execution
+      type: number
+    status:
+      description: Execution status
+      type: string
+      enum: [processing, success, error]
+    metadata:
+      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
+      type: object
+      additionalProperties: true
+    created_at:
+      description: When the log entry was created
+      type: string
+      format: date-time
+    virtual_key:
+      description: Full virtual key object (populated when virtual_key_id is set)
+      type: object
+      additionalProperties: true
+
+MCPToolLogSearchFilters:
+  description: MCP tool log search filters
+  type: object
+  properties:
+    tool_names:
+      description: Filter by tool names
+      type: array
+      items:
+        type: string
+    server_labels:
+      description: Filter by server labels
+      type: array
+      items:
+        type: string
+    status:
+      description: Filter by execution status
+      type: array
+      items:
+        type: string
+    llm_request_ids:
+      description: Filter by linked LLM request IDs
+      type: array
+      items:
+        type: string
+    start_time:
+      description: Filter by start time (RFC3339 format)
+      type: string
+      format: date-time
+    end_time:
+      description: Filter by end time (RFC3339 format)
+      type: string
+      format: date-time
+    min_latency:
+      description: Filter by minimum latency
+      type: number
+    max_latency:
+      description: Filter by maximum latency
+      type: number
+    content_search:
+      description: Search in tool arguments and results
+      type: string
+
+MCPToolLogStats:
+  description: MCP tool log statistics
+  type: object
+  properties:
+    total_executions:
+      description: Total number of tool executions
+      type: integer
+    success_rate:
+      description: Success rate percentage
+      type: number
+    average_latency:
+      description: Average execution latency in milliseconds
+      type: number
+    total_cost:
+      description: Total cost in dollars for all executions
+      type: number
+
+SearchMCPLogsResponse:
+  description: Search MCP logs response
+  type: object
+  properties:
+    logs:
+      type: array
+      items:
+        $ref: '#/MCPToolLogEntry'
+    pagination:  # declared inline; total_count is the only required member
+      type: object
+      required:
+        - total_count
+      properties:
+        limit:
+          type: integer
+        offset:
+          type: integer
+        sort_by:
+          type: string
+        order:
+          type: string
+        total_count:
+          description: Total number of items matching the query
+          type: integer
+          format: int64
+    stats:
+      $ref: '#/MCPToolLogStats'
+    has_logs:
+      description: Whether any logs exist in the system
+      type: boolean
+
+MCPLogsFilterDataResponse:
+  description: Available MCP log filter data
+  type: object
+  properties:
+    tool_names:
+      description: All unique tool names
+      type: array
+      items:
+        type: string
+    server_labels:
+      description: All unique server labels
+      type: array
+      items:
+        type: string
+    virtual_keys:
+      description: All unique virtual keys
+      type: array
+      items:
+        type: object
+        properties:
+          id:
+            description: Virtual key ID
+            type: string
+          name:
+            description: Virtual key name
+            type: string
+          value:
+            description: Virtual key value (redacted if applicable)
+            type: string
+
+DeleteMCPLogsRequest:
+  description: Delete MCP logs request
+  type: object
+  required:
+    - ids
+  properties:
+    ids:
+      description: Array of log IDs to delete
+      type: array
+      items:
+        type: string
+
+SearchFilters:
+  description: Log search filters
+  type: object
+  properties:  # no `required` list is declared, so every filter is optional
+    providers:
+      type: array
+      items:
+        type: string
+    models:
+      type: array
+      items:
+        type: string
+    status:
+      type: array
+      items:
+        type: string
+    objects:
+      type: array
+      items:
+        type: string
+    selected_key_ids:
+      type: array
+      items:
+        type: string
+    virtual_key_ids:
+      type: array
+      items:
+        type: string
+    routing_rule_ids:
+      type: array
+      items:
+        type: string
+    routing_engine_used:
+      description: Filter by routing engine (routing-rule, governance, or loadbalancing)
+      type: array
+      items:
+        type: string
+    start_time:
+      type: string
+      format: date-time
+    end_time:
+      type: string
+      format: date-time
+    min_latency:
+      type: number
+    max_latency:
+      type: number
+    min_tokens:
+      type: integer
+    max_tokens:
+      type: integer
+    min_cost:
+      type: number
+    max_cost:
+      type: number
+    missing_cost_only:
+      type: boolean
+    content_search:
+      type: string
+
+SearchLogsResponse:
+  description: Search logs response
+  type: object
+  properties:
+    logs:
+      type: array
+      items:
+        $ref: '#/LogEntry'
+    pagination:
+      $ref: '#/PaginationOptions'
+    stats:
+      $ref: '#/LogStats'
+    has_logs:
+      description: Whether any logs exist in the system
+      type: boolean
+
+PaginationOptions:
+  description: Pagination metadata for list responses
+  type: object
+  properties:
+    limit:
+      type: integer
+    offset:
+      type: integer
+    sort_by:
+      enum: [timestamp, latency, tokens, cost]
+      type: string
+    order:
+      enum: [asc, desc]
+      type: string
+    total_count:
+      description: Total number of items matching the query
+      type: integer
+      format: int64
+
+LogStats:
+  description: Log statistics
+  type: object
+  properties:
+    total_requests:  # integer
+      type: integer
+    total_tokens:  # integer
+      type: integer
+    total_cost:  # number
+      type: number
+    average_latency:  # number
+      type: number
+    success_rate:  # number
+      type: number
+
+DroppedRequestsResponse:
+  description: Dropped requests response
+  type: object
+  properties:
+    dropped_requests:  # 64-bit integer
+      format: int64
+      type: integer
+
+FilterDataResponse:
+  description: Available filter data response
+  type: object
+  properties:
+    models:
+      type: array
+      items:
+        type: string
+    selected_keys:
+      type: array
+      items:
+        $ref: '../../schemas/management/providers.yaml#/Key'
+    virtual_keys:
+      type: array
+      items:
+        $ref: '../../schemas/management/governance.yaml#/VirtualKey'
+    routing_rules:
+      description: Available routing rules for filtering
+      type: array
+      items:
+        $ref: '../../schemas/management/governance.yaml#/RoutingRule'
+    routing_engines:
+      description: Available routing engine types (routing-rule, governance, loadbalancing)
+      type: array
+      items:
+        type: string
+
+DeleteLogsRequest:
+  description: Delete logs request
+  type: object
+  required:
+    - ids
+  properties:
+    ids:  # array of strings; the only (and required) field
+      type: array
+      items:
+        type: string
+
+RecalculateCostRequest:
+  description: Recalculate cost request
+  type: object
+  properties:
+    filters:
+      $ref: '#/SearchFilters'
+    limit:
+      description: Maximum number of logs to process (default 200, max 1000)
+      type: integer
+
+RecalculateCostResponse:
+  description: Recalculate cost response
+  type: object
+  properties:  # four integer counters
+    total_matched:
+      type: integer
+    updated:
+      type: integer
+    skipped:
+      type: integer
+    remaining:
+      type: integer
+
+# Histogram schemas
+
+HistogramBucket:
+  description: Time-bucketed request count
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    count:
+      format: int64
+      type: integer
+    success:
+      format: int64
+      type: integer
+    error:
+      format: int64
+      type: integer
+
+HistogramResult:
+  description: Time-bucketed request count histogram
+  type: object
+  properties:
+    buckets:  # each item is a HistogramBucket
+      type: array
+      items:
+        $ref: '#/HistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+TokenHistogramBucket:
+  description: Time-bucketed token usage
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    prompt_tokens:
+      format: int64
+      type: integer
+    completion_tokens:
+      format: int64
+      type: integer
+    total_tokens:
+      format: int64
+      type: integer
+
+TokenHistogramResult:
+  description: Time-bucketed token usage histogram
+  type: object
+  properties:
+    buckets:  # each item is a TokenHistogramBucket
+      type: array
+      items:
+        $ref: '#/TokenHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+CostHistogramBucket:
+  description: Time-bucketed cost data with model breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    total_cost:
+      type: number
+    by_model:
+      description: Cost breakdown by model name
+      type: object
+      additionalProperties:
+        type: number
+
+CostHistogramResult:
+  description: Time-bucketed cost histogram with model breakdown
+  type: object
+  properties:
+    buckets:
+      type: array
+      items:
+        $ref: '#/CostHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    models:
+      description: List of models present in the histogram
+      type: array
+      items:
+        type: string
+
+ModelUsageStats:
+  description: Usage statistics for a single model
+  type: object
+  properties:
+    total:
+      format: int64
+      type: integer
+    success:
+      format: int64
+      type: integer
+    error:
+      format: int64
+      type: integer
+
+ModelHistogramBucket:
+  description: Time-bucketed model usage with success/error breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_model:
+      description: Usage breakdown by model name
+      type: object
+      additionalProperties:
+        $ref: '#/ModelUsageStats'
+
+ModelHistogramResult:
+  description: Time-bucketed model usage histogram
+  type: object
+  properties:
+    buckets:  # each item is a ModelHistogramBucket
+      type: array
+      items:
+        $ref: '#/ModelHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    models:  # array of strings
+      type: array
+      items:
+        type: string
+
+LatencyHistogramBucket:
+  description: Time-bucketed latency percentiles
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    avg_latency:  # number
+      type: number
+    p90_latency:  # number
+      type: number
+    p95_latency:  # number
+      type: number
+    p99_latency:  # number
+      type: number
+    total_requests:
+      format: int64
+      type: integer
+
+LatencyHistogramResult:
+  description: Time-bucketed latency histogram
+  type: object
+  properties:
+    buckets:  # each item is a LatencyHistogramBucket
+      type: array
+      items:
+        $ref: '#/LatencyHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+
+ProviderCostHistogramBucket:
+  description: Time-bucketed cost data with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    total_cost:
+      type: number
+    by_provider:
+      description: Cost breakdown by provider name
+      type: object
+      additionalProperties:
+        type: number
+
+ProviderCostHistogramResult:
+  description: Time-bucketed cost histogram with provider breakdown
+  type: object
+  properties:
+    buckets:  # each item is a ProviderCostHistogramBucket
+      type: array
+      items:
+        $ref: '#/ProviderCostHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:  # array of strings
+      type: array
+      items:
+        type: string
+
+ProviderTokenStats:
+  description: Token statistics for a single provider
+  type: object
+  properties:
+    prompt_tokens:
+      format: int64
+      type: integer
+    completion_tokens:
+      format: int64
+      type: integer
+    total_tokens:
+      format: int64
+      type: integer
+
+ProviderTokenHistogramBucket:
+  description: Time-bucketed token usage with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_provider:
+      description: Token usage breakdown by provider name
+      type: object
+      additionalProperties:
+        $ref: '#/ProviderTokenStats'
+
+ProviderTokenHistogramResult:
+  description: Time-bucketed token histogram with provider breakdown
+  type: object
+  properties:
+    buckets:  # each item is a ProviderTokenHistogramBucket
+      type: array
+      items:
+        $ref: '#/ProviderTokenHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:  # array of strings
+      type: array
+      items:
+        type: string
+
+ProviderLatencyStats:
+  description: Latency statistics for a single provider
+  type: object
+  properties:
+    avg_latency:  # number
+      type: number
+    p90_latency:  # number
+      type: number
+    p95_latency:  # number
+      type: number
+    p99_latency:  # number
+      type: number
+    total_requests:
+      format: int64
+      type: integer
+
+ProviderLatencyHistogramBucket:
+  description: Time-bucketed latency data with provider breakdown
+  type: object
+  properties:
+    timestamp:
+      format: date-time
+      type: string
+    by_provider:
+      description: Latency breakdown by provider name
+      type: object
+      additionalProperties:
+        $ref: '#/ProviderLatencyStats'
+
+ProviderLatencyHistogramResult:
+  description: Time-bucketed latency histogram with provider breakdown
+  type: object
+  properties:
+    buckets:  # each item is a ProviderLatencyHistogramBucket
+      type: array
+      items:
+        $ref: '#/ProviderLatencyHistogramBucket'
+    bucket_size_seconds:
+      format: int64
+      type: integer
+    providers:  # array of strings
+      type: array
+      items:
+        type: string
--- a/docs/openapi/schemas/management/mcp.yaml
+++ b/docs/openapi/schemas/management/mcp.yaml
@@ -0,0 +1,446 @@
+# MCP API schemas
+
+MCPAuthType:  # also defined in management/oauth.yaml; keep both descriptions in agreement
+  type: string
+  enum: [none, headers, oauth, per_user_oauth]
+  description: |
+    Authentication type for MCP connections:
+    - none: No authentication
+    - headers: Header-based authentication (API keys, custom headers, etc.)
+    - oauth: OAuth 2.0 authentication (server-level, admin authenticates once)
+    - per_user_oauth: Per-user OAuth 2.1 authentication (each user authenticates individually)
+
+MCPConnectionType:
+  description: Connection type for MCP client
+  type: string
+  enum: [http, stdio, sse, inprocess]
+
+MCPConnectionState:
+  description: Connection state of an MCP client
+  type: string
+  enum: [connected, disconnected, error]
+
+MCPStdioConfig:
+  description: STDIO configuration for MCP client
+  type: object
+  properties:
+    command:
+      description: Executable command to run
+      type: string
+    args:
+      description: Command line arguments
+      type: array
+      items:
+        type: string
+    envs:
+      description: Environment variables required
+      type: array
+      items:
+        type: string
+
+MCPClientCreateRequest:
+  description: |
+    MCP client configuration for creating a new client (tool_pricing not available at creation).
+    The schema varies based on connection_type:
+    - HTTP/SSE: connection_string is required
+    - STDIO: stdio_config is required
+    - InProcess: server instance must be provided programmatically (Go package only)
+  oneOf:
+    - $ref: "#/MCPClientCreateRequestHTTP"
+    - $ref: "#/MCPClientCreateRequestSSE"
+    - $ref: "#/MCPClientCreateRequestSTDIO"
+  discriminator:
+    propertyName: connection_type
+    mapping:
+      http: "#/MCPClientCreateRequestHTTP"
+      sse: "#/MCPClientCreateRequestSSE"
+      stdio: "#/MCPClientCreateRequestSTDIO"
+
+MCPClientCreateRequestBase:
+  type: object
+  required:
+    - name
+    - connection_type
+  properties:
+    client_id:
+      type: string
+      description: Unique identifier for the MCP client (optional, auto-generated if not provided)
+    name:
+      type: string
+      description: Display name for the MCP client
+    is_code_mode_client:
+      type: boolean
+      description: Whether this client is available in code mode
+    is_ping_available:
+      type: boolean
+      default: true
+      description: |
+        Whether the MCP server supports ping for health checks.
+        If true, uses lightweight ping method for health checks.
+        If false, uses listTools method for health checks instead.
+    connection_type:
+      $ref: '#/MCPConnectionType'
+    auth_type:
+      $ref: '#/MCPAuthType'
+      description: Authentication type for the MCP connection
+    oauth_config_id:
+      type: string
+      description: |
+        OAuth config ID for OAuth authentication.
+        Set after OAuth flow is completed. References the oauth_configs table.
+        Only relevant when auth_type is "oauth".
+    headers:
+      type: object
+      additionalProperties:
+        type: string
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+    oauth_config:
+      $ref: '../../schemas/management/oauth.yaml#/OAuthConfigRequest'
+      description: |
+        OAuth configuration for initiating OAuth flow.
+        Only include this when creating a client with auth_type "oauth".
+        This will trigger the OAuth flow and return an authorization URL.
+    tools_to_execute:
+      type: array
+      items:
+        type: string
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+    tools_to_auto_execute:
+      type: array
+      items:
+        type: string
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+    allow_on_all_virtual_keys:
+      type: boolean
+      default: false
+      description: |
+        When true, this MCP client's tools are available to all virtual keys by default,
+        without requiring an explicit virtual key assignment.
+        An explicit virtual key config always overrides this setting for that key.
+MCPClientCreateRequestHTTP:
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      properties:
+        connection_type:
+          type: string
+          enum: [http]
+        connection_string:
+          description: HTTP URL (required for HTTP connection type)
+          type: string
+      required:
+        - connection_string
+
+MCPClientCreateRequestSSE:
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      properties:
+        connection_type:
+          type: string
+          enum: [sse]
+        connection_string:
+          description: SSE URL (required for SSE connection type)
+          type: string
+      required:
+        - connection_string
+
+MCPClientCreateRequestSTDIO:
+  allOf:
+    - $ref: "#/MCPClientCreateRequestBase"
+    - type: object
+      properties:
+        connection_type:
+          type: string
+          enum: [stdio]
+        stdio_config:
+          description: STDIO configuration (required for STDIO connection type)
+          $ref: "#/MCPStdioConfig"
+      required:
+        - stdio_config
+
+MCPClientUpdateRequest:  # NOTE(review): is_ping_available exists on the create schema but not here - confirm this is intentional
+  description: MCP client configuration for updating an existing client (includes tool_pricing)
+  type: object
+  properties:
+    client_id:
+      description: Unique identifier for the MCP client
+      type: string
+    name:
+      description: Display name for the MCP client
+      type: string
+    is_code_mode_client:
+      description: Whether this client is available in code mode
+      type: boolean
+    connection_type:
+      $ref: "#/MCPConnectionType"
+    connection_string:
+      description: HTTP or SSE URL (required for HTTP or SSE connections)
+      type: string
+    stdio_config:
+      $ref: "#/MCPStdioConfig"
+    auth_type:
+      description: Authentication type for the MCP connection
+      $ref: "#/MCPAuthType"
+    oauth_config_id:
+      description: |
+        OAuth config ID for OAuth authentication.
+        References the oauth_configs table.
+        Only relevant when auth_type is "oauth".
+      type: string
+    headers:
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+      type: object
+      additionalProperties:
+        type: string
+    tools_to_execute:
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+      type: array
+      items:
+        type: string
+    tools_to_auto_execute:
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+      type: array
+      items:
+        type: string
+    tool_pricing:
+      description: |
+        Per-tool cost in USD for execution.
+        Key is the tool name, value is the cost per execution.
+        Example: {"read_file": 0.001, "write_file": 0.002}
+        Note: Only available when updating an existing client after tools have been fetched.
+      type: object
+      additionalProperties:
+        type: number
+        format: double
+    allow_on_all_virtual_keys:
+      description: |
+        When true, this MCP client's tools are accessible to all virtual keys without requiring
+        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
+        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
+      type: boolean
+      default: false
+    vk_configs:
+      description: |
+        When provided, replaces all virtual key assignments for this MCP client.
+        Each entry specifies a virtual key and the tools it is allowed to call.
+        To remove all VK access, provide an empty array [].
+        Omit this field to leave existing VK assignments unchanged.
+      type: array
+      items:
+        $ref: "#/MCPVKConfig"
+
+MCPVKConfig:
+  description: Per-virtual-key tool access configuration for an MCP client
+  type: object
+  required:
+    - virtual_key_id
+    - tools_to_execute
+  properties:
+    virtual_key_id:
+      description: ID of the virtual key
+      type: string
+    tools_to_execute:
+      description: |
+        Tools this virtual key is allowed to call on this MCP server.
+        ["*"] => all tools allowed
+        ["tool1", "tool2"] => only the specified tools
+      type: array
+      items:
+        type: string
+
+MCPClientConfig:  # NOTE(review): is_ping_available exists on the create schema but not here - confirm this is intentional
+  description: Full MCP client configuration (used in responses)
+  type: object
+  properties:
+    client_id:
+      description: Unique identifier for the MCP client
+      type: string
+    name:
+      description: Display name for the MCP client
+      type: string
+    is_code_mode_client:
+      description: Whether this client is available in code mode
+      type: boolean
+    connection_type:
+      $ref: "#/MCPConnectionType"
+    connection_string:
+      description: HTTP or SSE URL (required for HTTP or SSE connections)
+      type: string
+    stdio_config:
+      $ref: "#/MCPStdioConfig"
+    auth_type:
+      description: Authentication type for the MCP connection
+      $ref: "#/MCPAuthType"
+    oauth_config_id:
+      description: |
+        OAuth config ID for OAuth authentication.
+        References the oauth_configs table.
+        Only set when auth_type is "oauth".
+      type: string
+    headers:
+      description: |
+        Custom headers to include in requests.
+        Only used when auth_type is "headers".
+      type: object
+      additionalProperties:
+        type: string
+    tools_to_execute:
+      description: |
+        Include-only list for tools.
+        ["*"] => all tools are included
+        [] => no tools are included
+        ["tool1", "tool2"] => include only the specified tools
+      type: array
+      items:
+        type: string
+    tools_to_auto_execute:
+      description: |
+        List of tools that can be auto-executed without user approval.
+        Must be a subset of tools_to_execute.
+        ["*"] => all executable tools can be auto-executed
+        [] => no tools are auto-executed
+        ["tool1", "tool2"] => only specified tools can be auto-executed
+      type: array
+      items:
+        type: string
+    tool_pricing:
+      description: |
+        Per-tool cost in USD for execution.
+        Key is the tool name, value is the cost per execution.
+        Example: {"read_file": 0.001, "write_file": 0.002}
+      type: object
+      additionalProperties:
+        type: number
+        format: double
+    allow_on_all_virtual_keys:
+      description: |
+        When true, this MCP client's tools are accessible to all virtual keys without requiring
+        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
+        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
+      type: boolean
+      default: false
+
+ChatToolFunction:
+  description: Tool function definition
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      type: string
+    parameters:
+      additionalProperties: true
+      type: object
+    strict:
+      type: boolean
+
+MCPVKConfigResponse:
+  description: Per-virtual-key tool access configuration as returned in list/get responses
+  type: object
+  properties:
+    virtual_key_id:
+      description: ID of the virtual key
+      type: string
+    virtual_key_name:
+      description: Display name of the virtual key
+      type: string
+    tools_to_execute:
+      description: |
+        Tools this virtual key is allowed to call on this MCP client.
+        ["*"] => all tools allowed
+        ["tool1", "tool2"] => only the specified tools
+      type: array
+      items:
+        type: string
+
+MCPClient:
+  description: Connected MCP client with its tools
+  type: object
+  properties:
+    config:
+      $ref: "#/MCPClientConfig"
+    tools:
+      type: array
+      items:
+        $ref: "#/ChatToolFunction"
+    state:
+      $ref: "#/MCPConnectionState"
+    vk_configs:
+      description: Virtual key assignments for this MCP client
+      type: array
+      items:
+        $ref: "#/MCPVKConfigResponse"
+
+ExecuteToolRequest:  # NOTE(review): title/description beside $ref are ignored by strict OpenAPI 3.0 tooling - confirm target spec version
+  description: |
+    MCP tool execution request. The schema depends on the `format` query parameter:
+    - `format=chat` or empty (default): Use `ChatAssistantMessageToolCall` schema
+    - `format=responses`: Use `ResponsesToolMessage` schema
+  oneOf:
+    - $ref: "../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall"
+      title: Chat (Default)
+      description: Chat format - uses ChatAssistantMessageToolCall schema
+    - $ref: "#/ResponsesToolMessage"
+      title: Responses
+      description: Responses format - uses ResponsesToolMessage schema
+
+ExecuteToolResponse:
+  description: |
+    MCP tool execution response.
+  oneOf:
+    - $ref: "../../schemas/inference/chat.yaml#/ChatMessage"
+      title: Chat (Default)
+      description: Chat format response
+    - $ref: "../../schemas/inference/responses.yaml#/ResponsesMessage"
+      title: Responses
+      description: Responses format response
+
+ResponsesToolMessage:
+  description: Tool message for Responses API format
+  type: object
+  required:
+    - name
+  properties:
+    call_id:
+      description: Common call ID for tool calls and outputs
+      type: string
+    name:
+      description: Tool function name (required for execution)
+      type: string
+    arguments:
+      description: Tool function arguments as JSON string
+      type: string
+    output:
+      description: Tool execution output
+      type: object
+      additionalProperties: true
+    action:
+      description: Tool action configuration
+      type: object
+      additionalProperties: true
+    error:
+      description: Error message if tool execution failed
+      type: string
--- a/docs/openapi/schemas/management/oauth.yaml
+++ b/docs/openapi/schemas/management/oauth.yaml
@@ -0,0 +1,305 @@
+# OAuth API schemas
+
+MCPAuthType:  # also defined in management/mcp.yaml
+  description: |
+    Authentication type for MCP connections:
+    - none: No authentication
+    - headers: Header-based authentication (API keys, custom headers, etc.)
+    - oauth: OAuth 2.0 authentication (shared admin token)
+    - per_user_oauth: Per-user OAuth 2.1 (each end-user authenticates individually)
+  type: string
+  enum: [none, headers, oauth, per_user_oauth]
+
+OAuthConfigRequest:  # NOTE(review): descriptions refer to server_url, which is not a property of this schema - confirm where it comes from
+  description: OAuth configuration for MCP client creation
+  type: object
+  properties:
+    client_id:
+      description: |
+        OAuth client ID. Optional if client supports dynamic client registration (RFC 7591).
+        If not provided, the server_url must be set for OAuth discovery and dynamic registration.
+      type: string
+    client_secret:
+      description: |
+        OAuth client secret. Optional for public clients using PKCE or clients obtained via dynamic registration.
+      type: string
+    authorize_url:
+      description: |
+        OAuth authorization endpoint URL. Optional - will be discovered from server_url if not provided.
+      type: string
+    token_url:
+      description: |
+        OAuth token endpoint URL. Optional - will be discovered from server_url if not provided.
+      type: string
+    registration_url:
+      description: |
+        Dynamic client registration endpoint URL (RFC 7591). Optional - will be discovered from server_url if not provided.
+      type: string
+    scopes:
+      description: |
+        OAuth scopes requested. Optional - can be discovered from server_url if not provided.
+        Example: ["read", "write"]
+      type: array
+      items:
+        type: string
+
+OAuthFlowInitiation:
+  description: Response when initiating an OAuth flow
+  type: object
+  properties:
+    status:
+      type: string
+      enum: [pending_oauth]
+    message:
+      type: string
+    oauth_config_id:
+      description: ID of the OAuth config created for this flow
+      type: string
+    authorize_url:
+      description: URL to redirect the user to for authorization
+      type: string
+    expires_at:
+      description: When the OAuth authorization request expires
+      type: string
+      format: date-time
+    mcp_client_id:
+      description: The MCP client ID that initiated this OAuth flow
+      type: string
+
+OAuthConfigStatus:
+  description: Status of an OAuth configuration
+  type: object
+  properties:
+    id:
+      description: OAuth config ID
+      type: string
+    status:
+      description: |
+        Current status of the OAuth flow:
+        - pending: User has not yet authorized
+        - authorized: User authorized and token is stored
+        - failed: Authorization failed
+      type: string
+      enum: [pending, authorized, failed]
+    created_at:
+      description: When this OAuth config was created
+      type: string
+      format: date-time
+    expires_at:
+      description: When this OAuth config expires (becomes invalid if not completed)
+      type: string
+      format: date-time
+    token_id:
+      description: ID of the associated OAuth token (only present if status is authorized)
+      type: string
+    token_expires_at:
+      description: When the OAuth access token expires (only present if status is authorized)
+      type: string
+      format: date-time
+    token_scopes:
+      description: Scopes granted in the OAuth token (only present if status is authorized)
+      type: array
+      items:
+        type: string
+
+OAuthToken:
+  description: OAuth access and refresh tokens
+  type: object
+  properties:
+    id:
+      description: Unique token identifier
+      type: string
+    access_token:
+      description: OAuth access token
+      type: string
+    refresh_token:
+      description: OAuth refresh token for obtaining new access tokens
+      type: string
+    token_type:
+      description: Token type (typically "Bearer")
+      type: string
+    expires_at:
+      description: When the access token expires
+      type: string
+      format: date-time
+    scopes:
+      description: Scopes granted in this token
+      type: array
+      items:
+        type: string
+    last_refreshed_at:
+      description: When the token was last refreshed
+      type: string
+      format: date-time
+
+# Per-User OAuth 2.1 Authorization Server schemas
+
+PerUserOAuthClientRegistrationRequest:
+  description: |
+    Dynamic Client Registration request per RFC 7591.
+    MCP clients (Claude Code, Cursor, etc.) call this to obtain a client_id
+    before initiating the authorization flow.
+  type: object
+  required:
+    - redirect_uris
+  properties:
+    client_name:
+      description: Human-readable name of the client application
+      type: string
+      example: Claude Code
+    redirect_uris:
+      description: List of allowed redirect URIs for this client
+      type: array
+      items:
+        type: string
+      example: ["http://localhost:54321/callback"]
+    grant_types:
+      description: Supported grant types. Defaults to ["authorization_code"]
+      type: array
+      items:
+        type: string
+      example: ["authorization_code"]
+    response_types:
+      description: Supported response types
+      type: array
+      items:
+        type: string
+      example: ["code"]
+    token_endpoint_auth_method:
+      description: Token endpoint authentication method. Always "none" (public client)
+      type: string
+      example: none
+    scope:
+      description: Space-separated list of requested scopes
+      type: string
+      example: "mcp:read mcp:write"
+
+PerUserOAuthClientRegistrationResponse:
+  description: Dynamic Client Registration response per RFC 7591
+  type: object
+  properties:
+    client_id:
+      description: Issued client identifier
+      type: string
+      example: "550e8400-e29b-41d4-a716-446655440000"
+    client_name:
+      description: Human-readable name of the client application
+      type: string
+    redirect_uris:
+      description: Registered redirect URIs
+      type: array
+      items:
+        type: string
+    grant_types:
+      description: Registered grant types
+      type: array
+      items:
+        type: string
+    response_types:
+      description: Registered response types
+      type: array
+      items:
+        type: string
+    token_endpoint_auth_method:
+      description: Token endpoint authentication method (always "none")
+      type: string
+
+PerUserOAuthTokenResponse:
+  description: OAuth 2.1 token response from the token endpoint
+  type: object
+  properties:
+    access_token:
+      description: Bifrost-issued access token (24h TTL). Use as Bearer token on /mcp requests.
+      type: string
+    token_type:
+      description: Token type, always "Bearer"
+      type: string
+      example: Bearer
+    expires_in:
+      description: Seconds until the access token expires (86400 for 24h)
+      type: integer
+      example: 86400
+    scope:
+      description: Space-separated scopes granted
+      type: string
+
+ProtectedResourceMetadata:
+  description: |
+    OAuth 2.0 Protected Resource Metadata per RFC 9728.
+    Returned by /.well-known/oauth-protected-resource to tell MCP clients
+    which authorization server(s) protect the /mcp endpoint.
+  type: object
+  properties:
+    resource:
+      description: URL of the protected resource (Bifrost's /mcp endpoint)
+      type: string
+      example: "https://your-bifrost-domain.com/mcp"
+    authorization_servers:
+      description: List of authorization server issuer URLs
+      type: array
+      items:
+        type: string
+      example: ["https://your-bifrost-domain.com"]
+    scopes_supported:
+      description: Scopes supported by this resource
+      type: array
+      items:
+        type: string
+      example: ["mcp:read", "mcp:write"]
+    bearer_methods_supported:
+      description: Supported methods for passing Bearer tokens
+      type: array
+      items:
+        type: string
+      example: ["header"]
+
+AuthorizationServerMetadata:
+  description: |
+    OAuth 2.0 Authorization Server Metadata per RFC 8414.
+    Returned by /.well-known/oauth-authorization-server to let MCP clients
+    discover Bifrost's OAuth endpoints and capabilities.
+  type: object
+  properties:
+    issuer:
+      description: Authorization server issuer URL (Bifrost base URL)
+      type: string
+      example: "https://your-bifrost-domain.com"
+    authorization_endpoint:
+      description: Authorization endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/authorize"
+    token_endpoint:
+      description: Token endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/token"
+    registration_endpoint:
+      description: Dynamic client registration endpoint URL
+      type: string
+      example: "https://your-bifrost-domain.com/api/oauth/per-user/register"
+    response_types_supported:
+      type: array
+      items:
+        type: string
+      example: ["code"]
+    grant_types_supported:
+      type: array
+      items:
+        type: string
+      example: ["authorization_code"]
+    code_challenge_methods_supported:
+      description: Supported PKCE methods (only S256)
+      type: array
+      items:
+        type: string
+      example: ["S256"]
+    token_endpoint_auth_methods_supported:
+      description: Supported token endpoint auth methods (public clients only)
+      type: array
+      items:
+        type: string
+      example: ["none"]
+    scopes_supported:
+      type: array
+      items:
+        type: string
+      example: ["mcp:read", "mcp:write"]
--- a/docs/openapi/schemas/management/plugins.yaml
+++ b/docs/openapi/schemas/management/plugins.yaml
@@ -0,0 +1,131 @@
+# Plugins API schemas
+
+PluginStatus:
+  description: Plugin status information
+  type: object
+  properties:
+    name:
+      description: Display name of the plugin
+      type: string
+    status:
+      type: string
+      enum: [active, error, disabled, loading, uninitialized, unloaded, loaded]
+    logs:
+      type: array
+      items:
+        type: string
+    types:
+      description: Plugin types indicating which interfaces the plugin implements
+      type: array
+      items:
+        type: string
+        enum: [llm, mcp, http, observability]
+  example:
+    name: my_custom_plugin
+    status: active
+    logs:
+      - "plugin my_custom_plugin initialized successfully"
+    types:
+      - llm
+      - http
+
+Plugin:
+  description: Plugin configuration
+  type: object
+  properties:
+    id:
+      description: Plugin ID (auto-generated)
+      type: integer
+    name:
+      description: Display name of the plugin (from config)
+      type: string
+    actualName:
+      description: Actual plugin name from GetName() (used as map key in plugin status). Only populated for active plugins.
+      type: string
+    enabled:
+      type: boolean
+    config:
+      additionalProperties: true
+      type: object
+    isCustom:
+      type: boolean
+    path:
+      type: string
+    status:
+      description: Current plugin status including types array (only populated for active plugins)
+      $ref: "#/PluginStatus"
+    created_at:
+      type: string
+      format: date-time
+    version:
+      type: integer
+      format: int16
+    updated_at:
+      type: string
+      format: date-time
+    config_hash:
+      type: string
+  example:
+    name: my_custom_plugin
+    actualName: MyCustomPlugin
+    enabled: true
+    config:
+      api_key: "xxx"
+    isCustom: true
+    path: "/plugins/my_custom_plugin.so"
+    status:
+      name: my_custom_plugin
+      status: active
+      logs:
+        - "plugin my_custom_plugin initialized successfully"
+      types:
+        - llm
+        - http
+
+ListPluginsResponse:
+  description: List plugins response
+  type: object
+  properties:
+    plugins:
+      type: array
+      items:
+        $ref: "#/Plugin"
+    count:
+      type: integer
+
+CreatePluginRequest:
+  description: Create plugin request
+  type: object
+  properties:
+    name:
+      type: string
+    enabled:
+      type: boolean
+    config:
+      additionalProperties: true
+      type: object
+    path:
+      type: string
+  required:
+    - name
+
+UpdatePluginRequest:
+  description: Update plugin request
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    config:
+      additionalProperties: true
+      type: object
+    path:
+      type: string
+
+PluginResponse:
+  description: Plugin operation response
+  type: object
+  properties:
+    message:
+      type: string
+    plugin:
+      $ref: "#/Plugin"
--- a/docs/openapi/schemas/management/prompts.yaml
+++ b/docs/openapi/schemas/management/prompts.yaml
@@ -0,0 +1,276 @@
+# Prompt Repository schemas
+
+Folder:
+  description: Prompt folder
+  type: object
+  properties:
+    id:
+      description: Unique folder ID (UUID)
+      type: string
+    name:
+      type: string
+    description:
+      nullable: true
+      type: string
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    prompts_count:
+      description: Number of prompts in this folder (virtual field)
+      type: integer
+
+Prompt:
+  description: Prompt
+  type: object
+  properties:
+    id:
+      description: Unique prompt ID (UUID)
+      type: string
+    name:
+      type: string
+    folder_id:
+      nullable: true
+      type: string
+    folder:
+      $ref: "#/Folder"
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    versions:
+      type: array
+      items:
+        $ref: "#/PromptVersion"
+    sessions:
+      type: array
+      items:
+        $ref: "#/PromptSession"
+    latest_version:
+      $ref: "#/PromptVersion"
+
+PromptVersion:
+  description: Prompt version (immutable snapshot)
+  type: object
+  properties:
+    id:
+      description: Auto-increment version ID
+      type: integer
+    prompt_id:
+      type: string
+    version_number:
+      type: integer
+    commit_message:
+      type: string
+    model_params:
+      description: Model parameters (flexible JSON object)
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+    is_latest:
+      type: boolean
+    created_at:
+      type: string
+      format: date-time
+    messages:
+      type: array
+      items:
+        $ref: "#/PromptVersionMessage"
+
+PromptVersionMessage:
+  description: Message within a prompt version
+  type: object
+  properties:
+    id:
+      type: integer
+    order_index:
+      type: integer
+    message:
+      description: Opaque message content (JSON)
+      type: object
+      additionalProperties: true
+
+PromptSession:
+  description: Prompt playground session
+  type: object
+  properties:
+    id:
+      description: Auto-increment session ID
+      type: integer
+    prompt_id:
+      type: string
+    version_id:
+      description: Version this session was forked from
+      type: integer
+      nullable: true
+    name:
+      type: string
+    model_params:
+      description: Model parameters (flexible JSON object)
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+    updated_at:
+      type: string
+      format: date-time
+    messages:
+      type: array
+      items:
+        $ref: "#/PromptSessionMessage"
+
+PromptSessionMessage:
+  description: Message within a prompt session
+  type: object
+  properties:
+    id:
+      type: integer
+    order_index:
+      type: integer
+    message:
+      description: Opaque message content (JSON)
+      type: object
+      additionalProperties: true
+
+# Request schemas
+
+CreateFolderRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      type: string
+  required:
+    - name
+
+UpdateFolderRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    description:
+      nullable: true
+      type: string
+
+CreatePromptRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    folder_id:
+      type: string
+  required:
+    - name
+
+UpdatePromptRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    folder_id:
+      nullable: true
+      type: string
+
+CreateVersionRequest:
+  type: object
+  properties:
+    commit_message:
+      type: string
+    messages:
+      description: Array of message objects
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - commit_message
+    - messages
+    - model_params
+    - provider
+    - model
+
+CreateSessionRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    version_id:
+      description: Fork from this version
+      type: integer
+    messages:
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - name
+    - model_params
+    - provider
+    - model
+
+UpdateSessionRequest:
+  type: object
+  properties:
+    name:
+      type: string
+    messages:
+      type: array
+      items:
+        type: object
+        additionalProperties: true
+    model_params:
+      type: object
+      additionalProperties: true
+    provider:
+      type: string
+    model:
+      type: string
+  required:
+    - name
+    - messages
+    - model_params
+    - provider
+    - model
+
+RenameSessionRequest:
+  type: object
+  properties:
+    name:
+      type: string
+  required:
+    - name
+
+CommitSessionRequest:
+  type: object
+  properties:
+    commit_message:
+      type: string
+  required:
+    - commit_message
--- a/docs/openapi/schemas/management/providers.yaml
+++ b/docs/openapi/schemas/management/providers.yaml
@@ -0,0 +1,491 @@
+# Providers API schemas
+
+# ProviderStatus: string enum with exactly three allowed values.
+ProviderStatus:
+  description: Status of the provider
+  type: string
+  enum:
+    - active
+    - error
+    - deleted
+
+# NetworkConfig: no field is required. The two retry_backoff_* values are int64
+# integers documented as milliseconds; extra_headers maps header name to string.
+NetworkConfig:
+  description: Network configuration for provider connections
+  type: object
+  properties:
+    base_url:
+      description: Base URL for the provider (optional)
+      type: string
+    extra_headers:
+      description: Additional headers to include in requests
+      type: object
+      additionalProperties:
+        type: string
+    default_request_timeout_in_seconds:
+      description: Default timeout for requests
+      type: integer
+    max_retries:
+      description: Maximum number of retries
+      type: integer
+    retry_backoff_initial:
+      description: Initial backoff duration in milliseconds
+      type: integer
+      format: int64
+    retry_backoff_max:
+      description: Maximum backoff duration in milliseconds
+      type: integer
+      format: int64
+    insecure_skip_verify:
+      description: Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments.
+      type: boolean
+    ca_cert_pem:
+      description: PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)
+      type: string
+
+# ConcurrencyAndBufferSize: two optional integer settings.
+ConcurrencyAndBufferSize:
+  description: Concurrency settings
+  type: object
+  properties:
+    concurrency:
+      description: Number of concurrent operations
+      type: integer
+    buffer_size:
+      description: Size of the buffer
+      type: integer
+
+# ProxyConfig: proxy settings; `type` is limited to the four enum values below
+# and no field is required.
+ProxyConfig:
+  type: object
+  description: Proxy configuration
+  properties:
+    type:
+      type: string
+      enum: [none, http, socks5, environment]
+    url:
+      type: string
+    username:
+      type: string
+    password:
+      type: string
+      # OpenAPI "password" format: a hint that UIs should obscure this value.
+      format: password
+    ca_cert_pem:
+      type: string
+
+# AzureKeyConfig: five EnvVar-typed settings plus an optional list of scope strings.
+AzureKeyConfig:
+  description: Azure-specific key configuration
+  type: object
+  properties:
+    endpoint:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    api_version:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    client_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    client_secret:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    tenant_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    scopes:
+      description: List of scopes to use for authentication
+      type: array
+      items:
+        type: string
+
+# VertexKeyConfig: four optional settings, each referencing the shared EnvVar schema.
+VertexKeyConfig:
+  description: Vertex-specific key configuration
+  type: object
+  properties:
+    project_id:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    project_number:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    region:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    auth_credentials:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+
+# BedrockKeyConfig: five EnvVar-typed settings plus batch_s3_config, which holds
+# a list of bucket entries (bucket_name, prefix, is_default).
+BedrockKeyConfig:
+  description: AWS Bedrock-specific key configuration
+  type: object
+  properties:
+    access_key:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    secret_key:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    session_token:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    region:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    arn:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    batch_s3_config:
+      type: object
+      properties:
+        buckets:
+          type: array
+          # Each list element describes one bucket.
+          items:
+            type: object
+            properties:
+              bucket_name:
+                type: string
+              prefix:
+                type: string
+              is_default:
+                type: boolean
+
+# VllmKeyConfig: url (EnvVar) is required; model_name is an optional string.
+VllmKeyConfig:
+  description: VLLM-specific key configuration
+  type: object
+  required: [url]
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    model_name:
+      type: string
+
+# OllamaKeyConfig: a single required property, url, typed as EnvVar.
+OllamaKeyConfig:
+  description: Ollama-specific key configuration
+  type: object
+  required: [url]
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+
+# ReplicateKeyConfig: one optional boolean toggle.
+ReplicateKeyConfig:
+  description: Replicate-specific key configuration
+  type: object
+  properties:
+    use_deployments_endpoint:
+      description: Whether to use the deployments endpoint instead of the models endpoint
+      type: boolean
+
+# SglKeyConfig: a single required property, url, typed as EnvVar.
+SglKeyConfig:
+  description: SGLang-specific key configuration
+  type: object
+  required: [url]
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+
+# vLLM per-key routing configuration. This key must appear only once in the
+# file: duplicate mapping keys are invalid YAML and most parsers silently keep
+# only the last occurrence.
+# NOTE(review): VllmKeyConfig in this file declares the same fields under a
+# differently-cased name and is the one referenced by Key — confirm both are needed.
+VLLMKeyConfig:
+  type: object
+  description: vLLM-specific key configuration for per-key routing to different vLLM instances
+  properties:
+    url:
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+      description: vLLM server base URL (required)
+    model_name:
+      type: string
+      description: Exact model name served on this vLLM instance
+  # url is documented as required above; enforce it the same way VllmKeyConfig,
+  # OllamaKeyConfig and SglKeyConfig do.
+  required:
+    - url
+
+# Key: one API key entry. No field is required. Provider-specific settings hang
+# off the *_key_config properties, each referencing a schema in this file.
+# NOTE(review): `value` places a description next to $ref; OpenAPI 3.0 tooling
+# ignores $ref siblings — confirm the targeted spec version.
+Key:
+  description: API key configuration
+  type: object
+  properties:
+    id:
+      description: Unique identifier for the key
+      type: string
+    name:
+      description: Name of the key
+      type: string
+    value:
+      description: API key value (redacted in responses)
+      $ref: '../../schemas/management/common.yaml#/EnvVar'
+    models:
+      description: List of models this key can access (whitelist)
+      type: array
+      items:
+        type: string
+    blacklisted_models:
+      description: List of models this key cannot access (blacklist)
+      type: array
+      items:
+        type: string
+    weight:
+      description: Weight for load balancing
+      type: number
+    aliases:
+      description: Model alias mappings — maps a user-facing model name to a provider-specific identifier (deployment name, inference profile ID, fine-tuned model ID, etc.)
+      type: object
+      # Both alias names (keys) and their targets (values) must be non-empty.
+      propertyNames:
+        minLength: 1
+      additionalProperties:
+        minLength: 1
+        type: string
+    azure_key_config:
+      $ref: '#/AzureKeyConfig'
+    vertex_key_config:
+      $ref: '#/VertexKeyConfig'
+    bedrock_key_config:
+      $ref: '#/BedrockKeyConfig'
+    vllm_key_config:
+      $ref: '#/VllmKeyConfig'
+    ollama_key_config:
+      $ref: '#/OllamaKeyConfig'
+    sgl_key_config:
+      $ref: '#/SglKeyConfig'
+    replicate_key_config:
+      $ref: '#/ReplicateKeyConfig'
+    enabled:
+      description: Whether the key is active (defaults to true)
+      type: boolean
+    use_for_batch_api:
+      description: Whether this key can be used for batch API operations
+      type: boolean
+    config_hash:
+      description: Hash of config.json version, used for change detection
+      type: string
+    status:
+      description: Status of key (e.g., success, list_models_failed)
+      type: string
+    description:
+      description: Error or status description for the key
+      type: string
+
+# AllowedRequests: one optional boolean flag per request type.
+# Every property has the identical shape, so each is written as a compact leaf.
+AllowedRequests:
+  description: Allowed request types for custom providers
+  type: object
+  properties:
+    list_models: {type: boolean}
+    text_completion: {type: boolean}
+    text_completion_stream: {type: boolean}
+    chat_completion: {type: boolean}
+    chat_completion_stream: {type: boolean}
+    responses: {type: boolean}
+    responses_stream: {type: boolean}
+    count_tokens: {type: boolean}
+    embedding: {type: boolean}
+    speech: {type: boolean}
+    speech_stream: {type: boolean}
+    transcription: {type: boolean}
+    transcription_stream: {type: boolean}
+    image_generation: {type: boolean}
+    image_generation_stream: {type: boolean}
+    batch_create: {type: boolean}
+    batch_list: {type: boolean}
+    batch_retrieve: {type: boolean}
+    batch_cancel: {type: boolean}
+    batch_results: {type: boolean}
+    file_upload: {type: boolean}
+    file_list: {type: boolean}
+    file_retrieve: {type: boolean}
+    file_delete: {type: boolean}
+    file_content: {type: boolean}
+
+# CustomProviderConfig: no field is required. request_path_overrides is a
+# string-to-string map; allowed_requests references AllowedRequests in this file.
+CustomProviderConfig:
+  description: Custom provider configuration
+  type: object
+  properties:
+    is_key_less:
+      type: boolean
+    base_provider_type:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    allowed_requests:
+      $ref: '#/AllowedRequests'
+    request_path_overrides:
+      additionalProperties:
+        type: string
+      type: object
+
+# ProviderResponse: provider settings plus status fields. It declares both
+# provider_status (ProviderStatus enum) and a free-form string status.
+ProviderResponse:
+  description: Provider configuration response
+  type: object
+  properties:
+    name:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+    provider_status:
+      $ref: '#/ProviderStatus'
+    status:
+      description: Operational status (e.g., list_models_failed)
+      type: string
+    description:
+      description: Error/status description
+      type: string
+    config_hash:
+      description: Hash of config.json version, used for change detection
+      type: string
+
+# ListProvidersResponse: an array of ProviderResponse items plus an integer total.
+ListProvidersResponse:
+  description: List providers response
+  type: object
+  properties:
+    providers:
+      items:
+        $ref: '#/ProviderResponse'
+      type: array
+    total:
+      type: integer
+
+# AddProviderRequest: only `provider` is required; the remaining settings are optional.
+AddProviderRequest:
+  description: Add provider request. Keys are managed separately via /api/providers/{provider}/keys.
+  type: object
+  required: [provider]
+  properties:
+    provider:
+      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+
+# UpdateProviderRequest: same settings as AddProviderRequest minus `provider`;
+# nothing is required.
+UpdateProviderRequest:
+  description: Update provider request. Keys are managed separately via /api/providers/{provider}/keys.
+  type: object
+  properties:
+    network_config:
+      $ref: '#/NetworkConfig'
+    concurrency_and_buffer_size:
+      $ref: '#/ConcurrencyAndBufferSize'
+    proxy_config:
+      $ref: '#/ProxyConfig'
+    send_back_raw_request:
+      type: boolean
+    send_back_raw_response:
+      type: boolean
+    store_raw_request_response:
+      type: boolean
+    custom_provider_config:
+      $ref: '#/CustomProviderConfig'
+
+# ListProviderKeysResponse: an array of Key items plus an integer total.
+ListProviderKeysResponse:
+  description: Response for listing keys for a provider
+  type: object
+  properties:
+    keys:
+      items:
+        $ref: '#/Key'
+      type: array
+    total:
+      type: integer
+
+# ModelResponse: model name, provider, and a list of strings in accessible_by_keys.
+ModelResponse:
+  description: Model information
+  type: object
+  properties:
+    name:
+      type: string
+    provider:
+      type: string
+    accessible_by_keys:
+      items:
+        type: string
+      type: array
+
+# Architecture: three string attributes plus string lists of input and output modalities.
+Architecture:
+  type: object
+  properties:
+    modality:
+      type: string
+    tokenizer:
+      type: string
+    instruct_type:
+      type: string
+    input_modalities:
+      items:
+        type: string
+      type: array
+    output_modalities:
+      items:
+        type: string
+      type: array
+
+# ModelDetailsResponse: the ModelResponse fields plus three integer limits and
+# an Architecture reference.
+ModelDetailsResponse:
+  description: Model details with capability metadata
+  type: object
+  properties:
+    name:
+      type: string
+    provider:
+      type: string
+    context_length:
+      type: integer
+    max_input_tokens:
+      type: integer
+    max_output_tokens:
+      type: integer
+    architecture:
+      $ref: '#/Architecture'
+    accessible_by_keys:
+      items:
+        type: string
+      type: array
+
+# ListModelsResponse: an array of ModelResponse items plus an integer total.
+ListModelsResponse:
+  description: List models response
+  type: object
+  properties:
+    models:
+      items:
+        $ref: '#/ModelResponse'
+      type: array
+    total:
+      type: integer
+
+# ListModelDetailsResponse: an array of ModelDetailsResponse items plus an integer total.
+ListModelDetailsResponse:
+  description: List model details response
+  type: object
+  properties:
+    models:
+      items:
+        $ref: '#/ModelDetailsResponse'
+      type: array
+    total:
+      type: integer
--- a/docs/openapi/schemas/management/session.yaml
+++ b/docs/openapi/schemas/management/session.yaml
@@ -0,0 +1,41 @@
+# Session API schemas
+
+# LoginRequest: credentials payload; both username and password are required.
+LoginRequest:
+  type: object
+  description: Login request
+  required:
+    - username
+    - password
+  properties:
+    username:
+      type: string
+    password:
+      type: string
+      # OpenAPI "password" format: a hint that UIs should obscure the input.
+      format: password
+
+# LoginResponse: a message string (with an example value) and a token string.
+LoginResponse:
+  description: Login response
+  type: object
+  properties:
+    message:
+      example: Login successful
+      type: string
+    token:
+      description: Session token
+      type: string
+
+# IsAuthEnabledResponse: two optional boolean flags.
+IsAuthEnabledResponse:
+  description: Auth enabled status response
+  type: object
+  properties:
+    is_auth_enabled:
+      type: boolean
+    has_valid_token:
+      type: boolean
+
+# LogoutResponse: a single message string with an example value.
+LogoutResponse:
+  description: Logout response
+  type: object
+  properties:
+    message:
+      example: Logout successful
+      type: string
--- a/docs/openapi/schemas/management/users.yaml
+++ b/docs/openapi/schemas/management/users.yaml
@@ -0,0 +1,295 @@
+# UserObject: user record. role_id and role are nullable; teams references
+# UserTeamSummaryEntry and access_profile references AccessProfile (both in this file).
+UserObject:
+  type: object
+  properties:
+    id:
+      description: Unique user identifier
+      type: string
+    name:
+      description: User's display name
+      type: string
+    email:
+      description: User's email address
+      type: string
+      format: email
+    role_id:
+      description: ID of the assigned RBAC role
+      type: integer
+      nullable: true
+    role:
+      description: RBAC role details
+      type: object
+      nullable: true
+      properties:
+        id:
+          type: integer
+        name:
+          type: string
+        description:
+          type: string
+        is_system_role:
+          type: boolean
+    created_at:
+      format: date-time
+      type: string
+    updated_at:
+      format: date-time
+      type: string
+    teams:
+      description: Teams the user belongs to.
+      type: array
+      items:
+        $ref: '#/UserTeamSummaryEntry'
+    access_profile:
+      $ref: '#/AccessProfile'
+
+# CreateUserRequest: name and email are required; role_id is optional.
+# email carries both the `email` format and an explicit regex pattern.
+CreateUserRequest:
+  type: object
+  required: [name, email]
+  properties:
+    name:
+      description: User's display name
+      type: string
+    email:
+      description: User's email address (must be unique)
+      type: string
+      format: email
+      pattern: '^[^\s@]+@[^\s@]+\.[^\s@]+$'
+    role_id:
+      description: Optional RBAC role ID to assign
+      type: integer
+
+# UserResponse: envelope holding a single UserObject under `user`.
+UserResponse:
+  type: object
+  properties:
+    user:
+      $ref: '#/UserObject'
+
+# ListUsersResponse: a list of UserObject items plus pagination fields.
+ListUsersResponse:
+  type: object
+  properties:
+    users:
+      items:
+        $ref: '#/UserObject'
+      type: array
+    total:
+      description: Total number of users matching the query
+      type: integer
+    page:
+      description: Current page number
+      type: integer
+    limit:
+      description: Number of users per page
+      type: integer
+    total_pages:
+      description: Total number of pages
+      type: integer
+    has_more:
+      description: Whether more pages are available
+      type: boolean
+
+# ---- User Permissions ----
+
+# PermissionsResponse: `permissions` is a two-level map whose leaf values are booleans.
+PermissionsResponse:
+  type: object
+  properties:
+    permissions:
+      description: >
+        Map of resource names to their permitted operations.
+        When SCIM is disabled, returns full permissions for all resources.
+      type: object
+      # Outer map values are objects; inner map values are booleans.
+      additionalProperties:
+        additionalProperties:
+          type: boolean
+        type: object
+
+# ---- User Role ----
+
+# AssignUserRoleRequest: a single required integer field, role_id.
+AssignUserRoleRequest:
+  type: object
+  required: [role_id]
+  properties:
+    role_id:
+      description: ID of the RBAC role to assign
+      type: integer
+
+# ---- User Teams ----
+
+# UserTeamSummaryEntry: team id and name plus two nullable business-unit fields.
+UserTeamSummaryEntry:
+  type: object
+  properties:
+    id:
+      description: Team ID
+      type: string
+    name:
+      description: Team name
+      type: string
+    business_unit_id:
+      description: Business unit ID associated with this team (if any)
+      type: string
+      nullable: true
+    business_unit_name:
+      description: Business unit name associated with this team (if any)
+      type: string
+      nullable: true
+
+# UserTeamEntry: team id and name plus a `source` string for the membership.
+UserTeamEntry:
+  type: object
+  properties:
+    id:
+      description: Team ID
+      type: string
+    name:
+      description: Team name
+      type: string
+    source:
+      description: How the user was added to this team (e.g. "manual", "scim_sync")
+      type: string
+
+# AccessProfile: the schema itself is nullable, as are parent_profile_id and
+# expires_at. The three timestamp fields use the date-time format.
+AccessProfile:
+  description: Active or fallback user access profile, if assigned.
+  type: object
+  nullable: true
+  properties:
+    id:
+      type: integer
+    user_id:
+      type: string
+    parent_profile_id:
+      nullable: true
+      type: integer
+    name:
+      type: string
+    is_active:
+      type: boolean
+    expires_at:
+      nullable: true
+      type: string
+      format: date-time
+    created_at:
+      format: date-time
+      type: string
+    updated_at:
+      format: date-time
+      type: string
+
+# UserTeamsResponse: envelope holding a list of UserTeamEntry items under `teams`.
+UserTeamsResponse:
+  type: object
+  properties:
+    teams:
+      items:
+        $ref: '#/UserTeamEntry'
+      type: array
+
+# UpdateUserTeamsRequest: a single required field, team_ids, holding a list of strings.
+UpdateUserTeamsRequest:
+  type: object
+  required: [team_ids]
+  properties:
+    team_ids:
+      description: List of team IDs to assign (replaces existing manual assignments; synced memberships are preserved)
+      type: array
+      items:
+        type: string
+
+# ---- Teams ----
+
+# TeamObject: team id and name, two integer counters, and two date-time timestamps.
+TeamObject:
+  type: object
+  properties:
+    id:
+      description: Team ID (derived from name)
+      type: string
+    name:
+      description: Team name
+      type: string
+    member_count:
+      description: Number of members in the team
+      type: integer
+    virtual_key_count:
+      description: Number of virtual keys assigned to the team
+      type: integer
+    created_at:
+      format: date-time
+      type: string
+    updated_at:
+      format: date-time
+      type: string
+
+# CreateTeamRequest: a single required string field, name.
+CreateTeamRequest:
+  type: object
+  required: [name]
+  properties:
+    name:
+      description: Team name (must be unique)
+      type: string
+
+# UpdateTeamRequest: a single optional string field, description.
+# NOTE(review): neither TeamObject nor CreateTeamRequest in this file declares
+# a description property — confirm this field is intended.
+UpdateTeamRequest:
+  type: object
+  properties:
+    description:
+      description: Updated team description
+      type: string
+
+# CreateTeamResponse: two string fields, id and name.
+CreateTeamResponse:
+  type: object
+  properties:
+    id: {type: string}
+    name: {type: string}
+
+# ListTeamsResponse: a list of TeamObject items plus pagination fields. Every
+# pagination field carries a description, mirroring ListUsersResponse.
+ListTeamsResponse:
+  type: object
+  properties:
+    teams:
+      type: array
+      items:
+        $ref: '#/TeamObject'
+    total:
+      type: integer
+      description: Total number of teams matching the query
+    page:
+      type: integer
+      description: Current page number
+    limit:
+      type: integer
+      description: Number of teams per page
+    total_pages:
+      type: integer
+      description: Total number of pages
+    has_more:
+      type: boolean
+      description: Whether more pages are available
+
+# ---- Team Members ----
+
+# TeamMemberObject: one member entry with user id, name, email and membership source.
+TeamMemberObject:
+  type: object
+  properties:
+    user_id:
+      type: string
+    user_name:
+      type: string
+    user_email:
+      type: string
+      # Same format annotation as UserObject.email, so tooling treats both alike.
+      format: email
+    source:
+      type: string
+      description: How the member was added (e.g. "manual", "scim_sync")
+
+# TeamMembersResponse: envelope holding a list of TeamMemberObject items under `members`.
+TeamMembersResponse:
+  type: object
+  properties:
+    members:
+      items:
+        $ref: '#/TeamMemberObject'
+      type: array
+
+# AddTeamMemberRequest: a single required string field, user_id.
+AddTeamMemberRequest:
+  type: object
+  required: [user_id]
+  properties:
+    user_id:
+      description: ID of the user to add to the team
+      type: string