first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/openapi/schemas/integrations/anthropic/batch.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/batch.yaml
@@ -0,0 +1,105 @@
+# Anthropic Integration Batch API Schemas
+
+# Object schema with a single mandatory property, `requests`.
+AnthropicBatchCreateRequest:
+  type: object
+  required: [requests]
+  properties:
+    requests:
+      description: Array of batch request items
+      type: array
+      items:
+        $ref: "#/AnthropicBatchRequestItem"
+
+# One entry of AnthropicBatchCreateRequest.requests; both properties are mandatory.
+AnthropicBatchRequestItem:
+  type: object
+  required: [custom_id, params]
+  properties:
+    custom_id:
+      description: Unique identifier for this request
+      type: string
+    params:
+      # Declared as a free-form object; the description points readers at
+      # AnthropicMessageRequest for the expected shape.
+      description: Request parameters (same as AnthropicMessageRequest)
+      type: object
+
+# Batch object schema; the list, retrieve and cancel responses in this file
+# all reference it.
+AnthropicBatchCreateResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message_batch
+    processing_status:
+      type: string
+      enum:
+        - in_progress
+        - ended
+        - canceling
+    request_counts:
+      $ref: "#/AnthropicBatchRequestCounts"
+    # ended_at, archived_at, cancel_initiated_at and results_url accept null;
+    # created_at and expires_at do not.
+    ended_at:
+      type: string
+      format: date-time
+      nullable: true
+    created_at:
+      type: string
+      format: date-time
+    expires_at:
+      type: string
+      format: date-time
+    archived_at:
+      type: string
+      format: date-time
+      nullable: true
+    cancel_initiated_at:
+      type: string
+      format: date-time
+      nullable: true
+    results_url:
+      type: string
+      nullable: true
+
+# Five optional integer properties, referenced from
+# AnthropicBatchCreateResponse.request_counts.
+# NOTE(review): presumably per-state tallies of the requests in a batch — confirm.
+AnthropicBatchRequestCounts:
+  type: object
+  properties:
+    processing:
+      type: integer
+    succeeded:
+      type: integer
+    errored:
+      type: integer
+    canceled:
+      type: integer
+    expired:
+      type: integer
+
+# Paging inputs for listing batches; neither property is required.
+AnthropicBatchListRequest:
+  type: object
+  properties:
+    # Declares a default of 20 when omitted.
+    page_size:
+      type: integer
+      default: 20
+    page_token:
+      type: string
+      description: Cursor for pagination
+
+# List envelope: `data` holds batch objects, followed by three paging fields.
+AnthropicBatchListResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: "#/AnthropicBatchCreateResponse"
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Pure alias of AnthropicBatchCreateResponse.
+AnthropicBatchRetrieveResponse:
+  $ref: "#/AnthropicBatchCreateResponse"
+
+# Pure alias of AnthropicBatchCreateResponse.
+AnthropicBatchCancelResponse:
+  $ref: "#/AnthropicBatchCreateResponse"
+
--- a/docs/openapi/schemas/integrations/anthropic/common.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/common.yaml
@@ -0,0 +1,53 @@
+# Anthropic Integration Common Types
+
+# Error envelope: a top-level `type` (default "error") plus a nested `error`
+# object carrying the error type and message. No property is marked required.
+AnthropicError:
+  type: object
+  properties:
+    type:
+      type: string
+      default: error
+    # Nested detail object; its own `type` is distinct from the envelope `type` above.
+    error:
+      type: object
+      properties:
+        type:
+          type: string
+          description: Error type (e.g., invalid_request_error, api_error)
+        message:
+          type: string
+          description: Error message
+
+# One model entry; used as the item type of AnthropicListModelsResponse.data.
+AnthropicModel:
+  type: object
+  properties:
+    id:
+      type: string
+      description: Model identifier
+    type:
+      type: string
+      default: model
+    display_name:
+      type: string
+    # String in date-time format.
+    created_at:
+      type: string
+      format: date-time
+
+# List envelope: `data` holds AnthropicModel entries, followed by three paging fields.
+AnthropicListModelsResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: "#/AnthropicModel"
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Anthropic Message Roles
+AnthropicMessageRole:
+  # Closed set of exactly two string values.
+  type: string
+  enum: [user, assistant]
--- a/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
@@ -0,0 +1,13 @@
+# Anthropic Integration Count Tokens Schemas
+
+AnthropicCountTokensRequest:
+  # Uses the same format as AnthropicMessageRequest: the single-entry allOf
+  # pulls that schema in unchanged, including its `required` list
+  # (model, max_tokens, messages).
+  # NOTE(review): confirm that max_tokens should be mandatory when only
+  # counting tokens.
+  allOf:
+    - $ref: './messages.yaml#/AnthropicMessageRequest'
+
+# Response object with a single optional integer property.
+AnthropicCountTokensResponse:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens
--- a/docs/openapi/schemas/integrations/anthropic/files.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/files.yaml
@@ -0,0 +1,102 @@
+# Anthropic Integration Files API Schemas
+
+# Upload payload; only `file` is mandatory.
+AnthropicFileUploadRequest:
+  type: object
+  required: [file]
+  properties:
+    file:
+      description: File to upload (raw file content)
+      type: string
+      format: binary
+    filename:
+      description: Original filename
+      type: string
+    purpose:
+      description: Purpose of the file (e.g., "batch")
+      type: string
+
+# File metadata object; also referenced by AnthropicFileListResponse.data and
+# AnthropicFileRetrieveResponse in this file.
+AnthropicFileUploadResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file
+    filename:
+      type: string
+    mime_type:
+      type: string
+      description: MIME type of the file
+    size_bytes:
+      type: integer
+      description: Size of the file in bytes
+    created_at:
+      type: string
+      format: date-time
+    # Boolean flag. NOTE(review): presumably whether the file content can be
+    # fetched back — confirm.
+    downloadable:
+      type: boolean
+
+# Listing inputs; all optional. `limit` declares a default of 30.
+AnthropicFileListRequest:
+  type: object
+  properties:
+    limit:
+      type: integer
+      default: 30
+    after:
+      description: Cursor for pagination (after_id)
+      type: string
+    order:
+      type: string
+      enum:
+        - asc
+        - desc
+
+# List envelope: `data` holds file metadata objects, followed by three paging fields.
+AnthropicFileListResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: "#/AnthropicFileUploadResponse"
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Carries the mandatory `file_id` string and nothing else.
+AnthropicFileRetrieveRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
+
+# Pure alias of AnthropicFileUploadResponse.
+AnthropicFileRetrieveResponse:
+  $ref: "#/AnthropicFileUploadResponse"
+
+# Carries the mandatory `file_id` string and nothing else.
+AnthropicFileDeleteRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
+
+# Object with an `id` and a `type` that defaults to file_deleted.
+AnthropicFileDeleteResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file_deleted
+
+# Carries the mandatory `file_id` string and nothing else.
+AnthropicFileContentRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/messages.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/messages.yaml
@@ -0,0 +1,403 @@
+# Anthropic Integration Messages API Schemas
+
+# Request schema for the Messages API. `model`, `max_tokens` and `messages`
+# are mandatory; every other property is optional.
+AnthropicMessageRequest:
+  type: object
+  required:
+    - model
+    - max_tokens
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model identifier (e.g., claude-3-opus-20240229)
+      example: claude-3-opus-20240229
+    max_tokens:
+      type: integer
+      description: Maximum tokens to generate
+    messages:
+      type: array
+      items:
+        $ref: '#/AnthropicMessage'
+      description: List of messages in the conversation
+    system:
+      # Wrapped in allOf because OpenAPI 3.0 tooling ignores keywords that sit
+      # next to a $ref; without the wrapper the description is dropped.
+      allOf:
+        - $ref: '#/AnthropicContent'
+      description: System prompt
+    cache_control:
+      # Same allOf wrapper as `system`, so the description survives.
+      allOf:
+        - $ref: '../../inference/common.yaml#/CacheControl'
+      description: Automatic caching directives for the whole request
+    metadata:
+      $ref: '#/AnthropicMetadata'
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    # Bounded to the closed range 0..1.
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    tools:
+      type: array
+      items:
+        $ref: '#/AnthropicTool'
+    tool_choice:
+      $ref: '#/AnthropicToolChoice'
+    mcp_servers:
+      type: array
+      items:
+        $ref: '#/AnthropicMCPServer'
+      description: MCP servers configuration (requires beta header)
+    thinking:
+      $ref: '#/AnthropicThinking'
+    output_format:
+      type: object
+      description: Structured output format (requires beta header)
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# One conversation message: a role plus its content, both mandatory.
+AnthropicMessage:
+  type: object
+  required: [role, content]
+  properties:
+    role:
+      $ref: "./common.yaml#/AnthropicMessageRole"
+    content:
+      $ref: "#/AnthropicContent"
+
+# Union of two shapes: a plain string, or a list of AnthropicContentBlock.
+AnthropicContent:
+  description: Content - can be a string or array of content blocks
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: "#/AnthropicContentBlock"
+
+# Flat content-block schema: `type` is the only mandatory property and selects
+# which of the optional properties below are relevant (see each description).
+AnthropicContentBlock:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image
+        - document
+        - tool_use
+        - server_tool_use
+        - tool_result
+        - web_search_result
+        - mcp_tool_use
+        - mcp_tool_result
+        - thinking
+        - redacted_thinking
+    text:
+      type: string
+      description: For text content
+    thinking:
+      type: string
+      description: For thinking content
+    signature:
+      type: string
+      description: For signature content
+    data:
+      type: string
+      description: For data content (encrypted data for redacted thinking)
+    tool_use_id:
+      type: string
+      description: For tool_result content
+    id:
+      type: string
+      description: For tool_use content
+    name:
+      type: string
+      description: For tool_use content
+    input:
+      type: object
+      description: For tool_use content
+    server_name:
+      type: string
+      description: For mcp_tool_use content
+    content:
+      # Wrapped in allOf because OpenAPI 3.0 tooling ignores keywords that sit
+      # next to a $ref; without the wrapper the description is dropped.
+      # The reference is recursive: AnthropicContent may itself hold content blocks.
+      allOf:
+        - $ref: '#/AnthropicContent'
+      description: For tool_result content
+    source:
+      # allOf wrapper keeps the description effective (see `content`).
+      allOf:
+        - $ref: '#/AnthropicSource'
+      description: For image/document content
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+    citations:
+      # allOf wrapper keeps the description effective (see `content`).
+      allOf:
+        - $ref: '#/AnthropicCitationsConfig'
+      description: For document content
+    context:
+      type: string
+      description: For document content
+    title:
+      type: string
+      description: For document content
+
+# Source descriptor for image/document blocks; `type` is mandatory and the
+# remaining properties apply to particular source types (see descriptions).
+AnthropicSource:
+  type: object
+  required: [type]
+  properties:
+    type:
+      type: string
+      enum:
+        - base64
+        - url
+        - text
+        - content_block
+    media_type:
+      description: MIME type (e.g., image/jpeg, application/pdf)
+      type: string
+    data:
+      description: Base64-encoded data (for base64 type)
+      type: string
+    url:
+      description: URL (for url type)
+      type: string
+
+# Single boolean switch; referenced from AnthropicContentBlock.citations.
+AnthropicCitationsConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+
+# Referenced from AnthropicMessageRequest.metadata; only `user_id` is declared.
+AnthropicMetadata:
+  type: object
+  properties:
+    user_id:
+      type: string
+
+# Referenced from AnthropicMessageRequest.thinking: an on/off `type` plus an
+# integer `budget_tokens`. Neither property is marked required.
+AnthropicThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - enabled
+        - disabled
+    budget_tokens:
+      type: integer
+
+# Flat tool definition: one schema covers custom tools and the versioned
+# tool types listed in the `type` enum. No property is marked required.
+AnthropicTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - custom
+        - bash_20250124
+        - computer_20250124
+        - computer_20251124
+        - code_execution_20250522
+        - code_execution_20250825
+        - code_execution_20260120
+        - text_editor_20250124
+        - text_editor_20250429
+        - text_editor_20250728
+        - web_search_20250305
+        - web_search_20260209
+        - web_fetch_20250910
+        - web_fetch_20260209
+        - web_fetch_20260309
+        - memory_20250818
+        - tool_search_tool_bm25
+        - tool_search_tool_bm25_20251119
+        - tool_search_tool_regex
+        - tool_search_tool_regex_20251119
+    name:
+      description: Tool name (for custom tools)
+      type: string
+    description:
+      type: string
+    input_schema:
+      description: JSON Schema for tool input
+      type: object
+    cache_control:
+      $ref: "../../inference/common.yaml#/CacheControl"
+    # Computer use tool settings
+    display_width_px:
+      type: integer
+    display_height_px:
+      type: integer
+    display_number:
+      type: integer
+    enable_zoom:
+      type: boolean
+    # Web search settings
+    max_uses:
+      type: integer
+    allowed_domains:
+      type: array
+      items:
+        type: string
+    blocked_domains:
+      type: array
+      items:
+        type: string
+    user_location:
+      $ref: "#/AnthropicToolWebSearchUserLocation"
+
+# Referenced from AnthropicTool.user_location. `type` admits a single value.
+AnthropicToolWebSearchUserLocation:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - approximate
+    city:
+      type: string
+    country:
+      type: string
+    timezone:
+      type: string
+
+# Tool-selection directive, referenced from AnthropicMessageRequest.tool_choice.
+# Declared as a plain object: the previous single-branch `oneOf` wrapper
+# validated exactly the same documents but made generators emit a one-member union.
+AnthropicToolChoice:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [auto, any, tool, none]
+    name:
+      type: string
+      description: Required when type is 'tool'
+    disable_parallel_tool_use:
+      type: boolean
+
+# Item type of AnthropicMessageRequest.mcp_servers. No property is marked required.
+AnthropicMCPServer:
+  type: object
+  properties:
+    type:
+      type: string
+    name:
+      type: string
+    url:
+      type: string
+    authorization_token:
+      description: Authorization token for the MCP server
+      type: string
+    tool_configuration:
+      $ref: "#/AnthropicMCPToolConfig"
+
+# Referenced from AnthropicMCPServer.tool_configuration: a boolean switch plus
+# a list of tool-name strings.
+AnthropicMCPToolConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    allowed_tools:
+      type: array
+      items:
+        type: string
+
+# Response types
+# Non-streaming message response. Also embedded in AnthropicStreamEvent.message.
+AnthropicMessageResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message
+    role:
+      type: string
+      default: assistant
+    content:
+      type: array
+      items:
+        $ref: '#/AnthropicContentBlock'
+    model:
+      type: string
+    stop_reason:
+      type: string
+      # The enum lists null, so the schema must also be nullable: under
+      # OpenAPI 3.0, `type: string` alone rejects null even when the enum
+      # contains it.
+      nullable: true
+      enum:
+        - end_turn
+        - max_tokens
+        - stop_sequence
+        - tool_use
+        - pause_turn
+        - refusal
+        - model_context_window_exceeded
+        - null
+    stop_sequence:
+      type: string
+      nullable: true
+    usage:
+      $ref: '#/AnthropicUsage'
+
+# Token accounting: four integer counters plus a nested cache-creation breakdown.
+AnthropicUsage:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    output_tokens:
+      type: integer
+    cache_creation_input_tokens:
+      type: integer
+    cache_read_input_tokens:
+      type: integer
+    cache_creation:
+      $ref: "#/AnthropicUsageCacheCreation"
+
+# Referenced from AnthropicUsage.cache_creation; two integer token counters.
+# NOTE(review): the 5m / 1h suffixes presumably denote cache lifetimes — confirm.
+AnthropicUsageCacheCreation:
+  type: object
+  properties:
+    ephemeral_5m_input_tokens:
+      type: integer
+    ephemeral_1h_input_tokens:
+      type: integer
+
+# Stream event types
+# Flat event schema: `type` names the event kind and the remaining optional
+# properties carry that event's payload. Nothing is marked required.
+AnthropicStreamEvent:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      enum: [message_start, content_block_start, content_block_delta, content_block_stop, message_delta, message_stop, ping, error]
+    message:
+      $ref: "#/AnthropicMessageResponse"
+    index:
+      type: integer
+    content_block:
+      $ref: "#/AnthropicContentBlock"
+    delta:
+      $ref: "#/AnthropicStreamDelta"
+    usage:
+      $ref: "#/AnthropicUsage"
+    error:
+      $ref: "#/AnthropicStreamError"
+
+# Referenced from AnthropicStreamEvent.delta. All properties are optional strings.
+AnthropicStreamDelta:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text_delta
+        - input_json_delta
+        - thinking_delta
+        - signature_delta
+    text:
+      type: string
+    partial_json:
+      type: string
+    thinking:
+      type: string
+    signature:
+      type: string
+    stop_reason:
+      type: string
+    stop_sequence:
+      type: string
+
+# Referenced from AnthropicStreamEvent.error; two optional string properties.
+AnthropicStreamError:
+  type: object
+  properties:
+    type:
+      type: string
+    message:
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/text.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/text.yaml
@@ -0,0 +1,62 @@
+# Anthropic Integration Text Completions Schemas (Legacy Complete API)
+
+# Request schema for the legacy Complete API. `model`, `prompt` and
+# `max_tokens_to_sample` are mandatory.
+AnthropicTextRequest:
+  type: object
+  required: [model, prompt, max_tokens_to_sample]
+  properties:
+    model:
+      description: Model identifier
+      type: string
+    prompt:
+      description: The prompt to complete
+      type: string
+    max_tokens_to_sample:
+      description: Maximum tokens to generate
+      type: integer
+    stream:
+      type: boolean
+    # Bounded to the closed range 0..1.
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# Response schema for the legacy Complete API.
+AnthropicTextResponse:
+  type: object
+  properties:
+    type:
+      type: string
+      default: completion
+    id:
+      type: string
+    completion:
+      type: string
+    stop_reason:
+      type: string
+      # The enum lists null, so the schema must also be nullable: under
+      # OpenAPI 3.0, `type: string` alone rejects null even when the enum
+      # contains it.
+      nullable: true
+      enum: [stop_sequence, max_tokens, null]
+    model:
+      type: string
+    # Inline usage object (not a shared schema reference).
+    usage:
+      type: object
+      properties:
+        input_tokens:
+          type: integer
+          description: Number of input tokens used
+        output_tokens:
+          type: integer
+          description: Number of output tokens generated
--- a/docs/openapi/schemas/integrations/bedrock/batch.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/batch.yaml
@@ -0,0 +1,153 @@
+# AWS Bedrock Batch API Schemas
+
+# Batch job creation payload. `roleArn`, `inputDataConfig` and
+# `outputDataConfig` are mandatory; the rest is optional.
+BedrockBatchJobRequest:
+  type: object
+  required: [roleArn, inputDataConfig, outputDataConfig]
+  properties:
+    modelId:
+      description: Model ID for the batch job (optional, can be specified in request)
+      type: string
+    jobName:
+      description: Name for the batch job
+      type: string
+    roleArn:
+      description: IAM role ARN for the job
+      type: string
+    # Input and output locations share the same nested shape:
+    # <config> -> s3...DataConfig -> s3Uri.
+    inputDataConfig:
+      type: object
+      properties:
+        s3InputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              description: S3 URI for input data
+              type: string
+    outputDataConfig:
+      type: object
+      properties:
+        s3OutputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              description: S3 URI for output data
+              type: string
+    timeoutDurationInHours:
+      description: Timeout in hours
+      type: integer
+    # List of key/value string pairs.
+    tags:
+      type: array
+      items:
+        type: object
+        properties:
+          key:
+            type: string
+          value:
+            type: string
+
+# Full batch job description; BedrockBatchRetrieveResponse reuses it.
+BedrockBatchJobResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
+      enum:
+        - Submitted
+        - InProgress
+        - Completed
+        - Failed
+        - Stopping
+        - Stopped
+        - PartiallyCompleted
+        - Expired
+        - Validating
+        - Scheduled
+    jobName:
+      type: string
+    modelId:
+      type: string
+    roleArn:
+      type: string
+    # Both data configs are left as free-form objects in the response.
+    inputDataConfig:
+      type: object
+    outputDataConfig:
+      type: object
+    vpcConfig:
+      type: object
+      properties:
+        securityGroupIds:
+          type: array
+          items:
+            type: string
+        subnetIds:
+          type: array
+          items:
+            type: string
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+    clientRequestToken:
+      type: string
+    jobExpirationTime:
+      type: string
+      format: date-time
+    timeoutDurationInHours:
+      type: integer
+
+# Filters and paging inputs for listing batch jobs; all optional.
+BedrockBatchListRequest:
+  type: object
+  properties:
+    maxResults:
+      type: integer
+    nextToken:
+      type: string
+    statusEquals:
+      type: string
+      # Same value set as BedrockBatchJobResponse.status, so a filter can only
+      # name a status a job can actually report.
+      enum: [Submitted, InProgress, Completed, Failed, Stopping, Stopped, PartiallyCompleted, Expired, Validating, Scheduled]
+    nameContains:
+      type: string
+
+# Page of job summaries plus the token for the next page.
+BedrockBatchListResponse:
+  type: object
+  properties:
+    invocationJobSummaries:
+      type: array
+      items:
+        $ref: "#/BedrockBatchJobSummary"
+    nextToken:
+      type: string
+
+# Item type of BedrockBatchListResponse.invocationJobSummaries: a reduced
+# view of a batch job.
+BedrockBatchJobSummary:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    jobName:
+      type: string
+    modelId:
+      type: string
+    status:
+      type: string
+      # Same value set as BedrockBatchJobResponse.status; the summary describes
+      # the same job, so it should not be an unconstrained string.
+      enum: [Submitted, InProgress, Completed, Failed, Stopping, Stopped, PartiallyCompleted, Expired, Validating, Scheduled]
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+
+# Wrapped in allOf because OpenAPI 3.0 tooling ignores keywords that sit next
+# to a $ref; as a bare sibling the description was dropped.
+BedrockBatchRetrieveResponse:
+  description: Uses same structure as BedrockBatchJobResponse
+  allOf:
+    - $ref: '#/BedrockBatchJobResponse'
+
+# Only `jobArn` and `status` are declared. `status` is an unconstrained string
+# here (no enum, unlike BedrockBatchJobResponse.status).
+BedrockBatchCancelResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
--- a/docs/openapi/schemas/integrations/bedrock/common.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/common.yaml
@@ -0,0 +1,15 @@
+# AWS Bedrock Integration Common Types
+
+# Flat error object: two optional string properties, no nesting.
+BedrockError:
+  type: object
+  properties:
+    message:
+      type: string
+    type:
+      type: string
+
+# Closed set of exactly two string values; referenced from converse.yaml.
+BedrockMessageRole:
+  type: string
+  enum: [user, assistant]
--- a/docs/openapi/schemas/integrations/bedrock/converse.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/converse.yaml
@@ -0,0 +1,367 @@
+# AWS Bedrock Converse API Schemas
+
+# Converse request body. No property is marked required.
+BedrockConverseRequest:
+  type: object
+  properties:
+    messages:
+      description: Array of messages for the conversation
+      type: array
+      items:
+        $ref: "#/BedrockMessage"
+    system:
+      description: System messages/prompts
+      type: array
+      items:
+        $ref: "#/BedrockSystemMessage"
+    inferenceConfig:
+      $ref: "#/BedrockInferenceConfig"
+    toolConfig:
+      $ref: "#/BedrockToolConfig"
+    guardrailConfig:
+      $ref: "#/BedrockGuardrailConfig"
+    additionalModelRequestFields:
+      description: Model-specific parameters
+      type: object
+    additionalModelResponseFieldPaths:
+      type: array
+      items:
+        type: string
+    performanceConfig:
+      $ref: "#/BedrockPerformanceConfig"
+    # Map from arbitrary key to BedrockPromptVariable.
+    promptVariables:
+      type: object
+      additionalProperties:
+        $ref: "#/BedrockPromptVariable"
+    # Map from arbitrary key to string.
+    requestMetadata:
+      type: object
+      additionalProperties:
+        type: string
+    serviceTier:
+      $ref: "#/BedrockServiceTier"
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# One conversation message; `role` and `content` are both mandatory and
+# content is always a list of blocks (never a bare string).
+BedrockMessage:
+  type: object
+  required: [role, content]
+  properties:
+    role:
+      $ref: "./common.yaml#/BedrockMessageRole"
+    content:
+      type: array
+      items:
+        $ref: "#/BedrockContentBlock"
+
+# Item type of BedrockConverseRequest.system. All three properties are optional.
+BedrockSystemMessage:
+  type: object
+  properties:
+    text:
+      type: string
+    guardContent:
+      $ref: "#/BedrockGuardContent"
+    cachePoint:
+      $ref: "#/BedrockCachePoint"
+
+# Content block keyed by payload kind: each optional property holds one kind
+# of content. No property is marked required.
+BedrockContentBlock:
+  type: object
+  properties:
+    text:
+      type: string
+    image:
+      $ref: "#/BedrockImageSource"
+    document:
+      $ref: "#/BedrockDocumentSource"
+    toolUse:
+      $ref: "#/BedrockToolUse"
+    toolResult:
+      $ref: "#/BedrockToolResult"
+    guardContent:
+      $ref: "#/BedrockGuardContent"
+    reasoningContent:
+      $ref: "#/BedrockReasoningContent"
+    json:
+      description: JSON content for tool call results
+      type: object
+    cachePoint:
+      $ref: "#/BedrockCachePoint"
+
+# Image payload: a format tag plus a `source` object holding the bytes
+# (`format: byte`, i.e. a base64-encoded string in OpenAPI).
+BedrockImageSource:
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - jpeg
+        - png
+        - gif
+        - webp
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+
+# Document payload: format tag, name, and a `source` that may carry either
+# bytes or plain text.
+BedrockDocumentSource:
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - pdf
+        - csv
+        - doc
+        - docx
+        - xls
+        - xlsx
+        - html
+        - txt
+        - md
+    name:
+      type: string
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+        text:
+          description: Plain text content (for text-based documents)
+          type: string
+
+# Referenced from BedrockContentBlock.toolUse: an id, a tool name and a
+# free-form `input` object.
+BedrockToolUse:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    name:
+      type: string
+    input:
+      type: object
+
+# Referenced from BedrockContentBlock.toolResult. Its `content` is again a list
+# of BedrockContentBlock, so the two schemas are mutually recursive.
+BedrockToolResult:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    content:
+      type: array
+      items:
+        $ref: "#/BedrockContentBlock"
+    status:
+      type: string
+      enum:
+        - success
+        - error
+
+# Referenced from BedrockSystemMessage and BedrockContentBlock.
+BedrockGuardContent:
+  type: object
+  properties:
+    # Note the nesting: the outer `text` is an object that itself contains a
+    # `text` string and a list of qualifier strings.
+    text:
+      type: object
+      properties:
+        text:
+          type: string
+        qualifiers:
+          type: array
+          items:
+            type: string
+
+# Referenced from BedrockContentBlock.reasoningContent: a `reasoningText`
+# object holding a text string and a signature string.
+BedrockReasoningContent:
+  type: object
+  properties:
+    reasoningText:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+
+# Cache marker object; `type` admits a single value.
+BedrockCachePoint:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - default
+
+# Referenced from BedrockConverseRequest.inferenceConfig. All properties are
+# optional and no numeric bounds are declared.
+BedrockInferenceConfig:
+  type: object
+  properties:
+    maxTokens:
+      type: integer
+    temperature:
+      type: number
+    topP:
+      type: number
+    stopSequences:
+      type: array
+      items:
+        type: string
+
+# Referenced from BedrockConverseRequest.toolConfig: the tool list plus an
+# optional selection directive.
+BedrockToolConfig:
+  type: object
+  properties:
+    tools:
+      type: array
+      items:
+        $ref: "#/BedrockTool"
+    toolChoice:
+      $ref: "#/BedrockToolChoice"
+
+# Item type of BedrockToolConfig.tools: either a `toolSpec` or a `cachePoint`
+# entry may be present (neither is marked required).
+BedrockTool:
+  type: object
+  properties:
+    toolSpec:
+      type: object
+      properties:
+        name:
+          type: string
+        description:
+          type: string
+        # The schema itself sits one level down, under the `json` key.
+        inputSchema:
+          type: object
+          properties:
+            json:
+              type: object
+    cachePoint:
+      $ref: "#/BedrockCachePoint"
+
+# Referenced from BedrockToolConfig.toolChoice. The choice is expressed by
+# which key is present: `auto` and `any` are empty objects, `tool` names a tool.
+# NOTE(review): the schema does not enforce that exactly one key is set —
+# confirm whether that constraint should be declared.
+BedrockToolChoice:
+  type: object
+  properties:
+    auto:
+      type: object
+    any:
+      type: object
+    tool:
+      type: object
+      properties:
+        name:
+          type: string
+
+# Referenced from BedrockConverseRequest.guardrailConfig.
+BedrockGuardrailConfig:
+  type: object
+  properties:
+    guardrailIdentifier:
+      type: string
+    guardrailVersion:
+      type: string
+    trace:
+      type: string
+      enum:
+        - enabled
+        - disabled
+
+# Used by both the Converse request and response; `latency` has two allowed values.
+BedrockPerformanceConfig:
+  type: object
+  properties:
+    latency:
+      type: string
+      enum:
+        - standard
+        - optimized
+
+# Value type of the BedrockConverseRequest.promptVariables map; holds a single
+# optional `text` string.
+BedrockPromptVariable:
+  type: object
+  properties:
+    text:
+      type: string
+
+# Used by both the Converse request and response; `type` has four allowed values.
+BedrockServiceTier:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - reserved
+        - priority
+        - default
+        - flex
+
+# Response types
+# Non-streaming Converse response. The generated message sits under
+# output.message.
+BedrockConverseResponse:
+  type: object
+  properties:
+    output:
+      type: object
+      properties:
+        message:
+          $ref: "#/BedrockMessage"
+    stopReason:
+      type: string
+      enum:
+        - end_turn
+        - tool_use
+        - max_tokens
+        - stop_sequence
+        - guardrail_intervened
+        - content_filtered
+    usage:
+      $ref: "#/BedrockUsage"
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    # Free-form objects: no inner structure is declared for these two.
+    additionalModelResponseFields:
+      type: object
+    trace:
+      type: object
+    performanceConfig:
+      $ref: "#/BedrockPerformanceConfig"
+    serviceTier:
+      $ref: "#/BedrockServiceTier"
+
+# Token accounting shared by BedrockConverseResponse and BedrockStreamEvent:
+# five optional integer counters.
+BedrockUsage:
+  type: object
+  properties:
+    inputTokens:
+      type: integer
+    outputTokens:
+      type: integer
+    totalTokens:
+      type: integer
+    cacheReadInputTokens:
+      type: integer
+    cacheWriteInputTokens:
+      type: integer
+
+# Stream event types
+# One schema for every stream event: there is no discriminator property, so
+# the event kind is implied by which optional fields are populated.
+BedrockStreamEvent:
+  description: Flat structure for streaming events matching actual Bedrock API response
+  type: object
+  properties:
+    role:
+      description: For messageStart events
+      type: string
+    contentBlockIndex:
+      description: For content block events
+      type: integer
+    delta:
+      $ref: "#/BedrockContentBlockDelta"
+    stopReason:
+      description: For messageStop events
+      type: string
+    start:
+      $ref: "#/BedrockContentBlockStart"
+    usage:
+      $ref: "#/BedrockUsage"
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    trace:
+      type: object
+    additionalModelResponseFields:
+      type: object
+    invokeModelRawChunk:
+      description: Raw bytes for legacy invoke stream
+      type: string
+      format: byte
+
+# Referenced from BedrockStreamEvent.delta: incremental text, reasoning or
+# tool-use content. All properties are optional.
+BedrockContentBlockDelta:
+  type: object
+  properties:
+    text:
+      type: string
+    reasoningContent:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+    toolUse:
+      type: object
+      properties:
+        # Declared as a string here, whereas BedrockToolUse.input is an object.
+        # NOTE(review): presumably a partial JSON fragment — confirm.
+        input:
+          type: string
+
+# Referenced from BedrockStreamEvent.start; only a `toolUse` start (id and
+# tool name) is declared.
+BedrockContentBlockStart:
+  type: object
+  properties:
+    toolUse:
+      type: object
+      properties:
+        toolUseId:
+          type: string
+        name:
+          type: string
--- a/docs/openapi/schemas/integrations/bedrock/invoke.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/invoke.yaml
@@ -0,0 +1,50 @@
+# AWS Bedrock Invoke API Schemas (Legacy/Raw Model Invocation)
+
+# Union-style schema listing parameter names from several provider formats
+# side by side (e.g. both `max_tokens` and `max_tokens_to_sample`, both `stop`
+# and `stop_sequences`). No property is marked required.
+BedrockInvokeRequest:
+  type: object
+  description: |
+    Raw model invocation request. The body format depends on the model provider.
+    For Anthropic models, use Anthropic format. For other models, use their native format.
+  properties:
+    prompt:
+      type: string
+      description: Text prompt to complete
+    max_tokens:
+      type: integer
+    max_tokens_to_sample:
+      type: integer
+      description: Anthropic-style max tokens
+    temperature:
+      type: number
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop:
+      type: array
+      items:
+        type: string
+    stop_sequences:
+      type: array
+      items:
+        type: string
+      description: Anthropic-style stop sequences
+    # Items are left as free-form objects rather than a shared message schema.
+    messages:
+      type: array
+      items:
+        type: object
+      description: For Claude 3 models
+    # Accepts either a single string or a list of strings.
+    system:
+      description: System prompt (string or array of strings)
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    anthropic_version:
+      type: string
+
+# Deliberately open: no properties are declared and any extra keys are allowed.
+BedrockInvokeResponse:
+  type: object
+  description: Raw model response. Format depends on the model provider.
+  additionalProperties: true
--- a/docs/openapi/schemas/integrations/cohere/chat.yaml
+++ b/docs/openapi/schemas/integrations/cohere/chat.yaml
@@ -0,0 +1,364 @@
+# Cohere v2 Chat API Schemas
+
+# Request body for the Cohere v2 chat endpoint. `model` and `messages` are the
+# only required fields; everything else tunes sampling, tools, or output shape.
+CohereChatRequest:
+  type: object
+  required:
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model to use for chat completion
+      example: command-r-plus
+    messages:
+      type: array
+      items:
+        $ref: '#/CohereMessage'
+      description: Array of message objects
+    tools:
+      type: array
+      items:
+        $ref: '#/CohereTool'
+    tool_choice:
+      $ref: '#/CohereToolChoice'
+    # Temperature is bounded to the closed range [0, 1] by this schema.
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    # Cohere uses the short names `p` and `k` for top-p and top-k sampling.
+    p:
+      type: number
+      description: Top-p sampling
+    k:
+      type: integer
+      description: Top-k sampling
+    max_tokens:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    frequency_penalty:
+      type: number
+    presence_penalty:
+      type: number
+    stream:
+      type: boolean
+    # NOTE(review): Cohere's public API documents OFF (not NONE) as the value
+    # that disables the safety instruction - confirm which one is accepted here.
+    safety_mode:
+      type: string
+      enum: [CONTEXTUAL, STRICT, NONE]
+    log_probs:
+      type: boolean
+    strict_tool_choice:
+      type: boolean
+    thinking:
+      $ref: '#/CohereThinking'
+    response_format:
+      $ref: '#/CohereResponseFormat'
+
+# One conversation turn in a Cohere chat request; only `role` is mandatory.
+CohereMessage:
+  type: object
+  required: [role]
+  properties:
+    role:
+      type: string
+      enum:
+        - system
+        - user
+        - assistant
+        - tool
+    content:
+      $ref: "#/CohereMessageContent"
+    tool_calls:
+      type: array
+      items:
+        $ref: "#/CohereToolCall"
+    tool_call_id:
+      type: string
+    tool_plan:
+      description: Chain-of-thought style reflection (assistant only)
+      type: string
+
+# Union type: plain text or a list of structured content blocks.
+CohereMessageContent:
+  description: Message content - can be a string or array of content blocks
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: "#/CohereContentBlock"
+
+# Tagged content part. The allowed `type` values mirror the names of the
+# payload properties declared next to it.
+CohereContentBlock:
+  type: object
+  required: [type]
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image_url
+        - thinking
+        - document
+    text:
+      type: string
+    image_url:
+      type: object
+      properties:
+        url: {type: string}
+    thinking:
+      type: string
+    document:
+      type: object
+      properties:
+        data: {type: object}
+        id: {type: string}
+
+# Tool definition offered to the model; `function` is the only declared kind.
+CohereTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - function
+    function:
+      type: object
+      properties:
+        name: {type: string}
+        description: {type: string}
+        parameters: {type: object}
+
+# String enum selecting how the model may use the supplied tools.
+CohereToolChoice:
+  description: Tool choice mode - AUTO lets the model decide, NONE disables tools, REQUIRED forces tool use
+  type: string
+  enum:
+    - AUTO
+    - NONE
+    - REQUIRED
+
+# A tool invocation: call ID, kind, and the function name with its arguments
+# (the arguments are typed as a string).
+CohereToolCall:
+  type: object
+  properties:
+    id: {type: string}
+    type:
+      type: string
+      enum:
+        - function
+    function:
+      type: object
+      properties:
+        name: {type: string}
+        arguments: {type: string}
+
+# Thinking toggle plus a token budget that must be at least 1 when given.
+CohereThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - enabled
+        - disabled
+    token_budget:
+      minimum: 1
+      type: integer
+
+# Output format selector with an optional JSON schema for structured output.
+CohereResponseFormat:
+  type: object
+  properties:
+    type:
+      description: Response format type
+      type: string
+      enum:
+        - text
+        - json_object
+    schema:
+      description: JSON schema for structured output (used with json_object type)
+      type: object
+
+# Response types
+# Non-streaming chat result: finish reason, the returned message, usage, and
+# log probabilities.
+CohereChatResponse:
+  type: object
+  properties:
+    id: {type: string}
+    finish_reason:
+      type: string
+      enum:
+        - COMPLETE
+        - STOP_SEQUENCE
+        - MAX_TOKENS
+        - TOOL_CALL
+        - ERROR
+        - TIMEOUT
+    message:
+      type: object
+      properties:
+        role: {type: string}
+        content:
+          type: array
+          items:
+            $ref: "#/CohereContentBlock"
+        tool_calls:
+          type: array
+          items:
+            $ref: "#/CohereToolCall"
+        tool_plan: {type: string}
+    usage:
+      $ref: "#/CohereUsage"
+    logprobs:
+      description: Log probabilities (if requested)
+      type: array
+      items:
+        $ref: "#/CohereLogProb"
+
+# Usage summary combining billed units, token counts, and cached tokens.
+CohereUsage:
+  type: object
+  properties:
+    billed_units:
+      $ref: "#/CohereBilledUnits"
+    tokens:
+      $ref: "#/CohereTokenUsage"
+    cached_tokens:
+      description: Cached tokens
+      type: integer
+
+# Billed quantities, one integer counter per billing dimension.
+CohereBilledUnits:
+  type: object
+  properties:
+    input_tokens:
+      description: Number of billed input tokens
+      type: integer
+    output_tokens:
+      description: Number of billed output tokens
+      type: integer
+    search_units:
+      description: Number of billed search units
+      type: integer
+    classifications:
+      description: Number of billed classification units
+      type: integer
+
+# Token counts for a call, split into input and output.
+CohereTokenUsage:
+  type: object
+  properties:
+    input_tokens:
+      description: Number of input tokens used
+      type: integer
+    output_tokens:
+      description: Number of output tokens produced
+      type: integer
+
+# Log-probability record for one text chunk: token IDs, the chunk text, and
+# one log probability per token.
+CohereLogProb:
+  type: object
+  properties:
+    token_ids:
+      description: Token IDs of each token in text chunk
+      type: array
+      items: {type: integer}
+    text:
+      description: Text chunk for log probabilities
+      type: string
+    logprobs:
+      description: Log probability of each token
+      type: array
+      items: {type: number}
+
+# Stream event types
+# Envelope for one streaming event; `type` names the event and `delta`
+# carries its payload.
+CohereChatStreamEvent:
+  type: object
+  properties:
+    type:
+      description: Type of streaming event
+      type: string
+      enum:
+        - message-start
+        - content-start
+        - content-delta
+        - content-end
+        - tool-plan-delta
+        - tool-call-start
+        - tool-call-delta
+        - tool-call-end
+        - citation-start
+        - citation-end
+        - message-end
+        - debug
+    id:
+      description: Event ID (for message-start)
+      type: string
+    index:
+      description: Index for indexed events
+      type: integer
+    delta:
+      $ref: "#/CohereStreamDelta"
+
+# Payload of a streaming event: message fragment, finish reason, and usage.
+CohereStreamDelta:
+  type: object
+  properties:
+    message:
+      $ref: "#/CohereStreamMessage"
+    finish_reason:
+      type: string
+      enum:
+        - COMPLETE
+        - STOP_SEQUENCE
+        - MAX_TOKENS
+        - TOOL_CALL
+        - ERROR
+        - TIMEOUT
+    usage:
+      $ref: "#/CohereUsage"
+
+# Message fragment inside a stream delta. `content`, `tool_calls`, and
+# `citations` each accept either a single item or an array of items.
+CohereStreamMessage:
+  type: object
+  properties:
+    role:
+      description: Message role (for message-start)
+      type: string
+    content:
+      description: Content for content events
+      oneOf:
+        - $ref: "#/CohereStreamContent"
+        - type: array
+          items:
+            $ref: "#/CohereStreamContent"
+    tool_plan:
+      description: Tool plan content (for tool-plan-delta)
+      type: string
+    tool_calls:
+      description: Tool calls (for tool-call events)
+      oneOf:
+        - $ref: "#/CohereToolCall"
+        - type: array
+          items:
+            $ref: "#/CohereToolCall"
+    citations:
+      description: Citations (for citation events)
+      oneOf:
+        - $ref: "#/CohereCitation"
+        - type: array
+          items:
+            $ref: "#/CohereCitation"
+
+# Streamed content fragment; only `text` and `thinking` payloads are declared.
+CohereStreamContent:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image_url
+        - thinking
+        - document
+    text: {type: string}
+    thinking: {type: string}
+
+# Citation span: start/end positions, the cited text, its sources, and the
+# kind of content it refers to.
+CohereCitation:
+  type: object
+  properties:
+    start:
+      description: Start position of cited text
+      type: integer
+    end:
+      description: End position of cited text
+      type: integer
+    text:
+      description: Cited text
+      type: string
+    sources:
+      type: array
+      items:
+        $ref: "#/CohereSource"
+    content_index:
+      description: Content index of the citation
+      type: integer
+    type:
+      description: Type of citation
+      type: string
+      enum:
+        - TEXT_CONTENT
+        - THINKING_CONTENT
+        - PLAN
+
+# Origin of a citation: a tool result or a document, as named by `type`.
+# `tool_output` and `document` hold the payload for the respective kind.
+CohereSource:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [tool, document]
+      description: Source type
+    id:
+      type: string
+      # The description says null is allowed; declare it with the OpenAPI 3.0
+      # `nullable` keyword so validators and generated clients accept null.
+      nullable: true
+      description: Source ID (nullable)
+    tool_output:
+      type: object
+      description: Tool output (for tool sources)
+    document:
+      type: object
+      description: Document data (for document sources)
--- a/docs/openapi/schemas/integrations/cohere/common.yaml
+++ b/docs/openapi/schemas/integrations/cohere/common.yaml
@@ -0,0 +1,14 @@
+# Cohere Integration Common Types
+
+# Flat error object: type, message, and an optional code.
+CohereError:
+  type: object
+  properties:
+    type:
+      description: Error type
+      type: string
+    message:
+      description: Error message
+      type: string
+    code:
+      description: Optional error code
+      type: string
--- a/docs/openapi/schemas/integrations/cohere/embed.yaml
+++ b/docs/openapi/schemas/integrations/cohere/embed.yaml
@@ -0,0 +1,172 @@
+# Cohere v2 Embed API Schemas
+
+# Request body for the Cohere v2 embed endpoint.
+# `model` and `input_type` are always required, and at least one input source
+# (`texts`, `images`, or `inputs`) must be supplied.
+CohereEmbeddingRequest:
+  type: object
+  required:
+    - model
+    - input_type
+  # The field descriptions state that at least one of texts, images, or inputs
+  # is required; encode that rule so validators enforce it too.
+  anyOf:
+    - required: [texts]
+    - required: [images]
+    - required: [inputs]
+  properties:
+    model:
+      type: string
+      description: ID of an available embedding model
+      example: embed-english-v3.0
+    input_type:
+      type: string
+      description: Specifies the type of input passed to the model. Required for embedding models v3 and higher.
+    texts:
+      type: array
+      items:
+        type: string
+      description: Array of strings to embed. Maximum 96 texts per call. At least one of texts, images, or inputs is required.
+      maxItems: 96
+    images:
+      type: array
+      items:
+        type: string
+      description: Array of image data URIs for multimodal embedding. Maximum 1 image per call. Supports JPEG, PNG, WebP, GIF up to 5MB.
+      maxItems: 1
+    inputs:
+      type: array
+      items:
+        $ref: '#/CohereEmbeddingInput'
+      description: Array of mixed text/image components for embedding. Maximum 96 per call.
+      maxItems: 96
+    embedding_types:
+      type: array
+      items:
+        type: string
+        # Allowed values are exactly the formats listed in the description.
+        enum: [float, int8, uint8, binary, ubinary, base64]
+      description: Specifies the return format types (float, int8, uint8, binary, ubinary, base64). Defaults to float if unspecified.
+    output_dimension:
+      type: integer
+      # Allowed values are exactly the sizes listed in the description.
+      enum: [256, 512, 1024, 1536]
+      description: Number of dimensions for output embeddings (256, 512, 1024, 1536). Available only for embed-v4 and newer models.
+    max_tokens:
+      type: integer
+      description: Maximum tokens to embed per input before truncation.
+    truncate:
+      type: string
+      # Machine-readable form of the default stated in the description.
+      default: END
+      description: Handling for inputs exceeding token limits. Defaults to END.
+
+# One mixed-content embedding input, built from chat content blocks.
+CohereEmbeddingInput:
+  type: object
+  properties:
+    content:
+      description: Array of content blocks (reuses chat content blocks)
+      type: array
+      items:
+        $ref: "./chat.yaml#/CohereContentBlock"
+
+# Embed result: the embeddings themselves, the original inputs, and metadata.
+CohereEmbeddingResponse:
+  type: object
+  properties:
+    id:
+      description: Response ID
+      type: string
+    embeddings:
+      $ref: "#/CohereEmbeddingData"
+    response_type:
+      description: Response type (embeddings_floats, embeddings_by_type)
+      type: string
+    texts:
+      description: Original text entries
+      type: array
+      items: {type: string}
+    images:
+      description: Original image entries
+      type: array
+      items:
+        $ref: "#/CohereEmbeddingImageInfo"
+    meta:
+      $ref: "#/CohereEmbeddingMeta"
+
+# Embeddings keyed by encoding. Every encoding except `base64` is an array of
+# numeric vectors; `base64` is an array of strings.
+CohereEmbeddingData:
+  description: Embedding data object with different types
+  type: object
+  properties:
+    float:
+      description: Float embeddings
+      type: array
+      items:
+        type: array
+        items: {type: number}
+    int8:
+      description: Int8 embeddings
+      type: array
+      items:
+        type: array
+        items: {type: integer}
+    uint8:
+      description: Uint8 embeddings
+      type: array
+      items:
+        type: array
+        items: {type: integer}
+    binary:
+      description: Binary embeddings
+      type: array
+      items:
+        type: array
+        items: {type: integer}
+    ubinary:
+      description: Unsigned binary embeddings
+      type: array
+      items:
+        type: array
+        items: {type: integer}
+    base64:
+      description: Base64-encoded embeddings
+      type: array
+      items: {type: string}
+
+# Dimensions, format, and bit depth of an image echoed back in the response.
+CohereEmbeddingImageInfo:
+  description: Image information in the response
+  type: object
+  properties:
+    width:
+      description: Width in pixels
+      type: integer
+    height:
+      description: Height in pixels
+      type: integer
+    format:
+      description: Image format
+      type: string
+    bit_depth:
+      description: Bit depth
+      type: integer
+
+# Response metadata; billing and token usage reuse the chat schemas.
+CohereEmbeddingMeta:
+  description: Metadata in embedding response
+  type: object
+  properties:
+    api_version:
+      $ref: "#/CohereEmbeddingAPIVersion"
+    billed_units:
+      $ref: "./chat.yaml#/CohereBilledUnits"
+    tokens:
+      $ref: "./chat.yaml#/CohereTokenUsage"
+    warnings:
+      description: Any warnings
+      type: array
+      items: {type: string}
+
+# API version string plus deprecation and experimental flags.
+CohereEmbeddingAPIVersion:
+  description: API version information
+  type: object
+  properties:
+    version:
+      description: API version
+      type: string
+    is_deprecated:
+      description: Deprecation status
+      type: boolean
+    is_experimental:
+      description: Experimental status
+      type: boolean
--- a/docs/openapi/schemas/integrations/cohere/tokenize.yaml
+++ b/docs/openapi/schemas/integrations/cohere/tokenize.yaml
@@ -0,0 +1,48 @@
+# Cohere Tokenize API Schemas
+
+# Tokenize request: the text to split and the model whose tokenizer applies.
+CohereCountTokensRequest:
+  type: object
+  required: [text, model]
+  properties:
+    model:
+      description: Model whose tokenizer should be used
+      type: string
+      example: command-r-plus
+    text:
+      description: Text to tokenize (1-65536 characters)
+      type: string
+      minLength: 1
+      maxLength: 65536
+
+# Tokenize result: token IDs, their string forms, and response metadata.
+CohereCountTokensResponse:
+  type: object
+  properties:
+    tokens:
+      description: Token IDs
+      type: array
+      items: {type: integer}
+    token_strings:
+      description: Token strings
+      type: array
+      items: {type: string}
+    meta:
+      $ref: "#/CohereTokenizeMeta"
+
+# Wrapper holding the API version block for tokenize responses.
+CohereTokenizeMeta:
+  description: Metadata returned by the tokenize endpoint
+  type: object
+  properties:
+    api_version:
+      $ref: "#/CohereTokenizeAPIVersion"
+
+# Version string reported by the tokenize endpoint.
+CohereTokenizeAPIVersion:
+  description: API version metadata
+  type: object
+  properties:
+    version:
+      description: API version
+      type: string
--- a/docs/openapi/schemas/integrations/genai/common.yaml
+++ b/docs/openapi/schemas/integrations/genai/common.yaml
@@ -0,0 +1,80 @@
+# Google GenAI (Gemini) Integration Common Types
+
+# Error envelope: a single `error` object with an integer code, a message,
+# a status string, and a list of detail entries.
+GeminiError:
+  type: object
+  properties:
+    error:
+      type: object
+      properties:
+        code:
+          type: integer
+        message:
+          type: string
+        status:
+          type: string
+        details:
+          type: array
+          items:
+            $ref: '#/GeminiErrorDetails'
+
+# One detail entry of a Gemini error: a type tag plus a list of field
+# violations, each with a description string.
+GeminiErrorDetails:
+  type: object
+  properties:
+    "@type":
+      description: Type identifier for the error details
+      type: string
+    fieldViolations:
+      type: array
+      items:
+        type: object
+        properties:
+          description: {type: string}
+
+# Model descriptor: identity fields, token limits, supported generation
+# methods, and the model's default sampling settings.
+GeminiModel:
+  type: object
+  properties:
+    name:
+      description: Model resource name (e.g., models/gemini-pro)
+      type: string
+    baseModelId: {type: string}
+    version: {type: string}
+    displayName: {type: string}
+    description: {type: string}
+    inputTokenLimit: {type: integer}
+    outputTokenLimit: {type: integer}
+    supportedGenerationMethods:
+      type: array
+      items: {type: string}
+    thinking:
+      description: Whether the model supports thinking mode
+      type: boolean
+    temperature:
+      description: Default temperature for the model
+      type: number
+    maxTemperature:
+      description: Maximum allowed temperature for the model
+      type: number
+    topP:
+      description: Default nucleus-sampling value
+      type: number
+    topK:
+      description: Default top-k sampling value
+      type: integer
+
+# List-models result: an array of GeminiModel entries plus `nextPageToken`.
+GeminiListModelsResponse:
+  type: object
+  properties:
+    models:
+      type: array
+      items:
+        $ref: '#/GeminiModel'
+    nextPageToken:
+      type: string
--- a/docs/openapi/schemas/integrations/genai/files.yaml
+++ b/docs/openapi/schemas/integrations/genai/files.yaml
@@ -0,0 +1,94 @@
+# Google GenAI (Gemini) Files API Schemas
+
+# Multipart form body for uploading a file through the Gemini Files API.
+# Only the binary `file` part is required; `metadata` optionally supplies a
+# display name. Undeclared parts are rejected (`additionalProperties: false`).
+GeminiFileUploadRequest:
+  type: object
+  # Literal block scalar (`|`) keeps the line breaks so the bullet list below
+  # renders as a list; a folded scalar (`>`) would join it into one paragraph.
+  description: |
+    Multipart upload for Gemini Files API. Send two parts:
+
+    - "metadata": JSON object {"file": {"displayName": "<optional label>"}}
+    - "file": binary content
+
+    Note: Direct file content download is not supported by Gemini Files API.
+    Use the file.uri field from the response to access the file.
+  required:
+    - file
+  properties:
+    metadata:
+      type: object
+      description: JSON metadata part; see encoding at the path for contentType application/json.
+      properties:
+        file:
+          type: object
+          properties:
+            displayName:
+              type: string
+          additionalProperties: false
+      additionalProperties: false
+    # The raw file bytes.
+    file:
+      type: string
+      format: binary
+  additionalProperties: false
+
+# File resource: identity, size and hash, timestamps, processing state, and
+# optional error or video metadata.
+GeminiFile:
+  type: object
+  properties:
+    name:
+      description: File resource name (e.g., files/abc123)
+      type: string
+    displayName: {type: string}
+    mimeType: {type: string}
+    sizeBytes:
+      description: Size in bytes (returned as string by Gemini API)
+      type: string
+    createTime:
+      format: date-time
+      type: string
+    updateTime:
+      format: date-time
+      type: string
+    expirationTime:
+      format: date-time
+      type: string
+    sha256Hash: {type: string}
+    uri:
+      description: URI for accessing the file content
+      type: string
+    state:
+      type: string
+      enum:
+        - STATE_UNSPECIFIED
+        - PROCESSING
+        - ACTIVE
+        - FAILED
+    error:
+      type: object
+      properties:
+        code: {type: integer}
+        message: {type: string}
+    videoMetadata:
+      type: object
+      properties:
+        videoDuration: {type: string}
+
+# Upload result: wraps the GeminiFile under `file`.
+GeminiFileUploadResponse:
+  type: object
+  properties:
+    file:
+      $ref: '#/GeminiFile'
+
+# List-files result: an array of GeminiFile entries plus `nextPageToken`.
+GeminiFileListResponse:
+  type: object
+  properties:
+    files:
+      type: array
+      items:
+        $ref: '#/GeminiFile'
+    nextPageToken:
+      type: string
+
+# Alias: retrieving a file returns the GeminiFile object itself, unwrapped.
+GeminiFileRetrieveResponse:
+  $ref: '#/GeminiFile'
+
+# Deletion result: an object with no declared properties.
+GeminiFileDeleteResponse:
+  description: Empty response on successful deletion
+  type: object
--- a/docs/openapi/schemas/integrations/genai/generation.yaml
+++ b/docs/openapi/schemas/integrations/genai/generation.yaml
--- a/docs/openapi/schemas/integrations/openai/audio.yaml
+++ b/docs/openapi/schemas/integrations/openai/audio.yaml
@@ -0,0 +1,90 @@
+# OpenAI Integration Audio Schemas (Speech and Transcription)
+
+# Speech (TTS) Request
+# Text-to-speech request body; `model` and `input` are required.
+OpenAISpeechRequest:
+  type: object
+  required: [model, input]
+  properties:
+    model:
+      description: Model identifier (e.g., tts-1, tts-1-hd)
+      type: string
+      example: tts-1
+    input:
+      description: Text to convert to speech
+      type: string
+    voice:
+      description: Voice to use
+      type: string
+      enum:
+        - alloy
+        - echo
+        - fable
+        - onyx
+        - nova
+        - shimmer
+    response_format:
+      type: string
+      enum:
+        - mp3
+        - opus
+        - aac
+        - flac
+        - wav
+        - pcm
+    speed:
+      type: number
+      minimum: 0.25
+      maximum: 4.0
+    stream_format:
+      description: Set to 'sse' for streaming
+      type: string
+      enum:
+        - sse
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items: {type: string}
+
+# Transcription Request
+# Audio transcription request; `model` and the binary `file` are required.
+OpenAITranscriptionRequest:
+  type: object
+  required: [model, file]
+  properties:
+    model:
+      description: Model identifier (e.g., whisper-1)
+      type: string
+      example: whisper-1
+    file:
+      description: Audio file to transcribe
+      type: string
+      format: binary
+    language:
+      description: Language of the audio (ISO 639-1)
+      type: string
+    prompt:
+      description: Prompt to guide transcription
+      type: string
+    response_format:
+      type: string
+      enum:
+        - json
+        - text
+        - srt
+        - verbose_json
+        - vtt
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    timestamp_granularities:
+      type: array
+      items:
+        type: string
+        enum:
+          - word
+          - segment
+    stream:
+      type: boolean
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items: {type: string}
+
+# Responses reuse inference schemas
+# Alias for SpeechResponse in the shared inference speech schemas.
+OpenAISpeechResponse:
+  $ref: '../../inference/speech.yaml#/SpeechResponse'
+
+# Alias for SpeechStreamResponse in the shared inference speech schemas.
+OpenAISpeechStreamResponse:
+  $ref: '../../inference/speech.yaml#/SpeechStreamResponse'
+
+# Alias for TranscriptionResponse in the shared inference transcription schemas.
+OpenAITranscriptionResponse:
+  $ref: '../../inference/transcription.yaml#/TranscriptionResponse'
+
+# Alias for TranscriptionStreamResponse in the shared inference transcription
+# schemas.
+OpenAITranscriptionStreamResponse:
+  $ref: '../../inference/transcription.yaml#/TranscriptionStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/batch.yaml
+++ b/docs/openapi/schemas/integrations/openai/batch.yaml
@@ -0,0 +1,57 @@
+# OpenAI Integration Batch API Schemas
+# Reuses inference batch schemas since OpenAI integration uses Bifrost format
+
+# Batch Create Request - uses Bifrost format with provider field
+# Alias for BatchCreateRequest in the shared inference batch schemas.
+OpenAIBatchCreateRequest:
+  $ref: '../../inference/batch.yaml#/BatchCreateRequest'
+
+# Alias for BatchCreateResponse in the shared inference batch schemas.
+OpenAIBatchCreateResponse:
+  $ref: '../../inference/batch.yaml#/BatchCreateResponse'
+
+# Parameters for listing batches: page size, cursor, and provider filter.
+OpenAIBatchListRequest:
+  type: object
+  properties:
+    limit:
+      description: Maximum number of batches to return
+      type: integer
+      default: 30
+    after:
+      description: Cursor for pagination
+      type: string
+    provider:
+      description: Filter by provider
+      type: string
+      example: openai
+
+# Alias for BatchListResponse in the shared inference batch schemas.
+OpenAIBatchListResponse:
+  $ref: '../../inference/batch.yaml#/BatchListResponse'
+
+# Parameters for fetching one batch; `batch_id` is required.
+OpenAIBatchRetrieveRequest:
+  type: object
+  required: [batch_id]
+  properties:
+    batch_id:
+      description: Batch ID to retrieve
+      type: string
+    provider:
+      description: Provider for the batch
+      type: string
+
+# Alias for BatchRetrieveResponse in the shared inference batch schemas.
+OpenAIBatchRetrieveResponse:
+  $ref: '../../inference/batch.yaml#/BatchRetrieveResponse'
+
+# Parameters for cancelling one batch; `batch_id` is required.
+OpenAIBatchCancelRequest:
+  type: object
+  required: [batch_id]
+  properties:
+    batch_id:
+      description: Batch ID to cancel
+      type: string
+    provider:
+      description: Provider for the batch
+      type: string
+
+# Alias for BatchCancelResponse in the shared inference batch schemas.
+OpenAIBatchCancelResponse:
+  $ref: '../../inference/batch.yaml#/BatchCancelResponse'
--- a/docs/openapi/schemas/integrations/openai/chat.yaml
+++ b/docs/openapi/schemas/integrations/openai/chat.yaml
@@ -0,0 +1,121 @@
+# OpenAI Integration Chat Completions Schemas
+# Reuses inference schemas where possible since Bifrost follows OpenAI format
+
+# Chat completions request body in OpenAI format. `model` and `messages` are
+# required; tool, streaming, and message-content types come from the shared
+# inference chat schemas.
+OpenAIChatRequest:
+  type: object
+  required:
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model identifier (e.g., gpt-4, gpt-3.5-turbo)
+      example: gpt-4
+    messages:
+      type: array
+      items:
+        $ref: '#/OpenAIMessage'
+      description: List of messages in the conversation
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    max_tokens:
+      type: integer
+      description: Maximum tokens to generate (legacy, use max_completion_tokens)
+    max_completion_tokens:
+      type: integer
+      description: Maximum tokens to generate
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    top_p:
+      type: number
+    frequency_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    presence_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    # Map whose values are numbers; keys are unconstrained by this schema.
+    logit_bias:
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:
+      type: boolean
+    top_logprobs:
+      type: integer
+    # Quoted on purpose: a bare `n` is resolved as boolean false by YAML 1.1
+    # resolvers, which would silently rename this property.
+    'n':
+      type: integer
+    # Either a single stop string or a list of them.
+    stop:
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    seed:
+      type: integer
+    user:
+      type: string
+    tools:
+      type: array
+      items:
+        $ref: '../../inference/chat.yaml#/ChatTool'
+    tool_choice:
+      $ref: '../../inference/chat.yaml#/ChatToolChoice'
+    parallel_tool_calls:
+      type: boolean
+    response_format:
+      type: object
+      description: Format for the response
+    reasoning_effort:
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+      description: OpenAI reasoning effort level
+    service_tier:
+      type: string
+    stream_options:
+      $ref: '../../inference/chat.yaml#/ChatStreamOptions'
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+      description: Fallback models
+
+# One chat message; only `role` is required. Content, annotations, and tool
+# calls reuse the shared inference chat schemas.
+OpenAIMessage:
+  type: object
+  required: [role]
+  properties:
+    role:
+      type: string
+      enum:
+        - system
+        - user
+        - assistant
+        - tool
+        - developer
+    name: {type: string}
+    content:
+      $ref: "../../inference/chat.yaml#/ChatMessageContent"
+    tool_call_id:
+      description: For tool messages
+      type: string
+    refusal: {type: string}
+    reasoning: {type: string}
+    annotations:
+      type: array
+      items:
+        $ref: "../../inference/chat.yaml#/ChatAssistantMessageAnnotation"
+    tool_calls:
+      type: array
+      items:
+        $ref: "../../inference/chat.yaml#/ChatAssistantMessageToolCall"
+
+# Response reuses inference schema since format is identical
+# Alias for ChatCompletionResponse in the shared inference chat schemas.
+OpenAIChatResponse:
+  $ref: '../../inference/chat.yaml#/ChatCompletionResponse'
+
+# Alias for ChatCompletionStreamResponse in the shared inference chat schemas.
+OpenAIChatStreamResponse:
+  $ref: '../../inference/chat.yaml#/ChatCompletionStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/common.yaml
+++ b/docs/openapi/schemas/integrations/openai/common.yaml
@@ -0,0 +1,51 @@
+# OpenAI Integration Common Types
+
+# Error envelope: `error` holds a message and a type, plus `param` and `code`,
+# both of which may be null.
+OpenAIError:
+  type: object
+  properties:
+    error:
+      type: object
+      properties:
+        message:
+          type: string
+        type:
+          type: string
+        param:
+          type: string
+          nullable: true
+        code:
+          type: string
+          nullable: true
+
+# OpenAI uses the same model format but without provider prefix
+# Model entry in OpenAI format, with two extra fields marked GROQ-specific.
+OpenAIModel:
+  type: object
+  properties:
+    id:
+      description: Model identifier
+      type: string
+    object:
+      default: model
+      type: string
+    owned_by: {type: string}
+    created:
+      format: int64
+      type: integer
+    active:
+      description: GROQ-specific field
+      type: boolean
+    context_window:
+      description: GROQ-specific field
+      type: integer
+
+# List wrapper: `object` defaults to `list`; `data` holds OpenAIModel entries.
+OpenAIListModelsResponse:
+  type: object
+  properties:
+    object:
+      type: string
+      default: list
+    data:
+      type: array
+      items:
+        $ref: '#/OpenAIModel'
--- a/docs/openapi/schemas/integrations/openai/embeddings.yaml
+++ b/docs/openapi/schemas/integrations/openai/embeddings.yaml
@@ -0,0 +1,36 @@
+# OpenAI Integration Embeddings Schemas
+
+# Embeddings request; `model` and `input` are required. `input` is a single
+# string or a list of strings.
+OpenAIEmbeddingRequest:
+  type: object
+  required: [model, input]
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: text-embedding-3-small
+    input:
+      description: Input text to embed
+      oneOf:
+        - type: string
+        - type: array
+          items: {type: string}
+    encoding_format:
+      type: string
+      enum:
+        - float
+        - base64
+    dimensions:
+      description: Number of dimensions for the embedding
+      type: integer
+    user:
+      type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items: {type: string}
+
+# Response reuses inference schema
+# Alias for EmbeddingResponse in the shared inference embeddings schemas.
+OpenAIEmbeddingResponse:
+  $ref: '../../inference/embeddings.yaml#/EmbeddingResponse'
--- a/docs/openapi/schemas/integrations/openai/files.yaml
+++ b/docs/openapi/schemas/integrations/openai/files.yaml
@@ -0,0 +1,95 @@
+# OpenAI Integration Files API Schemas
+# Reuses inference files schemas since OpenAI integration uses Bifrost format
+
+# File upload body; the binary `file` and its `purpose` are required.
+OpenAIFileUploadRequest:
+  type: object
+  required: [file, purpose]
+  properties:
+    file:
+      description: File to upload
+      type: string
+      format: binary
+    purpose:
+      description: Purpose of the file
+      type: string
+      enum:
+        - assistants
+        - assistants_output
+        - batch
+        - batch_output
+        - fine-tune
+        - fine-tune-results
+        - vision
+        - user_data
+        - evals
+    provider:
+      description: Provider for file storage
+      type: string
+    storage_config:
+      $ref: "../../inference/files.yaml#/FileStorageConfig"
+
+# Alias for FileUploadResponse in the shared inference files schemas.
+OpenAIFileUploadResponse:
+  $ref: '../../inference/files.yaml#/FileUploadResponse'
+
+# Parameters for listing files: filters, page size, cursor, and sort order.
+OpenAIFileListRequest:
+  type: object
+  properties:
+    purpose:
+      description: Filter by purpose
+      type: string
+    limit:
+      description: Maximum files to return
+      type: integer
+    after:
+      description: Cursor for pagination
+      type: string
+    order:
+      type: string
+      enum:
+        - asc
+        - desc
+    provider:
+      description: Filter by provider
+      type: string
+
+# Alias for FileListResponse in the shared inference files schemas.
+OpenAIFileListResponse:
+  $ref: '../../inference/files.yaml#/FileListResponse'
+
+# Parameters for fetching one file's metadata; `file_id` is required.
+OpenAIFileRetrieveRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      description: File ID to retrieve
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: "../../inference/files.yaml#/FileStorageConfig"
+
+# Alias for FileRetrieveResponse in the shared inference files schemas.
+OpenAIFileRetrieveResponse:
+  $ref: '../../inference/files.yaml#/FileRetrieveResponse'
+
+# Parameters for deleting one file; `file_id` is required.
+OpenAIFileDeleteRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      description: File ID to delete
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: "../../inference/files.yaml#/FileStorageConfig"
+
+# Alias for FileDeleteResponse in the shared inference files schemas.
+OpenAIFileDeleteResponse:
+  $ref: '../../inference/files.yaml#/FileDeleteResponse'
+
+# Parameters for downloading one file's content; `file_id` is required.
+OpenAIFileContentRequest:
+  type: object
+  required: [file_id]
+  properties:
+    file_id:
+      description: File ID to get content for
+      type: string
+    provider:
+      description: Provider for the file
+      type: string
+    storage_config:
+      $ref: "../../inference/files.yaml#/FileStorageConfig"
--- a/docs/openapi/schemas/integrations/openai/images.yaml
+++ b/docs/openapi/schemas/integrations/openai/images.yaml
@@ -0,0 +1,133 @@
+# OpenAI Integration - Image Generation Schemas
+
+# Request body for OpenAI-format image generation. `model` and `prompt` are
+# mandatory; every other field is optional.
+OpenAIImageGenerationRequest:
+  type: object
+  required:
+    - model
+    - prompt
+  properties:
+    model:
+      type: string
+      description: Model identifier
+    prompt:
+      type: string
+      description: Text prompt to generate image
+    n:
+      type: integer
+      minimum: 1
+      maximum: 10
+      default: 1
+      description: Number of images to generate
+    size:
+      type: string
+      enum:
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1792x1024"
+        - "1024x1792"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+      description: Size of the generated image
+    quality:
+      type: string
+      # "standard"/"hd" are the DALL-E values; "low"/"medium"/"high"/"auto"
+      # are the GPT image model values. They are listed so that requests using
+      # the GPT-image sizes already accepted by `size` (and streaming) can
+      # also pass a quality that validates against this schema.
+      enum:
+        - "standard"
+        - "hd"
+        - "low"
+        - "medium"
+        - "high"
+        - "auto"
+      description: Quality of the generated image
+    style:
+      type: string
+      enum:
+        - "natural"
+        - "vivid"
+      description: Style of the generated image
+    response_format:
+      type: string
+      enum:
+        - "url"
+        - "b64_json"
+      default: "url"
+      description: Format of the response. This parameter is not supported for streaming requests.
+    user:
+      type: string
+      description: User identifier for tracking
+    stream:
+      type: boolean
+      default: false
+      description: |
+        Whether to stream the response. When true, images are sent as base64 chunks via SSE.
+    # Ordered list of alternative model identifiers (strings).
+    fallbacks:
+      type: array
+      items:
+        type: string
+      description: Fallback models to try if primary model fails
+
+# Non-streaming image generation result: creation timestamp, the generated
+# images, the settings that were applied, and usage accounting.
+OpenAIImageGenerationResponse:
+  type: object
+  properties:
+    created:
+      type: integer
+      format: int64
+      description: Unix timestamp when the image was created
+    data:
+      type: array
+      items:
+        # Same target file as before; the path now follows the
+        # '../../inference/...' form used by the sibling integration schemas.
+        $ref: '../../inference/images.yaml#/ImageData'
+      description: Array of generated images
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      description: Output format used
+    quality:
+      type: string
+      description: Quality setting used
+    size:
+      type: string
+      description: Size setting used
+    usage:
+      $ref: '../../inference/images.yaml#/ImageUsage'
+
+# One SSE event emitted while an image generation request is streamed.
+OpenAIImageStreamResponse:
+  type: object
+  description: |
+    Streaming response chunk for image generation (OpenAI format).
+    Sent via Server-Sent Events (SSE) when stream=true.
+  properties:
+    type:
+      type: string
+      enum:
+        - "image_generation.partial_image"
+        - "image_generation.completed"
+        - "error"
+      description: Type of stream event
+    b64_json:
+      type: string
+      description: Base64-encoded chunk of image data
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Ordering index for stream chunks
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string
+      description: Size of the generated image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      description: Output format used
+    usage:
+      # OpenAPI 3.0 ignores keywords placed next to $ref, so the reference is
+      # wrapped in allOf to keep the description attached to this property.
+      # The path uses the '../../inference/...' form shared by sibling schemas.
+      allOf:
+        - $ref: '../../inference/images.yaml#/ImageUsage'
+      description: Token usage (usually in final chunk)
--- a/docs/openapi/schemas/integrations/openai/responses.yaml
+++ b/docs/openapi/schemas/integrations/openai/responses.yaml
@@ -0,0 +1,108 @@
+# OpenAI Integration Responses API Schemas
+
+# Request body for the OpenAI-format Responses API.
+# `model` and `input` must be supplied; the remaining fields are optional.
+OpenAIResponsesRequest:
+  type: object
+  required: [model, input]
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: gpt-4
+    input:
+      $ref: '#/OpenAIResponsesInput'
+    stream: {type: boolean}
+    instructions:
+      description: System instructions for the model
+      type: string
+    max_output_tokens: {type: integer}
+    metadata:
+      type: object
+      additionalProperties: true
+    parallel_tool_calls: {type: boolean}
+    previous_response_id: {type: string}
+    reasoning:
+      $ref: '#/OpenAIResponsesReasoning'
+    store: {type: boolean}
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    text:
+      $ref: '#/OpenAIResponsesTextConfig'
+    # Tool selection and tool definitions come from the shared inference schemas.
+    tool_choice:
+      $ref: '../../inference/responses.yaml#/ResponsesToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '../../inference/responses.yaml#/ResponsesTool'
+    top_p: {type: number}
+    truncation:
+      type: string
+      enum:
+        - auto
+        - disabled
+    user: {type: string}
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items: {type: string}
+
+# Union type for the `input` field of OpenAIResponsesRequest.
+OpenAIResponsesInput:
+  description: Input - can be a string or array of messages
+  oneOf:
+    # Plain text input.
+    - type: string
+    # List of messages, each shaped by the shared inference schema.
+    - type: array
+      items:
+        $ref: '../../inference/responses.yaml#/ResponsesMessage'
+
+# Reasoning controls referenced by the `reasoning` field of
+# OpenAIResponsesRequest. All fields are optional.
+OpenAIResponsesReasoning:
+  type: object
+  properties:
+    effort:
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+    generate_summary:
+      type: string
+      enum: [auto, concise, detailed]
+      # Superseded upstream by `summary` (same allowed values). Still accepted,
+      # but flagged so generated docs and clients steer callers to `summary`.
+      deprecated: true
+      description: 'Deprecated: use `summary` instead.'
+    summary:
+      type: string
+      enum: [auto, concise, detailed]
+    max_tokens:
+      type: integer
+
+OpenAIResponsesTextConfig:  # Object referenced by the `text` field of OpenAIResponsesRequest.
+  type: object
+  properties:
+    format:  # Sole property; its shape is defined by OpenAIResponsesTextFormat in this file.
+      $ref: '#/OpenAIResponsesTextFormat'
+
+# Output-format selector referenced by OpenAIResponsesTextConfig.format.
+OpenAIResponsesTextFormat:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - json_object
+        - json_schema
+    # Nested object with a name, a free-form schema object, and a strict flag.
+    json_schema:
+      type: object
+      properties:
+        name: {type: string}
+        schema: {type: object}
+        strict: {type: boolean}
+
+# Response reuses inference schema
+OpenAIResponsesResponse:  # Alias: resolves to ResponsesResponse in the shared inference responses schema.
+  $ref: '../../inference/responses.yaml#/ResponsesResponse'
+
+OpenAIResponsesStreamResponse:  # Alias: resolves to ResponsesStreamResponse in the shared inference responses schema.
+  $ref: '../../inference/responses.yaml#/ResponsesStreamResponse'
--- a/docs/openapi/schemas/integrations/openai/text.yaml
+++ b/docs/openapi/schemas/integrations/openai/text.yaml
@@ -0,0 +1,74 @@
+# OpenAI Integration Text Completions Schemas (Legacy Completions API)
+
+# Request body for the legacy OpenAI-format text completions API.
+# `model` and `prompt` must be supplied; the remaining fields are optional.
+OpenAITextCompletionRequest:
+  type: object
+  required: [model, prompt]
+  properties:
+    model:
+      description: Model identifier
+      type: string
+      example: gpt-3.5-turbo-instruct
+    prompt:
+      description: The prompt(s) to generate completions for
+      # Either one string or a list of strings.
+      oneOf:
+        - type: string
+        - type: array
+          items: {type: string}
+    stream:
+      description: Whether to stream the response
+      type: boolean
+    max_tokens: {type: integer}
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    top_p: {type: number}
+    # Both penalties share the same -2.0..2.0 bounds.
+    frequency_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    presence_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    logit_bias:
+      type: object
+      additionalProperties: {type: number}
+    logprobs: {type: integer}
+    n: {type: integer}
+    stop:
+      # Either one string or a list of strings.
+      oneOf:
+        - type: string
+        - type: array
+          items: {type: string}
+    suffix: {type: string}
+    echo: {type: boolean}
+    best_of: {type: integer}
+    user: {type: string}
+    seed: {type: integer}
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items: {type: string}
+
+# Response reuses inference schema
+OpenAITextCompletionResponse:  # Alias: resolves to TextCompletionResponse in the shared inference text schema.
+  $ref: '../../inference/text.yaml#/TextCompletionResponse'
+
+OpenAITextCompletionStreamResponse:  # Alias: resolves to TextCompletionStreamResponse in the shared inference text schema.
+  $ref: '../../inference/text.yaml#/TextCompletionStreamResponse'