first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/docs/openapi/bundle.py
+++ b/docs/openapi/bundle.py
@@ -0,0 +1,399 @@
+#!/usr/bin/env python3
+"""
+OpenAPI Bundle Script
+
+Bundles multiple OpenAPI YAML files with $ref references into a single
+OpenAPI specification file using proper component references instead of
+full inlining.
+
+The bundler uses openapi.yaml#/components/* as a registry. All $refs that
+resolve to a registered component are replaced with #/components/{type}/{Name}
+pointers. Only genuinely unregistered sub-schemas are inlined.
+
+This is fully generic — adding new component types (securitySchemes, headers,
+requestBodies, links, callbacks, etc.) to openapi.yaml requires no changes here.
+
+Usage:
+    python bundle.py                    # Output to openapi.json
+    python bundle.py --output spec.json # Output to custom file
+    python bundle.py --format yaml      # Output as YAML
+
+Requirements:
+    pip install pyyaml
+"""
+
+import argparse
+import copy
+import json
+import os
+import sys
+import warnings
+from pathlib import Path
+from typing import Any, Dict, Optional, Set, Tuple
+from urllib.parse import urldefrag
+
+try:
+    import yaml
+except ImportError:
+    print("Error: PyYAML is required. Install with: pip install pyyaml")
+    sys.exit(1)
+
+
+class OpenAPIBundler:
+    """
+    Generic OpenAPI bundler that hoists all registered components into
+    #/components/{type}/{name} refs rather than fully inlining $refs.
+
+    Algorithm:
+      Phase 1 - Build registry: scan ALL openapi.yaml components/* sections and
+                map (abs_file, frag_key) -> (component_type, canonical_name).
+      Phase 2 - Resolve components: for each registered component, resolve its
+                content, substituting known refs with #/components/{type}/{name}.
+      Phase 3 - Resolve paths: resolve all path items the same way.
+      Phase 4 - Assemble output: emit the full bundled spec.
+
+    Adding a new component type (e.g. securitySchemes, headers, requestBodies)
+    only requires registering it in openapi.yaml components section — no changes
+    needed in this file.
+
+    Circular reference handling:
+      If a $ref points back to something currently being resolved AND that
+      something is registered, the registry lookup intercepts it first and emits
+      a clean #/components/{type}/{name} pointer (breaking the cycle). If it is
+      NOT registered, a warning is emitted with instructions to register it.
+    """
+
+    def __init__(self, base_path: Path):
+        self.base_path = base_path
+        self.file_cache: Dict[str, Any] = {}
+        # Registry: (abs_file_str, frag_key) -> (component_type, canonical_name)
+        # e.g. ('/path/chat.yaml', 'ChatMessage') -> ('schemas', 'ChatMessage')
+        self.registry: Dict[Tuple[str, str], Tuple[str, str]] = {}
+        # Resolved components: {component_type: {name: resolved_content}}
+        self.resolved_components: Dict[str, Dict[str, Any]] = {}
+        # Set of (abs_file_str, frag_key) currently being resolved (circular detection)
+        self.resolving: Set[Tuple[str, str]] = set()
+
+    # -------------------------------------------------------------------------
+    # File loading
+    # -------------------------------------------------------------------------
+
+    def _load(self, path: Path) -> Any:
+        key = str(path.resolve())
+        if key not in self.file_cache:
+            if not path.exists():
+                raise FileNotFoundError(f"File not found: {path}")
+            with open(path, "r", encoding="utf-8") as f:
+                self.file_cache[key] = yaml.safe_load(f)
+        return self.file_cache[key]
+
+    # -------------------------------------------------------------------------
+    # Ref parsing helpers
+    # -------------------------------------------------------------------------
+
+    def _split_ref(self, ref: str, current_file: Path) -> Tuple[Path, str]:
+        """
+        Split a $ref into (absolute_file_path, normalized_fragment_key).
+
+        fragment_key is the JSON Pointer fragment with the leading '#/' stripped,
+        e.g. '#/ChatMessage' -> 'ChatMessage', 'file.yaml#/foo/bar' -> 'foo/bar'.
+        """
+        url, fragment = urldefrag(ref)
+        abs_path = (current_file.parent / url).resolve() if url else current_file.resolve()
+        return abs_path, fragment.lstrip("/")
+
+    def _navigate(self, content: Any, frag_key: str) -> Any:
+        """Navigate into content using a normalized fragment key."""
+        if not frag_key:
+            return content
+        for part in frag_key.split("/"):
+            part = part.replace("~1", "/").replace("~0", "~")
+            if isinstance(content, dict):
+                if part not in content:
+                    raise KeyError(
+                        f"Key '{part}' not found. Available: {list(content.keys())}"
+                    )
+                content = content[part]
+            elif isinstance(content, list):
+                content = content[int(part)]
+            else:
+                raise KeyError(f"Cannot navigate into {type(content).__name__} at '{part}'")
+        return content
+
+    # -------------------------------------------------------------------------
+    # Phase 1: Build registry (generic over all component types)
+    # -------------------------------------------------------------------------
+
+    def _build_registry(self, entry_path: Path) -> None:
+        """
+        Scan openapi.yaml components/* and register every $ref entry as
+        (abs_file, frag_key) -> (component_type, canonical_name).
+
+        Works for any component type: schemas, responses, parameters,
+        securitySchemes, headers, requestBodies, links, callbacks, etc.
+        No changes needed here when new types are added to openapi.yaml.
+        """
+        spec = self._load(entry_path)
+        for comp_type, section in spec.get("components", {}).items():
+            if not isinstance(section, dict):
+                continue
+            for name, comp_def in section.items():
+                if isinstance(comp_def, dict) and "$ref" in comp_def:
+                    abs_file, frag_key = self._split_ref(comp_def["$ref"], entry_path)
+                    self.registry[(str(abs_file), frag_key)] = (comp_type, name)
+
+    # -------------------------------------------------------------------------
+    # Core resolver
+    # -------------------------------------------------------------------------
+
+    def _resolve_value(self, obj: Any, current_file: Path) -> Any:
+        """
+        Recursively resolve all $refs in obj.
+
+        - If a $ref already points to #/components/..., keep it as-is.
+        - If a $ref resolves to a registered component, replace with
+          #/components/{type}/{name}.
+        - Otherwise, inline the referenced content (resolved recursively).
+        - Circular refs to unregistered content emit a warning with fix instructions.
+        """
+        if isinstance(obj, dict):
+            if "$ref" in obj:
+                ref = obj["$ref"]
+
+                # Already an internal component ref — keep it as-is
+                if ref.startswith("#/components/"):
+                    if len(obj) > 1:
+                        result = {"$ref": ref}
+                        for k, v in obj.items():
+                            if k != "$ref":
+                                result[k] = self._resolve_value(v, current_file)
+                        return result
+                    return obj
+
+                abs_file, frag_key = self._split_ref(ref, current_file)
+
+                # Check if this resolves to a registered component
+                match = self.registry.get((str(abs_file), frag_key))
+                if match is not None:
+                    comp_type, name = match
+                    result: Dict[str, Any] = {"$ref": f"#/components/{comp_type}/{name}"}
+                    if len(obj) > 1:
+                        for k, v in obj.items():
+                            if k != "$ref":
+                                result[k] = self._resolve_value(v, current_file)
+                    return result
+
+                # Detect circular reference — the target is currently being resolved
+                # and is NOT in the registry (so the registry can't break the cycle).
+                #
+                # This happens when a schema file has an internal self-ref (e.g.
+                # `$ref: '#/MySchema'`) but MySchema was never added to openapi.yaml.
+                #
+                # FIX: register the schema in openapi.yaml components/schemas:
+                #
+                #   MySchema:
+                #     $ref: './schemas/path/to/file.yaml#/MySchema'
+                #
+                # Once registered, the registry check above intercepts the ref and
+                # emits a clean #/components/schemas/MySchema pointer instead of
+                # attempting to inline it (which would recurse forever).
+                resolve_key = (str(abs_file), frag_key)
+                if resolve_key in self.resolving:
+                    warnings.warn(
+                        f"Circular $ref not in registry, left unresolved: '{ref}' "
+                        f"(from {current_file}). Register it in openapi.yaml components/."
+                    )
+                    return obj
+
+                # Inline the referenced content
+                try:
+                    content = self._load(abs_file)
+                    value = self._navigate(content, frag_key)
+                except (FileNotFoundError, KeyError) as e:
+                    warnings.warn(f"Cannot resolve $ref '{ref}' from {current_file}: {e}")
+                    return obj
+
+                self.resolving.add(resolve_key)
+                try:
+                    resolved = self._resolve_value(copy.deepcopy(value), abs_file)
+                finally:
+                    self.resolving.discard(resolve_key)
+
+                # Merge any sibling keys alongside $ref
+                if len(obj) > 1 and isinstance(resolved, dict):
+                    result = dict(resolved)
+                    for k, v in obj.items():
+                        if k != "$ref":
+                            result[k] = self._resolve_value(v, current_file)
+                    return result
+
+                return resolved
+
+            return {k: self._resolve_value(v, current_file) for k, v in obj.items()}
+
+        elif isinstance(obj, list):
+            return [self._resolve_value(item, current_file) for item in obj]
+
+        return obj
+
+    # -------------------------------------------------------------------------
+    # Phase 2: Resolve all registered components (generic)
+    # -------------------------------------------------------------------------
+
+    def _ensure_component(
+        self, comp_type: str, name: str, ref_str: str, entry_path: Path
+    ) -> None:
+        """
+        Resolve a registered component and store it in resolved_components.
+        Idempotent; handles circular refs via the resolving set.
+        """
+        if name in self.resolved_components.get(comp_type, {}):
+            return
+
+        abs_file, frag_key = self._split_ref(ref_str, entry_path)
+        resolve_key = (str(abs_file), frag_key)
+
+        if resolve_key in self.resolving:
+            return  # Circular — the registry will emit a component ref to break the cycle
+
+        self.resolving.add(resolve_key)
+        try:
+            content = self._load(abs_file)
+            value = self._navigate(content, frag_key)
+            resolved = self._resolve_value(copy.deepcopy(value), abs_file)
+        except (FileNotFoundError, KeyError) as e:
+            warnings.warn(f"Cannot resolve {comp_type} '{name}' ({ref_str}): {e}")
+            resolved = {"description": f"[unresolvable: {e}]"}
+        finally:
+            self.resolving.discard(resolve_key)
+
+        self.resolved_components.setdefault(comp_type, {})[name] = resolved
+
+    # -------------------------------------------------------------------------
+    # Main bundle entry point
+    # -------------------------------------------------------------------------
+
+    def bundle(self, entry_file: str = "openapi.yaml") -> Dict[str, Any]:
+        """Bundle the OpenAPI spec starting from the entry file."""
+        entry_path = (self.base_path / entry_file).resolve()
+        if not entry_path.exists():
+            raise FileNotFoundError(f"Entry file not found: {entry_path}")
+
+        # Phase 1: Build registry from all components/* sections
+        self._build_registry(entry_path)
+
+        spec = self._load(entry_path)
+        components = spec.get("components", {})
+
+        # Phase 2: Resolve every registered component generically
+        for comp_type, section in components.items():
+            if not isinstance(section, dict):
+                continue
+            for name, comp_def in section.items():
+                if isinstance(comp_def, dict) and "$ref" in comp_def:
+                    self._ensure_component(comp_type, name, comp_def["$ref"], entry_path)
+                else:
+                    self.resolved_components.setdefault(comp_type, {})[name] = (
+                        self._resolve_value(copy.deepcopy(comp_def), entry_path)
+                    )
+
+        # Phase 3 + 4: Build output spec
+        output: Dict[str, Any] = {}
+        for key, value in spec.items():
+            if key == "paths":
+                output["paths"] = self._resolve_paths(value, entry_path)
+            elif key == "components":
+                output["components"] = self.resolved_components
+            else:
+                # info, servers, tags, security, etc. — resolve defensively
+                output[key] = (
+                    self._resolve_value(copy.deepcopy(value), entry_path)
+                    if isinstance(value, (dict, list))
+                    else value
+                )
+
+        return output
+
+    def _resolve_paths(self, paths: Dict[str, Any], entry_path: Path) -> Dict[str, Any]:
+        """Resolve all path items."""
+        resolved: Dict[str, Any] = {}
+        for path_name, path_ref in paths.items():
+            if isinstance(path_ref, dict) and "$ref" in path_ref:
+                abs_file, frag_key = self._split_ref(path_ref["$ref"], entry_path)
+                try:
+                    content = self._load(abs_file)
+                    value = self._navigate(content, frag_key)
+                    resolved[path_name] = self._resolve_value(
+                        copy.deepcopy(value), abs_file
+                    )
+                except (FileNotFoundError, KeyError) as e:
+                    warnings.warn(f"Cannot resolve path '{path_name}': {e}")
+                    resolved[path_name] = path_ref
+            else:
+                resolved[path_name] = self._resolve_value(path_ref, entry_path)
+        return resolved
+
+
+# -----------------------------------------------------------------------------
+# CLI
+# -----------------------------------------------------------------------------
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Bundle OpenAPI YAML files into a single specification"
+    )
+    parser.add_argument(
+        "--input", "-i", default="openapi.yaml",
+        help="Entry point YAML file (default: openapi.yaml)",
+    )
+    parser.add_argument(
+        "--output", "-o", default="openapi.json",
+        help="Output file path (default: openapi.json)",
+    )
+    parser.add_argument(
+        "--format", "-f", choices=["json", "yaml"], default="json",
+        help="Output format (default: json)",
+    )
+    parser.add_argument(
+        "--indent", type=int, default=2,
+        help="Indentation level for output (default: 2)",
+    )
+
+    args = parser.parse_args()
+
+    base_path = Path(__file__).parent.resolve()
+    print(f"Bundling OpenAPI spec from: {base_path / args.input}")
+
+    try:
+        bundler = OpenAPIBundler(base_path)
+        spec = bundler.bundle(args.input)
+
+        output_path = base_path / args.output
+        with open(output_path, "w", encoding="utf-8") as f:
+            if args.format == "json":
+                json.dump(spec, f, indent=args.indent, ensure_ascii=False)
+            else:
+                yaml.dump(spec, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
+
+        print(f"✓ Bundled specification written to: {output_path}")
+
+        paths_count = len(spec.get("paths", {}))
+        print(f"  - Paths: {paths_count}")
+        for comp_type, section in spec.get("components", {}).items():
+            print(f"  - {comp_type.capitalize()}: {len(section)}")
+        size_kb = os.path.getsize(output_path) / 1024
+        print(f"  - File size: {size_kb:.1f} KB")
+
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error bundling spec: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/docs/openapi/openapi.json
+++ b/docs/openapi/openapi.json
--- a/docs/openapi/openapi.yaml
+++ b/docs/openapi/openapi.yaml
--- a/docs/openapi/paths/inference/async.yaml
+++ b/docs/openapi/paths/inference/async.yaml
@@ -0,0 +1,664 @@
+# Async Inference Endpoints
+
+# --- Submission endpoints ---
+
+async-chat-completions:
+  post:
+    operationId: createAsyncChatCompletion
+    summary: Create async chat completion
+    description: |
+      Submits a chat completion request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/chat.yaml#/ChatCompletionRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-text-completions:
+  post:
+    operationId: createAsyncTextCompletion
+    summary: Create async text completion
+    description: |
+      Submits a text completion request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/text.yaml#/TextCompletionRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-responses:
+  post:
+    operationId: createAsyncResponse
+    summary: Create async response
+    description: |
+      Submits a response request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/responses.yaml#/ResponsesRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-embeddings:
+  post:
+    operationId: createAsyncEmbedding
+    summary: Create async embedding
+    description: |
+      Submits an embedding request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/embeddings.yaml#/EmbeddingRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-speech:
+  post:
+    operationId: createAsyncSpeech
+    summary: Create async speech
+    description: |
+      Submits a speech synthesis request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      SSE streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/speech.yaml#/SpeechRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-transcriptions:
+  post:
+    operationId: createAsyncTranscription
+    summary: Create async transcription
+    description: |
+      Submits a transcription request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/transcription.yaml#/TranscriptionRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-generation:
+  post:
+    operationId: createAsyncImageGeneration
+    summary: Create async image generation
+    description: |
+      Submits an image generation request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageGenerationRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-edit:
+  post:
+    operationId: createAsyncImageEdit
+    summary: Create async image edit
+    description: |
+      Submits an image edit request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+      Streaming is not supported for async requests.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageEditRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-variation:
+  post:
+    operationId: createAsyncImageVariation
+    summary: Create async image variation
+    description: |
+      Submits an image variation request for asynchronous execution. Returns a job ID immediately
+      with HTTP 202. Poll the corresponding GET endpoint with the job ID to retrieve the result.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncResultTTL'
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageVariationRequest'
+    responses:
+      '202':
+        description: Job accepted for processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# --- Retrieval endpoints ---
+async-chat-completions-job:
+  get:
+    operationId: getAsyncChatCompletionJob
+    summary: Get async chat completion job
+    description: |
+      Retrieves the status and result of an async chat completion job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-text-completions-job:
+  get:
+    operationId: getAsyncTextCompletionJob
+    summary: Get async text completion job
+    description: |
+      Retrieves the status and result of an async text completion job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-responses-job:
+  get:
+    operationId: getAsyncResponseJob
+    summary: Get async response job
+    description: |
+      Retrieves the status and result of an async response job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-embeddings-job:
+  get:
+    operationId: getAsyncEmbeddingJob
+    summary: Get async embedding job
+    description: |
+      Retrieves the status and result of an async embedding job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-speech-job:
+  get:
+    operationId: getAsyncSpeechJob
+    summary: Get async speech job
+    description: |
+      Retrieves the status and result of an async speech job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-transcriptions-job:
+  get:
+    operationId: getAsyncTranscriptionJob
+    summary: Get async transcription job
+    description: |
+      Retrieves the status and result of an async transcription job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-generation-job:
+  get:
+    operationId: getAsyncImageGenerationJob
+    summary: Get async image generation job
+    description: |
+      Retrieves the status and result of an async image generation job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-edit-job:
+  get:
+    operationId: getAsyncImageEditJob
+    summary: Get async image edit job
+    description: |
+      Retrieves the status and result of an async image edit job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+async-image-variation-job:
+  get:
+    operationId: getAsyncImageVariationJob
+    summary: Get async image variation job
+    description: |
+      Retrieves the status and result of an async image variation job.
+      Returns HTTP 202 if the job is still pending or processing, HTTP 200 if completed or failed.
+    tags:
+    - Async Jobs
+    parameters:
+    - $ref: '#/components/parameters/AsyncJobId'  # job_id path parameter, defined under components.parameters in this file
+    responses:
+      '200':
+        description: Job completed (successfully or with failure)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '202':
+        description: Job is still pending or processing
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/async.yaml#/AsyncJobResponse'
+      '404':
+        description: Job not found or expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+    security:  # each list entry is an alternative; satisfying any one scheme authorizes the request
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# --- Shared parameters ---
+
+components:  # file-local components; targets of the '#/components/parameters/...' refs used by operations in this file
+  parameters:
+    AsyncJobId:  # required path parameter `job_id` identifying the async job
+      name: job_id
+      in: path
+      required: true
+      description: The unique identifier of the async job
+      schema:
+        type: string
+    AsyncResultTTL:  # optional request header controlling how long a completed job's result is retained
+      name: x-bf-async-job-result-ttl
+      in: header
+      required: false
+      description: |
+        Time-to-live in seconds for the job result after completion. Defaults to 3600 (1 hour).
+        After expiry, the job result is automatically cleaned up.
+      schema:
+        type: integer
+        default: 3600  # seconds (1 hour), matching the description above
--- a/docs/openapi/paths/inference/audio.yaml
+++ b/docs/openapi/paths/inference/audio.yaml
@@ -0,0 +1,71 @@
+speech:
+  post:
+    operationId: createSpeech
+    summary: Create speech
+    description: |
+      Generates audio from the input text. Returns audio data or streams via SSE.
+    tags:
+    - Audio
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/speech.yaml#/SpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/speech.yaml#/SpeechResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/speech.yaml#/SpeechStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+transcriptions:
+  post:
+    operationId: createTranscription
+    summary: Create transcription
+    description: |
+      Transcribes audio into text in the input language.
+    tags:
+    - Audio
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/transcription.yaml#/TranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/transcription.yaml#/TranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/transcription.yaml#/TranscriptionStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/batches.yaml
+++ b/docs/openapi/paths/inference/batches.yaml
@@ -0,0 +1,190 @@
+batches:
+  post:
+    operationId: createBatch
+    summary: Create a batch job
+    description: |
+      Creates a batch job for asynchronous processing.
+    tags:
+    - Batch
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/batch.yaml#/BatchCreateRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/batch.yaml#/BatchCreateResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: listBatches
+    summary: List batch jobs
+    description: |
+      Lists batch jobs for a provider.
+    tags:
+    - Batch
+    parameters:
+    - name: provider
+      in: query
+      required: true
+      description: Provider to list batches for
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    - name: limit
+      in: query
+      description: Maximum number of batches to return
+      schema:
+        type: integer
+        minimum: 1
+    - name: after
+      in: query
+      description: Cursor for pagination
+      schema:
+        type: string
+    - name: before
+      in: query
+      description: Cursor for pagination
+      schema:
+        type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/batch.yaml#/BatchListResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-by-id:
+  get:
+    operationId: retrieveBatch
+    summary: Retrieve a batch job
+    description: |
+      Retrieves a specific batch job by ID.
+    tags:
+    - Batch
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      description: The ID of the batch to retrieve
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the batch
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/batch.yaml#/BatchRetrieveResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-cancel:
+  post:
+    operationId: cancelBatch
+    summary: Cancel a batch job
+    description: |
+      Cancels a batch job.
+    tags:
+    - Batch
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      description: The ID of the batch to cancel
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the batch
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/batch.yaml#/BatchCancelResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-results:
+  get:
+    operationId: getBatchResults
+    summary: Get batch results
+    description: |
+      Retrieves results from a completed batch job.
+    tags:
+    - Batch
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      description: The ID of the batch
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the batch
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/batch.yaml#/BatchResultsResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/chat-completions.yaml
+++ b/docs/openapi/paths/inference/chat-completions.yaml
@@ -0,0 +1,33 @@
+chat-completions:
+  post:
+    operationId: createChatCompletion
+    summary: Create a chat completion
+    description: |
+      Creates a completion for the provided messages. Supports streaming via SSE.
+    tags:
+    - Chat Completions
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/chat.yaml#/ChatCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/chat.yaml#/ChatCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/chat.yaml#/ChatCompletionStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/containers.yaml
+++ b/docs/openapi/paths/inference/containers.yaml
@@ -0,0 +1,390 @@
+containers:
+  post:
+    operationId: createContainer
+    summary: Create a container
+    description: |
+      Creates a new container for storing files and data.
+    tags:
+    - Containers
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/containers.yaml#/ContainerCreateRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerCreateResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: listContainers
+    summary: List containers
+    description: |
+      Lists containers for a provider.
+    tags:
+    - Containers
+    parameters:
+    - name: provider
+      in: query
+      required: true
+      description: Provider to list containers for
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    - name: limit
+      in: query
+      description: Maximum number of containers to return (1-100, default 20)
+      schema:
+        type: integer
+        minimum: 1
+        maximum: 100
+        default: 20  # page size used when the parameter is omitted, as stated in the description
+    - name: after
+      in: query
+      description: Cursor for pagination
+      schema:
+        type: string
+    - name: order
+      in: query
+      description: Sort order (asc/desc)
+      schema:
+        type: string
+        enum: [asc, desc]
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerListResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:  # each list entry is an alternative; satisfying any one scheme authorizes the request
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+containers-by-id:
+  get:
+    operationId: retrieveContainer
+    summary: Retrieve a container
+    description: |
+      Retrieves a specific container by ID.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container to retrieve
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerRetrieveResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: deleteContainer
+    summary: Delete a container
+    description: |
+      Deletes a container.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container to delete
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerDeleteResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:  # each list entry is an alternative; satisfying any one scheme authorizes the request
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# =============================================================================
+# CONTAINER FILES ENDPOINTS
+# =============================================================================
+
+container-files:
+  post:
+    operationId: createContainerFile
+    summary: Create a file in a container
+    description: |
+      Creates a new file in a container. You can either upload file content directly
+      via multipart/form-data or reference an existing file by its ID.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/containers.yaml#/ContainerFileCreateMultipartRequest'
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/containers.yaml#/ContainerFileCreateJsonRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerFileCreateResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: listContainerFiles
+    summary: List files in a container
+    description: |
+      Lists all files in a container.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    - name: limit
+      in: query
+      description: Maximum number of files to return
+      schema:
+        type: integer
+        minimum: 1
+        maximum: 100
+    - name: after
+      in: query
+      description: Cursor for pagination
+      schema:
+        type: string
+    - name: order
+      in: query
+      description: Sort order (asc/desc)
+      schema:
+        type: string
+        enum: [asc, desc]
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerFileListResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+container-files-by-id:
+  get:
+    operationId: retrieveContainerFile
+    summary: Retrieve a file from a container
+    description: |
+      Retrieves metadata for a specific file in a container.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container
+      schema:
+        type: string
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerFileRetrieveResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: deleteContainerFile
+    summary: Delete a file from a container
+    description: |
+      Deletes a file from a container.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container
+      schema:
+        type: string
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file to delete
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/containers.yaml#/ContainerFileDeleteResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+container-files-content:
+  get:
+    operationId: getContainerFileContent
+    summary: Download file content from a container
+    description: |
+      Downloads the content of a file from a container.
+    tags:
+    - Containers
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      description: The ID of the container
+      schema:
+        type: string
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the container
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/count-tokens.yaml
+++ b/docs/openapi/paths/inference/count-tokens.yaml
@@ -0,0 +1,30 @@
+count-tokens:
+  post:
+    operationId: countTokens
+    summary: Count tokens
+    description: |
+      Counts the number of tokens in the provided messages.
+    tags:
+    - Count Tokens
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/count-tokens.yaml#/CountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/count-tokens.yaml#/CountTokensResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/embeddings.yaml
+++ b/docs/openapi/paths/inference/embeddings.yaml
@@ -0,0 +1,30 @@
+embeddings:
+  post:
+    operationId: createEmbedding
+    summary: Create embeddings
+    description: |
+      Creates an embedding vector representing the input text.
+    tags:
+    - Embeddings
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/embeddings.yaml#/EmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/embeddings.yaml#/EmbeddingResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/files.yaml
+++ b/docs/openapi/paths/inference/files.yaml
@@ -0,0 +1,201 @@
+files:
+  post:
+    operationId: uploadFile
+    summary: Upload a file
+    description: |
+      Uploads a file to be used with batch operations or other features.
+    tags:
+    - Files
+    parameters:
+    - name: provider
+      in: query
+      description: Provider to upload file to (can also use x-model-provider header)
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/files.yaml#/FileUploadRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/files.yaml#/FileUploadResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: listFiles
+    summary: List files
+    description: |
+      Lists files for a provider.
+    tags:
+    - Files
+    parameters:
+    - name: provider  # query parameter name used by every other operation in this file
+      in: query
+      required: true
+      description: Provider to list files for
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    - name: purpose
+      in: query
+      description: Filter by purpose
+      schema:
+        $ref: '../../schemas/inference/files.yaml#/FilePurpose'
+    - name: limit
+      in: query
+      description: Maximum number of files to return
+      schema:
+        type: integer
+        minimum: 1
+    - name: after
+      in: query
+      description: Cursor for pagination
+      schema:
+        type: string
+    - name: order
+      in: query
+      description: Sort order (asc/desc)
+      schema:
+        type: string
+        enum: [asc, desc]
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/files.yaml#/FileListResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:  # each list entry is an alternative; satisfying any one scheme authorizes the request
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-by-id:
+  get:
+    operationId: retrieveFile
+    summary: Retrieve file metadata
+    description: |
+      Retrieves metadata for a specific file.
+    tags:
+    - Files
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the file
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/files.yaml#/FileRetrieveResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: deleteFile
+    summary: Delete a file
+    description: |
+      Deletes a file.
+    tags:
+    - Files
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file to delete
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the file
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/files.yaml#/FileDeleteResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-content:
+  get:
+    operationId: getFileContent
+    summary: Download file content
+    description: |
+      Downloads the content of a file.
+    tags:
+    - Files
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      description: The ID of the file
+      schema:
+        type: string
+    - name: provider
+      in: query
+      required: true
+      description: The provider of the file
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/images.yaml
+++ b/docs/openapi/paths/inference/images.yaml
@@ -0,0 +1,112 @@
+# Image Generation Endpoints
+
+image-generation:
+  post:
+    operationId: imageGeneration
+    summary: Generate an image
+    description: |
+      Generates images from text prompts using the specified model.
+    tags:
+    - Images
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageGenerationRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns JSON for non-streaming requests, or Server-Sent Events (SSE) stream when `stream=true`.
+          When streaming, events are sent with the following event types:
+          - `image_generation.partial_image`: Intermediate image chunks with base64-encoded image data
+          - `image_generation.completed`: Final event for each image with usage information
+          - `error`: Error events with error details
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/images.yaml#/ImageGenerationResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/images.yaml#/ImageGenerationStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+image-edit:
+  post:
+    operationId: imageEdit
+    summary: Edit an image
+    description: |
+      Edits an image using a text prompt and optional mask. Request must be sent as multipart/form-data
+      with at least `model`, `prompt` (unless `type` is `background_removal`), and `image` (or `image[]`).
+    tags:
+    - Images
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageEditRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns JSON for non-streaming requests, or Server-Sent Events (SSE) stream when `stream=true`.
+          When streaming, events are sent with the following event types:
+          - `image_edit.partial_image`: Intermediate image chunks with base64-encoded image data
+          - `image_edit.completed`: Final event for each image with usage information
+          - `error`: Error events with error details
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/images.yaml#/ImageGenerationResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/images.yaml#/ImageEditStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+image-variation:
+  post:
+    operationId: imageVariation
+    summary: Create image variations
+    description: |
+      Creates variations of an image. Request must be sent as multipart/form-data with `model` and `image` (or `image[]`).
+      Does not support streaming.
+    tags:
+    - Images
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../schemas/inference/images.yaml#/ImageVariationRequest'
+    responses:
+      '200':
+        description: Successful response. Returns JSON with generated image variation(s).
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/images.yaml#/ImageGenerationResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/models.yaml
+++ b/docs/openapi/paths/inference/models.yaml
@@ -0,0 +1,41 @@
+models:
+  get:
+    operationId: listModels
+    summary: List available models
+    description: |
+      Lists available models. If provider is not specified, lists all models from all configured providers.
+    tags:
+    - Models
+    parameters:
+    - name: provider
+      in: query
+      description: Filter by provider (e.g., openai, anthropic, bedrock)
+      schema:
+        $ref: '../../schemas/inference/common.yaml#/ModelProvider'
+    - name: page_size
+      in: query
+      description: Maximum number of models to return
+      schema:
+        type: integer
+        minimum: 0
+    - name: page_token
+      in: query
+      description: Token for pagination
+      schema:
+        type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/models.yaml#/ListModelsResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/rerank.yaml
+++ b/docs/openapi/paths/inference/rerank.yaml
@@ -0,0 +1,30 @@
+rerank:
+  post:
+    operationId: rerankDocuments
+    summary: Rerank documents
+    description: |
+      Reorders input documents by relevance to a query.
+    tags:
+    - Rerank
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/rerank.yaml#/RerankRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/rerank.yaml#/RerankResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/responses.yaml
+++ b/docs/openapi/paths/inference/responses.yaml
@@ -0,0 +1,33 @@
+responses:
+  post:
+    operationId: createResponse
+    summary: Create a response
+    description: |
+      Creates a response using the OpenAI Responses API format. Supports streaming via SSE.
+    tags:
+    - Responses
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/responses.yaml#/ResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/responses.yaml#/ResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/responses.yaml#/ResponsesStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/text-completions.yaml
+++ b/docs/openapi/paths/inference/text-completions.yaml
@@ -0,0 +1,33 @@
+text-completions:
+  post:
+    operationId: createTextCompletion
+    summary: Create a text completion
+    description: |
+      Creates a completion for the provided prompt. Supports streaming via SSE.
+    tags:
+    - Text Completions
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/text.yaml#/TextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/text.yaml#/TextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../schemas/inference/text.yaml#/TextCompletionStreamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/inference/videos.yaml
+++ b/docs/openapi/paths/inference/videos.yaml
@@ -0,0 +1,311 @@
+# Video Generation Endpoints
+
+video-generation:
+  post:
+    operationId: videoGeneration
+    summary: Generate a video
+    description: |
+      Creates a video generation job from a text prompt. This is an asynchronous operation
+      that returns immediately with a job ID. Use the retrieve endpoint to check the status
+      and get the video URL when generation is complete.
+    tags:
+    - Videos
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/videos.yaml#/VideoGenerationRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns a video generation job object with status information.
+          Poll the retrieve endpoint to check completion status.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoGenerationResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: videoList
+    summary: List video generation jobs
+    description: |
+      Lists video generation jobs for a specific provider. Results are paginated
+      and can be filtered using query parameters.
+    tags:
+    - Videos
+    parameters:
+    - name: provider
+      in: query
+      required: true
+      schema:
+        type: string
+      description: Provider name (e.g., "openai", "gemini")
+    - name: after
+      in: query
+      required: false
+      schema:
+        type: string
+      description: Cursor for pagination - ID of the last item from the previous page
+    - name: limit
+      in: query
+      required: false
+      schema:
+        type: integer
+        minimum: 1
+        maximum: 100
+        default: 20
+      description: Maximum number of results to return
+    - name: order
+      in: query
+      required: false
+      schema:
+        type: string
+        enum:
+        - "asc"
+        - "desc"
+        default: "desc"
+      description: Sort order by creation time
+    responses:
+      '200':
+        description: Successful response. Returns a paginated list of video generation jobs.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoListResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+video-by-id:
+  get:
+    operationId: videoRetrieve
+    summary: Retrieve a video generation job
+    description: |
+      Retrieves the status and metadata for a video generation job.
+      Use this endpoint to poll for completion status after creating a video generation job.
+      When the status is "completed", the response will include a URL to download the video.
+    tags:
+    - Videos
+    parameters:
+    - name: video_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Video ID in format `id:provider` (e.g., `video_abc123:openai`)
+    responses:
+      '200':
+        description: Successful response. Returns the video generation job details.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoGenerationResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Video not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: videoDelete
+    summary: Delete a video generation job
+    description: |
+      Deletes a video generation job and its associated assets.
+      This operation cannot be undone.
+    tags:
+    - Videos
+    parameters:
+    - name: video_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Video ID in format `id:provider` (e.g., `video_abc123:openai`)
+    responses:
+      '200':
+        description: Successful response. Returns deletion confirmation.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoDeleteResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Video not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+video-download:
+  get:
+    operationId: videoDownload
+    summary: Download video content
+    description: |
+      Downloads the binary content of a generated video.
+      The video must have a status of "completed" to be downloadable.
+      Returns the raw video file (typically MP4 format).
+    tags:
+    - Videos
+    parameters:
+    - name: video_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Video ID in format `id:provider` (e.g., `video_abc123:openai`)
+    - name: variant
+      in: query
+      required: false
+      schema:
+        type: string
+        enum:
+        - "video"
+        - "thumbnail"
+        - "spritesheet"
+      description: Variant of the video content to download (provider-specific)
+    responses:
+      '200':
+        description: Successful response. Returns the video file as binary content.
+        content:
+          video/mp4:
+            schema:
+              type: string
+              format: binary
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Video not found or not yet available
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+video-delete:
+  delete:
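+    operationId: videoDelete  # NOTE(review): same operationId as video-by-id.delete above; OpenAPI requires operationIds to be unique - confirm only one of the two is referenced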
+    summary: Delete a video generation job
+    description: |
+      Deletes a video generation job and its associated assets.
+      This operation cannot be undone.
+    tags:
+    - Videos
+    parameters:
+    - name: video_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Video ID in format `id:provider` (e.g., `video_abc123:openai`)
+    responses:
+      '200':
+        description: Successful response. Returns deletion confirmation.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoDeleteResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Video not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+video-remix:
+  post:
+    operationId: videoRemix
+    summary: Remix a video
+    description: |
+      Creates a new video generation job by remixing an existing video with a new prompt.
+      The source video must have a status of "completed" to be remixed.
+      Returns a new video generation job that can be polled for completion.
+    tags:
+    - Videos
+    parameters:
+    - name: video_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Video ID in format `id:provider` (e.g., `video_abc123:openai`)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/inference/videos.yaml#/VideoRemixRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns a new video generation job object.
+          Poll the retrieve endpoint to check completion status.
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/videos.yaml#/VideoGenerationResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Source video not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/batch.yaml
+++ b/docs/openapi/paths/integrations/anthropic/batch.yaml
@@ -0,0 +1,230 @@
+# Anthropic Integration - Batch API Endpoints
+
+batches:
+  post:
+    operationId: anthropicCreateBatch
+    summary: Create batch job (Anthropic format)
+    description: |
+      Creates a batch processing job using Anthropic format.
+      Use x-model-provider header to specify the provider.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider to use (defaults to anthropic)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/batch.yaml#/AnthropicBatchCreateRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/batch.yaml#/AnthropicBatchCreateResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: anthropicListBatches
+    summary: List batch jobs (Anthropic format)
+    description: |
+      Lists batch processing jobs.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider to use (defaults to anthropic)
+    - name: page_size
+      in: query
+      schema:
+        type: integer
+        default: 20
+      description: Maximum number of batches to return
+    - name: page_token
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/batch.yaml#/AnthropicBatchListResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-by-id:
+  get:
+    operationId: anthropicRetrieveBatch
+    summary: Retrieve batch job (Anthropic format)
+    description: |
+      Retrieves details of a batch processing job.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Batch job ID
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider for the batch
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/batch.yaml#/AnthropicBatchRetrieveResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-cancel:
+  post:
+    operationId: anthropicCancelBatch
+    summary: Cancel batch job (Anthropic format)
+    description: |
+      Cancels a batch processing job.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Batch job ID to cancel
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider for the batch
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/batch.yaml#/AnthropicBatchCancelResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-results:
+  get:
+    operationId: anthropicGetBatchResults
+    summary: Get batch results (Anthropic format)
+    description: |
+      Retrieves results of a completed batch job.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Batch job ID
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider for the batch
+    responses:
+      '200':
+        description: Successful response (JSONL stream)
+        content:
+          application/x-ndjson:
+            schema:
+              type: string
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/count-tokens.yaml
+++ b/docs/openapi/paths/integrations/anthropic/count-tokens.yaml
@@ -0,0 +1,40 @@
+# Anthropic Integration - Count Tokens Endpoints
+
+count-tokens:
+  post:
+    operationId: anthropicCountTokens
+    summary: Count tokens (Anthropic format)
+    description: |
+      Counts the number of tokens in a message request.
+    tags:
+    - Anthropic Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/count-tokens.yaml#/AnthropicCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/count-tokens.yaml#/AnthropicCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/files.yaml
+++ b/docs/openapi/paths/integrations/anthropic/files.yaml
@@ -0,0 +1,209 @@
+# Anthropic Integration - Files API Endpoints
+
+files:
+  post:
+    operationId: anthropicUploadFile
+    summary: Upload file (Anthropic format)
+    description: |
+      Uploads a file. Use x-model-provider header to specify the provider.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider to use (defaults to anthropic)
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/files.yaml#/AnthropicFileUploadRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/files.yaml#/AnthropicFileUploadResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: anthropicListFiles
+    summary: List files (Anthropic format)
+    description: |
+      Lists uploaded files.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider to use (defaults to anthropic)
+    - name: limit
+      in: query
+      schema:
+        type: integer
+        default: 30
+      description: Maximum files to return
+    - name: after_id
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/files.yaml#/AnthropicFileListResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-content:
+  get:
+    operationId: anthropicGetFileContent
+    summary: Get file content (Anthropic format)
+    description: |
+      Retrieves file content. Returns raw binary file data when Accept header is set to application/octet-stream,
+      or file metadata as JSON when Accept header is set to application/json.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider for the file
+    - name: Accept
+      in: header
+      schema:
+        type: string
+        enum: [application/json, application/octet-stream]
+        default: application/json
+      description: Response content type - use application/octet-stream for binary download
+    responses:
+      '200':
+        description: |
+          Successful response. Returns file metadata as JSON or raw binary file content.
+          When returning binary content, the Content-Type header indicates the file's MIME type
+          and Content-Disposition header may include the filename.
+        headers:
+          Content-Type:
+            schema:
+              type: string
+            description: MIME type of the file (e.g., application/pdf, image/png, text/plain)
+          Content-Disposition:
+            schema:
+              type: string
+            description: Attachment filename directive (e.g., attachment; filename="document.pdf")
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/files.yaml#/AnthropicFileRetrieveResponse'
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+              description: Raw binary file content
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-by-id:
+  delete:
+    operationId: anthropicDeleteFile
+    summary: Delete file (Anthropic format)
+    description: |
+      Deletes an uploaded file.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID to delete
+    - name: x-model-provider
+      in: header
+      schema:
+        type: string
+      description: Provider for the file
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/files.yaml#/AnthropicFileDeleteResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/messages.yaml
+++ b/docs/openapi/paths/integrations/anthropic/messages.yaml
@@ -0,0 +1,119 @@
+# Anthropic Integration - Messages API Endpoints
+
+messages:
+  post:
+    operationId: anthropicCreateMessage
+    summary: Create message (Anthropic format)
+    description: |
+      Creates a message using Anthropic Messages API format.
+      Supports streaming via SSE.
+
+      **Async inference:** Send `x-bf-async: true` to submit the request as a background job and receive a job ID immediately. Poll with `x-bf-async-id: <job-id>` to retrieve the result. When the job is still processing, the response will have an empty `content` array. When completed, `content` will contain the full result. See [Async Inference](/features/async-inference) for details.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: x-bf-async
+      in: header
+      required: false
+      schema:
+        type: string
+        enum: ["true"]
+      description: Set to `true` to submit this request as an async job. Returns immediately with a job ID. Not
+        compatible with streaming.
+    - name: x-bf-async-id
+      in: header
+      required: false
+      schema:
+        type: string
+      description: Poll for results of a previously submitted async job by providing the job ID returned from the
+        initial async request.
+    - name: x-bf-async-job-result-ttl
+      in: header
+      required: false
+      schema:
+        type: integer
+        default: 3600
+      description: Override the default result TTL in seconds. Results expire after this duration from completion time.
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# Wildcard path for extended messages endpoints (e.g., /v1/messages/batches)
+messages-wildcard:
+  post:
+    operationId: anthropicCreateMessageWildcard
+    summary: Create message (Anthropic format) - wildcard
+    description: |
+      Handles extended messages API paths.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: path
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Extended path
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/models.yaml
+++ b/docs/openapi/paths/integrations/anthropic/models.yaml
@@ -0,0 +1,50 @@
+# Anthropic Integration - Models Endpoints
+
+models:
+  get:
+    operationId: anthropicListModels
+    summary: List models (Anthropic format)
+    description: |
+      Lists available models in Anthropic format.
+    tags:
+    - Anthropic Integration
+    parameters:
+    - name: limit
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of models to return
+    - name: before_id
+      in: query
+      schema:
+        type: string
+      description: Return models before this ID
+    - name: after_id
+      in: query
+      schema:
+        type: string
+      description: Return models after this ID
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicListModelsResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/anthropic/text.yaml
+++ b/docs/openapi/paths/integrations/anthropic/text.yaml
@@ -0,0 +1,44 @@
+# Anthropic Integration - Legacy Complete API Endpoints
+
+complete:
+  post:
+    operationId: anthropicCreateComplete
+    summary: Create completion (Anthropic legacy format)
+    description: |
+      Creates a text completion using Anthropic's legacy Complete API.
+      Supports streaming via SSE.
+    tags:
+    - Anthropic Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/text.yaml#/AnthropicTextRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/text.yaml#/AnthropicTextResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/text.yaml#/AnthropicTextResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/bedrock/batch.yaml
+++ b/docs/openapi/paths/integrations/bedrock/batch.yaml
@@ -0,0 +1,174 @@
+# AWS Bedrock - Batch Inference Endpoints
+
+batch-jobs:
+  post:
+    operationId: bedrockCreateBatchJob
+    summary: Create batch inference job (Bedrock format)
+    description: |
+      Creates a batch inference job using AWS Bedrock format.
+    tags:
+    - Bedrock Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/batch.yaml#/BedrockBatchJobRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/batch.yaml#/BedrockBatchJobResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: bedrockListBatchJobs
+    summary: List batch inference jobs (Bedrock format)
+    description: |
+      Lists batch inference jobs using AWS Bedrock format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: maxResults
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of results to return
+    - name: nextToken
+      in: query
+      schema:
+        type: string
+      description: Token for pagination
+    - name: statusEquals
+      in: query
+      schema:
+        type: string
+        enum: [Submitted, InProgress, Completed, Failed, Stopping, Stopped, PartiallyCompleted, Expired, Validating,
+            Scheduled]
+      description: Filter by status
+    - name: nameContains
+      in: query
+      schema:
+        type: string
+      description: Filter by job name containing this string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/batch.yaml#/BedrockBatchListResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+batch-job-by-id:
+  get:
+    operationId: bedrockRetrieveBatchJob
+    summary: Retrieve batch inference job (Bedrock format)
+    description: |
+      Retrieves a batch inference job using AWS Bedrock format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: jobIdentifier
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Job identifier
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/batch.yaml#/BedrockBatchJobResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+batch-job-cancel:
+  post:
+    operationId: bedrockCancelBatchJob
+    summary: Cancel batch inference job (Bedrock format)
+    description: |
+      Cancels a batch inference job using AWS Bedrock format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: jobIdentifier
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Job identifier to cancel
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/batch.yaml#/BedrockBatchCancelResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/bedrock/converse.yaml
+++ b/docs/openapi/paths/integrations/bedrock/converse.yaml
@@ -0,0 +1,93 @@
+# AWS Bedrock - Converse Endpoints
+
+converse:
+  post:
+    operationId: bedrockConverse
+    summary: Converse with model (Bedrock format)
+    description: |
+      Sends messages to a model using AWS Bedrock Converse API format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID (e.g., anthropic.claude-3-sonnet-20240229-v1:0)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+converse-stream:
+  post:
+    operationId: bedrockConverseStream
+    summary: Stream converse with model (Bedrock format)
+    description: |
+      Streams messages from a model using AWS Bedrock Converse API format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID (e.g., anthropic.claude-3-sonnet-20240229-v1:0)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          application/x-amz-eventstream:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/bedrock/invoke.yaml
+++ b/docs/openapi/paths/integrations/bedrock/invoke.yaml
@@ -0,0 +1,95 @@
+# AWS Bedrock - Invoke Endpoints
+
+invoke:
+  post:
+    operationId: bedrockInvokeModel
+    summary: Invoke model (Bedrock format)
+    description: |
+      Invokes a model using AWS Bedrock InvokeModel API format.
+      Accepts raw model-specific request body.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID (e.g., anthropic.claude-3-sonnet-20240229-v1:0)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/invoke.yaml#/BedrockInvokeRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/invoke.yaml#/BedrockInvokeResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+invoke-stream:
+  post:
+    operationId: bedrockInvokeModelStream
+    summary: Invoke model with streaming (Bedrock format)
+    description: |
+      Invokes a model with streaming using AWS Bedrock InvokeModelWithResponseStream API format.
+    tags:
+    - Bedrock Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID (e.g., anthropic.claude-3-sonnet-20240229-v1:0)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/invoke.yaml#/BedrockInvokeRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          application/x-amz-eventstream:
+            schema:
+              type: object
+              description: AWS event stream format
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/cohere/chat.yaml
+++ b/docs/openapi/paths/integrations/cohere/chat.yaml
@@ -0,0 +1,43 @@
+# Cohere - Chat Endpoints
+
+chat:
+  post:
+    operationId: cohereChatV2
+    summary: Chat with model (Cohere v2 format)
+    description: |
+      Sends a chat request using Cohere v2 API format.
+    tags:
+    - Cohere Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/cohere/embed.yaml
+++ b/docs/openapi/paths/integrations/cohere/embed.yaml
@@ -0,0 +1,40 @@
+# Cohere - Embed Endpoints
+
+embed:
+  post:
+    operationId: cohereEmbedV2
+    summary: Create embeddings (Cohere v2 format)
+    description: |
+      Creates embeddings using Cohere v2 API format.
+    tags:
+    - Cohere Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/cohere/tokenize.yaml
+++ b/docs/openapi/paths/integrations/cohere/tokenize.yaml
@@ -0,0 +1,40 @@
+# Cohere - Tokenize Endpoints
+
+tokenize:
+  post:
+    operationId: cohereTokenize
+    summary: Tokenize text (Cohere format)
+    description: |
+      Tokenizes text using Cohere v1 API format.
+    tags:
+    - Cohere Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/genai/files.yaml
+++ b/docs/openapi/paths/integrations/genai/files.yaml
@@ -0,0 +1,176 @@
+# Google GenAI (Gemini) - Files Endpoints
+
+files-upload:
+  post:
+    operationId: geminiUploadFile
+    summary: Upload file (Gemini format)
+    description: |
+      Uploads a file using Google Gemini API format.
+
+      This is a multipart upload with two parts:
+      - "metadata": JSON object containing file metadata
+      - "file": Binary file content
+
+      Note: Direct file content download is not supported by Gemini Files API.
+      Use the file.uri field from the response to access uploaded files.
+    tags:
+    - GenAI Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/genai/files.yaml#/GeminiFileUploadRequest'
+          encoding:
+            metadata:
+              contentType: application/json
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/files.yaml#/GeminiFileUploadResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+files:
+  get:
+    operationId: geminiListFiles
+    summary: List files (Gemini format)
+    description: |
+      Lists uploaded files in Google Gemini API format.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: pageSize
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of files to return
+    - name: pageToken
+      in: query
+      schema:
+        type: string
+      description: Page token for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/files.yaml#/GeminiFileListResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+files-by-id:
+  get:
+    operationId: geminiRetrieveFile
+    summary: Retrieve file (Gemini format)
+    description: |
+      Retrieves file metadata in Google Gemini API format.
+
+      Note: This endpoint returns file metadata only. Direct file content
+      download is not supported by Gemini Files API. Use the file.uri
+      field from the response to access the file content.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/files.yaml#/GeminiFileRetrieveResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: geminiDeleteFile
+    summary: Delete file (Gemini format)
+    description: |
+      Deletes a file in Google Gemini API format.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID to delete
+    responses:
+      '200':
+        description: Successful response (empty)
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/files.yaml#/GeminiFileDeleteResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/genai/generation.yaml
+++ b/docs/openapi/paths/integrations/genai/generation.yaml
@@ -0,0 +1,241 @@
+# Google GenAI (Gemini) - Generation Endpoints
+
+generate-content:
+  post:
+    operationId: geminiGenerateContent
+    summary: Generate content (Gemini format)
+    description: |
+      Generates content using Google Gemini API format.
+      The model is specified in the URL path.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., gemini-pro:generateContent)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+stream-generate-content:
+  post:
+    operationId: geminiStreamGenerateContent
+    summary: Stream generate content (Gemini format)
+    description: |
+      Streams content generation using Google Gemini API format.
+      The model is specified in the URL path.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., gemini-pro:streamGenerateContent)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embed-content:
+  post:
+    operationId: geminiEmbedContent
+    summary: Embed content (Gemini format)
+    description: |
+      Creates embeddings using Google Gemini API format.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., embedding-001:embedContent)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiEmbeddingResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+count-tokens:
+  post:
+    operationId: geminiCountTokens
+    summary: Count tokens (Gemini format)
+    description: |
+      Counts tokens using Google Gemini API format.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., gemini-pro:countTokens)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+image-generation:
+  post:
+    operationId: geminiGenerateImage
+    summary: Generate image (Gemini format)
+    description: |
+      Generates images using Google Gemini API format. For Imagen models, use the `:predict` suffix (e.g., `imagen-3.0-generate-001:predict`).
+      For Gemini models, use `:generateContent` with `generationConfig.responseModalities: ["IMAGE"]` in the request body.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: |
+        Model name with action suffix. For Imagen models, use `:predict` (e.g., `imagen-3.0-generate-001:predict`).
+        For Gemini models with image generation, use `:generateContent` (e.g., `gemini-1.5-pro:generateContent`).
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns JSON with generated image data in `candidates[0].content.parts[0].inlineData`.
+          When streaming, the response is delivered as Server-Sent Events (SSE).
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/genai/models.yaml
+++ b/docs/openapi/paths/integrations/genai/models.yaml
@@ -0,0 +1,45 @@
+# Google GenAI (Gemini) - Models Endpoints
+
+models:
+  get:
+    operationId: geminiListModels
+    summary: List models (Gemini format)
+    description: |
+      Lists available models in Google Gemini API format.
+    tags:
+    - GenAI Integration
+    parameters:
+    - name: pageSize
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of models to return
+    - name: pageToken
+      in: query
+      schema:
+        type: string
+      description: Page token for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiListModelsResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/langchain/anthropic.yaml
+++ b/docs/openapi/paths/integrations/langchain/anthropic.yaml
@@ -0,0 +1,83 @@
+# LangChain - Anthropic-compatible Endpoints
+# Reuses Anthropic integration schemas
+
+messages:
+  post:
+    operationId: langchainAnthropicMessages
+    summary: Create message (LangChain - Anthropic format)
+    description: |
+      Creates a message using Anthropic-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+count-tokens:
+  post:
+    operationId: langchainAnthropicCountTokens
+    summary: Count tokens (LangChain - Anthropic format)
+    description: |
+      Counts tokens using Anthropic-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/count-tokens.yaml#/AnthropicCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/count-tokens.yaml#/AnthropicCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/langchain/bedrock.yaml
+++ b/docs/openapi/paths/integrations/langchain/bedrock.yaml
@@ -0,0 +1,94 @@
+# LangChain - Bedrock-compatible Endpoints
+# Reuses Bedrock integration schemas
+
+converse:
+  post:
+    operationId: langchainBedrockConverse
+    summary: Converse with model (LangChain - Bedrock format)
+    description: |
+      Sends messages using AWS Bedrock Converse-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+converse-stream:
+  post:
+    operationId: langchainBedrockConverseStream
+    summary: Stream converse with model (LangChain - Bedrock format)
+    description: |
+      Streams messages using AWS Bedrock Converse-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          application/x-amz-eventstream:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/langchain/cohere.yaml
+++ b/docs/openapi/paths/integrations/langchain/cohere.yaml
@@ -0,0 +1,122 @@
+# LangChain - Cohere-compatible Endpoints
+# Reuses Cohere integration schemas
+
+chat:
+  post:
+    operationId: langchainCohereChat
+    summary: Chat with model (LangChain - Cohere format)
+    description: |
+      Sends a chat request using Cohere-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embed:
+  post:
+    operationId: langchainCohereEmbed
+    summary: Create embeddings (LangChain - Cohere format)
+    description: |
+      Creates embeddings using Cohere-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+tokenize:
+  post:
+    operationId: langchainCohereTokenize
+    summary: Tokenize text (LangChain - Cohere format)
+    description: |
+      Tokenizes text using Cohere-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/langchain/genai.yaml
+++ b/docs/openapi/paths/integrations/langchain/genai.yaml
@@ -0,0 +1,138 @@
+# LangChain - GenAI (Gemini) compatible Endpoints
+# Reuses GenAI integration schemas
+
+models:
+  get:
+    operationId: langchainGeminiListModels
+    summary: List models (LangChain - Gemini format)
+    description: |
+      Lists available models in Google Gemini API format via LangChain.
+    tags:
+    - LangChain Integration
+    parameters:
+    - name: pageSize
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of models to return
+    - name: pageToken
+      in: query
+      schema:
+        type: string
+      description: Page token for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiListModelsResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+generate-content:
+  post:
+    operationId: langchainGeminiGenerateContent
+    summary: Generate content (LangChain - Gemini format)
+    description: |
+      Generates content using Google Gemini-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., gemini-pro:generateContent)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+stream-generate-content:
+  post:
+    operationId: langchainGeminiStreamGenerateContent
+    summary: Stream generate content (LangChain - Gemini format)
+    description: |
+      Streams content generation using Google Gemini-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action (e.g., gemini-pro:streamGenerateContent)
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/langchain/openai.yaml
+++ b/docs/openapi/paths/integrations/langchain/openai.yaml
@@ -0,0 +1,262 @@
+# LangChain - OpenAI-compatible Endpoints
+# Reuses OpenAI integration schemas
+
+text-completions:
+  post:
+    operationId: langchainOpenAITextCompletions
+    summary: Text completions (LangChain - OpenAI format)
+    description: |
+      Creates a text completion using OpenAI-compatible format via LangChain.
+      This is the legacy completions API.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+chat-completions:
+  post:
+    operationId: langchainOpenAIChatCompletions
+    summary: Chat completions (LangChain - OpenAI format)
+    description: |
+      Creates a chat completion using OpenAI-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embeddings:
+  post:
+    operationId: langchainOpenAIEmbeddings
+    summary: Create embeddings (LangChain - OpenAI format)
+    description: |
+      Creates embeddings using OpenAI-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/embeddings.yaml#/EmbeddingResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+models:
+  get:
+    operationId: langchainOpenAIListModels
+    summary: List models (LangChain - OpenAI format)
+    description: |
+      Lists available models using OpenAI-compatible format via LangChain.
+    tags:
+    - LangChain Integration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/common.yaml#/OpenAIListModelsResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses:
+  post:
+    operationId: langchainOpenAIResponses
+    summary: Create response (LangChain - OpenAI Responses API)
+    description: |
+      Creates a response using OpenAI Responses API format via LangChain.
+      Supports streaming via SSE.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses-input-tokens:
+  post:
+    operationId: langchainOpenAICountInputTokens
+    summary: Count input tokens (LangChain - OpenAI format)
+    description: |
+      Counts the number of tokens in a Responses API request via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/count-tokens.yaml#/CountTokensResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+speech:
+  post:
+    operationId: langchainOpenAISpeech
+    summary: Create speech (LangChain - OpenAI TTS)
+    description: |
+      Generates audio from text using OpenAI TTS via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+transcriptions:
+  post:
+    operationId: langchainOpenAITranscriptions
+    summary: Create transcription (LangChain - OpenAI Whisper)
+    description: |
+      Transcribes audio into text using OpenAI Whisper via LangChain.
+    tags:
+    - LangChain Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/litellm/anthropic.yaml
+++ b/docs/openapi/paths/integrations/litellm/anthropic.yaml
@@ -0,0 +1,44 @@
+# LiteLLM - Anthropic-compatible Endpoints
+# Reuses Anthropic integration schemas
+
+messages:
+  post:
+    operationId: litellmAnthropicMessages
+    summary: Create message (LiteLLM - Anthropic format)
+    description: |
+      Creates a message using Anthropic-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/litellm/bedrock.yaml
+++ b/docs/openapi/paths/integrations/litellm/bedrock.yaml
@@ -0,0 +1,94 @@
+# LiteLLM - Bedrock-compatible Endpoints
+# Reuses Bedrock integration schemas
+
+converse:
+  post:
+    operationId: litellmBedrockConverse
+    summary: Converse with model (LiteLLM - Bedrock format)
+    description: |
+      Sends messages using AWS Bedrock Converse-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+converse-stream:
+  post:
+    operationId: litellmBedrockConverseStream
+    summary: Stream converse with model (LiteLLM - Bedrock format)
+    description: |
+      Streams messages using AWS Bedrock Converse-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          application/x-amz-eventstream:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/litellm/cohere.yaml
+++ b/docs/openapi/paths/integrations/litellm/cohere.yaml
@@ -0,0 +1,122 @@
+# LiteLLM - Cohere-compatible Endpoints
+# Reuses Cohere integration schemas
+
+chat:
+  post:
+    operationId: litellmCohereChat
+    summary: Chat with model (LiteLLM - Cohere format)
+    description: |
+      Sends a chat request using Cohere-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embed:
+  post:
+    operationId: litellmCohereEmbed
+    summary: Create embeddings (LiteLLM - Cohere format)
+    description: |
+      Creates embeddings using Cohere-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+tokenize:
+  post:
+    operationId: litellmCohereTokenize
+    summary: Tokenize text (LiteLLM - Cohere format)
+    description: |
+      Tokenizes text using Cohere-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/litellm/genai.yaml
+++ b/docs/openapi/paths/integrations/litellm/genai.yaml
@@ -0,0 +1,138 @@
+# LiteLLM - GenAI (Gemini) compatible Endpoints
+# Reuses GenAI integration schemas
+
+models:
+  get:
+    operationId: litellmGeminiListModels
+    summary: List models (LiteLLM - Gemini format)
+    description: |
+      Lists available models in Google Gemini API format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    parameters:
+    - name: pageSize
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of models to return
+    - name: pageToken
+      in: query
+      schema:
+        type: string
+      description: Page token for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiListModelsResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+generate-content:
+  post:
+    operationId: litellmGeminiGenerateContent
+    summary: Generate content (LiteLLM - Gemini format)
+    description: |
+      Generates content using Google Gemini-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name followed by the action, e.g. `gemini-pro:generateContent`
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+stream-generate-content:
+  post:
+    operationId: litellmGeminiStreamGenerateContent
+    summary: Stream generate content (LiteLLM - Gemini format)
+    description: |
+      Streams content generation using Google Gemini-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name followed by the action, e.g. `gemini-pro:streamGenerateContent`
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/litellm/openai.yaml
+++ b/docs/openapi/paths/integrations/litellm/openai.yaml
@@ -0,0 +1,262 @@
+# LiteLLM - OpenAI-compatible Endpoints
+# Reuses OpenAI integration schemas
+
+text-completions:
+  post:
+    operationId: litellmOpenAITextCompletions
+    summary: Text completions (LiteLLM - OpenAI format)
+    description: |
+      Creates a text completion using OpenAI-compatible format via LiteLLM.
+      This is the legacy completions API.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+chat-completions:
+  post:
+    operationId: litellmOpenAIChatCompletions
+    summary: Chat completions (LiteLLM - OpenAI format)
+    description: |
+      Creates a chat completion using OpenAI-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embeddings:
+  post:
+    operationId: litellmOpenAIEmbeddings
+    summary: Create embeddings (LiteLLM - OpenAI format)
+    description: |
+      Creates embeddings using OpenAI-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/embeddings.yaml#/EmbeddingResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+models:
+  get:
+    operationId: litellmOpenAIListModels
+    summary: List models (LiteLLM - OpenAI format)
+    description: |
+      Lists available models using OpenAI-compatible format via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/common.yaml#/OpenAIListModelsResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses:
+  post:
+    operationId: litellmOpenAIResponses
+    summary: Create response (LiteLLM - OpenAI Responses API)
+    description: |
+      Creates a response using OpenAI Responses API format via LiteLLM.
+      Supports streaming via SSE.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses-input-tokens:
+  post:
+    operationId: litellmOpenAICountInputTokens
+    summary: Count input tokens (LiteLLM - OpenAI format)
+    description: |
+      Counts the number of tokens in a Responses API request via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/count-tokens.yaml#/CountTokensResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+speech:
+  post:
+    operationId: litellmOpenAISpeech
+    summary: Create speech (LiteLLM - OpenAI TTS)
+    description: |
+      Generates audio from text using OpenAI TTS via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+transcriptions:
+  post:
+    operationId: litellmOpenAITranscriptions
+    summary: Create transcription (LiteLLM - OpenAI Whisper)
+    description: |
+      Transcribes audio into text using OpenAI Whisper via LiteLLM.
+    tags:
+    - LiteLLM Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/audio.yaml
+++ b/docs/openapi/paths/integrations/openai/audio.yaml
@@ -0,0 +1,188 @@
+# OpenAI Integration - Audio Endpoints (Speech and Transcription)
+
+speech:
+  post:
+    operationId: openaiCreateSpeech
+    summary: Create speech (OpenAI TTS)
+    description: |
+      Generates audio from text using OpenAI TTS.
+      Supports streaming via SSE when stream_format is set to 'sse'.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/audio/speech`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          audio/opus:
+            schema:
+              type: string
+              format: binary
+          audio/aac:
+            schema:
+              type: string
+              format: binary
+          audio/flac:
+            schema:
+              type: string
+              format: binary
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-speech:
+  post:
+    operationId: azureCreateSpeech
+    summary: Create speech (Azure OpenAI TTS)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          audio/opus:
+            schema:
+              type: string
+              format: binary
+          audio/aac:
+            schema:
+              type: string
+              format: binary
+          audio/flac:
+            schema:
+              type: string
+              format: binary
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+transcriptions:
+  post:
+    operationId: openaiCreateTranscription
+    summary: Create transcription (OpenAI Whisper)
+    description: |
+      Transcribes audio into text using OpenAI Whisper.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/audio/transcriptions`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-transcriptions:
+  post:
+    operationId: azureCreateTranscription
+    summary: Create transcription (Azure OpenAI)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/batch.yaml
+++ b/docs/openapi/paths/integrations/openai/batch.yaml
@@ -0,0 +1,154 @@
+# OpenAI Integration - Batch API Endpoints
+
+batches:
+  post:
+    operationId: openaiCreateBatch
+    summary: Create batch job (OpenAI format)
+    description: |
+      Creates a batch processing job.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/batches`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/batch.yaml#/OpenAIBatchCreateRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/batch.yaml#/OpenAIBatchCreateResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: openaiListBatches
+    summary: List batch jobs (OpenAI format)
+    description: |
+      Lists batch processing jobs.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/batches`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: limit
+      in: query
+      schema:
+        type: integer
+        default: 30
+      description: Maximum number of batches to return
+    - name: after
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Filter by provider
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/batch.yaml#/OpenAIBatchListResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-by-id:
+  get:
+    operationId: openaiRetrieveBatch
+    summary: Retrieve batch job (OpenAI format)
+    description: |
+      Retrieves details of a batch processing job.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/batches/{batch_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Batch job ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the batch
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/batch.yaml#/OpenAIBatchRetrieveResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+batches-cancel:
+  post:
+    operationId: openaiCancelBatch
+    summary: Cancel batch job (OpenAI format)
+    description: |
+      Cancels a batch processing job.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/batches/{batch_id}/cancel`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: batch_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Batch job ID to cancel
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the batch
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/batch.yaml#/OpenAIBatchCancelResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/chat.yaml
+++ b/docs/openapi/paths/integrations/openai/chat.yaml
@@ -0,0 +1,111 @@
+# OpenAI Integration - Chat Completions Endpoints
+
+chat-completions:
+  post:
+    operationId: openaiCreateChatCompletion
+    summary: Create chat completion (OpenAI format)
+    description: |
+      Creates a chat completion using OpenAI-compatible format.
+      Supports streaming via SSE.
+
+      **Async inference:** Send `x-bf-async: true` to submit the request as a background job and receive a job ID immediately. Poll with `x-bf-async-id: <job-id>` to retrieve the result. When the job is still processing, the response will have an empty `choices` array. When completed, `choices` will contain the full result. See [Async Inference](/features/async-inference) for details.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/chat/completions`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: x-bf-async
+      in: header
+      required: false
+      schema:
+        type: string
+        enum: ["true"]
+      description: Set to `true` to submit this request as an async job. Returns immediately with a job ID. Not 
+        compatible with streaming.
+    - name: x-bf-async-id
+      in: header
+      required: false
+      schema:
+        type: string
+      description: Poll for results of a previously submitted async job by providing the job ID returned from the 
+        initial async request.
+    - name: x-bf-async-job-result-ttl
+      in: header
+      required: false
+      schema:
+        type: integer
+        default: 3600
+      description: Override the default result TTL in seconds. Results expire after this duration from completion time.
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# Azure deployment path
+azure-chat-completions:
+  post:
+    operationId: azureCreateChatCompletion
+    summary: Create chat completion (Azure OpenAI)
+    description: |
+      Creates a chat completion using Azure OpenAI deployment.
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+      description: Azure API version
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/containers.yaml
+++ b/docs/openapi/paths/integrations/openai/containers.yaml
@@ -0,0 +1,397 @@
+# OpenAI Integration - Containers API Endpoints
+
+containers:
+  post:
+    operationId: openaiCreateContainer
+    summary: Create container (OpenAI format)
+    description: |
+      Creates a new container for storing files and data.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/inference/containers.yaml#/ContainerCreateRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerCreateResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: openaiListContainers
+    summary: List containers (OpenAI format)
+    description: |
+      Lists containers for a provider.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider to list containers for (defaults to openai)
+    - name: limit
+      in: query
+      schema:
+        type: integer
+      description: Maximum containers to return
+    - name: after
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    - name: order
+      in: query
+      schema:
+        type: string
+        enum: [asc, desc]
+      description: Sort order
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerListResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+containers-by-id:
+  get:
+    operationId: openaiRetrieveContainer
+    summary: Retrieve container (OpenAI format)
+    description: |
+      Retrieves a specific container by ID.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerRetrieveResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: openaiDeleteContainer
+    summary: Delete container (OpenAI format)
+    description: |
+      Deletes a container.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID to delete
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerDeleteResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# =============================================================================
+# CONTAINER FILES ENDPOINTS
+# =============================================================================
+
+container-files:
+  post:
+    operationId: openaiCreateContainerFile
+    summary: Create file in container (OpenAI format)
+    description: |
+      Creates a new file in a container. You can either upload file content directly
+      via multipart/form-data or reference an existing file by its ID.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}/files`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/inference/containers.yaml#/ContainerFileCreateMultipartRequest'
+        application/json:
+          schema:
+            $ref: '../../../schemas/inference/containers.yaml#/ContainerFileCreateJsonRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerFileCreateResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: openaiListContainerFiles
+    summary: List files in container (OpenAI format)
+    description: |
+      Lists all files in a container.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}/files`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    - name: limit
+      in: query
+      schema:
+        type: integer
+      description: Maximum files to return
+    - name: after
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    - name: order
+      in: query
+      schema:
+        type: string
+        enum: [asc, desc]
+      description: Sort order
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerFileListResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+container-files-by-id:
+  get:
+    operationId: openaiRetrieveContainerFile
+    summary: Retrieve file from container (OpenAI format)
+    description: |
+      Retrieves metadata for a specific file in a container.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}/files/{file_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerFileRetrieveResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: openaiDeleteContainerFile
+    summary: Delete file from container (OpenAI format)
+    description: |
+      Deletes a file from a container.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}/files/{file_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID to delete
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/containers.yaml#/ContainerFileDeleteResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+container-files-content:
+  get:
+    operationId: openaiGetContainerFileContent
+    summary: Get file content from container (OpenAI format)
+    description: |
+      Downloads the content of a file from a container.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/containers/{container_id}/files/{file_id}/content`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: container_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Container ID
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the container (defaults to openai)
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/embeddings.yaml
+++ b/docs/openapi/paths/integrations/openai/embeddings.yaml
@@ -0,0 +1,75 @@
+# OpenAI Integration - Embeddings Endpoints
+
+embeddings:
+  post:
+    operationId: openaiCreateEmbedding
+    summary: Create embeddings (OpenAI format)
+    description: |
+      Creates embedding vectors for the input text.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/embeddings`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-embeddings:
+  post:
+    operationId: azureCreateEmbedding
+    summary: Create embeddings (Azure OpenAI)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/files.yaml
+++ b/docs/openapi/paths/integrations/openai/files.yaml
@@ -0,0 +1,201 @@
+# OpenAI Integration - Files API Endpoints
+
+files:
+  post:
+    operationId: openaiUploadFile
+    summary: Upload file (OpenAI format)
+    description: |
+      Uploads a file for use with batch processing or other features.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/files`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/files.yaml#/OpenAIFileUploadRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/files.yaml#/OpenAIFileUploadResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  get:
+    operationId: openaiListFiles
+    summary: List files (OpenAI format)
+    description: |
+      Lists uploaded files.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/files`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: purpose
+      in: query
+      schema:
+        type: string
+      description: Filter by purpose
+    - name: limit
+      in: query
+      schema:
+        type: integer
+      description: Maximum files to return
+    - name: after
+      in: query
+      schema:
+        type: string
+      description: Cursor for pagination
+    - name: order
+      in: query
+      schema:
+        type: string
+        enum: [asc, desc]
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Filter by provider
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/files.yaml#/OpenAIFileListResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-by-id:
+  get:
+    operationId: openaiRetrieveFile
+    summary: Retrieve file metadata (OpenAI format)
+    description: |
+      Retrieves metadata for an uploaded file.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/files/{file_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the file
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/files.yaml#/OpenAIFileRetrieveResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+  delete:
+    operationId: openaiDeleteFile
+    summary: Delete file (OpenAI format)
+    description: |
+      Deletes an uploaded file.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/files/{file_id}`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID to delete
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the file
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/files.yaml#/OpenAIFileDeleteResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+files-content:
+  get:
+    operationId: openaiGetFileContent
+    summary: Get file content (OpenAI format)
+    description: |
+      Retrieves the content of an uploaded file.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/files/{file_id}/content`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: file_id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: File ID
+    - name: provider
+      in: query
+      schema:
+        type: string
+      description: Provider for the file
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/images.yaml
+++ b/docs/openapi/paths/integrations/openai/images.yaml
@@ -0,0 +1,88 @@
+# OpenAI Integration - Image Generation Endpoints
+
+image-generation:
+  post:
+    operationId: openaiCreateImage
+    summary: Create image
+    description: |
+      Generates images from text prompts using OpenAI-compatible format.
+
+      **Note:** Azure OpenAI deployments are also supported via the Azure integration endpoint.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/images/generations`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageGenerationRequest'
+    responses:
+      '200':
+        description: |
+          Successful response. Returns JSON for non-streaming requests, or Server-Sent Events (SSE) stream when `stream=true`.
+          When streaming, each event contains a chunk of the image as base64 data, with the final event having type `image_generation.completed`.
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageGenerationResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-image-generation:
+  post:
+    operationId: azureCreateImage
+    summary: Create image (Azure OpenAI)
+    description: |
+      Generates images from text prompts using Azure OpenAI deployment.
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+      description: Azure API version
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageGenerationRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageGenerationResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/images.yaml#/OpenAIImageStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/models.yaml
+++ b/docs/openapi/paths/integrations/openai/models.yaml
@@ -0,0 +1,63 @@
+# OpenAI Integration - Models Endpoints
+
+models:
+  get:
+    operationId: openaiListModels
+    summary: List models (OpenAI format)
+    description: |
+      Lists available models in OpenAI format.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/models`).
+    tags:
+    - OpenAI Integration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/common.yaml#/OpenAIListModelsResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-models:
+  get:
+    operationId: azureListModels
+    summary: List models (Azure OpenAI)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/common.yaml#/OpenAIListModelsResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/responses.yaml
+++ b/docs/openapi/paths/integrations/openai/responses.yaml
@@ -0,0 +1,139 @@
+# OpenAI Integration - Responses API Endpoints
+
+responses:
+  post:
+    operationId: openaiCreateResponse
+    summary: Create response (OpenAI Responses API)
+    description: |
+      Creates a response using OpenAI Responses API format.
+      Supports streaming via SSE.
+
+      **Async inference:** Send `x-bf-async: true` to submit the request as a background job and receive a job ID immediately. Poll with `x-bf-async-id: <job-id>` to retrieve the result. When the job is still processing, the response `status` will not be `completed`. When completed, the full response with `output_text` will be returned. See [Async Inference](/features/async-inference) for details.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/responses`).
+    tags:
+    - OpenAI Integration
+    parameters:
+    - name: x-bf-async
+      in: header
+      required: false
+      schema:
+        type: string
+        enum: ["true"]
+      description: Set to `true` to submit this request as an async job. Returns immediately with a job ID. Not 
+        compatible with streaming.
+    - name: x-bf-async-id
+      in: header
+      required: false
+      schema:
+        type: string
+      description: Poll for results of a previously submitted async job by providing the job ID returned from the 
+        initial async request.
+    - name: x-bf-async-job-result-ttl
+      in: header
+      required: false
+      schema:
+        type: integer
+        default: 3600
+      description: Override the default result TTL in seconds. Results expire after this duration from completion time.
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-responses:
+  post:
+    operationId: azureCreateResponse
+    summary: Create response (Azure OpenAI)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+
+# Input tokens endpoint (count tokens for Responses API format)
+responses-input-tokens:
+  post:
+    operationId: openaiCountInputTokens
+    summary: Count input tokens
+    description: |
+      Counts the number of tokens in a Responses API request.
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/count-tokens.yaml#/CountTokensResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/openai/text.yaml
+++ b/docs/openapi/paths/integrations/openai/text.yaml
@@ -0,0 +1,82 @@
+# OpenAI Integration - Text Completions Endpoints (Legacy)
+
+text-completions:
+  post:
+    operationId: openaiCreateTextCompletion
+    summary: Create text completion (OpenAI format)
+    description: |
+      Creates a text completion using OpenAI-compatible format.
+      This is the legacy completions API.
+
+      **Note:** This endpoint also works without the `/v1` prefix (e.g., `/openai/completions`).
+    tags:
+    - OpenAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+azure-text-completions:
+  post:
+    operationId: azureCreateTextCompletion
+    summary: Create text completion (Azure OpenAI)
+    tags:
+    - OpenAI Integration
+    - Azure Integration
+    parameters:
+    - name: deployment-id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Azure deployment ID
+    - name: api-version
+      in: query
+      schema:
+        type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/pydanticai/anthropic.yaml
+++ b/docs/openapi/paths/integrations/pydanticai/anthropic.yaml
@@ -0,0 +1,44 @@
+# PydanticAI - Anthropic-compatible Endpoints
+# Reuses Anthropic integration schemas
+
+messages:
+  post:
+    operationId: pydanticaiAnthropicMessages
+    summary: Create message (PydanticAI - Anthropic format)
+    description: |
+      Creates a message using Anthropic-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicMessageResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/messages.yaml#/AnthropicStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/anthropic/common.yaml#/AnthropicError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/pydanticai/bedrock.yaml
+++ b/docs/openapi/paths/integrations/pydanticai/bedrock.yaml
@@ -0,0 +1,94 @@
+# PydanticAI - Bedrock-compatible Endpoints
+# Reuses Bedrock integration schemas
+
+converse:
+  post:
+    operationId: pydanticaiBedrockConverse
+    summary: Converse with model (PydanticAI - Bedrock format)
+    description: |
+      Sends messages using AWS Bedrock Converse-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+converse-stream:
+  post:
+    operationId: pydanticaiBedrockConverseStream
+    summary: Stream converse with model (PydanticAI - Bedrock format)
+    description: |
+      Streams messages using AWS Bedrock Converse-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    parameters:
+    - name: modelId
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model ID
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockConverseRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          application/x-amz-eventstream:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/converse.yaml#/BedrockStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/bedrock/common.yaml#/BedrockError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/pydanticai/cohere.yaml
+++ b/docs/openapi/paths/integrations/pydanticai/cohere.yaml
@@ -0,0 +1,122 @@
+# PydanticAI - Cohere-compatible Endpoints
+# Reuses Cohere integration schemas
+
+chat:
+  post:
+    operationId: pydanticaiCohereChat
+    summary: Chat with model (PydanticAI - Cohere format)
+    description: |
+      Sends a chat request using Cohere-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/chat.yaml#/CohereChatStreamEvent'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embed:
+  post:
+    operationId: pydanticaiCohereEmbed
+    summary: Create embeddings (PydanticAI - Cohere format)
+    description: |
+      Creates embeddings using Cohere-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/embed.yaml#/CohereEmbeddingResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+tokenize:
+  post:
+    operationId: pydanticaiCohereTokenize
+    summary: Tokenize text (PydanticAI - Cohere format)
+    description: |
+      Tokenizes text using Cohere v1 API format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/tokenize.yaml#/CohereCountTokensResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/cohere/common.yaml#/CohereError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/pydanticai/genai.yaml
+++ b/docs/openapi/paths/integrations/pydanticai/genai.yaml
@@ -0,0 +1,138 @@
+# PydanticAI - GenAI (Gemini) compatible Endpoints
+# Reuses GenAI integration schemas
+
+models:
+  get:
+    operationId: pydanticaiGeminiListModels
+    summary: List models (PydanticAI - Gemini format)
+    description: |
+      Lists available models in Google Gemini API format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    parameters:
+    - name: pageSize
+      in: query
+      schema:
+        type: integer
+      description: Maximum number of models to return
+    - name: pageToken
+      in: query
+      schema:
+        type: string
+      description: Page token for pagination
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiListModelsResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+generate-content:
+  post:
+    operationId: pydanticaiGeminiGenerateContent
+    summary: Generate content (PydanticAI - Gemini format)
+    description: |
+      Generates content using Google Gemini-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+stream-generate-content:
+  post:
+    operationId: pydanticaiGeminiStreamGenerateContent
+    summary: Stream generate content (PydanticAI - Gemini format)
+    description: |
+      Streams content generation using Google Gemini-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    parameters:
+    - name: model
+      in: path
+      required: true
+      schema:
+        type: string
+      description: Model name with action
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationRequest'
+    responses:
+      '200':
+        description: Successful streaming response
+        content:
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/genai/generation.yaml#/GeminiGenerationResponse'
+      '400':
+        description: Bad request
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+      '500':
+        description: Internal server error
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/genai/common.yaml#/GeminiError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/integrations/pydanticai/openai.yaml
+++ b/docs/openapi/paths/integrations/pydanticai/openai.yaml
@@ -0,0 +1,262 @@
+# PydanticAI - OpenAI-compatible Endpoints
+# Reuses OpenAI integration schemas
+
+text-completions:
+  post:
+    operationId: pydanticaiOpenAITextCompletions
+    summary: Text completions (PydanticAI - OpenAI format)
+    description: |
+      Creates a text completion using OpenAI-compatible format via PydanticAI.
+      This is the legacy completions API.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/text.yaml#/OpenAITextCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+chat-completions:
+  post:
+    operationId: pydanticaiOpenAIChatCompletions
+    summary: Chat completions (PydanticAI - OpenAI format)
+    description: |
+      Creates a chat completion using OpenAI-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/chat.yaml#/OpenAIChatRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/inference/chat.yaml#/ChatCompletionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+embeddings:
+  post:
+    operationId: pydanticaiOpenAIEmbeddings
+    summary: Create embeddings (PydanticAI - OpenAI format)
+    description: |
+      Creates embeddings using OpenAI-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/embeddings.yaml#/OpenAIEmbeddingRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/embeddings.yaml#/EmbeddingResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+models:
+  get:
+    operationId: pydanticaiOpenAIListModels
+    summary: List models (PydanticAI - OpenAI format)
+    description: |
+      Lists available models using OpenAI-compatible format via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/common.yaml#/OpenAIListModelsResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses:
+  post:
+    operationId: pydanticaiOpenAIResponses
+    summary: Create response (PydanticAI - OpenAI Responses API)
+    description: |
+      Creates a response using OpenAI Responses API format via PydanticAI.
+      Supports streaming via SSE.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+responses-input-tokens:
+  post:
+    operationId: pydanticaiOpenAICountInputTokens
+    summary: Count input tokens (PydanticAI - OpenAI format)
+    description: |
+      Counts the number of tokens in a Responses API request via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/responses.yaml#/OpenAIResponsesRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/inference/count-tokens.yaml#/CountTokensResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+speech:
+  post:
+    operationId: pydanticaiOpenAISpeech
+    summary: Create speech (PydanticAI - OpenAI TTS)
+    description: |
+      Generates audio from text using OpenAI TTS via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          audio/mpeg:
+            schema:
+              type: string
+              format: binary
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAISpeechStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
+transcriptions:
+  post:
+    operationId: pydanticaiOpenAITranscriptions
+    summary: Create transcription (PydanticAI - OpenAI Whisper)
+    description: |
+      Transcribes audio into text using OpenAI Whisper via PydanticAI.
+    tags:
+    - PydanticAI Integration
+    requestBody:
+      required: true
+      content:
+        multipart/form-data:
+          schema:
+            $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionRequest'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionResponse'
+          text/event-stream:
+            schema:
+              $ref: '../../../schemas/integrations/openai/audio.yaml#/OpenAITranscriptionStreamResponse'
+      '400':
+        $ref: '../../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../../openapi.yaml#/components/responses/InternalError'
+    security:
+    - BearerAuth: []
+    - BasicAuth: []
+    - VirtualKeyAuth: []
+    - ApiKeyAuth: []
--- a/docs/openapi/paths/management/cache.yaml
+++ b/docs/openapi/paths/management/cache.yaml
@@ -0,0 +1,51 @@
+clear-by-request-id:
+  delete:
+    operationId: clearCacheByRequestId
+    summary: Clear cache by request ID
+    description: Clears cache entries associated with a specific request ID.
+    tags:
+      - Cache
+    parameters:
+      - name: requestId
+        in: path
+        required: true
+        description: Request ID to clear cache for
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Cache cleared successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/cache.yaml#/ClearCacheResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+clear-by-cache-key:
+  delete:
+    operationId: clearCacheByCacheKey
+    summary: Clear cache by cache key
+    description: Clears a cache entry by its direct cache key.
+    tags:
+      - Cache
+    parameters:
+      - name: cacheKey
+        in: path
+        required: true
+        description: Cache key to clear
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Cache cleared successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/cache.yaml#/ClearCacheResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/config.yaml
+++ b/docs/openapi/paths/management/config.yaml
@@ -0,0 +1,136 @@
+config:
+  get:
+    operationId: getConfig
+    summary: Get configuration
+    description: |
+      Retrieves the current Bifrost configuration including client config, framework config,
+      auth config, and connection status for various stores.
+    tags:
+      - Configuration
+    parameters:
+      - name: from_db
+        in: query
+        description: If true, fetch configuration directly from the database
+        schema:
+          type: string
+          enum: ["true", "false"]
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/config.yaml#/GetConfigResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateConfig
+    summary: Update configuration
+    description: |
+      Updates the Bifrost configuration. Supports hot-reloading of certain settings
+      like drop_excess_requests. Some settings may require a restart to take effect.
+    tags:
+      - Configuration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/config.yaml#/UpdateConfigRequest'
+    responses:
+      '200':
+        description: Configuration updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+version:
+  get:
+    operationId: getVersion
+    summary: Get version
+    description: Returns the current Bifrost version information.
+    tags:
+      - Configuration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/config.yaml#/Version'
+
+proxy-config:
+  get:
+    operationId: getProxyConfig
+    summary: Get proxy configuration
+    description: Retrieves the current global proxy configuration.
+    tags:
+      - Configuration
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/config.yaml#/ProxyConfig'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+      '503':
+        description: Config store not available
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+  put:
+    operationId: updateProxyConfig
+    summary: Update proxy configuration
+    description: Updates the global proxy configuration.
+    tags:
+      - Configuration
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/config.yaml#/ProxyConfig'
+    responses:
+      '200':
+        description: Proxy configuration updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+force-sync-pricing:
+  post:
+    operationId: forceSyncPricing
+    summary: Force pricing sync
+    description: Triggers an immediate pricing sync and resets the pricing sync timer.
+    tags:
+      - Configuration
+    responses:
+      '200':
+        description: Pricing sync triggered successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+      '503':
+        description: Config store not available
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
--- a/docs/openapi/paths/management/governance.yaml
+++ b/docs/openapi/paths/management/governance.yaml
--- a/docs/openapi/paths/management/health.yaml
+++ b/docs/openapi/paths/management/health.yaml
@@ -0,0 +1,22 @@
+health:
+  get:
+    operationId: getHealth
+    summary: Health check
+    description: |
+      Returns the health status of the Bifrost server. Checks connectivity to config store,
+      log store, and vector store if configured.
+    tags:
+      - Health
+    responses:
+      '200':
+        description: Server is healthy
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/health.yaml#/HealthResponse'
+      '503':
+        description: Service unavailable
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
--- a/docs/openapi/paths/management/infrastructure.yaml
+++ b/docs/openapi/paths/management/infrastructure.yaml
@@ -0,0 +1,122 @@
+# Infrastructure endpoints (WebSocket, MCP Server, Metrics)
+
+websocket:
+  get:
+    operationId: websocketConnect
+    summary: WebSocket connection
+    description: |
+      Upgrades to a WebSocket connection for real-time updates.
+      Server pushes log events, MCP log events, and store update notifications.
+      Heartbeat pings are sent every 30 seconds.
+    tags:
+      - Infrastructure
+    responses:
+      '101':
+        description: WebSocket upgrade successful
+
+mcp-server:
+  post:
+    operationId: mcpServerMessage
+    summary: MCP protocol message
+    description: |
+      Receives a JSON-RPC 2.0 message for the MCP protocol server.
+      Returns a JSON-RPC 2.0 response, or null for notifications.
+    tags:
+      - Infrastructure
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            type: object
+            description: JSON-RPC 2.0 request
+            properties:
+              jsonrpc:
+                type: string
+                enum: ["2.0"]
+              method:
+                type: string
+              params:
+                type: object
+              id:
+                oneOf:
+                  - type: string
+                  - type: integer
+    responses:
+      '200':
+        description: JSON-RPC 2.0 response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                jsonrpc:
+                  type: string
+                  enum: ["2.0"]
+                result:
+                  type: object
+                error:
+                  type: object
+                  properties:
+                    code:
+                      type: integer
+                    message:
+                      type: string
+                id:
+                  oneOf:
+                    - type: string
+                    - type: integer
+    security:
+      - BearerAuth: []
+      - BasicAuth: []
+      - VirtualKeyAuth: []
+      - ApiKeyAuth: []
+  get:
+    operationId: mcpServerSSE
+    summary: MCP protocol SSE stream
+    description: |
+      Opens a Server-Sent Events stream for the MCP protocol server.
+      Returns `Content-Type: text/event-stream`.
+    tags:
+      - Infrastructure
+    responses:
+      '200':
+        description: SSE stream opened
+        content:
+          text/event-stream:
+            schema:
+              type: string
+    security:
+      - BearerAuth: []
+      - BasicAuth: []
+      - VirtualKeyAuth: []
+      - ApiKeyAuth: []
+
+metrics:
+  get:
+    operationId: getMetrics
+    summary: Prometheus metrics
+    description: Returns Prometheus-formatted metrics for monitoring.
+    tags:
+      - Infrastructure
+    responses:
+      '200':
+        description: Prometheus metrics
+        content:
+          text/plain:
+            schema:
+              type: string
+
+websocket-responses:
+  get:
+    operationId: websocketResponses
+    summary: WebSocket Responses API
+    description: |
+      Upgrades to a WebSocket connection for the streaming Responses API.
+      Clients send `response.create` events and receive streaming response events.
+      Supports authentication via Bearer token, x-api-key, or x-bf-vk headers.
+    tags:
+      - Infrastructure
+    responses:
+      '101':
+        description: WebSocket upgrade successful
--- a/docs/openapi/paths/management/logging.yaml
+++ b/docs/openapi/paths/management/logging.yaml
@@ -0,0 +1,997 @@
+logs:
+  get:
+    operationId: getLogs
+    summary: Get logs
+    description: |
+      Retrieves logs with filtering, search, and pagination via query parameters.
+    tags:
+      - Logging
+    parameters:
+      - name: providers
+        in: query
+        description: Comma-separated list of providers to filter by
+        schema:
+          type: string
+      - name: models
+        in: query
+        description: Comma-separated list of models to filter by
+        schema:
+          type: string
+      - name: status
+        in: query
+        description: Comma-separated list of statuses to filter by
+        schema:
+          type: string
+      - name: objects
+        in: query
+        description: Comma-separated list of object types to filter by
+        schema:
+          type: string
+      - name: selected_key_ids
+        in: query
+        description: Comma-separated list of selected key IDs to filter by
+        schema:
+          type: string
+      - name: virtual_key_ids
+        in: query
+        description: Comma-separated list of virtual key IDs to filter by
+        schema:
+          type: string
+      - name: routing_rule_ids
+        in: query
+        description: Comma-separated list of routing rule IDs to filter by
+        schema:
+          type: string
+      - name: routing_engine_used
+        in: query
+        description: Comma-separated list of routing engines to filter by (routing-rule, governance, or loadbalancing)
+        schema:
+          type: string
+      - name: start_time
+        in: query
+        description: Start time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: end_time
+        in: query
+        description: End time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: min_latency
+        in: query
+        description: Minimum latency filter
+        schema:
+          type: number
+      - name: max_latency
+        in: query
+        description: Maximum latency filter
+        schema:
+          type: number
+      - name: min_tokens
+        in: query
+        description: Minimum tokens filter
+        schema:
+          type: integer
+      - name: max_tokens
+        in: query
+        description: Maximum tokens filter
+        schema:
+          type: integer
+      - name: min_cost
+        in: query
+        description: Minimum cost filter
+        schema:
+          type: number
+      - name: max_cost
+        in: query
+        description: Maximum cost filter
+        schema:
+          type: number
+      - name: missing_cost_only
+        in: query
+        description: Only show logs with missing cost
+        schema:
+          type: boolean
+      - name: content_search
+        in: query
+        description: Search in request/response content
+        schema:
+          type: string
+      - name: limit
+        in: query
+        description: Number of logs to return (default 50, max 1000)
+        schema:
+          type: integer
+          default: 50
+          maximum: 1000
+      - name: offset
+        in: query
+        description: Number of logs to skip
+        schema:
+          type: integer
+          default: 0
+      - name: sort_by
+        in: query
+        description: Field to sort by
+        schema:
+          type: string
+          enum: [timestamp, latency, tokens, cost]
+          default: timestamp
+      - name: order
+        in: query
+        description: Sort order
+        schema:
+          type: string
+          enum: [asc, desc]
+          default: desc
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/SearchLogsResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteLogs
+    summary: Delete logs
+    description: Deletes logs by their IDs.
+    tags:
+      - Logging
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/logging.yaml#/DeleteLogsRequest'
+    responses:
+      '200':
+        description: Logs deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-stats:
+  get:
+    operationId: getLogsStats
+    summary: Get log statistics
+    description: Returns statistics for logs matching the specified filters.
+    tags:
+      - Logging
+    parameters:
+      - name: providers
+        in: query
+        description: Comma-separated list of providers to filter by
+        schema:
+          type: string
+      - name: models
+        in: query
+        description: Comma-separated list of models to filter by
+        schema:
+          type: string
+      - name: status
+        in: query
+        description: Comma-separated list of statuses to filter by
+        schema:
+          type: string
+      - name: objects
+        in: query
+        description: Comma-separated list of object types to filter by
+        schema:
+          type: string
+      - name: selected_key_ids
+        in: query
+        description: Comma-separated list of selected key IDs to filter by
+        schema:
+          type: string
+      - name: virtual_key_ids
+        in: query
+        description: Comma-separated list of virtual key IDs to filter by
+        schema:
+          type: string
+      - name: routing_rule_ids
+        in: query
+        description: Comma-separated list of routing rule IDs to filter by
+        schema:
+          type: string
+      - name: routing_engine_used
+        in: query
+        description: Comma-separated list of routing engines to filter by (routing-rule, governance, or loadbalancing)
+        schema:
+          type: string
+      - name: start_time
+        in: query
+        description: Start time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: end_time
+        in: query
+        description: End time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: min_latency
+        in: query
+        description: Minimum latency filter
+        schema:
+          type: number
+      - name: max_latency
+        in: query
+        description: Maximum latency filter
+        schema:
+          type: number
+      - name: min_tokens
+        in: query
+        description: Minimum tokens filter
+        schema:
+          type: integer
+      - name: max_tokens
+        in: query
+        description: Maximum tokens filter
+        schema:
+          type: integer
+      - name: min_cost
+        in: query
+        description: Minimum cost filter
+        schema:
+          type: number
+      - name: max_cost
+        in: query
+        description: Maximum cost filter
+        schema:
+          type: number
+      - name: missing_cost_only
+        in: query
+        description: Only show logs with missing cost
+        schema:
+          type: boolean
+      - name: content_search
+        in: query
+        description: Search in request/response content
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/LogStats'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-dropped:
+  get:
+    operationId: getDroppedRequests
+    summary: Get dropped requests count
+    description: Returns the number of dropped requests.
+    tags:
+      - Logging
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/DroppedRequestsResponse'
+
+logs-filterdata:
+  get:
+    operationId: getAvailableFilterData
+    summary: Get available filter data
+    description: Returns all unique filter data from logs (models, keys, virtual keys).
+    tags:
+      - Logging
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/FilterDataResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-by-id:
+  get:
+    operationId: getLogById
+    summary: Get a single log entry
+    description: Retrieves a single log entry by its ID.
+    tags:
+      - Logging
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Log entry ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/LogEntry'
+      '404':
+        description: Log not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram:
+  get:
+    operationId: getLogsHistogram
+    summary: Get request count histogram
+    description: |
+      Returns time-bucketed request counts. Bucket size is auto-calculated from the time range.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/HistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-tokens:
+  get:
+    operationId: getLogsTokenHistogram
+    summary: Get token usage histogram
+    description: Returns time-bucketed token usage (prompt, completion, total).
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/TokenHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-cost:
+  get:
+    operationId: getLogsCostHistogram
+    summary: Get cost histogram
+    description: Returns time-bucketed cost data with model breakdown.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/CostHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-models:
+  get:
+    operationId: getLogsModelHistogram
+    summary: Get model usage histogram
+    description: Returns time-bucketed model usage with success/error breakdown.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/ModelHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-latency:
+  get:
+    operationId: getLogsLatencyHistogram
+    summary: Get latency histogram
+    description: Returns time-bucketed latency statistics (avg, p90, p95, p99).
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/LatencyHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-cost-by-provider:
+  get:
+    operationId: getLogsProviderCostHistogram
+    summary: Get cost histogram by provider
+    description: Returns time-bucketed cost data with provider breakdown.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/ProviderCostHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-tokens-by-provider:
+  get:
+    operationId: getLogsProviderTokenHistogram
+    summary: Get token histogram by provider
+    description: Returns time-bucketed token usage with provider breakdown.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/ProviderTokenHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logs-histogram-latency-by-provider:
+  get:
+    operationId: getLogsProviderLatencyHistogram
+    summary: Get latency histogram by provider
+    description: Returns time-bucketed latency percentiles with provider breakdown.
+    tags:
+      - Logging
+    parameters:
+      - $ref: '#/_histogram-parameters/providers'
+      - $ref: '#/_histogram-parameters/models'
+      - $ref: '#/_histogram-parameters/status'
+      - $ref: '#/_histogram-parameters/objects'
+      - $ref: '#/_histogram-parameters/selected_key_ids'
+      - $ref: '#/_histogram-parameters/virtual_key_ids'
+      - $ref: '#/_histogram-parameters/routing_rule_ids'
+      - $ref: '#/_histogram-parameters/routing_engine_used'
+      - $ref: '#/_histogram-parameters/start_time'
+      - $ref: '#/_histogram-parameters/end_time'
+      - $ref: '#/_histogram-parameters/min_latency'
+      - $ref: '#/_histogram-parameters/max_latency'
+      - $ref: '#/_histogram-parameters/min_tokens'
+      - $ref: '#/_histogram-parameters/max_tokens'
+      - $ref: '#/_histogram-parameters/min_cost'
+      - $ref: '#/_histogram-parameters/max_cost'
+      - $ref: '#/_histogram-parameters/missing_cost_only'
+      - $ref: '#/_histogram-parameters/content_search'
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/ProviderLatencyHistogramResult'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+# Shared histogram filter parameters; pulled in by local $refs ('#/_histogram-parameters/<name>'), e.g. by getLogsProviderLatencyHistogram
+_histogram-parameters:
+  providers:
+    name: providers
+    in: query
+    description: Comma-separated list of providers to filter by
+    schema:
+      type: string
+  models:
+    name: models
+    in: query
+    description: Comma-separated list of models to filter by
+    schema:
+      type: string
+  status:
+    name: status
+    in: query
+    description: Comma-separated list of statuses to filter by
+    schema:
+      type: string
+  objects:
+    name: objects
+    in: query
+    description: Comma-separated list of object types to filter by
+    schema:
+      type: string
+  selected_key_ids:
+    name: selected_key_ids
+    in: query
+    description: Comma-separated list of selected key IDs to filter by
+    schema:
+      type: string
+  virtual_key_ids:
+    name: virtual_key_ids
+    in: query
+    description: Comma-separated list of virtual key IDs to filter by
+    schema:
+      type: string
+  routing_rule_ids:
+    name: routing_rule_ids
+    in: query
+    description: Comma-separated list of routing rule IDs to filter by
+    schema:
+      type: string
+  routing_engine_used:
+    name: routing_engine_used
+    in: query
+    description: Comma-separated list of routing engines to filter by
+    schema:
+      type: string
+  start_time:
+    name: start_time
+    in: query
+    description: Start time filter (RFC3339 format)
+    schema:
+      type: string
+      format: date-time
+  end_time:
+    name: end_time
+    in: query
+    description: End time filter (RFC3339 format)
+    schema:
+      type: string
+      format: date-time
+  min_latency:
+    name: min_latency
+    in: query
+    description: Minimum latency filter
+    schema:
+      type: number
+  max_latency:
+    name: max_latency
+    in: query
+    description: Maximum latency filter
+    schema:
+      type: number
+  min_tokens:
+    name: min_tokens
+    in: query
+    description: Minimum tokens filter
+    schema:
+      type: integer
+  max_tokens:
+    name: max_tokens
+    in: query
+    description: Maximum tokens filter
+    schema:
+      type: integer
+  min_cost:
+    name: min_cost
+    in: query
+    description: Minimum cost filter
+    schema:
+      type: number
+  max_cost:
+    name: max_cost
+    in: query
+    description: Maximum cost filter
+    schema:
+      type: number
+  missing_cost_only:
+    name: missing_cost_only
+    in: query
+    description: Only show logs with missing cost
+    schema:
+      type: boolean
+  content_search:
+    name: content_search
+    in: query
+    description: Search in request/response content
+    schema:
+      type: string
+
+logs-recalculate-cost:  # path item with a single POST operation; the URL template is bound where this key is referenced (not visible here)
+  post:
+    operationId: recalculateLogCosts
+    summary: Recalculate log costs
+    description: |
+      Recomputes missing costs in batches. Processes logs with missing cost values
+      and updates them based on current pricing data.
+    tags:
+      - Logging
+    requestBody:
+      required: false  # the JSON body may be omitted entirely
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/logging.yaml#/RecalculateCostRequest'
+    responses:
+      '200':
+        description: Costs recalculated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/RecalculateCostResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'  # shared response declared in the root spec
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'  # shared response declared in the root spec
+
+mcp-logs:  # path item: GET searches MCP tool logs, DELETE removes logs by ID
+  get:
+    operationId: getMCPLogs
+    summary: Get MCP tool logs
+    description: |
+      Retrieves MCP tool execution logs with filtering, search, and pagination via query parameters.
+    tags:
+      - Logging
+    parameters:  # all filters are optional query parameters (none sets `required`)
+      - name: tool_names
+        in: query
+        description: Comma-separated list of tool names to filter by
+        schema:
+          type: string
+      - name: server_labels
+        in: query
+        description: Comma-separated list of server labels to filter by
+        schema:
+          type: string
+      - name: status
+        in: query
+        description: Comma-separated list of statuses to filter by (processing, success, error)
+        schema:
+          type: string
+          # no `enum` here: the value is a comma-separated list, so a string enum would reject e.g. "success,error"
+      - name: virtual_key_ids
+        in: query
+        description: Comma-separated list of virtual key IDs to filter by
+        schema:
+          type: string
+      - name: llm_request_ids
+        in: query
+        description: Comma-separated list of LLM request IDs to filter by
+        schema:
+          type: string
+      - name: start_time
+        in: query
+        description: Start time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: end_time
+        in: query
+        description: End time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: min_latency
+        in: query
+        description: Minimum latency filter (milliseconds)
+        schema:
+          type: number
+      - name: max_latency
+        in: query
+        description: Maximum latency filter (milliseconds)
+        schema:
+          type: number
+      - name: content_search
+        in: query
+        description: Search in tool arguments and results
+        schema:
+          type: string
+      - name: limit  # pagination: page size
+        in: query
+        description: Number of logs to return (default 50, max 1000)
+        schema:
+          type: integer
+          default: 50
+          maximum: 1000
+      - name: offset  # pagination: number of rows skipped before the page starts
+        in: query
+        description: Number of logs to skip
+        schema:
+          type: integer
+          default: 0
+      - name: sort_by
+        in: query
+        description: Field to sort by
+        schema:
+          type: string
+          enum: [timestamp, latency, cost]
+          default: timestamp
+      - name: order
+        in: query
+        description: Sort order
+        schema:
+          type: string
+          enum: [asc, desc]
+          default: desc
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/SearchMCPLogsResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteMCPLogs
+    summary: Delete MCP tool logs
+    description: Deletes MCP tool logs by their IDs.
+    tags:
+      - Logging
+    requestBody:
+      required: true  # the IDs to delete travel in the JSON body, not in the URL
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/logging.yaml#/DeleteMCPLogsRequest'
+    responses:
+      '200':
+        description: MCP tool logs deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+mcp-logs-stats:  # path item: aggregate statistics over MCP tool logs, using the same filter names as getMCPLogs (no paging/sorting parameters)
+  get:
+    operationId: getMCPLogsStats
+    summary: Get MCP tool log statistics
+    description: Returns statistics for MCP tool logs matching the specified filters.
+    tags:
+      - Logging
+    parameters:  # all filters are optional query parameters (none sets `required`)
+      - name: tool_names
+        in: query
+        description: Comma-separated list of tool names to filter by
+        schema:
+          type: string
+      - name: server_labels
+        in: query
+        description: Comma-separated list of server labels to filter by
+        schema:
+          type: string
+      - name: status
+        in: query
+        description: Comma-separated list of statuses to filter by (processing, success, error)
+        schema:
+          type: string
+          # no `enum` here: the value is a comma-separated list, so a string enum would reject e.g. "success,error"
+      - name: virtual_key_ids
+        in: query
+        description: Comma-separated list of virtual key IDs to filter by
+        schema:
+          type: string
+      - name: llm_request_ids
+        in: query
+        description: Comma-separated list of LLM request IDs to filter by
+        schema:
+          type: string
+      - name: start_time
+        in: query
+        description: Start time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: end_time
+        in: query
+        description: End time filter (RFC3339 format)
+        schema:
+          type: string
+          format: date-time
+      - name: min_latency
+        in: query
+        description: Minimum latency filter
+        schema:
+          type: number
+      - name: max_latency
+        in: query
+        description: Maximum latency filter
+        schema:
+          type: number
+      - name: content_search
+        in: query
+        description: Search in tool arguments and results
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/MCPToolLogStats'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+mcp-logs-filterdata:  # path item: read-only lookup of the distinct values usable as MCP log filters
+  get:
+    operationId: getMCPLogsFilterData
+    summary: Get available MCP log filter data
+    description: Returns all unique filter data from MCP tool logs (tool names, server labels).
+    tags:
+      - Logging
+    responses:  # takes no parameters, so only success and server-error outcomes are declared
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/logging.yaml#/MCPLogsFilterDataResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/mcp.yaml
+++ b/docs/openapi/paths/management/mcp.yaml
@@ -0,0 +1,240 @@
+execute-tool:  # path item: executes a single MCP tool call; the URL template is bound where this key is referenced (not visible here)
+  post:
+    operationId: executeMCPTool
+    summary: Execute MCP tool
+    description: Executes an MCP tool and returns the result.
+    tags:
+      - MCP
+    parameters:
+      - name: format  # selects which of the two documented payload shapes is used
+        in: query
+        required: false
+        description: |
+          Format of the tool execution request/response.
+        schema:
+          type: string
+          enum: [chat, responses]
+          default: chat
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/mcp.yaml#/ExecuteToolRequest'
+          examples:  # example keys mirror the `format` enum values (chat, responses)
+            chat:
+              summary: Chat format example
+              value:
+                id: "call_123"
+                type: "function"
+                function:
+                  name: "get_weather"
+                  arguments: '{"location": "San Francisco"}'
+            responses:
+              summary: Responses format example
+              value:
+                call_id: "call_123"
+                name: "get_weather"
+                arguments: '{"location": "San Francisco"}'
+    responses:
+      '200':
+        description: Tool executed successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/mcp.yaml#/ExecuteToolResponse'
+            examples:  # example keys mirror the `format` enum values (chat, responses)
+              chat:
+                summary: Chat format response
+                value:
+                  name: "get_weather"
+                  role: "tool"
+                  tool_call_id: "call_123"
+                  content: "The weather in San Francisco is 72°F and sunny."
+              responses:
+                summary: Responses format response
+                value:
+                  id: "msg_123"
+                  type: "function_call_output"
+                  status: "completed"
+                  role: "assistant"
+                  call_id: "call_123"
+                  name: "get_weather"
+                  arguments: '{"location": "San Francisco"}'
+                  content: "The weather in San Francisco is 72°F and sunny."
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+    security:  # list entries are alternatives: satisfying any one scheme authorizes the call
+      - BearerAuth: []  # scheme names are not declared in this file; presumably under components/securitySchemes in openapi.yaml (confirm)
+      - BasicAuth: []
+      - VirtualKeyAuth: []
+      - ApiKeyAuth: []
+
+clients:  # path item: collection-level read of configured MCP clients
+  get:
+    operationId: getMCPClients
+    summary: List MCP clients
+    description: Returns a list of all configured MCP clients with their tools and connection state.
+    tags:
+      - MCP
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: array  # bare JSON array of MCPClient objects (no wrapper object)
+              items:
+                $ref: '../../schemas/management/mcp.yaml#/MCPClient'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+client:  # path item: creation of a new MCP client (POST only)
+  post:
+    operationId: addMCPClient
+    summary: Add MCP client
+    description: |
+      Adds a new MCP client with the specified configuration.
+      Note: tool_pricing is not available when creating a new client as tools are fetched after client creation.
+    tags:
+      - MCP
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/mcp.yaml#/MCPClientCreateRequest'  # create-specific schema; edits use MCPClientUpdateRequest
+    responses:
+      '200':
+        description: MCP client added successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+client-by-id:  # path item addressed by the `id` path parameter: PUT edits, DELETE removes
+  put:
+    operationId: editMCPClient
+    summary: Edit MCP client
+    description: |
+      Updates an existing MCP client's configuration.
+      Unlike client creation, tool_pricing can be included to set per-tool execution costs since tools are already fetched.
+      Optionally provide vk_configs to manage which virtual keys have access to this MCP server and with which tools. When provided, this fully replaces all existing VK assignments in a single atomic transaction.
+    tags:
+      - MCP
+    parameters:
+      - name: id  # declared per operation (repeated under delete) rather than once at path-item level
+        in: path
+        required: true
+        description: MCP client ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/mcp.yaml#/MCPClientUpdateRequest'
+    responses:
+      '200':
+        description: MCP client updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: removeMCPClient
+    summary: Remove MCP client
+    description: Removes an MCP client from the configuration.
+    tags:
+      - MCP
+    parameters:
+      - name: id  # same path parameter as the put operation
+        in: path
+        required: true
+        description: MCP client ID
+        schema:
+          type: string
+    responses:  # no request body: the target is identified solely by `id`
+      '200':
+        description: MCP client removed successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+client-reconnect:  # path item: action endpoint on one MCP client, addressed by the `id` path parameter
+  post:
+    operationId: reconnectMCPClient
+    summary: Reconnect MCP client
+    description: Reconnects an MCP client that is in an error or disconnected state.
+    tags:
+      - MCP
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: MCP client ID
+        schema:
+          type: string
+    responses:  # no request body is declared: the call carries only the client `id`
+      '200':
+        description: MCP client reconnected successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+client-complete-oauth:  # path item: finalizes a pending OAuth-backed MCP client, addressed by the `id` path parameter
+  post:
+    operationId: completeMCPClientOAuth
+    summary: Complete MCP client OAuth flow
+    description: |
+      Completes the OAuth flow for an MCP client after the user has authorized the request.
+      This endpoint should be called after the OAuth provider redirects back to the callback endpoint
+      and the OAuth token has been stored. It retrieves the pending MCP client configuration and
+      establishes the connection with the OAuth-provided credentials.
+      Returns `400` when OAuth is not authorized yet or the MCP client is not found in pending OAuth clients.
+      Returns `404` when the MCP client is not found in pending OAuth clients or the OAuth config is not found.
+    tags:
+      - MCP
+      - OAuth
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: MCP client ID
+        schema:
+          type: string
+    responses:  # error responses are pure $refs; keys placed beside $ref are ignored by OpenAPI 3.0 tooling
+      '200':
+        description: MCP client connected successfully with OAuth
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        $ref: '../../openapi.yaml#/components/responses/NotFound'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/oauth.yaml
+++ b/docs/openapi/paths/management/oauth.yaml
@@ -0,0 +1,769 @@
+oauth-callback:  # path item: browser-facing redirect target; both declared responses are text/html, not JSON
+  get:
+    operationId: handleOAuthCallback
+    summary: OAuth callback endpoint
+    description: |
+      Handles the OAuth provider callback after user authorization.
+      This endpoint processes the authorization code and exchanges it for an access token.
+      On success, displays an HTML page that closes the authorization window.
+    tags:
+      - OAuth
+    parameters:
+      - name: state
+        in: query
+        required: true
+        description: State parameter for OAuth security (CSRF protection)
+        schema:
+          type: string
+      - name: code
+        in: query
+        required: true  # NOTE(review): RFC 6749 error redirects carry `error` without `code`; confirm `code` is really mandatory
+        description: Authorization code from the OAuth provider
+        schema:
+          type: string
+      - name: error  # set by the provider only on failed authorization, per its description
+        in: query
+        required: false
+        description: Error code if authorization failed
+        schema:
+          type: string
+      - name: error_description
+        in: query
+        required: false
+        description: Error description if authorization failed
+        schema:
+          type: string
+    responses:
+      '200':
+        description: OAuth authorization successful. Returns HTML page that closes the authorization window.
+        content:
+          text/html:
+            schema:
+              type: string
+      '400':
+        description: OAuth authorization failed or missing required parameters
+        content:
+          text/html:
+            schema:
+              type: string
+
+oauth-config-status:  # path item addressed by the `id` path parameter: GET reads status, DELETE revokes
+  get:
+    operationId: getOAuthConfigStatus
+    summary: Get OAuth config status
+    description: |
+      Retrieves the current status of an OAuth configuration.
+      Shows whether the OAuth flow is pending, authorized, or failed,
+      and includes token expiration and scopes if authorized.
+    tags:
+      - OAuth
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: OAuth config ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: OAuth config status retrieved successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/oauth.yaml#/OAuthConfigStatus'
+      '404':  # declared inline with its own description instead of referencing a shared response
+        description: OAuth config not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: revokeOAuthConfig
+    summary: Revoke OAuth config
+    description: |
+      Revokes an OAuth configuration and its associated access token.
+      After revocation, the MCP client will no longer be able to use this OAuth token.
+    tags:
+      - OAuth
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: OAuth config ID
+        schema:
+          type: string
+    responses:  # NOTE(review): unlike the get operation, no 404 is declared here; confirm behavior for an unknown id
+      '200':
+        description: OAuth token revoked successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/SuccessResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+# ─── Per-User OAuth 2.1 Authorization Server ───────────────────────────────
+# These endpoints implement RFC 7591 (dynamic registration), RFC 7636 (PKCE),
+# and the OAuth 2.1 authorization code flow. MCP clients use them automatically
+# when connecting to Bifrost's /mcp endpoint. Only active when at least one MCP
+# client is configured with auth_type: per_user_oauth.
+
+per-user-oauth-register:  # path item: dynamic client registration, the first step of the per-user OAuth bootstrap
+  post:
+    operationId: registerPerUserOAuthClient
+    summary: Register OAuth client (RFC 7591)
+    description: |
+      Dynamic Client Registration per RFC 7591. MCP clients (Claude Code, Cursor, etc.)
+      call this endpoint to obtain a `client_id` before initiating the authorization flow.
+
+      This endpoint is only available when at least one MCP client is configured with
+      `auth_type: per_user_oauth`. Returns `404` otherwise.
+
+      Authentication is not required — this is part of the unauthenticated OAuth bootstrap flow.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/oauth.yaml#/PerUserOAuthClientRegistrationRequest'
+          example:
+            client_name: "Claude Code"
+            redirect_uris: ["http://localhost:54321/callback"]
+            grant_types: ["authorization_code"]
+            response_types: ["code"]
+            token_endpoint_auth_method: "none"
+    responses:
+      '201':  # success is 201 (Created), not 200
+        description: Client registered successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/oauth.yaml#/PerUserOAuthClientRegistrationResponse'
+            example:
+              client_id: "550e8400-e29b-41d4-a716-446655440000"
+              client_name: "Claude Code"
+              redirect_uris: ["http://localhost:54321/callback"]
+              grant_types: ["authorization_code"]
+              token_endpoint_auth_method: "none"
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':  # the feature-disabled case is documented as a plain-text body, not JSON
+        description: No per-user OAuth MCP clients configured
+        content:
+          text/plain:
+            schema:
+              type: string
+      '503':
+        description: Config store is disabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+
+per-user-oauth-authorize:  # path item: authorization-code request; the success outcome is a redirect to the consent screen
+  get:
+    operationId: authorizePerUserOAuth
+    summary: Authorization endpoint (OAuth 2.1)
+    description: |
+      OAuth 2.1 authorization endpoint. Validates the request parameters, creates a
+      browser-bound `PendingFlow` record (15-minute TTL), and redirects the user to
+      the Bifrost consent screen at `/oauth/consent?flow_id=xxx`.
+
+      **PKCE is required** — `code_challenge` and `code_challenge_method=S256` must
+      be provided. Plain code challenges are not supported.
+
+      A `__bifrost_flow_secret` HttpOnly SameSite=Lax cookie is set on redirect to
+      bind the consent flow to the initiating browser session (CSRF protection).
+
+      Authentication is not required — this is part of the unauthenticated OAuth bootstrap flow.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    parameters:
+      - name: response_type
+        in: query
+        required: true
+        description: Must be `code`
+        schema:
+          type: string
+          enum: [code]  # single-value enum pins the only accepted value
+      - name: client_id
+        in: query
+        required: true
+        description: Client ID obtained from the registration endpoint
+        schema:
+          type: string
+      - name: redirect_uri
+        in: query
+        required: true
+        description: Must match a URI registered for this client
+        schema:
+          type: string
+      - name: code_challenge
+        in: query
+        required: true
+        description: PKCE code challenge (Base64URL-encoded SHA-256 of the code verifier)
+        schema:
+          type: string
+      - name: code_challenge_method
+        in: query
+        required: true
+        description: Must be `S256`
+        schema:
+          type: string
+          enum: [S256]  # single-value enum: the plain method is not accepted (see operation description)
+      - name: state  # the only optional parameter of this operation
+        in: query
+        required: false
+        description: Opaque value to maintain state between request and callback (CSRF protection)
+        schema:
+          type: string
+    responses:  # no 2xx response is declared: success is the 302 redirect
+      '302':
+        description: Redirect to consent screen at `/oauth/consent?flow_id=xxx`
+        headers:
+          Location:
+            schema:
+              type: string
+            description: URL of the consent screen
+          Set-Cookie:
+            schema:
+              type: string
+            description: "`__bifrost_flow_secret` HttpOnly SameSite=Lax cookie for browser binding"
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: No per-user OAuth MCP clients configured, or unknown client_id
+        content:
+          text/plain:
+            schema:
+              type: string
+      '503':
+        description: Config store is disabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+
+per-user-oauth-token:  # path item: exchanges an authorization code for an access token; form-encoded request, JSON response
+  post:
+    operationId: exchangePerUserOAuthToken
+    summary: Token endpoint (OAuth 2.1)
+    description: |
+      OAuth 2.1 token endpoint. Exchanges a single-use authorization code (5-minute TTL)
+      for a Bifrost-issued access token (24-hour TTL) using PKCE verification.
+
+      The request body must be `application/x-www-form-urlencoded`.
+
+      The returned `access_token` is the Bearer token to use on subsequent `/mcp` requests.
+      It carries the user's upstream service tokens (Notion, GitHub, etc.) linked to their
+      identity (Virtual Key or User ID) from the consent flow.
+
+      Authentication is not required — this is part of the unauthenticated OAuth bootstrap flow.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    requestBody:
+      required: true
+      content:
+        application/x-www-form-urlencoded:
+          schema:
+            type: object
+            required:  # redirect_uri and client_id are not listed here, i.e. they are optional form fields
+              - grant_type
+              - code
+              - code_verifier
+            properties:
+              grant_type:
+                type: string
+                description: Must be `authorization_code`
+                enum: [authorization_code]
+              code:
+                type: string
+                description: Authorization code received in the redirect callback
+              redirect_uri:
+                type: string
+                description: Must match the redirect_uri used in the authorize request (if provided)
+              client_id:
+                type: string
+                description: Client ID (optional — code is already bound to the client)
+              code_verifier:
+                type: string
+                description: PKCE code verifier — the raw secret whose SHA-256 matches the code_challenge
+    responses:
+      '200':
+        description: Token issued successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/oauth.yaml#/PerUserOAuthTokenResponse'
+            example:
+              access_token: "abc123xyz..."
+              token_type: "Bearer"
+              expires_in: 86400  # seconds; equals the 24-hour TTL stated in the description
+              scope: "mcp:read mcp:write"
+      '400':  # inline OAuth-style error object (error / error_description) rather than the shared BadRequest response
+        description: Invalid grant, expired code, PKCE failure, or unsupported grant type
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                error:
+                  type: string
+                  enum: [invalid_grant, invalid_request, unsupported_grant_type]
+                error_description:
+                  type: string
+      '404':
+        description: No per-user OAuth MCP clients configured
+        content:
+          text/plain:
+            schema:
+              type: string
+      '500':  # same inline error-object shape as the 400 response
+        description: Server error or session creation failed
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                error:
+                  type: string
+                  enum: [server_error]
+                error_description:
+                  type: string
+
+per-user-oauth-upstream-authorize:  # path item: starts an OAuth flow against an upstream MCP service; success is a redirect
+  get:
+    operationId: authorizeUpstreamPerUserOAuth
+    summary: Upstream OAuth proxy — authorize with upstream service
+    description: |
+      Initiates an OAuth flow with an upstream MCP service (Notion, GitHub, etc.)
+      on behalf of the current user. Used during the consent flow (via "Connect" buttons
+      on the MCPs page) and at runtime when a tool call is made to an unauthenticated service.
+
+      **Consent flow** — provide `flow_id` (from the pending consent flow). The browser-binding
+      cookie (`__bifrost_flow_secret`) is validated.
+
+      **Runtime flow** — provide `session` (the Bifrost session ID from the token endpoint).
+      Used when a service was skipped during consent and needs to be connected later.
+
+      On success, redirects the user to the upstream provider's authorize URL. After the user
+      grants access, the upstream callback lands at `/api/oauth/callback`, stores the upstream
+      token against the user's identity, and redirects back to the consent screen (consent flow)
+      or returns an authorization success page (runtime flow).
+
+      Authentication is not required — cookie/session validation is performed instead.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    parameters:
+      - name: mcp_client_id
+        in: query
+        required: true
+        description: ID of the per-user OAuth MCP client to authenticate with
+        schema:
+          type: string
+      - name: flow_id  # consent-flow selector; required only when `session` is absent, which `required` cannot express
+        in: query
+        required: false
+        description: |
+          Pending consent flow ID. Required if `session` is not provided.
+          The `__bifrost_flow_secret` cookie must be present and match the flow.
+        schema:
+          type: string
+      - name: session  # runtime-flow selector; required only when `flow_id` is absent
+        in: query
+        required: false
+        description: |
+          Bifrost session ID (from the token endpoint). Required if `flow_id` is not provided.
+          Used for runtime (post-consent) upstream authorization.
+        schema:
+          type: string
+    responses:  # no 2xx response is declared: success is the 302 redirect
+      '302':
+        description: Redirect to upstream OAuth provider's authorize URL
+        headers:
+          Location:
+            schema:
+              type: string
+            description: Upstream provider authorization URL with PKCE parameters
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '401':
+        description: Invalid or expired flow/session
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '403':
+        description: Browser-binding cookie mismatch (CSRF protection)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '404':
+        description: MCP client not found or not configured for per-user OAuth
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '503':
+        description: Config store is disabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+
+# ─── Per-User OAuth Consent Flow (browser UI) ──────────────────────────────
+# These endpoints serve HTML pages and handle form submissions for the
+# multi-step consent flow. They are browser-facing, not JSON API endpoints.
+# All endpoints validate the __bifrost_flow_secret browser-binding cookie.
+
+consent-identity-page:
+  get:
+    operationId: getConsentIdentityPage
+    summary: Consent identity selection page
+    description: |
+      Renders the identity selection screen where the user chooses how to identify
+      themselves for the session: Virtual Key, User ID, or Skip (session-only auth).
+
+      The `__bifrost_flow_secret` HttpOnly cookie set during `/api/oauth/per-user/authorize`
+      must be present — it binds the consent flow to the initiating browser.
+
+      The Skip option is only shown when `enforce_auth_on_inference` is `false` in config.
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    parameters:
+      - name: flow_id
+        in: query
+        required: true
+        description: Pending flow ID from the authorize redirect
+        schema:
+          type: string
+      - name: error
+        in: query
+        required: false
+        description: Error message to display (used on redirect-back from failed form submissions)
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Identity selection HTML page
+        content:
+          text/html:
+            schema:
+              type: string
+      '400':
+        description: Missing or expired flow_id
+        content:
+          text/plain:
+            schema:
+              type: string
+      '403':
+        description: Browser-binding cookie mismatch
+        content:
+          text/plain:
+            schema:
+              type: string
+
+consent-mcps-page:
+  get:
+    operationId: getConsentMCPsPage
+    summary: Consent MCP services page
+    description: |
+      Renders the MCP services connection screen. Shows all per-user OAuth MCP servers
+      available on the user's Virtual Key (or all servers if no VK was selected).
+      Each service shows a "Connect" link or a "Connected ✓" badge.
+
+      Requires the `__bifrost_flow_secret` browser-binding cookie.
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    parameters:
+      - name: flow_id
+        in: query
+        required: true
+        description: Pending flow ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: MCP services connection HTML page
+        content:
+          text/html:
+            schema:
+              type: string
+      '400':
+        description: Missing or expired flow_id
+        content:
+          text/plain:
+            schema:
+              type: string
+      '403':
+        description: Browser-binding cookie mismatch
+        content:
+          text/plain:
+            schema:
+              type: string
+
+consent-submit-vk:
+  post:
+    operationId: submitConsentVirtualKey
+    summary: Submit Virtual Key identity
+    description: |
+      Validates the submitted Virtual Key and links it to the pending flow as the user's
+      identity. On success, redirects to the MCPs page. On failure, redirects back to the
+      identity page with an error message.
+
+      Request body is `application/x-www-form-urlencoded` (browser form submission).
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    requestBody:
+      required: true
+      content:
+        application/x-www-form-urlencoded:
+          schema:
+            type: object
+            required: [flow_id, vk]
+            properties:
+              flow_id:
+                type: string
+                description: Pending flow ID
+              vk:
+                type: string
+                description: Virtual Key value (validated against the database)
+    responses:
+      '302':
+        description: |
+          Redirect to `/oauth/consent/mcps?flow_id=xxx` on success, or back to
+          `/oauth/consent?flow_id=xxx&error=...` on failure.
+        headers:
+          Location:
+            schema:
+              type: string
+
+consent-submit-user-id:
+  post:
+    operationId: submitConsentUserID
+    summary: Submit User ID identity
+    description: |
+      Links a self-declared User ID to the pending flow as the user's identity.
+      On success, redirects to the MCPs page.
+
+      The User ID is self-declared with no server-side verification — it matches
+      the trust model of the `X-Bf-User-Id` header in the LLM Gateway path.
+
+      Request body is `application/x-www-form-urlencoded` (browser form submission).
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    requestBody:
+      required: true
+      content:
+        application/x-www-form-urlencoded:
+          schema:
+            type: object
+            required: [flow_id, user_id]
+            properties:
+              flow_id:
+                type: string
+                description: Pending flow ID
+              user_id:
+                type: string
+                description: Self-declared user identifier (max 255 characters)
+    responses:
+      '302':
+        description: |
+          Redirect to `/oauth/consent/mcps?flow_id=xxx` on success, or back to
+          `/oauth/consent?flow_id=xxx&error=...` on failure.
+        headers:
+          Location:
+            schema:
+              type: string
+
+consent-skip:
+  post:
+    operationId: skipConsentIdentity
+    summary: Skip identity selection
+    description: |
+      Skips identity selection and proceeds directly to the MCPs page. Upstream service
+      tokens will be stored against the session token only (not a persistent identity),
+      so they will not carry over to other sessions or the LLM Gateway.
+
+      Only available when `enforce_auth_on_inference` is `false` in config. Returns a
+      redirect back to the identity page with an error if auth enforcement is enabled.
+
+      Request body is `application/x-www-form-urlencoded` (browser form submission).
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    requestBody:
+      required: true
+      content:
+        application/x-www-form-urlencoded:
+          schema:
+            type: object
+            required: [flow_id]
+            properties:
+              flow_id:
+                type: string
+                description: Pending flow ID
+    responses:
+      '302':
+        description: |
+          Redirect to `/oauth/consent/mcps?flow_id=xxx` on success, or back to
+          `/oauth/consent?flow_id=xxx&error=...` if `enforce_auth_on_inference` is enabled.
+        headers:
+          Location:
+            schema:
+              type: string
+
+consent-submit:
+  post:
+    operationId: submitConsent
+    summary: Finalize consent flow
+    description: |
+      Finalizes the consent flow atomically:
+      1. Creates a `TablePerUserOAuthSession` (24h Bifrost session token)
+      2. Transfers upstream tokens from the flow proxy to the session
+      3. Issues a single-use `TablePerUserOAuthCode` (5-minute TTL, PKCE-bound)
+      4. Deletes the `PendingFlow`
+      5. Redirects to the MCP client's `redirect_uri` with `code` and `state`
+
+      The MCP client then exchanges the code at `/api/oauth/per-user/token`.
+
+      Request body is `application/x-www-form-urlencoded` (browser form submission).
+    tags:
+      - Per-User OAuth
+      - Consent Flow
+    requestBody:
+      required: true
+      content:
+        application/x-www-form-urlencoded:
+          schema:
+            type: object
+            required: [flow_id]
+            properties:
+              flow_id:
+                type: string
+                description: Pending flow ID
+    responses:
+      '302':
+        description: |
+          Redirect to the MCP client's registered `redirect_uri` with
+          `?code=xxx&state=yyy` query parameters.
+        headers:
+          Location:
+            schema:
+              type: string
+            description: MCP client callback URL with code and state
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '403':
+        description: Browser-binding cookie mismatch
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '409':
+        description: Consent flow already submitted (duplicate submission prevention)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '410':
+        description: Consent flow expired
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+# ─── OAuth Discovery (RFC 9728 + RFC 8414) ─────────────────────────────────
+# These well-known endpoints enable MCP clients to auto-discover Bifrost's
+# OAuth configuration. Only active when at least one MCP client is configured
+# with auth_type: per_user_oauth.
+
+oauth-protected-resource-metadata:
+  get:
+    operationId: getOAuthProtectedResourceMetadata
+    summary: Protected Resource Metadata (RFC 9728)
+    description: |
+      Returns the OAuth 2.0 Protected Resource Metadata document per RFC 9728.
+
+      MCP clients fetch this after receiving a `401` response from `/mcp` (with a
+      `WWW-Authenticate: Bearer resource_metadata=".../.well-known/oauth-protected-resource"`
+      header). The response tells the client which authorization server(s) protect the
+      `/mcp` resource so it can proceed with discovery.
+
+      Returns `404` when no MCP clients are configured with `auth_type: per_user_oauth`.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    responses:
+      '200':
+        description: Protected resource metadata
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/oauth.yaml#/ProtectedResourceMetadata'
+            example:
+              resource: "https://your-bifrost-domain.com/mcp"
+              authorization_servers: ["https://your-bifrost-domain.com"]
+              scopes_supported: ["mcp:read", "mcp:write"]
+              bearer_methods_supported: ["header"]
+      '404':
+        description: No per-user OAuth MCP clients configured
+        content:
+          text/plain:
+            schema:
+              type: string
+
+oauth-authorization-server-metadata:
+  get:
+    operationId: getOAuthAuthorizationServerMetadata
+    summary: Authorization Server Metadata (RFC 8414)
+    description: |
+      Returns the OAuth 2.0 Authorization Server Metadata document per RFC 8414.
+
+      After fetching the Protected Resource Metadata, MCP clients fetch this endpoint
+      to discover Bifrost's OAuth endpoints (register, authorize, token) and capabilities
+      (PKCE methods, grant types, etc.).
+
+      Returns `404` when no MCP clients are configured with `auth_type: per_user_oauth`.
+    tags:
+      - OAuth
+      - Per-User OAuth
+    responses:
+      '200':
+        description: Authorization server metadata
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/oauth.yaml#/AuthorizationServerMetadata'
+            example:
+              issuer: "https://your-bifrost-domain.com"
+              authorization_endpoint: "https://your-bifrost-domain.com/api/oauth/per-user/authorize"
+              token_endpoint: "https://your-bifrost-domain.com/api/oauth/per-user/token"
+              registration_endpoint: "https://your-bifrost-domain.com/api/oauth/per-user/register"
+              response_types_supported: ["code"]
+              grant_types_supported: ["authorization_code"]
+              code_challenge_methods_supported: ["S256"]
+              token_endpoint_auth_methods_supported: ["none"]
+              scopes_supported: ["mcp:read", "mcp:write"]
+      '404':
+        description: No per-user OAuth MCP clients configured
+        content:
+          text/plain:
+            schema:
+              type: string
--- a/docs/openapi/paths/management/plugins.yaml
+++ b/docs/openapi/paths/management/plugins.yaml
@@ -0,0 +1,157 @@
+plugins:
+  get:
+    operationId: listPlugins
+    summary: List all plugins
+    description: |
+      Returns a list of all plugins with their configurations and status.
+      The `actualName` field contains the plugin name from `GetName()` (used as the map key),
+      while `name` contains the display name from the configuration.
+      The `types` array in the status shows which interfaces the plugin implements (llm, mcp, http).
+    tags:
+      - Plugins
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/plugins.yaml#/ListPluginsResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createPlugin
+    summary: Create a new plugin
+    description: Creates a new plugin with the specified configuration.
+    tags:
+      - Plugins
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/plugins.yaml#/CreatePluginRequest'
+    responses:
+      '201':
+        description: Plugin created successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/plugins.yaml#/PluginResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '409':
+        description: Plugin already exists
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+/plugins/{name}:
+  get:
+    operationId: getPlugin
+    summary: Get a specific plugin
+    description: |
+      Returns the configuration for a specific plugin.
+      The response includes the plugin status with types array showing which interfaces
+      the plugin implements (llm, mcp, http). The `actualName` field shows the plugin name
+      from `GetName()` (used as the map key), which may differ from the display name (`name`).
+    tags:
+      - Plugins
+    parameters:
+      - name: name
+        in: path
+        required: true
+        description: Plugin display name (the config field `name`, not the internal `actualName` from `GetName()`)
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/plugins.yaml#/Plugin'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Plugin not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updatePlugin
+    summary: Update a plugin
+    description: |
+      Updates a plugin's configuration. Will reload or stop the plugin based on enabled status.
+      The response `actualName` field shows the plugin name from `GetName()` (used as the map key),
+      which may differ from the display name (`name`).
+    tags:
+      - Plugins
+    parameters:
+      - name: name
+        in: path
+        required: true
+        description: Plugin display name (the config field `name`, not the internal `actualName` from `GetName()`)
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/plugins.yaml#/UpdatePluginRequest'
+    responses:
+      '200':
+        description: Plugin updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/plugins.yaml#/PluginResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Plugin not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deletePlugin
+    summary: Delete a plugin
+    description: Removes a plugin from the configuration and stops it if running.
+    tags:
+      - Plugins
+    parameters:
+      - name: name
+        in: path
+        required: true
+        description: Plugin display name (the config field `name`, not the internal `actualName` from `GetName()`)
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Plugin deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Plugin not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/prompts.yaml
+++ b/docs/openapi/paths/management/prompts.yaml
@@ -0,0 +1,651 @@
+# Prompt Repository paths
+
+folders:
+  get:
+    operationId: listFolders
+    summary: List folders
+    description: Returns all prompt folders.
+    tags:
+      - Prompt Repository
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                folders:
+                  type: array
+                  items:
+                    $ref: '../../schemas/management/prompts.yaml#/Folder'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createFolder
+    summary: Create folder
+    description: Creates a new prompt folder.
+    tags:
+      - Prompt Repository
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/CreateFolderRequest'
+    responses:
+      '200':
+        description: Folder created
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                folder:
+                  $ref: '../../schemas/management/prompts.yaml#/Folder'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+folders-by-id:
+  get:
+    operationId: getFolder
+    summary: Get folder
+    description: Returns a folder by ID.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                folder:
+                  $ref: '../../schemas/management/prompts.yaml#/Folder'
+      '404':
+        description: Folder not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateFolder
+    summary: Update folder
+    description: Updates a folder's name or description.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/UpdateFolderRequest'
+    responses:
+      '200':
+        description: Folder updated
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                folder:
+                  $ref: '../../schemas/management/prompts.yaml#/Folder'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Folder not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteFolder
+    summary: Delete folder
+    description: Deletes a folder and cascades the deletion to the prompts it contains.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Folder deleted
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Folder not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+prompts:
+  get:
+    operationId: listPrompts
+    summary: List prompts
+    description: Returns all prompts, optionally filtered by folder.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: folder_id
+        in: query
+        description: Filter by folder ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                prompts:
+                  type: array
+                  items:
+                    $ref: '../../schemas/management/prompts.yaml#/Prompt'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createPrompt
+    summary: Create prompt
+    description: Creates a new prompt.
+    tags:
+      - Prompt Repository
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/CreatePromptRequest'
+    responses:
+      '200':
+        description: Prompt created
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                prompt:
+                  $ref: '../../schemas/management/prompts.yaml#/Prompt'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+prompts-by-id:
+  get:
+    operationId: getPrompt
+    summary: Get prompt
+    description: Returns a prompt by ID with its latest version.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                prompt:
+                  $ref: '../../schemas/management/prompts.yaml#/Prompt'
+      '404':
+        description: Prompt not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updatePrompt
+    summary: Update prompt
+    description: Updates a prompt's name or folder.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/UpdatePromptRequest'
+    responses:
+      '200':
+        description: Prompt updated
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                prompt:
+                  $ref: '../../schemas/management/prompts.yaml#/Prompt'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deletePrompt
+    summary: Delete prompt
+    description: Deletes a prompt and all its versions and sessions.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Prompt deleted
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Prompt not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+prompt-versions:
+  get:
+    operationId: listPromptVersions
+    summary: List prompt versions
+    description: Returns all versions for a prompt.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Prompt ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                versions:
+                  type: array
+                  items:
+                    $ref: '../../schemas/management/prompts.yaml#/PromptVersion'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createPromptVersion
+    summary: Create prompt version
+    description: Creates a new version for a prompt.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Prompt ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/CreateVersionRequest'
+    responses:
+      '200':
+        description: Version created
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                version:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptVersion'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+versions-by-id:
+  get:
+    operationId: getPromptVersion
+    summary: Get prompt version
+    description: Returns a specific version by ID.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Version ID
+        schema:
+          type: integer
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                version:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptVersion'
+      '404':
+        description: Version not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deletePromptVersion
+    summary: Delete prompt version
+    description: Deletes a specific version.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Version ID
+        schema:
+          type: integer
+    responses:
+      '200':
+        description: Version deleted
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Version not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+prompt-sessions:
+  get:
+    operationId: listPromptSessions
+    summary: List prompt sessions
+    description: Returns all sessions for a prompt.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Prompt ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                sessions:
+                  type: array
+                  items:
+                    $ref: '../../schemas/management/prompts.yaml#/PromptSession'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createPromptSession
+    summary: Create prompt session
+    description: Creates a new playground session for a prompt.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Prompt ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/CreateSessionRequest'
+    responses:
+      '200':
+        description: Session created
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                session:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptSession'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+sessions-by-id:
+  get:
+    operationId: getPromptSession
+    summary: Get prompt session
+    description: Returns a specific session by ID.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Session ID
+        schema:
+          type: integer
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                session:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptSession'
+      '404':
+        description: Session not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updatePromptSession
+    summary: Update prompt session
+    description: Updates a session's messages, model parameters, and any other fields defined by UpdateSessionRequest.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Session ID
+        schema:
+          type: integer
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/UpdateSessionRequest'
+    responses:
+      '200':
+        description: Session updated
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                session:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptSession'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deletePromptSession
+    summary: Delete prompt session
+    description: Deletes a specific session.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Session ID
+        schema:
+          type: integer
+    responses:
+      '200':
+        description: Session deleted
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Session not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+sessions-rename:
+  put:
+    operationId: renamePromptSession
+    summary: Rename prompt session
+    description: Renames a session.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Session ID
+        schema:
+          type: integer
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/RenameSessionRequest'
+    responses:
+      '200':
+        description: Session renamed
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                session:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptSession'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+sessions-commit:
+  post:
+    operationId: commitPromptSession
+    summary: Commit session as version
+    description: Commits the current session state as a new prompt version.
+    tags:
+      - Prompt Repository
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Session ID
+        schema:
+          type: integer
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/prompts.yaml#/CommitSessionRequest'
+    responses:
+      '200':
+        description: Version created from session
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                version:
+                  $ref: '../../schemas/management/prompts.yaml#/PromptVersion'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/providers.yaml
+++ b/docs/openapi/paths/management/providers.yaml
@@ -0,0 +1,512 @@
+providers:
+  get:
+    operationId: listProviders
+    summary: List all providers
+    description: Returns a list of all configured providers with their configurations and status.
+    tags:
+      - Providers
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ListProvidersResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: addProvider
+    summary: Add a new provider
+    description: Adds a new provider with the specified configuration.
+    tags:
+      - Providers
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/providers.yaml#/AddProviderRequest'
+    responses:
+      '200':
+        description: Provider added successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ProviderResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '409':
+        description: Provider already exists
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+providers-by-name:
+  get:
+    operationId: getProvider
+    summary: Get a specific provider
+    description: Returns the configuration for a specific provider.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ProviderResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateProvider
+    summary: Update a provider
+    description: |
+      Updates a provider's configuration. Expects ALL fields to be provided,
+      including both edited and non-edited fields. Partial updates are not supported.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/providers.yaml#/UpdateProviderRequest'
+    responses:
+      '200':
+        description: Provider updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ProviderResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteProvider
+    summary: Delete a provider
+    description: Removes a provider from the configuration.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Provider deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ProviderResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+provider-keys:
+  get:
+    operationId: listProviderKeys
+    summary: List keys for a provider
+    description: Returns all keys configured for a specific provider.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ListProviderKeysResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createProviderKey
+    summary: Create a key for a provider
+    description: |
+      Creates a new API key for the specified provider. The key `id` is auto-generated
+      if omitted. `enabled` defaults to `true` if omitted. `value` is required and must
+      not be empty. Keys cannot be created on keyless providers.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/providers.yaml#/Key'
+    responses:
+      '200':
+        description: Key created successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/Key'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '409':
+        description: Key ID already exists
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+provider-key-by-id:
+  get:
+    operationId: getProviderKey
+    summary: Get a specific key for a provider
+    description: Returns a single key for the specified provider.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+      - name: key_id
+        in: path
+        required: true
+        description: Key ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/Key'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider or key not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateProviderKey
+    summary: Update a key for a provider
+    description: |
+      Updates an existing key. Send the full key object. Redacted values sent back
+      unchanged are automatically preserved (the server merges them with the stored
+      raw values).
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+      - name: key_id
+        in: path
+        required: true
+        description: Key ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/providers.yaml#/Key'
+    responses:
+      '200':
+        description: Key updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/Key'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider or key not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteProviderKey
+    summary: Delete a key from a provider
+    description: Deletes a key from the specified provider. Returns the deleted key.
+    tags:
+      - Providers
+    parameters:
+      - name: provider
+        in: path
+        required: true
+        description: Provider name
+        schema:
+          type: string
+      - name: key_id
+        in: path
+        required: true
+        description: Key ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Key deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/Key'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Provider or key not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+keys:
+  get:
+    operationId: listKeys
+    summary: List all keys
+    description: Returns a list of all configured API keys across all providers.
+    tags:
+      - Providers
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: array
+              items:
+                $ref: '../../schemas/management/providers.yaml#/Key'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+models:
+  get:
+    operationId: listModelsManagement
+    summary: List models
+    description: |
+      Lists available models with optional filtering by query, provider, or keys.
+    tags:
+      - Providers
+    parameters:
+      - name: query
+        in: query
+        description: Filter models by name (case-insensitive partial match)
+        schema:
+          type: string
+      - name: provider
+        in: query
+        description: Filter by specific provider name
+        schema:
+          type: string
+      - name: keys
+        in: query
+        description: Comma-separated list of key IDs to filter models accessible by those keys
+        style: form
+        explode: false
+        schema:
+          type: array
+          items:
+            type: string
+      - name: limit
+        in: query
+        description: Maximum number of results to return (default 5)
+        schema:
+          type: integer
+          default: 5
+      - name: unfiltered
+        in: query
+        description: If true, return all models including those filtered out by provider-level restrictions
+        schema:
+          type: boolean
+          default: false
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ListModelsResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+models-details:
+  get:
+    operationId: listModelDetailsManagement
+    summary: List model details
+    description: |
+      Lists available models, including capability metadata where the model catalog provides it, with optional filtering by query, provider, or keys.
+    tags:
+      - Providers
+    parameters:
+      - name: query
+        in: query
+        description: Filter models by name (case-insensitive partial match)
+        schema:
+          type: string
+      - name: provider
+        in: query
+        description: Filter by specific provider name
+        schema:
+          type: string
+      - name: keys
+        in: query
+        description: Comma-separated list of key IDs to filter models accessible by those keys
+        style: form
+        explode: false
+        schema:
+          type: array
+          items:
+            type: string
+      - name: limit
+        in: query
+        description: Maximum number of results to return (default 20)
+        schema:
+          type: integer
+          default: 20
+      - name: unfiltered
+        in: query
+        description: If true, return all models including those filtered out by provider-level restrictions
+        schema:
+          type: boolean
+          default: false
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/providers.yaml#/ListModelDetailsResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+models-parameters:
+  get:
+    operationId: getModelParameters
+    summary: Get model parameters
+    description: Returns the available parameter definitions for models.
+    tags:
+      - Providers
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              additionalProperties: true
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+models-base:
+  get:
+    operationId: listBaseModels
+    summary: List base models
+    description: Returns a list of base models from the model catalog.
+    tags:
+      - Providers
+    parameters:
+      - name: query
+        in: query
+        description: Filter models by name
+        schema:
+          type: string
+      - name: provider
+        in: query
+        description: Filter by provider
+        schema:
+          type: string
+      - name: limit
+        in: query
+        description: Maximum number of results to return
+        schema:
+          type: integer
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              type: object
+              additionalProperties: true
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/session.yaml
+++ b/docs/openapi/paths/management/session.yaml
@@ -0,0 +1,105 @@
+login:
+  post:
+    operationId: login
+    summary: Login
+    description: |
+      Authenticates a user and returns a session token.
+      Sets a cookie with the session token for subsequent requests.
+    tags:
+      - Session
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/session.yaml#/LoginRequest'
+    responses:
+      '200':
+        description: Login successful
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/session.yaml#/LoginResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '401':
+        description: Invalid credentials
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '403':
+        description: Authentication is not enabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+logout:
+  post:
+    operationId: logout
+    summary: Logout
+    description: Logs out the current user and invalidates the session token.
+    tags:
+      - Session
+    responses:
+      '200':
+        description: Logout successful
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/session.yaml#/LogoutResponse'
+      '403':
+        description: Authentication is not enabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+
+is-auth-enabled:
+  get:
+    operationId: isAuthEnabled
+    summary: Check if authentication is enabled
+    description: Returns whether authentication is enabled and whether the current token is valid.
+    tags:
+      - Session
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/session.yaml#/IsAuthEnabledResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+ws-ticket:
+  post:
+    operationId: issueWSTicket
+    summary: Issue WebSocket ticket
+    description: |
+      Issues a short-lived ticket for authenticating WebSocket connections.
+      The ticket can be used as a query parameter when upgrading to WebSocket.
+    tags:
+      - Session
+    responses:
+      '200':
+        description: Ticket issued successfully
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                ticket:
+                  type: string
+                  description: Short-lived WebSocket authentication ticket
+      '403':
+        description: Authentication is not enabled
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/inference/common.yaml#/BifrostError'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/paths/management/users.yaml
+++ b/docs/openapi/paths/management/users.yaml
@@ -0,0 +1,534 @@
+users:
+  get:
+    operationId: listUsers
+    summary: List users
+    description: Returns a paginated list of users with optional search.
+    tags:
+      - Users
+    parameters:
+      - name: page
+        in: query
+        description: Page number (1-based)
+        schema:
+          type: integer
+          minimum: 1
+          default: 1
+      - name: limit
+        in: query
+        description: Number of users per page (max 100)
+        schema:
+          type: integer
+          minimum: 1
+          maximum: 100
+          default: 20
+      - name: search
+        in: query
+        description: Search by name or email
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/ListUsersResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createUser
+    summary: Create user
+    description: Manually creates a new user in the organization.
+    tags:
+      - Users
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/CreateUserRequest'
+    responses:
+      '200':
+        description: User created successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/UserResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '409':
+        description: User with this email already exists
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+users-by-id:
+  delete:
+    operationId: deleteUser
+    summary: Delete user
+    description: >
+      Permanently removes a user from the organization. This cascades to delete
+      the user's governance settings (budget/rate limits), team memberships,
+      access profiles, and OIDC sessions. Cannot delete yourself.
+    tags:
+      - Users
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: User ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: User deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        description: Bad request (e.g. cannot delete yourself)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '404':
+        description: User not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+users-me-permissions:
+  get:
+    operationId: getCurrentUserPermissions
+    summary: Get current user permissions
+    description: >
+      Returns the RBAC permissions for the authenticated user. When SCIM is not
+      enabled, returns full permissions for all resources. Otherwise returns the
+      permissions associated with the user's assigned role.
+    tags:
+      - Users
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/PermissionsResponse'
+      '401':
+        description: Unauthorized (user not authenticated)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '404':
+        description: User not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+users-role:
+  put:
+    operationId: assignUserRole
+    summary: Assign role to user
+    description: >
+      Assigns an RBAC role to a user. This also auto-assigns the default
+      access profile for the new role and reloads the RBAC permission cache.
+    tags:
+      - Users
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: User ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/AssignUserRoleRequest'
+    responses:
+      '200':
+        description: Role assigned successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: User or role not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+users-teams:
+  get:
+    operationId: getUserTeams
+    summary: Get user's teams
+    description: Returns the list of teams a user belongs to, including the membership source.
+    tags:
+      - Users
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: User ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/UserTeamsResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: User not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateUserTeams
+    summary: Update user's team assignments
+    description: >
+      Replaces the user's manual team assignments. Synced team memberships
+      (from SCIM providers) are preserved and cannot be removed via this endpoint.
+    tags:
+      - Users
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: User ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/UpdateUserTeamsRequest'
+    responses:
+      '200':
+        description: Teams updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '400':
+        description: Bad request (e.g. team not found)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '404':
+        description: User not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+# ---- Teams ----
+
+teams:
+  get:
+    operationId: listTeams
+    summary: List teams
+    description: Returns a paginated list of teams with optional search.
+    tags:
+      - Teams
+    parameters:
+      - name: page
+        in: query
+        description: Page number (1-based)
+        schema:
+          type: integer
+          minimum: 1
+          default: 1
+      - name: limit
+        in: query
+        description: Number of teams per page (max 100)
+        schema:
+          type: integer
+          minimum: 1
+          maximum: 100
+          default: 20
+      - name: search
+        in: query
+        description: Search by team name
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/ListTeamsResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: createTeam
+    summary: Create team
+    description: Creates a new team. The team ID is derived from the name.
+    tags:
+      - Teams
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/CreateTeamRequest'
+    responses:
+      '200':
+        description: Team created successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/CreateTeamResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '409':
+        description: Team with this name already exists
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+teams-by-id:
+  get:
+    operationId: getTeam
+    summary: Get team
+    description: Returns details of a specific team including member count.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/TeamObject'
+      '404':
+        description: Team not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  put:
+    operationId: updateTeam
+    summary: Update team
+    description: Updates a team. Note that renaming teams is not allowed.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/UpdateTeamRequest'
+    responses:
+      '200':
+        description: Team updated successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/CreateTeamResponse'
+      '400':
+        description: Bad request (e.g. renaming not allowed)
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '404':
+        description: Team not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  delete:
+    operationId: deleteTeam
+    summary: Delete team
+    description: Permanently removes a team.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Team deleted successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Team not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+# ---- Team Members ----
+
+team-members:
+  get:
+    operationId: getTeamMembers
+    summary: List team members
+    description: Returns all members of a team with their user details and membership source.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Successful response
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/users.yaml#/TeamMembersResponse'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '404':
+        description: Team not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+  post:
+    operationId: addTeamMember
+    summary: Add team member
+    description: Adds a user to a team. Both the team and user must exist.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/users.yaml#/AddTeamMemberRequest'
+    responses:
+      '200':
+        description: Member added successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Team or user not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '409':
+        description: User is already a member of this team
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+team-member-by-id:
+  delete:
+    operationId: removeTeamMember
+    summary: Remove team member
+    description: Removes a user from a team.
+    tags:
+      - Teams
+    parameters:
+      - name: id
+        in: path
+        required: true
+        description: Team ID
+        schema:
+          type: string
+      - name: userId
+        in: path
+        required: true
+        description: User ID to remove
+        schema:
+          type: string
+    responses:
+      '200':
+        description: Member removed successfully
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/MessageResponse'
+      '404':
+        description: Membership not found
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/common.yaml#/ErrorResponse'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
--- a/docs/openapi/schemas/inference/async.yaml
+++ b/docs/openapi/schemas/inference/async.yaml
@@ -0,0 +1,43 @@
+# Async Job schemas
+
+AsyncJobStatus:
+  type: string
+  description: The status of an async job
+  enum:
+    - pending
+    - processing
+    - completed
+    - failed
+
+AsyncJobResponse:
+  type: object
+  description: Response returned when creating or polling an async job
+  required:
+    - id
+    - status
+    - created_at
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the async job
+    status:
+      $ref: '#/AsyncJobStatus'
+    expires_at:
+      type: string
+      format: date-time
+      description: When the job result expires and will be cleaned up
+    created_at:
+      type: string
+      format: date-time
+      description: When the job was created
+    completed_at:
+      type: string
+      format: date-time
+      description: When the job completed (successfully or with failure)
+    status_code:
+      type: integer
+      description: HTTP status code of the completed operation
+    result:
+      description: The result of the completed operation (shape depends on the request type)
+    error:
+      $ref: './common.yaml#/BifrostError'
--- a/docs/openapi/schemas/inference/batch.yaml
+++ b/docs/openapi/schemas/inference/batch.yaml
@@ -0,0 +1,309 @@
+# Batch API schemas
+
+BatchStatus:
+  type: string
+  enum:
+    - validating
+    - failed
+    - in_progress
+    - finalizing
+    - completed
+    - expired
+    - cancelling
+    - canceled
+    - ended
+
+BatchEndpoint:
+  type: string
+  enum:
+    - /v1/chat/completions
+    - /v1/embeddings
+    - /v1/completions
+    - /v1/responses
+    - /v1/messages
+
+BatchCreateRequest:
+  type: object
+  required:
+    - model
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input_file_id:
+      type: string
+      description: OpenAI-style file ID
+    requests:
+      type: array
+      items:
+        $ref: '#/BatchRequestItem'
+      description: Anthropic-style inline requests
+    endpoint:
+      $ref: '#/BatchEndpoint'
+    completion_window:
+      type: string
+      description: Time window within which the batch should be processed (e.g., "24h")
+    metadata:
+      type: object
+      additionalProperties:
+        type: string
+
+BatchRequestItem:
+  type: object
+  required:
+    - custom_id
+  properties:
+    custom_id:
+      type: string
+    method:
+      type: string
+    url:
+      type: string
+    body:
+      type: object
+    params:
+      type: object
+
+BatchCreateResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    endpoint:
+      type: string
+    input_file_id:
+      type: string
+    completion_window:
+      type: string
+    status:
+      $ref: '#/BatchStatus'
+    request_counts:
+      $ref: '#/BatchRequestCounts'
+    metadata:
+      type: object
+      additionalProperties:
+        type: string
+    created_at:
+      type: integer
+      format: int64
+    expires_at:
+      type: integer
+      format: int64
+    output_file_id:
+      type: string
+    error_file_id:
+      type: string
+    processing_status:
+      type: string
+    results_url:
+      type: string
+    operation_name:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+BatchRequestCounts:
+  type: object
+  properties:
+    total:
+      type: integer
+    completed:
+      type: integer
+    failed:
+      type: integer
+    succeeded:
+      type: integer
+    expired:
+      type: integer
+    canceled:
+      type: integer
+    pending:
+      type: integer
+
+BatchListResponse:
+  type: object
+  properties:
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/BatchRetrieveResponse'
+    first_id:
+      type: string
+    last_id:
+      type: string
+    has_more:
+      type: boolean
+    next_cursor:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+BatchRetrieveResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    endpoint:
+      type: string
+    input_file_id:
+      type: string
+    completion_window:
+      type: string
+    status:
+      $ref: '#/BatchStatus'
+    request_counts:
+      $ref: '#/BatchRequestCounts'
+    metadata:
+      type: object
+      additionalProperties:
+        type: string
+    created_at:
+      type: integer
+      format: int64
+    expires_at:
+      type: integer
+      format: int64
+    in_progress_at:
+      type: integer
+      format: int64
+    finalizing_at:
+      type: integer
+      format: int64
+    completed_at:
+      type: integer
+      format: int64
+    failed_at:
+      type: integer
+      format: int64
+    expired_at:
+      type: integer
+      format: int64
+    cancelling_at:
+      type: integer
+      format: int64
+    cancelled_at:
+      type: integer
+      format: int64
+    output_file_id:
+      type: string
+    error_file_id:
+      type: string
+    errors:
+      $ref: '#/BatchErrors'
+    processing_status:
+      type: string
+    results_url:
+      type: string
+    archived_at:
+      type: integer
+      format: int64
+    operation_name:
+      type: string
+    done:
+      type: boolean
+    progress:
+      type: integer
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+BatchErrors:
+  type: object
+  properties:
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/BatchError'
+
+BatchError:
+  type: object
+  properties:
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    line:
+      type: integer
+
+BatchCancelResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    status:
+      $ref: '#/BatchStatus'
+    request_counts:
+      $ref: '#/BatchRequestCounts'
+    cancelling_at:
+      type: integer
+      format: int64
+    cancelled_at:
+      type: integer
+      format: int64
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+BatchResultsResponse:
+  type: object
+  properties:
+    batch_id:
+      type: string
+    results:
+      type: array
+      items:
+        $ref: '#/BatchResultItem'
+    has_more:
+      type: boolean
+    next_cursor:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+BatchResultItem:
+  type: object
+  properties:
+    custom_id:
+      type: string
+    response:
+      $ref: '#/BatchResultResponse'
+    result:
+      $ref: '#/BatchResultData'
+    error:
+      $ref: '#/BatchResultError'
+
+BatchResultResponse:
+  type: object
+  properties:
+    status_code:
+      type: integer
+    request_id:
+      type: string
+    body:
+      type: object
+
+BatchResultData:
+  type: object
+  properties:
+    type:
+      type: string
+    message:
+      type: object
+
+BatchResultError:
+  type: object
+  properties:
+    code:
+      type: string
+    message:
+      type: string
--- a/docs/openapi/schemas/inference/chat.yaml
+++ b/docs/openapi/schemas/inference/chat.yaml
@@ -0,0 +1,673 @@
+# Chat Completions API schemas
+
+ChatCompletionRequest:
+  type: object
+  required:
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format (e.g., openai/gpt-4)
+      example: openai/gpt-4
+    messages:
+      type: array
+      items:
+        $ref: '#/ChatMessage'
+      description: List of messages in the conversation
+    fallbacks:
+      type: array
+      items:
+        type: string
+      description: Fallback models in provider/model format
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    frequency_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    logit_bias:
+      type: object
+      additionalProperties:
+        type: number
+    logprobs:
+      type: boolean
+    max_completion_tokens:
+      type: integer
+    metadata:
+      type: object
+      additionalProperties: true
+    modalities:
+      type: array
+      items:
+        type: string
+    parallel_tool_calls:
+      type: boolean
+    presence_penalty:
+      type: number
+      minimum: -2.0
+      maximum: 2.0
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ChatReasoning'
+    response_format:
+      type: object
+      description: Format for the response
+    safety_identifier:
+      type: string
+    service_tier:
+      type: string
+    stream_options:
+      $ref: '#/ChatStreamOptions'
+    store:
+      type: boolean
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 2
+    tool_choice:
+      $ref: '#/ChatToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '#/ChatTool'
+    seed:
+      type: integer
+      description: Deterministic sampling seed
+    top_p:
+      type: number
+      minimum: 0
+      maximum: 1
+      description: Nucleus sampling parameter
+    top_logprobs:
+      type: integer
+      minimum: 0
+      maximum: 20
+      description: Number of most likely tokens to return at each position
+    stop:
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+      description: Up to 4 sequences where the API will stop generating tokens
+    prediction:
+      $ref: '#/ChatPrediction'
+    prompt_cache_retention:
+      type: string
+      enum: [in-memory, 24h]
+      description: Prompt cache retention policy
+    web_search_options:
+      $ref: '#/ChatWebSearchOptions'
+    truncation:
+      type: string
+    user:
+      type: string
+    verbosity:
+      type: string
+      enum: [low, medium, high]
+
+ChatMessage:
+  type: object
+  required:
+    - role
+  properties:
+    role:
+      $ref: '#/ChatMessageRole'
+    name:
+      type: string
+    content:
+      $ref: '#/ChatMessageContent'
+    tool_call_id:
+      type: string
+      description: ID of the tool call this message responds to (for tool messages)
+    refusal:
+      type: string
+    audio:
+      $ref: '#/ChatAudioMessageAudio'
+    reasoning:
+      type: string
+    reasoning_details:
+      type: array
+      items:
+        $ref: '#/ChatReasoningDetails'
+    annotations:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageAnnotation'
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageToolCall'
+
+ChatMessageRole:
+  type: string
+  enum:
+    - assistant
+    - user
+    - system
+    - tool
+    - developer
+
+ChatMessageContent:
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ChatContentBlock'
+  description: Message content - can be a string or array of content blocks
+
+ChatContentBlock:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [text, image_url, input_audio, file, refusal]
+    text:
+      type: string
+    refusal:
+      type: string
+    image_url:
+      $ref: '#/ChatInputImage'
+    input_audio:
+      $ref: '#/ChatInputAudio'
+    file:
+      $ref: '#/ChatInputFile'
+    cache_control:
+      $ref: './common.yaml#/CacheControl'
+
+ChatInputImage:
+  type: object
+  required:
+    - url
+  properties:
+    url:
+      type: string
+    detail:
+      type: string
+      enum: [low, high, auto]
+
+ChatInputAudio:
+  type: object
+  required:
+    - data
+  properties:
+    data:
+      type: string
+    format:
+      type: string
+
+ChatInputFile:
+  type: object
+  properties:
+    file_data:
+      type: string
+    file_id:
+      type: string
+    filename:
+      type: string
+    file_type:
+      type: string
+
+ChatReasoning:
+  type: object
+  properties:
+    effort:
+      type: string
+      description: Reasoning effort level
+      enum: [none, minimal, low, medium, high, xhigh]
+    max_tokens:
+      type: integer
+
+ChatStreamOptions:
+  type: object
+  properties:
+    include_obfuscation:
+      type: boolean
+    include_usage:
+      type: boolean
+
+ChatToolChoice:
+  oneOf:
+    - type: string
+      enum: [none, auto, required]
+    - $ref: '#/ChatToolChoiceStruct'
+
+ChatToolChoiceStruct:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [none, any, required, function, allowed_tools, custom]
+    function:
+      $ref: '#/ChatToolChoiceFunction'
+    allowed_tools:
+      $ref: '#/ChatToolChoiceAllowedTools'
+
+ChatToolChoiceFunction:
+  type: object
+  required:
+    - name
+  properties:
+    name:
+      type: string
+
+ChatToolChoiceAllowedTools:
+  type: object
+  properties:
+    mode:
+      type: string
+      enum: [auto, required]
+    tools:
+      type: array
+      items:
+        $ref: '#/ChatToolChoiceAllowedToolsTool'
+
+ChatToolChoiceAllowedToolsTool:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+    function:
+      $ref: '#/ChatToolChoiceFunction'
+
+ChatTool:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [function, custom]
+    function:
+      $ref: '#/ChatToolFunction'
+    custom:
+      $ref: '#/ChatToolCustom'
+    cache_control:
+      $ref: './common.yaml#/CacheControl'
+
+ChatToolFunction:
+  type: object
+  required:
+    - name
+  properties:
+    name:
+      type: string
+    description:
+      type: string
+    parameters:
+      $ref: '#/ToolFunctionParameters'
+    strict:
+      type: boolean
+
+ToolFunctionParameters:
+  type: object
+  properties:
+    type:
+      type: string
+    description:
+      type: string
+    required:
+      type: array
+      items:
+        type: string
+    properties:
+      type: object
+      additionalProperties: true
+    enum:
+      type: array
+      items:
+        type: string
+    additionalProperties:
+      type: boolean
+
+ChatToolCustom:
+  type: object
+  properties:
+    format:
+      $ref: '#/ChatToolCustomFormat'
+
+ChatToolCustomFormat:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+    grammar:
+      $ref: '#/ChatToolCustomGrammarFormat'
+
+ChatToolCustomGrammarFormat:
+  type: object
+  required:
+    - definition
+    - syntax
+  properties:
+    definition:
+      type: string
+    syntax:
+      type: string
+      enum: [lark, regex]
+
+ChatReasoningDetails:
+  type: object
+  properties:
+    id:
+      type: string
+    index:
+      type: integer
+    type:
+      type: string
+      enum: [reasoning.summary, reasoning.encrypted, reasoning.text]
+    summary:
+      type: string
+    text:
+      type: string
+    signature:
+      type: string
+    data:
+      type: string
+
+ChatAssistantMessageAnnotation:
+  type: object
+  properties:
+    type:
+      type: string
+    url_citation:
+      $ref: '#/ChatAssistantMessageAnnotationCitation'
+
+ChatAssistantMessageAnnotationCitation:
+  type: object
+  properties:
+    start_index:
+      type: integer
+    end_index:
+      type: integer
+    title:
+      type: string
+    url:
+      type: string
+    sources:
+      type: object
+    type:
+      type: string
+
+ChatAssistantMessageToolCall:
+  type: object
+  required:
+    - function
+  properties:
+    index:
+      type: integer
+    type:
+      type: string
+    id:
+      type: string
+    function:
+      $ref: '#/ChatAssistantMessageToolCallFunction'
+
+ChatAssistantMessageToolCallFunction:
+  type: object
+  properties:
+    name:
+      type: string
+    arguments:
+      type: string
+
+ChatAudioMessageAudio:
+  type: object
+  properties:
+    id:
+      type: string
+    data:
+      type: string
+    expires_at:
+      type: integer
+    transcript:
+      type: string
+
+ChatCompletionResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: '#/BifrostResponseChoice'
+    created:
+      type: integer
+    model:
+      type: string
+    object:
+      type: string
+    service_tier:
+      type: string
+    system_fingerprint:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+    search_results:
+      type: array
+      items:
+        $ref: '#/PerplexitySearchResult'
+    videos:
+      type: array
+      items:
+        $ref: '#/VideoResult'
+    citations:
+      type: array
+      items:
+        type: string
+
+BifrostResponseChoice:
+  type: object
+  properties:
+    index:
+      type: integer
+    finish_reason:
+      type: string
+    log_probs:
+      $ref: '#/BifrostLogProbs'
+    text:
+      type: string
+      description: Generated text (for text completions)
+    message:
+      $ref: '#/ChatMessage'
+      description: For non-streaming chat completions
+    delta:
+      $ref: '#/ChatStreamResponseChoiceDelta'
+      description: For streaming chat completions
+
+BifrostLogProbs:
+  type: object
+  properties:
+    content:
+      type: array
+      items:
+        $ref: '#/ContentLogProb'
+    refusal:
+      type: array
+      items:
+        $ref: '#/LogProb'
+    text_offset:
+      type: array
+      items:
+        type: integer
+    token_logprobs:
+      type: array
+      items:
+        type: number
+    tokens:
+      type: array
+      items:
+        type: string
+    top_logprobs:
+      type: array
+      items:
+        type: object
+        additionalProperties:
+          type: number
+
+ContentLogProb:
+  type: object
+  properties:
+    bytes:
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+    top_logprobs:
+      type: array
+      items:
+        $ref: '#/LogProb'
+
+LogProb:
+  type: object
+  properties:
+    bytes:
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+
+ChatStreamResponseChoiceDelta:
+  type: object
+  properties:
+    role:
+      type: string
+    content:
+      type: string
+    refusal:
+      type: string
+    audio:
+      $ref: '#/ChatAudioMessageAudio'
+    reasoning:
+      type: string
+    reasoning_details:
+      type: array
+      items:
+        $ref: '#/ChatReasoningDetails'
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/ChatAssistantMessageToolCall'
+
+ChatCompletionStreamResponse:
+  type: object
+  description: Streaming chat completion response (SSE format)
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: '#/BifrostResponseChoice'
+    created:
+      type: integer
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+PerplexitySearchResult:
+  type: object
+  description: Search result from Perplexity AI search
+  properties:
+    title:
+      type: string
+    url:
+      type: string
+    date:
+      type: string
+    last_updated:
+      type: string
+    snippet:
+      type: string
+    source:
+      type: string
+
+VideoResult:
+  type: object
+  properties:
+    url:
+      type: string
+    thumbnail_url:
+      type: string
+    thumbnail_width:
+      type: integer
+    thumbnail_height:
+      type: integer
+    duration:
+      type: number
+
+ChatPrediction:
+  type: object
+  description: Predicted output content for the model to reference (OpenAI only). Can reduce latency.
+  properties:
+    type:
+      type: string
+      description: Always "content"
+    content:
+      description: Predicted content (string or array of content parts)
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: object
+            additionalProperties: true
+
+ChatWebSearchOptions:
+  type: object
+  description: Web search options for chat completions (OpenAI only)
+  properties:
+    search_context_size:
+      type: string
+      enum: [low, medium, high]
+      description: Amount of search context to include
+    user_location:
+      $ref: '#/ChatWebSearchOptionsUserLocation'
+
+ChatWebSearchOptionsUserLocation:
+  type: object
+  properties:
+    type:
+      type: string
+      description: Location type (e.g., "approximate")
+    approximate:
+      $ref: '#/ChatWebSearchOptionsUserLocationApproximate'
+
+ChatWebSearchOptionsUserLocationApproximate:
+  type: object
+  properties:
+    city:
+      type: string
+    country:
+      type: string
+      description: Two-letter ISO country code (e.g., "US")
+    region:
+      type: string
+      description: Region or state (e.g., "California")
+    timezone:
+      type: string
+      description: IANA timezone (e.g., "America/Los_Angeles")
--- a/docs/openapi/schemas/inference/common.yaml
+++ b/docs/openapi/schemas/inference/common.yaml
@@ -0,0 +1,149 @@
+# Common schemas used across the API
+
+ModelProvider:
+  type: string
+  description: AI model provider identifier
+  enum:
+    - openai
+    - azure
+    - anthropic
+    - bedrock
+    - cohere
+    - vertex
+    - vllm
+    - mistral
+    - ollama
+    - groq
+    - sgl
+    - parasail
+    - perplexity
+    - replicate
+    - cerebras
+    - gemini
+    - openrouter
+    - elevenlabs
+    - huggingface
+    - nebius
+    - xai
+    - runway
+    - fireworks
+
+Fallback:
+  type: object
+  description: Fallback model configuration
+  required:
+    - provider
+    - model
+  properties:
+    provider:
+      $ref: '#/ModelProvider'
+    model:
+      type: string
+      description: Model name
+
+BifrostError:
+  type: object
+  description: Error response from Bifrost
+  properties:
+    event_id:
+      type: string
+    type:
+      type: string
+    is_bifrost_error:
+      type: boolean
+    status_code:
+      type: integer
+    error:
+      $ref: '#/ErrorField'
+    extra_fields:
+      $ref: '#/BifrostErrorExtraFields'
+
+ErrorField:
+  type: object
+  properties:
+    type:
+      type: string
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    event_id:
+      type: string
+
+BifrostErrorExtraFields:
+  type: object
+  properties:
+    provider:
+      $ref: '#/ModelProvider'
+    model_requested:
+      type: string
+    request_type:
+      type: string
+
+BifrostResponseExtraFields:
+  type: object
+  description: Additional fields included in responses
+  properties:
+    request_type:
+      type: string
+      description: Type of request that was made
+    provider:
+      $ref: '#/ModelProvider'
+    model_requested:
+      type: string
+      description: The model that was requested
+    model_deployment:
+      type: string
+      description: The actual model deployment used
+    latency:
+      type: integer
+      format: int64
+      description: Request latency in milliseconds
+    chunk_index:
+      type: integer
+      description: Index of the chunk for streaming responses
+    raw_request:
+      type: object
+      description: Raw request if enabled
+    raw_response:
+      type: object
+      description: Raw response if enabled
+    cache_debug:
+      $ref: '#/BifrostCacheDebug'
+
+BifrostCacheDebug:
+  type: object
+  properties:
+    cache_hit:
+      type: boolean
+    cache_id:
+      type: string
+    hit_type:
+      type: string
+    requested_provider:
+      type: string
+    requested_model:
+      type: string
+    provider_used:
+      type: string
+    model_used:
+      type: string
+    input_tokens:
+      type: integer
+    threshold:
+      type: number
+    similarity:
+      type: number
+
+CacheControl:
+  type: object
+  description: Cache control settings for content blocks
+  properties:
+    type:
+      type: string
+      enum: [ephemeral]
+    ttl:
+      type: string
+      description: Time to live (e.g., "1m", "1h")
--- a/docs/openapi/schemas/inference/containers.yaml
+++ b/docs/openapi/schemas/inference/containers.yaml
@@ -0,0 +1,344 @@
+# Containers API schemas
+
+ContainerStatus:  # Used by the `status` property of the container object/response schemas in this file
+  type: string
+  enum:  # NOTE(review): only one value is listed - confirm no other container states need to be enumerated
+    - running
+  description: The status of a container
+
+ContainerExpiresAfter:  # Referenced by the `expires_after` property of the container request/response schemas in this file
+  type: object
+  description: Expiration configuration for a container
+  properties:
+    anchor:
+      type: string  # free-form string; no enum is declared
+      description: The anchor point for expiration (e.g., "last_active_at")
+    minutes:
+      type: integer
+      description: Number of minutes after anchor point
+
+ContainerObject:  # Item type of ContainerListResponse.data; carries no extra_fields of its own
+  type: object
+  description: A container object
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the container
+    object:
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:
+      type: string
+      description: Memory limit for the container (e.g., "1g", "4g")
+    metadata:
+      type: object
+      additionalProperties:  # map with string values only
+        type: string
+      description: User-provided metadata
+
+ContainerCreateRequest:  # Only provider and name are mandatory; every other property is optional
+  type: object
+  required:
+    - provider
+    - name
+  properties:
+    provider:
+      $ref: './common.yaml#/ModelProvider'  # cross-file ref into the sibling common.yaml
+    name:
+      type: string
+      description: Name of the container
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'
+    file_ids:
+      type: array
+      items:
+        type: string
+      description: IDs of existing files to copy into this container
+    memory_limit:
+      type: string
+      description: Memory limit for the container (e.g., "1g", "4g")
+    metadata:
+      type: object
+      additionalProperties:  # map with string values only
+        type: string
+      description: User-provided metadata
+
+ContainerCreateResponse:  # Repeats the ContainerObject properties inline (not via $ref) and adds extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the created container
+    object:
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:
+      type: string
+      description: Memory limit for the container
+    metadata:
+      type: object
+      additionalProperties:  # map with string values only
+        type: string
+      description: User-provided metadata
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerListResponse:  # List envelope: ContainerObject items plus first_id/last_id/has_more paging fields
+  type: object
+  properties:
+    object:
+      type: string
+      description: The object type (always "list")
+    data:
+      type: array
+      items:
+        $ref: '#/ContainerObject'
+      description: List of container objects
+    first_id:
+      type: string
+      description: ID of the first container in the list
+    last_id:
+      type: string
+      description: ID of the last container in the list
+    has_more:
+      type: boolean
+      description: Whether there are more containers to fetch
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerRetrieveResponse:  # Repeats the ContainerObject properties inline (not via $ref) and adds extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the container
+    object:
+      type: string
+      description: The object type (always "container")
+    name:
+      type: string
+      description: The name of the container
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the container was created
+    status:
+      $ref: '#/ContainerStatus'
+    expires_after:
+      $ref: '#/ContainerExpiresAfter'
+    last_active_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of last activity
+    memory_limit:
+      type: string
+      description: Memory limit for the container
+    metadata:
+      type: object
+      additionalProperties:  # map with string values only
+        type: string
+      description: User-provided metadata
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerDeleteResponse:  # Deletion acknowledgement: id, object marker, deleted flag, plus extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+      description: The ID of the deleted container
+    object:
+      type: string
+      description: The object type (always "container.deleted")
+    deleted:
+      type: boolean
+      description: Whether the container was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+# =============================================================================
+# CONTAINER FILES SCHEMAS
+# =============================================================================
+
+ContainerFileObject:  # Item type of ContainerFileListResponse.data; carries no extra_fields of its own
+  type: object
+  description: A file object within a container
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the file
+    object:
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:
+      type: string  # free-form string; no enum is declared
+      description: The source of the file (e.g., "user_upload", "copied")
+
+ContainerFileCreateMultipartRequest:  # NOTE(review): no `required` list, so `file` is optional per this schema - confirm that is intended
+  type: object
+  description: Request to create a file in a container via multipart upload
+  properties:
+    file:
+      type: string
+      format: binary  # raw file bytes
+      description: The file content to upload
+    file_path:
+      type: string
+      description: Optional path for the file within the container
+
+ContainerFileCreateJsonRequest:  # JSON counterpart of the multipart request: takes a file_id instead of file bytes
+  type: object
+  description: Request to create a file in a container by referencing an existing file
+  required:
+    - file_id
+  properties:
+    file_id:
+      type: string
+      description: The ID of an existing file to copy into the container
+    file_path:
+      type: string
+      description: Optional path for the file within the container
+
+ContainerFileCreateResponse:  # Repeats the ContainerFileObject properties inline (not via $ref) and adds extra_fields
+  type: object
+  description: Response from creating a file in a container
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the created file
+    object:
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:
+      type: string
+      description: The source of the file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerFileListResponse:  # List envelope: ContainerFileObject items plus first_id/last_id/has_more paging fields
+  type: object
+  description: Response containing a list of files in a container
+  properties:
+    object:
+      type: string
+      description: The object type (always "list")
+    data:
+      type: array
+      items:
+        $ref: '#/ContainerFileObject'
+      description: List of file objects
+    first_id:
+      type: string
+      description: ID of the first file in the list
+    last_id:
+      type: string
+      description: ID of the last file in the list
+    has_more:
+      type: boolean
+      description: Whether there are more files to fetch
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerFileRetrieveResponse:  # Repeats the ContainerFileObject properties inline (not via $ref) and adds extra_fields
+  type: object
+  description: Response from retrieving a file from a container
+  properties:
+    id:
+      type: string
+      description: The unique identifier for the file
+    object:
+      type: string
+      description: The object type (always "container.file")
+    container_id:
+      type: string
+      description: The ID of the container this file belongs to
+    path:
+      type: string
+      description: The path of the file within the container
+    bytes:
+      type: integer
+      format: int64
+      description: The size of the file in bytes
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp of when the file was created
+    source:
+      type: string
+      description: The source of the file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ContainerFileDeleteResponse:  # Deletion acknowledgement: id, object marker, deleted flag, plus extra_fields
+  type: object
+  description: Response from deleting a file from a container
+  properties:
+    id:
+      type: string
+      description: The ID of the deleted file
+    object:
+      type: string
+      description: The object type (always "container.file.deleted")
+    deleted:
+      type: boolean
+      description: Whether the file was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
--- a/docs/openapi/schemas/inference/count-tokens.yaml
+++ b/docs/openapi/schemas/inference/count-tokens.yaml
@@ -0,0 +1,53 @@
+# Count Tokens API schemas
+
+CountTokensRequest:  # Message and tool item types are borrowed from responses.yaml
+  type: object
+  required:  # only model and messages are mandatory
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    messages:
+      type: array
+      items:
+        $ref: './responses.yaml#/ResponsesMessage'  # cross-file ref into the sibling responses.yaml
+    fallbacks:
+      type: array
+      items:
+        type: string
+    tools:
+      type: array
+      items:
+        $ref: './responses.yaml#/ResponsesTool'  # cross-file ref into the sibling responses.yaml
+    instructions:
+      type: string
+    text:
+      type: string
+
+CountTokensResponse:  # No `required` list: every property is optional per this schema
+  type: object
+  properties:
+    object:
+      type: string
+    model:
+      type: string
+    input_tokens:
+      type: integer
+    input_tokens_details:
+      $ref: './responses.yaml#/ResponsesResponseInputTokens'  # cross-file ref into the sibling responses.yaml
+    tokens:
+      type: array  # list of integers
+      items:
+        type: integer
+    token_strings:
+      type: array  # list of strings
+      items:
+        type: string
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
--- a/docs/openapi/schemas/inference/embeddings.yaml
+++ b/docs/openapi/schemas/inference/embeddings.yaml
@@ -0,0 +1,76 @@
+# Embeddings API schemas
+
+EmbeddingRequest:  # Only model and input are mandatory
+  type: object
+  required:
+    - model
+    - input
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      $ref: '#/EmbeddingInput'  # union type defined below in this file
+    fallbacks:
+      type: array
+      items:
+        type: string
+    encoding_format:
+      type: string
+      enum: [float, base64]
+    dimensions:
+      type: integer
+
+EmbeddingInput:  # Union of four shapes; referenced by EmbeddingRequest.input
+  oneOf:  # NOTE(review): an empty array satisfies all three array branches, which oneOf rejects - confirm that is acceptable
+    - type: string  # a single string
+    - type: array  # list of strings
+      items:
+        type: string
+    - type: array  # list of integers
+      items:
+        type: integer
+    - type: array  # list of integer lists
+      items:
+        type: array
+        items:
+          type: integer
+  description: Input for embedding - text or token arrays
+
+EmbeddingResponse:  # EmbeddingData items plus model, usage and extra_fields
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/EmbeddingData'
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # cross-file ref into the sibling usage.yaml
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+EmbeddingData:  # Item type of EmbeddingResponse.data
+  type: object
+  properties:
+    index:
+      type: integer
+    object:
+      type: string
+    embedding:
+      $ref: '#/EmbeddingStruct'  # union type defined below in this file
+
+EmbeddingStruct:  # Union of three shapes; referenced by EmbeddingData.embedding
+  oneOf:  # NOTE(review): an empty array satisfies both array branches, which oneOf rejects - confirm that is acceptable
+    - type: string  # a single string
+    - type: array  # list of numbers
+      items:
+        type: number
+    - type: array  # list of number lists
+      items:
+        type: array
+        items:
+          type: number
--- a/docs/openapi/schemas/inference/files.yaml
+++ b/docs/openapi/schemas/inference/files.yaml
@@ -0,0 +1,188 @@
+# Files API schemas
+
+S3StorageConfig:  # Referenced by FileStorageConfig.s3; no `required` list
+  type: object
+  description: AWS S3 storage configuration
+  properties:
+    bucket:
+      type: string
+      description: S3 bucket name
+    region:
+      type: string
+      description: AWS region
+    prefix:
+      type: string
+      description: Path prefix for stored files
+
+GCSStorageConfig:  # Referenced by FileStorageConfig.gcs; has `project` where S3StorageConfig has `region`
+  type: object
+  description: Google Cloud Storage configuration
+  properties:
+    bucket:
+      type: string
+      description: GCS bucket name
+    project:
+      type: string
+      description: GCP project ID
+    prefix:
+      type: string
+      description: Path prefix for stored files
+
+FileStorageConfig:  # Wrapper with one optional sub-config per backend; the schema does not make them mutually exclusive
+  type: object
+  description: Storage configuration for cloud storage backends
+  properties:
+    s3:
+      $ref: '#/S3StorageConfig'
+    gcs:
+      $ref: '#/GCSStorageConfig'
+
+FilePurpose:  # String enum used by the `purpose` property of the file request/response schemas in this file
+  type: string
+  enum:
+    - batch
+    - assistants
+    - fine-tune
+    - vision
+    - batch_output
+    - user_data
+    - responses
+    - evals
+
+FileStatus:  # String enum used by the `status` property of the file response schemas in this file
+  type: string
+  enum:
+    - uploaded
+    - processed
+    - processing
+    - error
+    - deleted
+
+FileUploadRequest:  # file and purpose are mandatory; provider is optional
+  type: object
+  required:
+    - file
+    - purpose
+  properties:
+    file:
+      type: string
+      format: binary  # raw file bytes
+    purpose:
+      $ref: '#/FilePurpose'
+    provider:
+      $ref: './common.yaml#/ModelProvider'  # cross-file ref into the sibling common.yaml
+
+FileUploadResponse:  # Repeats the FileObject properties inline and adds storage_backend, storage_uri and extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer
+      format: int64
+    storage_backend:
+      type: string
+    storage_uri:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+FileListResponse:  # List envelope: FileObject items plus has_more/after paging fields
+  type: object
+  properties:
+    object:
+      type: string
+    data:
+      type: array
+      items:
+        $ref: '#/FileObject'  # defined below in this file
+    has_more:
+      type: boolean
+    after:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+FileObject:  # Item type of FileListResponse.data; unlike FileRetrieveResponse it has no storage_* or extra_fields properties
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer
+      format: int64
+
+FileRetrieveResponse:  # Same property set as FileUploadResponse
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    bytes:
+      type: integer
+      format: int64
+    created_at:
+      type: integer
+      format: int64
+    filename:
+      type: string
+    purpose:
+      $ref: '#/FilePurpose'
+    status:
+      $ref: '#/FileStatus'
+    status_details:
+      type: string
+    expires_at:
+      type: integer
+      format: int64
+    storage_backend:
+      type: string
+    storage_uri:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+FileDeleteResponse:  # Deletion acknowledgement: id, object, deleted flag, plus extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+    object:
+      type: string
+    deleted:
+      type: boolean
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
--- a/docs/openapi/schemas/inference/images.yaml
+++ b/docs/openapi/schemas/inference/images.yaml
@@ -0,0 +1,514 @@
+# Image Generation Schemas
+
+ImageGenerationRequest:  # Only model and prompt are mandatory; all other parameters are optional
+  allOf:  # single-member allOf: the one inline object below carries the whole schema
+    - type: object
+      required:
+        - model
+        - prompt
+      properties:
+        model:
+          type: string
+          description: Model identifier in format `provider/model`
+        prompt:
+          type: string
+          description: Text prompt to generate image
+        n:
+          type: integer
+          minimum: 1
+          maximum: 10
+          description: Number of images to generate
+        size:
+          type: string
+          enum:  # values are quoted so they stay strings
+            - "256x256"
+            - "512x512"
+            - "1024x1024"
+            - "1792x1024"
+            - "1024x1792"
+            - "1536x1024"
+            - "1024x1536"
+            - "auto"
+          description: Size of the generated image
+        quality:
+          type: string
+          enum:
+            - "auto"
+            - "high"
+            - "medium"
+            - "low"
+            - "hd"
+            - "standard"
+          description: Quality of the generated image
+        style:
+          type: string
+          enum:
+            - "natural"
+            - "vivid"
+          description: Style of the generated image
+        response_format:
+          type: string
+          enum:
+            - "url"
+            - "b64_json"
+          default: "url"
+          description: |
+            Format of the response.
+        background:
+          type: string
+          enum:
+            - "transparent"
+            - "opaque"
+            - "auto"
+          description: Background type for the image
+        moderation:
+          type: string
+          enum:
+            - "low"
+            - "auto"
+          description: Content moderation level
+        partial_images:
+          type: integer
+          minimum: 0
+          maximum: 3
+          description: Number of partial images to generate
+        output_compression:
+          type: integer
+          minimum: 0
+          maximum: 100
+          description: Compression level (0-100%)
+        output_format:
+          type: string
+          enum:
+            - "png"
+            - "webp"
+            - "jpeg"
+          description: Output image format
+        user:
+          type: string
+          description: User identifier for tracking
+        seed:
+          type: integer
+          description: Seed for reproducible image generation
+        negative_prompt:
+          type: string
+          description: Negative prompt to guide what to avoid in generation
+        num_inference_steps:
+          type: integer
+          description: Number of inference steps for generation
+        stream:
+          type: boolean
+          default: false
+          description: |
+            Whether to stream the response. When true, images are sent as SSE.
+            When streaming, providers may return base64 chunks (`b64_json`) and/or URLs (`url`) depending on provider and configuration.
+        fallbacks:
+          type: array
+          items:
+            $ref: './common.yaml#/Fallback'  # cross-file ref into the sibling common.yaml
+          description: Fallback models to try if primary model fails
+
+ImageGenerationResponse:  # ImageData items plus the echoed output settings, usage and extra_fields
+  type: object
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the generation request
+    created:
+      type: integer
+      format: int64
+      description: Unix timestamp when the image was created
+    model:
+      type: string
+      description: Model used for generation
+    data:
+      type: array
+      items:
+        $ref: '#/ImageData'
+      description: Array of generated images
+    background:
+      type: string  # free-form here; the request schema restricts it to an enum
+      description: Background type for the image
+    output_format:
+      type: string
+      enum:
+        - "png"
+        - "webp"
+        - "jpeg"
+      description: Output image format
+    quality:
+      type: string  # free-form here; the request schema restricts it to an enum
+      description: Quality of the generated image
+    size:
+      type: string
+      enum:  # same value list as ImageGenerationRequest.size
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1792x1024"
+        - "1024x1792"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+      description: Size of the generated image
+    usage:
+      $ref: '#/ImageUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+ImageData:  # Item type of ImageGenerationResponse.data; neither url nor b64_json is marked required
+  type: object
+  properties:
+    url:
+      type: string
+      format: uri
+      description: URL of the generated image
+    b64_json:
+      type: string
+      description: Base64-encoded image data
+    revised_prompt:
+      type: string
+      description: Revised prompt used for generation
+    index:
+      type: integer
+      description: Index of this image
+
+ImageGenerationResponseParameters:  # Four free-form string properties; NOTE(review): not referenced within the visible part of this file - confirm it is used elsewhere
+  type: object
+  properties:
+    background:
+      type: string
+    output_format:
+      type: string
+    quality:
+      type: string
+    size:
+      type: string
+
+ImageUsage:  # Referenced by the `usage` property of the image response and stream schemas in this file
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens
+    input_tokens_details:
+      $ref: '#/ImageTokenDetails'  # same breakdown type as output_tokens_details
+    total_tokens:
+      type: integer
+      description: Total tokens used
+    output_tokens:
+      type: integer
+      description: Number of output tokens
+    output_tokens_details:
+      $ref: '#/ImageTokenDetails'
+
+ImageTokenDetails:  # Image/text token split; used for both input and output details in ImageUsage
+  type: object
+  properties:
+    image_tokens:
+      type: integer
+      description: Tokens used for images
+    text_tokens:
+      type: integer
+      description: Tokens used for text
+
+ImageGenerationStreamResponse:  # One SSE chunk; `type` discriminates partial image, completed and error events
+  type: object
+  description: |
+    Streaming response chunk for image generation.
+    Sent via Server-Sent Events (SSE).
+    Providers may return either b64_json (base64-encoded image data) or url (public URL to the image).
+  properties:
+    id:
+      type: string
+      description: Request identifier
+    type:
+      type: string
+      enum:
+        - "image_generation.partial_image"
+        - "image_generation.completed"
+        - "error"
+      description: Type of stream event
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Sequence number for event ordering within the stream
+    b64_json:
+      type: string
+      description: |
+        Base64-encoded chunk of image data.
+        Optional; either b64_json or url may be present.
+    url:
+      type: string
+      format: uri
+      description: |
+        Optional public URL to the generated image chunk.
+        Used by HuggingFace and other providers that return image URLs instead of base64 data.
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string
+      enum:  # same value list as ImageGenerationRequest.size
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1792x1024"
+        - "1024x1792"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+      description: Size of the generated image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      enum:
+        - "png"
+        - "webp"
+        - "jpeg"
+      description: Output format used
+    revised_prompt:
+      type: string
+      description: Revised prompt
+    usage:
+      $ref: '#/ImageUsage'  # NOTE(review): OpenAPI 3.0 ignores keywords placed beside $ref, so the description below may be dropped - confirm target spec version
+      description: Token usage
+    error:
+      $ref: './common.yaml#/BifrostError'  # NOTE(review): same $ref-sibling caveat applies to the description below
+      description: Error information if generation failed
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+
+# Image Edit Schemas (multipart/form-data)
+
+ImageEditRequest:  # Form fields for an image edit; binary `image` and `mask` parts carry the files
+  type: object
+  required:  # prompt is not listed here; its description states it is only conditionally required
+    - model
+    - image
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    prompt:
+      type: string
+      description: |
+        Text prompt describing the edit. Required unless `type` is `background_removal`.
+    image:
+      type: string
+      format: binary
+      description: |
+        Image file to edit. Use field name `image` for a single file or `image[]` for multiple files.
+    mask:
+      type: string
+      format: binary
+      description: Optional mask image for inpainting (transparent areas indicate regions to edit)
+    type:
+      type: string
+      enum:
+        - "inpainting"
+        - "outpainting"
+        - "background_removal"
+      description: Type of edit operation
+    n:
+      type: integer
+      minimum: 1
+      maximum: 10
+      description: Number of images to generate
+    size:
+      type: string
+      enum:  # narrower than ImageGenerationRequest.size: no "1792x1024" / "1024x1792" entries
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+      description: Size of the output image
+    response_format:
+      type: string
+      enum:
+        - "url"
+        - "b64_json"
+      default: "url"
+      description: Format of the response
+    stream:
+      type: boolean
+      default: false
+      description: When true, stream the response via Server-Sent Events
+    background:
+      type: string
+      enum:
+        - "transparent"
+        - "opaque"
+        - "auto"
+      description: Background type for the image
+    input_fidelity:
+      type: string
+      enum:
+        - "low"
+        - "high"
+      description: How closely to follow the original image
+    partial_images:
+      type: integer
+      minimum: 0
+      maximum: 3
+      description: Number of partial images to generate when streaming
+    quality:
+      type: string
+      enum:  # narrower than ImageGenerationRequest.quality: no "hd" entry
+        - "auto"
+        - "high"
+        - "medium"
+        - "low"
+        - "standard"
+      description: Quality of the output image
+    output_format:
+      type: string
+      enum:
+        - "png"
+        - "webp"
+        - "jpeg"
+      description: Output image format
+    num_inference_steps:
+      type: integer
+      description: Number of inference steps
+    seed:
+      type: integer
+      description: Seed for reproducible editing
+    output_compression:
+      type: integer
+      minimum: 0
+      maximum: 100
+      description: Compression level (0-100%)
+    negative_prompt:
+      type: string
+      description: What to avoid in the edit
+    user:
+      type: string
+      description: User identifier for tracking
+    fallbacks:
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'  # cross-file ref into the sibling common.yaml
+      description: Fallback models to try if primary model fails
+
+# Image Variation Schemas (multipart/form-data)
+
+ImageVariationRequest:  # Form fields for image variations; takes no prompt property
+  type: object
+  required:
+    - model
+    - image
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    image:
+      type: string
+      format: binary
+      description: |
+        Image file to create variations of. Use field name `image` for a single file or `image[]` for multiple (first image is used).
+    n:
+      type: integer
+      minimum: 1
+      maximum: 10
+      description: Number of variations to generate
+    size:
+      type: string
+      enum:  # same value list as ImageGenerationRequest.size
+        - "256x256"
+        - "512x512"
+        - "1024x1024"
+        - "1792x1024"
+        - "1024x1792"
+        - "1536x1024"
+        - "1024x1536"
+        - "auto"
+      description: Size of the output images
+    response_format:
+      type: string
+      enum:
+        - "url"
+        - "b64_json"
+      default: "url"
+      description: Format of the response
+    user:
+      type: string
+      description: User identifier for tracking
+    fallbacks:
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'  # cross-file ref into the sibling common.yaml
+      description: Fallback models to try if primary model fails
+
+# Image Edit Streaming (SSE)
+
+ImageEditStreamResponse:  # Edit counterpart of ImageGenerationStreamResponse; event names use the image_edit.* prefix
+  type: object
+  description: |
+    Streaming response chunk for image edit.
+    Sent via Server-Sent Events (SSE) when `stream=true`.
+  properties:
+    id:
+      type: string
+      description: Request identifier
+    type:
+      type: string
+      enum:
+        - "image_edit.partial_image"
+        - "image_edit.completed"
+        - "error"
+      description: Type of stream event
+    partial_image_index:
+      type: integer
+      description: Index of the partial image chunk
+    sequence_number:
+      type: integer
+      description: Sequence number for event ordering within the stream
+    b64_json:
+      type: string
+      description: Base64-encoded chunk of image data; optional
+    url:
+      type: string
+      format: uri
+      description: Optional public URL to the image chunk
+    created_at:
+      type: integer
+      format: int64
+      description: Timestamp when chunk was created
+    size:
+      type: string  # free-form here, whereas ImageGenerationStreamResponse.size is an enum
+      description: Size of the image
+    quality:
+      type: string
+      description: Quality setting used
+    background:
+      type: string
+      description: Background type used
+    output_format:
+      type: string
+      enum:
+        - "png"
+        - "webp"
+        - "jpeg"
+      description: Output format used
+    revised_prompt:
+      type: string
+      description: Revised prompt
+    usage:
+      $ref: '#/ImageUsage'  # NOTE(review): OpenAPI 3.0 ignores keywords placed beside $ref, so the description below may be dropped - confirm target spec version
+      description: Token usage
+    error:
+      $ref: './common.yaml#/BifrostError'  # NOTE(review): same $ref-sibling caveat applies to the description below
+      description: Error information if edit failed
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
--- a/docs/openapi/schemas/inference/models.yaml
+++ b/docs/openapi/schemas/inference/models.yaml
@@ -0,0 +1,125 @@
+# Models API schemas
+
+ListModelsResponse:  # Model items plus a string token for the next page
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/Model'  # defined below in this file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # cross-file ref into the sibling common.yaml
+    next_page_token:
+      type: string
+
+Model:  # Item type of ListModelsResponse.data; nested objects are same-file refs defined below
+  type: object
+  properties:
+    id:
+      type: string
+      description: Model ID in provider/model format
+    canonical_slug:
+      type: string
+    name:
+      type: string
+    deployment:
+      type: string
+    created:
+      type: integer
+      format: int64
+    context_length:
+      type: integer
+    max_input_tokens:
+      type: integer
+    max_output_tokens:
+      type: integer
+    architecture:
+      $ref: '#/Architecture'
+    pricing:
+      $ref: '#/Pricing'
+    top_provider:
+      $ref: '#/TopProvider'
+    per_request_limits:
+      $ref: '#/PerRequestLimits'
+    supported_parameters:
+      type: array  # list of strings
+      items:
+        type: string
+    default_parameters:
+      $ref: '#/DefaultParameters'
+    hugging_face_id:
+      type: string
+    description:
+      type: string  # a property named `description`, not schema documentation
+    owned_by:
+      type: string
+    supported_methods:
+      type: array  # list of strings
+      items:
+        type: string
+
+Architecture:  # Referenced by Model.architecture
+  type: object
+  properties:
+    modality:
+      type: string
+    tokenizer:
+      type: string
+    instruct_type:
+      type: string
+    input_modalities:
+      type: array  # list of strings
+      items:
+        type: string
+    output_modalities:
+      type: array  # list of strings
+      items:
+        type: string
+
+Pricing:  # Referenced by Model.pricing; every value is typed as a string, not a number
+  type: object
+  properties:
+    prompt:
+      type: string
+    completion:
+      type: string
+    request:
+      type: string
+    image:
+      type: string
+    web_search:
+      type: string
+    internal_reasoning:
+      type: string
+    input_cache_read:
+      type: string
+    input_cache_write:
+      type: string
+
+TopProvider:  # Referenced by Model.top_provider
+  type: object
+  properties:
+    is_moderated:
+      type: boolean
+    context_length:
+      type: integer
+    max_completion_tokens:
+      type: integer
+
+PerRequestLimits:  # Referenced by Model.per_request_limits; two integer token limits
+  type: object
+  properties:
+    prompt_tokens:
+      type: integer
+    completion_tokens:
+      type: integer
+
+DefaultParameters:  # Referenced by Model.default_parameters; all three values are numbers with no bounds declared
+  type: object
+  properties:
+    temperature:
+      type: number
+    top_p:
+      type: number
+    frequency_penalty:
+      type: number
--- a/docs/openapi/schemas/inference/rerank.yaml
+++ b/docs/openapi/schemas/inference/rerank.yaml
@@ -0,0 +1,98 @@
+# Rerank API schemas
+
+RerankRequest:  # Rerank request schema: model, query and documents are required; every other property is optional.
+  type: object
+  required:
+    - model
+    - query
+    - documents
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+      example: cohere/rerank-v3.5
+    query:
+      type: string
+      minLength: 1  # an empty query string is rejected by validation
+      description: Query used to score and reorder documents
+    documents:
+      type: array
+      description: Documents to rerank
+      minItems: 1  # at least one document must be supplied
+      items:
+        $ref: '#/RerankDocument'
+    fallbacks:
+      type: array
+      items:
+        type: string
+      description: Fallback models in provider/model format
+    top_n:
+      type: integer
+      minimum: 1
+      description: Maximum number of ranked results to return
+    max_tokens_per_doc:
+      type: integer
+      minimum: 1
+      description: Maximum tokens to consider per document (provider-dependent)
+    priority:
+      type: integer
+      description: Request priority hint (provider-dependent)
+    return_documents:
+      type: boolean
+      description: Whether to include document content in each result
+
+RerankDocument:  # Item type of RerankRequest.documents and type of RerankResult.document; only `text` is required.
+  type: object
+  required:
+    - text
+  properties:
+    text:
+      type: string
+      minLength: 1
+      description: Document text content
+    id:
+      type: string
+      minLength: 1  # when present, the id must be non-empty
+      description: Optional document identifier
+    meta:
+      type: object
+      description: Optional document metadata
+      additionalProperties: true  # free-form object: any keys and value types are accepted
+
+RerankResponse:  # Rerank response schema: results and model are required; id, usage and extra_fields are optional.
+  type: object
+  required:
+    - results
+    - model
+  properties:
+    id:
+      type: string
+      description: Unique identifier for the rerank response
+    results:
+      type: array
+      description: Ranked results ordered by relevance score descending
+      items:
+        $ref: '#/RerankResult'
+    model:
+      type: string
+      description: Model used to perform reranking
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # defined in the sibling usage.yaml file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+
+RerankResult:  # Item type of RerankResponse.results; index and relevance_score are required, document is optional.
+  type: object
+  required:
+    - index
+    - relevance_score
+  properties:
+    index:
+      type: integer
+      minimum: 0  # zero-based: 0 is a valid index
+      description: Index into the original documents array
+    relevance_score:
+      type: number
+      description: Relevance score for this document
+    document:
+      $ref: '#/RerankDocument'
--- a/docs/openapi/schemas/inference/responses.yaml
+++ b/docs/openapi/schemas/inference/responses.yaml
@@ -0,0 +1,716 @@
+# Responses API schemas
+
+ResponsesRequest:  # Responses API request schema: model and input are required; every other property is optional.
+  type: object
+  required:
+    - model
+    - input
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      $ref: '#/ResponsesRequestInput'  # string or array of ResponsesMessage
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    background:
+      type: boolean
+    conversation:
+      type: string
+    include:
+      type: array
+      items:
+        type: string
+    instructions:
+      type: string
+    max_output_tokens:
+      type: integer
+    max_tool_calls:
+      type: integer
+    metadata:
+      type: object
+      additionalProperties: true  # free-form object: any keys and value types are accepted
+    parallel_tool_calls:
+      type: boolean
+    previous_response_id:
+      type: string
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ResponsesParametersReasoning'
+    safety_identifier:
+      type: string
+    service_tier:
+      type: string
+    stream_options:
+      $ref: '#/ResponsesStreamOptions'
+    store:
+      type: boolean
+    temperature:
+      type: number
+    text:
+      $ref: '#/ResponsesTextConfig'
+    top_logprobs:
+      type: integer
+    top_p:
+      type: number
+    tool_choice:
+      $ref: '#/ResponsesToolChoice'  # string literal or ResponsesToolChoiceStruct
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesTool'
+    truncation:
+      type: string
+
+ResponsesRequestInput:  # Target of ResponsesRequest.input: exactly one of the two shapes below must match.
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ResponsesMessage'
+  description: Input - can be a string or array of messages
+
+ResponsesMessage:  # Item schema used for request input, ResponsesResponse.output and ResponsesStreamResponse.item; no field is required.
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      $ref: '#/ResponsesMessageType'  # one of the enum values defined in ResponsesMessageType
+    status:
+      type: string
+      enum: [in_progress, completed, incomplete, interpreting, failed]
+    role:
+      type: string
+      enum: [assistant, user, system, developer]
+    content:
+      $ref: '#/ResponsesMessageContent'  # string or array of content blocks
+    call_id:
+      type: string
+    name:
+      type: string
+    arguments:
+      type: string
+    output:
+      type: object  # untyped object: no nested properties are declared
+    action:
+      type: object  # untyped object: no nested properties are declared
+    error:
+      type: string
+    queries:
+      type: array
+      items:
+        type: string
+    results:
+      type: array
+      items:
+        type: object
+    summary:
+      type: array
+      items:
+        $ref: '#/ResponsesReasoningSummary'
+    encrypted_content:
+      type: string
+
+ResponsesMessageType:  # Allowed values for ResponsesMessage.type.
+  type: string
+  enum:
+    - message
+    - file_search_call
+    - computer_call
+    - computer_call_output
+    - web_search_call
+    - web_fetch_call
+    - function_call
+    - function_call_output
+    - code_interpreter_call
+    - local_shell_call
+    - local_shell_call_output
+    - mcp_call
+    - custom_tool_call
+    - custom_tool_call_output
+    - image_generation_call
+    - mcp_list_tools
+    - mcp_approval_request
+    - mcp_approval_responses  # NOTE(review): plural, unlike mcp_approval_request above - confirm this matches the value actually sent on the wire
+    - reasoning
+    - item_reference
+    - refusal
+
+ResponsesMessageContent:  # Target of ResponsesMessage.content: either a plain string or an array of content blocks.
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/ResponsesMessageContentBlock'
+
+ResponsesMessageContentBlock:  # One content block; only `type` is required, all other fields are optional.
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [input_text, input_image, input_file, input_audio, output_text, refusal, reasoning_text]
+    file_id:
+      type: string
+    text:
+      type: string
+    signature:
+      type: string
+    image_url:
+      type: string
+    detail:
+      type: string
+    file_data:
+      type: string
+    file_url:
+      type: string
+    filename:
+      type: string
+    file_type:
+      type: string
+    input_audio:
+      $ref: '#/ResponsesInputMessageContentBlockAudio'
+    annotations:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextAnnotation'
+    logprobs:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextLogProb'
+    refusal:
+      type: string
+    cache_control:
+      $ref: './common.yaml#/CacheControl'  # defined in the sibling common.yaml file
+
+ResponsesInputMessageContentBlockAudio:  # Target of ResponsesMessageContentBlock.input_audio; both format and data are required.
+  type: object
+  required:
+    - format
+    - data
+  properties:
+    format:
+      type: string
+      enum: [mp3, wav]  # only these two formats validate
+    data:
+      type: string
+
+ResponsesOutputMessageContentTextAnnotation:  # Item type of ResponsesMessageContentBlock.annotations and type of ResponsesStreamResponse.annotation; no field is required.
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [file_citation, url_citation, container_file_citation, file_path]
+    index:
+      type: integer
+    file_id:
+      type: string
+    text:
+      type: string
+    start_index:
+      type: integer
+    end_index:
+      type: integer
+    filename:
+      type: string
+    title:
+      type: string
+    url:
+      type: string
+    container_id:
+      type: string
+
+ResponsesOutputMessageContentTextLogProb:  # Item type of the `logprobs` arrays on ResponsesMessageContentBlock and ResponsesStreamResponse.
+  type: object
+  properties:
+    bytes:
+      type: array
+      items:
+        type: integer
+    logprob:
+      type: number
+    token:
+      type: string
+    top_logprobs:
+      type: array
+      items:
+        $ref: './chat.yaml#/LogProb'  # defined in the sibling chat.yaml file
+
+ResponsesParametersReasoning:  # Shared by ResponsesRequest.reasoning and ResponsesResponse.reasoning; no field is required.
+  type: object
+  properties:
+    effort:
+      type: string
+      enum: [none, minimal, low, medium, high, xhigh]
+    generate_summary:
+      type: string
+      deprecated: true  # flagged deprecated in the generated spec; unlike `summary`, it carries no enum restriction
+    summary:
+      type: string
+      enum: [auto, concise, detailed]
+    max_tokens:
+      type: integer
+
+ResponsesStreamOptions:  # Target of ResponsesRequest.stream_options; its single boolean field is optional.
+  type: object
+  properties:
+    include_obfuscation:
+      type: boolean
+
+ResponsesTextConfig:  # Shared by ResponsesRequest.text and ResponsesResponse.text; no field is required.
+  type: object
+  properties:
+    format:
+      $ref: '#/ResponsesTextConfigFormat'
+    verbosity:
+      type: string
+      enum: [low, medium, high]
+
+ResponsesTextConfigFormat:  # Target of ResponsesTextConfig.format; only `type` is required.
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [text, json_schema, json_object]
+    name:
+      type: string
+    schema:
+      type: object  # untyped object: no nested properties are declared
+    strict:
+      type: boolean
+
+ResponsesToolChoice:  # Shared by ResponsesRequest.tool_choice and ResponsesResponse.tool_choice: a string literal or a struct.
+  oneOf:
+    - type: string
+      enum: [none, auto, required]
+    - $ref: '#/ResponsesToolChoiceStruct'
+
+ResponsesToolChoiceStruct:  # Object alternative of ResponsesToolChoice; only `type` is required.
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - none
+        - auto
+        - any
+        - required
+        - function
+        - allowed_tools
+        - file_search
+        - web_search_preview
+        - computer_use_preview
+        - code_interpreter
+        - image_generation
+        - mcp
+        - custom
+    mode:
+      type: string
+    name:
+      type: string
+    server_label:
+      type: string
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesToolChoiceAllowedToolDef'
+
+ResponsesToolChoiceAllowedToolDef:  # Item type of ResponsesToolChoiceStruct.tools; only `type` is required.
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [function, mcp, image_generation]
+    name:
+      type: string
+    server_label:
+      type: string
+
+ResponsesTool:  # Item type of the `tools` arrays; one flat object for every tool type in the enum - only `type` is required.
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - function
+        - file_search
+        - computer_use_preview
+        - web_search
+        - web_fetch
+        - mcp
+        - code_interpreter
+        - image_generation
+        - local_shell
+        - custom
+        - web_search_preview
+        - memory
+        - tool_search
+    name:
+      type: string
+    description:
+      type: string
+    cache_control:
+      $ref: './common.yaml#/CacheControl'  # defined in the sibling common.yaml file
+    parameters:
+      $ref: './chat.yaml#/ToolFunctionParameters'  # defined in the sibling chat.yaml file
+    strict:
+      type: boolean
+    vector_store_ids:
+      type: array
+      items:
+        type: string
+    filters:
+      type: object
+    max_num_results:
+      type: integer
+    ranking_options:
+      type: object
+    display_height:
+      type: integer
+    display_width:
+      type: integer
+    environment:
+      type: string
+    enable_zoom:
+      type: boolean
+    search_context_size:
+      type: string
+    user_location:
+      type: object
+    server_label:
+      type: string
+    server_url:
+      type: string
+    allowed_tools:
+      type: object
+    authorization:
+      type: string
+    connector_id:
+      type: string
+    headers:
+      type: object
+      additionalProperties:  # map of arbitrary keys to string values
+        type: string
+    require_approval:
+      type: object
+    server_description:
+      type: string
+    container:
+      type: object
+    background:
+      type: string
+    input_fidelity:
+      type: string
+    input_image_mask:
+      type: object
+    moderation:
+      type: string
+    output_compression:
+      type: integer
+    output_format:
+      type: string
+    partial_images:
+      type: integer
+    quality:
+      type: string
+    size:
+      type: string
+    format:
+      type: object
+
+ResponsesReasoningSummary:  # Item type of ResponsesMessage.summary; both type and text are required.
+  type: object
+  required:
+    - type
+    - text
+  properties:
+    type:
+      type: string
+      enum: [summary_text]  # single allowed value
+    text:
+      type: string
+
+ResponsesResponse:  # Responses API response schema (also nested in ResponsesStreamResponse.response); no field is required.
+  type: object
+  properties:
+    id:
+      type: string
+    background:
+      type: boolean
+    conversation:
+      type: object  # an object here, whereas ResponsesRequest.conversation is a string
+    created_at:
+      type: integer
+    error:
+      $ref: '#/ResponsesResponseError'
+    include:
+      type: array
+      items:
+        type: string
+    incomplete_details:
+      $ref: '#/ResponsesResponseIncompleteDetails'
+    instructions:
+      type: object  # an object here, whereas ResponsesRequest.instructions is a string
+    max_output_tokens:
+      type: integer
+    max_tool_calls:
+      type: integer
+    metadata:
+      type: object
+    model:
+      type: string
+    output:
+      type: array
+      items:
+        $ref: '#/ResponsesMessage'
+    parallel_tool_calls:
+      type: boolean
+    previous_response_id:
+      type: string
+    prompt:
+      type: object
+    prompt_cache_key:
+      type: string
+    reasoning:
+      $ref: '#/ResponsesParametersReasoning'
+    safety_identifier:
+      type: string
+    service_tier:
+      type: string
+    status:
+      type: string
+      enum: [completed, failed, in_progress, canceled, queued, incomplete]
+    stop_reason:
+      type: string
+    store:
+      type: boolean
+    temperature:
+      type: number
+    text:
+      $ref: '#/ResponsesTextConfig'
+    top_logprobs:
+      type: integer
+    top_p:
+      type: number
+    tool_choice:
+      $ref: '#/ResponsesToolChoice'
+    tools:
+      type: array
+      items:
+        $ref: '#/ResponsesTool'
+    truncation:
+      type: string
+    usage:
+      $ref: '#/ResponsesResponseUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+    search_results:
+      type: array
+      items:
+        $ref: './chat.yaml#/PerplexitySearchResult'  # defined in the sibling chat.yaml file
+    videos:
+      type: array
+      items:
+        $ref: './chat.yaml#/VideoResult'  # defined in the sibling chat.yaml file
+    citations:
+      type: array
+      items:
+        type: string
+
+ResponsesResponseError:  # Target of ResponsesResponse.error; both code and message are required strings.
+  type: object
+  required:
+    - code
+    - message
+  properties:
+    code:
+      type: string
+    message:
+      type: string
+
+ResponsesResponseIncompleteDetails:  # Target of ResponsesResponse.incomplete_details; `reason` is required.
+  type: object
+  required:
+    - reason
+  properties:
+    reason:
+      type: string
+
+ResponsesResponseUsage:  # Target of ResponsesResponse.usage; no field is required.
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    input_tokens_details:
+      $ref: '#/ResponsesResponseInputTokens'
+    output_tokens:
+      type: integer
+    output_tokens_details:
+      $ref: '#/ResponsesResponseOutputTokens'
+    total_tokens:
+      type: integer
+    cost:
+      $ref: './usage.yaml#/BifrostCost'  # defined in the sibling usage.yaml file
+
+ResponsesResponseInputTokens:  # Target of ResponsesResponseUsage.input_tokens_details; all counters are optional integers.
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    cached_read_tokens:
+      type: integer
+      description: >
+        Tokens served from the prompt cache (cache hit), billed at the reduced
+        cache-read rate. Already included in the parent input_tokens total.
+    cached_write_tokens:
+      type: integer
+      description: >
+        Tokens written to the prompt cache on this request, billed at the
+        cache-creation rate. Already included in the parent input_tokens total.
+        Populated for providers that separately report cache write tokens
+        (Anthropic, Bedrock).
+
+ResponsesResponseOutputTokens:  # Target of ResponsesResponseUsage.output_tokens_details; all counters are optional integers.
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    accepted_prediction_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    reasoning_tokens:
+      type: integer
+    rejected_prediction_tokens:
+      type: integer
+    citation_tokens:
+      type: integer
+    num_search_queries:
+      type: integer
+
+ResponsesStreamResponse:  # Streaming event schema; a single flat object for all event types, with no field required.
+  type: object
+  description: Streaming responses API response (SSE format)
+  properties:
+    type:
+      $ref: '#/ResponsesStreamResponseType'  # one of the enum values defined in ResponsesStreamResponseType
+    sequence_number:
+      type: integer
+    response:
+      $ref: '#/ResponsesResponse'
+    output_index:
+      type: integer
+    item:
+      $ref: '#/ResponsesMessage'
+    content_index:
+      type: integer
+    item_id:
+      type: string
+    part:
+      $ref: '#/ResponsesMessageContentBlock'
+    delta:
+      type: string
+    signature:
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/ResponsesOutputMessageContentTextLogProb'
+    text:
+      type: string
+    refusal:
+      type: string
+    arguments:
+      type: string
+    partial_image_b64:
+      type: string
+    partial_image_index:
+      type: integer
+    annotation:
+      $ref: '#/ResponsesOutputMessageContentTextAnnotation'
+    annotation_index:
+      type: integer
+    code:
+      type: string
+    message:
+      type: string
+    param:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+
+ResponsesStreamResponseType:  # Allowed values for ResponsesStreamResponse.type; all are `response.`-prefixed except the final `error`.
+  type: string
+  enum:
+    - response.ping
+    - response.created
+    - response.in_progress
+    - response.completed
+    - response.failed
+    - response.incomplete
+    - response.output_item.added
+    - response.output_item.done
+    - response.content_part.added
+    - response.content_part.done
+    - response.output_text.delta
+    - response.output_text.done
+    - response.refusal.delta
+    - response.refusal.done
+    - response.function_call_arguments.delta
+    - response.function_call_arguments.done
+    - response.file_search_call.in_progress
+    - response.file_search_call.searching
+    - response.file_search_call.results.added
+    - response.file_search_call.results.completed
+    - response.web_search_call.searching
+    - response.web_search_call.results.added
+    - response.web_search_call.results.completed
+    - response.web_fetch_call.in_progress
+    - response.web_fetch_call.fetching
+    - response.web_fetch_call.completed
+    - response.reasoning_summary_part.added
+    - response.reasoning_summary_part.done
+    - response.reasoning_summary_text.delta
+    - response.reasoning_summary_text.done
+    - response.image_generation_call.completed
+    - response.image_generation_call.generating
+    - response.image_generation_call.in_progress
+    - response.image_generation_call.partial_image
+    - response.mcp_call_arguments.delta
+    - response.mcp_call_arguments.done
+    - response.mcp_call.completed
+    - response.mcp_call.failed
+    - response.mcp_call.in_progress
+    - response.mcp_list_tools.completed
+    - response.mcp_list_tools.failed
+    - response.mcp_list_tools.in_progress
+    - response.code_interpreter_call.in_progress
+    - response.code_interpreter_call.interpreting
+    - response.code_interpreter_call.completed
+    - response.code_interpreter_call_code.delta
+    - response.code_interpreter_call_code.done
+    - response.output_text.annotation.added
+    - response.output_text.annotation.done
+    - response.queued
+    - response.custom_tool_call_input.delta
+    - response.custom_tool_call_input.done
+    - error
--- a/docs/openapi/schemas/inference/speech.yaml
+++ b/docs/openapi/schemas/inference/speech.yaml
@@ -0,0 +1,132 @@
+# Speech API schemas
+
+SpeechRequest:  # Speech request schema: model, input and voice are required; every other property is optional.
+  type: object
+  required:
+    - model
+    - input
+    - voice
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    input:
+      type: string
+      description: Text to convert to speech
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream_format:
+      type: string
+      enum: [sse]  # single allowed value
+      description: Set to "sse" to enable streaming
+    voice:
+      $ref: '#/SpeechVoiceInput'  # string or array of VoiceConfig
+    instructions:
+      type: string
+    response_format:
+      type: string
+      enum: [mp3, opus, aac, flac, wav, pcm]
+    speed:
+      type: number
+      minimum: 0.25  # bounds are inclusive: no exclusiveMinimum/exclusiveMaximum is set
+      maximum: 4.0
+    language_code:
+      type: string
+    pronunciation_dictionary_locators:
+      type: array
+      items:
+        $ref: '#/SpeechPronunciationDictionaryLocator'
+    enable_logging:
+      type: boolean
+    optimize_streaming_latency:
+      type: boolean
+    with_timestamps:
+      type: boolean
+
+SpeechVoiceInput:  # Target of SpeechRequest.voice: either a single string or an array of VoiceConfig entries.
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/VoiceConfig'
+
+VoiceConfig:  # Item type of the array form of SpeechVoiceInput; both speaker and voice are required strings.
+  type: object
+  required:
+    - speaker
+    - voice
+  properties:
+    speaker:
+      type: string
+    voice:
+      type: string
+
+SpeechPronunciationDictionaryLocator:  # Item type of SpeechRequest.pronunciation_dictionary_locators; version_id is optional.
+  type: object
+  required:
+    - pronunciation_dictionary_id
+  properties:
+    pronunciation_dictionary_id:
+      type: string
+    version_id:
+      type: string
+
+SpeechResponse:  # Speech response schema; no field is required.
+  type: object
+  properties:
+    audio:
+      type: string
+      format: byte  # NOTE(review): in OpenAPI `byte` means base64-encoded text, while the description below says "binary" - confirm which is intended
+      description: Audio data (binary)
+    usage:
+      $ref: '#/SpeechUsage'
+    alignment:
+      $ref: '#/SpeechAlignment'
+    normalized_alignment:
+      $ref: '#/SpeechAlignment'  # same schema as `alignment`
+    audio_base64:
+      type: string
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+
+SpeechUsage:  # Shared by SpeechResponse.usage and SpeechStreamResponse.usage; all counters are optional integers.
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+
+SpeechAlignment:  # Shared by SpeechResponse.alignment and SpeechResponse.normalized_alignment; three optional arrays.
+  type: object
+  properties:
+    char_start_times_ms:
+      type: array
+      items:
+        type: number
+    char_end_times_ms:
+      type: array
+      items:
+        type: number
+    characters:
+      type: array
+      items:
+        type: string
+
+SpeechStreamResponse:  # Streaming speech event schema; no field is required.
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [speech.audio.delta, speech.audio.done]
+    audio:
+      type: string
+      format: byte  # same type/format pair as SpeechResponse.audio
+    usage:
+      $ref: '#/SpeechUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
--- a/docs/openapi/schemas/inference/text.yaml
+++ b/docs/openapi/schemas/inference/text.yaml
@@ -0,0 +1,98 @@
+# Text Completions API schemas
+
+TextCompletionRequest:  # Text completion request schema: model and prompt are required; every other property is optional.
+  type: object
+  required:
+    - model
+    - prompt
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    prompt:
+      $ref: '#/TextCompletionInput'  # string or array of strings
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    best_of:
+      type: integer
+    echo:
+      type: boolean
+    frequency_penalty:
+      type: number
+    logit_bias:
+      type: object
+      additionalProperties:  # map of arbitrary keys to numeric values
+        type: number
+    logprobs:
+      type: integer
+    max_tokens:
+      type: integer
+    n:
+      type: integer
+    presence_penalty:
+      type: number
+    seed:
+      type: integer
+    stop:  # declared as an array only; a bare string does not validate
+      type: array
+      items:
+        type: string
+    suffix:
+      type: string
+    temperature:
+      type: number
+    top_p:
+      type: number
+    user:
+      type: string
+
+TextCompletionInput:  # Target of TextCompletionRequest.prompt: exactly one of the two shapes below must match.
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        type: string
+  description: Prompt input - can be a string or array of strings
+
+TextCompletionResponse:  # Text completion response schema; no field is required.
+  type: object
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: './chat.yaml#/BifrostResponseChoice'  # defined in the sibling chat.yaml file
+    model:
+      type: string
+    object:
+      type: string
+    system_fingerprint:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # defined in the sibling usage.yaml file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+
+TextCompletionStreamResponse:  # Same properties as TextCompletionResponse except that system_fingerprint is not declared.
+  type: object
+  description: Streaming text completion response
+  properties:
+    id:
+      type: string
+    choices:
+      type: array
+      items:
+        $ref: './chat.yaml#/BifrostResponseChoice'  # defined in the sibling chat.yaml file
+    model:
+      type: string
+    object:
+      type: string
+    usage:
+      $ref: './usage.yaml#/BifrostLLMUsage'  # defined in the sibling usage.yaml file
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
--- a/docs/openapi/schemas/inference/transcription.yaml
+++ b/docs/openapi/schemas/inference/transcription.yaml
@@ -0,0 +1,150 @@
+# Transcription API schemas
+
+TranscriptionRequest:  # Transcription request schema: model and file are required; every other property is optional.
+  type: object
+  required:
+    - model
+    - file
+  properties:
+    model:
+      type: string
+      description: Model in provider/model format
+    file:
+      type: string
+      format: binary  # raw file content (OpenAPI `binary`), as opposed to base64 `byte`
+      description: Audio file to transcribe
+    fallbacks:
+      type: array
+      items:
+        type: string
+    stream:
+      type: boolean
+    language:
+      type: string
+    prompt:
+      type: string
+    response_format:
+      type: string
+      enum: [json, text, srt, verbose_json, vtt]
+    file_format:
+      type: string
+
+TranscriptionResponse:  # Transcription response schema; no field is required.
+  type: object
+  properties:
+    duration:
+      type: number
+    language:
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/TranscriptionLogProb'
+    segments:
+      type: array
+      items:
+        $ref: '#/TranscriptionSegment'
+    task:
+      type: string
+    text:
+      type: string
+    usage:
+      $ref: '#/TranscriptionUsage'
+    words:
+      type: array
+      items:
+        $ref: '#/TranscriptionWord'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
+
+TranscriptionLogProb:  # Item type of the `logprobs` arrays on TranscriptionResponse and TranscriptionStreamResponse.
+  type: object
+  properties:
+    token:
+      type: string
+    logprob:
+      type: number
+    bytes:
+      type: array
+      items:
+        type: integer
+
+TranscriptionSegment:  # Item type of TranscriptionResponse.segments; no field is required.
+  type: object
+  properties:
+    id:
+      type: integer
+    seek:
+      type: integer
+    start:
+      type: number
+    end:
+      type: number
+    text:
+      type: string
+    tokens:
+      type: array
+      items:
+        type: integer
+    temperature:
+      type: number
+    avg_logprob:
+      type: number
+    compression_ratio:
+      type: number
+    no_speech_prob:
+      type: number
+
+TranscriptionWord:  # Item type of TranscriptionResponse.words; no field is required.
+  type: object
+  properties:
+    word:
+      type: string
+    start:
+      type: number
+    end:
+      type: number
+
+TranscriptionUsage:  # Shared by TranscriptionResponse.usage and TranscriptionStreamResponse.usage; no field is required.
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [tokens, duration]
+    input_tokens:
+      type: integer
+    input_token_details:
+      $ref: '#/TranscriptionUsageInputTokenDetails'
+    output_tokens:
+      type: integer
+    total_tokens:
+      type: integer
+    seconds:
+      type: integer  # whole numbers only, unlike TranscriptionResponse.duration which is a number
+
+TranscriptionUsageInputTokenDetails:  # Target of TranscriptionUsage.input_token_details; both counters are optional integers.
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+
+TranscriptionStreamResponse:  # Streaming transcription event schema; no field is required.
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [transcript.text.delta, transcript.text.done]
+    delta:
+      type: string
+    logprobs:
+      type: array
+      items:
+        $ref: '#/TranscriptionLogProb'
+    text:
+      type: string
+    usage:
+      $ref: '#/TranscriptionUsage'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'  # defined in the sibling common.yaml file
--- a/docs/openapi/schemas/inference/usage.yaml
+++ b/docs/openapi/schemas/inference/usage.yaml
@@ -0,0 +1,89 @@
+# Usage and cost related schemas
+
+BifrostLLMUsage:  # Referenced from other schema files as './usage.yaml#/BifrostLLMUsage' (e.g. rerank.yaml, text.yaml); no field is required.
+  type: object
+  description: Token usage information
+  properties:
+    prompt_tokens:
+      type: integer
+      description: >
+        Total input tokens including any prompt-cache tokens (read + write).
+        Subtract prompt_tokens_details.cached_read_tokens and
+        prompt_tokens_details.cached_write_tokens to get the non-cached portion.
+    prompt_tokens_details:
+      $ref: '#/ChatPromptTokensDetails'
+    completion_tokens:
+      type: integer
+      description: Number of output/completion tokens generated.
+    completion_tokens_details:
+      $ref: '#/ChatCompletionTokensDetails'
+    total_tokens:
+      type: integer
+    cost:
+      $ref: '#/BifrostCost'
+
+ChatPromptTokensDetails:  # Target of BifrostLLMUsage.prompt_tokens_details; all counters are optional integers.
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    cached_read_tokens:
+      type: integer
+      description: >
+        Tokens served from the prompt cache (cache hit). These tokens are already
+        included in prompt_tokens and are billed at the reduced cache-read rate.
+        Populated for all providers that support prompt caching (Anthropic, Bedrock,
+        OpenAI, Gemini, xAI, etc.).
+    cached_write_tokens:
+      type: integer
+      description: >
+        Tokens written to the prompt cache on this request (cache creation / write).
+        These tokens are already included in prompt_tokens and are billed at the
+        cache-creation rate. Populated for providers that separately report cache
+        write tokens (Anthropic, Bedrock).
+
+ChatCompletionTokensDetails:  # Target of BifrostLLMUsage.completion_tokens_details; all counters are optional integers.
+  type: object
+  properties:
+    text_tokens:
+      type: integer
+    accepted_prediction_tokens:
+      type: integer
+    audio_tokens:
+      type: integer
+    citation_tokens:
+      type: integer
+    num_search_queries:
+      type: integer
+    reasoning_tokens:
+      type: integer
+    image_tokens:
+      type: integer
+    rejected_prediction_tokens:
+      type: integer
+
+BifrostCost:  # Target of BifrostLLMUsage.cost and of './usage.yaml#/BifrostCost' in responses.yaml; all fields are optional numbers.
+  type: object
+  description: Cost breakdown for the request
+  properties:
+    input_tokens_cost:
+      type: number
+    output_tokens_cost:
+      type: number
+    reasoning_tokens_cost:
+      type: number
+      description: Cost for reasoning/thinking tokens (reasoning models)
+    citation_tokens_cost:
+      type: number
+      description: Cost for citation tokens
+    search_queries_cost:
+      type: number
+      description: Cost for web search queries
+    request_cost:
+      type: number
+    total_cost:
+      type: number
--- a/docs/openapi/schemas/inference/videos.yaml
+++ b/docs/openapi/schemas/inference/videos.yaml
@@ -0,0 +1,254 @@
+# Video Generation Schemas
+
+VideoGenerationRequest:
+  type: object
+  required:
+    - model
+    - prompt
+  properties:
+    model:
+      type: string
+      description: Model identifier in format `provider/model`
+    prompt:
+      type: string
+      description: Text prompt describing the video to generate
+    input_reference:
+      type: string
+      description: Optional reference image for image-to-video. OpenAI and Gemini require a base64 data URL (e.g., `data:image/png;base64,...`). Runway and Replicate accept both data URLs and plain URLs.
+    seconds:
+      type: string
+      description: Duration of the video in seconds as a string (e.g., "4")
+    size:
+      type: string
+      description: Resolution of the generated video (e.g., `1280x720`, `720x1280`, `1920x1080`)
+    negative_prompt:
+      type: string
+      description: Text describing what to avoid in the generated video
+    seed:
+      type: integer
+      description: Seed for reproducible generation
+    video_uri:
+      type: string
+      description: Source video URI for video-to-video generation (provider-specific, e.g. GCS URI)
+    audio:
+      type: boolean
+      description: Enable audio generation in the video (supported by select providers/models)
+    fallbacks:
+      type: array
+      items:
+        $ref: './common.yaml#/Fallback'
+      description: Fallback models to try if primary model fails
+
+VideoGenerationResponse:
+  type: object
+  properties:
+    id:
+      type: string
+      description: Provider-native job ID. To use in path parameters (retrieve/delete/download), combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
+    object:
+      type: string
+      enum:
+        - "video"
+      description: Object type, always "video"
+    model:
+      type: string
+      description: Model used for video generation
+    status:
+      $ref: '#/VideoStatus'
+    progress:
+      type: number
+      format: float
+      minimum: 0
+      maximum: 100
+      description: Approximate completion percentage (0-100)
+    prompt:
+      type: string
+      description: Prompt used to generate the video
+    remixed_from_video_id:
+      type: string
+      description: Source video ID if this is a remix
+    seconds:
+      type: string
+      description: Duration of the generated video in seconds as a string (e.g., "4")
+    size:
+      $ref: '#/VideoSize'
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job was created
+    completed_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job completed
+    expires_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when downloadable assets expire
+    videos:
+      type: array
+      description: Generated video outputs (only present when status is "completed")
+      items:
+        type: object
+        properties:
+          type:
+            type: string
+            enum:
+              - "url"
+              - "base64"
+            description: Output format of this video
+          url:
+            type: string
+            format: uri
+            description: URL to the generated video (present when type is "url")
+          base64:
+            type: string
+            description: Base64-encoded video content (present when type is "base64")
+          content_type:
+            type: string
+            description: MIME type of the video (e.g., "video/mp4")
+    error:
+      $ref: '#/VideoError'
+    content_filter:
+      $ref: '#/VideoContentFilter'
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoRemixRequest:
+  type: object
+  required:
+    - prompt
+  properties:
+    prompt:
+      type: string
+      description: Text prompt describing how to remix the video
+
+VideoListResponse:
+  type: object
+  properties:
+    object:
+      type: string
+      enum:
+        - "list"
+      description: Object type, always "list"
+    data:
+      type: array
+      items:
+        $ref: '#/VideoObject'
+      description: Array of video generation jobs
+    first_id:
+      type: string
+      description: ID of the first item in the list
+    last_id:
+      type: string
+      description: ID of the last item in the list
+    has_more:
+      type: boolean
+      description: Whether there are more results available
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoObject:
+  type: object
+  properties:
+    id:
+      type: string
+      description: Provider-native video ID. To use in path parameters (retrieve/delete/download), combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
+    object:
+      type: string
+      enum:
+        - "video"
+      description: Object type, always "video"
+    model:
+      type: string
+      description: Model used for generation
+    status:
+      $ref: '#/VideoStatus'
+    progress:
+      type: number
+      format: float
+      minimum: 0
+      maximum: 100
+      description: Approximate completion percentage (0-100)
+    prompt:
+      type: string
+      description: Prompt used to generate the video
+    remixed_from_video_id:
+      type: string
+      description: Source video ID if this is a remix
+    seconds:
+      type: string
+      description: Duration of the video in seconds as a string (e.g., "4")
+    size:
+      $ref: '#/VideoSize'
+    created_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job was created
+    completed_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when the job completed
+    expires_at:
+      type: integer
+      format: int64
+      description: Unix timestamp (seconds) when downloadable assets expire
+    error:
+      $ref: '#/VideoError'
+
+VideoDeleteResponse:
+  type: object
+  properties:
+    id:
+      type: string
+      description: ID of the deleted video
+    object:
+      type: string
+      enum:
+        - "video.deleted"
+      description: Object type, always "video.deleted"
+    deleted:
+      type: boolean
+      description: Whether the video was successfully deleted
+    extra_fields:
+      $ref: './common.yaml#/BifrostResponseExtraFields'
+
+VideoStatus:
+  type: string
+  enum:
+    - "queued"
+    - "in_progress"
+    - "completed"
+    - "failed"
+  description: |
+    Current lifecycle status of the video generation job:
+    - `queued`: Job is waiting to be processed
+    - `in_progress`: Video is currently being generated
+    - `completed`: Video generation completed successfully
+    - `failed`: Video generation failed
+
+VideoSize:
+  type: string
+  description: Resolution of the generated video (e.g., "1920x1080")
+
+VideoError:
+  type: object
+  properties:
+    code:
+      type: string
+      description: Error code
+    message:
+      type: string
+      description: Human-readable error message
+
+VideoContentFilter:
+  type: object
+  description: Information about content that was filtered due to safety policies
+  properties:
+    filtered_count:
+      type: integer
+      description: Number of items filtered
+    reasons:
+      type: array
+      items:
+        type: string
+      description: Human-readable reasons for filtering
--- a/docs/openapi/schemas/integrations/anthropic/batch.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/batch.yaml
@@ -0,0 +1,105 @@
+# Anthropic Integration Batch API Schemas
+
+AnthropicBatchCreateRequest:
+  type: object
+  required:
+    - requests
+  properties:
+    requests:
+      type: array
+      items:
+        $ref: '#/AnthropicBatchRequestItem'
+      description: Array of batch request items
+
+AnthropicBatchRequestItem:
+  type: object
+  required:
+    - custom_id
+    - params
+  properties:
+    custom_id:
+      type: string
+      description: Unique identifier for this request
+    params:
+      type: object
+      description: Request parameters (same as AnthropicMessageRequest)
+
+AnthropicBatchCreateResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message_batch
+    processing_status:
+      type: string
+      enum: [in_progress, ended, canceling]
+    request_counts:
+      $ref: '#/AnthropicBatchRequestCounts'
+    ended_at:
+      type: string
+      format: date-time
+      nullable: true
+    created_at:
+      type: string
+      format: date-time
+    expires_at:
+      type: string
+      format: date-time
+    archived_at:
+      type: string
+      format: date-time
+      nullable: true
+    cancel_initiated_at:
+      type: string
+      format: date-time
+      nullable: true
+    results_url:
+      type: string
+      nullable: true
+
+AnthropicBatchRequestCounts:
+  type: object
+  properties:
+    processing:
+      type: integer
+    succeeded:
+      type: integer
+    errored:
+      type: integer
+    canceled:
+      type: integer
+    expired:
+      type: integer
+
+AnthropicBatchListRequest:
+  type: object
+  properties:
+    page_size:
+      type: integer
+      default: 20
+    page_token:
+      type: string
+      description: Cursor for pagination
+
+AnthropicBatchListResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicBatchCreateResponse'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+AnthropicBatchRetrieveResponse:
+  $ref: '#/AnthropicBatchCreateResponse'
+
+AnthropicBatchCancelResponse:
+  $ref: '#/AnthropicBatchCreateResponse'
+
--- a/docs/openapi/schemas/integrations/anthropic/common.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/common.yaml
@@ -0,0 +1,53 @@
+# Anthropic Integration Common Types
+
+AnthropicError:
+  type: object
+  properties:
+    type:
+      type: string
+      default: error
+    error:
+      type: object
+      properties:
+        type:
+          type: string
+          description: Error type (e.g., invalid_request_error, api_error)
+        message:
+          type: string
+          description: Error message
+
+AnthropicModel:
+  type: object
+  properties:
+    id:
+      type: string
+      description: Model identifier
+    type:
+      type: string
+      default: model
+    display_name:
+      type: string
+    created_at:
+      type: string
+      format: date-time
+
+AnthropicListModelsResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicModel'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+# Anthropic Message Roles
+AnthropicMessageRole:
+  type: string
+  enum:
+    - user
+    - assistant
--- a/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/count-tokens.yaml
@@ -0,0 +1,13 @@
+# Anthropic Integration Count Tokens Schemas
+
+AnthropicCountTokensRequest:
+  # Uses the same format as AnthropicMessageRequest
+  allOf:
+    - $ref: './messages.yaml#/AnthropicMessageRequest'
+
+AnthropicCountTokensResponse:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens
--- a/docs/openapi/schemas/integrations/anthropic/files.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/files.yaml
@@ -0,0 +1,102 @@
+# Anthropic Integration Files API Schemas
+
+AnthropicFileUploadRequest:
+  type: object
+  required:
+    - file
+  properties:
+    file:
+      type: string
+      format: binary
+      description: File to upload (raw file content)
+    filename:
+      type: string
+      description: Original filename
+    purpose:
+      type: string
+      description: Purpose of the file (e.g., "batch")
+
+AnthropicFileUploadResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file
+    filename:
+      type: string
+    mime_type:
+      type: string
+      description: MIME type of the file
+    size_bytes:
+      type: integer
+      description: Size of the file in bytes
+    created_at:
+      type: string
+      format: date-time
+    downloadable:
+      type: boolean
+
+AnthropicFileListRequest:
+  type: object
+  properties:
+    limit:
+      type: integer
+      default: 30
+    after:
+      type: string
+      description: Cursor for pagination (after_id)
+    order:
+      type: string
+      enum: [asc, desc]
+
+AnthropicFileListResponse:
+  type: object
+  properties:
+    data:
+      type: array
+      items:
+        $ref: '#/AnthropicFileUploadResponse'
+    has_more:
+      type: boolean
+    first_id:
+      type: string
+    last_id:
+      type: string
+
+AnthropicFileRetrieveRequest:
+  type: object
+  required:
+    - file_id
+  properties:
+    file_id:
+      type: string
+
+AnthropicFileRetrieveResponse:
+  $ref: '#/AnthropicFileUploadResponse'
+
+AnthropicFileDeleteRequest:
+  type: object
+  required:
+    - file_id
+  properties:
+    file_id:
+      type: string
+
+AnthropicFileDeleteResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: file_deleted
+
+AnthropicFileContentRequest:
+  type: object
+  required:
+    - file_id
+  properties:
+    file_id:
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/messages.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/messages.yaml
@@ -0,0 +1,403 @@
+# Anthropic Integration Messages API Schemas
+
+# Request body for the Anthropic-compatible Messages endpoint.
+# `model`, `max_tokens` and `messages` are the only required fields.
+AnthropicMessageRequest:
+  type: object
+  required:
+    - model
+    - max_tokens
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model identifier (e.g., claude-3-opus-20240229)
+      example: claude-3-opus-20240229
+    max_tokens:
+      type: integer
+      description: Maximum tokens to generate
+    messages:
+      type: array
+      items:
+        $ref: '#/AnthropicMessage'
+      description: List of messages in the conversation
+    system:
+      # OpenAPI 3.0 ignores keywords placed next to a $ref, so the reference
+      # is wrapped in allOf to keep the property-level description effective.
+      allOf:
+        - $ref: '#/AnthropicContent'
+      description: System prompt
+    cache_control:
+      # Wrapped in allOf for the same reason as `system`.
+      allOf:
+        - $ref: '../../inference/common.yaml#/CacheControl'
+      description: Automatic caching directives for the whole request
+    metadata:
+      $ref: '#/AnthropicMetadata'
+    stream:
+      type: boolean
+      description: Whether to stream the response
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    tools:
+      type: array
+      items:
+        $ref: '#/AnthropicTool'
+    tool_choice:
+      $ref: '#/AnthropicToolChoice'
+    mcp_servers:
+      type: array
+      items:
+        $ref: '#/AnthropicMCPServer'
+      description: MCP servers configuration (requires beta header)
+    thinking:
+      $ref: '#/AnthropicThinking'
+    output_format:
+      type: object
+      description: Structured output format (requires beta header)
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+AnthropicMessage:
+  type: object
+  required:
+    - role
+    - content
+  properties:
+    role:
+      $ref: './common.yaml#/AnthropicMessageRole'
+    content:
+      $ref: '#/AnthropicContent'
+
+AnthropicContent:
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/AnthropicContentBlock'
+  description: Content - can be a string or array of content blocks
+
+# A single content block. `type` is the only required field; the remaining
+# properties are optional, and each description names the block type(s) it
+# applies to.
+AnthropicContentBlock:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image
+        - document
+        - tool_use
+        - server_tool_use
+        - tool_result
+        - web_search_result
+        - mcp_tool_use
+        - mcp_tool_result
+        - thinking
+        - redacted_thinking
+    text:
+      type: string
+      description: For text content
+    thinking:
+      type: string
+      description: For thinking content
+    signature:
+      type: string
+      description: For signature content
+    data:
+      type: string
+      description: For data content (encrypted data for redacted thinking)
+    tool_use_id:
+      type: string
+      description: For tool_result content
+    id:
+      type: string
+      description: For tool_use content
+    name:
+      type: string
+      description: For tool_use content
+    input:
+      type: object
+      description: For tool_use content
+    server_name:
+      type: string
+      description: For mcp_tool_use content
+    content:
+      # OpenAPI 3.0 ignores keywords placed next to a $ref, so each described
+      # reference below is wrapped in allOf to keep its description effective.
+      allOf:
+        - $ref: '#/AnthropicContent'
+      description: For tool_result content
+    source:
+      allOf:
+        - $ref: '#/AnthropicSource'
+      description: For image/document content
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+    citations:
+      allOf:
+        - $ref: '#/AnthropicCitationsConfig'
+      description: For document content
+    context:
+      type: string
+      description: For document content
+    title:
+      type: string
+      description: For document content
+
+AnthropicSource:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [base64, url, text, content_block]
+    media_type:
+      type: string
+      description: MIME type (e.g., image/jpeg, application/pdf)
+    data:
+      type: string
+      description: Base64-encoded data (for base64 type)
+    url:
+      type: string
+      description: URL (for url type)
+
+AnthropicCitationsConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+
+AnthropicMetadata:
+  type: object
+  properties:
+    user_id:
+      type: string
+
+AnthropicThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [enabled, disabled]
+    budget_tokens:
+      type: integer
+
+AnthropicTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - custom
+        - bash_20250124
+        - computer_20250124
+        - computer_20251124
+        - code_execution_20250522
+        - code_execution_20250825
+        - code_execution_20260120
+        - text_editor_20250124
+        - text_editor_20250429
+        - text_editor_20250728
+        - web_search_20250305
+        - web_search_20260209
+        - web_fetch_20250910
+        - web_fetch_20260209
+        - web_fetch_20260309
+        - memory_20250818
+        - tool_search_tool_bm25
+        - tool_search_tool_bm25_20251119
+        - tool_search_tool_regex
+        - tool_search_tool_regex_20251119
+    name:
+      type: string
+      description: Tool name (for custom tools)
+    description:
+      type: string
+    input_schema:
+      type: object
+      description: JSON Schema for tool input
+    cache_control:
+      $ref: '../../inference/common.yaml#/CacheControl'
+    # Computer use tool settings
+    display_width_px:
+      type: integer
+    display_height_px:
+      type: integer
+    display_number:
+      type: integer
+    enable_zoom:
+      type: boolean
+    # Web search settings
+    max_uses:
+      type: integer
+    allowed_domains:
+      type: array
+      items:
+        type: string
+    blocked_domains:
+      type: array
+      items:
+        type: string
+    user_location:
+      $ref: '#/AnthropicToolWebSearchUserLocation'
+
+AnthropicToolWebSearchUserLocation:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [approximate]
+    city:
+      type: string
+    country:
+      type: string
+    timezone:
+      type: string
+
+# Tool-selection directive: a single object shape whose `type` picks the mode.
+# Declared directly as an object because there is only one variant; a
+# one-branch oneOf validates identically but adds a needless union layer.
+AnthropicToolChoice:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [auto, any, tool, none]
+    name:
+      type: string
+      description: Required when type is 'tool'
+    disable_parallel_tool_use:
+      type: boolean
+
+AnthropicMCPServer:
+  type: object
+  properties:
+    type:
+      type: string
+    name:
+      type: string
+    url:
+      type: string
+    authorization_token:
+      type: string
+      description: Authorization token for the MCP server
+    tool_configuration:
+      $ref: '#/AnthropicMCPToolConfig'
+
+AnthropicMCPToolConfig:
+  type: object
+  properties:
+    enabled:
+      type: boolean
+    allowed_tools:
+      type: array
+      items:
+        type: string
+
+# Response types
+# Response body of the Anthropic-compatible Messages endpoint; also embedded
+# as `message` inside AnthropicStreamEvent.
+AnthropicMessageResponse:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      default: message
+    role:
+      type: string
+      default: assistant
+    content:
+      type: array
+      items:
+        $ref: '#/AnthropicContentBlock'
+    model:
+      type: string
+    stop_reason:
+      type: string
+      # In OpenAPI 3.0 a null enum member only validates when the schema is
+      # also marked nullable; without it `type: string` rejects null.
+      nullable: true
+      enum: [end_turn, max_tokens, stop_sequence, tool_use, pause_turn, refusal, model_context_window_exceeded, null]
+    stop_sequence:
+      type: string
+      nullable: true
+    usage:
+      $ref: '#/AnthropicUsage'
+
+AnthropicUsage:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+    output_tokens:
+      type: integer
+    cache_creation_input_tokens:
+      type: integer
+    cache_read_input_tokens:
+      type: integer
+    cache_creation:
+      $ref: '#/AnthropicUsageCacheCreation'
+
+AnthropicUsageCacheCreation:
+  type: object
+  properties:
+    ephemeral_5m_input_tokens:
+      type: integer
+    ephemeral_1h_input_tokens:
+      type: integer
+
+# Stream event types
+AnthropicStreamEvent:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      enum:
+        - message_start
+        - content_block_start
+        - content_block_delta
+        - content_block_stop
+        - message_delta
+        - message_stop
+        - ping
+        - error
+    message:
+      $ref: '#/AnthropicMessageResponse'
+    index:
+      type: integer
+    content_block:
+      $ref: '#/AnthropicContentBlock'
+    delta:
+      $ref: '#/AnthropicStreamDelta'
+    usage:
+      $ref: '#/AnthropicUsage'
+    error:
+      $ref: '#/AnthropicStreamError'
+
+AnthropicStreamDelta:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [text_delta, input_json_delta, thinking_delta, signature_delta]
+    text:
+      type: string
+    partial_json:
+      type: string
+    thinking:
+      type: string
+    signature:
+      type: string
+    stop_reason:
+      type: string
+    stop_sequence:
+      type: string
+
+AnthropicStreamError:
+  type: object
+  properties:
+    type:
+      type: string
+    message:
+      type: string
--- a/docs/openapi/schemas/integrations/anthropic/text.yaml
+++ b/docs/openapi/schemas/integrations/anthropic/text.yaml
@@ -0,0 +1,62 @@
+# Anthropic Integration Text Completions Schemas (Legacy Complete API)
+
+AnthropicTextRequest:
+  type: object
+  required:
+    - model
+    - prompt
+    - max_tokens_to_sample
+  properties:
+    model:
+      type: string
+      description: Model identifier
+    prompt:
+      type: string
+      description: The prompt to complete
+    max_tokens_to_sample:
+      type: integer
+      description: Maximum tokens to generate
+    stream:
+      type: boolean
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+# Response body of the legacy Anthropic text-completion (Complete) endpoint.
+AnthropicTextResponse:
+  type: object
+  properties:
+    type:
+      type: string
+      default: completion
+    id:
+      type: string
+    completion:
+      type: string
+    stop_reason:
+      type: string
+      # In OpenAPI 3.0 a null enum member only validates when the schema is
+      # also marked nullable; without it `type: string` rejects null.
+      nullable: true
+      enum: [stop_sequence, max_tokens, null]
+    model:
+      type: string
+    usage:
+      type: object
+      properties:
+        input_tokens:
+          type: integer
+          description: Number of input tokens used
+        output_tokens:
+          type: integer
+          description: Number of output tokens generated
--- a/docs/openapi/schemas/integrations/bedrock/batch.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/batch.yaml
@@ -0,0 +1,153 @@
+# AWS Bedrock Batch API Schemas
+
+BedrockBatchJobRequest:
+  type: object
+  required:
+    - roleArn
+    - inputDataConfig
+    - outputDataConfig
+  properties:
+    modelId:
+      type: string
+      description: Model ID for the batch job (optional, can be specified in request)
+    jobName:
+      type: string
+      description: Name for the batch job
+    roleArn:
+      type: string
+      description: IAM role ARN for the job
+    inputDataConfig:
+      type: object
+      properties:
+        s3InputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              type: string
+              description: S3 URI for input data
+    outputDataConfig:
+      type: object
+      properties:
+        s3OutputDataConfig:
+          type: object
+          properties:
+            s3Uri:
+              type: string
+              description: S3 URI for output data
+    timeoutDurationInHours:
+      type: integer
+      description: Timeout in hours
+    tags:
+      type: array
+      items:
+        type: object
+        properties:
+          key:
+            type: string
+          value:
+            type: string
+
+BedrockBatchJobResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
+      enum: [Submitted, InProgress, Completed, Failed, Stopping, Stopped, PartiallyCompleted, Expired, Validating, Scheduled]
+    jobName:
+      type: string
+    modelId:
+      type: string
+    roleArn:
+      type: string
+    inputDataConfig:
+      type: object
+    outputDataConfig:
+      type: object
+    vpcConfig:
+      type: object
+      properties:
+        securityGroupIds:
+          type: array
+          items:
+            type: string
+        subnetIds:
+          type: array
+          items:
+            type: string
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+    clientRequestToken:
+      type: string
+    jobExpirationTime:
+      type: string
+      format: date-time
+    timeoutDurationInHours:
+      type: integer
+
+BedrockBatchListRequest:
+  type: object
+  properties:
+    maxResults:
+      type: integer
+    nextToken:
+      type: string
+    statusEquals:
+      type: string
+    nameContains:
+      type: string
+
+BedrockBatchListResponse:
+  type: object
+  properties:
+    invocationJobSummaries:
+      type: array
+      items:
+        $ref: '#/BedrockBatchJobSummary'
+    nextToken:
+      type: string
+
+BedrockBatchJobSummary:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    jobName:
+      type: string
+    modelId:
+      type: string
+    status:
+      type: string
+    submitTime:
+      type: string
+      format: date-time
+    lastModifiedTime:
+      type: string
+      format: date-time
+    endTime:
+      type: string
+      format: date-time
+    message:
+      type: string
+
+# Alias of BedrockBatchJobResponse. The reference is wrapped in allOf because
+# OpenAPI 3.0 ignores keywords (such as `description`) placed next to a $ref.
+BedrockBatchRetrieveResponse:
+  description: Uses same structure as BedrockBatchJobResponse
+  allOf:
+    - $ref: '#/BedrockBatchJobResponse'
+
+BedrockBatchCancelResponse:
+  type: object
+  properties:
+    jobArn:
+      type: string
+    status:
+      type: string
--- a/docs/openapi/schemas/integrations/bedrock/common.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/common.yaml
@@ -0,0 +1,15 @@
+# AWS Bedrock Integration Common Types
+
+BedrockError:
+  type: object
+  properties:
+    message:
+      type: string
+    type:
+      type: string
+
+BedrockMessageRole:
+  type: string
+  enum:
+    - user
+    - assistant
--- a/docs/openapi/schemas/integrations/bedrock/converse.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/converse.yaml
@@ -0,0 +1,367 @@
+# AWS Bedrock Converse API Schemas
+
+BedrockConverseRequest:
+  type: object
+  properties:
+    messages:
+      type: array
+      items:
+        $ref: '#/BedrockMessage'
+      description: Array of messages for the conversation
+    system:
+      type: array
+      items:
+        $ref: '#/BedrockSystemMessage'
+      description: System messages/prompts
+    inferenceConfig:
+      $ref: '#/BedrockInferenceConfig'
+    toolConfig:
+      $ref: '#/BedrockToolConfig'
+    guardrailConfig:
+      $ref: '#/BedrockGuardrailConfig'
+    additionalModelRequestFields:
+      type: object
+      description: Model-specific parameters
+    additionalModelResponseFieldPaths:
+      type: array
+      items:
+        type: string
+    performanceConfig:
+      $ref: '#/BedrockPerformanceConfig'
+    promptVariables:
+      type: object
+      additionalProperties:
+        $ref: '#/BedrockPromptVariable'
+    requestMetadata:
+      type: object
+      additionalProperties:
+        type: string
+    serviceTier:
+      $ref: '#/BedrockServiceTier'
+    # Bifrost-specific
+    fallbacks:
+      type: array
+      items:
+        type: string
+
+BedrockMessage:
+  type: object
+  required:
+    - role
+    - content
+  properties:
+    role:
+      $ref: './common.yaml#/BedrockMessageRole'
+    content:
+      type: array
+      items:
+        $ref: '#/BedrockContentBlock'
+
+BedrockSystemMessage:
+  type: object
+  properties:
+    text:
+      type: string
+    guardContent:
+      $ref: '#/BedrockGuardContent'
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+BedrockContentBlock:
+  type: object
+  properties:
+    text:
+      type: string
+    image:
+      $ref: '#/BedrockImageSource'
+    document:
+      $ref: '#/BedrockDocumentSource'
+    toolUse:
+      $ref: '#/BedrockToolUse'
+    toolResult:
+      $ref: '#/BedrockToolResult'
+    guardContent:
+      $ref: '#/BedrockGuardContent'
+    reasoningContent:
+      $ref: '#/BedrockReasoningContent'
+    json:
+      type: object
+      description: JSON content for tool call results
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+BedrockImageSource:
+  # Image payload: a format tag plus the raw image data, carried as a
+  # `format: byte` string under `source.bytes`.
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - jpeg
+        - png
+        - gif
+        - webp
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+
+BedrockDocumentSource:
+  # Document payload: format tag, display name, and a source that carries
+  # either `bytes` (a `format: byte` string) or plain `text`.
+  type: object
+  properties:
+    format:
+      type: string
+      enum:
+        - pdf
+        - csv
+        - doc
+        - docx
+        - xls
+        - xlsx
+        - html
+        - txt
+        - md
+    name:
+      type: string
+    source:
+      type: object
+      properties:
+        bytes:
+          type: string
+          format: byte
+        text:
+          type: string
+          description: Plain text content (for text-based documents)
+
+BedrockToolUse:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    name:
+      type: string
+    input:
+      type: object
+
+BedrockToolResult:
+  type: object
+  properties:
+    toolUseId:
+      type: string
+    content:
+      type: array
+      items:
+        $ref: '#/BedrockContentBlock'
+    status:
+      type: string
+      enum: [success, error]
+
+BedrockGuardContent:
+  type: object
+  properties:
+    text:
+      type: object
+      properties:
+        text:
+          type: string
+        qualifiers:
+          type: array
+          items:
+            type: string
+
+BedrockReasoningContent:
+  type: object
+  properties:
+    reasoningText:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+
+BedrockCachePoint:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [default]
+
+BedrockInferenceConfig:
+  type: object
+  properties:
+    maxTokens:
+      type: integer
+    temperature:
+      type: number
+    topP:
+      type: number
+    stopSequences:
+      type: array
+      items:
+        type: string
+
+BedrockToolConfig:
+  type: object
+  properties:
+    tools:
+      type: array
+      items:
+        $ref: '#/BedrockTool'
+    toolChoice:
+      $ref: '#/BedrockToolChoice'
+
+BedrockTool:
+  type: object
+  properties:
+    toolSpec:
+      type: object
+      properties:
+        name:
+          type: string
+        description:
+          type: string
+        inputSchema:
+          type: object
+          properties:
+            json:
+              type: object
+    cachePoint:
+      $ref: '#/BedrockCachePoint'
+
+BedrockToolChoice:
+  # Tool-selection setting with three optional members: `auto`, `any`, and a
+  # named `tool`. Nothing in this schema prevents several members from being
+  # set at once.
+  # NOTE(review): the upstream Bedrock ToolChoice is presumably a union
+  # (exactly one member) — confirm, then consider expressing it with
+  # oneOf or maxProperties.
+  type: object
+  properties:
+    auto:
+      type: object
+    any:
+      type: object
+    tool:
+      type: object
+      properties:
+        name:
+          type: string
+
+BedrockGuardrailConfig:
+  type: object
+  properties:
+    guardrailIdentifier:
+      type: string
+    guardrailVersion:
+      type: string
+    trace:
+      type: string
+      enum: [enabled, disabled]
+
+BedrockPerformanceConfig:
+  type: object
+  properties:
+    latency:
+      type: string
+      enum: [standard, optimized]
+
+BedrockPromptVariable:
+  type: object
+  properties:
+    text:
+      type: string
+
+BedrockServiceTier:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [reserved, priority, default, flex]
+
+# Response types
+BedrockConverseResponse:
+  # Non-streaming Converse result: the output message, the reason generation
+  # stopped, token usage, latency metrics, and the echoed performance and
+  # service-tier settings.
+  type: object
+  properties:
+    output:
+      type: object
+      properties:
+        message:
+          $ref: '#/BedrockMessage'
+    stopReason:
+      type: string
+      enum:
+        - end_turn
+        - tool_use
+        - max_tokens
+        - stop_sequence
+        - guardrail_intervened
+        - content_filtered
+    usage:
+      $ref: '#/BedrockUsage'
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    additionalModelResponseFields:
+      type: object
+    trace:
+      type: object
+    performanceConfig:
+      $ref: '#/BedrockPerformanceConfig'
+    serviceTier:
+      $ref: '#/BedrockServiceTier'
+
+BedrockUsage:
+  type: object
+  properties:
+    inputTokens:
+      type: integer
+    outputTokens:
+      type: integer
+    totalTokens:
+      type: integer
+    cacheReadInputTokens:
+      type: integer
+    cacheWriteInputTokens:
+      type: integer
+
+# Stream event types
+BedrockStreamEvent:
+  # One flat object covers every streaming event; each event populates only
+  # the fields that apply to it (see the per-field descriptions).
+  #
+  # Differences from the non-streaming schemas in this file:
+  #   - `role` is a free-form string here, while BedrockMessage.role
+  #     references ./common.yaml#/BedrockMessageRole.
+  #   - `stopReason` is a free-form string here, while
+  #     BedrockConverseResponse.stopReason is an enum.
+  #   - `metrics` repeats the inline `{latencyMs}` object that
+  #     BedrockConverseResponse also declares inline.
+  # NOTE(review): confirm whether the looser typing is intentional.
+  type: object
+  description: Flat structure for streaming events matching actual Bedrock API response
+  properties:
+    role:
+      type: string
+      description: For messageStart events
+    contentBlockIndex:
+      type: integer
+      description: For content block events
+    delta:
+      $ref: '#/BedrockContentBlockDelta'
+    stopReason:
+      type: string
+      description: For messageStop events
+    start:
+      $ref: '#/BedrockContentBlockStart'
+    usage:
+      $ref: '#/BedrockUsage'
+    metrics:
+      type: object
+      properties:
+        latencyMs:
+          type: integer
+    trace:
+      type: object
+    additionalModelResponseFields:
+      type: object
+    invokeModelRawChunk:
+      type: string
+      format: byte
+      description: Raw bytes for legacy invoke stream
+
+BedrockContentBlockDelta:
+  type: object
+  properties:
+    text:
+      type: string
+    reasoningContent:
+      type: object
+      properties:
+        text:
+          type: string
+        signature:
+          type: string
+    toolUse:
+      type: object
+      properties:
+        input:
+          type: string
+
+BedrockContentBlockStart:
+  type: object
+  properties:
+    toolUse:
+      type: object
+      properties:
+        toolUseId:
+          type: string
+        name:
+          type: string
--- a/docs/openapi/schemas/integrations/bedrock/invoke.yaml
+++ b/docs/openapi/schemas/integrations/bedrock/invoke.yaml
@@ -0,0 +1,50 @@
+# AWS Bedrock Invoke API Schemas (Legacy/Raw Model Invocation)
+
+BedrockInvokeRequest:
+  # Union-style request body for raw model invocation. No field is marked
+  # required, and several fields are parallel spellings of the same setting:
+  #   - `max_tokens` / `max_tokens_to_sample`
+  #   - `stop` / `stop_sequences`
+  # `messages` items are untyped objects, so message shape is not validated
+  # by this schema.
+  type: object
+  description: |
+    Raw model invocation request. The body format depends on the model provider.
+    For Anthropic models, use Anthropic format. For other models, use their native format.
+  properties:
+    prompt:
+      type: string
+      description: Text prompt to complete
+    max_tokens:
+      type: integer
+    max_tokens_to_sample:
+      type: integer
+      description: Anthropic-style max tokens
+    temperature:
+      type: number
+    top_p:
+      type: number
+    top_k:
+      type: integer
+    stop:
+      type: array
+      items:
+        type: string
+    stop_sequences:
+      type: array
+      items:
+        type: string
+      description: Anthropic-style stop sequences
+    messages:
+      type: array
+      items:
+        type: object
+      description: For Claude 3 models
+    system:
+      description: System prompt (string or array of strings)
+      oneOf:
+        - type: string
+        - type: array
+          items:
+            type: string
+    anthropic_version:
+      type: string
+
+BedrockInvokeResponse:
+  type: object
+  description: Raw model response. Format depends on the model provider.
+  additionalProperties: true
--- a/docs/openapi/schemas/integrations/cohere/chat.yaml
+++ b/docs/openapi/schemas/integrations/cohere/chat.yaml
@@ -0,0 +1,364 @@
+# Cohere v2 Chat API Schemas
+
+CohereChatRequest:
+  type: object
+  required:
+    - model
+    - messages
+  properties:
+    model:
+      type: string
+      description: Model to use for chat completion
+      example: command-r-plus
+    messages:
+      type: array
+      items:
+        $ref: '#/CohereMessage'
+      description: Array of message objects
+    tools:
+      type: array
+      items:
+        $ref: '#/CohereTool'
+    tool_choice:
+      $ref: '#/CohereToolChoice'
+    temperature:
+      type: number
+      minimum: 0
+      maximum: 1
+    p:
+      type: number
+      description: Top-p sampling
+    k:
+      type: integer
+      description: Top-k sampling
+    max_tokens:
+      type: integer
+    stop_sequences:
+      type: array
+      items:
+        type: string
+    frequency_penalty:
+      type: number
+    presence_penalty:
+      type: number
+    stream:
+      type: boolean
+    safety_mode:
+      type: string
+      enum: [CONTEXTUAL, STRICT, NONE]
+    log_probs:
+      type: boolean
+    strict_tool_choice:
+      type: boolean
+    thinking:
+      $ref: '#/CohereThinking'
+    response_format:
+      $ref: '#/CohereResponseFormat'
+
+CohereMessage:
+  type: object
+  required:
+    - role
+  properties:
+    role:
+      type: string
+      enum: [system, user, assistant, tool]
+    content:
+      $ref: '#/CohereMessageContent'
+    tool_calls:
+      type: array
+      items:
+        $ref: '#/CohereToolCall'
+    tool_call_id:
+      type: string
+    tool_plan:
+      type: string
+      description: Chain-of-thought style reflection (assistant only)
+
+CohereMessageContent:
+  oneOf:
+    - type: string
+    - type: array
+      items:
+        $ref: '#/CohereContentBlock'
+  description: Message content - can be a string or array of content blocks
+
+CohereContentBlock:
+  type: object
+  required:
+    - type
+  properties:
+    type:
+      type: string
+      enum: [text, image_url, thinking, document]
+    text:
+      type: string
+    image_url:
+      type: object
+      properties:
+        url:
+          type: string
+    thinking:
+      type: string
+    document:
+      type: object
+      properties:
+        data:
+          type: object
+        id:
+          type: string
+
+CohereTool:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [function]
+    function:
+      type: object
+      properties:
+        name:
+          type: string
+        description:
+          type: string
+        parameters:
+          type: object
+
+CohereToolChoice:
+  type: string
+  enum: [AUTO, NONE, REQUIRED]
+  description: Tool choice mode - AUTO lets the model decide, NONE disables tools, REQUIRED forces tool use
+
+CohereToolCall:
+  type: object
+  properties:
+    id:
+      type: string
+    type:
+      type: string
+      enum: [function]
+    function:
+      type: object
+      properties:
+        name:
+          type: string
+        arguments:
+          type: string
+
+CohereThinking:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [enabled, disabled]
+    token_budget:
+      type: integer
+      minimum: 1
+
+CohereResponseFormat:
+  type: object
+  properties:
+    type:
+      type: string
+      enum: [text, json_object]
+      description: Response format type
+    schema:
+      type: object
+      description: JSON schema for structured output (used with json_object type)
+
+# Response types
+CohereChatResponse:
+  # Non-streaming chat result: response id, finish reason, the generated
+  # message (declared inline), usage, and optional log probabilities.
+  # The `finish_reason` values match the list on CohereStreamDelta.
+  type: object
+  properties:
+    id:
+      type: string
+    finish_reason:
+      type: string
+      enum:
+        - COMPLETE
+        - STOP_SEQUENCE
+        - MAX_TOKENS
+        - TOOL_CALL
+        - ERROR
+        - TIMEOUT
+    message:
+      type: object
+      properties:
+        role:
+          type: string
+        content:
+          type: array
+          items:
+            $ref: '#/CohereContentBlock'
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/CohereToolCall'
+        tool_plan:
+          type: string
+    usage:
+      $ref: '#/CohereUsage'
+    logprobs:
+      type: array
+      items:
+        $ref: '#/CohereLogProb'
+      description: Log probabilities (if requested)
+
+CohereUsage:
+  type: object
+  properties:
+    billed_units:
+      $ref: '#/CohereBilledUnits'
+    tokens:
+      $ref: '#/CohereTokenUsage'
+    cached_tokens:
+      type: integer
+      description: Cached tokens
+
+CohereBilledUnits:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of billed input tokens
+    output_tokens:
+      type: integer
+      description: Number of billed output tokens
+    search_units:
+      type: integer
+      description: Number of billed search units
+    classifications:
+      type: integer
+      description: Number of billed classification units
+
+CohereTokenUsage:
+  type: object
+  properties:
+    input_tokens:
+      type: integer
+      description: Number of input tokens used
+    output_tokens:
+      type: integer
+      description: Number of output tokens produced
+
+CohereLogProb:
+  type: object
+  properties:
+    token_ids:
+      type: array
+      items:
+        type: integer
+      description: Token IDs of each token in text chunk
+    text:
+      type: string
+      description: Text chunk for log probabilities
+    logprobs:
+      type: array
+      items:
+        type: number
+      description: Log probability of each token
+
+# Stream event types
+CohereChatStreamEvent:
+  # Envelope for one streaming event: a `type` discriminator, optional id and
+  # index, and a delta payload.
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - message-start
+        - content-start
+        - content-delta
+        - content-end
+        - tool-plan-delta
+        - tool-call-start
+        - tool-call-delta
+        - tool-call-end
+        - citation-start
+        - citation-end
+        - message-end
+        - debug
+      description: Type of streaming event
+    id:
+      type: string
+      description: Event ID (for message-start)
+    index:
+      type: integer
+      description: Index for indexed events
+    delta:
+      $ref: '#/CohereStreamDelta'
+
+CohereStreamDelta:
+  # Payload of a stream event: a partial message, and/or the finish reason
+  # and usage. The `finish_reason` values match CohereChatResponse.
+  type: object
+  properties:
+    message:
+      $ref: '#/CohereStreamMessage'
+    finish_reason:
+      type: string
+      enum:
+        - COMPLETE
+        - STOP_SEQUENCE
+        - MAX_TOKENS
+        - TOOL_CALL
+        - ERROR
+        - TIMEOUT
+    usage:
+      $ref: '#/CohereUsage'
+
+CohereStreamMessage:
+  type: object
+  properties:
+    role:
+      type: string
+      description: Message role (for message-start)
+    content:
+      oneOf:
+        - $ref: '#/CohereStreamContent'
+        - type: array
+          items:
+            $ref: '#/CohereStreamContent'
+      description: Content for content events
+    tool_plan:
+      type: string
+      description: Tool plan content (for tool-plan-delta)
+    tool_calls:
+      oneOf:
+        - $ref: '#/CohereToolCall'
+        - type: array
+          items:
+            $ref: '#/CohereToolCall'
+      description: Tool calls (for tool-call events)
+    citations:
+      oneOf:
+        - $ref: '#/CohereCitation'
+        - type: array
+          items:
+            $ref: '#/CohereCitation'
+      description: Citations (for citation events)
+
+CohereStreamContent:
+  # Streamed content fragment. The `type` enum lists the same four values as
+  # CohereContentBlock, but only `text` and `thinking` are declared as
+  # properties here.
+  # NOTE(review): confirm whether `image_url` / `document` fragments can
+  # actually occur in a stream; if not, the enum could be narrowed.
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - text
+        - image_url
+        - thinking
+        - document
+    text:
+      type: string
+    thinking:
+      type: string
+
+CohereCitation:
+  type: object
+  properties:
+    start:
+      type: integer
+      description: Start position of cited text
+    end:
+      type: integer
+      description: End position of cited text
+    text:
+      type: string
+      description: Cited text
+    sources:
+      type: array
+      items:
+        $ref: '#/CohereSource'
+    content_index:
+      type: integer
+      description: Content index of the citation
+    type:
+      type: string
+      enum: [TEXT_CONTENT, THINKING_CONTENT, PLAN]
+      description: Type of citation
+
+CohereSource:
+  # A citation source: either a tool result or a document, tagged by `type`.
+  # NOTE(review): the `id` description says "nullable", but the schema does not
+  # declare it (`nullable: true` in OpenAPI 3.0, or a `null` type in 3.1) —
+  # confirm the spec version and add the matching keyword.
+  type: object
+  properties:
+    type:
+      type: string
+      enum:
+        - tool
+        - document
+      description: Source type
+    id:
+      type: string
+      description: Source ID (nullable)
+    tool_output:
+      type: object
+      description: Tool output (for tool sources)
+    document:
+      type: object
+      description: Document data (for document sources)
--- a/docs/openapi/schemas/integrations/cohere/common.yaml
+++ b/docs/openapi/schemas/integrations/cohere/common.yaml
@@ -0,0 +1,14 @@
+# Cohere Integration Common Types
+
+CohereError:
+  type: object
+  properties:
+    type:
+      type: string
+      description: Error type
+    message:
+      type: string
+      description: Error message
+    code:
+      type: string
+      description: Optional error code
--- a/docs/openapi/schemas/integrations/cohere/embed.yaml
+++ b/docs/openapi/schemas/integrations/cohere/embed.yaml
@@ -0,0 +1,172 @@
+# Cohere v2 Embed API Schemas
+
+CohereEmbeddingRequest:
+  # Embed request. `model` and `input_type` are required; the content to embed
+  # arrives through `texts`, `images`, or `inputs`.
+  #
+  # Constraints stated only in descriptions, not enforced by the schema:
+  #   - "At least one of texts, images, or inputs is required" (no
+  #     anyOf/required encodes this).
+  #   - `embedding_types` and `output_dimension` list their allowed values in
+  #     prose, without an `enum`.
+  # NOTE(review): decide whether these should become schema constraints.
+  type: object
+  required:
+    - model
+    - input_type
+  properties:
+    model:
+      type: string
+      description: ID of an available embedding model
+      example: embed-english-v3.0
+    input_type:
+      type: string
+      description: Specifies the type of input passed to the model. Required for embedding models v3 and higher.
+    texts:
+      type: array
+      items:
+        type: string
+      description: Array of strings to embed. Maximum 96 texts per call. At least one of texts, images, or inputs is required.
+      maxItems: 96
+    images:
+      type: array
+      items:
+        type: string
+      description: Array of image data URIs for multimodal embedding. Maximum 1 image per call. Supports JPEG, PNG, WebP, GIF up to 5MB.
+      maxItems: 1
+    inputs:
+      type: array
+      items:
+        $ref: '#/CohereEmbeddingInput'
+      description: Array of mixed text/image components for embedding. Maximum 96 per call.
+      maxItems: 96
+    embedding_types:
+      type: array
+      items:
+        type: string
+      description: Specifies the return format types (float, int8, uint8, binary, ubinary, base64). Defaults to float if unspecified.
+    output_dimension:
+      type: integer
+      description: Number of dimensions for output embeddings (256, 512, 1024, 1536). Available only for embed-v4 and newer models.
+    max_tokens:
+      type: integer
+      description: Maximum tokens to embed per input before truncation.
+    truncate:
+      type: string
+      description: Handling for inputs exceeding token limits. Defaults to END.
+
+CohereEmbeddingInput:
+  type: object
+  properties:
+    content:
+      type: array
+      items:
+        $ref: './chat.yaml#/CohereContentBlock'
+      description: Array of content blocks (reuses chat content blocks)
+
+CohereEmbeddingResponse:
+  type: object
+  properties:
+    id:
+      type: string
+      description: Response ID
+    embeddings:
+      $ref: '#/CohereEmbeddingData'
+    response_type:
+      type: string
+      description: Response type (embeddings_floats, embeddings_by_type)
+    texts:
+      type: array
+      items:
+        type: string
+      description: Original text entries
+    images:
+      type: array
+      items:
+        $ref: '#/CohereEmbeddingImageInfo'
+      description: Original image entries
+    meta:
+      $ref: '#/CohereEmbeddingMeta'
+
+CohereEmbeddingData:
+  type: object
+  description: Embedding data object with different types
+  properties:
+    float:
+      type: array
+      items:
+        type: array
+        items:
+          type: number
+      description: Float embeddings
+    int8:
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+      description: Int8 embeddings
+    uint8:
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+      description: Uint8 embeddings
+    binary:
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+      description: Binary embeddings
+    ubinary:
+      type: array
+      items:
+        type: array
+        items:
+          type: integer
+      description: Unsigned binary embeddings
+    base64:
+      type: array
+      items:
+        type: string
+      description: Base64-encoded embeddings
+
+CohereEmbeddingImageInfo:
+  type: object
+  description: Image information in the response
+  properties:
+    width:
+      type: integer
+      description: Width in pixels
+    height:
+      type: integer
+      description: Height in pixels
+    format:
+      type: string
+      description: Image format
+    bit_depth:
+      type: integer
+      description: Bit depth
+
+CohereEmbeddingMeta:
+  type: object
+  description: Metadata in embedding response
+  properties:
+    api_version:
+      $ref: '#/CohereEmbeddingAPIVersion'
+    billed_units:
+      $ref: './chat.yaml#/CohereBilledUnits'
+    tokens:
+      $ref: './chat.yaml#/CohereTokenUsage'
+    warnings:
+      type: array
+      items:
+        type: string
+      description: Any warnings
+
+CohereEmbeddingAPIVersion:
+  type: object
+  description: API version information
+  properties:
+    version:
+      type: string
+      description: API version
+    is_deprecated:
+      type: boolean
+      description: Deprecation status
+    is_experimental:
+      type: boolean
+      description: Experimental status
--- a/Show More
+++ b/Show More