first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
# Async Job schemas
# Lifecycle states reported for an async job.
AsyncJobStatus:
  type: string
  description: The status of an async job
  enum:
    - pending
    - processing
    - completed
    - failed
# Envelope returned when an async job is created or polled.
# Only id, status and created_at are required; every other field is optional.
AsyncJobResponse:
  type: object
  description: Response returned when creating or polling an async job
  required:
    - id
    - status
    - created_at
  properties:
    id:
      type: string
      description: Unique identifier for the async job
    status:
      $ref: '#/AsyncJobStatus'
    expires_at:
      type: string
      format: date-time
      description: When the job result expires and will be cleaned up
    created_at:
      type: string
      format: date-time
      description: When the job was created
    completed_at:
      type: string
      format: date-time
      description: When the job completed (successfully or with failure)
    status_code:
      type: integer
      description: HTTP status code of the completed operation
    # Deliberately untyped: the payload shape varies with the request type.
    result:
      description: The result of the completed operation (shape depends on the request type)
    error:
      $ref: './common.yaml#/BifrostError'

View File

@@ -0,0 +1,309 @@
# Batch API schemas
# Status values a batch can report.
# NOTE(review): "cancelling" (double l) and "canceled" (single l) use different
# spellings, while the timestamp fields elsewhere are named cancelled_at —
# confirm the values match what the server actually emits.
BatchStatus:
  type: string
  enum:
    - validating
    - failed
    - in_progress
    - finalizing
    - completed
    - expired
    - cancelling
    - canceled
    - ended
# Endpoint paths accepted for the `endpoint` field of a batch create request.
BatchEndpoint:
  type: string
  enum:
    - /v1/chat/completions
    - /v1/embeddings
    - /v1/completions
    - /v1/responses
    - /v1/messages
# Request body for creating a batch. Only `model` is required; the batch input
# can be given as a file ID (input_file_id) or as inline requests.
BatchCreateRequest:
  type: object
  required:
    - model
  properties:
    model:
      type: string
      description: Model in provider/model format
    input_file_id:
      type: string
      description: OpenAI-style file ID
    requests:
      type: array
      items:
        $ref: '#/BatchRequestItem'
      description: Anthropic-style inline requests
    endpoint:
      $ref: '#/BatchEndpoint'
    completion_window:
      type: string
      description: e.g., "24h"
    metadata:
      type: object
      additionalProperties:
        type: string
# One inline request inside a batch; custom_id is the only required field.
BatchRequestItem:
  type: object
  required:
    - custom_id
  properties:
    custom_id:
      type: string
    method:
      type: string
    url:
      type: string
    body:
      type: object
    params:
      type: object
# Response returned after creating a batch. All fields are optional.
BatchCreateResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    endpoint:
      type: string
    input_file_id:
      type: string
    completion_window:
      type: string
    status:
      $ref: '#/BatchStatus'
    request_counts:
      $ref: '#/BatchRequestCounts'
    metadata:
      type: object
      additionalProperties:
        type: string
    created_at:
      type: integer
      format: int64
    expires_at:
      type: integer
      format: int64
    output_file_id:
      type: string
    error_file_id:
      type: string
    processing_status:
      type: string
    results_url:
      type: string
    operation_name:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Per-outcome integer counters for the requests in a batch.
BatchRequestCounts:
  type: object
  properties:
    total:
      type: integer
    completed:
      type: integer
    failed:
      type: integer
    succeeded:
      type: integer
    expired:
      type: integer
    canceled:
      type: integer
    pending:
      type: integer
# Paginated list of batches; each entry uses the BatchRetrieveResponse shape.
BatchListResponse:
  type: object
  properties:
    object:
      type: string
    data:
      type: array
      items:
        $ref: '#/BatchRetrieveResponse'
    first_id:
      type: string
    last_id:
      type: string
    has_more:
      type: boolean
    next_cursor:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Full batch record returned when retrieving a single batch.
# The *_at fields are int64 integers, one per lifecycle transition.
BatchRetrieveResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    endpoint:
      type: string
    input_file_id:
      type: string
    completion_window:
      type: string
    status:
      $ref: '#/BatchStatus'
    request_counts:
      $ref: '#/BatchRequestCounts'
    metadata:
      type: object
      additionalProperties:
        type: string
    created_at:
      type: integer
      format: int64
    expires_at:
      type: integer
      format: int64
    in_progress_at:
      type: integer
      format: int64
    finalizing_at:
      type: integer
      format: int64
    completed_at:
      type: integer
      format: int64
    failed_at:
      type: integer
      format: int64
    expired_at:
      type: integer
      format: int64
    cancelling_at:
      type: integer
      format: int64
    cancelled_at:
      type: integer
      format: int64
    output_file_id:
      type: string
    error_file_id:
      type: string
    errors:
      $ref: '#/BatchErrors'
    processing_status:
      type: string
    results_url:
      type: string
    archived_at:
      type: integer
      format: int64
    operation_name:
      type: string
    done:
      type: boolean
    progress:
      type: integer
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Wrapper object holding a list of BatchError entries.
BatchErrors:
  type: object
  properties:
    object:
      type: string
    data:
      type: array
      items:
        $ref: '#/BatchError'
# A single batch-level error entry.
BatchError:
  type: object
  properties:
    code:
      type: string
    message:
      type: string
    param:
      type: string
    line:
      type: integer
# Response returned after requesting cancellation of a batch.
BatchCancelResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    status:
      $ref: '#/BatchStatus'
    request_counts:
      $ref: '#/BatchRequestCounts'
    cancelling_at:
      type: integer
      format: int64
    cancelled_at:
      type: integer
      format: int64
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Page of per-request results for a batch, with cursor-style pagination fields.
BatchResultsResponse:
  type: object
  properties:
    batch_id:
      type: string
    results:
      type: array
      items:
        $ref: '#/BatchResultItem'
    has_more:
      type: boolean
    next_cursor:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Result for one batch request, keyed by custom_id; it can carry a response,
# a result payload and/or an error object.
BatchResultItem:
  type: object
  properties:
    custom_id:
      type: string
    response:
      $ref: '#/BatchResultResponse'
    result:
      $ref: '#/BatchResultData'
    error:
      $ref: '#/BatchResultError'
# Response portion of a batch result: status code, request ID and a free-form body.
BatchResultResponse:
  type: object
  properties:
    status_code:
      type: integer
    request_id:
      type: string
    body:
      type: object
# Result payload of a batch item: a type tag plus a free-form message object.
BatchResultData:
  type: object
  properties:
    type:
      type: string
    message:
      type: object
# Error attached to an individual batch result item.
BatchResultError:
  type: object
  properties:
    code:
      type: string
    message:
      type: string

View File

@@ -0,0 +1,673 @@
# Chat Completions API schemas
# Request body for chat completions. `model` and `messages` are required;
# every other property is an optional tuning or routing knob.
ChatCompletionRequest:
  type: object
  required:
    - model
    - messages
  properties:
    model:
      type: string
      description: Model in provider/model format (e.g., openai/gpt-4)
      example: openai/gpt-4
    messages:
      type: array
      items:
        $ref: '#/ChatMessage'
      description: List of messages in the conversation
    fallbacks:
      type: array
      items:
        type: string
      description: Fallback models in provider/model format
    stream:
      type: boolean
      description: Whether to stream the response
    frequency_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    logit_bias:
      type: object
      additionalProperties:
        type: number
    logprobs:
      type: boolean
    max_completion_tokens:
      type: integer
    metadata:
      type: object
      additionalProperties: true
    modalities:
      type: array
      items:
        type: string
    parallel_tool_calls:
      type: boolean
    presence_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    prompt_cache_key:
      type: string
    reasoning:
      $ref: '#/ChatReasoning'
    response_format:
      type: object
      description: Format for the response
    safety_identifier:
      type: string
    service_tier:
      type: string
    stream_options:
      $ref: '#/ChatStreamOptions'
    store:
      type: boolean
    temperature:
      type: number
      minimum: 0
      maximum: 2
    tool_choice:
      $ref: '#/ChatToolChoice'
    tools:
      type: array
      items:
        $ref: '#/ChatTool'
    seed:
      type: integer
      description: Deterministic sampling seed
    top_p:
      type: number
      minimum: 0
      maximum: 1
      description: Nucleus sampling parameter
    top_logprobs:
      type: integer
      minimum: 0
      maximum: 20
      description: Number of most likely tokens to return at each position
    # NOTE(review): the description mentions a limit of 4 sequences, but the
    # array branch declares no maxItems — confirm whether that is intentional.
    stop:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
      description: Up to 4 sequences where the API will stop generating tokens
    prediction:
      $ref: '#/ChatPrediction'
    prompt_cache_retention:
      type: string
      enum: [in-memory, 24h]
      description: Prompt cache retention policy
    web_search_options:
      $ref: '#/ChatWebSearchOptions'
    truncation:
      type: string
    user:
      type: string
    verbosity:
      type: string
      enum: [low, medium, high]
# A single conversation message. Only `role` is required; the remaining fields
# are all optional and cover tool, audio, reasoning and annotation data.
ChatMessage:
  type: object
  required:
    - role
  properties:
    role:
      $ref: '#/ChatMessageRole'
    name:
      type: string
    content:
      $ref: '#/ChatMessageContent'
    tool_call_id:
      type: string
      description: For tool messages
    refusal:
      type: string
    audio:
      $ref: '#/ChatAudioMessageAudio'
    reasoning:
      type: string
    reasoning_details:
      type: array
      items:
        $ref: '#/ChatReasoningDetails'
    annotations:
      type: array
      items:
        $ref: '#/ChatAssistantMessageAnnotation'
    tool_calls:
      type: array
      items:
        $ref: '#/ChatAssistantMessageToolCall'
# Roles permitted for the `role` field of a chat message.
ChatMessageRole:
  type: string
  enum:
    - assistant
    - user
    - system
    - tool
    - developer
# Message content: either a plain string or a list of typed content blocks.
ChatMessageContent:
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/ChatContentBlock'
  description: Message content - can be a string or array of content blocks
# One typed block of message content. `type` is required; the other properties
# share their names with the `type` enum values.
ChatContentBlock:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum: [text, image_url, input_audio, file, refusal]
    text:
      type: string
    refusal:
      type: string
    image_url:
      $ref: '#/ChatInputImage'
    input_audio:
      $ref: '#/ChatInputAudio'
    file:
      $ref: '#/ChatInputFile'
    cache_control:
      $ref: './common.yaml#/CacheControl'
# Image input referenced by URL, with an optional detail level.
ChatInputImage:
  type: object
  required:
    - url
  properties:
    url:
      type: string
    detail:
      type: string
      enum: [low, high, auto]
# Audio input; `data` is required. `format` here is a property name, not the
# OpenAPI `format` keyword.
ChatInputAudio:
  type: object
  required:
    - data
  properties:
    data:
      type: string
    format:
      type: string
# File input; all fields are optional strings.
ChatInputFile:
  type: object
  properties:
    file_data:
      type: string
    file_id:
      type: string
    filename:
      type: string
    file_type:
      type: string
# Reasoning controls: an effort level and an optional token cap.
ChatReasoning:
  type: object
  properties:
    effort:
      type: string
      description: Reasoning effort level
      enum: [none, minimal, low, medium, high, xhigh]
    max_tokens:
      type: integer
# Boolean switches applied to streamed chat responses.
ChatStreamOptions:
  type: object
  properties:
    include_obfuscation:
      type: boolean
    include_usage:
      type: boolean
# Tool selection: either a shorthand string mode or a structured choice object.
ChatToolChoice:
  oneOf:
    - type: string
      enum: [none, auto, required]
    - $ref: '#/ChatToolChoiceStruct'
# Structured tool choice. `type` is required; `function` and `allowed_tools`
# carry the details for the correspondingly named types.
ChatToolChoiceStruct:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum: [none, any, required, function, allowed_tools, custom]
    function:
      $ref: '#/ChatToolChoiceFunction'
    allowed_tools:
      $ref: '#/ChatToolChoiceAllowedTools'
# Identifies a function by name inside a tool choice.
ChatToolChoiceFunction:
  type: object
  required:
    - name
  properties:
    name:
      type: string
# A selection mode plus the list of tools it applies to.
ChatToolChoiceAllowedTools:
  type: object
  properties:
    mode:
      type: string
      enum: [auto, required]
    tools:
      type: array
      items:
        $ref: '#/ChatToolChoiceAllowedToolsTool'
# One entry in an allowed-tools list; `type` is required.
ChatToolChoiceAllowedToolsTool:
  type: object
  required:
    - type
  properties:
    type:
      type: string
    function:
      $ref: '#/ChatToolChoiceFunction'
# Tool definition offered to the model: either a function tool or a custom tool.
ChatTool:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum: [function, custom]
    function:
      $ref: '#/ChatToolFunction'
    custom:
      $ref: '#/ChatToolCustom'
    cache_control:
      $ref: './common.yaml#/CacheControl'
# Function tool definition. `name` is required; `description` below is a
# property of the function, not schema documentation.
ChatToolFunction:
  type: object
  required:
    - name
  properties:
    name:
      type: string
    description:
      type: string
    parameters:
      $ref: '#/ToolFunctionParameters'
    strict:
      type: boolean
# Parameter schema for a function tool. Every key under `properties` here is
# the NAME of a field (type, description, required, properties, enum,
# additionalProperties) — they are not schema keywords of this object.
ToolFunctionParameters:
  type: object
  properties:
    type:
      type: string
    description:
      type: string
    required:
      type: array
      items:
        type: string
    properties:
      type: object
      additionalProperties: true
    enum:
      type: array
      items:
        type: string
    additionalProperties:
      type: boolean
# Custom tool definition; carries only an optional format descriptor.
ChatToolCustom:
  type: object
  properties:
    format:
      $ref: '#/ChatToolCustomFormat'
# Format descriptor for a custom tool; `type` is required.
ChatToolCustomFormat:
  type: object
  required:
    - type
  properties:
    type:
      type: string
    grammar:
      $ref: '#/ChatToolCustomGrammarFormat'
# Grammar for a custom tool format: a definition string and its syntax (lark or regex).
ChatToolCustomGrammarFormat:
  type: object
  required:
    - definition
    - syntax
  properties:
    definition:
      type: string
    syntax:
      type: string
      enum: [lark, regex]
# One reasoning detail entry; `type` selects among summary, encrypted and text variants.
ChatReasoningDetails:
  type: object
  properties:
    id:
      type: string
    index:
      type: integer
    type:
      type: string
      enum: [reasoning.summary, reasoning.encrypted, reasoning.text]
    summary:
      type: string
    text:
      type: string
    signature:
      type: string
    data:
      type: string
# Annotation attached to an assistant message: a type tag plus a URL citation.
ChatAssistantMessageAnnotation:
  type: object
  properties:
    type:
      type: string
    url_citation:
      $ref: '#/ChatAssistantMessageAnnotationCitation'
# URL citation details: the cited span (start/end index), title, URL and source data.
ChatAssistantMessageAnnotationCitation:
  type: object
  properties:
    start_index:
      type: integer
    end_index:
      type: integer
    title:
      type: string
    url:
      type: string
    sources:
      type: object
    type:
      type: string
# A tool call emitted by the assistant; `function` is required.
ChatAssistantMessageToolCall:
  type: object
  required:
    - function
  properties:
    index:
      type: integer
    type:
      type: string
    id:
      type: string
    function:
      $ref: '#/ChatAssistantMessageToolCallFunction'
# Function name and arguments of a tool call; `arguments` is carried as a string.
ChatAssistantMessageToolCallFunction:
  type: object
  properties:
    name:
      type: string
    arguments:
      type: string
# Audio payload attached to a message: ID, data, expiry and transcript.
ChatAudioMessageAudio:
  type: object
  properties:
    id:
      type: string
    data:
      type: string
    expires_at:
      type: integer
    transcript:
      type: string
# Non-streaming chat completion response. Alongside choices and usage it can
# carry search_results, videos and citations lists.
ChatCompletionResponse:
  type: object
  properties:
    id:
      type: string
    choices:
      type: array
      items:
        $ref: '#/BifrostResponseChoice'
    created:
      type: integer
    model:
      type: string
    object:
      type: string
    service_tier:
      type: string
    system_fingerprint:
      type: string
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
    search_results:
      type: array
      items:
        $ref: '#/PerplexitySearchResult'
    videos:
      type: array
      items:
        $ref: '#/VideoResult'
    citations:
      type: array
      items:
        type: string
# One choice in a completion response. `text`, `message` and `delta` cover
# text completions, non-streaming chat and streaming chat respectively.
# NOTE(review): `message` and `delta` place `description` beside `$ref`;
# OpenAPI 3.0 tooling ignores siblings of `$ref` — confirm the target spec version.
BifrostResponseChoice:
  type: object
  properties:
    index:
      type: integer
    finish_reason:
      type: string
    log_probs:
      $ref: '#/BifrostLogProbs'
    text:
      type: string
      description: For text completions
    message:
      $ref: '#/ChatMessage'
      description: For non-streaming chat completions
    delta:
      $ref: '#/ChatStreamResponseChoiceDelta'
      description: For streaming chat completions
# Log-probability data for a choice: structured `content`/`refusal` entries
# plus flat arrays (text_offset, token_logprobs, tokens, top_logprobs).
BifrostLogProbs:
  type: object
  properties:
    content:
      type: array
      items:
        $ref: '#/ContentLogProb'
    refusal:
      type: array
      items:
        $ref: '#/LogProb'
    text_offset:
      type: array
      items:
        type: integer
    token_logprobs:
      type: array
      items:
        type: number
    tokens:
      type: array
      items:
        type: string
    # Each element is a map from string keys to numeric values.
    top_logprobs:
      type: array
      items:
        type: object
        additionalProperties:
          type: number
# Log probability for one content token, including its top alternatives.
ContentLogProb:
  type: object
  properties:
    bytes:
      type: array
      items:
        type: integer
    logprob:
      type: number
    token:
      type: string
    top_logprobs:
      type: array
      items:
        $ref: '#/LogProb'
# Token, its log probability, and its bytes as an integer array.
LogProb:
  type: object
  properties:
    bytes:
      type: array
      items:
        type: integer
    logprob:
      type: number
    token:
      type: string
# Incremental message fragment carried by a streaming choice.
# Unlike ChatMessage, `role` and `content` are plain strings here.
ChatStreamResponseChoiceDelta:
  type: object
  properties:
    role:
      type: string
    content:
      type: string
    refusal:
      type: string
    audio:
      $ref: '#/ChatAudioMessageAudio'
    reasoning:
      type: string
    reasoning_details:
      type: array
      items:
        $ref: '#/ChatReasoningDetails'
    tool_calls:
      type: array
      items:
        $ref: '#/ChatAssistantMessageToolCall'
# One chunk of a streamed chat completion.
ChatCompletionStreamResponse:
  type: object
  description: Streaming chat completion response (SSE format)
  properties:
    id:
      type: string
    choices:
      type: array
      items:
        $ref: '#/BifrostResponseChoice'
    created:
      type: integer
    model:
      type: string
    object:
      type: string
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# A single search hit; all fields are optional strings.
PerplexitySearchResult:
  type: object
  description: Search result from Perplexity AI search
  properties:
    title:
      type: string
    url:
      type: string
    date:
      type: string
    last_updated:
      type: string
    snippet:
      type: string
    source:
      type: string
# Video entry: URL, thumbnail URL and dimensions, and a numeric duration.
VideoResult:
  type: object
  properties:
    url:
      type: string
    thumbnail_url:
      type: string
    thumbnail_width:
      type: integer
    thumbnail_height:
      type: integer
    duration:
      type: number
# Predicted output supplied with a request; content is a string or a list of
# free-form objects.
ChatPrediction:
  type: object
  description: Predicted output content for the model to reference (OpenAI only). Can reduce latency.
  properties:
    type:
      type: string
      description: Always "content"
    content:
      description: Predicted content (string or array of content parts)
      oneOf:
        - type: string
        - type: array
          items:
            type: object
            additionalProperties: true
# Web search settings: context size and an optional user location.
ChatWebSearchOptions:
  type: object
  description: Web search options for chat completions (OpenAI only)
  properties:
    search_context_size:
      type: string
      enum: [low, medium, high]
      description: Amount of search context to include
    user_location:
      $ref: '#/ChatWebSearchOptionsUserLocation'
# User location wrapper: a location type plus the approximate-location details.
ChatWebSearchOptionsUserLocation:
  type: object
  properties:
    type:
      type: string
      description: Location type (e.g., "approximate")
    approximate:
      $ref: '#/ChatWebSearchOptionsUserLocationApproximate'
# Approximate user location; all fields are optional strings.
ChatWebSearchOptionsUserLocationApproximate:
  type: object
  properties:
    city:
      type: string
    country:
      type: string
      description: Two-letter ISO country code (e.g., "US")
    region:
      type: string
      description: Region or state (e.g., "California")
    timezone:
      type: string
      description: IANA timezone (e.g., "America/Los_Angeles")

View File

@@ -0,0 +1,149 @@
# Common schemas used across the API
# Closed list of provider identifiers accepted wherever a provider is referenced.
ModelProvider:
  type: string
  description: AI model provider identifier
  enum:
    - openai
    - azure
    - anthropic
    - bedrock
    - cohere
    - vertex
    - vllm
    - mistral
    - ollama
    - groq
    - sgl
    - parasail
    - perplexity
    - replicate
    - cerebras
    - gemini
    - openrouter
    - elevenlabs
    - huggingface
    - nebius
    - xai
    - runway
    - fireworks
# Structured fallback target; both provider and model are required.
Fallback:
  type: object
  description: Fallback model configuration
  required:
    - provider
    - model
  properties:
    provider:
      $ref: '#/ModelProvider'
    model:
      type: string
      description: Model name
# Top-level error envelope; the details live in the nested `error` field.
BifrostError:
  type: object
  description: Error response from Bifrost
  properties:
    event_id:
      type: string
    type:
      type: string
    is_bifrost_error:
      type: boolean
    status_code:
      type: integer
    error:
      $ref: '#/ErrorField'
    extra_fields:
      $ref: '#/BifrostErrorExtraFields'
# Error detail object nested inside BifrostError; all fields are optional strings.
ErrorField:
  type: object
  properties:
    type:
      type: string
    code:
      type: string
    message:
      type: string
    param:
      type: string
    event_id:
      type: string
# Request context attached to an error: provider, requested model and request type.
BifrostErrorExtraFields:
  type: object
  properties:
    provider:
      $ref: '#/ModelProvider'
    model_requested:
      type: string
    request_type:
      type: string
# Metadata block referenced as `extra_fields` by the response schemas.
BifrostResponseExtraFields:
  type: object
  description: Additional fields included in responses
  properties:
    request_type:
      type: string
      description: Type of request that was made
    provider:
      $ref: '#/ModelProvider'
    model_requested:
      type: string
      description: The model that was requested
    model_deployment:
      type: string
      description: The actual model deployment used
    latency:
      type: integer
      format: int64
      description: Request latency in milliseconds
    chunk_index:
      type: integer
      description: Index of the chunk for streaming responses
    raw_request:
      type: object
      description: Raw request if enabled
    raw_response:
      type: object
      description: Raw response if enabled
    cache_debug:
      $ref: '#/BifrostCacheDebug'
# Cache diagnostics: hit flag and type, requested vs. used provider/model,
# and numeric threshold/similarity values.
BifrostCacheDebug:
  type: object
  properties:
    cache_hit:
      type: boolean
    cache_id:
      type: string
    hit_type:
      type: string
    requested_provider:
      type: string
    requested_model:
      type: string
    provider_used:
      type: string
    model_used:
      type: string
    input_tokens:
      type: integer
    threshold:
      type: number
    similarity:
      type: number
# Cache hint; `ephemeral` is the only declared type.
CacheControl:
  type: object
  description: Cache control settings for content blocks
  properties:
    type:
      type: string
      enum: [ephemeral]
    ttl:
      type: string
      description: Time to live (e.g., "1m", "1h")

View File

@@ -0,0 +1,344 @@
# Containers API schemas
# Container status; `running` is the only value declared here.
ContainerStatus:
  type: string
  enum:
    - running
  description: The status of a container
# Expiry rule expressed as a number of minutes after a named anchor point.
ContainerExpiresAfter:
  type: object
  description: Expiration configuration for a container
  properties:
    anchor:
      type: string
      description: The anchor point for expiration (e.g., "last_active_at")
    minutes:
      type: integer
      description: Number of minutes after anchor point
# Container record as it appears inside list responses.
ContainerObject:
  type: object
  description: A container object
  properties:
    id:
      type: string
      description: The unique identifier for the container
    object:
      type: string
      description: The object type (always "container")
    name:
      type: string
      description: The name of the container
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the container was created
    status:
      $ref: '#/ContainerStatus'
    expires_after:
      $ref: '#/ContainerExpiresAfter'
    last_active_at:
      type: integer
      format: int64
      description: Unix timestamp of last activity
    memory_limit:
      type: string
      description: Memory limit for the container (e.g., "1g", "4g")
    metadata:
      type: object
      additionalProperties:
        type: string
      description: User-provided metadata
# Request body for creating a container; provider and name are required.
ContainerCreateRequest:
  type: object
  required:
    - provider
    - name
  properties:
    provider:
      $ref: './common.yaml#/ModelProvider'
    name:
      type: string
      description: Name of the container
    expires_after:
      $ref: '#/ContainerExpiresAfter'
    file_ids:
      type: array
      items:
        type: string
      description: IDs of existing files to copy into this container
    memory_limit:
      type: string
      description: Memory limit for the container (e.g., "1g", "4g")
    metadata:
      type: object
      additionalProperties:
        type: string
      description: User-provided metadata
# Response returned after creating a container: the container fields plus extra_fields.
ContainerCreateResponse:
  type: object
  properties:
    id:
      type: string
      description: The unique identifier for the created container
    object:
      type: string
      description: The object type (always "container")
    name:
      type: string
      description: The name of the container
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the container was created
    status:
      $ref: '#/ContainerStatus'
    expires_after:
      $ref: '#/ContainerExpiresAfter'
    last_active_at:
      type: integer
      format: int64
      description: Unix timestamp of last activity
    memory_limit:
      type: string
      description: Memory limit for the container
    metadata:
      type: object
      additionalProperties:
        type: string
      description: User-provided metadata
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Paginated list of containers.
ContainerListResponse:
  type: object
  properties:
    object:
      type: string
      description: The object type (always "list")
    data:
      type: array
      items:
        $ref: '#/ContainerObject'
      description: List of container objects
    first_id:
      type: string
      description: ID of the first container in the list
    last_id:
      type: string
      description: ID of the last container in the list
    has_more:
      type: boolean
      description: Whether there are more containers to fetch
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Response returned when retrieving a single container: the container fields plus extra_fields.
ContainerRetrieveResponse:
  type: object
  properties:
    id:
      type: string
      description: The unique identifier for the container
    object:
      type: string
      description: The object type (always "container")
    name:
      type: string
      description: The name of the container
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the container was created
    status:
      $ref: '#/ContainerStatus'
    expires_after:
      $ref: '#/ContainerExpiresAfter'
    last_active_at:
      type: integer
      format: int64
      description: Unix timestamp of last activity
    memory_limit:
      type: string
      description: Memory limit for the container
    metadata:
      type: object
      additionalProperties:
        type: string
      description: User-provided metadata
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Confirmation returned after deleting a container.
ContainerDeleteResponse:
  type: object
  properties:
    id:
      type: string
      description: The ID of the deleted container
    object:
      type: string
      description: The object type (always "container.deleted")
    deleted:
      type: boolean
      description: Whether the container was successfully deleted
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# =============================================================================
# CONTAINER FILES SCHEMAS
# =============================================================================
# Container file record as it appears inside list responses.
ContainerFileObject:
  type: object
  description: A file object within a container
  properties:
    id:
      type: string
      description: The unique identifier for the file
    object:
      type: string
      description: The object type (always "container.file")
    container_id:
      type: string
      description: The ID of the container this file belongs to
    path:
      type: string
      description: The path of the file within the container
    bytes:
      type: integer
      format: int64
      description: The size of the file in bytes
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the file was created
    source:
      type: string
      description: The source of the file (e.g., "user_upload", "copied")
# Multipart variant of container file creation: binary content plus an optional path.
# No `required` list is declared, so neither field is mandatory at schema level.
ContainerFileCreateMultipartRequest:
  type: object
  description: Request to create a file in a container via multipart upload
  properties:
    file:
      type: string
      format: binary
      description: The file content to upload
    file_path:
      type: string
      description: Optional path for the file within the container
# JSON variant of container file creation: references an existing file by ID.
ContainerFileCreateJsonRequest:
  type: object
  description: Request to create a file in a container by referencing an existing file
  required:
    - file_id
  properties:
    file_id:
      type: string
      description: The ID of an existing file to copy into the container
    file_path:
      type: string
      description: Optional path for the file within the container
# Created container file: the file fields plus extra_fields.
ContainerFileCreateResponse:
  type: object
  description: Response from creating a file in a container
  properties:
    id:
      type: string
      description: The unique identifier for the created file
    object:
      type: string
      description: The object type (always "container.file")
    container_id:
      type: string
      description: The ID of the container this file belongs to
    path:
      type: string
      description: The path of the file within the container
    bytes:
      type: integer
      format: int64
      description: The size of the file in bytes
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the file was created
    source:
      type: string
      description: The source of the file
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Paginated list of the files in a container.
ContainerFileListResponse:
  type: object
  description: Response containing a list of files in a container
  properties:
    object:
      type: string
      description: The object type (always "list")
    data:
      type: array
      items:
        $ref: '#/ContainerFileObject'
      description: List of file objects
    first_id:
      type: string
      description: ID of the first file in the list
    last_id:
      type: string
      description: ID of the last file in the list
    has_more:
      type: boolean
      description: Whether there are more files to fetch
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Single container file: the file fields plus extra_fields.
ContainerFileRetrieveResponse:
  type: object
  description: Response from retrieving a file from a container
  properties:
    id:
      type: string
      description: The unique identifier for the file
    object:
      type: string
      description: The object type (always "container.file")
    container_id:
      type: string
      description: The ID of the container this file belongs to
    path:
      type: string
      description: The path of the file within the container
    bytes:
      type: integer
      format: int64
      description: The size of the file in bytes
    created_at:
      type: integer
      format: int64
      description: Unix timestamp of when the file was created
    source:
      type: string
      description: The source of the file
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Confirmation returned after deleting a container file.
ContainerFileDeleteResponse:
  type: object
  description: Response from deleting a file from a container
  properties:
    id:
      type: string
      description: The ID of the deleted file
    object:
      type: string
      description: The object type (always "container.file.deleted")
    deleted:
      type: boolean
      description: Whether the file was successfully deleted
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,53 @@
# Count Tokens API schemas
# Request body for counting tokens; model and messages are required.
# Message and tool shapes are taken from responses.yaml.
CountTokensRequest:
  type: object
  required:
    - model
    - messages
  properties:
    model:
      type: string
      description: Model in provider/model format
    messages:
      type: array
      items:
        $ref: './responses.yaml#/ResponsesMessage'
    fallbacks:
      type: array
      items:
        type: string
    tools:
      type: array
      items:
        $ref: './responses.yaml#/ResponsesTool'
    instructions:
      type: string
    text:
      type: string
# Token count result: integer counts plus optional token ID and token string lists.
CountTokensResponse:
  type: object
  properties:
    object:
      type: string
    model:
      type: string
    input_tokens:
      type: integer
    input_tokens_details:
      $ref: './responses.yaml#/ResponsesResponseInputTokens'
    tokens:
      type: array
      items:
        type: integer
    token_strings:
      type: array
      items:
        type: string
    output_tokens:
      type: integer
    total_tokens:
      type: integer
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,76 @@
# Embeddings API schemas
# Request body for embeddings; model and input are required.
EmbeddingRequest:
  type: object
  required:
    - model
    - input
  properties:
    model:
      type: string
      description: Model in provider/model format
    input:
      $ref: '#/EmbeddingInput'
    fallbacks:
      type: array
      items:
        type: string
    encoding_format:
      type: string
      enum: [float, base64]
    dimensions:
      type: integer
# Embedding input: a string, a list of strings, a list of integers, or a list
# of integer lists.
# NOTE(review): an empty array satisfies all three array branches, so strict
# `oneOf` validation rejects `[]` — confirm whether `anyOf` was intended.
EmbeddingInput:
  oneOf:
    - type: string
    - type: array
      items:
        type: string
    - type: array
      items:
        type: integer
    - type: array
      items:
        type: array
        items:
          type: integer
  description: Input for embedding - text or token arrays
# Embeddings response: a list of EmbeddingData entries plus model, usage and extra_fields.
EmbeddingResponse:
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/EmbeddingData'
    model:
      type: string
    object:
      type: string
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# One embedding result: its index, object tag and embedding value.
EmbeddingData:
  type: object
  properties:
    index:
      type: integer
    object:
      type: string
    embedding:
      $ref: '#/EmbeddingStruct'
# Embedding value: a string, a list of numbers, or a list of number lists.
# NOTE(review): an empty array satisfies both array branches, so strict
# `oneOf` validation rejects `[]` — confirm whether `anyOf` was intended.
EmbeddingStruct:
  oneOf:
    - type: string
    - type: array
      items:
        type: number
    - type: array
      items:
        type: array
        items:
          type: number

View File

@@ -0,0 +1,188 @@
# Files API schemas
# S3 backend settings: bucket, region and an optional key prefix.
S3StorageConfig:
  type: object
  description: AWS S3 storage configuration
  properties:
    bucket:
      type: string
      description: S3 bucket name
    region:
      type: string
      description: AWS region
    prefix:
      type: string
      description: Path prefix for stored files
# GCS backend settings: bucket, project and an optional path prefix.
GCSStorageConfig:
  type: object
  description: Google Cloud Storage configuration
  properties:
    bucket:
      type: string
      description: GCS bucket name
    project:
      type: string
      description: GCP project ID
    prefix:
      type: string
      description: Path prefix for stored files
# Container for the per-backend storage settings (s3 and/or gcs).
FileStorageConfig:
  type: object
  description: Storage configuration for cloud storage backends
  properties:
    s3:
      $ref: '#/S3StorageConfig'
    gcs:
      $ref: '#/GCSStorageConfig'
# Values accepted for the `purpose` field of a file.
FilePurpose:
  type: string
  enum:
    - batch
    - assistants
    - fine-tune
    - vision
    - batch_output
    - user_data
    - responses
    - evals
# Status values a file can report.
FileStatus:
  type: string
  enum:
    - uploaded
    - processed
    - processing
    - error
    - deleted
# Upload request: binary file content and a purpose are required; provider is optional.
FileUploadRequest:
  type: object
  required:
    - file
    - purpose
  properties:
    file:
      type: string
      format: binary
    purpose:
      $ref: '#/FilePurpose'
    provider:
      $ref: './common.yaml#/ModelProvider'
# Uploaded file record, including storage backend details and extra_fields.
FileUploadResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    bytes:
      type: integer
      format: int64
    created_at:
      type: integer
      format: int64
    filename:
      type: string
    purpose:
      $ref: '#/FilePurpose'
    status:
      $ref: '#/FileStatus'
    status_details:
      type: string
    expires_at:
      type: integer
      format: int64
    storage_backend:
      type: string
    storage_uri:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# List of files with `has_more` and `after` pagination fields.
FileListResponse:
  type: object
  properties:
    object:
      type: string
    data:
      type: array
      items:
        $ref: '#/FileObject'
    has_more:
      type: boolean
    after:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# File record as it appears inside list responses (no storage or extra fields).
FileObject:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    bytes:
      type: integer
      format: int64
    created_at:
      type: integer
      format: int64
    filename:
      type: string
    purpose:
      $ref: '#/FilePurpose'
    status:
      $ref: '#/FileStatus'
    status_details:
      type: string
    expires_at:
      type: integer
      format: int64
# Single file record, including storage backend details and extra_fields.
FileRetrieveResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    bytes:
      type: integer
      format: int64
    created_at:
      type: integer
      format: int64
    filename:
      type: string
    purpose:
      $ref: '#/FilePurpose'
    status:
      $ref: '#/FileStatus'
    status_details:
      type: string
    expires_at:
      type: integer
      format: int64
    storage_backend:
      type: string
    storage_uri:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Confirmation returned after deleting a file.
FileDeleteResponse:
  type: object
  properties:
    id:
      type: string
    object:
      type: string
    deleted:
      type: boolean
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,514 @@
# Image Generation Schemas
# Request schema for image generation. Only `model` and `prompt` are required.
# The schema is wrapped in a single-entry `allOf`.
ImageGenerationRequest:
  allOf:
    - type: object
      required:
        - model
        - prompt
      properties:
        model:
          type: string
          description: Model identifier in format `provider/model`
        prompt:
          type: string
          description: Text prompt to generate image
        n:
          type: integer
          minimum: 1
          maximum: 10
          description: Number of images to generate
        size:
          type: string
          enum:
            - '256x256'
            - '512x512'
            - '1024x1024'
            - '1792x1024'
            - '1024x1792'
            - '1536x1024'
            - '1024x1536'
            - 'auto'
          description: Size of the generated image
        quality:
          type: string
          enum:
            - 'auto'
            - 'high'
            - 'medium'
            - 'low'
            - 'hd'
            - 'standard'
          description: Quality of the generated image
        style:
          type: string
          enum:
            - 'natural'
            - 'vivid'
          description: Style of the generated image
        response_format:
          type: string
          enum:
            - 'url'
            - 'b64_json'
          default: 'url'
          description: |
            Format of the response.
        background:
          type: string
          enum:
            - 'transparent'
            - 'opaque'
            - 'auto'
          description: Background type for the image
        moderation:
          type: string
          enum:
            - 'low'
            - 'auto'
          description: Content moderation level
        partial_images:
          type: integer
          minimum: 0
          maximum: 3
          description: Number of partial images to generate
        output_compression:
          type: integer
          minimum: 0
          maximum: 100
          description: Compression level (0-100%)
        output_format:
          type: string
          enum:
            - 'png'
            - 'webp'
            - 'jpeg'
          description: Output image format
        user:
          type: string
          description: User identifier for tracking
        seed:
          type: integer
          description: Seed for reproducible image generation
        negative_prompt:
          type: string
          description: Negative prompt to guide what to avoid in generation
        num_inference_steps:
          type: integer
          description: Number of inference steps for generation
        stream:
          type: boolean
          default: false
          description: |
            Whether to stream the response. When true, images are sent as SSE.
            When streaming, providers may return base64 chunks (`b64_json`) and/or URLs (`url`) depending on provider and configuration.
        fallbacks:
          type: array
          items:
            $ref: './common.yaml#/Fallback'
          description: Fallback models to try if primary model fails
# Non-streaming image generation result. `data` carries the ImageData entries.
# `output_format` and `size` are enum-restricted here, while `background` and
# `quality` are free-form strings.
ImageGenerationResponse:
  type: object
  properties:
    id:
      type: string
      description: Unique identifier for the generation request
    created:
      type: integer
      format: int64
      description: Unix timestamp when the image was created
    model:
      type: string
      description: Model used for generation
    data:
      type: array
      items:
        $ref: '#/ImageData'
      description: Array of generated images
    background:
      type: string
      description: Background type for the image
    output_format:
      type: string
      enum:
        - 'png'
        - 'webp'
        - 'jpeg'
      description: Output image format
    quality:
      type: string
      description: Quality of the generated image
    size:
      type: string
      enum:
        - '256x256'
        - '512x512'
        - '1024x1024'
        - '1792x1024'
        - '1024x1792'
        - '1536x1024'
        - '1024x1536'
        - 'auto'
      description: Size of the generated image
    usage:
      $ref: '#/ImageUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# One generated image. Both `url` and `b64_json` are optional in the schema.
ImageData:
  type: object
  properties:
    url:
      type: string
      format: uri
      description: URL of the generated image
    b64_json:
      type: string
      description: Base64-encoded image data
    revised_prompt:
      type: string
      description: Revised prompt used for generation
    index:
      type: integer
      description: Index of this image
# Four free-form string parameters: background, output_format, quality, size.
# NOTE(review): not referenced by any schema visible in this file — confirm it is used elsewhere.
ImageGenerationResponseParameters:
  type: object
  properties:
    background:
      type: string
    output_format:
      type: string
    quality:
      type: string
    size:
      type: string
# Token accounting for image operations. The input and output sides each carry
# an ImageTokenDetails breakdown.
ImageUsage:
  type: object
  properties:
    input_tokens:
      type: integer
      description: Number of input tokens
    input_tokens_details:
      $ref: '#/ImageTokenDetails'
    total_tokens:
      type: integer
      description: Total tokens used
    output_tokens:
      type: integer
      description: Number of output tokens
    output_tokens_details:
      $ref: '#/ImageTokenDetails'
# Breakdown of a token count into image and text portions.
ImageTokenDetails:
  type: object
  properties:
    image_tokens:
      type: integer
      description: Tokens used for images
    text_tokens:
      type: integer
      description: Tokens used for text
# One SSE chunk of a streamed image generation. `type` distinguishes partial
# image, completed and error events.
ImageGenerationStreamResponse:
  type: object
  description: |
    Streaming response chunk for image generation.
    Sent via Server-Sent Events (SSE).
    Providers may return either b64_json (base64-encoded image data) or url (public URL to the image).
  properties:
    id:
      type: string
      description: Request identifier
    type:
      type: string
      enum:
        - 'image_generation.partial_image'
        - 'image_generation.completed'
        - 'error'
      description: Type of stream event
    partial_image_index:
      type: integer
      description: Index of the partial image chunk
    sequence_number:
      type: integer
      description: Sequence number for event ordering within the stream
    b64_json:
      type: string
      description: |
        Base64-encoded chunk of image data.
        Optional; either b64_json or url may be present.
    url:
      type: string
      format: uri
      description: |
        Optional public URL to the generated image chunk.
        Used by HuggingFace and other providers that return image URLs instead of base64 data.
    created_at:
      type: integer
      format: int64
      description: Timestamp when chunk was created
    size:
      type: string
      enum:
        - '256x256'
        - '512x512'
        - '1024x1024'
        - '1792x1024'
        - '1024x1792'
        - '1536x1024'
        - '1024x1536'
        - 'auto'
      description: Size of the generated image
    quality:
      type: string
      description: Quality setting used
    background:
      type: string
      description: Background type used
    output_format:
      type: string
      enum:
        - 'png'
        - 'webp'
        - 'jpeg'
      description: Output format used
    revised_prompt:
      type: string
      description: Revised prompt
    # NOTE(review): a `description` placed next to `$ref` is ignored by
    # OpenAPI 3.0 tooling and only honored from 3.1 on — confirm the spec version.
    usage:
      $ref: '#/ImageUsage'
      description: Token usage
    error:
      $ref: './common.yaml#/BifrostError'
      description: Error information if generation failed
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Image Edit Schemas (multipart/form-data)
# Image edit form fields. `model` and `image` are required by the schema;
# `prompt` is conditionally required per its own description.
# NOTE(review): the `size` and `quality` enums are narrower than those of
# ImageGenerationRequest (no 1792-wide sizes, no `hd`) — confirm this is intentional.
ImageEditRequest:
  type: object
  required:
    - model
    - image
  properties:
    model:
      type: string
      description: Model identifier in format `provider/model`
    prompt:
      type: string
      description: |
        Text prompt describing the edit. Required unless `type` is `background_removal`.
    image:
      type: string
      format: binary
      description: |
        Image file to edit. Use field name `image` for a single file or `image[]` for multiple files.
    mask:
      type: string
      format: binary
      description: Optional mask image for inpainting (transparent areas indicate regions to edit)
    type:
      type: string
      enum:
        - 'inpainting'
        - 'outpainting'
        - 'background_removal'
      description: Type of edit operation
    n:
      type: integer
      minimum: 1
      maximum: 10
      description: Number of images to generate
    size:
      type: string
      enum:
        - '256x256'
        - '512x512'
        - '1024x1024'
        - '1536x1024'
        - '1024x1536'
        - 'auto'
      description: Size of the output image
    response_format:
      type: string
      enum:
        - 'url'
        - 'b64_json'
      default: 'url'
      description: Format of the response
    stream:
      type: boolean
      default: false
      description: When true, stream the response via Server-Sent Events
    background:
      type: string
      enum:
        - 'transparent'
        - 'opaque'
        - 'auto'
      description: Background type for the image
    input_fidelity:
      type: string
      enum:
        - 'low'
        - 'high'
      description: How closely to follow the original image
    partial_images:
      type: integer
      minimum: 0
      maximum: 3
      description: Number of partial images to generate when streaming
    quality:
      type: string
      enum:
        - 'auto'
        - 'high'
        - 'medium'
        - 'low'
        - 'standard'
      description: Quality of the output image
    output_format:
      type: string
      enum:
        - 'png'
        - 'webp'
        - 'jpeg'
      description: Output image format
    num_inference_steps:
      type: integer
      description: Number of inference steps
    seed:
      type: integer
      description: Seed for reproducible editing
    output_compression:
      type: integer
      minimum: 0
      maximum: 100
      description: Compression level (0-100%)
    negative_prompt:
      type: string
      description: What to avoid in the edit
    user:
      type: string
      description: User identifier for tracking
    fallbacks:
      type: array
      items:
        $ref: './common.yaml#/Fallback'
      description: Fallback models to try if primary model fails
# Image Variation Schemas (multipart/form-data)
# Image variation form fields. `model` and `image` are required.
ImageVariationRequest:
  type: object
  required:
    - model
    - image
  properties:
    model:
      type: string
      description: Model identifier in format `provider/model`
    image:
      type: string
      format: binary
      description: |
        Image file to create variations of. Use field name `image` for a single file or `image[]` for multiple (first image is used).
    n:
      type: integer
      minimum: 1
      maximum: 10
      description: Number of variations to generate
    size:
      type: string
      enum:
        - '256x256'
        - '512x512'
        - '1024x1024'
        - '1792x1024'
        - '1024x1792'
        - '1536x1024'
        - '1024x1536'
        - 'auto'
      description: Size of the output images
    response_format:
      type: string
      enum:
        - 'url'
        - 'b64_json'
      default: 'url'
      description: Format of the response
    user:
      type: string
      description: User identifier for tracking
    fallbacks:
      type: array
      items:
        $ref: './common.yaml#/Fallback'
      description: Fallback models to try if primary model fails
# Image Edit Streaming (SSE)
# One SSE chunk of a streamed image edit. Same field set as
# ImageGenerationStreamResponse, with `image_edit.*` event names.
# NOTE(review): `size` is a free-form string here but an enum in
# ImageGenerationStreamResponse — confirm the difference is intended.
ImageEditStreamResponse:
  type: object
  description: |
    Streaming response chunk for image edit.
    Sent via Server-Sent Events (SSE) when `stream=true`.
  properties:
    id:
      type: string
      description: Request identifier
    type:
      type: string
      enum:
        - 'image_edit.partial_image'
        - 'image_edit.completed'
        - 'error'
      description: Type of stream event
    partial_image_index:
      type: integer
      description: Index of the partial image chunk
    sequence_number:
      type: integer
      description: Sequence number for event ordering within the stream
    b64_json:
      type: string
      description: Base64-encoded chunk of image data; optional
    url:
      type: string
      format: uri
      description: Optional public URL to the image chunk
    created_at:
      type: integer
      format: int64
      description: Timestamp when chunk was created
    size:
      type: string
      description: Size of the image
    quality:
      type: string
      description: Quality setting used
    background:
      type: string
      description: Background type used
    output_format:
      type: string
      enum:
        - 'png'
        - 'webp'
        - 'jpeg'
      description: Output format used
    revised_prompt:
      type: string
      description: Revised prompt
    # NOTE(review): a `description` placed next to `$ref` is ignored by
    # OpenAPI 3.0 tooling and only honored from 3.1 on — confirm the spec version.
    usage:
      $ref: '#/ImageUsage'
      description: Token usage
    error:
      $ref: './common.yaml#/BifrostError'
      description: Error information if edit failed
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,125 @@
# Models API schemas
# Model listing result: `data` is an array of Model entries, and
# `next_page_token` is an optional string.
ListModelsResponse:
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/Model'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
    next_page_token:
      type: string
# A single model entry. Nested objects (architecture, pricing, top_provider,
# per_request_limits, default_parameters) are defined as sibling schemas.
Model:
  type: object
  properties:
    id:
      type: string
      description: Model ID in provider/model format
    canonical_slug:
      type: string
    name:
      type: string
    deployment:
      type: string
    created:
      type: integer
      format: int64
    context_length:
      type: integer
    max_input_tokens:
      type: integer
    max_output_tokens:
      type: integer
    architecture:
      $ref: '#/Architecture'
    pricing:
      $ref: '#/Pricing'
    top_provider:
      $ref: '#/TopProvider'
    per_request_limits:
      $ref: '#/PerRequestLimits'
    supported_parameters:
      type: array
      items:
        type: string
    default_parameters:
      $ref: '#/DefaultParameters'
    hugging_face_id:
      type: string
    # `description` here is a property of the model object, not schema documentation.
    description:
      type: string
    owned_by:
      type: string
    supported_methods:
      type: array
      items:
        type: string
# Architecture details referenced from Model.architecture.
Architecture:
  type: object
  properties:
    modality:
      type: string
    tokenizer:
      type: string
    instruct_type:
      type: string
    input_modalities:
      type: array
      items:
        type: string
    output_modalities:
      type: array
      items:
        type: string
# Pricing entries referenced from Model.pricing. Every value is typed as a
# string, not a number.
Pricing:
  type: object
  properties:
    prompt:
      type: string
    completion:
      type: string
    request:
      type: string
    image:
      type: string
    web_search:
      type: string
    internal_reasoning:
      type: string
    input_cache_read:
      type: string
    input_cache_write:
      type: string
# Provider-level details referenced from Model.top_provider.
TopProvider:
  type: object
  properties:
    is_moderated:
      type: boolean
    context_length:
      type: integer
    max_completion_tokens:
      type: integer
# Integer prompt and completion token limits referenced from Model.per_request_limits.
PerRequestLimits:
  type: object
  properties:
    prompt_tokens:
      type: integer
    completion_tokens:
      type: integer
# Numeric sampling defaults referenced from Model.default_parameters.
DefaultParameters:
  type: object
  properties:
    temperature:
      type: number
    top_p:
      type: number
    frequency_penalty:
      type: number

View File

@@ -0,0 +1,98 @@
# Rerank API schemas
# Rerank request. `model`, `query` (non-empty) and `documents` (at least one)
# are required; every other field is an optional tuning knob.
RerankRequest:
  type: object
  required:
    - model
    - query
    - documents
  properties:
    model:
      type: string
      description: Model in provider/model format
      example: cohere/rerank-v3.5
    query:
      type: string
      minLength: 1
      description: Query used to score and reorder documents
    documents:
      type: array
      description: Documents to rerank
      minItems: 1
      items:
        $ref: '#/RerankDocument'
    fallbacks:
      type: array
      items:
        type: string
      description: Fallback models in provider/model format
    top_n:
      type: integer
      minimum: 1
      description: Maximum number of ranked results to return
    max_tokens_per_doc:
      type: integer
      minimum: 1
      description: Maximum tokens to consider per document (provider-dependent)
    priority:
      type: integer
      description: Request priority hint (provider-dependent)
    return_documents:
      type: boolean
      description: Whether to include document content in each result
# A document to rerank. Only `text` is required; `meta` accepts arbitrary keys.
RerankDocument:
  type: object
  required:
    - text
  properties:
    text:
      type: string
      minLength: 1
      description: Document text content
    id:
      type: string
      minLength: 1
      description: Optional document identifier
    meta:
      type: object
      description: Optional document metadata
      additionalProperties: true
# Rerank result envelope. `results` and `model` are always present.
RerankResponse:
  type: object
  required:
    - results
    - model
  properties:
    id:
      type: string
      description: Unique identifier for the rerank response
    results:
      type: array
      description: Ranked results ordered by relevance score descending
      items:
        $ref: '#/RerankResult'
    model:
      type: string
      description: Model used to perform reranking
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# One ranked entry. `index` (>= 0) and `relevance_score` are required;
# `document` is optional.
RerankResult:
  type: object
  required:
    - index
    - relevance_score
  properties:
    index:
      type: integer
      minimum: 0
      description: Index into the original documents array
    relevance_score:
      type: number
      description: Relevance score for this document
    document:
      $ref: '#/RerankDocument'

View File

@@ -0,0 +1,716 @@
# Responses API schemas
# Responses API request. `model` and `input` are required; everything else
# is optional.
ResponsesRequest:
  type: object
  required:
    - model
    - input
  properties:
    model:
      type: string
      description: Model in provider/model format
    input:
      $ref: '#/ResponsesRequestInput'
    fallbacks:
      type: array
      items:
        type: string
    stream:
      type: boolean
    background:
      type: boolean
    conversation:
      type: string
    include:
      type: array
      items:
        type: string
    instructions:
      type: string
    max_output_tokens:
      type: integer
    max_tool_calls:
      type: integer
    metadata:
      type: object
      additionalProperties: true
    parallel_tool_calls:
      type: boolean
    previous_response_id:
      type: string
    prompt_cache_key:
      type: string
    reasoning:
      $ref: '#/ResponsesParametersReasoning'
    safety_identifier:
      type: string
    service_tier:
      type: string
    stream_options:
      $ref: '#/ResponsesStreamOptions'
    store:
      type: boolean
    temperature:
      type: number
    text:
      $ref: '#/ResponsesTextConfig'
    top_logprobs:
      type: integer
    top_p:
      type: number
    tool_choice:
      $ref: '#/ResponsesToolChoice'
    tools:
      type: array
      items:
        $ref: '#/ResponsesTool'
    truncation:
      type: string
# Union type for ResponsesRequest.input: a plain string or a list of ResponsesMessage.
ResponsesRequestInput:
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/ResponsesMessage'
  description: Input - can be a string or array of messages
# A single input or output item. One flat object covers every item kind listed
# in ResponsesMessageType, so no property is required.
ResponsesMessage:
  type: object
  properties:
    id:
      type: string
    type:
      $ref: '#/ResponsesMessageType'
    status:
      type: string
      enum:
        - in_progress
        - completed
        - incomplete
        - interpreting
        - failed
    role:
      type: string
      enum:
        - assistant
        - user
        - system
        - developer
    content:
      $ref: '#/ResponsesMessageContent'
    call_id:
      type: string
    name:
      type: string
    arguments:
      type: string
    output:
      type: object
    action:
      type: object
    error:
      type: string
    queries:
      type: array
      items:
        type: string
    results:
      type: array
      items:
        type: object
    summary:
      type: array
      items:
        $ref: '#/ResponsesReasoningSummary'
    encrypted_content:
      type: string
# String enum of item kinds accepted in ResponsesMessage.type.
# NOTE(review): `mcp_approval_responses` is plural while `mcp_approval_request`
# is singular — confirm against the wire values the server emits.
ResponsesMessageType:
  type: string
  enum:
    - message
    - file_search_call
    - computer_call
    - computer_call_output
    - web_search_call
    - web_fetch_call
    - function_call
    - function_call_output
    - code_interpreter_call
    - local_shell_call
    - local_shell_call_output
    - mcp_call
    - custom_tool_call
    - custom_tool_call_output
    - image_generation_call
    - mcp_list_tools
    - mcp_approval_request
    - mcp_approval_responses
    - reasoning
    - item_reference
    - refusal
# Union type for message content: a plain string or a list of content blocks.
ResponsesMessageContent:
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/ResponsesMessageContentBlock'
# One content block. `type` is the only required field; the remaining
# properties are a flat union across all block types.
ResponsesMessageContentBlock:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - input_text
        - input_image
        - input_file
        - input_audio
        - output_text
        - refusal
        - reasoning_text
    file_id:
      type: string
    text:
      type: string
    signature:
      type: string
    image_url:
      type: string
    detail:
      type: string
    file_data:
      type: string
    file_url:
      type: string
    filename:
      type: string
    file_type:
      type: string
    input_audio:
      $ref: '#/ResponsesInputMessageContentBlockAudio'
    annotations:
      type: array
      items:
        $ref: '#/ResponsesOutputMessageContentTextAnnotation'
    logprobs:
      type: array
      items:
        $ref: '#/ResponsesOutputMessageContentTextLogProb'
    refusal:
      type: string
    cache_control:
      $ref: './common.yaml#/CacheControl'
# Audio payload of an input content block. Both `format` (mp3 or wav) and
# `data` are required.
ResponsesInputMessageContentBlockAudio:
  type: object
  required:
    - format
    - data
  properties:
    format:
      type: string
      enum:
        - mp3
        - wav
    data:
      type: string
# Annotation attached to output text. `type` selects one of four citation or
# path kinds; the other fields are a flat union across them.
ResponsesOutputMessageContentTextAnnotation:
  type: object
  properties:
    type:
      type: string
      enum:
        - file_citation
        - url_citation
        - container_file_citation
        - file_path
    index:
      type: integer
    file_id:
      type: string
    text:
      type: string
    start_index:
      type: integer
    end_index:
      type: integer
    filename:
      type: string
    title:
      type: string
    url:
      type: string
    container_id:
      type: string
# Log-probability entry for one output token. `top_logprobs` reuses the LogProb
# schema from chat.yaml.
ResponsesOutputMessageContentTextLogProb:
  type: object
  properties:
    bytes:
      type: array
      items:
        type: integer
    logprob:
      type: number
    token:
      type: string
    top_logprobs:
      type: array
      items:
        $ref: './chat.yaml#/LogProb'
# Reasoning options. `generate_summary` is flagged deprecated in this schema.
ResponsesParametersReasoning:
  type: object
  properties:
    effort:
      type: string
      enum:
        - none
        - minimal
        - low
        - medium
        - high
        - xhigh
    generate_summary:
      type: string
      deprecated: true
    summary:
      type: string
      enum:
        - auto
        - concise
        - detailed
    max_tokens:
      type: integer
# Streaming options; currently a single boolean switch.
ResponsesStreamOptions:
  type: object
  properties:
    include_obfuscation:
      type: boolean
# Text output configuration: an optional format spec and a verbosity level.
ResponsesTextConfig:
  type: object
  properties:
    format:
      $ref: '#/ResponsesTextConfigFormat'
    verbosity:
      type: string
      enum:
        - low
        - medium
        - high
# Output format spec. `type` is required; `name`, `schema` and `strict` are optional.
ResponsesTextConfigFormat:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - text
        - json_schema
        - json_object
    name:
      type: string
    schema:
      type: object
    strict:
      type: boolean
# Union type for tool_choice: one of three keyword strings, or a structured choice.
ResponsesToolChoice:
  oneOf:
    - type: string
      enum:
        - none
        - auto
        - required
    - $ref: '#/ResponsesToolChoiceStruct'
# Structured tool choice. `type` is required; `tools` lists
# ResponsesToolChoiceAllowedToolDef entries.
ResponsesToolChoiceStruct:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - none
        - auto
        - any
        - required
        - function
        - allowed_tools
        - file_search
        - web_search_preview
        - computer_use_preview
        - code_interpreter
        - image_generation
        - mcp
        - custom
    mode:
      type: string
    name:
      type: string
    server_label:
      type: string
    tools:
      type: array
      items:
        $ref: '#/ResponsesToolChoiceAllowedToolDef'
# Entry of ResponsesToolChoiceStruct.tools. `type` is required and limited to
# three values.
ResponsesToolChoiceAllowedToolDef:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - function
        - mcp
        - image_generation
    name:
      type: string
    server_label:
      type: string
# Tool definition. `type` is the only required field; all other properties are
# a flat union of the settings used by the different tool types.
ResponsesTool:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - function
        - file_search
        - computer_use_preview
        - web_search
        - web_fetch
        - mcp
        - code_interpreter
        - image_generation
        - local_shell
        - custom
        - web_search_preview
        - memory
        - tool_search
    name:
      type: string
    # `description` here is a property of the tool object, not schema documentation.
    description:
      type: string
    cache_control:
      $ref: './common.yaml#/CacheControl'
    parameters:
      $ref: './chat.yaml#/ToolFunctionParameters'
    strict:
      type: boolean
    vector_store_ids:
      type: array
      items:
        type: string
    filters:
      type: object
    max_num_results:
      type: integer
    ranking_options:
      type: object
    display_height:
      type: integer
    display_width:
      type: integer
    environment:
      type: string
    enable_zoom:
      type: boolean
    search_context_size:
      type: string
    user_location:
      type: object
    server_label:
      type: string
    server_url:
      type: string
    allowed_tools:
      type: object
    authorization:
      type: string
    connector_id:
      type: string
    headers:
      type: object
      additionalProperties:
        type: string
    require_approval:
      type: object
    server_description:
      type: string
    container:
      type: object
    background:
      type: string
    input_fidelity:
      type: string
    input_image_mask:
      type: object
    moderation:
      type: string
    output_compression:
      type: integer
    output_format:
      type: string
    partial_images:
      type: integer
    quality:
      type: string
    size:
      type: string
    format:
      type: object
# Entry of ResponsesMessage.summary. Both fields are required, and `type` has a
# single allowed value.
ResponsesReasoningSummary:
  type: object
  required:
    - type
    - text
  properties:
    type:
      type: string
      enum:
        - summary_text
    text:
      type: string
# Responses API result object. No property is required. `output` carries
# ResponsesMessage items; search_results, videos and citations are additional
# arrays declared after extra_fields.
ResponsesResponse:
  type: object
  properties:
    id:
      type: string
    background:
      type: boolean
    conversation:
      type: object
    # Declared as int64 to match the other timestamp fields in these schemas
    # (FileObject.created_at, ImageGenerationResponse.created, Model.created).
    created_at:
      type: integer
      format: int64
    error:
      $ref: '#/ResponsesResponseError'
    include:
      type: array
      items:
        type: string
    incomplete_details:
      $ref: '#/ResponsesResponseIncompleteDetails'
    # NOTE(review): typed as object here but as string in ResponsesRequest — confirm.
    instructions:
      type: object
    max_output_tokens:
      type: integer
    max_tool_calls:
      type: integer
    metadata:
      type: object
    model:
      type: string
    output:
      type: array
      items:
        $ref: '#/ResponsesMessage'
    parallel_tool_calls:
      type: boolean
    previous_response_id:
      type: string
    prompt:
      type: object
    prompt_cache_key:
      type: string
    reasoning:
      $ref: '#/ResponsesParametersReasoning'
    safety_identifier:
      type: string
    service_tier:
      type: string
    status:
      type: string
      enum:
        - completed
        - failed
        - in_progress
        - canceled
        - queued
        - incomplete
    stop_reason:
      type: string
    store:
      type: boolean
    temperature:
      type: number
    text:
      $ref: '#/ResponsesTextConfig'
    top_logprobs:
      type: integer
    top_p:
      type: number
    tool_choice:
      $ref: '#/ResponsesToolChoice'
    tools:
      type: array
      items:
        $ref: '#/ResponsesTool'
    truncation:
      type: string
    usage:
      $ref: '#/ResponsesResponseUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
    search_results:
      type: array
      items:
        $ref: './chat.yaml#/PerplexitySearchResult'
    videos:
      type: array
      items:
        $ref: './chat.yaml#/VideoResult'
    citations:
      type: array
      items:
        type: string
# Error object embedded in ResponsesResponse.error. Both fields are required.
ResponsesResponseError:
  type: object
  required:
    - code
    - message
  properties:
    code:
      type: string
    message:
      type: string
# Detail object for ResponsesResponse.incomplete_details, with a required `reason`.
ResponsesResponseIncompleteDetails:
  type: object
  required:
    - reason
  properties:
    reason:
      type: string
# Token usage for a Responses API call, with per-side detail objects and a
# cost object from usage.yaml.
ResponsesResponseUsage:
  type: object
  properties:
    input_tokens:
      type: integer
    input_tokens_details:
      $ref: '#/ResponsesResponseInputTokens'
    output_tokens:
      type: integer
    output_tokens_details:
      $ref: '#/ResponsesResponseOutputTokens'
    total_tokens:
      type: integer
    cost:
      $ref: './usage.yaml#/BifrostCost'
# Input-token breakdown. Per the field descriptions, the cached counts are
# already part of the parent input_tokens total.
ResponsesResponseInputTokens:
  type: object
  properties:
    text_tokens:
      type: integer
    audio_tokens:
      type: integer
    image_tokens:
      type: integer
    cached_read_tokens:
      type: integer
      description: >
        Tokens served from the prompt cache (cache hit), billed at the reduced
        cache-read rate. Already included in the parent input_tokens total.
    cached_write_tokens:
      type: integer
      description: >
        Tokens written to the prompt cache on this request, billed at the
        cache-creation rate. Already included in the parent input_tokens total.
        Populated for providers that separately report cache write tokens
        (Anthropic, Bedrock).
# Output-token breakdown. All counters are optional integers.
ResponsesResponseOutputTokens:
  type: object
  properties:
    text_tokens:
      type: integer
    accepted_prediction_tokens:
      type: integer
    audio_tokens:
      type: integer
    reasoning_tokens:
      type: integer
    rejected_prediction_tokens:
      type: integer
    citation_tokens:
      type: integer
    num_search_queries:
      type: integer
# One streamed event. `type` selects the event kind; the remaining properties
# are a flat union of the fields used across event kinds.
ResponsesStreamResponse:
  type: object
  description: Streaming responses API response (SSE format)
  properties:
    type:
      $ref: '#/ResponsesStreamResponseType'
    sequence_number:
      type: integer
    response:
      $ref: '#/ResponsesResponse'
    output_index:
      type: integer
    item:
      $ref: '#/ResponsesMessage'
    content_index:
      type: integer
    item_id:
      type: string
    part:
      $ref: '#/ResponsesMessageContentBlock'
    delta:
      type: string
    signature:
      type: string
    logprobs:
      type: array
      items:
        $ref: '#/ResponsesOutputMessageContentTextLogProb'
    text:
      type: string
    refusal:
      type: string
    arguments:
      type: string
    partial_image_b64:
      type: string
    partial_image_index:
      type: integer
    annotation:
      $ref: '#/ResponsesOutputMessageContentTextAnnotation'
    annotation_index:
      type: integer
    code:
      type: string
    message:
      type: string
    param:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# String enum of event names accepted in ResponsesStreamResponse.type.
# All values are prefixed `response.` except the final `error`.
ResponsesStreamResponseType:
  type: string
  enum:
    - response.ping
    - response.created
    - response.in_progress
    - response.completed
    - response.failed
    - response.incomplete
    - response.output_item.added
    - response.output_item.done
    - response.content_part.added
    - response.content_part.done
    - response.output_text.delta
    - response.output_text.done
    - response.refusal.delta
    - response.refusal.done
    - response.function_call_arguments.delta
    - response.function_call_arguments.done
    - response.file_search_call.in_progress
    - response.file_search_call.searching
    - response.file_search_call.results.added
    - response.file_search_call.results.completed
    - response.web_search_call.searching
    - response.web_search_call.results.added
    - response.web_search_call.results.completed
    - response.web_fetch_call.in_progress
    - response.web_fetch_call.fetching
    - response.web_fetch_call.completed
    - response.reasoning_summary_part.added
    - response.reasoning_summary_part.done
    - response.reasoning_summary_text.delta
    - response.reasoning_summary_text.done
    - response.image_generation_call.completed
    - response.image_generation_call.generating
    - response.image_generation_call.in_progress
    - response.image_generation_call.partial_image
    - response.mcp_call_arguments.delta
    - response.mcp_call_arguments.done
    - response.mcp_call.completed
    - response.mcp_call.failed
    - response.mcp_call.in_progress
    - response.mcp_list_tools.completed
    - response.mcp_list_tools.failed
    - response.mcp_list_tools.in_progress
    - response.code_interpreter_call.in_progress
    - response.code_interpreter_call.interpreting
    - response.code_interpreter_call.completed
    - response.code_interpreter_call_code.delta
    - response.code_interpreter_call_code.done
    - response.output_text.annotation.added
    - response.output_text.annotation.done
    - response.queued
    - response.custom_tool_call_input.delta
    - response.custom_tool_call_input.done
    - error

View File

@@ -0,0 +1,132 @@
# Speech API schemas
# Text-to-speech request. `model`, `input` and `voice` are required.
# `speed` is bounded to the range 0.25 to 4.0.
SpeechRequest:
  type: object
  required:
    - model
    - input
    - voice
  properties:
    model:
      type: string
      description: Model in provider/model format
    input:
      type: string
      description: Text to convert to speech
    fallbacks:
      type: array
      items:
        type: string
    stream_format:
      type: string
      enum:
        - sse
      description: Set to "sse" to enable streaming
    voice:
      $ref: '#/SpeechVoiceInput'
    instructions:
      type: string
    response_format:
      type: string
      enum:
        - mp3
        - opus
        - aac
        - flac
        - wav
        - pcm
    speed:
      type: number
      minimum: 0.25
      maximum: 4.0
    language_code:
      type: string
    pronunciation_dictionary_locators:
      type: array
      items:
        $ref: '#/SpeechPronunciationDictionaryLocator'
    enable_logging:
      type: boolean
    optimize_streaming_latency:
      type: boolean
    with_timestamps:
      type: boolean
# Union type for SpeechRequest.voice: a single voice string or a list of VoiceConfig.
SpeechVoiceInput:
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/VoiceConfig'
# Pairs a speaker with a voice. Both fields are required strings.
VoiceConfig:
  type: object
  required:
    - speaker
    - voice
  properties:
    speaker:
      type: string
    voice:
      type: string
# Locator for a pronunciation dictionary. The id is required; `version_id` is optional.
SpeechPronunciationDictionaryLocator:
  type: object
  required:
    - pronunciation_dictionary_id
  properties:
    pronunciation_dictionary_id:
      type: string
    version_id:
      type: string
# Speech synthesis result: audio payload, usage, two optional alignment
# objects and a separate `audio_base64` string.
SpeechResponse:
  type: object
  properties:
    # NOTE(review): OpenAPI `format: byte` denotes base64-encoded text, while
    # the description says binary — confirm which encoding is on the wire.
    audio:
      type: string
      format: byte
      description: Audio data (binary)
    usage:
      $ref: '#/SpeechUsage'
    alignment:
      $ref: '#/SpeechAlignment'
    normalized_alignment:
      $ref: '#/SpeechAlignment'
    audio_base64:
      type: string
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Integer token counters for a speech request.
SpeechUsage:
  type: object
  properties:
    input_tokens:
      type: integer
    output_tokens:
      type: integer
    total_tokens:
      type: integer
# Character timing data as three arrays: start times and end times (numbers,
# milliseconds per the field names) and the characters themselves.
SpeechAlignment:
  type: object
  properties:
    char_start_times_ms:
      type: array
      items:
        type: number
    char_end_times_ms:
      type: array
      items:
        type: number
    characters:
      type: array
      items:
        type: string
# One streamed speech event. `type` is either a delta or the final done event.
SpeechStreamResponse:
  type: object
  properties:
    type:
      type: string
      enum:
        - speech.audio.delta
        - speech.audio.done
    audio:
      type: string
      format: byte
    usage:
      $ref: '#/SpeechUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,98 @@
# Text Completions API schemas
# Text completion request. `model` and `prompt` are required; the remaining
# fields are optional sampling and control parameters.
TextCompletionRequest:
  type: object
  required:
    - model
    - prompt
  properties:
    model:
      type: string
      description: Model in provider/model format
    prompt:
      $ref: '#/TextCompletionInput'
    fallbacks:
      type: array
      items:
        type: string
    stream:
      type: boolean
    best_of:
      type: integer
    echo:
      type: boolean
    frequency_penalty:
      type: number
    # Map with free-form keys and numeric values.
    logit_bias:
      type: object
      additionalProperties:
        type: number
    logprobs:
      type: integer
    max_tokens:
      type: integer
    n:
      type: integer
    presence_penalty:
      type: number
    seed:
      type: integer
    stop:
      type: array
      items:
        type: string
    suffix:
      type: string
    temperature:
      type: number
    top_p:
      type: number
    user:
      type: string
# Union type for TextCompletionRequest.prompt: one string or a list of strings.
TextCompletionInput:
  oneOf:
    - type: string
    - type: array
      items:
        type: string
  description: Prompt input - can be a string or array of strings
# Non-streaming text completion result. Choices reuse the chat
# BifrostResponseChoice schema.
TextCompletionResponse:
  type: object
  properties:
    id:
      type: string
    choices:
      type: array
      items:
        $ref: './chat.yaml#/BifrostResponseChoice'
    model:
      type: string
    object:
      type: string
    system_fingerprint:
      type: string
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Streamed text completion chunk. Same properties as TextCompletionResponse
# except that system_fingerprint is absent.
TextCompletionStreamResponse:
  type: object
  description: Streaming text completion response
  properties:
    id:
      type: string
    choices:
      type: array
      items:
        $ref: './chat.yaml#/BifrostResponseChoice'
    model:
      type: string
    object:
      type: string
    usage:
      $ref: './usage.yaml#/BifrostLLMUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,150 @@
# Transcription API schemas
# Transcription request. `model` and the binary `file` are required.
TranscriptionRequest:
  type: object
  required:
    - model
    - file
  properties:
    model:
      type: string
      description: Model in provider/model format
    file:
      type: string
      format: binary
      description: Audio file to transcribe
    fallbacks:
      type: array
      items:
        type: string
    stream:
      type: boolean
    language:
      type: string
    prompt:
      type: string
    response_format:
      type: string
      enum:
        - json
        - text
        - srt
        - verbose_json
        - vtt
    file_format:
      type: string
# Transcription result: the text plus optional logprobs, segment and word
# arrays, and usage.
TranscriptionResponse:
  type: object
  properties:
    duration:
      type: number
    language:
      type: string
    logprobs:
      type: array
      items:
        $ref: '#/TranscriptionLogProb'
    segments:
      type: array
      items:
        $ref: '#/TranscriptionSegment'
    task:
      type: string
    text:
      type: string
    usage:
      $ref: '#/TranscriptionUsage'
    words:
      type: array
      items:
        $ref: '#/TranscriptionWord'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Log-probability entry for one transcribed token.
TranscriptionLogProb:
  type: object
  properties:
    token:
      type: string
    logprob:
      type: number
    bytes:
      type: array
      items:
        type: integer
# One transcript segment with numeric start/end bounds, its text and tokens,
# and per-segment statistics.
TranscriptionSegment:
  type: object
  properties:
    id:
      type: integer
    seek:
      type: integer
    start:
      type: number
    end:
      type: number
    text:
      type: string
    tokens:
      type: array
      items:
        type: integer
    temperature:
      type: number
    avg_logprob:
      type: number
    compression_ratio:
      type: number
    no_speech_prob:
      type: number
# One transcribed word with numeric start and end bounds.
TranscriptionWord:
  type: object
  properties:
    word:
      type: string
    start:
      type: number
    end:
      type: number
# Usage record. `type` is `tokens` or `duration`; the schema does not tie the
# other fields to either value.
TranscriptionUsage:
  type: object
  properties:
    type:
      type: string
      enum:
        - tokens
        - duration
    input_tokens:
      type: integer
    input_token_details:
      $ref: '#/TranscriptionUsageInputTokenDetails'
    output_tokens:
      type: integer
    total_tokens:
      type: integer
    seconds:
      type: integer
# Input-token breakdown into text and audio portions.
TranscriptionUsageInputTokenDetails:
  type: object
  properties:
    text_tokens:
      type: integer
    audio_tokens:
      type: integer
# One streamed transcription event. `type` is either a text delta or the final
# done event.
TranscriptionStreamResponse:
  type: object
  properties:
    type:
      type: string
      enum:
        - transcript.text.delta
        - transcript.text.done
    delta:
      type: string
    logprobs:
      type: array
      items:
        $ref: '#/TranscriptionLogProb'
    text:
      type: string
    usage:
      $ref: '#/TranscriptionUsage'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'

View File

@@ -0,0 +1,89 @@
# Usage and cost related schemas
# Token usage for a request: prompt/completion/total counts, per-side detail
# breakdowns, and an optional cost object.
BifrostLLMUsage:
  type: object
  description: Token usage information
  properties:
    prompt_tokens:
      type: integer
      description: >
        Total input tokens including any prompt-cache tokens (read + write).
        Subtract prompt_tokens_details.cached_read_tokens and
        prompt_tokens_details.cached_write_tokens to get the non-cached portion.
    prompt_tokens_details:
      $ref: '#/ChatPromptTokensDetails'
    completion_tokens:
      type: integer
      description: Number of output/completion tokens generated.
    completion_tokens_details:
      $ref: '#/ChatCompletionTokensDetails'
    total_tokens:
      type: integer
    cost:
      $ref: '#/BifrostCost'
# Breakdown of prompt (input) tokens by modality and by prompt-cache activity.
ChatPromptTokensDetails:
  type: object
  properties:
    text_tokens:
      type: integer
    audio_tokens:
      type: integer
    image_tokens:
      type: integer
    cached_read_tokens:
      type: integer
      description: >
        Tokens served from the prompt cache (cache hit). These tokens are already
        included in prompt_tokens and are billed at the reduced cache-read rate.
        Populated for all providers that support prompt caching (Anthropic, Bedrock,
        OpenAI, Gemini, xAI, etc.).
    cached_write_tokens:
      type: integer
      description: >
        Tokens written to the prompt cache on this request (cache creation / write).
        These tokens are already included in prompt_tokens and are billed at the
        cache-creation rate. Populated for providers that separately report cache
        write tokens (Anthropic, Bedrock).
# Breakdown of completion (output) tokens; every field is an integer counter.
ChatCompletionTokensDetails:
  type: object
  properties:
    text_tokens:
      type: integer
    accepted_prediction_tokens:
      type: integer
    audio_tokens:
      type: integer
    citation_tokens:
      type: integer
    num_search_queries:
      type: integer
    reasoning_tokens:
      type: integer
    image_tokens:
      type: integer
    rejected_prediction_tokens:
      type: integer
# Per-request cost components and their total; all values are numbers.
# NOTE(review): currency/unit is not stated in this schema - confirm.
BifrostCost:
  type: object
  description: Cost breakdown for the request
  properties:
    input_tokens_cost:
      type: number
    output_tokens_cost:
      type: number
    reasoning_tokens_cost:
      type: number
      description: Cost for reasoning/thinking tokens (reasoning models)
    citation_tokens_cost:
      type: number
      description: Cost for citation tokens
    search_queries_cost:
      type: number
      description: Cost for web search queries
    request_cost:
      type: number
    total_cost:
      type: number

View File

@@ -0,0 +1,254 @@
# Video Generation Schemas
# Request to start a video generation job; `model` and `prompt` are required.
VideoGenerationRequest:
  type: object
  required:
    - model
    - prompt
  properties:
    model:
      type: string
      description: Model identifier in format `provider/model`
    prompt:
      type: string
      description: Text prompt describing the video to generate
    input_reference:
      type: string
      description: >-
        Optional reference image for image-to-video. OpenAI and Gemini require a base64 data URL
        (e.g., `data:image/png;base64,...`). Runway and Replicate accept both data URLs and plain URLs.
    # Duration is a string, not a number (see the description's example).
    seconds:
      type: string
      description: Duration of the video in seconds as a string (e.g., "4")
    size:
      type: string
      description: Resolution of the generated video (e.g., `1280x720`, `720x1280`, `1920x1080`)
    negative_prompt:
      type: string
      description: Text describing what to avoid in the generated video
    seed:
      type: integer
      description: Seed for reproducible generation
    video_uri:
      type: string
      description: Source video URI for video-to-video generation (provider-specific, e.g. GCS URI)
    audio:
      type: boolean
      description: Enable audio generation in the video (supported by select providers/models)
    fallbacks:
      type: array
      items:
        $ref: './common.yaml#/Fallback'
      description: Fallback models to try if primary model fails
# Video generation job as returned by the API: job metadata, status/progress,
# generated outputs (`videos`), plus error and content-filter details.
VideoGenerationResponse:
  type: object
  properties:
    id:
      type: string
      description: >-
        Provider-native job ID. To use in path parameters (retrieve/delete/download),
        combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
    object:
      type: string
      enum: [video]
      description: Object type, always "video"
    model:
      type: string
      description: Model used for video generation
    status:
      $ref: '#/VideoStatus'
    progress:
      type: number
      format: float
      minimum: 0
      maximum: 100
      description: Approximate completion percentage (0-100)
    prompt:
      type: string
      description: Prompt used to generate the video
    remixed_from_video_id:
      type: string
      description: Source video ID if this is a remix
    seconds:
      type: string
      description: Duration of the generated video in seconds as a string (e.g., "4")
    size:
      $ref: '#/VideoSize'
    # Timestamps below are Unix seconds (int64), not ISO date-time strings.
    created_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when the job was created
    completed_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when the job completed
    expires_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when downloadable assets expire
    videos:
      type: array
      description: Generated video outputs (only present when status is "completed")
      items:
        type: object
        properties:
          # `type` selects which of `url` / `base64` carries the payload.
          type:
            type: string
            enum: [url, base64]
            description: Output format of this video
          url:
            type: string
            format: uri
            description: URL to the generated video (present when type is "url")
          base64:
            type: string
            description: Base64-encoded video content (present when type is "base64")
          content_type:
            type: string
            description: MIME type of the video (e.g., "video/mp4")
    error:
      $ref: '#/VideoError'
    content_filter:
      $ref: '#/VideoContentFilter'
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Request body for remixing a video; only a `prompt` is accepted and it is required.
VideoRemixRequest:
  type: object
  required:
    - prompt
  properties:
    prompt:
      type: string
      description: Text prompt describing how to remix the video
# Paginated list of video jobs (VideoObject items) with first/last IDs and a has_more flag.
VideoListResponse:
  type: object
  properties:
    object:
      type: string
      enum: [list]
      description: Object type, always "list"
    data:
      type: array
      items:
        $ref: '#/VideoObject'
      description: Array of video generation jobs
    first_id:
      type: string
      description: ID of the first item in the list
    last_id:
      type: string
      description: ID of the last item in the list
    has_more:
      type: boolean
      description: Whether there are more results available
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# List-item form of a video job. Shares the metadata fields of
# VideoGenerationResponse but declares no `videos`, `content_filter`
# or `extra_fields` properties.
VideoObject:
  type: object
  properties:
    id:
      type: string
      description: >-
        Provider-native video ID. To use in path parameters (retrieve/delete/download),
        combine as `{id}:{provider}` (e.g., `task_abc123:runway`)
    object:
      type: string
      enum: [video]
      description: Object type, always "video"
    model:
      type: string
      description: Model used for generation
    status:
      $ref: '#/VideoStatus'
    progress:
      type: number
      format: float
      minimum: 0
      maximum: 100
      description: Approximate completion percentage (0-100)
    prompt:
      type: string
      description: Prompt used to generate the video
    remixed_from_video_id:
      type: string
      description: Source video ID if this is a remix
    seconds:
      type: string
      description: Duration of the video in seconds as a string (e.g., "4")
    size:
      $ref: '#/VideoSize'
    created_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when the job was created
    completed_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when the job completed
    expires_at:
      type: integer
      format: int64
      description: Unix timestamp (seconds) when downloadable assets expire
    error:
      $ref: '#/VideoError'
# Acknowledgement returned after deleting a video.
VideoDeleteResponse:
  type: object
  properties:
    id:
      type: string
      description: ID of the deleted video
    object:
      type: string
      enum: [video.deleted]
      description: Object type, always "video.deleted"
    deleted:
      type: boolean
      description: Whether the video was successfully deleted
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Lifecycle states of a video job; referenced by VideoGenerationResponse and VideoObject.
VideoStatus:
  type: string
  enum: [queued, in_progress, completed, failed]
  description: |
    Current lifecycle status of the video generation job:
    - `queued`: Job is waiting to be processed
    - `in_progress`: Video is currently being generated
    - `completed`: Video generation completed successfully
    - `failed`: Video generation failed
# Free-form resolution string; no pattern or enum is enforced.
VideoSize:
  type: string
  description: Resolution of the generated video (e.g., "1920x1080")
# Error attached to a video job: a code string and a readable message.
VideoError:
  type: object
  properties:
    code:
      type: string
      description: Error code
    message:
      type: string
      description: Human-readable error message
# Summary of safety filtering: how many items were filtered and why.
VideoContentFilter:
  type: object
  description: Information about content that was filtered due to safety policies
  properties:
    filtered_count:
      type: integer
      description: Number of items filtered
    reasons:
      type: array
      items:
        type: string
      description: Human-readable reasons for filtering

View File

@@ -0,0 +1,105 @@
# Anthropic Integration Batch API Schemas
# Body for creating an Anthropic-style message batch; `requests` is required.
AnthropicBatchCreateRequest:
  type: object
  required:
    - requests
  properties:
    requests:
      type: array
      items:
        $ref: '#/AnthropicBatchRequestItem'
      description: Array of batch request items
# One batch entry: a caller-chosen custom_id plus request params (both required).
AnthropicBatchRequestItem:
  type: object
  required:
    - custom_id
    - params
  properties:
    custom_id:
      type: string
      description: Unique identifier for this request
    # Typed as a free-form object here; the description points at
    # AnthropicMessageRequest but no $ref enforces that shape.
    params:
      type: object
      description: Request parameters (same as AnthropicMessageRequest)
# Message-batch object. Also reused (via $ref) by the list, retrieve and
# cancel response schemas in this file.
AnthropicBatchCreateResponse:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      default: message_batch
    processing_status:
      type: string
      enum:
        - in_progress
        - ended
        - canceling
    request_counts:
      $ref: '#/AnthropicBatchRequestCounts'
    # ended_at, archived_at, cancel_initiated_at and results_url are nullable.
    ended_at:
      type: string
      format: date-time
      nullable: true
    created_at:
      type: string
      format: date-time
    expires_at:
      type: string
      format: date-time
    archived_at:
      type: string
      format: date-time
      nullable: true
    cancel_initiated_at:
      type: string
      format: date-time
      nullable: true
    results_url:
      type: string
      nullable: true
# Per-state integer tallies of the requests inside a batch.
AnthropicBatchRequestCounts:
  type: object
  properties:
    processing:
      type: integer
    succeeded:
      type: integer
    errored:
      type: integer
    canceled:
      type: integer
    expired:
      type: integer
# Pagination parameters for listing batches; page_size defaults to 20.
AnthropicBatchListRequest:
  type: object
  properties:
    page_size:
      type: integer
      default: 20
    page_token:
      type: string
      description: Cursor for pagination
# Page of batch objects with first/last IDs and a has_more flag.
AnthropicBatchListResponse:
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/AnthropicBatchCreateResponse'
    has_more:
      type: boolean
    first_id:
      type: string
    last_id:
      type: string
# Alias: retrieving a batch returns the same object as creating one.
AnthropicBatchRetrieveResponse:
  $ref: '#/AnthropicBatchCreateResponse'
# Alias: cancelling a batch returns the same object as creating one.
AnthropicBatchCancelResponse:
  $ref: '#/AnthropicBatchCreateResponse'

View File

@@ -0,0 +1,53 @@
# Anthropic Integration Common Types
# Error envelope: outer `type` (default "error") wrapping an inner error
# object that carries its own type and message.
AnthropicError:
  type: object
  properties:
    type:
      type: string
      default: error
    error:
      type: object
      properties:
        type:
          type: string
          description: Error type (e.g., invalid_request_error, api_error)
        message:
          type: string
          description: Error message
# Model descriptor returned by the model listing endpoint.
AnthropicModel:
  type: object
  properties:
    id:
      type: string
      description: Model identifier
    type:
      type: string
      default: model
    display_name:
      type: string
    created_at:
      type: string
      format: date-time
# Page of AnthropicModel entries with first/last IDs and a has_more flag.
AnthropicListModelsResponse:
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/AnthropicModel'
    has_more:
      type: boolean
    first_id:
      type: string
    last_id:
      type: string
# Anthropic Message Roles
# Only two roles are allowed here; there is no `system` role in this enum.
AnthropicMessageRole:
  type: string
  enum: [user, assistant]

View File

@@ -0,0 +1,13 @@
# Anthropic Integration Count Tokens Schemas
# Token-counting request: a single-entry allOf that inherits the full
# message request schema from messages.yaml.
AnthropicCountTokensRequest:
  # Uses the same format as AnthropicMessageRequest
  allOf:
    - $ref: './messages.yaml#/AnthropicMessageRequest'
# Token-counting result: just the input token count.
AnthropicCountTokensResponse:
  type: object
  properties:
    input_tokens:
      type: integer
      description: Number of input tokens

View File

@@ -0,0 +1,102 @@
# Anthropic Integration Files API Schemas
# File upload body; `file` (binary) is required, filename and purpose are optional.
AnthropicFileUploadRequest:
  type: object
  required:
    - file
  properties:
    file:
      type: string
      format: binary
      description: File to upload (raw file content)
    filename:
      type: string
      description: Original filename
    purpose:
      type: string
      description: Purpose of the file (e.g., "batch")
# File metadata object. Also reused (via $ref) by the file list and
# retrieve responses in this file.
AnthropicFileUploadResponse:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      default: file
    filename:
      type: string
    mime_type:
      type: string
      description: MIME type of the file
    size_bytes:
      type: integer
      description: Size of the file in bytes
    created_at:
      type: string
      format: date-time
    downloadable:
      type: boolean
# Pagination/sort parameters for listing files; limit defaults to 30.
AnthropicFileListRequest:
  type: object
  properties:
    limit:
      type: integer
      default: 30
    after:
      type: string
      description: Cursor for pagination (after_id)
    order:
      type: string
      enum:
        - asc
        - desc
# Page of file metadata objects with first/last IDs and a has_more flag.
AnthropicFileListResponse:
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/AnthropicFileUploadResponse'
    has_more:
      type: boolean
    first_id:
      type: string
    last_id:
      type: string
# Parameters for retrieving file metadata; file_id is required.
AnthropicFileRetrieveRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      type: string
# Alias: retrieving a file returns the same metadata object as uploading one.
AnthropicFileRetrieveResponse:
  $ref: '#/AnthropicFileUploadResponse'
# Parameters for deleting a file; file_id is required.
AnthropicFileDeleteRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      type: string
# Deletion acknowledgement: the file id and a type that defaults to "file_deleted".
AnthropicFileDeleteResponse:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      default: file_deleted
# Parameters for fetching file content; file_id is required.
AnthropicFileContentRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      type: string

View File

@@ -0,0 +1,403 @@
# Anthropic Integration Messages API Schemas
# Request body for the Anthropic-compatible Messages endpoint.
# `model`, `max_tokens` and `messages` are required; everything else is optional.
AnthropicMessageRequest:
  type: object
  required:
    - model
    - max_tokens
    - messages
  properties:
    model:
      type: string
      description: Model identifier (e.g., claude-3-opus-20240229)
      example: claude-3-opus-20240229
    max_tokens:
      type: integer
      description: Maximum tokens to generate
    messages:
      type: array
      items:
        $ref: '#/AnthropicMessage'
      description: List of messages in the conversation
    # OpenAPI 3.0 ignores keywords that sit beside a bare $ref, so the
    # reference is wrapped in allOf to keep the description effective.
    system:
      description: System prompt
      allOf:
        - $ref: '#/AnthropicContent'
    # Same allOf wrapping as `system`, for the same reason.
    cache_control:
      description: Automatic caching directives for the whole request
      allOf:
        - $ref: '../../inference/common.yaml#/CacheControl'
    metadata:
      $ref: '#/AnthropicMetadata'
    stream:
      type: boolean
      description: Whether to stream the response
    temperature:
      type: number
      minimum: 0
      maximum: 1
    top_p:
      type: number
    top_k:
      type: integer
    stop_sequences:
      type: array
      items:
        type: string
    tools:
      type: array
      items:
        $ref: '#/AnthropicTool'
    tool_choice:
      $ref: '#/AnthropicToolChoice'
    mcp_servers:
      type: array
      items:
        $ref: '#/AnthropicMCPServer'
      description: MCP servers configuration (requires beta header)
    thinking:
      $ref: '#/AnthropicThinking'
    output_format:
      type: object
      description: Structured output format (requires beta header)
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# One conversation turn; both `role` and `content` are required.
AnthropicMessage:
  type: object
  required:
    - role
    - content
  properties:
    # Role enum lives in the sibling common.yaml fragment.
    role:
      $ref: './common.yaml#/AnthropicMessageRole'
    content:
      $ref: '#/AnthropicContent'
# Message content: either a plain string or a list of content blocks.
AnthropicContent:
  description: Content - can be a string or array of content blocks
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/AnthropicContentBlock'
# A single content block. `type` is the only required field; the remaining
# properties are a flat union whose applicability depends on `type`
# (see each property's description).
AnthropicContentBlock:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - text
        - image
        - document
        - tool_use
        - server_tool_use
        - tool_result
        - web_search_result
        - mcp_tool_use
        - mcp_tool_result
        - thinking
        - redacted_thinking
    text:
      type: string
      description: For text content
    thinking:
      type: string
      description: For thinking content
    signature:
      type: string
      description: For signature content
    data:
      type: string
      description: For data content (encrypted data for redacted thinking)
    tool_use_id:
      type: string
      description: For tool_result content
    id:
      type: string
      description: For tool_use content
    name:
      type: string
      description: For tool_use content
    input:
      type: object
      description: For tool_use content
    server_name:
      type: string
      description: For mcp_tool_use content
    # OpenAPI 3.0 ignores keywords beside a bare $ref, so the three described
    # references below are wrapped in allOf to keep their descriptions.
    content:
      description: For tool_result content
      allOf:
        - $ref: '#/AnthropicContent'
    source:
      description: For image/document content
      allOf:
        - $ref: '#/AnthropicSource'
    cache_control:
      $ref: '../../inference/common.yaml#/CacheControl'
    citations:
      description: For document content
      allOf:
        - $ref: '#/AnthropicCitationsConfig'
    context:
      type: string
      description: For document content
    title:
      type: string
      description: For document content
# Source of an image/document block; `type` is required and selects which
# of the other fields applies.
AnthropicSource:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - base64
        - url
        - text
        - content_block
    media_type:
      type: string
      description: MIME type (e.g., image/jpeg, application/pdf)
    data:
      type: string
      description: Base64-encoded data (for base64 type)
    url:
      type: string
      description: URL (for url type)
# Citations switch referenced by AnthropicContentBlock.citations.
AnthropicCitationsConfig:
  type: object
  properties:
    enabled:
      type: boolean
# Request metadata; only `user_id` is modelled.
AnthropicMetadata:
  type: object
  properties:
    user_id:
      type: string
# Thinking configuration: an enabled/disabled switch plus an integer token budget.
AnthropicThinking:
  type: object
  properties:
    type:
      type: string
      enum:
        - enabled
        - disabled
    budget_tokens:
      type: integer
# Tool definition. `type` is either `custom` or one of the versioned
# built-in tool identifiers; the remaining fields are a flat union of
# settings for the different tool kinds.
AnthropicTool:
  type: object
  properties:
    type:
      type: string
      enum:
        - custom
        - bash_20250124
        - computer_20250124
        - computer_20251124
        - code_execution_20250522
        - code_execution_20250825
        - code_execution_20260120
        - text_editor_20250124
        - text_editor_20250429
        - text_editor_20250728
        - web_search_20250305
        - web_search_20260209
        - web_fetch_20250910
        - web_fetch_20260209
        - web_fetch_20260309
        - memory_20250818
        - tool_search_tool_bm25
        - tool_search_tool_bm25_20251119
        - tool_search_tool_regex
        - tool_search_tool_regex_20251119
    name:
      type: string
      description: Tool name (for custom tools)
    description:
      type: string
    input_schema:
      type: object
      description: JSON Schema for tool input
    cache_control:
      $ref: '../../inference/common.yaml#/CacheControl'
    # Computer use tool settings
    display_width_px:
      type: integer
    display_height_px:
      type: integer
    display_number:
      type: integer
    enable_zoom:
      type: boolean
    # Web search settings
    max_uses:
      type: integer
    allowed_domains:
      type: array
      items:
        type: string
    blocked_domains:
      type: array
      items:
        type: string
    user_location:
      $ref: '#/AnthropicToolWebSearchUserLocation'
# User location for AnthropicTool.user_location; `type` can only be `approximate`.
AnthropicToolWebSearchUserLocation:
  type: object
  properties:
    type:
      type: string
      enum:
        - approximate
    city:
      type: string
    country:
      type: string
    timezone:
      type: string
# Tool selection policy. The oneOf currently has a single object branch.
AnthropicToolChoice:
  oneOf:
    - type: object
      properties:
        type:
          type: string
          enum:
            - auto
            - any
            - tool
            - none
        # Conditional requirement is documented only; the schema does not enforce it.
        name:
          type: string
          description: Required when type is 'tool'
        disable_parallel_tool_use:
          type: boolean
# One entry of AnthropicMessageRequest.mcp_servers.
AnthropicMCPServer:
  type: object
  properties:
    type:
      type: string
    name:
      type: string
    url:
      type: string
    authorization_token:
      type: string
      description: Authorization token for the MCP server
    tool_configuration:
      $ref: '#/AnthropicMCPToolConfig'
# Per-server tool configuration: an enable flag and a list of allowed tool names.
AnthropicMCPToolConfig:
  type: object
  properties:
    enabled:
      type: boolean
    allowed_tools:
      type: array
      items:
        type: string
# Response types
# Non-streaming Messages response; also embedded in AnthropicStreamEvent.message.
AnthropicMessageResponse:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      default: message
    role:
      type: string
      default: assistant
    content:
      type: array
      items:
        $ref: '#/AnthropicContentBlock'
    model:
      type: string
    stop_reason:
      type: string
      # The enum lists null, so the property must also be declared nullable;
      # in OpenAPI 3.0 a `type: string` schema without `nullable: true`
      # rejects null regardless of the enum.
      nullable: true
      enum:
        - end_turn
        - max_tokens
        - stop_sequence
        - tool_use
        - pause_turn
        - refusal
        - model_context_window_exceeded
        - null
    stop_sequence:
      type: string
      nullable: true
    usage:
      $ref: '#/AnthropicUsage'
# Token usage in Anthropic's shape: input/output counts, cache read/creation
# counts, and a nested cache-creation breakdown.
AnthropicUsage:
  type: object
  properties:
    input_tokens:
      type: integer
    output_tokens:
      type: integer
    cache_creation_input_tokens:
      type: integer
    cache_read_input_tokens:
      type: integer
    cache_creation:
      $ref: '#/AnthropicUsageCacheCreation'
# Cache-creation input tokens split into the 5m and 1h ephemeral counters.
AnthropicUsageCacheCreation:
  type: object
  properties:
    ephemeral_5m_input_tokens:
      type: integer
    ephemeral_1h_input_tokens:
      type: integer
# Stream event types
# Streaming event envelope. `type` names the event; the other properties
# are a flat union of per-event payloads.
AnthropicStreamEvent:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      enum:
        - message_start
        - content_block_start
        - content_block_delta
        - content_block_stop
        - message_delta
        - message_stop
        - ping
        - error
    message:
      $ref: '#/AnthropicMessageResponse'
    index:
      type: integer
    content_block:
      $ref: '#/AnthropicContentBlock'
    delta:
      $ref: '#/AnthropicStreamDelta'
    usage:
      $ref: '#/AnthropicUsage'
    error:
      $ref: '#/AnthropicStreamError'
# Delta payload of a stream event: a typed incremental fragment plus
# stop_reason/stop_sequence fields.
AnthropicStreamDelta:
  type: object
  properties:
    type:
      type: string
      enum:
        - text_delta
        - input_json_delta
        - thinking_delta
        - signature_delta
    text:
      type: string
    partial_json:
      type: string
    thinking:
      type: string
    signature:
      type: string
    stop_reason:
      type: string
    stop_sequence:
      type: string
# Error payload carried by AnthropicStreamEvent.error.
AnthropicStreamError:
  type: object
  properties:
    type:
      type: string
    message:
      type: string

View File

@@ -0,0 +1,62 @@
# Anthropic Integration Text Completions Schemas (Legacy Complete API)
# Legacy text-completion request; model, prompt and max_tokens_to_sample are required.
AnthropicTextRequest:
  type: object
  required:
    - model
    - prompt
    - max_tokens_to_sample
  properties:
    model:
      type: string
      description: Model identifier
    prompt:
      type: string
      description: The prompt to complete
    max_tokens_to_sample:
      type: integer
      description: Maximum tokens to generate
    stream:
      type: boolean
    temperature:
      type: number
      minimum: 0
      maximum: 1
    top_p:
      type: number
    top_k:
      type: integer
    stop_sequences:
      type: array
      items:
        type: string
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Legacy text-completion response: the completion text, stop reason, model
# and an inline usage object.
AnthropicTextResponse:
  type: object
  properties:
    type:
      type: string
      default: completion
    id:
      type: string
    completion:
      type: string
    stop_reason:
      type: string
      # The enum lists null, so the property must also be declared nullable;
      # in OpenAPI 3.0 a `type: string` schema without `nullable: true`
      # rejects null regardless of the enum.
      nullable: true
      enum:
        - stop_sequence
        - max_tokens
        - null
    model:
      type: string
    usage:
      type: object
      properties:
        input_tokens:
          type: integer
          description: Number of input tokens used
        output_tokens:
          type: integer
          description: Number of output tokens generated

View File

@@ -0,0 +1,153 @@
# AWS Bedrock Batch API Schemas
# Request to create a Bedrock batch job; roleArn and both S3 data configs
# are required.
BedrockBatchJobRequest:
  type: object
  required:
    - roleArn
    - inputDataConfig
    - outputDataConfig
  properties:
    modelId:
      type: string
      description: Model ID for the batch job (optional, can be specified in request)
    jobName:
      type: string
      description: Name for the batch job
    roleArn:
      type: string
      description: IAM role ARN for the job
    inputDataConfig:
      type: object
      properties:
        s3InputDataConfig:
          type: object
          properties:
            s3Uri:
              type: string
              description: S3 URI for input data
    outputDataConfig:
      type: object
      properties:
        s3OutputDataConfig:
          type: object
          properties:
            s3Uri:
              type: string
              description: S3 URI for output data
    timeoutDurationInHours:
      type: integer
      description: Timeout in hours
    # Tags are a list of {key, value} string pairs.
    tags:
      type: array
      items:
        type: object
        properties:
          key:
            type: string
          value:
            type: string
# Full batch job description; also the target of BedrockBatchRetrieveResponse.
BedrockBatchJobResponse:
  type: object
  properties:
    jobArn:
      type: string
    status:
      type: string
      enum:
        - Submitted
        - InProgress
        - Completed
        - Failed
        - Stopping
        - Stopped
        - PartiallyCompleted
        - Expired
        - Validating
        - Scheduled
    jobName:
      type: string
    modelId:
      type: string
    roleArn:
      type: string
    # Unlike the request schema, the data configs are untyped objects here.
    inputDataConfig:
      type: object
    outputDataConfig:
      type: object
    vpcConfig:
      type: object
      properties:
        securityGroupIds:
          type: array
          items:
            type: string
        subnetIds:
          type: array
          items:
            type: string
    submitTime:
      type: string
      format: date-time
    lastModifiedTime:
      type: string
      format: date-time
    endTime:
      type: string
      format: date-time
    message:
      type: string
    clientRequestToken:
      type: string
    jobExpirationTime:
      type: string
      format: date-time
    timeoutDurationInHours:
      type: integer
# Paging and filter parameters for listing batch jobs.
BedrockBatchListRequest:
  type: object
  properties:
    maxResults:
      type: integer
    nextToken:
      type: string
    statusEquals:
      type: string
    nameContains:
      type: string
# Page of batch job summaries plus the token for the next page.
BedrockBatchListResponse:
  type: object
  properties:
    invocationJobSummaries:
      type: array
      items:
        $ref: '#/BedrockBatchJobSummary'
    nextToken:
      type: string
# Condensed job record used in list responses. `status` is an unconstrained
# string here (no enum, unlike BedrockBatchJobResponse).
BedrockBatchJobSummary:
  type: object
  properties:
    jobArn:
      type: string
    jobName:
      type: string
    modelId:
      type: string
    status:
      type: string
    submitTime:
      type: string
      format: date-time
    lastModifiedTime:
      type: string
      format: date-time
    endTime:
      type: string
      format: date-time
    message:
      type: string
# Alias of BedrockBatchJobResponse. The reference is wrapped in allOf
# because OpenAPI 3.0 ignores keywords (here: description) placed beside
# a bare $ref.
BedrockBatchRetrieveResponse:
  description: Uses same structure as BedrockBatchJobResponse
  allOf:
    - $ref: '#/BedrockBatchJobResponse'
# Result of a cancel call: the job ARN and its status string.
BedrockBatchCancelResponse:
  type: object
  properties:
    jobArn:
      type: string
    status:
      type: string

View File

@@ -0,0 +1,15 @@
# AWS Bedrock Integration Common Types
# Error body for the Bedrock integration: a message and a type string.
BedrockError:
  type: object
  properties:
    message:
      type: string
    type:
      type: string
# Roles permitted on a BedrockMessage.
BedrockMessageRole:
  type: string
  enum: [user, assistant]

View File

@@ -0,0 +1,367 @@
# AWS Bedrock Converse API Schemas
# Converse request body. No property is marked required.
BedrockConverseRequest:
  type: object
  properties:
    messages:
      type: array
      items:
        $ref: '#/BedrockMessage'
      description: Array of messages for the conversation
    system:
      type: array
      items:
        $ref: '#/BedrockSystemMessage'
      description: System messages/prompts
    inferenceConfig:
      $ref: '#/BedrockInferenceConfig'
    toolConfig:
      $ref: '#/BedrockToolConfig'
    guardrailConfig:
      $ref: '#/BedrockGuardrailConfig'
    additionalModelRequestFields:
      type: object
      description: Model-specific parameters
    additionalModelResponseFieldPaths:
      type: array
      items:
        type: string
    performanceConfig:
      $ref: '#/BedrockPerformanceConfig'
    # Map of variable name -> BedrockPromptVariable.
    promptVariables:
      type: object
      additionalProperties:
        $ref: '#/BedrockPromptVariable'
    # Map of string -> string.
    requestMetadata:
      type: object
      additionalProperties:
        type: string
    serviceTier:
      $ref: '#/BedrockServiceTier'
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# One conversation message; role and content (a list of content blocks) are required.
BedrockMessage:
  type: object
  required:
    - role
    - content
  properties:
    role:
      $ref: './common.yaml#/BedrockMessageRole'
    content:
      type: array
      items:
        $ref: '#/BedrockContentBlock'
# One entry of BedrockConverseRequest.system: text, guard content or a cache point.
BedrockSystemMessage:
  type: object
  properties:
    text:
      type: string
    guardContent:
      $ref: '#/BedrockGuardContent'
    cachePoint:
      $ref: '#/BedrockCachePoint'
# Content block modelled as an object with one property per block kind.
# NOTE(review): presumably exactly one property is set per block - the
# schema does not enforce this; confirm.
BedrockContentBlock:
  type: object
  properties:
    text:
      type: string
    image:
      $ref: '#/BedrockImageSource'
    document:
      $ref: '#/BedrockDocumentSource'
    toolUse:
      $ref: '#/BedrockToolUse'
    toolResult:
      $ref: '#/BedrockToolResult'
    guardContent:
      $ref: '#/BedrockGuardContent'
    reasoningContent:
      $ref: '#/BedrockReasoningContent'
    json:
      type: object
      description: JSON content for tool call results
    cachePoint:
      $ref: '#/BedrockCachePoint'
# Image block: an image format plus base64 (`format: byte`) source bytes.
BedrockImageSource:
  type: object
  properties:
    format:
      type: string
      enum:
        - jpeg
        - png
        - gif
        - webp
    source:
      type: object
      properties:
        bytes:
          type: string
          format: byte
# Document block: format, name, and a source given as base64 bytes or plain text.
BedrockDocumentSource:
  type: object
  properties:
    format:
      type: string
      enum:
        - pdf
        - csv
        - doc
        - docx
        - xls
        - xlsx
        - html
        - txt
        - md
    name:
      type: string
    source:
      type: object
      properties:
        bytes:
          type: string
          format: byte
        text:
          type: string
          description: Plain text content (for text-based documents)
# Tool invocation: the tool-use ID, tool name and an input object.
BedrockToolUse:
  type: object
  properties:
    toolUseId:
      type: string
    name:
      type: string
    input:
      type: object
# Tool result: matching tool-use ID, result content blocks, and a
# success/error status.
BedrockToolResult:
  type: object
  properties:
    toolUseId:
      type: string
    # Recursive: result content reuses BedrockContentBlock.
    content:
      type: array
      items:
        $ref: '#/BedrockContentBlock'
    status:
      type: string
      enum:
        - success
        - error
# Guard content: a `text` object holding the text and a list of qualifier strings.
BedrockGuardContent:
  type: object
  properties:
    text:
      type: object
      properties:
        text:
          type: string
        qualifiers:
          type: array
          items:
            type: string
# Reasoning content: a `reasoningText` object with text and signature strings.
BedrockReasoningContent:
  type: object
  properties:
    reasoningText:
      type: object
      properties:
        text:
          type: string
        signature:
          type: string
# Cache point marker; `type` accepts only the string "default".
BedrockCachePoint:
  type: object
  properties:
    type:
      type: string
      enum:
        - default
# Sampling/limit settings for a Converse request.
BedrockInferenceConfig:
  type: object
  properties:
    maxTokens:
      type: integer
    temperature:
      type: number
    topP:
      type: number
    stopSequences:
      type: array
      items:
        type: string
# Tool configuration: the available tools and the tool-choice policy.
BedrockToolConfig:
  type: object
  properties:
    tools:
      type: array
      items:
        $ref: '#/BedrockTool'
    toolChoice:
      $ref: '#/BedrockToolChoice'
# One tools entry: a tool specification (name, description, JSON input
# schema) or a cache point.
BedrockTool:
  type: object
  properties:
    toolSpec:
      type: object
      properties:
        name:
          type: string
        description:
          type: string
        inputSchema:
          type: object
          properties:
            json:
              type: object
    cachePoint:
      $ref: '#/BedrockCachePoint'
# Tool-choice policy expressed as one key per mode; `tool` names a specific tool.
BedrockToolChoice:
  type: object
  properties:
    auto:
      type: object
    any:
      type: object
    tool:
      type: object
      properties:
        name:
          type: string
# Guardrail selection (identifier + version) and trace switch.
BedrockGuardrailConfig:
  type: object
  properties:
    guardrailIdentifier:
      type: string
    guardrailVersion:
      type: string
    trace:
      type: string
      enum:
        - enabled
        - disabled
# Latency mode; used by both the Converse request and response.
BedrockPerformanceConfig:
  type: object
  properties:
    latency:
      type: string
      enum:
        - standard
        - optimized
# Value type of the BedrockConverseRequest.promptVariables map.
BedrockPromptVariable:
  type: object
  properties:
    text:
      type: string
# Service tier selector; used by both the Converse request and response.
BedrockServiceTier:
  type: object
  properties:
    type:
      type: string
      enum:
        - reserved
        - priority
        - default
        - flex
# Response types
# Non-streaming Converse response: output message, stop reason, usage,
# latency metrics and echo of performance/service-tier settings.
BedrockConverseResponse:
  type: object
  properties:
    output:
      type: object
      properties:
        message:
          $ref: '#/BedrockMessage'
    stopReason:
      type: string
      enum:
        - end_turn
        - tool_use
        - max_tokens
        - stop_sequence
        - guardrail_intervened
        - content_filtered
    usage:
      $ref: '#/BedrockUsage'
    metrics:
      type: object
      properties:
        latencyMs:
          type: integer
    additionalModelResponseFields:
      type: object
    trace:
      type: object
    performanceConfig:
      $ref: '#/BedrockPerformanceConfig'
    serviceTier:
      $ref: '#/BedrockServiceTier'
# Token usage in Bedrock's camelCase shape, including cache read/write counters.
BedrockUsage:
  type: object
  properties:
    inputTokens:
      type: integer
    outputTokens:
      type: integer
    totalTokens:
      type: integer
    cacheReadInputTokens:
      type: integer
    cacheWriteInputTokens:
      type: integer
# Stream event types
# Streaming event as one flat object: each event kind populates the
# properties its description names.
BedrockStreamEvent:
  type: object
  description: Flat structure for streaming events matching actual Bedrock API response
  properties:
    role:
      type: string
      description: For messageStart events
    contentBlockIndex:
      type: integer
      description: For content block events
    delta:
      $ref: '#/BedrockContentBlockDelta'
    stopReason:
      type: string
      description: For messageStop events
    start:
      $ref: '#/BedrockContentBlockStart'
    usage:
      $ref: '#/BedrockUsage'
    metrics:
      type: object
      properties:
        latencyMs:
          type: integer
    trace:
      type: object
    additionalModelResponseFields:
      type: object
    invokeModelRawChunk:
      type: string
      format: byte
      description: Raw bytes for legacy invoke stream
# Incremental content: text, reasoning text/signature, or a tool-use input fragment.
BedrockContentBlockDelta:
  type: object
  properties:
    text:
      type: string
    reasoningContent:
      type: object
      properties:
        text:
          type: string
        signature:
          type: string
    toolUse:
      type: object
      properties:
        # A string here, whereas BedrockToolUse.input is an object.
        input:
          type: string
# Start-of-block payload: identifies the tool use that is beginning.
BedrockContentBlockStart:
  type: object
  properties:
    toolUse:
      type: object
      properties:
        toolUseId:
          type: string
        name:
          type: string

View File

@@ -0,0 +1,50 @@
# AWS Bedrock Invoke API Schemas (Legacy/Raw Model Invocation)
# Raw invoke body: a loose union of generic and Anthropic-style parameter
# names. No property is marked required.
BedrockInvokeRequest:
  type: object
  description: |
    Raw model invocation request. The body format depends on the model provider.
    For Anthropic models, use Anthropic format. For other models, use their native format.
  properties:
    prompt:
      type: string
      description: Text prompt to complete
    max_tokens:
      type: integer
    max_tokens_to_sample:
      type: integer
      description: Anthropic-style max tokens
    temperature:
      type: number
    top_p:
      type: number
    top_k:
      type: integer
    stop:
      type: array
      items:
        type: string
    stop_sequences:
      type: array
      items:
        type: string
      description: Anthropic-style stop sequences
    messages:
      type: array
      items:
        type: object
      description: For Claude 3 models
    system:
      description: System prompt (string or array of strings)
      oneOf:
        - type: string
        - type: array
          items:
            type: string
    anthropic_version:
      type: string
# Raw invoke result: an open object that allows any additional properties.
BedrockInvokeResponse:
  type: object
  description: Raw model response. Format depends on the model provider.
  additionalProperties: true

View File

@@ -0,0 +1,364 @@
# Cohere v2 Chat API Schemas
# Request body for the Cohere v2 chat endpoint; `model` and `messages` are mandatory.
CohereChatRequest:
  type: object
  required:
    - model
    - messages
  properties:
    model:
      description: Model to use for chat completion
      type: string
      example: command-r-plus
    messages:
      description: Array of message objects
      type: array
      items:
        $ref: '#/CohereMessage'
    tools:
      type: array
      items:
        $ref: '#/CohereTool'
    tool_choice:
      $ref: '#/CohereToolChoice'
    temperature:
      type: number
      minimum: 0
      maximum: 1
    p:
      description: Top-p sampling
      type: number
    k:
      description: Top-k sampling
      type: integer
    max_tokens:
      type: integer
    stop_sequences:
      type: array
      items:
        type: string
    frequency_penalty:
      type: number
    presence_penalty:
      type: number
    stream:
      type: boolean
    safety_mode:
      type: string
      enum:
        - CONTEXTUAL
        - STRICT
        - NONE
    log_probs:
      type: boolean
    strict_tool_choice:
      type: boolean
    thinking:
      $ref: '#/CohereThinking'
    response_format:
      $ref: '#/CohereResponseFormat'
# One conversation message; only `role` is required.
CohereMessage:
  type: object
  required:
    - role
  properties:
    role:
      type: string
      enum:
        - system
        - user
        - assistant
        - tool
    content:
      $ref: '#/CohereMessageContent'
    tool_calls:
      type: array
      items:
        $ref: '#/CohereToolCall'
    tool_call_id:
      type: string
    tool_plan:
      description: Chain-of-thought style reflection (assistant only)
      type: string
# Union type: plain string, or a list of CohereContentBlock.
CohereMessageContent:
  description: Message content - can be a string or array of content blocks
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/CohereContentBlock'
# Typed content block; `type` is the required discriminator, and the sibling
# properties share the names of the enum values.
CohereContentBlock:
  type: object
  required:
    - type
  properties:
    type:
      type: string
      enum:
        - text
        - image_url
        - thinking
        - document
    text:
      type: string
    image_url:
      type: object
      properties:
        url:
          type: string
    thinking:
      type: string
    document:
      type: object
      properties:
        data:
          type: object
        id:
          type: string
# Tool definition; `function` is the only tool type declared.
CohereTool:
  type: object
  properties:
    type:
      type: string
      enum:
        - function
    function:
      type: object
      properties:
        name:
          type: string
        # `description` here is a property of the function, not schema metadata.
        description:
          type: string
        parameters:
          type: object
# String enum controlling whether the model may or must call tools.
CohereToolChoice:
  description: Tool choice mode - AUTO lets the model decide, NONE disables tools, REQUIRED forces tool use
  type: string
  enum:
    - AUTO
    - NONE
    - REQUIRED
# A tool invocation; `function.arguments` is typed as a string.
CohereToolCall:
  type: object
  properties:
    id:
      type: string
    type:
      type: string
      enum:
        - function
    function:
      type: object
      properties:
        name:
          type: string
        arguments:
          type: string
# Thinking toggle with an optional token budget (at least 1 when given).
CohereThinking:
  type: object
  properties:
    type:
      type: string
      enum:
        - enabled
        - disabled
    token_budget:
      type: integer
      minimum: 1
# Output format selector with an optional JSON schema.
CohereResponseFormat:
  type: object
  properties:
    type:
      description: Response format type
      type: string
      enum:
        - text
        - json_object
    schema:
      description: JSON schema for structured output (used with json_object type)
      type: object
# Response types
# Non-streaming chat response: message, finish reason, usage, optional logprobs.
CohereChatResponse:
  type: object
  properties:
    id:
      type: string
    finish_reason:
      type: string
      enum:
        - COMPLETE
        - STOP_SEQUENCE
        - MAX_TOKENS
        - TOOL_CALL
        - ERROR
        - TIMEOUT
    message:
      type: object
      properties:
        role:
          type: string
        content:
          type: array
          items:
            $ref: '#/CohereContentBlock'
        tool_calls:
          type: array
          items:
            $ref: '#/CohereToolCall'
        tool_plan:
          type: string
    usage:
      $ref: '#/CohereUsage'
    logprobs:
      description: Log probabilities (if requested)
      type: array
      items:
        $ref: '#/CohereLogProb'
# Usage summary: billed units, token counts and cached-token count.
CohereUsage:
  type: object
  properties:
    billed_units:
      $ref: '#/CohereBilledUnits'
    tokens:
      $ref: '#/CohereTokenUsage'
    cached_tokens:
      description: Cached tokens
      type: integer
# Billed quantities; every field is an optional integer.
CohereBilledUnits:
  type: object
  properties:
    input_tokens:
      description: Number of billed input tokens
      type: integer
    output_tokens:
      description: Number of billed output tokens
      type: integer
    search_units:
      description: Number of billed search units
      type: integer
    classifications:
      description: Number of billed classification units
      type: integer
# Token counts, declared separately from the billed units.
CohereTokenUsage:
  type: object
  properties:
    input_tokens:
      description: Number of input tokens used
      type: integer
    output_tokens:
      description: Number of output tokens produced
      type: integer
# Log-probability record for one text chunk.
CohereLogProb:
  type: object
  properties:
    token_ids:
      description: Token IDs of each token in text chunk
      type: array
      items:
        type: integer
    text:
      description: Text chunk for log probabilities
      type: string
    logprobs:
      description: Log probability of each token
      type: array
      items:
        type: number
# Stream event types
# Envelope for one streamed chat event; `type` names the event.
CohereChatStreamEvent:
  type: object
  properties:
    type:
      description: Type of streaming event
      type: string
      enum:
        - message-start
        - content-start
        - content-delta
        - content-end
        - tool-plan-delta
        - tool-call-start
        - tool-call-delta
        - tool-call-end
        - citation-start
        - citation-end
        - message-end
        - debug
    id:
      description: Event ID (for message-start)
      type: string
    index:
      description: Index for indexed events
      type: integer
    delta:
      $ref: '#/CohereStreamDelta'
# Delta carried by a stream event: message fragment, finish reason, usage.
CohereStreamDelta:
  type: object
  properties:
    message:
      $ref: '#/CohereStreamMessage'
    finish_reason:
      type: string
      enum:
        - COMPLETE
        - STOP_SEQUENCE
        - MAX_TOKENS
        - TOOL_CALL
        - ERROR
        - TIMEOUT
    usage:
      $ref: '#/CohereUsage'
# Message fragment inside a stream delta. `content`, `tool_calls` and
# `citations` each accept either a single object or an array of them.
CohereStreamMessage:
  type: object
  properties:
    role:
      description: Message role (for message-start)
      type: string
    content:
      description: Content for content events
      oneOf:
        - $ref: '#/CohereStreamContent'
        - type: array
          items:
            $ref: '#/CohereStreamContent'
    tool_plan:
      description: Tool plan content (for tool-plan-delta)
      type: string
    tool_calls:
      description: Tool calls (for tool-call events)
      oneOf:
        - $ref: '#/CohereToolCall'
        - type: array
          items:
            $ref: '#/CohereToolCall'
    citations:
      description: Citations (for citation events)
      oneOf:
        - $ref: '#/CohereCitation'
        - type: array
          items:
            $ref: '#/CohereCitation'
# Streamed content piece; declares only the text and thinking payloads.
CohereStreamContent:
  type: object
  properties:
    type:
      type: string
      enum:
        - text
        - image_url
        - thinking
        - document
    text:
      type: string
    thinking:
      type: string
# Citation linking a span of generated text to its sources.
CohereCitation:
  type: object
  properties:
    start:
      description: Start position of cited text
      type: integer
    end:
      description: End position of cited text
      type: integer
    text:
      description: Cited text
      type: string
    sources:
      type: array
      items:
        $ref: '#/CohereSource'
    content_index:
      description: Content index of the citation
      type: integer
    type:
      description: Type of citation
      type: string
      enum:
        - TEXT_CONTENT
        - THINKING_CONTENT
        - PLAN
# Source backing a citation: either a tool result or a document.
CohereSource:
  type: object
  properties:
    type:
      type: string
      enum:
        - tool
        - document
      description: Source type
    id:
      type: string
      # The description states the ID may be null, so declare it in the schema
      # too; otherwise validators and generated clients reject a null value.
      nullable: true
      description: Source ID (nullable)
    tool_output:
      type: object
      description: Tool output (for tool sources)
    document:
      type: object
      description: Document data (for document sources)

View File

@@ -0,0 +1,14 @@
# Cohere Integration Common Types
# Error shape for the Cohere integration; all fields are optional strings.
CohereError:
  type: object
  properties:
    type:
      description: Error type
      type: string
    message:
      description: Error message
      type: string
    code:
      description: Optional error code
      type: string

View File

@@ -0,0 +1,172 @@
# Cohere v2 Embed API Schemas
# Request body for the Cohere v2 embed endpoint.
# `model` and `input_type` are required; per the field descriptions, at least
# one of `texts`, `images` or `inputs` must also be supplied.
CohereEmbeddingRequest:
  type: object
  required:
    - model
    - input_type
  properties:
    model:
      type: string
      description: ID of an available embedding model
      example: embed-english-v3.0
    input_type:
      type: string
      description: Specifies the type of input passed to the model. Required for embedding models v3 and higher.
    texts:
      type: array
      items:
        type: string
      description: Array of strings to embed. Maximum 96 texts per call. At least one of texts, images, or inputs is required.
      maxItems: 96
    images:
      type: array
      items:
        type: string
      description: Array of image data URIs for multimodal embedding. Maximum 1 image per call. Supports JPEG, PNG, WebP, GIF up to 5MB.
      maxItems: 1
    inputs:
      type: array
      items:
        $ref: '#/CohereEmbeddingInput'
      description: Array of mixed text/image components for embedding. Maximum 96 per call.
      maxItems: 96
    embedding_types:
      type: array
      items:
        type: string
        # Values listed in the description, made machine-readable.
        enum:
          - float
          - int8
          - uint8
          - binary
          - ubinary
          - base64
      description: Specifies the return format types (float, int8, uint8, binary, ubinary, base64). Defaults to float if unspecified.
    output_dimension:
      type: integer
      description: Number of dimensions for output embeddings (256, 512, 1024, 1536). Available only for embed-v4 and newer models.
    max_tokens:
      type: integer
      description: Maximum tokens to embed per input before truncation.
    truncate:
      type: string
      # Quoted so that no loader can mistake NONE for a null-like scalar.
      enum:
        - 'NONE'
        - 'START'
        - 'END'
      default: 'END'
      description: Handling for inputs exceeding token limits. Defaults to END.
# One mixed-content embedding input, built from the chat content-block schema.
CohereEmbeddingInput:
  type: object
  properties:
    content:
      description: Array of content blocks (reuses chat content blocks)
      type: array
      items:
        $ref: './chat.yaml#/CohereContentBlock'
# Embed response: embeddings plus the echoed inputs and metadata.
CohereEmbeddingResponse:
  type: object
  properties:
    id:
      description: Response ID
      type: string
    embeddings:
      $ref: '#/CohereEmbeddingData'
    response_type:
      description: Response type (embeddings_floats, embeddings_by_type)
      type: string
    texts:
      description: Original text entries
      type: array
      items:
        type: string
    images:
      description: Original image entries
      type: array
      items:
        $ref: '#/CohereEmbeddingImageInfo'
    meta:
      $ref: '#/CohereEmbeddingMeta'
# Embeddings keyed by encoding. All encodings are arrays of numeric arrays,
# except base64, which is an array of strings.
CohereEmbeddingData:
  description: Embedding data object with different types
  type: object
  properties:
    float:
      description: Float embeddings
      type: array
      items:
        type: array
        items:
          type: number
    int8:
      description: Int8 embeddings
      type: array
      items:
        type: array
        items:
          type: integer
    uint8:
      description: Uint8 embeddings
      type: array
      items:
        type: array
        items:
          type: integer
    binary:
      description: Binary embeddings
      type: array
      items:
        type: array
        items:
          type: integer
    ubinary:
      description: Unsigned binary embeddings
      type: array
      items:
        type: array
        items:
          type: integer
    base64:
      description: Base64-encoded embeddings
      type: array
      items:
        type: string
# Properties of an image echoed back in the embed response.
CohereEmbeddingImageInfo:
  description: Image information in the response
  type: object
  properties:
    width:
      description: Width in pixels
      type: integer
    height:
      description: Height in pixels
      type: integer
    format:
      description: Image format
      type: string
    bit_depth:
      description: Bit depth
      type: integer
# Embed response metadata; billing and token shapes come from chat.yaml.
CohereEmbeddingMeta:
  description: Metadata in embedding response
  type: object
  properties:
    api_version:
      $ref: '#/CohereEmbeddingAPIVersion'
    billed_units:
      $ref: './chat.yaml#/CohereBilledUnits'
    tokens:
      $ref: './chat.yaml#/CohereTokenUsage'
    warnings:
      description: Any warnings
      type: array
      items:
        type: string
# API version descriptor with deprecation and experimental flags.
CohereEmbeddingAPIVersion:
  description: API version information
  type: object
  properties:
    version:
      description: API version
      type: string
    is_deprecated:
      description: Deprecation status
      type: boolean
    is_experimental:
      description: Experimental status
      type: boolean

View File

@@ -0,0 +1,48 @@
# Cohere Tokenize API Schemas
# Tokenize request; both fields are required and `text` is length-bounded.
CohereCountTokensRequest:
  type: object
  required:
    - text
    - model
  properties:
    model:
      description: Model whose tokenizer should be used
      type: string
      example: command-r-plus
    text:
      description: Text to tokenize (1-65536 characters)
      type: string
      minLength: 1
      maxLength: 65536
# Tokenize response: token IDs, their string forms, and metadata.
CohereCountTokensResponse:
  type: object
  properties:
    tokens:
      description: Token IDs
      type: array
      items:
        type: integer
    token_strings:
      description: Token strings
      type: array
      items:
        type: string
    meta:
      $ref: '#/CohereTokenizeMeta'
# Wrapper holding the API version object.
CohereTokenizeMeta:
  description: Metadata returned by the tokenize endpoint
  type: object
  properties:
    api_version:
      $ref: '#/CohereTokenizeAPIVersion'
# Version string only; no deprecation or experimental flags are declared here.
CohereTokenizeAPIVersion:
  description: API version metadata
  type: object
  properties:
    version:
      description: API version
      type: string

View File

@@ -0,0 +1,80 @@
# Google GenAI (Gemini) Integration Common Types
# Gemini error envelope: everything sits under a single `error` object.
GeminiError:
  type: object
  properties:
    error:
      type: object
      properties:
        code:
          type: integer
        message:
          type: string
        status:
          type: string
        details:
          type: array
          items:
            $ref: '#/GeminiErrorDetails'
# One entry of `error.details`.
GeminiErrorDetails:
  type: object
  properties:
    # Quoted because a plain scalar may not begin with '@'.
    '@type':
      description: Type identifier for the error details
      type: string
    fieldViolations:
      type: array
      items:
        type: object
        properties:
          # Property literally named `description`, not schema metadata.
          description:
            type: string
# Model descriptor used as the item type of the list-models response.
GeminiModel:
  type: object
  properties:
    name:
      description: Model resource name (e.g., models/gemini-pro)
      type: string
    baseModelId:
      type: string
    version:
      type: string
    displayName:
      type: string
    # Property literally named `description`, not schema metadata.
    description:
      type: string
    inputTokenLimit:
      type: integer
    outputTokenLimit:
      type: integer
    supportedGenerationMethods:
      type: array
      items:
        type: string
    thinking:
      description: Whether the model supports thinking mode
      type: boolean
    temperature:
      description: Default temperature for the model
      type: number
    maxTemperature:
      description: Maximum allowed temperature for the model
      type: number
    topP:
      description: Default nucleus-sampling value
      type: number
    topK:
      description: Default top-k sampling value
      type: integer
# List of models plus a next-page token string.
GeminiListModelsResponse:
  type: object
  properties:
    models:
      type: array
      items:
        $ref: '#/GeminiModel'
    nextPageToken:
      type: string

View File

@@ -0,0 +1,94 @@
# Google GenAI (Gemini) Files API Schemas
# Multipart upload body for the Gemini Files API.
# Only the binary `file` part is required; unknown parts are rejected.
GeminiFileUploadRequest:
  type: object
  # A literal block scalar (|) keeps the bullet list on separate lines.
  # A folded scalar (>) would join the bullets into one run-on sentence.
  description: |
    Multipart upload for Gemini Files API. Send two parts:

    - "metadata": JSON object {"file": {"displayName": "<optional label>"}}
    - "file": binary content

    Note: Direct file content download is not supported by Gemini Files API.
    Use the file.uri field from the response to access the file.
  required:
    - file
  properties:
    metadata:
      type: object
      description: JSON metadata part; see encoding at the path for contentType application/json.
      properties:
        file:
          type: object
          properties:
            displayName:
              type: string
          additionalProperties: false
      additionalProperties: false
    file:
      type: string
      format: binary
  additionalProperties: false
# File resource returned by the Gemini Files API.
GeminiFile:
  type: object
  properties:
    name:
      description: File resource name (e.g., files/abc123)
      type: string
    displayName:
      type: string
    mimeType:
      type: string
    sizeBytes:
      description: Size in bytes (returned as string by Gemini API)
      type: string
    createTime:
      type: string
      format: date-time
    updateTime:
      type: string
      format: date-time
    expirationTime:
      type: string
      format: date-time
    sha256Hash:
      type: string
    uri:
      description: URI for accessing the file content
      type: string
    state:
      type: string
      enum:
        - STATE_UNSPECIFIED
        - PROCESSING
        - ACTIVE
        - FAILED
    error:
      type: object
      properties:
        code:
          type: integer
        message:
          type: string
    videoMetadata:
      type: object
      properties:
        videoDuration:
          type: string
# Upload result: the file resource wrapped under `file`.
GeminiFileUploadResponse:
  type: object
  properties:
    file:
      $ref: '#/GeminiFile'
# List of files plus a next-page token string.
GeminiFileListResponse:
  type: object
  properties:
    files:
      type: array
      items:
        $ref: '#/GeminiFile'
    nextPageToken:
      type: string
# Retrieval returns the bare file resource, with no wrapper object.
GeminiFileRetrieveResponse:
  $ref: '#/GeminiFile'
# No properties are declared for the delete response.
GeminiFileDeleteResponse:
  description: Empty response on successful deletion
  type: object

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,90 @@
# OpenAI Integration Audio Schemas (Speech and Transcription)
# Speech (TTS) Request
# Text-to-speech request; `model` and `input` are required.
OpenAISpeechRequest:
  type: object
  required:
    - model
    - input
  properties:
    model:
      description: Model identifier (e.g., tts-1, tts-1-hd)
      type: string
      example: tts-1
    input:
      description: Text to convert to speech
      type: string
    voice:
      description: Voice to use
      type: string
      enum:
        - alloy
        - echo
        - fable
        - onyx
        - nova
        - shimmer
    response_format:
      type: string
      enum:
        - mp3
        - opus
        - aac
        - flac
        - wav
        - pcm
    speed:
      type: number
      minimum: 0.25
      maximum: 4.0
    stream_format:
      description: Set to 'sse' for streaming
      type: string
      enum:
        - sse
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Transcription Request
# Transcription request; `model` and the binary `file` are required.
OpenAITranscriptionRequest:
  type: object
  required:
    - model
    - file
  properties:
    model:
      description: Model identifier (e.g., whisper-1)
      type: string
      example: whisper-1
    file:
      description: Audio file to transcribe
      type: string
      format: binary
    language:
      description: Language of the audio (ISO 639-1)
      type: string
    prompt:
      description: Prompt to guide transcription
      type: string
    response_format:
      type: string
      enum:
        - json
        - text
        - srt
        - verbose_json
        - vtt
    temperature:
      type: number
      minimum: 0
      maximum: 1
    timestamp_granularities:
      type: array
      items:
        type: string
        enum:
          - word
          - segment
    stream:
      type: boolean
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Responses reuse inference schemas
# Speech responses are aliases of the inference speech schemas.
OpenAISpeechResponse:
  $ref: '../../inference/speech.yaml#/SpeechResponse'
OpenAISpeechStreamResponse:
  $ref: '../../inference/speech.yaml#/SpeechStreamResponse'
# Transcription responses are aliases of the inference transcription schemas.
OpenAITranscriptionResponse:
  $ref: '../../inference/transcription.yaml#/TranscriptionResponse'
OpenAITranscriptionStreamResponse:
  $ref: '../../inference/transcription.yaml#/TranscriptionStreamResponse'

View File

@@ -0,0 +1,57 @@
# OpenAI Integration Batch API Schemas
# Reuses inference batch schemas since OpenAI integration uses Bifrost format
# Batch Create Request - uses Bifrost format with provider field
# Batch creation request and response are aliases of the inference batch schemas.
OpenAIBatchCreateRequest:
  $ref: '../../inference/batch.yaml#/BatchCreateRequest'
OpenAIBatchCreateResponse:
  $ref: '../../inference/batch.yaml#/BatchCreateResponse'
# Parameters for listing batches; all are optional.
OpenAIBatchListRequest:
  type: object
  properties:
    limit:
      description: Maximum number of batches to return
      type: integer
      default: 30
    after:
      description: Cursor for pagination
      type: string
    provider:
      description: Filter by provider
      type: string
      example: openai
# Alias of the inference batch list response.
OpenAIBatchListResponse:
  $ref: '../../inference/batch.yaml#/BatchListResponse'
# Parameters for fetching one batch; `batch_id` is required.
OpenAIBatchRetrieveRequest:
  type: object
  required:
    - batch_id
  properties:
    batch_id:
      description: Batch ID to retrieve
      type: string
    provider:
      description: Provider for the batch
      type: string
# Alias of the inference batch retrieve response.
OpenAIBatchRetrieveResponse:
  $ref: '../../inference/batch.yaml#/BatchRetrieveResponse'
# Parameters for cancelling one batch; `batch_id` is required.
OpenAIBatchCancelRequest:
  type: object
  required:
    - batch_id
  properties:
    batch_id:
      description: Batch ID to cancel
      type: string
    provider:
      description: Provider for the batch
      type: string
# Alias of the inference batch cancel response.
OpenAIBatchCancelResponse:
  $ref: '../../inference/batch.yaml#/BatchCancelResponse'

View File

@@ -0,0 +1,121 @@
# OpenAI Integration Chat Completions Schemas
# Reuses inference schemas where possible since Bifrost follows OpenAI format
# Chat completions request in OpenAI format; `model` and `messages` are required.
# Tool, tool-choice and stream-option shapes come from the inference chat schemas.
OpenAIChatRequest:
  type: object
  required:
    - model
    - messages
  properties:
    model:
      type: string
      description: Model identifier (e.g., gpt-4, gpt-3.5-turbo)
      example: gpt-4
    messages:
      type: array
      items:
        $ref: '#/OpenAIMessage'
      description: List of messages in the conversation
    stream:
      type: boolean
      description: Whether to stream the response
    max_tokens:
      type: integer
      description: Maximum tokens to generate (legacy, use max_completion_tokens)
    max_completion_tokens:
      type: integer
      description: Maximum tokens to generate
    temperature:
      type: number
      minimum: 0
      maximum: 2
    top_p:
      type: number
    frequency_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    presence_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    logit_bias:
      type: object
      additionalProperties:
        type: number
    logprobs:
      type: boolean
    top_logprobs:
      type: integer
    # Quoted: a bare `n` is a boolean under YAML 1.1 rules, and loaders that
    # honour that would turn this property name into `false`.
    'n':
      type: integer
    stop:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
    seed:
      type: integer
    user:
      type: string
    tools:
      type: array
      items:
        $ref: '../../inference/chat.yaml#/ChatTool'
    tool_choice:
      $ref: '../../inference/chat.yaml#/ChatToolChoice'
    parallel_tool_calls:
      type: boolean
    response_format:
      type: object
      description: Format for the response
    reasoning_effort:
      type: string
      enum:
        - none
        - minimal
        - low
        - medium
        - high
        - xhigh
      description: OpenAI reasoning effort level
    service_tier:
      type: string
    stream_options:
      $ref: '../../inference/chat.yaml#/ChatStreamOptions'
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
      description: Fallback models
# One chat message; only `role` is required.
OpenAIMessage:
  type: object
  required:
    - role
  properties:
    role:
      type: string
      enum:
        - system
        - user
        - assistant
        - tool
        - developer
    name:
      type: string
    content:
      $ref: '../../inference/chat.yaml#/ChatMessageContent'
    tool_call_id:
      description: For tool messages
      type: string
    refusal:
      type: string
    reasoning:
      type: string
    annotations:
      type: array
      items:
        $ref: '../../inference/chat.yaml#/ChatAssistantMessageAnnotation'
    tool_calls:
      type: array
      items:
        $ref: '../../inference/chat.yaml#/ChatAssistantMessageToolCall'
# Response reuses inference schema since format is identical
# Chat responses (plain and streaming) are aliases of the inference schemas.
OpenAIChatResponse:
  $ref: '../../inference/chat.yaml#/ChatCompletionResponse'
OpenAIChatStreamResponse:
  $ref: '../../inference/chat.yaml#/ChatCompletionStreamResponse'

View File

@@ -0,0 +1,51 @@
# OpenAI Integration Common Types
# OpenAI error envelope; `param` and `code` may be null.
OpenAIError:
  type: object
  properties:
    error:
      type: object
      properties:
        message:
          type: string
        type:
          type: string
        param:
          type: string
          nullable: true
        code:
          type: string
          nullable: true
# OpenAI uses the same model format but without provider prefix
# Model descriptor in OpenAI format, with two extra fields marked GROQ-specific.
OpenAIModel:
  type: object
  properties:
    id:
      description: Model identifier
      type: string
    object:
      type: string
      default: model
    owned_by:
      type: string
    created:
      type: integer
      format: int64
    active:
      description: GROQ-specific field
      type: boolean
    context_window:
      description: GROQ-specific field
      type: integer
# List envelope: `object` defaults to "list", `data` holds the models.
OpenAIListModelsResponse:
  type: object
  properties:
    object:
      type: string
      default: list
    data:
      type: array
      items:
        $ref: '#/OpenAIModel'

View File

@@ -0,0 +1,36 @@
# OpenAI Integration Embeddings Schemas
# Embeddings request; `input` is a single string or a list of strings.
OpenAIEmbeddingRequest:
  type: object
  required:
    - model
    - input
  properties:
    model:
      description: Model identifier
      type: string
      example: text-embedding-3-small
    input:
      description: Input text to embed
      oneOf:
        - type: string
        - type: array
          items:
            type: string
    encoding_format:
      type: string
      enum:
        - float
        - base64
    dimensions:
      description: Number of dimensions for the embedding
      type: integer
    user:
      type: string
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Response reuses inference schema
# Alias of the inference embedding response.
OpenAIEmbeddingResponse:
  $ref: '../../inference/embeddings.yaml#/EmbeddingResponse'

View File

@@ -0,0 +1,95 @@
# OpenAI Integration Files API Schemas
# Reuses inference files schemas since OpenAI integration uses Bifrost format
# File upload body; the binary `file` and its `purpose` are required.
OpenAIFileUploadRequest:
  type: object
  required:
    - file
    - purpose
  properties:
    file:
      description: File to upload
      type: string
      format: binary
    purpose:
      description: Purpose of the file
      type: string
      enum:
        - assistants
        - assistants_output
        - batch
        - batch_output
        - fine-tune
        - fine-tune-results
        - vision
        - user_data
        - evals
    provider:
      description: Provider for file storage
      type: string
    storage_config:
      $ref: '../../inference/files.yaml#/FileStorageConfig'
# Alias of the inference file upload response.
OpenAIFileUploadResponse:
  $ref: '../../inference/files.yaml#/FileUploadResponse'
# Parameters for listing files; all are optional.
OpenAIFileListRequest:
  type: object
  properties:
    purpose:
      description: Filter by purpose
      type: string
    limit:
      description: Maximum files to return
      type: integer
    after:
      description: Cursor for pagination
      type: string
    order:
      type: string
      enum:
        - asc
        - desc
    provider:
      description: Filter by provider
      type: string
# Alias of the inference file list response.
OpenAIFileListResponse:
  $ref: '../../inference/files.yaml#/FileListResponse'
# Parameters for fetching file metadata; `file_id` is required.
OpenAIFileRetrieveRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      description: File ID to retrieve
      type: string
    provider:
      description: Provider for the file
      type: string
    storage_config:
      $ref: '../../inference/files.yaml#/FileStorageConfig'
# Alias of the inference file retrieve response.
OpenAIFileRetrieveResponse:
  $ref: '../../inference/files.yaml#/FileRetrieveResponse'
# Parameters for deleting a file; `file_id` is required.
OpenAIFileDeleteRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      description: File ID to delete
      type: string
    provider:
      description: Provider for the file
      type: string
    storage_config:
      $ref: '../../inference/files.yaml#/FileStorageConfig'
# Alias of the inference file delete response.
OpenAIFileDeleteResponse:
  $ref: '../../inference/files.yaml#/FileDeleteResponse'
# Parameters for downloading file content; `file_id` is required.
OpenAIFileContentRequest:
  type: object
  required:
    - file_id
  properties:
    file_id:
      description: File ID to get content for
      type: string
    provider:
      description: Provider for the file
      type: string
    storage_config:
      $ref: '../../inference/files.yaml#/FileStorageConfig'

View File

@@ -0,0 +1,133 @@
# OpenAI Integration - Image Generation Schemas
# Image generation request in OpenAI format; `model` and `prompt` are required.
OpenAIImageGenerationRequest:
  type: object
  required:
    - model
    - prompt
  properties:
    model:
      type: string
      description: Model identifier
    prompt:
      type: string
      description: Text prompt to generate image
    # Quoted: a bare `n` is a boolean under YAML 1.1 rules, and loaders that
    # honour that would turn this property name into `false`.
    'n':
      type: integer
      minimum: 1
      maximum: 10
      default: 1
      description: Number of images to generate
    size:
      type: string
      enum:
        - '256x256'
        - '512x512'
        - '1024x1024'
        - '1792x1024'
        - '1024x1792'
        - '1536x1024'
        - '1024x1536'
        - 'auto'
      description: Size of the generated image
    quality:
      type: string
      # `size` already accepts the gpt-image sizes (1536x1024, 1024x1536, auto),
      # so the matching gpt-image quality levels are accepted as well.
      # NOTE(review): confirm the server forwards these values unchanged.
      enum:
        - 'standard'
        - 'hd'
        - 'low'
        - 'medium'
        - 'high'
        - 'auto'
      description: Quality of the generated image
    style:
      type: string
      enum:
        - 'natural'
        - 'vivid'
      description: Style of the generated image
    response_format:
      type: string
      enum:
        - 'url'
        - 'b64_json'
      default: 'url'
      description: Format of the response. This parameter is not supported for streaming requests.
    user:
      type: string
      description: User identifier for tracking
    stream:
      type: boolean
      default: false
      description: |
        Whether to stream the response. When true, images are sent as base64 chunks via SSE.
    fallbacks:
      type: array
      items:
        type: string
      description: Fallback models to try if primary model fails
# Non-streaming image response; image data and usage shapes come from the
# inference image schemas.
OpenAIImageGenerationResponse:
  type: object
  properties:
    created:
      description: Unix timestamp when the image was created
      type: integer
      format: int64
    data:
      description: Array of generated images
      type: array
      items:
        $ref: '../../../schemas/inference/images.yaml#/ImageData'
    background:
      description: Background type used
      type: string
    output_format:
      description: Output format used
      type: string
    quality:
      description: Quality setting used
      type: string
    size:
      description: Size setting used
      type: string
    usage:
      $ref: '../../../schemas/inference/images.yaml#/ImageUsage'
# One SSE chunk of a streamed image generation; `type` names the event.
OpenAIImageStreamResponse:
  type: object
  description: |
    Streaming response chunk for image generation (OpenAI format).
    Sent via Server-Sent Events (SSE) when stream=true.
  properties:
    type:
      type: string
      enum:
        - 'image_generation.partial_image'
        - 'image_generation.completed'
        - 'error'
      description: Type of stream event
    b64_json:
      type: string
      description: Base64-encoded chunk of image data
    partial_image_index:
      type: integer
      description: Index of the partial image chunk
    sequence_number:
      type: integer
      description: Ordering index for stream chunks
    created_at:
      type: integer
      format: int64
      description: Timestamp when chunk was created
    size:
      type: string
      description: Size of the generated image
    quality:
      type: string
      description: Quality setting used
    background:
      type: string
      description: Background type used
    output_format:
      type: string
      description: Output format used
    usage:
      # OpenAPI 3.0 ignores keywords placed beside $ref, so the description was
      # being dropped. Wrapping the reference in allOf keeps both.
      description: Token usage (usually in final chunk)
      allOf:
        - $ref: '../../../schemas/inference/images.yaml#/ImageUsage'

View File

@@ -0,0 +1,108 @@
# OpenAI Integration Responses API Schemas
# Responses API request; `model` and `input` are required.
OpenAIResponsesRequest:
  type: object
  required:
    - model
    - input
  properties:
    model:
      description: Model identifier
      type: string
      example: gpt-4
    input:
      $ref: '#/OpenAIResponsesInput'
    stream:
      type: boolean
    instructions:
      description: System instructions for the model
      type: string
    max_output_tokens:
      type: integer
    metadata:
      type: object
      additionalProperties: true
    parallel_tool_calls:
      type: boolean
    previous_response_id:
      type: string
    reasoning:
      $ref: '#/OpenAIResponsesReasoning'
    store:
      type: boolean
    temperature:
      type: number
      minimum: 0
      maximum: 2
    text:
      $ref: '#/OpenAIResponsesTextConfig'
    tool_choice:
      $ref: '../../inference/responses.yaml#/ResponsesToolChoice'
    tools:
      type: array
      items:
        $ref: '../../inference/responses.yaml#/ResponsesTool'
    top_p:
      type: number
    truncation:
      type: string
      enum:
        - auto
        - disabled
    user:
      type: string
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Union type: plain string, or a list of inference ResponsesMessage.
OpenAIResponsesInput:
  description: Input - can be a string or array of messages
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '../../inference/responses.yaml#/ResponsesMessage'
# Reasoning controls for the Responses API.
OpenAIResponsesReasoning:
  type: object
  properties:
    effort:
      type: string
      enum:
        - none
        - minimal
        - low
        - medium
        - high
        - xhigh
    generate_summary:
      type: string
      enum:
        - auto
        - concise
        - detailed
      # OpenAI's API reference deprecates this field in favour of `summary`,
      # which has the same value set. Flagging it steers clients to `summary`.
      deprecated: true
      description: 'Deprecated: use summary instead'
    summary:
      type: string
      enum:
        - auto
        - concise
        - detailed
    max_tokens:
      type: integer
# Text output configuration; currently wraps only `format`.
OpenAIResponsesTextConfig:
  type: object
  properties:
    format:
      $ref: '#/OpenAIResponsesTextFormat'
# Text format selector with an optional named JSON schema.
OpenAIResponsesTextFormat:
  type: object
  properties:
    type:
      type: string
      enum:
        - text
        - json_object
        - json_schema
    json_schema:
      type: object
      properties:
        name:
          type: string
        schema:
          type: object
        strict:
          type: boolean
# Response reuses inference schema
# Responses API results (plain and streaming) are aliases of the inference schemas.
OpenAIResponsesResponse:
  $ref: '../../inference/responses.yaml#/ResponsesResponse'
OpenAIResponsesStreamResponse:
  $ref: '../../inference/responses.yaml#/ResponsesStreamResponse'

View File

@@ -0,0 +1,74 @@
# OpenAI Integration Text Completions Schemas (Legacy Completions API)
# Legacy text completions request; `model` and `prompt` are required.
# Note that `logprobs` is an integer here, unlike the boolean in the chat request.
OpenAITextCompletionRequest:
  type: object
  required:
    - model
    - prompt
  properties:
    model:
      type: string
      description: Model identifier
      example: gpt-3.5-turbo-instruct
    prompt:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
      description: The prompt(s) to generate completions for
    stream:
      type: boolean
      description: Whether to stream the response
    max_tokens:
      type: integer
    temperature:
      type: number
      minimum: 0
      maximum: 2
    top_p:
      type: number
    frequency_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    presence_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
    logit_bias:
      type: object
      additionalProperties:
        type: number
    logprobs:
      type: integer
    # Quoted: a bare `n` is a boolean under YAML 1.1 rules, and loaders that
    # honour that would turn this property name into `false`.
    'n':
      type: integer
    stop:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
    suffix:
      type: string
    echo:
      type: boolean
    best_of:
      type: integer
    user:
      type: string
    seed:
      type: integer
    # Bifrost-specific
    fallbacks:
      type: array
      items:
        type: string
# Response reuses inference schema
# Text completion results (plain and streaming) are aliases of the inference schemas.
OpenAITextCompletionResponse:
  $ref: '../../inference/text.yaml#/TextCompletionResponse'
OpenAITextCompletionStreamResponse:
  $ref: '../../inference/text.yaml#/TextCompletionStreamResponse'

View File

@@ -0,0 +1,9 @@
# Cache API schemas
# Single-field acknowledgement for the cache API.
ClearCacheResponse:
  description: Clear cache response
  type: object
  properties:
    message:
      type: string
      example: Cache cleared successfully

View File

@@ -0,0 +1,35 @@
# Common schemas used across management APIs
# Status string plus a human-readable message.
SuccessResponse:
  description: Generic success response
  type: object
  properties:
    status:
      type: string
      example: success
    message:
      type: string
      example: Operation completed successfully
# Management-API error body; same shape as the shared BifrostError.
# OpenAPI 3.0 ignores keywords placed beside $ref, so `type` and `description`
# were being dropped. Wrapping the reference in allOf keeps the description.
ErrorResponse:
  description: Error response
  allOf:
    - $ref: '../../schemas/inference/common.yaml#/BifrostError'
# Object with a single `message` string.
MessageResponse:
  description: Simple message response
  type: object
  properties:
    message:
      type: string
# Holds a value, an environment-variable name, and a `from_env` flag.
EnvVar:
  description: Environment variable configuration
  type: object
  properties:
    value:
      type: string
    env_var:
      type: string
    from_env:
      type: boolean

View File

@@ -0,0 +1,200 @@
# Config API schemas
# Plain version string; the example stays quoted so it is never read as a number.
Version:
  description: Version information
  type: string
  example: '1.0.0'
# Client-level settings, used by both the get-config response and the
# update-config request.
ClientConfig:
  description: Client configuration
  type: object
  properties:
    drop_excess_requests:
      description: Whether to drop excess requests when rate limited
      type: boolean
    prometheus_labels:
      description: Custom Prometheus labels
      type: array
      items:
        type: string
    allowed_origins:
      description: Allowed CORS origins
      type: array
      items:
        type: string
    initial_pool_size:
      description: Initial connection pool size
      type: integer
    enable_logging:
      description: Whether logging is enabled
      type: boolean
    disable_content_logging:
      description: Whether content logging is disabled
      type: boolean
    enforce_auth_on_inference:
      description: Whether to enforce virtual key authentication on inference requests
      type: boolean
    enforce_governance_header:
      description: 'Deprecated: use enforce_auth_on_inference instead'
      type: boolean
      deprecated: true
    allow_direct_keys:
      description: Whether to allow direct API keys
      type: boolean
    max_request_body_size_mb:
      description: Maximum request body size in MB
      type: integer
    compat:
      description: Compat plugin configuration
      type: object
      properties:
        convert_text_to_chat:
          description: Convert text completion requests to chat
          type: boolean
        convert_chat_to_responses:
          description: Convert chat completion requests to responses
          type: boolean
        should_drop_params:
          description: Drop unsupported parameters based on model catalog
          type: boolean
        should_convert_params:
          description: Converts model parameter values that are not supported by the model
          type: boolean
          default: false
      # Unknown compat options are rejected.
      additionalProperties: false
    log_retention_days:
      description: Number of days to retain logs
      type: integer
    header_filter_config:
      $ref: '#/HeaderFilterConfig'
    mcp_agent_depth:
      description: Depth of MCP agent
      type: integer
    mcp_tool_execution_timeout:
      description: Timeout for MCP tool execution in seconds
      type: integer
    mcp_code_mode_binding_level:
      description: Binding level for MCP code mode
      type: string
    required_headers:
      description: Headers that must be present on every request. Requests missing any of these headers are rejected with 400. Case-insensitive matching.
      type: array
      items:
        type: string
    logging_headers:
      description: Headers to capture in log metadata. Values are extracted from incoming requests and stored in the metadata field of log entries. Case-insensitive matching. No restart required.
      type: array
      items:
        type: string
# Pricing-data source URL and its sync interval.
FrameworkConfig:
  description: Framework configuration
  type: object
  properties:
    id:
      description: Unique identifier for the framework config
      type: integer
    pricing_url:
      description: URL for pricing data
      type: string
    pricing_sync_interval:
      description: Pricing sync interval in seconds
      type: integer
      format: int64
# Admin authentication settings.
AuthConfig:
  type: object
  description: Authentication configuration
  properties:
    admin_username:
      type: string
    admin_password:
      type: string
      # `password` is the OpenAPI 3.0 format hint that tells UIs and generated
      # clients to obscure the value. It adds no validation.
      format: password
      description: Password (redacted as <redacted> in responses)
    is_enabled:
      type: boolean
    disable_auth_on_inference:
      type: boolean
# Two string lists: `allowlist` and `denylist`.
HeaderFilterConfig:
  description: Header filter configuration
  type: object
  properties:
    allowlist:
      type: array
      items:
        type: string
    denylist:
      type: array
      items:
        type: string
# Outbound proxy settings, with per-area enable switches (SCIM, inference, API).
ProxyConfig:
  type: object
  description: Global proxy configuration
  properties:
    enabled:
      type: boolean
    type:
      type: string
      enum:
        - http
        - socks5
        - tcp
    url:
      type: string
    username:
      type: string
    password:
      type: string
      # `password` is the OpenAPI 3.0 format hint that tells UIs and generated
      # clients to obscure the value. It adds no validation.
      format: password
      description: Password (redacted as <redacted> in responses)
    no_proxy:
      type: string
    timeout:
      type: integer
    skip_tls_verify:
      type: boolean
    enable_for_scim:
      type: boolean
    enable_for_inference:
      type: boolean
    enable_for_api:
      type: boolean
# A `required` flag plus a `reason` string.
RestartRequiredConfig:
  description: Restart required configuration
  type: object
  properties:
    required:
      type: boolean
    reason:
      type: string
# Full configuration snapshot, including store connectivity flags.
GetConfigResponse:
  description: Configuration response
  type: object
  properties:
    client_config:
      $ref: '#/ClientConfig'
    framework_config:
      $ref: '#/FrameworkConfig'
    auth_config:
      $ref: '#/AuthConfig'
    is_db_connected:
      type: boolean
    is_cache_connected:
      type: boolean
    is_logs_connected:
      type: boolean
    proxy_config:
      $ref: '#/ProxyConfig'
    restart_required:
      $ref: '#/RestartRequiredConfig'
# Update payload covering the client, framework and auth sections only.
UpdateConfigRequest:
  description: Update configuration request
  type: object
  properties:
    client_config:
      $ref: '#/ClientConfig'
    framework_config:
      $ref: '#/FrameworkConfig'
    auth_config:
      $ref: '#/AuthConfig'

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,15 @@
# Health API schemas
# Health check payload: overall status plus a string map of component statuses.
HealthResponse:
  description: Health check response
  type: object
  properties:
    status:
      type: string
      enum:
        - ok
      example: ok
    components:
      description: Health status of individual components (config_store, log_store, vector_store)
      type: object
      additionalProperties:
        type: string

View File

@@ -0,0 +1,829 @@
# Logging API schemas
# One logged request: identity, routing/key attribution, timing and cost,
# captured inputs/outputs per request kind, and free-form metadata.
LogEntry:
  description: Log entry
  type: object
  properties:
    # --- identity and status ---
    id:
      type: string
    parent_request_id:
      type: string
    provider:
      type: string
    model:
      type: string
    status:
      type: string
      enum:
        - processing
        - success
        - error
    object:
      type: string
    timestamp:
      type: string
      format: date-time
    number_of_retries:
      type: integer
    fallback_index:
      type: integer
    latency:
      type: number
    cost:
      type: number
    # --- key and routing attribution ---
    selected_key_id:
      type: string
    selected_key_name:
      type: string
    virtual_key_id:
      type: string
    virtual_key_name:
      type: string
      nullable: true
    routing_engines_used:
      description: Array of routing engines used for this request (routing-rule, governance, or loadbalancing)
      type: array
      items:
        type: string
      nullable: true
    routing_rule_id:
      type: string
      nullable: true
    routing_rule_name:
      type: string
      nullable: true
    stream:
      type: boolean
    raw_request:
      type: string
    raw_response:
      type: string
    created_at:
      type: string
      format: date-time
    # --- usage, errors, and conversation content ---
    token_usage:
      $ref: '../../schemas/inference/usage.yaml#/BifrostLLMUsage'
    error_details:
      $ref: '../../schemas/inference/common.yaml#/BifrostError'
    input_history:
      type: array
      items:
        $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
    responses_input_history:
      type: array
      items:
        $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
    output_message:
      $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
    responses_output:
      type: array
      items:
        $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
    embedding_output:
      # Array of arrays of numbers.
      type: array
      items:
        type: array
        items:
          type: number
    params:
      type: object
      additionalProperties: true
    tools:
      type: array
      items:
        $ref: '../../schemas/inference/chat.yaml#/ChatTool'
    tool_calls:
      type: array
      items:
        $ref: '../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall'
    # --- free-form per-modality inputs and outputs ---
    speech_input:
      type: object
      additionalProperties: true
    transcription_input:
      type: object
      additionalProperties: true
    image_generation_input:
      type: object
      additionalProperties: true
    speech_output:
      type: object
      additionalProperties: true
    transcription_output:
      type: object
      additionalProperties: true
    image_generation_output:
      type: object
      additionalProperties: true
    cache_debug:
      type: object
      additionalProperties: true
    metadata:
      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
      type: object
      additionalProperties: true
    selected_key:
      type: object
      additionalProperties: true
    virtual_key:
      type: object
      additionalProperties: true
    passthrough_request_body:
      description: Raw passthrough request body (for passthrough integration routes)
      type: string
    passthrough_response_body:
      description: Raw passthrough response body (for passthrough integration routes)
      type: string
    routing_engine_logs:
      description: Detailed logs from the routing engine decision process
      type: object
      additionalProperties: true
    is_large_payload_request:
      description: Whether the request payload exceeded the large payload threshold
      type: boolean
    is_large_payload_response:
      description: Whether the response payload exceeded the large payload threshold
      type: boolean
    rerank_output:
      description: Rerank operation output
      type: object
      additionalProperties: true
    video_generation_input:
      description: Video generation request input
      type: object
      additionalProperties: true
    video_generation_output:
      description: Video generation response output
      type: object
      additionalProperties: true
    video_retrieve_output:
      description: Video retrieve response output
      type: object
      additionalProperties: true
    video_list_output:
      description: Video list response output
      type: object
      additionalProperties: true
    video_delete_output:
      description: Video delete response output
      type: object
      additionalProperties: true
    video_download_output:
      description: Video download response output
      type: object
      additionalProperties: true
    list_models_output:
      description: List models response output
      type: object
      additionalProperties: true
# One logged MCP tool execution: which tool/server ran, for which virtual key,
# with its arguments, result, timing, cost, and status.
MCPToolLogEntry:
  description: MCP tool execution log entry
  type: object
  properties:
    id:
      description: Unique identifier for the log entry
      type: string
    llm_request_id:
      description: Links to the LLM request that triggered this tool call
      type: string
    timestamp:
      description: When the tool execution started
      type: string
      format: date-time
    tool_name:
      description: Name of the MCP tool that was executed
      type: string
    server_label:
      description: Label of the MCP server that provided the tool
      type: string
    virtual_key_id:
      description: ID of the virtual key used for this tool execution
      type: string
    virtual_key_name:
      description: Name of the virtual key used for this tool execution
      type: string
    arguments:
      description: Tool execution arguments
      type: object
      additionalProperties: true
    result:
      description: Tool execution result
      type: object
      additionalProperties: true
    error_details:
      $ref: '../../schemas/inference/common.yaml#/BifrostError'
    latency:
      description: Execution time in milliseconds
      type: number
    cost:
      description: Cost in dollars for this tool execution
      type: number
    status:
      description: Execution status
      type: string
      enum:
        - processing
        - success
        - error
    metadata:
      description: Custom metadata captured from request headers (configured via logging_headers or x-bf-lh-* prefix)
      type: object
      additionalProperties: true
    created_at:
      description: When the log entry was created
      type: string
      format: date-time
    virtual_key:
      description: Full virtual key object (populated when virtual_key_id is set)
      type: object
      additionalProperties: true
# Filters for searching MCP tool logs: list filters, a time window,
# a latency range, and a content search string.
MCPToolLogSearchFilters:
  description: MCP tool log search filters
  type: object
  properties:
    tool_names:
      description: Filter by tool names
      type: array
      items:
        type: string
    server_labels:
      description: Filter by server labels
      type: array
      items:
        type: string
    status:
      description: Filter by execution status
      type: array
      items:
        type: string
    llm_request_ids:
      description: Filter by linked LLM request IDs
      type: array
      items:
        type: string
    start_time:
      description: Filter by start time (RFC3339 format)
      type: string
      format: date-time
    end_time:
      description: Filter by end time (RFC3339 format)
      type: string
      format: date-time
    min_latency:
      description: Filter by minimum latency
      type: number
    max_latency:
      description: Filter by maximum latency
      type: number
    content_search:
      description: Search in tool arguments and results
      type: string
# Aggregate figures over MCP tool executions: count, success rate,
# average latency, and total cost.
MCPToolLogStats:
  description: MCP tool log statistics
  type: object
  properties:
    total_executions:
      description: Total number of tool executions
      type: integer
    success_rate:
      description: Success rate percentage
      type: number
    average_latency:
      description: Average execution latency in milliseconds
      type: number
    total_cost:
      description: Total cost in dollars for all executions
      type: number
# MCP log search result: matching entries, an inline pagination object
# (total_count is required), aggregate stats, and a has_logs flag.
SearchMCPLogsResponse:
  description: Search MCP logs response
  type: object
  properties:
    logs:
      type: array
      items:
        $ref: '#/MCPToolLogEntry'
    pagination:
      type: object
      required:
        - total_count
      properties:
        limit:
          type: integer
        offset:
          type: integer
        sort_by:
          type: string
        order:
          type: string
        total_count:
          description: Total number of items matching the query
          type: integer
          format: int64
    stats:
      $ref: '#/MCPToolLogStats'
    has_logs:
      description: Whether any logs exist in the system
      type: boolean
# Values available for building MCP log filters: tool names, server labels,
# and virtual keys (id / name / value).
MCPLogsFilterDataResponse:
  description: Available MCP log filter data
  type: object
  properties:
    tool_names:
      description: All unique tool names
      type: array
      items:
        type: string
    server_labels:
      description: All unique server labels
      type: array
      items:
        type: string
    virtual_keys:
      description: All unique virtual keys
      type: array
      items:
        type: object
        properties:
          id:
            description: Virtual key ID
            type: string
          name:
            description: Virtual key name
            type: string
          value:
            description: Virtual key value (redacted if applicable)
            type: string
# Delete payload for MCP logs: a required list of log IDs.
DeleteMCPLogsRequest:
  description: Delete MCP logs request
  type: object
  required:
    - ids
  properties:
    ids:
      description: Array of log IDs to delete
      type: array
      items:
        type: string
# Filters for searching request logs: list filters, a time window,
# latency/token/cost ranges, and a content search string.
SearchFilters:
  description: Log search filters
  type: object
  properties:
    # --- list filters ---
    providers:
      type: array
      items:
        type: string
    models:
      type: array
      items:
        type: string
    status:
      type: array
      items:
        type: string
    objects:
      type: array
      items:
        type: string
    selected_key_ids:
      type: array
      items:
        type: string
    virtual_key_ids:
      type: array
      items:
        type: string
    routing_rule_ids:
      type: array
      items:
        type: string
    routing_engine_used:
      description: Filter by routing engine (routing-rule, governance, or loadbalancing)
      type: array
      items:
        type: string
    # --- time window ---
    start_time:
      type: string
      format: date-time
    end_time:
      type: string
      format: date-time
    # --- numeric ranges ---
    min_latency:
      type: number
    max_latency:
      type: number
    min_tokens:
      type: integer
    max_tokens:
      type: integer
    min_cost:
      type: number
    max_cost:
      type: number
    missing_cost_only:
      type: boolean
    content_search:
      type: string
# Log search result: matching entries, pagination, aggregate stats,
# and a has_logs flag.
SearchLogsResponse:
  description: Search logs response
  type: object
  properties:
    logs:
      type: array
      items:
        $ref: '#/LogEntry'
    pagination:
      $ref: '#/PaginationOptions'
    stats:
      $ref: '#/LogStats'
    has_logs:
      description: Whether any logs exist in the system
      type: boolean
# Pagination block for list responses: page window, sort field/direction,
# and the total match count.
PaginationOptions:
  description: Pagination metadata for list responses
  type: object
  properties:
    limit:
      type: integer
    offset:
      type: integer
    sort_by:
      type: string
      enum:
        - timestamp
        - latency
        - tokens
        - cost
    order:
      type: string
      enum:
        - asc
        - desc
    total_count:
      description: Total number of items matching the query
      type: integer
      format: int64
# Aggregate figures over request logs.
LogStats:
  description: Log statistics
  type: object
  properties:
    total_requests:
      type: integer
    total_tokens:
      type: integer
    total_cost:
      type: number
    average_latency:
      type: number
    success_rate:
      type: number
# Single 64-bit counter of dropped requests.
DroppedRequestsResponse:
  description: Dropped requests response
  type: object
  properties:
    dropped_requests:
      type: integer
      format: int64
# Values available for building log filters: models, keys, virtual keys,
# routing rules, and routing engine types.
FilterDataResponse:
  description: Available filter data response
  type: object
  properties:
    models:
      type: array
      items:
        type: string
    selected_keys:
      type: array
      items:
        $ref: '../../schemas/management/providers.yaml#/Key'
    virtual_keys:
      type: array
      items:
        $ref: '../../schemas/management/governance.yaml#/VirtualKey'
    routing_rules:
      description: Available routing rules for filtering
      type: array
      items:
        $ref: '../../schemas/management/governance.yaml#/RoutingRule'
    routing_engines:
      description: Available routing engine types (routing-rule, governance, loadbalancing)
      type: array
      items:
        type: string
# Delete payload for request logs: a required list of log IDs.
DeleteLogsRequest:
  type: object
  description: Delete logs request
  required:
    - ids
  properties:
    ids:
      type: array
      items:
        type: string
      # Same wording as the `ids` field of DeleteMCPLogsRequest.
      description: Array of log IDs to delete
# Cost recalculation payload: optional log filters and a batch size limit.
RecalculateCostRequest:
  type: object
  description: Recalculate cost request
  properties:
    filters:
      $ref: '#/SearchFilters'
    limit:
      type: integer
      # The default and upper bound below mirror the figures stated in the
      # description so that generated clients and validators can see them.
      # NOTE(review): confirm the server rejects (rather than clamps) values
      # above 1000 before relying on `maximum` for validation.
      default: 200
      maximum: 1000
      description: Maximum number of logs to process (default 200, max 1000)
# Cost recalculation outcome: four integer counters.
RecalculateCostResponse:
  description: Recalculate cost response
  type: object
  properties:
    total_matched:
      type: integer
    updated:
      type: integer
    skipped:
      type: integer
    remaining:
      type: integer
# Histogram schemas
# One time bucket of request counts: total, success, and error (all int64).
HistogramBucket:
  description: Time-bucketed request count
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    count:
      type: integer
      format: int64
    success:
      type: integer
      format: int64
    error:
      type: integer
      format: int64
# Request count histogram: list of buckets plus the bucket width in seconds.
HistogramResult:
  description: Time-bucketed request count histogram
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/HistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
# One time bucket of token usage: prompt, completion, and total (all int64).
TokenHistogramBucket:
  description: Time-bucketed token usage
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    prompt_tokens:
      type: integer
      format: int64
    completion_tokens:
      type: integer
      format: int64
    total_tokens:
      type: integer
      format: int64
# Token usage histogram: list of buckets plus the bucket width in seconds.
TokenHistogramResult:
  description: Time-bucketed token usage histogram
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/TokenHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
# One time bucket of cost: total plus a model-name -> cost map.
CostHistogramBucket:
  description: Time-bucketed cost data with model breakdown
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    total_cost:
      type: number
    by_model:
      description: Cost breakdown by model name
      type: object
      additionalProperties:
        type: number
# Cost histogram: buckets, bucket width in seconds, and the model list.
CostHistogramResult:
  description: Time-bucketed cost histogram with model breakdown
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/CostHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
    models:
      description: List of models present in the histogram
      type: array
      items:
        type: string
# Per-model counters: total, success, and error (all int64).
ModelUsageStats:
  description: Usage statistics for a single model
  type: object
  properties:
    total:
      type: integer
      format: int64
    success:
      type: integer
      format: int64
    error:
      type: integer
      format: int64
# One time bucket of model usage: a model-name -> ModelUsageStats map.
ModelHistogramBucket:
  description: Time-bucketed model usage with success/error breakdown
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    by_model:
      description: Usage breakdown by model name
      type: object
      additionalProperties:
        $ref: '#/ModelUsageStats'
# Model usage histogram: buckets, bucket width in seconds, and a model list.
ModelHistogramResult:
  description: Time-bucketed model usage histogram
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/ModelHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
    models:
      type: array
      items:
        type: string
# One time bucket of latency: average, p90/p95/p99, and request count.
LatencyHistogramBucket:
  description: Time-bucketed latency percentiles
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    avg_latency:
      type: number
    p90_latency:
      type: number
    p95_latency:
      type: number
    p99_latency:
      type: number
    total_requests:
      type: integer
      format: int64
# Latency histogram: list of buckets plus the bucket width in seconds.
LatencyHistogramResult:
  description: Time-bucketed latency histogram
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/LatencyHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
# One time bucket of cost: total plus a provider-name -> cost map.
ProviderCostHistogramBucket:
  description: Time-bucketed cost data with provider breakdown
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    total_cost:
      type: number
    by_provider:
      description: Cost breakdown by provider name
      type: object
      additionalProperties:
        type: number
# Provider cost histogram: buckets, bucket width in seconds, and a provider list.
ProviderCostHistogramResult:
  description: Time-bucketed cost histogram with provider breakdown
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/ProviderCostHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
    providers:
      type: array
      items:
        type: string
# Per-provider token counters: prompt, completion, and total (all int64).
ProviderTokenStats:
  description: Token statistics for a single provider
  type: object
  properties:
    prompt_tokens:
      type: integer
      format: int64
    completion_tokens:
      type: integer
      format: int64
    total_tokens:
      type: integer
      format: int64
# One time bucket of token usage: a provider-name -> ProviderTokenStats map.
ProviderTokenHistogramBucket:
  description: Time-bucketed token usage with provider breakdown
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    by_provider:
      description: Token usage breakdown by provider name
      type: object
      additionalProperties:
        $ref: '#/ProviderTokenStats'
# Provider token histogram: buckets, bucket width in seconds, and a provider list.
ProviderTokenHistogramResult:
  description: Time-bucketed token histogram with provider breakdown
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/ProviderTokenHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
    providers:
      type: array
      items:
        type: string
# Per-provider latency figures: average, p90/p95/p99, and request count.
ProviderLatencyStats:
  description: Latency statistics for a single provider
  type: object
  properties:
    avg_latency:
      type: number
    p90_latency:
      type: number
    p95_latency:
      type: number
    p99_latency:
      type: number
    total_requests:
      type: integer
      format: int64
# One time bucket of latency: a provider-name -> ProviderLatencyStats map.
ProviderLatencyHistogramBucket:
  description: Time-bucketed latency data with provider breakdown
  type: object
  properties:
    timestamp:
      type: string
      format: date-time
    by_provider:
      description: Latency breakdown by provider name
      type: object
      additionalProperties:
        $ref: '#/ProviderLatencyStats'
# Provider latency histogram: buckets, bucket width in seconds, and a provider list.
ProviderLatencyHistogramResult:
  description: Time-bucketed latency histogram with provider breakdown
  type: object
  properties:
    buckets:
      type: array
      items:
        $ref: '#/ProviderLatencyHistogramBucket'
    bucket_size_seconds:
      type: integer
      format: int64
    providers:
      type: array
      items:
        type: string

View File

@@ -0,0 +1,446 @@
# MCP API schemas
# Authentication modes for MCP connections. The OAuth schema file carries a
# copy of this enum; keep both descriptions in agreement.
MCPAuthType:
  type: string
  enum:
    - none
    - headers
    - oauth
    - per_user_oauth
  # per_user_oauth is described as OAuth 2.1, matching the per-user
  # authorization server schemas in the OAuth schema file.
  description: |
    Authentication type for MCP connections:
    - none: No authentication
    - headers: Header-based authentication (API keys, custom headers, etc.)
    - oauth: OAuth 2.0 authentication (server-level, admin authenticates once)
    - per_user_oauth: Per-user OAuth 2.1 authentication (each user authenticates individually)
# Transport kinds an MCP client can use.
MCPConnectionType:
  description: Connection type for MCP client
  type: string
  enum:
    - http
    - stdio
    - sse
    - inprocess
# Possible connection states reported for an MCP client.
MCPConnectionState:
  description: Connection state of an MCP client
  type: string
  enum:
    - connected
    - disconnected
    - error
# STDIO launch settings: command, its arguments, and required env variables.
MCPStdioConfig:
  description: STDIO configuration for MCP client
  type: object
  properties:
    command:
      description: Executable command to run
      type: string
    args:
      description: Command line arguments
      type: array
      items:
        type: string
    envs:
      description: Environment variables required
      type: array
      items:
        type: string
# Create payload, polymorphic on `connection_type`: the discriminator maps
# http / sse / stdio to their dedicated request schemas.
MCPClientCreateRequest:
  description: |
    MCP client configuration for creating a new client (tool_pricing not available at creation).
    The schema varies based on connection_type:
    - HTTP/SSE: connection_string is required
    - STDIO: stdio_config is required
    - InProcess: server instance must be provided programmatically (Go package only)
  oneOf:
    - $ref: '#/MCPClientCreateRequestHTTP'
    - $ref: '#/MCPClientCreateRequestSSE'
    - $ref: '#/MCPClientCreateRequestSTDIO'
  discriminator:
    propertyName: connection_type
    mapping:
      http: '#/MCPClientCreateRequestHTTP'
      sse: '#/MCPClientCreateRequestSSE'
      stdio: '#/MCPClientCreateRequestSTDIO'
# Fields shared by every MCP client create request. The HTTP, SSE, and STDIO
# variants compose this schema through allOf and add transport-specific fields.
MCPClientCreateRequestBase:
  type: object
  required:
    - name
    - connection_type
  properties:
    client_id:
      type: string
      description: Unique identifier for the MCP client (optional, auto-generated if not provided)
    name:
      type: string
      description: Display name for the MCP client
    is_code_mode_client:
      type: boolean
    is_ping_available:
      type: boolean
      default: true
      description: |
        Whether the MCP server supports ping for health checks.
        If true, uses lightweight ping method for health checks.
        If false, uses listTools method for health checks instead.
    connection_type:
      $ref: '#/MCPConnectionType'
    auth_type:
      # OpenAPI 3.0 ignores keys placed beside `$ref`; wrapping the reference
      # in allOf keeps the description visible to tooling.
      description: Authentication type for the MCP connection
      allOf:
        - $ref: '#/MCPAuthType'
    oauth_config_id:
      type: string
      description: |
        OAuth config ID for OAuth authentication.
        Set after OAuth flow is completed. References the oauth_configs table.
        Only relevant when auth_type is "oauth".
    headers:
      type: object
      additionalProperties:
        type: string
      description: |
        Custom headers to include in requests.
        Only used when auth_type is "headers".
    oauth_config:
      # Wrapped in allOf for the same reason as auth_type.
      description: |
        OAuth configuration for initiating OAuth flow.
        Only include this when creating a client with auth_type "oauth".
        This will trigger the OAuth flow and return an authorization URL.
      allOf:
        - $ref: '../../schemas/management/oauth.yaml#/OAuthConfigRequest'
    tools_to_execute:
      type: array
      items:
        type: string
      description: |
        Include-only list for tools.
        ["*"] => all tools are included
        [] => no tools are included
        ["tool1", "tool2"] => include only the specified tools
    tools_to_auto_execute:
      type: array
      items:
        type: string
      description: |
        List of tools that can be auto-executed without user approval.
        Must be a subset of tools_to_execute.
        ["*"] => all executable tools can be auto-executed
        [] => no tools are auto-executed
        ["tool1", "tool2"] => only specified tools can be auto-executed
    allow_on_all_virtual_keys:
      type: boolean
      default: false
      description: |
        When true, this MCP client's tools are available to all virtual keys by default,
        without requiring an explicit virtual key assignment.
        An explicit virtual key config always overrides this setting for that key.
# HTTP variant of the create request: base fields plus a required
# connection_string, with connection_type pinned to `http`.
MCPClientCreateRequestHTTP:
  allOf:
    - $ref: '#/MCPClientCreateRequestBase'
    - type: object
      required:
        - connection_string
      properties:
        connection_type:
          type: string
          enum:
            - http
        connection_string:
          description: HTTP URL (required for HTTP connection type)
          type: string
# SSE variant of the create request: base fields plus a required
# connection_string, with connection_type pinned to `sse`.
MCPClientCreateRequestSSE:
  allOf:
    - $ref: '#/MCPClientCreateRequestBase'
    - type: object
      required:
        - connection_string
      properties:
        connection_type:
          type: string
          enum:
            - sse
        connection_string:
          description: SSE URL (required for SSE connection type)
          type: string
# STDIO variant of the create request: base fields plus a required
# stdio_config, with connection_type pinned to `stdio`.
MCPClientCreateRequestSTDIO:
  allOf:
    - $ref: '#/MCPClientCreateRequestBase'
    - type: object
      required:
        - stdio_config
      properties:
        connection_type:
          type: string
          enum:
            - stdio
        stdio_config:
          # OpenAPI 3.0 ignores keys placed beside `$ref`; wrapping the
          # reference in allOf keeps the description visible to tooling.
          description: STDIO configuration (required for STDIO connection type)
          allOf:
            - $ref: '#/MCPStdioConfig'
# Update payload for an existing MCP client. Unlike the create request it
# also carries tool_pricing and vk_configs.
MCPClientUpdateRequest:
  type: object
  description: MCP client configuration for updating an existing client (includes tool_pricing)
  properties:
    client_id:
      type: string
      description: Unique identifier for the MCP client
    name:
      type: string
      description: Display name for the MCP client
    is_code_mode_client:
      type: boolean
      description: Whether this client is available in code mode
    connection_type:
      $ref: '#/MCPConnectionType'
    connection_string:
      type: string
      description: HTTP or SSE URL (required for HTTP or SSE connections)
    stdio_config:
      $ref: '#/MCPStdioConfig'
    auth_type:
      # OpenAPI 3.0 ignores keys placed beside `$ref`; wrapping the reference
      # in allOf keeps the description visible to tooling.
      description: Authentication type for the MCP connection
      allOf:
        - $ref: '#/MCPAuthType'
    oauth_config_id:
      type: string
      description: |
        OAuth config ID for OAuth authentication.
        References the oauth_configs table.
        Only relevant when auth_type is "oauth".
    headers:
      type: object
      additionalProperties:
        type: string
      description: |
        Custom headers to include in requests.
        Only used when auth_type is "headers".
    tools_to_execute:
      type: array
      items:
        type: string
      description: |
        Include-only list for tools.
        ["*"] => all tools are included
        [] => no tools are included
        ["tool1", "tool2"] => include only the specified tools
    tools_to_auto_execute:
      type: array
      items:
        type: string
      description: |
        List of tools that can be auto-executed without user approval.
        Must be a subset of tools_to_execute.
        ["*"] => all executable tools can be auto-executed
        [] => no tools are auto-executed
        ["tool1", "tool2"] => only specified tools can be auto-executed
    tool_pricing:
      type: object
      additionalProperties:
        type: number
        format: double
      description: |
        Per-tool cost in USD for execution.
        Key is the tool name, value is the cost per execution.
        Example: {"read_file": 0.001, "write_file": 0.002}
        Note: Only available when updating an existing client after tools have been fetched.
    allow_on_all_virtual_keys:
      type: boolean
      default: false
      description: |
        When true, this MCP client's tools are accessible to all virtual keys without requiring
        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
    vk_configs:
      type: array
      items:
        $ref: '#/MCPVKConfig'
      description: |
        When provided, replaces all virtual key assignments for this MCP client.
        Each entry specifies a virtual key and the tools it is allowed to call.
        To remove all VK access, provide an empty array [].
        Omit this field to leave existing VK assignments unchanged.
# Request-side virtual key assignment: both the key id and its allowed tool
# list are required.
MCPVKConfig:
  description: Per-virtual-key tool access configuration for an MCP client
  type: object
  required:
    - virtual_key_id
    - tools_to_execute
  properties:
    virtual_key_id:
      description: ID of the virtual key
      type: string
    tools_to_execute:
      type: array
      items:
        type: string
      description: |
        Tools this virtual key is allowed to call on this MCP server.
        ["*"] => all tools allowed
        ["tool1", "tool2"] => only the specified tools
# Response-side MCP client configuration: identity, transport, auth,
# tool allow-lists, per-tool pricing, and the all-virtual-keys switch.
MCPClientConfig:
  type: object
  description: Full MCP client configuration (used in responses)
  properties:
    client_id:
      type: string
      description: Unique identifier for the MCP client
    name:
      type: string
      description: Display name for the MCP client
    is_code_mode_client:
      type: boolean
      description: Whether this client is available in code mode
    connection_type:
      $ref: '#/MCPConnectionType'
    connection_string:
      type: string
      description: HTTP or SSE URL (required for HTTP or SSE connections)
    stdio_config:
      $ref: '#/MCPStdioConfig'
    auth_type:
      # OpenAPI 3.0 ignores keys placed beside `$ref`; wrapping the reference
      # in allOf keeps the description visible to tooling.
      description: Authentication type for the MCP connection
      allOf:
        - $ref: '#/MCPAuthType'
    oauth_config_id:
      type: string
      description: |
        OAuth config ID for OAuth authentication.
        References the oauth_configs table.
        Only set when auth_type is "oauth".
    headers:
      type: object
      additionalProperties:
        type: string
      description: |
        Custom headers to include in requests.
        Only used when auth_type is "headers".
    tools_to_execute:
      type: array
      items:
        type: string
      description: |
        Include-only list for tools.
        ["*"] => all tools are included
        [] => no tools are included
        ["tool1", "tool2"] => include only the specified tools
    tools_to_auto_execute:
      type: array
      items:
        type: string
      description: |
        List of tools that can be auto-executed without user approval.
        Must be a subset of tools_to_execute.
        ["*"] => all executable tools can be auto-executed
        [] => no tools are auto-executed
        ["tool1", "tool2"] => only specified tools can be auto-executed
    tool_pricing:
      type: object
      additionalProperties:
        type: number
        format: double
      description: |
        Per-tool cost in USD for execution.
        Key is the tool name, value is the cost per execution.
        Example: {"read_file": 0.001, "write_file": 0.002}
    allow_on_all_virtual_keys:
      type: boolean
      default: false
      description: |
        When true, this MCP client's tools are accessible to all virtual keys without requiring
        explicit per-key assignment. All tools are allowed by default. If a virtual key has an
        explicit MCP config for this client, that config takes precedence and overrides this behaviour.
# Tool function definition: name, description, free-form parameters object,
# and a strict flag. Note that `description` below is a property name.
ChatToolFunction:
  description: Tool function definition
  type: object
  properties:
    name:
      type: string
    description:
      type: string
    parameters:
      type: object
      additionalProperties: true
    strict:
      type: boolean
# Response-side virtual key assignment: key id, key display name, and the
# tools that key may call.
MCPVKConfigResponse:
  description: Per-virtual-key tool access configuration as returned in list/get responses
  type: object
  properties:
    virtual_key_id:
      description: ID of the virtual key
      type: string
    virtual_key_name:
      description: Display name of the virtual key
      type: string
    tools_to_execute:
      type: array
      items:
        type: string
      description: |
        Tools this virtual key is allowed to call on this MCP client.
        ["*"] => all tools allowed
        ["tool1", "tool2"] => only the specified tools
# An MCP client as returned by the API: its config, tool list,
# connection state, and virtual key assignments.
MCPClient:
  description: Connected MCP client with its tools
  type: object
  properties:
    config:
      $ref: '#/MCPClientConfig'
    tools:
      type: array
      items:
        $ref: '#/ChatToolFunction'
    state:
      $ref: '#/MCPConnectionState'
    vk_configs:
      description: Virtual key assignments for this MCP client
      type: array
      items:
        $ref: '#/MCPVKConfigResponse'
# Tool execution request body; one of two shapes depending on the `format`
# query parameter.
ExecuteToolRequest:
  oneOf:
    # OpenAPI 3.0 ignores keys placed beside `$ref`, so each branch wraps its
    # reference in allOf to keep the title and description visible to tooling.
    - title: Chat (Default)
      description: Chat format - uses ChatAssistantMessageToolCall schema
      allOf:
        - $ref: '../../schemas/inference/chat.yaml#/ChatAssistantMessageToolCall'
    - title: Responses
      description: Responses format - uses ResponsesToolMessage schema
      allOf:
        - $ref: '#/ResponsesToolMessage'
  description: |
    MCP tool execution request. The schema depends on the `format` query parameter:
    - `format=chat` or empty (default): Use `ChatAssistantMessageToolCall` schema
    - `format=responses`: Use `ResponsesToolMessage` schema
# Tool execution response body; one of two shapes (chat message or
# responses message).
ExecuteToolResponse:
  oneOf:
    # OpenAPI 3.0 ignores keys placed beside `$ref`, so each branch wraps its
    # reference in allOf to keep the title and description visible to tooling.
    - title: Chat (Default)
      description: Chat format response
      allOf:
        - $ref: '../../schemas/inference/chat.yaml#/ChatMessage'
    - title: Responses
      description: Responses format response
      allOf:
        - $ref: '../../schemas/inference/responses.yaml#/ResponsesMessage'
  description: |
    MCP tool execution response.
# Responses-format tool message; only `name` is required.
ResponsesToolMessage:
  description: Tool message for Responses API format
  type: object
  required:
    - name
  properties:
    call_id:
      description: Common call ID for tool calls and outputs
      type: string
    name:
      description: Tool function name (required for execution)
      type: string
    arguments:
      description: Tool function arguments as JSON string
      type: string
    output:
      description: Tool execution output
      type: object
      additionalProperties: true
    action:
      description: Tool action configuration
      type: object
      additionalProperties: true
    error:
      description: Error message if tool execution failed
      type: string

View File

@@ -0,0 +1,305 @@
# OAuth API schemas
# Authentication modes for MCP connections (copy kept in the OAuth schema file).
MCPAuthType:
  description: |
    Authentication type for MCP connections:
    - none: No authentication
    - headers: Header-based authentication (API keys, custom headers, etc.)
    - oauth: OAuth 2.0 authentication (shared admin token)
    - per_user_oauth: Per-user OAuth 2.1 (each end-user authenticates individually)
  type: string
  enum:
    - none
    - headers
    - oauth
    - per_user_oauth
# OAuth settings supplied when creating an MCP client. Every field is
# optional; the descriptions explain when a value can be discovered instead.
# NOTE(review): the descriptions refer to `server_url`, which is not a
# property of this schema — confirm where that value is supplied.
OAuthConfigRequest:
  type: object
  description: OAuth configuration for MCP client creation
  properties:
    client_id:
      type: string
      description: |
        OAuth client ID. Optional if client supports dynamic client registration (RFC 7591).
        If not provided, the server_url must be set for OAuth discovery and dynamic registration.
    client_secret:
      type: string
      # `password` is the OpenAPI string format that hints UIs to mask the value.
      format: password
      description: |
        OAuth client secret. Optional for public clients using PKCE or clients obtained via dynamic registration.
    authorize_url:
      type: string
      description: |
        OAuth authorization endpoint URL. Optional - will be discovered from server_url if not provided.
    token_url:
      type: string
      description: |
        OAuth token endpoint URL. Optional - will be discovered from server_url if not provided.
    registration_url:
      type: string
      description: |
        Dynamic client registration endpoint URL (RFC 7591). Optional - will be discovered from server_url if not provided.
    scopes:
      type: array
      items:
        type: string
      description: |
        OAuth scopes requested. Optional - can be discovered from server_url if not provided.
        Example: ["read", "write"]
# Returned when an OAuth flow starts: status, message, the created config id,
# the authorization URL, its expiry, and the originating MCP client id.
OAuthFlowInitiation:
  description: Response when initiating an OAuth flow
  type: object
  properties:
    status:
      type: string
      enum:
        - pending_oauth
    message:
      type: string
    oauth_config_id:
      description: ID of the OAuth config created for this flow
      type: string
    authorize_url:
      description: URL to redirect the user to for authorization
      type: string
    expires_at:
      description: When the OAuth authorization request expires
      type: string
      format: date-time
    mcp_client_id:
      description: The MCP client ID that initiated this OAuth flow
      type: string
# Status of an OAuth configuration; the token_* fields are documented as
# present only once the status is `authorized`.
OAuthConfigStatus:
  description: Status of an OAuth configuration
  type: object
  properties:
    id:
      description: OAuth config ID
      type: string
    status:
      type: string
      enum:
        - pending
        - authorized
        - failed
      description: |
        Current status of the OAuth flow:
        - pending: User has not yet authorized
        - authorized: User authorized and token is stored
        - failed: Authorization failed
    created_at:
      description: When this OAuth config was created
      type: string
      format: date-time
    expires_at:
      description: When this OAuth config expires (becomes invalid if not completed)
      type: string
      format: date-time
    token_id:
      description: ID of the associated OAuth token (only present if status is authorized)
      type: string
    token_expires_at:
      description: When the OAuth access token expires (only present if status is authorized)
      type: string
      format: date-time
    token_scopes:
      description: Scopes granted in the OAuth token (only present if status is authorized)
      type: array
      items:
        type: string
# Stored OAuth token record: access/refresh tokens, type, expiry, scopes,
# and the last refresh time.
OAuthToken:
  description: OAuth access and refresh tokens
  type: object
  properties:
    id:
      description: Unique token identifier
      type: string
    access_token:
      description: OAuth access token
      type: string
    refresh_token:
      description: OAuth refresh token for obtaining new access tokens
      type: string
    token_type:
      description: Token type (typically "Bearer")
      type: string
    expires_at:
      description: When the access token expires
      type: string
      format: date-time
    scopes:
      description: Scopes granted in this token
      type: array
      items:
        type: string
    last_refreshed_at:
      description: When the token was last refreshed
      type: string
      format: date-time
# Per-User OAuth 2.1 Authorization Server schemas
# Dynamic client registration request; only `redirect_uris` is required.
PerUserOAuthClientRegistrationRequest:
  description: |
    Dynamic Client Registration request per RFC 7591.
    MCP clients (Claude Code, Cursor, etc.) call this to obtain a client_id
    before initiating the authorization flow.
  type: object
  required:
    - redirect_uris
  properties:
    client_name:
      description: Human-readable name of the client application
      type: string
      example: Claude Code
    redirect_uris:
      description: List of allowed redirect URIs for this client
      type: array
      items:
        type: string
      example:
        - 'http://localhost:54321/callback'
    grant_types:
      description: 'Supported grant types. Defaults to ["authorization_code"]'
      type: array
      items:
        type: string
      example:
        - authorization_code
    response_types:
      description: Supported response types
      type: array
      items:
        type: string
      example:
        - code
    token_endpoint_auth_method:
      description: Token endpoint authentication method. Always "none" (public client)
      type: string
      example: 'none'
    scope:
      description: Space-separated list of requested scopes
      type: string
      example: 'mcp:read mcp:write'
# Dynamic client registration response: the issued client_id plus the
# registered client attributes.
PerUserOAuthClientRegistrationResponse:
  description: Dynamic Client Registration response per RFC 7591
  type: object
  properties:
    client_id:
      description: Issued client identifier
      type: string
      example: '550e8400-e29b-41d4-a716-446655440000'
    client_name:
      description: Human-readable name of the client application
      type: string
    redirect_uris:
      description: Registered redirect URIs
      type: array
      items:
        type: string
    grant_types:
      description: Registered grant types
      type: array
      items:
        type: string
    response_types:
      description: Registered response types
      type: array
      items:
        type: string
    token_endpoint_auth_method:
      description: Token endpoint authentication method (always "none")
      type: string
# Token endpoint response: access token, token type, lifetime in seconds,
# and granted scopes.
PerUserOAuthTokenResponse:
  description: OAuth 2.1 token response from the token endpoint
  type: object
  properties:
    access_token:
      description: Bifrost-issued access token (24h TTL). Use as Bearer token on /mcp requests.
      type: string
    token_type:
      description: Token type, always "Bearer"
      type: string
      example: Bearer
    expires_in:
      description: Seconds until the access token expires (86400 for 24h)
      type: integer
      example: 86400
    scope:
      description: Space-separated scopes granted
      type: string
# Protected resource metadata document: resource URL, authorization servers,
# supported scopes, and supported bearer methods.
ProtectedResourceMetadata:
  description: |
    OAuth 2.0 Protected Resource Metadata per RFC 9728.
    Returned by /.well-known/oauth-protected-resource to tell MCP clients
    which authorization server(s) protect the /mcp endpoint.
  type: object
  properties:
    resource:
      description: URL of the protected resource (Bifrost's /mcp endpoint)
      type: string
      example: 'https://your-bifrost-domain.com/mcp'
    authorization_servers:
      description: List of authorization server issuer URLs
      type: array
      items:
        type: string
      example:
        - 'https://your-bifrost-domain.com'
    scopes_supported:
      description: Scopes supported by this resource
      type: array
      items:
        type: string
      example:
        - 'mcp:read'
        - 'mcp:write'
    bearer_methods_supported:
      description: Supported methods for passing Bearer tokens
      type: array
      items:
        type: string
      example:
        - header
# Authorization server metadata document: issuer, endpoint URLs, and the
# supported response types, grant types, PKCE methods, auth methods, scopes.
AuthorizationServerMetadata:
  description: |
    OAuth 2.0 Authorization Server Metadata per RFC 8414.
    Returned by /.well-known/oauth-authorization-server to let MCP clients
    discover Bifrost's OAuth endpoints and capabilities.
  type: object
  properties:
    issuer:
      description: Authorization server issuer URL (Bifrost base URL)
      type: string
      example: 'https://your-bifrost-domain.com'
    authorization_endpoint:
      description: Authorization endpoint URL
      type: string
      example: 'https://your-bifrost-domain.com/api/oauth/per-user/authorize'
    token_endpoint:
      description: Token endpoint URL
      type: string
      example: 'https://your-bifrost-domain.com/api/oauth/per-user/token'
    registration_endpoint:
      description: Dynamic client registration endpoint URL
      type: string
      example: 'https://your-bifrost-domain.com/api/oauth/per-user/register'
    response_types_supported:
      type: array
      items:
        type: string
      example:
        - code
    grant_types_supported:
      type: array
      items:
        type: string
      example:
        - authorization_code
    code_challenge_methods_supported:
      description: Supported PKCE methods (only S256)
      type: array
      items:
        type: string
      example:
        - S256
    token_endpoint_auth_methods_supported:
      description: Supported token endpoint auth methods (public clients only)
      type: array
      items:
        type: string
      example:
        - 'none'
    scopes_supported:
      type: array
      items:
        type: string
      example:
        - 'mcp:read'
        - 'mcp:write'

View File

@@ -0,0 +1,131 @@
# Plugins API schemas
# Status record for a single plugin; embedded in Plugin via its `status`
# property.
PluginStatus:
  type: object
  description: Plugin status information
  properties:
    name:
      type: string
      description: Display name of the plugin
    status:
      type: string
      enum:
        - active
        - error
        - disabled
        - loading
        - uninitialized
        - unloaded
        - loaded
    logs:
      type: array
      items:
        type: string
    types:
      type: array
      description: Plugin types indicating which interfaces the plugin implements
      items:
        type: string
        enum:
          - llm
          - mcp
          - http
          - observability
  # Schema-level example covering every declared property.
  example:
    name: my_custom_plugin
    status: active
    logs:
      - 'plugin my_custom_plugin initialized successfully'
    types:
      - llm
      - http
# Plugin configuration record plus its nested status.
# Referenced by ListPluginsResponse.plugins and PluginResponse.plugin.
Plugin:
  type: object
  description: Plugin configuration
  properties:
    id:
      type: integer
      description: Plugin ID (auto-generated)
    name:
      type: string
      description: Display name of the plugin (from config)
    actualName:
      type: string
      description: >-
        Actual plugin name from GetName() (used as map key in plugin status).
        Only populated for active plugins.
    enabled:
      type: boolean
    # Free-form object: any keys are accepted.
    config:
      type: object
      additionalProperties: true
    isCustom:
      type: boolean
    path:
      type: string
    status:
      $ref: '#/PluginStatus'
      description: >-
        Current plugin status including types array (only populated for active
        plugins)
    created_at:
      type: string
      format: date-time
    version:
      type: integer
      format: int16
    updated_at:
      type: string
      format: date-time
    config_hash:
      type: string
  # The example omits id, created_at, version, updated_at and config_hash.
  example:
    name: my_custom_plugin
    actualName: MyCustomPlugin
    enabled: true
    config:
      api_key: 'xxx'
    isCustom: true
    path: '/plugins/my_custom_plugin.so'
    status:
      name: my_custom_plugin
      status: active
      logs:
        - 'plugin my_custom_plugin initialized successfully'
      types:
        - llm
        - http
# Collection wrapper: an array of Plugin objects plus an integer `count`.
ListPluginsResponse:
  type: object
  description: List plugins response
  properties:
    plugins:
      type: array
      items:
        $ref: '#/Plugin'
    count:
      type: integer
# Creation payload; `name` is the only mandatory field.
CreatePluginRequest:
  type: object
  description: Create plugin request
  required:
    - name
  properties:
    name:
      type: string
    enabled:
      type: boolean
    # Free-form object: any keys are accepted.
    config:
      type: object
      additionalProperties: true
    path:
      type: string
# Update payload; same fields as CreatePluginRequest minus `name`, and nothing
# is required.
UpdatePluginRequest:
  type: object
  description: Update plugin request
  properties:
    enabled:
      type: boolean
    # Free-form object: any keys are accepted.
    config:
      type: object
      additionalProperties: true
    path:
      type: string
# Envelope pairing a text message with a Plugin object.
PluginResponse:
  type: object
  description: Plugin operation response
  properties:
    message:
      type: string
    plugin:
      $ref: '#/Plugin'

View File

@@ -0,0 +1,276 @@
# Prompt Repository schemas
# Folder record; referenced by Prompt.folder.
Folder:
  type: object
  description: Prompt folder
  properties:
    id:
      type: string
      description: Unique folder ID (UUID)
    name:
      type: string
    # A property that happens to be called `description` (nullable string).
    description:
      type: string
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
    prompts_count:
      type: integer
      description: Number of prompts in this folder (virtual field)
# Prompt record with its optional folder, versions and sessions.
Prompt:
  type: object
  description: Prompt
  properties:
    id:
      type: string
      description: Unique prompt ID (UUID)
    name:
      type: string
    folder_id:
      type: string
      nullable: true
    folder:
      $ref: '#/Folder'
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
    versions:
      type: array
      items:
        $ref: '#/PromptVersion'
    sessions:
      type: array
      items:
        $ref: '#/PromptSession'
    # Single PromptVersion, separate from the `versions` array above.
    latest_version:
      $ref: '#/PromptVersion'
# Version record; referenced by Prompt.versions and Prompt.latest_version.
PromptVersion:
  type: object
  description: Prompt version (immutable snapshot)
  properties:
    id:
      type: integer
      description: Auto-increment version ID
    prompt_id:
      type: string
    version_number:
      type: integer
    commit_message:
      type: string
    model_params:
      type: object
      additionalProperties: true
      description: Model parameters (flexible JSON object)
    provider:
      type: string
    model:
      type: string
    is_latest:
      type: boolean
    created_at:
      type: string
      format: date-time
    messages:
      type: array
      items:
        $ref: '#/PromptVersionMessage'
# Element type of PromptVersion.messages.
PromptVersionMessage:
  type: object
  description: Message within a prompt version
  properties:
    id:
      type: integer
    order_index:
      type: integer
    message:
      type: object
      additionalProperties: true
      description: Opaque message content (JSON)
# Session record; referenced by Prompt.sessions.
PromptSession:
  type: object
  description: Prompt playground session
  properties:
    id:
      type: integer
      description: Auto-increment session ID
    prompt_id:
      type: string
    version_id:
      type: integer
      nullable: true
      description: Version this session was forked from
    name:
      type: string
    model_params:
      type: object
      additionalProperties: true
      description: Model parameters (flexible JSON object)
    provider:
      type: string
    model:
      type: string
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
    messages:
      type: array
      items:
        $ref: '#/PromptSessionMessage'
# Element type of PromptSession.messages; same shape as PromptVersionMessage.
PromptSessionMessage:
  type: object
  description: Message within a prompt session
  properties:
    id:
      type: integer
    order_index:
      type: integer
    message:
      type: object
      additionalProperties: true
      description: Opaque message content (JSON)
# Request schemas
# Folder creation payload; only `name` is mandatory.
CreateFolderRequest:
  type: object
  required:
    - name
  properties:
    name:
      type: string
    # A property named `description`, not schema documentation.
    description:
      type: string
# Folder update payload; nothing is required and `description` is nullable.
UpdateFolderRequest:
  type: object
  properties:
    name:
      type: string
    # A property named `description`, not schema documentation.
    description:
      type: string
      nullable: true
# Prompt creation payload; `folder_id` is optional.
CreatePromptRequest:
  type: object
  required:
    - name
  properties:
    name:
      type: string
    folder_id:
      type: string
# Prompt update payload; nothing is required and `folder_id` is nullable.
UpdatePromptRequest:
  type: object
  properties:
    name:
      type: string
    folder_id:
      type: string
      nullable: true
# Version creation payload; every declared property is required.
CreateVersionRequest:
  type: object
  required:
    - commit_message
    - messages
    - model_params
    - provider
    - model
  properties:
    commit_message:
      type: string
    messages:
      type: array
      items:
        type: object
        additionalProperties: true
      description: Array of message objects
    model_params:
      type: object
      additionalProperties: true
    provider:
      type: string
    model:
      type: string
# Session creation payload; `version_id` and `messages` are the optional
# fields.
CreateSessionRequest:
  type: object
  required:
    - name
    - model_params
    - provider
    - model
  properties:
    name:
      type: string
    version_id:
      type: integer
      description: Fork from this version
    messages:
      type: array
      items:
        type: object
        additionalProperties: true
    model_params:
      type: object
      additionalProperties: true
    provider:
      type: string
    model:
      type: string
# Session update payload; every declared property is required.
UpdateSessionRequest:
  type: object
  required:
    - name
    - messages
    - model_params
    - provider
    - model
  properties:
    name:
      type: string
    messages:
      type: array
      items:
        type: object
        additionalProperties: true
    model_params:
      type: object
      additionalProperties: true
    provider:
      type: string
    model:
      type: string
# Payload carrying only the (required) session name.
RenameSessionRequest:
  type: object
  required:
    - name
  properties:
    name:
      type: string
# Payload carrying only the (required) commit message.
CommitSessionRequest:
  type: object
  required:
    - commit_message
  properties:
    commit_message:
      type: string

View File

@@ -0,0 +1,491 @@
# Providers API schemas
# String enum; referenced by ProviderResponse.provider_status.
ProviderStatus:
  type: string
  enum:
    - active
    - error
    - deleted
  description: Status of the provider
# Shared by ProviderResponse, AddProviderRequest and UpdateProviderRequest via
# their `network_config` property.
NetworkConfig:
  type: object
  description: Network configuration for provider connections
  properties:
    base_url:
      type: string
      description: Base URL for the provider (optional)
    extra_headers:
      type: object
      additionalProperties:
        type: string
      description: Additional headers to include in requests
    default_request_timeout_in_seconds:
      type: integer
      description: Default timeout for requests
    max_retries:
      type: integer
      description: Maximum number of retries
    retry_backoff_initial:
      type: integer
      format: int64
      description: Initial backoff duration in milliseconds
    retry_backoff_max:
      type: integer
      format: int64
      description: Maximum backoff duration in milliseconds
    insecure_skip_verify:
      type: boolean
      description: >-
        Disable TLS certificate verification for provider connections. This
        bypasses server certificate validation and should be used only as a
        last resort when a trusted CA chain cannot be configured. Prefer
        ca_cert_pem for self-signed or private CA deployments.
    ca_cert_pem:
      type: string
      description: >-
        PEM-encoded CA certificate to trust for provider endpoint connections
        (e.g. self-signed or internal CA)
# Two integer knobs; referenced as `concurrency_and_buffer_size` by the
# provider request/response schemas in this file.
ConcurrencyAndBufferSize:
  type: object
  description: Concurrency settings
  properties:
    concurrency:
      type: integer
      description: Number of concurrent operations
    buffer_size:
      type: integer
      description: Size of the buffer
# Referenced as `proxy_config` by the provider request/response schemas.
ProxyConfig:
  type: object
  description: Proxy configuration
  properties:
    # A property literally named `type`; its value is one of the enum strings.
    type:
      type: string
      enum:
        - none
        - http
        - socks5
        - environment
    url:
      type: string
    username:
      type: string
    password:
      type: string
    ca_cert_pem:
      type: string
# Referenced by Key.azure_key_config. Every scalar setting is an EnvVar from
# the shared management common.yaml; only `scopes` is a plain string array.
AzureKeyConfig:
  type: object
  description: Azure-specific key configuration
  properties:
    endpoint:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    api_version:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    client_id:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    client_secret:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    tenant_id:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    scopes:
      type: array
      items:
        type: string
      description: List of scopes to use for authentication
# Referenced by Key.vertex_key_config; all four properties are EnvVar refs.
VertexKeyConfig:
  type: object
  description: Vertex-specific key configuration
  properties:
    project_id:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    project_number:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    region:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    auth_credentials:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
# Referenced by Key.bedrock_key_config.
BedrockKeyConfig:
  type: object
  description: AWS Bedrock-specific key configuration
  properties:
    access_key:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    secret_key:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    session_token:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    region:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    arn:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    # Inline object holding a list of bucket entries.
    # NOTE(review): bucket_name / prefix / is_default are nested under the
    # bucket item here; the source view had no indentation, so confirm.
    batch_s3_config:
      type: object
      properties:
        buckets:
          type: array
          items:
            type: object
            properties:
              bucket_name:
                type: string
              prefix:
                type: string
              is_default:
                type: boolean
# This is the schema Key.vllm_key_config points at; `url` is mandatory.
VllmKeyConfig:
  type: object
  description: VLLM-specific key configuration
  properties:
    url:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
    model_name:
      type: string
  required:
    - url
# Referenced by Key.ollama_key_config; a single mandatory `url`.
OllamaKeyConfig:
  type: object
  description: Ollama-specific key configuration
  properties:
    url:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
  required:
    - url
# Referenced by Key.replicate_key_config; a single boolean switch.
ReplicateKeyConfig:
  type: object
  description: Replicate-specific key configuration
  properties:
    use_deployments_endpoint:
      type: boolean
      description: >-
        Whether to use the deployments endpoint instead of the models endpoint
# Referenced by Key.sgl_key_config; a single mandatory `url`.
SglKeyConfig:
  type: object
  description: SGLang-specific key configuration
  properties:
    url:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
  required:
    - url
# vLLM per-key routing configuration.
#
# Declared exactly once: a repeated top-level key is an invalid duplicate
# mapping key in YAML, and most parsers silently keep only the last
# occurrence.
#
# `url` is listed under `required` to match its own description and the
# sibling VllmKeyConfig / OllamaKeyConfig / SglKeyConfig schemas.
#
# NOTE(review): Key.vllm_key_config references `#/VllmKeyConfig`, not this
# schema; check whether anything outside this file still references
# `VLLMKeyConfig` before removing it.
VLLMKeyConfig:
  type: object
  description: >-
    vLLM-specific key configuration for per-key routing to different vLLM
    instances
  required:
    - url
  properties:
    url:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
      description: vLLM server base URL (required)
    model_name:
      type: string
      description: Exact model name served on this vLLM instance
# Provider API key record; element type of ListProviderKeysResponse.keys.
Key:
  type: object
  description: API key configuration
  properties:
    id:
      type: string
      description: Unique identifier for the key
    name:
      type: string
      description: Name of the key
    value:
      $ref: '../../schemas/management/common.yaml#/EnvVar'
      description: API key value (redacted in responses)
    models:
      type: array
      items:
        type: string
      description: List of models this key can access (whitelist)
    blacklisted_models:
      type: array
      items:
        type: string
      description: List of models this key cannot access (blacklist)
    weight:
      type: number
      description: Weight for load balancing
    # String-to-string map; both keys and values must be non-empty.
    aliases:
      type: object
      propertyNames:
        minLength: 1
      additionalProperties:
        type: string
        minLength: 1
      description: >-
        Model alias mappings — maps a user-facing model name to a
        provider-specific identifier (deployment name, inference profile ID,
        fine-tuned model ID, etc.)
    # Provider-specific sub-configurations, each defined in this file.
    azure_key_config:
      $ref: '#/AzureKeyConfig'
    vertex_key_config:
      $ref: '#/VertexKeyConfig'
    bedrock_key_config:
      $ref: '#/BedrockKeyConfig'
    vllm_key_config:
      $ref: '#/VllmKeyConfig'
    ollama_key_config:
      $ref: '#/OllamaKeyConfig'
    sgl_key_config:
      $ref: '#/SglKeyConfig'
    replicate_key_config:
      $ref: '#/ReplicateKeyConfig'
    enabled:
      type: boolean
      description: Whether the key is active (defaults to true)
    use_for_batch_api:
      type: boolean
      description: Whether this key can be used for batch API operations
    config_hash:
      type: string
      description: Hash of config.json version, used for change detection
    status:
      type: string
      description: Status of key (e.g., success, list_models_failed)
    # A property named `description`, carrying status/error text.
    description:
      type: string
      description: Error or status description for the key
# One boolean flag per request type; referenced by
# CustomProviderConfig.allowed_requests.
AllowedRequests:
  type: object
  description: Allowed request types for custom providers
  properties:
    list_models:
      type: boolean
    # Text / chat / responses
    text_completion:
      type: boolean
    text_completion_stream:
      type: boolean
    chat_completion:
      type: boolean
    chat_completion_stream:
      type: boolean
    responses:
      type: boolean
    responses_stream:
      type: boolean
    count_tokens:
      type: boolean
    embedding:
      type: boolean
    # Audio
    speech:
      type: boolean
    speech_stream:
      type: boolean
    transcription:
      type: boolean
    transcription_stream:
      type: boolean
    # Images
    image_generation:
      type: boolean
    image_generation_stream:
      type: boolean
    # Batch operations
    batch_create:
      type: boolean
    batch_list:
      type: boolean
    batch_retrieve:
      type: boolean
    batch_cancel:
      type: boolean
    batch_results:
      type: boolean
    # File operations
    file_upload:
      type: boolean
    file_list:
      type: boolean
    file_retrieve:
      type: boolean
    file_delete:
      type: boolean
    file_content:
      type: boolean
# Referenced as `custom_provider_config` by the provider request/response
# schemas in this file.
CustomProviderConfig:
  type: object
  description: Custom provider configuration
  properties:
    is_key_less:
      type: boolean
    base_provider_type:
      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
    allowed_requests:
      $ref: '#/AllowedRequests'
    # String-to-string map.
    request_path_overrides:
      type: object
      additionalProperties:
        type: string
# Provider record; element type of ListProvidersResponse.providers.
ProviderResponse:
  type: object
  description: Provider configuration response
  properties:
    name:
      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
    network_config:
      $ref: '#/NetworkConfig'
    concurrency_and_buffer_size:
      $ref: '#/ConcurrencyAndBufferSize'
    proxy_config:
      $ref: '#/ProxyConfig'
    send_back_raw_request:
      type: boolean
    send_back_raw_response:
      type: boolean
    store_raw_request_response:
      type: boolean
    custom_provider_config:
      $ref: '#/CustomProviderConfig'
    # Enum-valued status (ProviderStatus), distinct from the free-form
    # `status` string below.
    provider_status:
      $ref: '#/ProviderStatus'
    status:
      type: string
      description: Operational status (e.g., list_models_failed)
    # A property named `description`, carrying status/error text.
    description:
      type: string
      description: Error/status description
    config_hash:
      type: string
      description: Hash of config.json version, used for change detection
# Collection wrapper: an array of ProviderResponse plus an integer `total`.
ListProvidersResponse:
  type: object
  description: List providers response
  properties:
    providers:
      type: array
      items:
        $ref: '#/ProviderResponse'
    total:
      type: integer
# Provider creation payload; `provider` is the only mandatory field.
AddProviderRequest:
  type: object
  description: >-
    Add provider request. Keys are managed separately via
    /api/providers/{provider}/keys.
  required:
    - provider
  properties:
    provider:
      $ref: '../../schemas/inference/common.yaml#/ModelProvider'
    network_config:
      $ref: '#/NetworkConfig'
    concurrency_and_buffer_size:
      $ref: '#/ConcurrencyAndBufferSize'
    proxy_config:
      $ref: '#/ProxyConfig'
    send_back_raw_request:
      type: boolean
    send_back_raw_response:
      type: boolean
    store_raw_request_response:
      type: boolean
    custom_provider_config:
      $ref: '#/CustomProviderConfig'
# Provider update payload; same fields as AddProviderRequest minus `provider`,
# and nothing is required.
UpdateProviderRequest:
  type: object
  description: >-
    Update provider request. Keys are managed separately via
    /api/providers/{provider}/keys.
  properties:
    network_config:
      $ref: '#/NetworkConfig'
    concurrency_and_buffer_size:
      $ref: '#/ConcurrencyAndBufferSize'
    proxy_config:
      $ref: '#/ProxyConfig'
    send_back_raw_request:
      type: boolean
    send_back_raw_response:
      type: boolean
    store_raw_request_response:
      type: boolean
    custom_provider_config:
      $ref: '#/CustomProviderConfig'
# Collection wrapper: an array of Key plus an integer `total`.
ListProviderKeysResponse:
  type: object
  description: Response for listing keys for a provider
  properties:
    keys:
      type: array
      items:
        $ref: '#/Key'
    total:
      type: integer
# Element type of ListModelsResponse.models.
ModelResponse:
  type: object
  description: Model information
  properties:
    name:
      type: string
    provider:
      type: string
    accessible_by_keys:
      type: array
      items:
        type: string
# Referenced by ModelDetailsResponse.architecture.
Architecture:
  type: object
  properties:
    modality:
      type: string
    tokenizer:
      type: string
    instruct_type:
      type: string
    input_modalities:
      type: array
      items:
        type: string
    output_modalities:
      type: array
      items:
        type: string
# Element type of ListModelDetailsResponse.models; extends the ModelResponse
# fields with token limits and an Architecture object.
ModelDetailsResponse:
  type: object
  description: Model details with capability metadata
  properties:
    name:
      type: string
    provider:
      type: string
    context_length:
      type: integer
    max_input_tokens:
      type: integer
    max_output_tokens:
      type: integer
    architecture:
      $ref: '#/Architecture'
    accessible_by_keys:
      type: array
      items:
        type: string
# Collection wrapper: an array of ModelResponse plus an integer `total`.
ListModelsResponse:
  type: object
  description: List models response
  properties:
    models:
      type: array
      items:
        $ref: '#/ModelResponse'
    total:
      type: integer
# Collection wrapper: an array of ModelDetailsResponse plus an integer `total`.
ListModelDetailsResponse:
  type: object
  description: List model details response
  properties:
    models:
      type: array
      items:
        $ref: '#/ModelDetailsResponse'
    total:
      type: integer

View File

@@ -0,0 +1,41 @@
# Session API schemas
# Login payload; both fields are mandatory.
LoginRequest:
  type: object
  description: Login request
  required:
    - username
    - password
  properties:
    username:
      type: string
    password:
      type: string
      # OpenAPI-defined hint telling documentation and client UIs to obscure
      # this input; it does not change what values are accepted.
      format: password
# Login result: a message plus the session token.
LoginResponse:
  type: object
  description: Login response
  properties:
    message:
      type: string
      example: Login successful
    token:
      type: string
      description: Session token
# Two boolean flags; neither is marked required.
IsAuthEnabledResponse:
  type: object
  description: Auth enabled status response
  properties:
    is_auth_enabled:
      type: boolean
    has_valid_token:
      type: boolean
# Logout result: a single message string.
LogoutResponse:
  type: object
  description: Logout response
  properties:
    message:
      type: string
      example: Logout successful

View File

@@ -0,0 +1,295 @@
# User record schema; referenced by UserResponse.user and
# ListUsersResponse.users.
# NOTE(review): indentation is flattened in this view, so it cannot be told
# from here whether created_at / updated_at below belong to the nested `role`
# object or to the user itself — confirm against the original file before
# re-indenting.
UserObject:
type: object
properties:
id:
type: string
description: Unique user identifier
name:
type: string
description: User's display name
email:
type: string
format: email
description: User's email address
role_id:
type: integer
nullable: true
description: ID of the assigned RBAC role
# Inline (not $ref'd) nullable object describing the role.
role:
type: object
nullable: true
description: RBAC role details
properties:
id:
type: integer
name:
type: string
# A property named `description` (string), not schema documentation.
description:
type: string
is_system_role:
type: boolean
created_at:
type: string
format: date-time
updated_at:
type: string
format: date-time
# Array of UserTeamSummaryEntry objects.
teams:
type: array
description: Teams the user belongs to.
items:
$ref: '#/UserTeamSummaryEntry'
# AccessProfile is declared nullable in its own definition.
access_profile:
$ref: '#/AccessProfile'
# User creation payload; `name` and `email` are mandatory, `role_id` is not.
CreateUserRequest:
  type: object
  required:
    - name
    - email
  properties:
    name:
      type: string
      description: User's display name
    email:
      type: string
      format: email
      # Single-quoted so the backslashes reach the regex engine untouched.
      pattern: '^[^\s@]+@[^\s@]+\.[^\s@]+$'
      description: User's email address (must be unique)
    role_id:
      type: integer
      description: Optional RBAC role ID to assign
# Envelope wrapping a single UserObject under `user`.
UserResponse:
  type: object
  properties:
    user:
      $ref: '#/UserObject'
# Paginated collection of UserObject.
ListUsersResponse:
  type: object
  properties:
    users:
      type: array
      items:
        $ref: '#/UserObject'
    total:
      type: integer
      description: Total number of users matching the query
    page:
      type: integer
      description: Current page number
    limit:
      type: integer
      description: Number of users per page
    total_pages:
      type: integer
      description: Total number of pages
    has_more:
      type: boolean
      description: Whether more pages are available
# ---- User Permissions ----
# `permissions` is a two-level map: outer keys map to objects whose own values
# are booleans.
PermissionsResponse:
  type: object
  properties:
    permissions:
      type: object
      description: >
        Map of resource names to their permitted operations. When SCIM is
        disabled, returns full permissions for all resources.
      additionalProperties:
        type: object
        additionalProperties:
          type: boolean
# ---- User Role ----
# Payload carrying only the (required) role ID.
AssignUserRoleRequest:
  type: object
  required:
    - role_id
  properties:
    role_id:
      type: integer
      description: ID of the RBAC role to assign
# ---- User Teams ----
# Element type of UserObject.teams; both business-unit fields are nullable.
UserTeamSummaryEntry:
  type: object
  properties:
    id:
      type: string
      description: Team ID
    name:
      type: string
      description: Team name
    business_unit_id:
      type: string
      nullable: true
      description: Business unit ID associated with this team (if any)
    business_unit_name:
      type: string
      nullable: true
      description: Business unit name associated with this team (if any)
# Element type of UserTeamsResponse.teams.
UserTeamEntry:
  type: object
  properties:
    id:
      type: string
      description: Team ID
    name:
      type: string
      description: Team name
    source:
      type: string
      description: >-
        How the user was added to this team (e.g. "manual", "scim_sync")
# Referenced by UserObject.access_profile; the object itself is nullable.
AccessProfile:
  type: object
  nullable: true
  description: Active or fallback user access profile, if assigned.
  properties:
    id:
      type: integer
    user_id:
      type: string
    parent_profile_id:
      type: integer
      nullable: true
    name:
      type: string
    is_active:
      type: boolean
    expires_at:
      type: string
      format: date-time
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
# Envelope wrapping an array of UserTeamEntry under `teams`.
UserTeamsResponse:
  type: object
  properties:
    teams:
      type: array
      items:
        $ref: '#/UserTeamEntry'
# Payload carrying the (required) list of team IDs.
UpdateUserTeamsRequest:
  type: object
  required:
    - team_ids
  properties:
    team_ids:
      type: array
      items:
        type: string
      description: >-
        List of team IDs to assign (replaces existing manual assignments;
        synced memberships are preserved)
# ---- Teams ----
# Team record; element type of ListTeamsResponse.teams.
TeamObject:
  type: object
  properties:
    id:
      type: string
      description: Team ID (derived from name)
    name:
      type: string
      description: Team name
    member_count:
      type: integer
      description: Number of members in the team
    virtual_key_count:
      type: integer
      description: Number of virtual keys assigned to the team
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
# Team creation payload carrying only the (required) name.
CreateTeamRequest:
  type: object
  required:
    - name
  properties:
    name:
      type: string
      description: Team name (must be unique)
# Team update payload; its single property is named `description` and is
# optional.
UpdateTeamRequest:
  type: object
  properties:
    description:
      type: string
      description: Updated team description
# Minimal team representation: `id` and `name` strings only.
CreateTeamResponse:
  type: object
  properties:
    id:
      type: string
    name:
      type: string
# Paginated collection of TeamObject; carries the same pagination fields as
# ListUsersResponse.
ListTeamsResponse:
  type: object
  properties:
    teams:
      type: array
      items:
        $ref: '#/TeamObject'
    total:
      type: integer
    page:
      type: integer
    limit:
      type: integer
    total_pages:
      type: integer
      description: Total number of pages
    has_more:
      type: boolean
      description: Whether more pages are available
# ---- Team Members ----
# Element type of TeamMembersResponse.members; all four fields are strings.
TeamMemberObject:
  type: object
  properties:
    user_id:
      type: string
    user_name:
      type: string
    user_email:
      type: string
    source:
      type: string
      description: How the member was added (e.g. "manual", "scim_sync")
# Envelope wrapping an array of TeamMemberObject under `members`.
TeamMembersResponse:
  type: object
  properties:
    members:
      type: array
      items:
        $ref: '#/TeamMemberObject'
AddTeamMemberRequest:
type: object
required:
- user_id
properties:
user_id:
type: string
description: ID of the user to add to the team