# Governance API schemas

# Budget: spending cap with its reset window, usage counter and timestamps.
Budget:
  type: object
  description: Budget configuration
  properties:
    id:
      type: string
    max_limit:
      type: number
      description: Maximum budget in dollars
    reset_duration:
      type: string
      description: 'Reset duration (e.g., "30s", "5m", "1h", "1d", "1w", "1M")'
    calendar_aligned:
      type: boolean
      description: >-
        When true, resets align to calendar period boundaries in UTC
        (not rolling from last reset)
      default: false
    last_reset:
      type: string
      format: date-time
    current_usage:
      type: number
    config_hash:
      type: string
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time

# RateLimit: paired token-based and request-based limits, each with its own
# max, reset duration, current usage and last-reset timestamp.
RateLimit:
  type: object
  description: Rate limit configuration
  properties:
    id:
      type: string
    # Token-based limit
    token_max_limit:
      type: integer
      format: int64
    token_reset_duration:
      type: string
    token_current_usage:
      type: integer
      format: int64
    token_last_reset:
      type: string
      format: date-time
    # Request-based limit
    request_max_limit:
      type: integer
      format: int64
      nullable: true
    request_reset_duration:
      type: string
      nullable: true
    request_current_usage:
      type: integer
      format: int64
    request_last_reset:
      type: string
      format: date-time
    config_hash:
      type: string
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time

# CreateBudgetRequest: payload for creating a budget; limit and reset duration
# are mandatory, calendar alignment is opt-in.
CreateBudgetRequest:
  type: object
  description: Create budget request
  required: [max_limit, reset_duration]
  properties:
    max_limit:
      type: number
    reset_duration:
      type: string
    calendar_aligned:
      type: boolean
      default: false
      description: >
        When true, usage resets at the start of each calendar period in UTC
        instead of on a rolling window from last reset. Only valid with reset
        durations that use day, week, month, or year suffixes
        (`d`, `w`, `M`, `Y`). For example `1d` resets at midnight UTC;
        `1w` at Monday 00:00 UTC; `1M` on the first day of each month;
        `1Y` on January 1. Sub-day durations (e.g. `1h`) cannot use
        calendar alignment.

# UpdateBudgetRequest: partial update of a budget; no field is required.
UpdateBudgetRequest:
  type: object
  description: Update budget request
  properties:
    max_limit:
      type: number
    reset_duration:
      type: string
    calendar_aligned:
      type: boolean
      nullable: true
      description: >
        Set to true or false to enable or disable calendar-aligned resets.
        Only valid with reset durations that use day, week, month, or year
        suffixes (`d`, `w`, `M`, `Y`); sub-day durations (e.g. `1h`, `30m`)
        are invalid with calendar alignment and the API rejects that
        combination. When enabling on an existing budget, current usage is
        reset to zero and last_reset snaps to the current period start.

# CreateRateLimitRequest: token and request limits with their reset durations.
CreateRateLimitRequest:
  type: object
  description: Create rate limit request
  properties:
    token_max_limit:
      type: integer
      format: int64
    token_reset_duration:
      type: string
    request_max_limit:
      type: integer
      format: int64
    request_reset_duration:
      type: string

# UpdateRateLimitRequest: same fields as the create request, all optional.
UpdateRateLimitRequest:
  type: object
  description: Update rate limit request
  properties:
    token_max_limit:
      type: integer
      format: int64
    token_reset_duration:
      type: string
    request_max_limit:
      type: integer
      format: int64
    request_reset_duration:
      type: string

# VirtualKeyProviderConfig: one provider entry attached to a virtual key,
# with its routing weight, model allow-list, budget, rate limit and keys.
VirtualKeyProviderConfig:
  type: object
  description: Provider configuration for a virtual key
  properties:
    id:
      type: integer
    virtual_key_id:
      type: string
    provider:
      type: string
    weight:
      type: number
      nullable: true
      description: >-
        Weight for provider load balancing.
        Null means excluded from weighted routing.
    allowed_models:
      type: array
      items:
        type: string
    budget_id:
      type: string
    rate_limit_id:
      type: string
    budget:
      $ref: "#/Budget"
    rate_limit:
      $ref: "#/RateLimit"
    keys:
      type: array
      items:
        $ref: "#/TableKey"

# VirtualKeyMCPConfig: an MCP client name plus the tools listed for execution.
VirtualKeyMCPConfig:
  type: object
  description: MCP configuration for a virtual key
  properties:
    id:
      type: integer
    mcp_client_name:
      type: string
    tools_to_execute:
      type: array
      items:
        type: string

# VirtualKey: a virtual key with its provider and MCP configurations.
VirtualKey:
  type: object
  description: Virtual key configuration
  properties:
    id:
      type: string
    name:
      type: string
    value:
      type: string
    # `description` here is a property of the virtual key itself.
    description:
      type: string
    is_active:
      type: boolean
    provider_configs:
      type: array
      items:
        $ref: "#/VirtualKeyProviderConfig"
    mcp_configs:
      type: array
      items:
        $ref: "#/VirtualKeyMCPConfig"

# CreateVirtualKeyRequest: payload for creating a virtual key. Only `name` is
# required; provider and MCP lists are deny-by-default when empty.
CreateVirtualKeyRequest:
  type: object
  description: Create virtual key request
  required: [name]
  properties:
    name:
      type: string
    description:
      type: string
    provider_configs:
      type: array
      description: >-
        Provider configurations
        (empty means no providers allowed, deny-by-default)
      items:
        type: object
        properties:
          provider:
            type: string
          weight:
            type: number
            nullable: true
            description: >-
              Weight for load balancing.
              Null means excluded from weighted routing.
          allowed_models:
            type: array
            items:
              type: string
          # Per-provider budget and rate limit
          budget:
            $ref: "#/CreateBudgetRequest"
          rate_limit:
            $ref: "#/CreateRateLimitRequest"
          key_ids:
            type: array
            items:
              type: string
    mcp_configs:
      type: array
      description: >-
        MCP configurations
        (empty means no MCP tools allowed, deny-by-default)
      items:
        type: object
        properties:
          mcp_client_name:
            type: string
          tools_to_execute:
            type: array
            items:
              type: string
    team_id:
      type: string
    customer_id:
      type: string
    # Key-level budget and rate limit
    budget:
      $ref: "#/CreateBudgetRequest"
    rate_limit:
      $ref: "#/CreateRateLimitRequest"
    is_active:
      type: boolean

# UpdateVirtualKeyRequest: partial update of a virtual key; nested provider
# and MCP entries carry an `id` field.
UpdateVirtualKeyRequest:
  type: object
  description: Update virtual key request
  properties:
    name:
      type: string
    description:
      type: string
    provider_configs:
      type: array
      items:
        type: object
        properties:
          id:
            type: integer
          provider:
            type: string
          weight:
            type: number
            nullable: true
            description: >-
              Weight for load balancing.
              Null means excluded from weighted routing.
          allowed_models:
            type: array
            items:
              type: string
          # Per-provider budget and rate limit
          budget:
            $ref: "#/UpdateBudgetRequest"
          rate_limit:
            $ref: "#/UpdateRateLimitRequest"
          key_ids:
            type: array
            items:
              type: string
    mcp_configs:
      type: array
      items:
        type: object
        properties:
          id:
            type: integer
          mcp_client_name:
            type: string
          tools_to_execute:
            type: array
            items:
              type: string
    team_id:
      type: string
    customer_id:
      type: string
    # Key-level budget and rate limit
    budget:
      $ref: "#/UpdateBudgetRequest"
    rate_limit:
      $ref: "#/UpdateRateLimitRequest"
    is_active:
      type: boolean

# ListVirtualKeysResponse: a list of virtual keys plus a count.
ListVirtualKeysResponse:
  type: object
  description: List virtual keys response
  properties:
    virtual_keys:
      type: array
      items:
        $ref: "#/VirtualKey"
    count:
      type: integer

# VirtualKeyQuotaResponse: quota view of a virtual key (name, active flag,
# budgets and rate limit).
VirtualKeyQuotaResponse:
  type: object
  description: Virtual key quota response (self-service, no admin auth required)
  properties:
    virtual_key_name:
      type: string
      description: Name of the virtual key
    is_active:
      type: boolean
      description: Whether the virtual key is active
    budgets:
      type: array
      description: Budget quotas assigned to this virtual key
      items:
        $ref: "#/Budget"
    rate_limit:
      $ref: "#/RateLimit"

# VirtualKeyResponse: a message plus the virtual key.
VirtualKeyResponse:
  type: object
  description: Virtual key operation response
  properties:
    message:
      type: string
    virtual_key:
      $ref: "#/VirtualKey"

# Team: a team with optional customer and budget links, its virtual keys and
# free-form profile/config/claims objects.
Team:
  type: object
  description: Team configuration
  properties:
    id:
      type: string
    name:
      type: string
    customer_id:
      type: string
    budget_id:
      type: string
    customer:
      $ref: "#/Customer"
    budget:
      $ref: "#/Budget"
    virtual_keys:
      type: array
      nullable: true
      description: >
        Virtual keys assigned to this team. This field may be omitted or
        returned as null in some responses (for example, when a team is
        embedded inside a virtual-key response) to avoid nested
        `virtual_keys` recursion.
      items:
        $ref: "#/VirtualKey"
    # Free-form objects: any additional properties are accepted.
    profile:
      type: object
      additionalProperties: true
    config:
      type: object
      additionalProperties: true
    claims:
      type: object
      additionalProperties: true
    config_hash:
      type: string
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time

# CreateTeamRequest: payload for creating a team; only `name` is required.
CreateTeamRequest:
  type: object
  description: Create team request
  required: [name]
  properties:
    name:
      type: string
    customer_id:
      type: string
    budget:
      $ref: "#/CreateBudgetRequest"

# UpdateTeamRequest: partial update of a team; no field is required.
UpdateTeamRequest:
  type: object
  description: Update team request
  properties:
    name:
      type: string
    customer_id:
      type: string
    budget:
      $ref: "#/UpdateBudgetRequest"

# ListTeamsResponse: a list of teams plus a count.
ListTeamsResponse:
  type: object
  description: List teams response
  properties:
    teams:
      type: array
      items:
        $ref: "#/Team"
    count:
      type: integer

# TeamResponse: a message plus the team.
TeamResponse:
  type: object
  description: Team operation response
  properties:
    message:
      type: string
    team:
      $ref: "#/Team"

# Customer: a customer with an optional budget and its teams and virtual keys.
Customer:
  type: object
  description: Customer configuration
  properties:
    id:
      type: string
    name:
      type: string
    budget_id:
      type: string
    budget:
      $ref: '#/Budget'
    teams:
      type: array
      items:
        $ref: '#/Team'
    virtual_keys:
      type: array
      items:
        $ref: '#/VirtualKey'
    config_hash:
      type: string
      # Marked nullable to agree with every other `config_hash` field in this
      # file (Budget, RateLimit, Team, TableKey, PricingOverride).
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time

# CreateCustomerRequest: payload for creating a customer; `name` is required.
CreateCustomerRequest:
  type: object
  description: Create customer request
  required: [name]
  properties:
    name:
      type: string
    budget:
      $ref: "#/CreateBudgetRequest"

# UpdateCustomerRequest: partial update of a customer; no field is required.
UpdateCustomerRequest:
  type: object
  description: Update customer request
  properties:
    name:
      type: string
    budget:
      $ref: "#/UpdateBudgetRequest"

# ListCustomersResponse: a list of customers plus a count.
ListCustomersResponse:
  type: object
  description: List customers response
  properties:
    customers:
      type: array
      items:
        $ref: "#/Customer"
    count:
      type: integer

# ListBudgetsResponse: a list of budgets plus a count.
ListBudgetsResponse:
  type: object
  description: List budgets response
  properties:
    budgets:
      type: array
      items:
        $ref: "#/Budget"
    count:
      type: integer

# ListRateLimitsResponse: a list of rate limits plus a count.
ListRateLimitsResponse:
  type: object
  description: List rate limits response
  properties:
    rate_limits:
      type: array
      items:
        $ref: "#/RateLimit"
    count:
      type: integer

# CustomerResponse: a message plus the customer.
CustomerResponse:
  type: object
  description: Customer operation response
  properties:
    message:
      type: string
    customer:
      $ref: "#/Customer"

# TableKey: a provider key record, including provider-specific fields for
# Azure, Vertex and Bedrock.
# NOTE(review): several fields below place `nullable` beside `$ref`; some
# OpenAPI tooling ignores keywords that sit next to `$ref` — confirm the
# consuming toolchain honours them.
TableKey:
  type: object
  description: Table key configuration
  properties:
    id:
      type: integer
    name:
      type: string
    provider_id:
      type: integer
    provider:
      type: string
    key_id:
      type: string
    value:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
    models:
      type: array
      items:
        type: string
    weight:
      type: number
      nullable: true
    enabled:
      type: boolean
      default: true
      nullable: true
    use_for_batch_api:
      type: boolean
      default: false
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
    config_hash:
      type: string
      nullable: true
    # Azure config fields
    azure_endpoint:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    azure_api_version:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    azure_client_id:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    azure_client_secret:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    azure_tenant_id:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    # Vertex config fields
    vertex_project_id:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    vertex_project_number:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    vertex_region:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    vertex_auth_credentials:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    # Bedrock config fields
    bedrock_access_key:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    bedrock_secret_key:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    bedrock_session_token:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    bedrock_region:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true
    bedrock_arn:
      $ref: "../../schemas/management/common.yaml#/EnvVar"
      nullable: true

# Routing Rules schemas

# RoutingTarget: one weighted destination (provider / model / pinned key)
# inside a routing rule. `weight` is required; `key_id` requires `provider`.
# NOTE(review): `dependentRequired` and a numeric `exclusiveMinimum` are used
# alongside `nullable` — confirm the OpenAPI version this file targets
# supports all three.
RoutingTarget:
  type: object
  description: A single weighted routing target within a routing rule
  required: [weight]
  dependentRequired:
    key_id: [provider]
  properties:
    provider:
      type: string
      description: Target provider (omit or empty to use the incoming request provider)
      nullable: true
    model:
      type: string
      description: Target model (omit or empty to use the incoming request model)
      nullable: true
    key_id:
      type: string
      description: >-
        UUID of the API key to pin for this target
        (omit for load-balanced key selection)
      nullable: true
    weight:
      type: number
      format: double
      exclusiveMinimum: 0
      description: >-
        Probability weight for this target (must be > 0; all target weights
        in a rule must sum to 1, e.g. 0.7 + 0.3 = 1.0)
      example: 0.5

# RoutingRule: a stored routing rule. The trailing `oneOf` ties `scope_id`
# to `scope`: the non-global scopes list `scope_id` as required.
RoutingRule:
  type: object
  description: CEL-based routing rule for intelligent request routing
  properties:
    id:
      type: string
      description: Unique identifier for the routing rule
    name:
      type: string
      description: Name of the routing rule
    description:
      type: string
      description: Description of what the rule does
    enabled:
      type: boolean
      description: Whether the rule is enabled and active
    cel_expression:
      type: string
      description: CEL (Common Expression Language) expression for matching
    targets:
      type: array
      description: >-
        Weighted routing targets; weights must sum to 1;
        target is selected probabilistically at request time
      items:
        $ref: "#/RoutingTarget"
    fallbacks:
      type: array
      items:
        type: string
      description: 'Fallback providers in format "provider/model"'
    scope:
      type: string
      enum:
        - global
        - team
        - customer
        - virtual_key
      description: Scope level for the rule
    scope_id:
      type: string
      description: ID for the scope (empty for global scope)
      nullable: true
    priority:
      type: integer
      description: Priority for rule evaluation (lower number = higher priority)
    query:
      type: object
      description: Visual rule tree structure from query builder
      nullable: true
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time
  oneOf:
    # Variant 1: global scope
    - type: object
      properties:
        scope:
          type: string
          enum:
            - global
      required: [scope]
      description: Global scope routing rule
    # Variant 2: team / customer / virtual_key scope
    - type: object
      properties:
        scope:
          type: string
          enum:
            - team
            - customer
            - virtual_key
        scope_id:
          type: string
      required: [scope, scope_id]
      description: Scoped routing rule (requires scope_id)

# CreateRoutingRuleRequest: payload for creating a routing rule. Name, CEL
# expression, scope, priority and at least one target are required.
CreateRoutingRuleRequest:
  type: object
  description: Request to create a routing rule
  required: [name, cel_expression, scope, priority, targets]
  properties:
    name:
      type: string
      description: Name of the routing rule
    description:
      type: string
      description: Optional description
    enabled:
      type: boolean
      description: Whether the rule is enabled
    cel_expression:
      type: string
      description: CEL expression for matching
    targets:
      type: array
      minItems: 1
      description: >-
        Weighted routing targets; weights must sum to 1;
        target is selected probabilistically at request time
      items:
        $ref: "#/RoutingTarget"
    fallbacks:
      type: array
      items:
        type: string
      description: 'Fallback providers in format "provider/model"'
    scope:
      type: string
      enum:
        - global
        - team
        - customer
        - virtual_key
      description: Scope level for the rule
    scope_id:
      type: string
      description: ID for the scope (required if scope is not global)
      nullable: true
    priority:
      type: integer
      description: Priority for rule evaluation (lower number = higher priority)
    query:
      type: object
      description: Visual rule tree structure
      nullable: true
  oneOf:
    # Variant 1: global scope
    - type: object
      properties:
        scope:
          type: string
          enum:
            - global
      required: [scope]
      description: Global scope routing rule
    # Variant 2: team / customer / virtual_key scope
    - type: object
      properties:
        scope:
          type: string
          enum:
            - team
            - customer
            - virtual_key
        scope_id:
          type: string
      required: [scope, scope_id]
      description: Scoped routing rule (requires scope_id)

# UpdateRoutingRuleRequest: partial update of a routing rule; a supplied
# `targets` list must contain at least one entry.
UpdateRoutingRuleRequest:
  type: object
  description: >-
    Request to update a routing rule (all fields optional;
    providing `targets` replaces all existing targets)
  properties:
    name:
      type: string
    description:
      type: string
    enabled:
      type: boolean
    cel_expression:
      type: string
    targets:
      type: array
      minItems: 1
      description: Replaces all existing targets when provided; weights must sum to 1
      items:
        $ref: "#/RoutingTarget"
    fallbacks:
      type: array
      items:
        type: string
    priority:
      type: integer
    query:
      type: object
      nullable: true

# RoutingRuleResponse: a message plus the routing rule.
RoutingRuleResponse:
  type: object
  description: Response containing created/updated routing rule
  properties:
    message:
      type: string
    rule:
      $ref: "#/RoutingRule"

# ListRoutingRulesResponse: a list of routing rules plus a count.
ListRoutingRulesResponse:
  type: object
  description: Response containing list of routing rules
  properties:
    rules:
      type: array
      items:
        $ref: "#/RoutingRule"
    count:
      type: integer
      description: Number of routing rules returned

# Model Configs

# ModelConfig: budget and rate-limit settings for a model, with an optional
# provider name.
ModelConfig:
  type: object
  description: Model configuration with budget and rate limit settings
  properties:
    id:
      type: string
      description: Unique identifier for the model config
    model_name:
      type: string
      description: Name of the model
    provider:
      type: string
      description: Provider name (optional - applies to all providers if not specified)
    budget:
      $ref: "#/Budget"
      description: Budget configuration for this model
    rate_limit:
      $ref: "#/RateLimit"
      description: Rate limit configuration for this model
    created_at:
      type: string
      format: date-time
      description: When this model config was created
    updated_at:
      type: string
      format: date-time
      description: When this model config was last updated

# ModelConfigResponse: a message plus the model config.
ModelConfigResponse:
  type: object
  description: Response containing a created/updated model config
  properties:
    message:
      type: string
    model_config:
      $ref: "#/ModelConfig"

# ListModelConfigsResponse: a list of model configs plus a count.
ListModelConfigsResponse:
  type: object
  description: Response containing list of model configs
  properties:
    model_configs:
      type: array
      items:
        $ref: "#/ModelConfig"
    count:
      type: integer
      description: Number of model configs returned

# CreateModelConfigRequest: payload for creating a model config;
# `model_name` is required.
CreateModelConfigRequest:
  type: object
  description: Request to create a new model config
  required: [model_name]
  properties:
    model_name:
      type: string
      description: Name of the model (required)
    provider:
      type: string
      description: Provider name (optional - applies to all providers if not specified)
    budget:
      $ref: "#/CreateBudgetRequest"
      description: Budget configuration
    rate_limit:
      $ref: "#/CreateRateLimitRequest"
      description: Rate limit configuration

# UpdateModelConfigRequest: partial update of a model config; no field is
# required.
UpdateModelConfigRequest:
  type: object
  description: Request to update an existing model config
  properties:
    model_name:
      type: string
      description: Name of the model
    provider:
      type: string
      description: Provider name
    budget:
      $ref: "#/UpdateBudgetRequest"
      description: Budget configuration
    rate_limit:
      $ref: "#/UpdateRateLimitRequest"
      description: Rate limit configuration

# Provider Governance

# ProviderGovernance: budget and rate limit attached to a provider.
ProviderGovernance:
  type: object
  description: Provider-level governance settings (budget and rate limits)
  properties:
    provider:
      type: string
      description: Provider name
    budget:
      $ref: "#/Budget"
      description: Budget configuration for this provider
    rate_limit:
      $ref: "#/RateLimit"
      description: Rate limit configuration for this provider

# ProviderGovernanceResponse: provider name with its budget and rate limit.
ProviderGovernanceResponse:
  type: object
  description: Response containing provider governance settings
  properties:
    provider:
      type: string
      description: Provider name
    budget:
      $ref: "#/Budget"
      description: Budget configuration
    rate_limit:
      $ref: "#/RateLimit"
      description: Rate limit configuration

# ListProviderGovernanceResponse: a list of provider governance entries plus
# a count.
ListProviderGovernanceResponse:
  type: object
  description: Response containing list of provider governance settings
  properties:
    providers:
      type: array
      items:
        $ref: "#/ProviderGovernanceResponse"
    count:
      type: integer
      description: Number of providers with governance settings

# UpdateProviderGovernanceRequest: budget and/or rate-limit update for a
# provider; no field is required.
UpdateProviderGovernanceRequest:
  type: object
  description: Request to update provider governance settings
  properties:
    budget:
      $ref: "#/UpdateBudgetRequest"
      description: Budget configuration
    rate_limit:
      $ref: "#/UpdateRateLimitRequest"
      description: Rate limit configuration

# Pricing Overrides

# PricingOverrideRequestType: the request types a pricing override can be
# restricted to.
PricingOverrideRequestType:
  type: string
  description: >
    Request type for pricing override filtering. Stream variants are
    treated identically to their base type — specifying `chat_completion`
    covers both streaming and non-streaming chat requests.
  enum:
    # Text
    - chat_completion
    - text_completion
    - responses
    - embedding
    - rerank
    # Audio
    - speech
    - transcription
    # Image
    - image_generation
    - image_variation
    - image_edit
    # Video
    - video_generation
    - video_remix

# PricingPatch: the set of per-unit cost fields an override may replace.
# Every field has the same shape (a number with minimum 0), so each is
# written as a compact flow mapping.
PricingPatch:
  type: object
  description: >
    Pricing fields to override. Only non-zero/non-null fields are applied.
    All values are cost per unit in USD.
  properties:
    # Token costs
    input_cost_per_token: {type: number, minimum: 0}
    output_cost_per_token: {type: number, minimum: 0}
    input_cost_per_token_batches: {type: number, minimum: 0}
    output_cost_per_token_batches: {type: number, minimum: 0}
    input_cost_per_token_priority: {type: number, minimum: 0}
    output_cost_per_token_priority: {type: number, minimum: 0}
    input_cost_per_character: {type: number, minimum: 0}
    input_cost_per_token_above_128k_tokens: {type: number, minimum: 0}
    output_cost_per_token_above_128k_tokens: {type: number, minimum: 0}
    input_cost_per_token_above_200k_tokens: {type: number, minimum: 0}
    output_cost_per_token_above_200k_tokens: {type: number, minimum: 0}
    # Cache costs
    cache_creation_input_token_cost: {type: number, minimum: 0}
    cache_read_input_token_cost: {type: number, minimum: 0}
    cache_creation_input_token_cost_above_200k_tokens: {type: number, minimum: 0}
    cache_read_input_token_cost_above_200k_tokens: {type: number, minimum: 0}
    cache_read_input_token_cost_priority: {type: number, minimum: 0}
    cache_read_input_image_token_cost: {type: number, minimum: 0}
    cache_creation_input_audio_token_cost: {type: number, minimum: 0}
    # Image costs
    input_cost_per_image: {type: number, minimum: 0}
    output_cost_per_image: {type: number, minimum: 0}
    input_cost_per_pixel: {type: number, minimum: 0}
    output_cost_per_pixel: {type: number, minimum: 0}
    input_cost_per_image_token: {type: number, minimum: 0}
    output_cost_per_image_token: {type: number, minimum: 0}
    output_cost_per_image_low_quality: {type: number, minimum: 0}
    output_cost_per_image_medium_quality: {type: number, minimum: 0}
    output_cost_per_image_high_quality: {type: number, minimum: 0}
    output_cost_per_image_auto_quality: {type: number, minimum: 0}
    output_cost_per_image_premium_image: {type: number, minimum: 0}
    output_cost_per_image_above_512_and_512_pixels: {type: number, minimum: 0}
    output_cost_per_image_above_1024_and_1024_pixels: {type: number, minimum: 0}
    output_cost_per_image_above_2048_and_2048_pixels: {type: number, minimum: 0}
    output_cost_per_image_above_4096_and_4096_pixels: {type: number, minimum: 0}
    # Audio, video and per-second costs
    input_cost_per_audio_token: {type: number, minimum: 0}
    output_cost_per_audio_token: {type: number, minimum: 0}
    input_cost_per_audio_per_second: {type: number, minimum: 0}
    input_cost_per_second: {type: number, minimum: 0}
    input_cost_per_video_per_second: {type: number, minimum: 0}
    output_cost_per_video_per_second: {type: number, minimum: 0}
    output_cost_per_second: {type: number, minimum: 0}
    # Other costs
    search_context_cost_per_query: {type: number, minimum: 0}
    code_interpreter_cost_per_session: {type: number, minimum: 0}

# PricingOverride: a stored pricing override — scope selectors, model-name
# matching, request types and the pricing fields to apply.
PricingOverride:
  type: object
  description: A pricing override that applies custom rates to matching requests.
  properties:
    id:
      type: string
      description: Unique override ID (UUID)
    name:
      type: string
      description: Human-readable label
    scope_kind:
      type: string
      enum:
        - global
        - provider
        - provider_key
        - virtual_key
        - virtual_key_provider
        - virtual_key_provider_key
      description: Scope that determines which requests this override applies to
    # Scope selectors
    virtual_key_id:
      type: string
      nullable: true
      description: Required for virtual_key* scopes
    provider_id:
      type: string
      nullable: true
      description: Required for provider and virtual_key_provider scopes
    provider_key_id:
      type: string
      nullable: true
      description: Required for provider_key and virtual_key_provider_key scopes
    # Model matching
    match_type:
      type: string
      enum: [exact, wildcard]
      description: How the pattern is matched against the model name
    pattern:
      type: string
      description: 'Model name or wildcard prefix (e.g. "gpt-4o" or "claude-3*")'
    request_types:
      type: array
      minItems: 1
      items:
        $ref: "#/PricingOverrideRequestType"
      description: Request types this override applies to. At least one value is required.
    # Pricing payload: JSON-encoded string form and decoded object form
    pricing_patch:
      type: string
      description: JSON-encoded pricing fields to override (as stored in the database)
    patch:
      $ref: "#/PricingPatch"
      description: Decoded pricing fields (present in API responses)
    config_hash:
      type: string
      nullable: true
      description: Auto-managed hash for config-file-sourced overrides. Do not set manually.
    created_at:
      type: string
      format: date-time
    updated_at:
      type: string
      format: date-time

# CreatePricingOverrideRequest: payload for creating a pricing override.
# Name, scope kind, match type, pattern and request types are required.
CreatePricingOverrideRequest:
  type: object
  description: Request body for creating a pricing override.
  required: [name, scope_kind, match_type, pattern, request_types]
  properties:
    name:
      type: string
      description: Human-readable label
    scope_kind:
      type: string
      enum:
        - global
        - provider
        - provider_key
        - virtual_key
        - virtual_key_provider
        - virtual_key_provider_key
    virtual_key_id:
      type: string
      description: Required for virtual_key* scopes
    provider_id:
      type: string
      description: Required for provider and virtual_key_provider scopes
    provider_key_id:
      type: string
      description: Required for provider_key and virtual_key_provider_key scopes
    match_type:
      type: string
      enum: [exact, wildcard]
    pattern:
      type: string
      description: 'Model name or wildcard prefix ending with * (e.g. "claude-3*")'
    request_types:
      type: array
      minItems: 1
      items:
        $ref: "#/PricingOverrideRequestType"
      description: Request types this override applies to. At least one value is required.
    patch:
      $ref: "#/PricingPatch"

# UpdatePricingOverrideRequest: partial update of a pricing override; no
# field is required.
UpdatePricingOverrideRequest:
  type: object
  description: >
    Request body for updating a pricing override. All fields are optional —
    omitted fields are merged from the existing record. The `patch` field
    is always replaced in full when provided.
  properties:
    name:
      type: string
      description: Human-readable label
    scope_kind:
      type: string
      enum:
        - global
        - provider
        - provider_key
        - virtual_key
        - virtual_key_provider
        - virtual_key_provider_key
    virtual_key_id:
      type: string
      description: Required for virtual_key* scopes
    provider_id:
      type: string
      description: Required for provider and virtual_key_provider scopes
    provider_key_id:
      type: string
      description: Required for provider_key and virtual_key_provider_key scopes
    match_type:
      type: string
      enum: [exact, wildcard]
    pattern:
      type: string
      description: 'Model name or wildcard prefix ending with * (e.g. "claude-3*")'
    request_types:
      type: array
      minItems: 1
      items:
        $ref: "#/PricingOverrideRequestType"
      description: Request types this override applies to.
    patch:
      $ref: "#/PricingPatch"

# PricingOverrideResponse: a message plus the pricing override.
PricingOverrideResponse:
  type: object
  # Description added so this schema is documented like the other
  # "<entity> operation response" schemas in this file.
  description: Pricing override operation response
  properties:
    message:
      type: string
    pricing_override:
      $ref: '#/PricingOverride'

ListPricingOverridesResponse:
|
|
type: object
|
|
properties:
|
|
pricing_overrides:
|
|
type: array
|
|
items:
|
|
$ref: '#/PricingOverride'
|
|
count:
|
|
type: integer
|
|
description: Total number of overrides returned
|