Files
bifrost/docs/openapi/schemas/inference/speech.yaml
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

133 lines
2.5 KiB
YAML

# Speech API schemas
# Request payload for text-to-speech synthesis.
# `model`, `input` and `voice` are mandatory; all other properties are optional.
SpeechRequest:
  type: object
  required: [model, input, voice]
  properties:
    model:
      type: string
      description: Model in provider/model format
    input:
      type: string
      description: Text to convert to speech
    # NOTE(review): presumably alternative models to fall back on — confirm
    # the expected string format against the request handler.
    fallbacks:
      type: array
      items:
        type: string
    # The only accepted value is `sse`.
    stream_format:
      type: string
      enum:
        - sse
      description: 'Set to "sse" to enable streaming'
    # Either a plain string or a list of VoiceConfig (see SpeechVoiceInput).
    voice:
      $ref: '#/SpeechVoiceInput'
    instructions:
      type: string
    # Output audio container/codec; restricted to the values listed.
    response_format:
      type: string
      enum:
        - mp3
        - opus
        - aac
        - flac
        - wav
        - pcm
    # Bounded to the inclusive range 0.25 .. 4.0.
    speed:
      type: number
      minimum: 0.25
      maximum: 4.0
    # NOTE(review): the remaining options look provider-specific — confirm
    # which providers honor them before documenting further.
    language_code:
      type: string
    pronunciation_dictionary_locators:
      type: array
      items:
        $ref: '#/SpeechPronunciationDictionaryLocator'
    enable_logging:
      type: boolean
    optimize_streaming_latency:
      type: boolean
    with_timestamps:
      type: boolean
# Voice selector: exactly one of
#   - a single string, or
#   - an array of VoiceConfig objects.
SpeechVoiceInput:
  oneOf:
    - type: string
    - type: array
      items:
        $ref: '#/VoiceConfig'
# One speaker/voice pair; both string fields are required.
# Used as the array item type of SpeechVoiceInput.
VoiceConfig:
  type: object
  required: [speaker, voice]
  properties:
    speaker:
      type: string
    voice:
      type: string
# Reference to a pronunciation dictionary.
# `pronunciation_dictionary_id` is required; `version_id` is optional.
SpeechPronunciationDictionaryLocator:
  type: object
  required: [pronunciation_dictionary_id]
  properties:
    pronunciation_dictionary_id:
      type: string
    version_id:
      type: string
# Non-streaming speech result. No property is marked as required.
SpeechResponse:
  type: object
  properties:
    # NOTE(review): in OpenAPI 3.0 `format: byte` denotes base64-encoded
    # text, while the description says "binary" — confirm which is intended.
    audio:
      type: string
      format: byte
      description: Audio data (binary)
    usage:
      $ref: '#/SpeechUsage'
    # `alignment` and `normalized_alignment` share the SpeechAlignment shape.
    alignment:
      $ref: '#/SpeechAlignment'
    normalized_alignment:
      $ref: '#/SpeechAlignment'
    # NOTE(review): presumably the same audio as a base64 string — confirm
    # how this relates to `audio` above.
    audio_base64:
      type: string
    # Defined outside this file, in common.yaml.
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'
# Token counters attached to speech responses; all three are optional integers.
SpeechUsage:
  type: object
  properties:
    input_tokens:
      type: integer
    output_tokens:
      type: integer
    # NOTE(review): presumably input_tokens + output_tokens — confirm.
    total_tokens:
      type: integer
# Character timing data: two numeric arrays plus one string array.
# NOTE(review): presumably the three arrays are parallel (index i describes
# the same character in each) — confirm against the producer.
SpeechAlignment:
  type: object
  properties:
    char_start_times_ms:
      type: array
      items:
        type: number
    char_end_times_ms:
      type: array
      items:
        type: number
    characters:
      type: array
      items:
        type: string
# One event of a streamed speech response.
SpeechStreamResponse:
  type: object
  properties:
    # Event discriminator. Note this is a property literally named `type`,
    # distinct from the schema keyword `type` nested beneath it.
    type:
      type: string
      enum:
        - speech.audio.delta
        - speech.audio.done
    # NOTE(review): `format: byte` means base64-encoded text in OpenAPI 3.0;
    # presumably an audio chunk for delta events — confirm.
    audio:
      type: string
      format: byte
    usage:
      $ref: '#/SpeechUsage'
    # Defined outside this file, in common.yaml.
    extra_fields:
      $ref: './common.yaml#/BifrostResponseExtraFields'