package schemas
import "encoding/json"
// RealtimeEventType represents the type of a Bifrost unified Realtime event.
// Each value is the dotted event name carried as the "type" of an event.
type RealtimeEventType string

// Client-to-server event types (sent by the client through Bifrost).
const (
	RTEventSessionUpdate          RealtimeEventType = "session.update"
	RTEventConversationItemCreate RealtimeEventType = "conversation.item.create"
	RTEventConversationItemDelete RealtimeEventType = "conversation.item.delete"
	RTEventResponseCreate         RealtimeEventType = "response.create"
	RTEventResponseCancel         RealtimeEventType = "response.cancel"
	RTEventInputAudioAppend       RealtimeEventType = "input_audio_buffer.append"
	RTEventInputAudioCommit       RealtimeEventType = "input_audio_buffer.commit"
	RTEventInputAudioClear        RealtimeEventType = "input_audio_buffer.clear"
)

// Server-to-client event types (received from the provider, forwarded to client).
const (
	RTEventSessionCreated            RealtimeEventType = "session.created"
	RTEventSessionUpdated            RealtimeEventType = "session.updated"
	RTEventConversationCreated       RealtimeEventType = "conversation.created"
	RTEventConversationItemAdded     RealtimeEventType = "conversation.item.added"
	RTEventConversationItemCreated   RealtimeEventType = "conversation.item.created"
	RTEventConversationItemRetrieved RealtimeEventType = "conversation.item.retrieved"
	RTEventConversationItemDone      RealtimeEventType = "conversation.item.done"
	RTEventResponseCreated           RealtimeEventType = "response.created"
	RTEventResponseDone              RealtimeEventType = "response.done"
	RTEventResponseTextDelta         RealtimeEventType = "response.text.delta"
	RTEventResponseTextDone          RealtimeEventType = "response.text.done"
	RTEventResponseAudioDelta        RealtimeEventType = "response.audio.delta"
	RTEventResponseAudioDone         RealtimeEventType = "response.audio.done"
	RTEventResponseAudioTransDelta   RealtimeEventType = "response.audio_transcript.delta"
	RTEventResponseAudioTransDone    RealtimeEventType = "response.audio_transcript.done"
	RTEventResponseOutputItemAdded   RealtimeEventType = "response.output_item.added"
	RTEventResponseOutputItemDone    RealtimeEventType = "response.output_item.done"
	RTEventResponseContentPartAdded  RealtimeEventType = "response.content_part.added"
	RTEventResponseContentPartDone   RealtimeEventType = "response.content_part.done"
	RTEventRateLimitsUpdated         RealtimeEventType = "rate_limits.updated"
	RTEventInputAudioTransCompleted  RealtimeEventType = "conversation.item.input_audio_transcription.completed"
	RTEventInputAudioTransDelta      RealtimeEventType = "conversation.item.input_audio_transcription.delta"
	RTEventInputAudioTransFailed     RealtimeEventType = "conversation.item.input_audio_transcription.failed"
	RTEventInputAudioBufferCommitted RealtimeEventType = "input_audio_buffer.committed"
	RTEventInputAudioBufferCleared   RealtimeEventType = "input_audio_buffer.cleared"
	RTEventInputAudioSpeechStarted   RealtimeEventType = "input_audio_buffer.speech_started"
	RTEventInputAudioSpeechStopped   RealtimeEventType = "input_audio_buffer.speech_stopped"
	RTEventError                     RealtimeEventType = "error"
)
// IsRealtimeConversationItemEventType reports whether the event carries a
|
|
// canonical conversation item payload after provider translation.
|
|
func IsRealtimeConversationItemEventType(eventType RealtimeEventType) bool {
|
|
switch eventType {
|
|
case RTEventConversationItemCreate,
|
|
RTEventConversationItemAdded,
|
|
RTEventConversationItemCreated,
|
|
RTEventConversationItemRetrieved,
|
|
RTEventConversationItemDone:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
// IsRealtimeUserInputEvent reports whether the event represents a finalized
|
|
// user input item in the canonical Bifrost realtime schema.
|
|
func IsRealtimeUserInputEvent(event *BifrostRealtimeEvent) bool {
|
|
return event != nil &&
|
|
event.Item != nil &&
|
|
event.Item.Role == "user" &&
|
|
IsRealtimeConversationItemEventType(event.Type)
|
|
}
// IsRealtimeToolOutputEvent reports whether the event represents a finalized
|
|
// tool output item in the canonical Bifrost realtime schema.
|
|
func IsRealtimeToolOutputEvent(event *BifrostRealtimeEvent) bool {
|
|
return event != nil &&
|
|
event.Item != nil &&
|
|
event.Item.Type == "function_call_output" &&
|
|
IsRealtimeConversationItemEventType(event.Type)
|
|
}
// IsRealtimeInputTranscriptEvent reports whether the event carries a finalized
|
|
// input-audio transcript in the canonical Bifrost realtime schema.
|
|
func IsRealtimeInputTranscriptEvent(event *BifrostRealtimeEvent) bool {
|
|
return event != nil && event.Type == RTEventInputAudioTransCompleted
|
|
}
// BifrostRealtimeEvent is the unified Bifrost envelope for all Realtime events.
|
|
// Provider converters translate between this format and the provider-native protocol.
|
|
type BifrostRealtimeEvent struct {
|
|
Type RealtimeEventType `json:"type"`
|
|
EventID string `json:"event_id,omitempty"`
|
|
|
|
Session *RealtimeSession `json:"session,omitempty"`
|
|
Item *RealtimeItem `json:"item,omitempty"`
|
|
Delta *RealtimeDelta `json:"delta,omitempty"`
|
|
Audio []byte `json:"audio,omitempty"`
|
|
Error *RealtimeError `json:"error,omitempty"`
|
|
|
|
// ExtraParams preserves provider-specific top-level event fields that are not
|
|
// promoted into the common Bifrost schema.
|
|
ExtraParams map[string]json.RawMessage `json:"extra_params,omitempty"`
|
|
|
|
// RawData preserves the original provider event for pass-through or debugging.
|
|
RawData json.RawMessage `json:"raw_data,omitempty"`
|
|
}
// RealtimeSession describes session configuration for the Realtime connection.
//
// Every field is optional and omitted from JSON when empty. MaxOutputTokens,
// TurnDetection and Tools are held as raw JSON and are not interpreted by this
// type. ExtraParams holds session keys that have no dedicated field.
type RealtimeSession struct {
	ID               string                     `json:"id,omitempty"`
	Model            string                     `json:"model,omitempty"`
	Modalities       []string                   `json:"modalities,omitempty"`
	Instructions     string                     `json:"instructions,omitempty"`
	Voice            string                     `json:"voice,omitempty"`
	Temperature      *float64                   `json:"temperature,omitempty"`
	MaxOutputTokens  json.RawMessage            `json:"max_output_tokens,omitempty"`
	TurnDetection    json.RawMessage            `json:"turn_detection,omitempty"`
	InputAudioFormat string                     `json:"input_audio_format,omitempty"`
	OutputAudioType  string                     `json:"output_audio_type,omitempty"`
	Tools            json.RawMessage            `json:"tools,omitempty"`
	ExtraParams      map[string]json.RawMessage `json:"extra_params,omitempty"`
}
// RealtimeItem represents a conversation item in the Realtime protocol.
//
// Every field is optional and omitted from JSON when empty. Content is held as
// raw JSON and is not interpreted by this type. ExtraParams holds item keys
// that have no dedicated field.
type RealtimeItem struct {
	ID          string                     `json:"id,omitempty"`
	Type        string                     `json:"type,omitempty"`
	Role        string                     `json:"role,omitempty"`
	Status      string                     `json:"status,omitempty"`
	Content     json.RawMessage            `json:"content,omitempty"`
	Name        string                     `json:"name,omitempty"`
	CallID      string                     `json:"call_id,omitempty"`
	Arguments   string                     `json:"arguments,omitempty"`
	Output      string                     `json:"output,omitempty"`
	ExtraParams map[string]json.RawMessage `json:"extra_params,omitempty"`
}
// RealtimeDelta carries incremental content for streaming events.
//
// Every field is optional and omitted from JSON when empty. OutputIdx and
// ContentIdx are pointers, so an explicit index of 0 is still serialized while
// a nil pointer is omitted.
type RealtimeDelta struct {
	Text       string `json:"text,omitempty"`
	Audio      string `json:"audio,omitempty"`
	Transcript string `json:"transcript,omitempty"`
	ItemID     string `json:"item_id,omitempty"`
	OutputIdx  *int   `json:"output_index,omitempty"`
	ContentIdx *int   `json:"content_index,omitempty"`
	ResponseID string `json:"response_id,omitempty"`
}
// RealtimeError describes an error from the Realtime API.
|
|
type RealtimeError struct {
|
|
Type string `json:"type,omitempty"`
|
|
Code string `json:"code,omitempty"`
|
|
Message string `json:"message,omitempty"`
|
|
Param string `json:"param,omitempty"`
|
|
ExtraParams map[string]json.RawMessage `json:"extra_params,omitempty"`
|
|
}
// RealtimeSessionEndpointType identifies the public ephemeral-token endpoint
// shape the client called so providers can preserve versioned behavior.
type RealtimeSessionEndpointType string

// Endpoint shapes recognized for ephemeral-token creation.
const (
	RealtimeSessionEndpointClientSecrets RealtimeSessionEndpointType = "client_secrets"
	RealtimeSessionEndpointSessions      RealtimeSessionEndpointType = "sessions"
)
// RealtimeSessionRoute describes a provider-registered public route for
|
|
// ephemeral-token creation.
|
|
type RealtimeSessionRoute struct {
|
|
Path string
|
|
EndpointType RealtimeSessionEndpointType
|
|
DefaultProvider ModelProvider
|
|
}
// RealtimeProvider is an optional interface that providers can implement to
|
|
// indicate support for bidirectional Realtime API (audio/text streaming).
|
|
// Checked via type assertion: provider.(RealtimeProvider).
|
|
type RealtimeProvider interface {
|
|
SupportsRealtimeAPI() bool
|
|
RealtimeWebSocketURL(key Key, model string) string
|
|
RealtimeHeaders(key Key) map[string]string
|
|
// SupportsRealtimeWebRTC reports whether the provider supports WebRTC SDP exchange.
|
|
SupportsRealtimeWebRTC() bool
|
|
// ExchangeRealtimeWebRTCSDP performs the provider-specific SDP signaling exchange.
|
|
// The provider owns the HTTP specifics (URL, headers, body format).
|
|
// session may be nil if the signaling format doesn't include session config.
|
|
ExchangeRealtimeWebRTCSDP(ctx *BifrostContext, key Key, model string, sdp string, session json.RawMessage) (string, *BifrostError)
|
|
ToBifrostRealtimeEvent(providerEvent json.RawMessage) (*BifrostRealtimeEvent, error)
|
|
ToProviderRealtimeEvent(bifrostEvent *BifrostRealtimeEvent) (json.RawMessage, error)
|
|
// ShouldStartRealtimeTurn reports whether the canonical client-side event
|
|
// should start pre-hooks. Providers without an explicit turn-start signal
|
|
// return false and rely on finalize-time fallback hooks.
|
|
ShouldStartRealtimeTurn(event *BifrostRealtimeEvent) bool
|
|
// RealtimeTurnFinalEvent returns the canonical provider event that completes
|
|
// a turn and should trigger post-hooks.
|
|
RealtimeTurnFinalEvent() RealtimeEventType
|
|
RealtimeWebRTCDataChannelLabel() string
|
|
RealtimeWebSocketSubprotocol() string
|
|
ShouldForwardRealtimeEvent(event *BifrostRealtimeEvent) bool
|
|
ShouldAccumulateRealtimeOutput(eventType RealtimeEventType) bool
|
|
}
// RealtimeLegacyWebRTCProvider is an optional interface for providers that
|
|
// support the beta WebRTC handshake (e.g., OpenAI's /v1/realtime).
|
|
// Only checked for legacy integration routes via type assertion.
|
|
// Takes SDP offer + optional session JSON, same as ExchangeRealtimeWebRTCSDP
|
|
// but targets the provider's legacy/beta endpoint.
|
|
type RealtimeLegacyWebRTCProvider interface {
|
|
ExchangeLegacyRealtimeWebRTCSDP(ctx *BifrostContext, key Key, sdp string, session json.RawMessage, model string) (string, *BifrostError)
|
|
}
// RealtimeUsageExtractor lets providers parse terminal-turn usage/output from
|
|
// their native wire payloads without coupling handlers to a specific protocol.
|
|
type RealtimeUsageExtractor interface {
|
|
ExtractRealtimeTurnUsage(terminalEventRaw []byte) *BifrostLLMUsage
|
|
ExtractRealtimeTurnOutput(terminalEventRaw []byte) *ChatMessage
|
|
}
// RealtimeSessionProvider is an optional interface for providers that can mint
|
|
// short-lived client secrets for browser/client-side Realtime connections.
|
|
// Checked via type assertion: provider.(RealtimeSessionProvider).
|
|
type RealtimeSessionProvider interface {
|
|
CreateRealtimeClientSecret(ctx *BifrostContext, key Key, endpointType RealtimeSessionEndpointType, rawRequest json.RawMessage) (*BifrostPassthroughResponse, *BifrostError)
|
|
}
// ParseRealtimeEvent decodes a client/provider realtime event while preserving
|
|
// unknown top-level fields in ExtraParams for provider-specific round-tripping.
|
|
func ParseRealtimeEvent(raw []byte) (*BifrostRealtimeEvent, error) {
|
|
type realtimeEventAlias struct {
|
|
Type RealtimeEventType `json:"type"`
|
|
EventID string `json:"event_id,omitempty"`
|
|
Session *RealtimeSession `json:"session,omitempty"`
|
|
Item *RealtimeItem `json:"item,omitempty"`
|
|
Delta *RealtimeDelta `json:"delta,omitempty"`
|
|
Audio []byte `json:"audio,omitempty"`
|
|
Error *RealtimeError `json:"error,omitempty"`
|
|
}
|
|
|
|
var alias realtimeEventAlias
|
|
if err := Unmarshal(raw, &alias); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
event := &BifrostRealtimeEvent{
|
|
Type: alias.Type,
|
|
EventID: alias.EventID,
|
|
Session: alias.Session,
|
|
Item: alias.Item,
|
|
Delta: alias.Delta,
|
|
Audio: alias.Audio,
|
|
Error: alias.Error,
|
|
}
|
|
|
|
var root map[string]json.RawMessage
|
|
if err := Unmarshal(raw, &root); err != nil {
|
|
return nil, err
|
|
}
|
|
savedSession := root["session"]
|
|
savedItem := root["item"]
|
|
savedError := root["error"]
|
|
for _, key := range []string{"type", "event_id", "session", "item", "delta", "audio", "error", "raw_data"} {
|
|
delete(root, key)
|
|
}
|
|
if len(root) > 0 {
|
|
event.ExtraParams = root
|
|
}
|
|
if event.Session != nil {
|
|
var sessionRoot map[string]json.RawMessage
|
|
if len(savedSession) > 0 && Unmarshal(savedSession, &sessionRoot) == nil {
|
|
for _, key := range []string{
|
|
"id", "model", "modalities", "instructions", "voice", "temperature",
|
|
"max_output_tokens", "turn_detection", "input_audio_format", "output_audio_type", "tools",
|
|
} {
|
|
delete(sessionRoot, key)
|
|
}
|
|
if len(sessionRoot) > 0 {
|
|
event.Session.ExtraParams = sessionRoot
|
|
}
|
|
}
|
|
}
|
|
if event.Item != nil {
|
|
var itemRoot map[string]json.RawMessage
|
|
if len(savedItem) > 0 && Unmarshal(savedItem, &itemRoot) == nil {
|
|
for _, key := range []string{
|
|
"id", "type", "role", "status", "content", "name", "call_id", "arguments", "output",
|
|
} {
|
|
delete(itemRoot, key)
|
|
}
|
|
if len(itemRoot) > 0 {
|
|
event.Item.ExtraParams = itemRoot
|
|
}
|
|
}
|
|
}
|
|
if event.Error != nil {
|
|
var errorRoot map[string]json.RawMessage
|
|
if len(savedError) > 0 && Unmarshal(savedError, &errorRoot) == nil {
|
|
for _, key := range []string{"type", "code", "message", "param"} {
|
|
delete(errorRoot, key)
|
|
}
|
|
if len(errorRoot) > 0 {
|
|
event.Error.ExtraParams = errorRoot
|
|
}
|
|
}
|
|
}
|
|
|
|
return event, nil
|
|
}