first commit
This commit is contained in:
967
core/providers/openai/realtime.go
Normal file
967
core/providers/openai/realtime.go
Normal file
@@ -0,0 +1,967 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
// SupportsRealtimeAPI returns true since OpenAI natively supports the Realtime API.
|
||||
func (provider *OpenAIProvider) SupportsRealtimeAPI() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// RealtimeWebSocketURL returns the WSS URL for the OpenAI Realtime API.
|
||||
// Format: wss://api.openai.com/v1/realtime?model=<model>
|
||||
func (provider *OpenAIProvider) RealtimeWebSocketURL(key schemas.Key, model string) string {
|
||||
base := provider.networkConfig.BaseURL
|
||||
base = strings.Replace(base, "https://", "wss://", 1)
|
||||
base = strings.Replace(base, "http://", "ws://", 1)
|
||||
return base + "/v1/realtime?model=" + url.QueryEscape(model)
|
||||
}
|
||||
|
||||
// RealtimeHeaders returns the headers required for the OpenAI Realtime WebSocket connection.
|
||||
func (provider *OpenAIProvider) RealtimeHeaders(key schemas.Key) map[string]string {
|
||||
headers := map[string]string{
|
||||
"Authorization": "Bearer " + key.Value.GetValue(),
|
||||
}
|
||||
for k, v := range provider.networkConfig.ExtraHeaders {
|
||||
headers[k] = v
|
||||
}
|
||||
return headers
|
||||
}
|
||||
|
||||
// SupportsRealtimeWebRTC reports that OpenAI supports WebRTC SDP exchange.
|
||||
func (provider *OpenAIProvider) SupportsRealtimeWebRTC() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// ExchangeRealtimeWebRTCSDP performs the GA SDP exchange via multipart POST to /v1/realtime/calls.
|
||||
func (provider *OpenAIProvider) ExchangeRealtimeWebRTCSDP(
|
||||
ctx *schemas.BifrostContext,
|
||||
key schemas.Key,
|
||||
model string,
|
||||
sdp string,
|
||||
session json.RawMessage,
|
||||
) (string, *schemas.BifrostError) {
|
||||
path := "/v1/realtime/calls"
|
||||
if session == nil && strings.TrimSpace(model) != "" {
|
||||
path += "?model=" + url.QueryEscape(model)
|
||||
}
|
||||
return provider.exchangeWebRTCSDP(ctx, key, path, sdp, session)
|
||||
}
|
||||
|
||||
// ExchangeLegacyRealtimeWebRTCSDP performs the beta SDP exchange via multipart POST to /v1/realtime.
|
||||
// Same multipart format but targets the legacy endpoint with model in the URL.
|
||||
func (provider *OpenAIProvider) ExchangeLegacyRealtimeWebRTCSDP(
|
||||
ctx *schemas.BifrostContext,
|
||||
key schemas.Key,
|
||||
sdp string,
|
||||
session json.RawMessage,
|
||||
model string,
|
||||
) (string, *schemas.BifrostError) {
|
||||
return provider.exchangeWebRTCSDP(ctx, key, "/v1/realtime?model="+url.QueryEscape(model), sdp, session)
|
||||
}
|
||||
|
||||
// exchangeWebRTCSDP is the shared multipart SDP exchange implementation.
|
||||
// Builds a multipart body with sdp + optional session, POSTs to the given path.
|
||||
func (provider *OpenAIProvider) exchangeWebRTCSDP(
|
||||
ctx *schemas.BifrostContext,
|
||||
key schemas.Key,
|
||||
path string,
|
||||
sdp string,
|
||||
session json.RawMessage,
|
||||
) (string, *schemas.BifrostError) {
|
||||
bodyBuf := &bytes.Buffer{}
|
||||
writer := multipart.NewWriter(bodyBuf)
|
||||
if err := writer.WriteField("sdp", sdp); err != nil {
|
||||
return "", newRealtimeWebRTCSDPError(fasthttp.StatusInternalServerError, "server_error", "failed to encode upstream SDP body", err)
|
||||
}
|
||||
if session != nil {
|
||||
if err := writer.WriteField("session", string(session)); err != nil {
|
||||
return "", newRealtimeWebRTCSDPError(fasthttp.StatusInternalServerError, "server_error", "failed to encode upstream session body", err)
|
||||
}
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
return "", newRealtimeWebRTCSDPError(fasthttp.StatusInternalServerError, "server_error", "failed to finalize upstream SDP body", err)
|
||||
}
|
||||
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
req.SetRequestURI(provider.buildRequestURL(ctx, path, schemas.RealtimeRequest))
|
||||
req.Header.SetMethod(http.MethodPost)
|
||||
req.Header.SetContentType(writer.FormDataContentType())
|
||||
req.Header.Set("Authorization", "Bearer "+key.Value.GetValue())
|
||||
for k, v := range provider.networkConfig.ExtraHeaders {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
if headers, _ := ctx.Value(schemas.BifrostContextKeyRequestHeaders).(map[string]string); headers != nil {
|
||||
if agentsSDK := headers["x-openai-agents-sdk"]; agentsSDK != "" {
|
||||
req.Header.Set("X-OpenAI-Agents-SDK", agentsSDK)
|
||||
}
|
||||
}
|
||||
req.SetBody(bodyBuf.Bytes())
|
||||
|
||||
_, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
|
||||
defer wait()
|
||||
if bifrostErr != nil {
|
||||
return "", bifrostErr
|
||||
}
|
||||
|
||||
answerBody := resp.Body()
|
||||
if resp.StatusCode() < fasthttp.StatusOK || resp.StatusCode() >= fasthttp.StatusMultipleChoices {
|
||||
return "", provider.realtimeWebRTCUpstreamError(ctx, resp.StatusCode(), answerBody)
|
||||
}
|
||||
|
||||
return string(answerBody), nil
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) realtimeWebRTCUpstreamError(ctx *schemas.BifrostContext, statusCode int, body []byte) *schemas.BifrostError {
|
||||
bifrostErr := &schemas.BifrostError{
|
||||
IsBifrostError: false,
|
||||
StatusCode: schemas.Ptr(fasthttp.StatusBadGateway),
|
||||
Error: &schemas.ErrorField{
|
||||
Type: schemas.Ptr("upstream_connection_error"),
|
||||
Message: fmt.Sprintf("upstream realtime WebRTC handshake failed for %s", provider.GetProviderKey()),
|
||||
},
|
||||
ExtraFields: schemas.BifrostErrorExtraFields{
|
||||
RequestType: schemas.RealtimeRequest,
|
||||
Provider: provider.GetProviderKey(),
|
||||
},
|
||||
}
|
||||
if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
|
||||
bifrostErr.ExtraFields.RawResponse = map[string]any{
|
||||
"status": statusCode,
|
||||
"body": string(body),
|
||||
}
|
||||
}
|
||||
return bifrostErr
|
||||
}
|
||||
|
||||
func newRealtimeWebRTCSDPError(status int, errorType, message string, err error) *schemas.BifrostError {
|
||||
bifrostErr := &schemas.BifrostError{
|
||||
IsBifrostError: true,
|
||||
StatusCode: schemas.Ptr(status),
|
||||
Error: &schemas.ErrorField{
|
||||
Type: schemas.Ptr(errorType),
|
||||
Message: message,
|
||||
},
|
||||
}
|
||||
if err != nil {
|
||||
bifrostErr.Error.Error = err
|
||||
}
|
||||
return bifrostErr
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) ShouldStartRealtimeTurn(event *schemas.BifrostRealtimeEvent) bool {
|
||||
if event == nil {
|
||||
return false
|
||||
}
|
||||
switch event.Type {
|
||||
case schemas.RTEventResponseCreate, schemas.RTEventInputAudioBufferCommitted:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) RealtimeTurnFinalEvent() schemas.RealtimeEventType {
|
||||
return schemas.RTEventResponseDone
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) RealtimeWebRTCDataChannelLabel() string {
|
||||
return "oai-events"
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) RealtimeWebSocketSubprotocol() string {
|
||||
return "realtime"
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) ShouldForwardRealtimeEvent(event *schemas.BifrostRealtimeEvent) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) ShouldAccumulateRealtimeOutput(eventType schemas.RealtimeEventType) bool {
|
||||
switch eventType {
|
||||
case schemas.RTEventResponseTextDelta,
|
||||
schemas.RTEventResponseAudioTransDelta,
|
||||
schemas.RealtimeEventType("response.output_text.delta"),
|
||||
schemas.RealtimeEventType("response.output_audio_transcript.delta"):
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// CreateRealtimeClientSecret mints an OpenAI Realtime client secret and returns
|
||||
// the native OpenAI response body unchanged.
|
||||
func (provider *OpenAIProvider) CreateRealtimeClientSecret(
|
||||
ctx *schemas.BifrostContext,
|
||||
key schemas.Key,
|
||||
endpointType schemas.RealtimeSessionEndpointType,
|
||||
rawRequest json.RawMessage,
|
||||
) (*schemas.BifrostPassthroughResponse, *schemas.BifrostError) {
|
||||
if err := providerUtils.CheckOperationAllowed(schemas.OpenAI, provider.customProviderConfig, schemas.RealtimeRequest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
normalizedBody, requestedModel, bifrostErr := normalizeRealtimeClientSecretRequest(rawRequest, provider.GetProviderKey(), endpointType)
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
req.SetRequestURI(provider.buildRequestURL(ctx, realtimeSessionUpstreamPath(endpointType), schemas.RealtimeRequest))
|
||||
req.Header.SetMethod(http.MethodPost)
|
||||
req.Header.SetContentType("application/json")
|
||||
for k, v := range provider.realtimeSessionHeaders(key, endpointType) {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
req.SetBody(normalizedBody)
|
||||
|
||||
latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
|
||||
defer wait()
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
|
||||
headers := providerUtils.ExtractProviderResponseHeaders(resp)
|
||||
ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, headers)
|
||||
|
||||
if resp.StatusCode() < fasthttp.StatusOK || resp.StatusCode() >= fasthttp.StatusMultipleChoices {
|
||||
return nil, ParseOpenAIError(resp)
|
||||
}
|
||||
|
||||
body, err := providerUtils.CheckAndDecodeBody(resp)
|
||||
if err != nil {
|
||||
return nil, providerUtils.NewBifrostOperationError("failed to decode response body", err)
|
||||
}
|
||||
for k := range headers {
|
||||
if strings.EqualFold(k, "Content-Encoding") || strings.EqualFold(k, "Content-Length") {
|
||||
delete(headers, k)
|
||||
}
|
||||
}
|
||||
|
||||
out := &schemas.BifrostPassthroughResponse{
|
||||
StatusCode: resp.StatusCode(),
|
||||
Headers: headers,
|
||||
Body: body,
|
||||
}
|
||||
out.ExtraFields.Provider = provider.GetProviderKey()
|
||||
out.ExtraFields.OriginalModelRequested = requestedModel
|
||||
out.ExtraFields.RequestType = schemas.RealtimeRequest
|
||||
out.ExtraFields.Latency = latency.Milliseconds()
|
||||
if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
|
||||
providerUtils.ParseAndSetRawRequestIfJSON(req, &out.ExtraFields)
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func normalizeRealtimeClientSecretRequest(
|
||||
rawRequest json.RawMessage,
|
||||
defaultProvider schemas.ModelProvider,
|
||||
endpointType schemas.RealtimeSessionEndpointType,
|
||||
) ([]byte, string, *schemas.BifrostError) {
|
||||
root, bifrostErr := schemas.ParseRealtimeClientSecretBody(rawRequest)
|
||||
if bifrostErr != nil {
|
||||
return nil, "", bifrostErr
|
||||
}
|
||||
|
||||
modelValue, bifrostErr := schemas.ExtractRealtimeClientSecretModel(root)
|
||||
if bifrostErr != nil {
|
||||
return nil, "", bifrostErr
|
||||
}
|
||||
providerKey, normalizedModel := schemas.ParseModelString(modelValue, defaultProvider)
|
||||
if normalizedModel == "" {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusBadRequest, "invalid_request_error", "session.model is required", nil)
|
||||
}
|
||||
if providerKey == "" {
|
||||
providerKey = defaultProvider
|
||||
}
|
||||
if providerKey == "" {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusBadRequest, "invalid_request_error", "unable to determine provider from model", nil)
|
||||
}
|
||||
|
||||
if endpointType == schemas.RealtimeSessionEndpointSessions {
|
||||
return normalizeRealtimeSessionsRequest(root, normalizedModel)
|
||||
}
|
||||
|
||||
return normalizeRealtimeClientSecretsRequest(root, normalizedModel)
|
||||
}
|
||||
|
||||
func normalizeRealtimeClientSecretsRequest(
|
||||
root map[string]json.RawMessage,
|
||||
normalizedModel string,
|
||||
) ([]byte, string, *schemas.BifrostError) {
|
||||
session := map[string]json.RawMessage{}
|
||||
if existingSession, ok := root["session"]; ok && len(existingSession) > 0 && !bytes.Equal(existingSession, []byte("null")) {
|
||||
if err := json.Unmarshal(existingSession, &session); err != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusBadRequest, "invalid_request_error", "session must be an object", err)
|
||||
}
|
||||
}
|
||||
|
||||
modelJSON, marshalErr := json.Marshal(normalizedModel)
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode normalized model", marshalErr)
|
||||
}
|
||||
session["model"] = modelJSON
|
||||
if _, ok := session["type"]; !ok {
|
||||
typeJSON, marshalErr := json.Marshal("realtime")
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode realtime session type", marshalErr)
|
||||
}
|
||||
session["type"] = typeJSON
|
||||
}
|
||||
delete(root, "model")
|
||||
|
||||
sessionJSON, marshalErr := json.Marshal(session)
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode realtime session", marshalErr)
|
||||
}
|
||||
root["session"] = sessionJSON
|
||||
|
||||
normalizedBody, marshalErr := json.Marshal(root)
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode realtime request", marshalErr)
|
||||
}
|
||||
|
||||
return normalizedBody, normalizedModel, nil
|
||||
}
|
||||
|
||||
func normalizeRealtimeSessionsRequest(
|
||||
root map[string]json.RawMessage,
|
||||
normalizedModel string,
|
||||
) ([]byte, string, *schemas.BifrostError) {
|
||||
if existingSession, ok := root["session"]; ok && len(existingSession) > 0 && !bytes.Equal(existingSession, []byte("null")) {
|
||||
session := map[string]json.RawMessage{}
|
||||
if err := json.Unmarshal(existingSession, &session); err != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusBadRequest, "invalid_request_error", "session must be an object", err)
|
||||
}
|
||||
for key, value := range session {
|
||||
if _, exists := root[key]; !exists {
|
||||
root[key] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
modelJSON, marshalErr := json.Marshal(normalizedModel)
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode normalized model", marshalErr)
|
||||
}
|
||||
root["model"] = modelJSON
|
||||
delete(root, "session")
|
||||
|
||||
normalizedBody, marshalErr := json.Marshal(root)
|
||||
if marshalErr != nil {
|
||||
return nil, "", newRealtimeClientSecretError(fasthttp.StatusInternalServerError, "server_error", "failed to encode realtime request", marshalErr)
|
||||
}
|
||||
|
||||
return normalizedBody, normalizedModel, nil
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) realtimeSessionHeaders(
|
||||
key schemas.Key,
|
||||
endpointType schemas.RealtimeSessionEndpointType,
|
||||
) map[string]string {
|
||||
headers := map[string]string{
|
||||
"Authorization": "Bearer " + key.Value.GetValue(),
|
||||
}
|
||||
if endpointType == schemas.RealtimeSessionEndpointSessions {
|
||||
headers["OpenAI-Beta"] = "realtime=v1"
|
||||
}
|
||||
for k, v := range provider.networkConfig.ExtraHeaders {
|
||||
headers[k] = v
|
||||
}
|
||||
return headers
|
||||
}
|
||||
|
||||
func realtimeSessionUpstreamPath(endpointType schemas.RealtimeSessionEndpointType) string {
|
||||
if endpointType == schemas.RealtimeSessionEndpointSessions {
|
||||
return "/v1/realtime/sessions"
|
||||
}
|
||||
return "/v1/realtime/client_secrets"
|
||||
}
|
||||
|
||||
func newRealtimeClientSecretError(status int, errorType, message string, err error) *schemas.BifrostError {
|
||||
return &schemas.BifrostError{
|
||||
IsBifrostError: false,
|
||||
StatusCode: schemas.Ptr(status),
|
||||
Error: &schemas.ErrorField{
|
||||
Type: schemas.Ptr(errorType),
|
||||
Message: message,
|
||||
Error: err,
|
||||
},
|
||||
ExtraFields: schemas.BifrostErrorExtraFields{
|
||||
RequestType: schemas.RealtimeRequest,
|
||||
Provider: schemas.OpenAI,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// openAIRealtimeEvent is the raw shape of an OpenAI Realtime protocol event.
//
// Nested objects (session, conversation, item, response, part, error) are kept
// as raw JSON so they can be decoded separately. Index and offset fields are
// pointers so that an absent value can be told apart from zero. Every field
// except Type is omitted from encoded output when empty.
type openAIRealtimeEvent struct {
	// Envelope.
	Type    string `json:"type"`
	EventID string `json:"event_id,omitempty"`

	// Nested payload objects, left undecoded.
	Session      json.RawMessage `json:"session,omitempty"`
	Conversation json.RawMessage `json:"conversation,omitempty"`
	Item         json.RawMessage `json:"item,omitempty"`
	Response     json.RawMessage `json:"response,omitempty"`
	Part         json.RawMessage `json:"part,omitempty"`

	// Scalar content fields.
	Delta      string `json:"delta,omitempty"`
	Audio      string `json:"audio,omitempty"`
	Transcript string `json:"transcript,omitempty"`
	Text       string `json:"text,omitempty"`

	// Error payload, left undecoded.
	Error json.RawMessage `json:"error,omitempty"`

	// Identifiers and positions.
	ItemID       string `json:"item_id,omitempty"`
	OutputIndex  *int   `json:"output_index,omitempty"`
	ContentIndex *int   `json:"content_index,omitempty"`
	ResponseID   string `json:"response_id,omitempty"`
	AudioEndMS   *int   `json:"audio_end_ms,omitempty"`

	PreviousItemID string `json:"previous_item_id,omitempty"`
}
|
||||
|
||||
// openAIRealtimeSession is the session object within an OpenAI Realtime event.
//
// Fields whose JSON shape is not fixed by this type (max_output_tokens,
// turn_detection, tools) are kept as raw JSON. All fields are omitted from
// encoded output when empty.
type openAIRealtimeSession struct {
	ID           string   `json:"id,omitempty"`
	Model        string   `json:"model,omitempty"`
	Modalities   []string `json:"modalities,omitempty"`
	Instructions string   `json:"instructions,omitempty"`
	Voice        string   `json:"voice,omitempty"`
	// Temperature is a pointer so an unset value differs from 0.
	Temperature      *float64        `json:"temperature,omitempty"`
	MaxOutputTokens  json.RawMessage `json:"max_output_tokens,omitempty"`
	TurnDetection    json.RawMessage `json:"turn_detection,omitempty"`
	InputAudioFormat string          `json:"input_audio_format,omitempty"`
	OutputAudioType  string          `json:"output_audio_type,omitempty"`
	Tools            json.RawMessage `json:"tools,omitempty"`
}
|
||||
|
||||
// openAIRealtimeItem is the item object within an OpenAI Realtime event.
//
// Content is kept as raw JSON; the remaining fields are plain strings. All
// fields are omitted from encoded output when empty.
type openAIRealtimeItem struct {
	// Identity and state.
	ID     string `json:"id,omitempty"`
	Type   string `json:"type,omitempty"`
	Role   string `json:"role,omitempty"`
	Status string `json:"status,omitempty"`

	// Content payload, left undecoded.
	Content json.RawMessage `json:"content,omitempty"`

	// String fields tagged name, call_id, arguments and output.
	Name      string `json:"name,omitempty"`
	CallID    string `json:"call_id,omitempty"`
	Arguments string `json:"arguments,omitempty"`
	Output    string `json:"output,omitempty"`
}
|
||||
|
||||
// openAIRealtimeError is the error object within an OpenAI Realtime event.
// All four fields are plain strings and are omitted from encoded output when
// empty.
type openAIRealtimeError struct {
	Type    string `json:"type,omitempty"`
	Code    string `json:"code,omitempty"`
	Message string `json:"message,omitempty"`
	Param   string `json:"param,omitempty"`
}
|
||||
|
||||
// ToBifrostRealtimeEvent converts an OpenAI Realtime event (raw JSON) to the unified Bifrost format.
|
||||
func (provider *OpenAIProvider) ToBifrostRealtimeEvent(providerEvent json.RawMessage) (*schemas.BifrostRealtimeEvent, error) {
|
||||
var raw openAIRealtimeEvent
|
||||
if err := json.Unmarshal(providerEvent, &raw); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal OpenAI realtime event: %w", err)
|
||||
}
|
||||
|
||||
event := &schemas.BifrostRealtimeEvent{
|
||||
Type: schemas.RealtimeEventType(raw.Type),
|
||||
EventID: raw.EventID,
|
||||
RawData: providerEvent,
|
||||
}
|
||||
setRealtimeExtraParam(event, "item_id", raw.ItemID)
|
||||
setRealtimeExtraParam(event, "previous_item_id", raw.PreviousItemID)
|
||||
setRealtimeExtraParam(event, "output_index", raw.OutputIndex)
|
||||
setRealtimeExtraParam(event, "content_index", raw.ContentIndex)
|
||||
setRealtimeExtraParam(event, "response_id", raw.ResponseID)
|
||||
setRealtimeExtraParam(event, "audio_end_ms", raw.AudioEndMS)
|
||||
setRealtimeExtraParam(event, "transcript", raw.Transcript)
|
||||
setRealtimeExtraParam(event, "text", raw.Text)
|
||||
setRealtimeExtraParam(event, "conversation", raw.Conversation)
|
||||
setRealtimeExtraParam(event, "response", raw.Response)
|
||||
setRealtimeExtraParam(event, "part", raw.Part)
|
||||
|
||||
switch {
|
||||
case raw.Session != nil:
|
||||
var sess openAIRealtimeSession
|
||||
if err := json.Unmarshal(raw.Session, &sess); err == nil {
|
||||
event.Session = &schemas.RealtimeSession{
|
||||
ID: sess.ID,
|
||||
Model: sess.Model,
|
||||
Modalities: sess.Modalities,
|
||||
Instructions: sess.Instructions,
|
||||
Voice: sess.Voice,
|
||||
Temperature: sess.Temperature,
|
||||
MaxOutputTokens: sess.MaxOutputTokens,
|
||||
TurnDetection: sess.TurnDetection,
|
||||
InputAudioFormat: sess.InputAudioFormat,
|
||||
OutputAudioType: sess.OutputAudioType,
|
||||
Tools: sess.Tools,
|
||||
}
|
||||
if extra := extractRealtimeNestedParams(raw.Session, "id", "model", "modalities", "instructions", "voice", "temperature", "max_output_tokens", "turn_detection", "input_audio_format", "output_audio_type", "tools"); len(extra) > 0 {
|
||||
event.Session.ExtraParams = extra
|
||||
}
|
||||
}
|
||||
case raw.Item != nil:
|
||||
var item openAIRealtimeItem
|
||||
if err := json.Unmarshal(raw.Item, &item); err == nil {
|
||||
event.Item = &schemas.RealtimeItem{
|
||||
ID: item.ID,
|
||||
Type: item.Type,
|
||||
Role: item.Role,
|
||||
Status: item.Status,
|
||||
Content: item.Content,
|
||||
Name: item.Name,
|
||||
CallID: item.CallID,
|
||||
Arguments: item.Arguments,
|
||||
Output: item.Output,
|
||||
}
|
||||
if extra := extractRealtimeNestedParams(raw.Item, "id", "type", "role", "status", "content", "name", "call_id", "arguments", "output"); len(extra) > 0 {
|
||||
event.Item.ExtraParams = extra
|
||||
}
|
||||
}
|
||||
|
||||
case raw.Error != nil:
|
||||
var rtErr openAIRealtimeError
|
||||
if err := json.Unmarshal(raw.Error, &rtErr); err == nil {
|
||||
event.Error = &schemas.RealtimeError{
|
||||
Type: rtErr.Type,
|
||||
Code: rtErr.Code,
|
||||
Message: rtErr.Message,
|
||||
Param: rtErr.Param,
|
||||
}
|
||||
if extra := extractRealtimeNestedParams(raw.Error, "type", "code", "message", "param"); len(extra) > 0 {
|
||||
event.Error.ExtraParams = extra
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isRealtimeDeltaEvent(raw.Type) {
|
||||
event.Delta = &schemas.RealtimeDelta{
|
||||
Text: raw.Text,
|
||||
Audio: raw.Audio,
|
||||
Transcript: raw.Transcript,
|
||||
ItemID: raw.ItemID,
|
||||
OutputIdx: raw.OutputIndex,
|
||||
ContentIdx: raw.ContentIndex,
|
||||
ResponseID: raw.ResponseID,
|
||||
}
|
||||
if raw.Delta != "" {
|
||||
if event.Delta.Text == "" {
|
||||
event.Delta.Text = raw.Delta
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return event, nil
|
||||
}
|
||||
|
||||
// ToProviderRealtimeEvent converts a unified Bifrost Realtime event back to OpenAI's native JSON.
|
||||
func (provider *OpenAIProvider) ToProviderRealtimeEvent(bifrostEvent *schemas.BifrostRealtimeEvent) (json.RawMessage, error) {
|
||||
out := map[string]interface{}{
|
||||
"type": string(bifrostEvent.Type),
|
||||
}
|
||||
if bifrostEvent.EventID != "" {
|
||||
out["event_id"] = bifrostEvent.EventID
|
||||
}
|
||||
mergeRealtimeExtraParams(out, bifrostEvent.ExtraParams)
|
||||
|
||||
if bifrostEvent.Session != nil {
|
||||
sess := map[string]interface{}{}
|
||||
if bifrostEvent.Session.ID != "" && bifrostEvent.Type != schemas.RTEventSessionUpdate {
|
||||
sess["id"] = bifrostEvent.Session.ID
|
||||
}
|
||||
if bifrostEvent.Session.Model != "" {
|
||||
sess["model"] = bifrostEvent.Session.Model
|
||||
}
|
||||
if len(bifrostEvent.Session.Modalities) > 0 {
|
||||
sess["modalities"] = bifrostEvent.Session.Modalities
|
||||
}
|
||||
if bifrostEvent.Session.Instructions != "" {
|
||||
sess["instructions"] = bifrostEvent.Session.Instructions
|
||||
}
|
||||
if bifrostEvent.Session.Voice != "" {
|
||||
sess["voice"] = bifrostEvent.Session.Voice
|
||||
}
|
||||
if bifrostEvent.Session.Temperature != nil {
|
||||
sess["temperature"] = *bifrostEvent.Session.Temperature
|
||||
}
|
||||
if bifrostEvent.Session.MaxOutputTokens != nil {
|
||||
sess["max_output_tokens"] = bifrostEvent.Session.MaxOutputTokens
|
||||
}
|
||||
if bifrostEvent.Session.TurnDetection != nil {
|
||||
sess["turn_detection"] = bifrostEvent.Session.TurnDetection
|
||||
}
|
||||
if bifrostEvent.Session.InputAudioFormat != "" {
|
||||
sess["input_audio_format"] = bifrostEvent.Session.InputAudioFormat
|
||||
}
|
||||
if bifrostEvent.Session.OutputAudioType != "" {
|
||||
sess["output_audio_type"] = bifrostEvent.Session.OutputAudioType
|
||||
}
|
||||
if bifrostEvent.Session.Tools != nil {
|
||||
sess["tools"] = bifrostEvent.Session.Tools
|
||||
}
|
||||
mergeRealtimeSessionExtraParams(sess, bifrostEvent.Session.ExtraParams, bifrostEvent.Type)
|
||||
out["session"] = sess
|
||||
}
|
||||
|
||||
if bifrostEvent.Item != nil {
|
||||
item := map[string]interface{}{
|
||||
"type": bifrostEvent.Item.Type,
|
||||
}
|
||||
if bifrostEvent.Item.ID != "" {
|
||||
item["id"] = bifrostEvent.Item.ID
|
||||
}
|
||||
if bifrostEvent.Item.Role != "" {
|
||||
item["role"] = bifrostEvent.Item.Role
|
||||
}
|
||||
if bifrostEvent.Item.Status != "" {
|
||||
item["status"] = bifrostEvent.Item.Status
|
||||
}
|
||||
if bifrostEvent.Item.Content != nil {
|
||||
item["content"] = bifrostEvent.Item.Content
|
||||
}
|
||||
if bifrostEvent.Item.Name != "" {
|
||||
item["name"] = bifrostEvent.Item.Name
|
||||
}
|
||||
if bifrostEvent.Item.CallID != "" {
|
||||
item["call_id"] = bifrostEvent.Item.CallID
|
||||
}
|
||||
if bifrostEvent.Item.Arguments != "" {
|
||||
item["arguments"] = bifrostEvent.Item.Arguments
|
||||
}
|
||||
if bifrostEvent.Item.Output != "" {
|
||||
item["output"] = bifrostEvent.Item.Output
|
||||
}
|
||||
mergeRealtimeExtraParams(item, bifrostEvent.Item.ExtraParams)
|
||||
out["item"] = item
|
||||
}
|
||||
|
||||
if bifrostEvent.Error != nil {
|
||||
rtErr := map[string]interface{}{}
|
||||
if bifrostEvent.Error.Type != "" {
|
||||
rtErr["type"] = bifrostEvent.Error.Type
|
||||
}
|
||||
if bifrostEvent.Error.Code != "" {
|
||||
rtErr["code"] = bifrostEvent.Error.Code
|
||||
}
|
||||
if bifrostEvent.Error.Message != "" {
|
||||
rtErr["message"] = bifrostEvent.Error.Message
|
||||
}
|
||||
if bifrostEvent.Error.Param != "" {
|
||||
rtErr["param"] = bifrostEvent.Error.Param
|
||||
}
|
||||
mergeRealtimeExtraParams(rtErr, bifrostEvent.Error.ExtraParams)
|
||||
out["error"] = rtErr
|
||||
}
|
||||
|
||||
if bifrostEvent.Delta != nil {
|
||||
if bifrostEvent.Delta.Text != "" {
|
||||
out["delta"] = bifrostEvent.Delta.Text
|
||||
}
|
||||
if bifrostEvent.Delta.Audio != "" {
|
||||
out["audio"] = bifrostEvent.Delta.Audio
|
||||
}
|
||||
if bifrostEvent.Delta.Transcript != "" {
|
||||
out["transcript"] = bifrostEvent.Delta.Transcript
|
||||
}
|
||||
if bifrostEvent.Delta.ItemID != "" && !hasRealtimeExtraParam(bifrostEvent.ExtraParams, "item_id") {
|
||||
out["item_id"] = bifrostEvent.Delta.ItemID
|
||||
}
|
||||
if bifrostEvent.Delta.OutputIdx != nil && !hasRealtimeExtraParam(bifrostEvent.ExtraParams, "output_index") {
|
||||
out["output_index"] = *bifrostEvent.Delta.OutputIdx
|
||||
}
|
||||
if bifrostEvent.Delta.ContentIdx != nil && !hasRealtimeExtraParam(bifrostEvent.ExtraParams, "content_index") {
|
||||
out["content_index"] = *bifrostEvent.Delta.ContentIdx
|
||||
}
|
||||
if bifrostEvent.Delta.ResponseID != "" && !hasRealtimeExtraParam(bifrostEvent.ExtraParams, "response_id") {
|
||||
out["response_id"] = bifrostEvent.Delta.ResponseID
|
||||
}
|
||||
}
|
||||
|
||||
return providerUtils.MarshalSorted(out)
|
||||
}
|
||||
|
||||
func mergeRealtimeSessionExtraParams(out map[string]interface{}, params map[string]json.RawMessage, eventType schemas.RealtimeEventType) {
|
||||
filtered := params
|
||||
if eventType == schemas.RTEventSessionUpdate && len(params) > 0 {
|
||||
filtered = make(map[string]json.RawMessage, len(params))
|
||||
for key, value := range params {
|
||||
switch key {
|
||||
case "id", "object", "expires_at", "client_secret":
|
||||
continue
|
||||
default:
|
||||
filtered[key] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
mergeRealtimeExtraParams(out, filtered)
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) ExtractRealtimeTurnUsage(terminalEventRaw []byte) *schemas.BifrostLLMUsage {
|
||||
if len(terminalEventRaw) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var parsed openAIRealtimeResponseDoneEnvelope
|
||||
if err := json.Unmarshal(terminalEventRaw, &parsed); err != nil || parsed.Response.Usage == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
usage := &schemas.BifrostLLMUsage{
|
||||
PromptTokens: parsed.Response.Usage.InputTokens,
|
||||
CompletionTokens: parsed.Response.Usage.OutputTokens,
|
||||
TotalTokens: parsed.Response.Usage.TotalTokens,
|
||||
}
|
||||
|
||||
if parsed.Response.Usage.InputTokenDetails != nil {
|
||||
usage.PromptTokensDetails = &schemas.ChatPromptTokensDetails{
|
||||
TextTokens: parsed.Response.Usage.InputTokenDetails.TextTokens,
|
||||
AudioTokens: parsed.Response.Usage.InputTokenDetails.AudioTokens,
|
||||
ImageTokens: parsed.Response.Usage.InputTokenDetails.ImageTokens,
|
||||
CachedReadTokens: parsed.Response.Usage.InputTokenDetails.CachedTokens,
|
||||
}
|
||||
}
|
||||
|
||||
if parsed.Response.Usage.OutputTokenDetails != nil {
|
||||
usage.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{
|
||||
TextTokens: parsed.Response.Usage.OutputTokenDetails.TextTokens,
|
||||
AudioTokens: parsed.Response.Usage.OutputTokenDetails.AudioTokens,
|
||||
ReasoningTokens: parsed.Response.Usage.OutputTokenDetails.ReasoningTokens,
|
||||
ImageTokens: parsed.Response.Usage.OutputTokenDetails.ImageTokens,
|
||||
CitationTokens: parsed.Response.Usage.OutputTokenDetails.CitationTokens,
|
||||
NumSearchQueries: parsed.Response.Usage.OutputTokenDetails.NumSearchQueries,
|
||||
AcceptedPredictionTokens: parsed.Response.Usage.OutputTokenDetails.AcceptedPredictionTokens,
|
||||
RejectedPredictionTokens: parsed.Response.Usage.OutputTokenDetails.RejectedPredictionTokens,
|
||||
}
|
||||
}
|
||||
|
||||
return usage
|
||||
}
|
||||
|
||||
func (provider *OpenAIProvider) ExtractRealtimeTurnOutput(terminalEventRaw []byte) *schemas.ChatMessage {
|
||||
if len(terminalEventRaw) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var parsed openAIRealtimeResponseDoneEnvelope
|
||||
if err := json.Unmarshal(terminalEventRaw, &parsed); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
content := extractOpenAIRealtimeResponseDoneAssistantText(parsed.Response.Output)
|
||||
toolCalls := extractOpenAIRealtimeResponseDoneToolCalls(parsed.Response.Output)
|
||||
if content == "" && len(toolCalls) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
message := &schemas.ChatMessage{Role: schemas.ChatMessageRoleAssistant}
|
||||
if content != "" {
|
||||
message.Content = &schemas.ChatMessageContent{ContentStr: schemas.Ptr(content)}
|
||||
}
|
||||
if len(toolCalls) > 0 {
|
||||
message.ChatAssistantMessage = &schemas.ChatAssistantMessage{ToolCalls: toolCalls}
|
||||
}
|
||||
|
||||
return message
|
||||
}
|
||||
|
||||
type openAIRealtimeResponseDoneEnvelope struct {
|
||||
Response struct {
|
||||
Output []openAIRealtimeResponseDoneOutput `json:"output"`
|
||||
Usage *openAIRealtimeResponseDoneUsage `json:"usage"`
|
||||
} `json:"response"`
|
||||
}
|
||||
|
||||
type openAIRealtimeResponseDoneOutput struct {
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
CallID string `json:"call_id"`
|
||||
Arguments string `json:"arguments"`
|
||||
Content []openAIRealtimeResponseDoneBlock `json:"content"`
|
||||
}
|
||||
|
||||
// openAIRealtimeResponseDoneBlock is a single content block of an output item.
// Each field maps the JSON member of the same (snake_case) name and is left
// empty when that member is missing.
type openAIRealtimeResponseDoneBlock struct {
	// Text maps the "text" member.
	Text string `json:"text"`
	// Transcript maps the "transcript" member.
	Transcript string `json:"transcript"`
	// Refusal maps the "refusal" member.
	Refusal string `json:"refusal"`
}
|
||||
|
||||
type openAIRealtimeResponseDoneUsage struct {
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
InputTokens int `json:"input_tokens"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
InputTokenDetails *openAIRealtimeResponseDoneInputTokenUsage `json:"input_token_details"`
|
||||
OutputTokenDetails *openAIRealtimeResponseDoneOutputTokenUsage `json:"output_token_details"`
|
||||
}
|
||||
|
||||
// openAIRealtimeResponseDoneInputTokenUsage maps the "input_token_details"
// object of the usage section. Missing members decode to zero.
type openAIRealtimeResponseDoneInputTokenUsage struct {
	// TextTokens maps "text_tokens".
	TextTokens int `json:"text_tokens"`
	// AudioTokens maps "audio_tokens".
	AudioTokens int `json:"audio_tokens"`
	// ImageTokens maps "image_tokens".
	ImageTokens int `json:"image_tokens"`
	// CachedTokens maps "cached_tokens".
	CachedTokens int `json:"cached_tokens"`
}
|
||||
|
||||
// openAIRealtimeResponseDoneOutputTokenUsage maps the "output_token_details"
// object of the usage section. Plain int fields decode to zero when missing;
// pointer fields stay nil so "absent" can be told apart from an explicit 0.
type openAIRealtimeResponseDoneOutputTokenUsage struct {
	// TextTokens maps "text_tokens".
	TextTokens int `json:"text_tokens"`
	// AudioTokens maps "audio_tokens".
	AudioTokens int `json:"audio_tokens"`
	// ReasoningTokens maps "reasoning_tokens".
	ReasoningTokens int `json:"reasoning_tokens"`
	// ImageTokens maps "image_tokens"; nil when absent or null.
	ImageTokens *int `json:"image_tokens"`
	// CitationTokens maps "citation_tokens"; nil when absent or null.
	CitationTokens *int `json:"citation_tokens"`
	// NumSearchQueries maps "num_search_queries"; nil when absent or null.
	NumSearchQueries *int `json:"num_search_queries"`
	// AcceptedPredictionTokens maps "accepted_prediction_tokens".
	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
	// RejectedPredictionTokens maps "rejected_prediction_tokens".
	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
}
|
||||
|
||||
func extractOpenAIRealtimeResponseDoneAssistantText(outputs []openAIRealtimeResponseDoneOutput) string {
|
||||
var sb strings.Builder
|
||||
for _, output := range outputs {
|
||||
if output.Type != "message" {
|
||||
continue
|
||||
}
|
||||
for _, block := range output.Content {
|
||||
switch {
|
||||
case strings.TrimSpace(block.Text) != "":
|
||||
sb.WriteString(block.Text)
|
||||
case strings.TrimSpace(block.Transcript) != "":
|
||||
sb.WriteString(block.Transcript)
|
||||
case strings.TrimSpace(block.Refusal) != "":
|
||||
sb.WriteString(block.Refusal)
|
||||
}
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(sb.String())
|
||||
}
|
||||
|
||||
func extractOpenAIRealtimeResponseDoneToolCalls(outputs []openAIRealtimeResponseDoneOutput) []schemas.ChatAssistantMessageToolCall {
|
||||
toolCalls := make([]schemas.ChatAssistantMessageToolCall, 0)
|
||||
for _, output := range outputs {
|
||||
if output.Type != "function_call" {
|
||||
continue
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(output.Name)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
toolType := "function"
|
||||
id := strings.TrimSpace(output.CallID)
|
||||
if id == "" {
|
||||
id = strings.TrimSpace(output.ID)
|
||||
}
|
||||
|
||||
toolCall := schemas.ChatAssistantMessageToolCall{
|
||||
Index: uint16(len(toolCalls)),
|
||||
Type: &toolType,
|
||||
Function: schemas.ChatAssistantMessageToolCallFunction{
|
||||
Name: schemas.Ptr(name),
|
||||
Arguments: output.Arguments,
|
||||
},
|
||||
}
|
||||
if id != "" {
|
||||
toolCall.ID = schemas.Ptr(id)
|
||||
}
|
||||
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
}
|
||||
return toolCalls
|
||||
}
|
||||
|
||||
func setRealtimeExtraParam(event *schemas.BifrostRealtimeEvent, key string, value any) {
|
||||
if event == nil || key == "" || value == nil {
|
||||
return
|
||||
}
|
||||
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
if v == "" {
|
||||
return
|
||||
}
|
||||
case *int:
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
case json.RawMessage:
|
||||
if len(v) == 0 || string(v) == "null" {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
raw, err := json.Marshal(value)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if event.ExtraParams == nil {
|
||||
event.ExtraParams = make(map[string]json.RawMessage)
|
||||
}
|
||||
event.ExtraParams[key] = raw
|
||||
}
|
||||
|
||||
// mergeRealtimeExtraParams decodes each raw JSON entry of params and stores the
// decoded value in out under the same key, overwriting any existing entry.
//
// Entries whose raw value is empty or is not valid JSON are skipped. The merge
// is best-effort: a nil out is treated as "nowhere to merge into" and the call
// is a no-op, because writing to a nil map would panic.
func mergeRealtimeExtraParams(out map[string]any, params map[string]json.RawMessage) {
	if out == nil {
		return
	}
	for key, raw := range params {
		if len(raw) == 0 {
			continue
		}
		var value any
		if err := json.Unmarshal(raw, &value); err != nil {
			// Undecodable entries are dropped rather than aborting the merge.
			continue
		}
		out[key] = value
	}
}
|
||||
|
||||
// hasRealtimeExtraParam reports whether params holds a non-empty raw value for
// key. A nil map, a missing key and a zero-length value all yield false.
func hasRealtimeExtraParam(params map[string]json.RawMessage, key string) bool {
	// Indexing a nil map or a missing key yields a nil RawMessage, whose
	// length is 0, so one length test covers every "not present" case.
	return len(params[key]) > 0
}
|
||||
|
||||
// extractRealtimeNestedParams decodes raw as a JSON object and returns its
// members, still in raw form, minus those named in knownKeys.
//
// It returns nil when raw is empty, does not decode as a JSON object, or has
// no members left once the known keys are removed.
func extractRealtimeNestedParams(raw json.RawMessage, knownKeys ...string) map[string]json.RawMessage {
	if len(raw) == 0 {
		return nil
	}

	var fields map[string]json.RawMessage
	if json.Unmarshal(raw, &fields) != nil {
		return nil
	}

	// delete is a no-op for keys that are not present (and for a nil map).
	for i := range knownKeys {
		delete(fields, knownKeys[i])
	}

	if len(fields) > 0 {
		return fields
	}
	return nil
}
|
||||
|
||||
// isRealtimeDeltaEvent reports whether eventType exactly matches one of the
// delta event names listed below. The comparison is case-sensitive and performs
// no trimming.
func isRealtimeDeltaEvent(eventType string) bool {
	switch eventType {
	// Text deltas.
	case "response.text.delta", "response.output_text.delta":
		return true
	// Audio deltas.
	case "response.audio.delta", "response.output_audio.delta":
		return true
	// Audio transcript deltas.
	case "response.audio_transcript.delta", "response.output_audio_transcript.delta":
		return true
	// Input audio transcription deltas.
	case "conversation.item.input_audio_transcription.delta":
		return true
	default:
		return false
	}
}
|
||||
Reference in New Issue
Block a user