988 lines
36 KiB
Go
988 lines
36 KiB
Go
package streaming
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/bytedance/sonic"
|
|
bifrost "github.com/maximhq/bifrost/core"
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
"github.com/maximhq/bifrost/framework/modelcatalog"
|
|
)
|
|
|
|
// deepCopyResponsesStreamResponse creates a deep copy of BifrostResponsesStreamResponse
|
|
// to prevent shared data mutation between different plugin accumulators
|
|
func deepCopyResponsesStreamResponse(original *schemas.BifrostResponsesStreamResponse) *schemas.BifrostResponsesStreamResponse {
|
|
if original == nil {
|
|
return nil
|
|
}
|
|
|
|
copy := &schemas.BifrostResponsesStreamResponse{
|
|
Type: original.Type,
|
|
SequenceNumber: original.SequenceNumber,
|
|
ExtraFields: original.ExtraFields, // ExtraFields can be safely shared as they're typically read-only
|
|
}
|
|
|
|
// Deep copy Response if present
|
|
if original.Response != nil {
|
|
copy.Response = &schemas.BifrostResponsesResponse{}
|
|
*copy.Response = *original.Response // Shallow copy the struct
|
|
|
|
// Deep copy the Output slice if present
|
|
if original.Response.Output != nil {
|
|
copy.Response.Output = make([]schemas.ResponsesMessage, len(original.Response.Output))
|
|
for i, msg := range original.Response.Output {
|
|
copy.Response.Output[i] = deepCopyResponsesMessage(msg)
|
|
}
|
|
}
|
|
|
|
// Copy Usage if present (Usage can be shallow copied as it's typically immutable)
|
|
if original.Response.Usage != nil {
|
|
copyUsage := *original.Response.Usage
|
|
copy.Response.Usage = ©Usage
|
|
}
|
|
}
|
|
|
|
// Copy pointer fields
|
|
if original.OutputIndex != nil {
|
|
copyOutputIndex := *original.OutputIndex
|
|
copy.OutputIndex = ©OutputIndex
|
|
}
|
|
|
|
if original.Item != nil {
|
|
copyItem := deepCopyResponsesMessage(*original.Item)
|
|
copy.Item = ©Item
|
|
}
|
|
|
|
if original.ContentIndex != nil {
|
|
copyContentIndex := *original.ContentIndex
|
|
copy.ContentIndex = ©ContentIndex
|
|
}
|
|
|
|
if original.ItemID != nil {
|
|
copyItemID := *original.ItemID
|
|
copy.ItemID = ©ItemID
|
|
}
|
|
|
|
if original.Part != nil {
|
|
copyPart := deepCopyResponsesMessageContentBlock(*original.Part)
|
|
copy.Part = ©Part
|
|
}
|
|
|
|
if original.Delta != nil {
|
|
copyDelta := *original.Delta
|
|
copy.Delta = ©Delta
|
|
}
|
|
|
|
// Deep copy LogProbs slice if present
|
|
if original.LogProbs != nil {
|
|
copy.LogProbs = make([]schemas.ResponsesOutputMessageContentTextLogProb, len(original.LogProbs))
|
|
for i, logProb := range original.LogProbs {
|
|
copiedLogProb := schemas.ResponsesOutputMessageContentTextLogProb{
|
|
LogProb: logProb.LogProb,
|
|
Token: logProb.Token,
|
|
}
|
|
// Deep copy Bytes slice
|
|
if logProb.Bytes != nil {
|
|
copiedLogProb.Bytes = make([]int, len(logProb.Bytes))
|
|
for j, byteValue := range logProb.Bytes {
|
|
copiedLogProb.Bytes[j] = byteValue
|
|
}
|
|
}
|
|
// Deep copy TopLogProbs slice
|
|
if logProb.TopLogProbs != nil {
|
|
copiedLogProb.TopLogProbs = make([]schemas.LogProb, len(logProb.TopLogProbs))
|
|
for j, topLogProb := range logProb.TopLogProbs {
|
|
copiedLogProb.TopLogProbs[j] = schemas.LogProb{
|
|
Bytes: topLogProb.Bytes,
|
|
LogProb: topLogProb.LogProb,
|
|
Token: topLogProb.Token,
|
|
}
|
|
}
|
|
}
|
|
copy.LogProbs[i] = copiedLogProb
|
|
}
|
|
}
|
|
|
|
if original.Text != nil {
|
|
copyText := *original.Text
|
|
copy.Text = ©Text
|
|
}
|
|
|
|
if original.Refusal != nil {
|
|
copyRefusal := *original.Refusal
|
|
copy.Refusal = ©Refusal
|
|
}
|
|
|
|
if original.Arguments != nil {
|
|
copyArguments := *original.Arguments
|
|
copy.Arguments = ©Arguments
|
|
}
|
|
|
|
if original.PartialImageB64 != nil {
|
|
copyPartialImageB64 := *original.PartialImageB64
|
|
copy.PartialImageB64 = ©PartialImageB64
|
|
}
|
|
|
|
if original.PartialImageIndex != nil {
|
|
copyPartialImageIndex := *original.PartialImageIndex
|
|
copy.PartialImageIndex = ©PartialImageIndex
|
|
}
|
|
|
|
if original.Annotation != nil {
|
|
copyAnnotation := *original.Annotation
|
|
copy.Annotation = ©Annotation
|
|
}
|
|
|
|
if original.AnnotationIndex != nil {
|
|
copyAnnotationIndex := *original.AnnotationIndex
|
|
copy.AnnotationIndex = ©AnnotationIndex
|
|
}
|
|
|
|
if original.Code != nil {
|
|
copyCode := *original.Code
|
|
copy.Code = ©Code
|
|
}
|
|
|
|
if original.Message != nil {
|
|
copyMessage := *original.Message
|
|
copy.Message = ©Message
|
|
}
|
|
|
|
if original.Param != nil {
|
|
copyParam := *original.Param
|
|
copy.Param = ©Param
|
|
}
|
|
|
|
return copy
|
|
}
|
|
|
|
// deepCopyResponsesMessage creates a deep copy of a ResponsesMessage
|
|
func deepCopyResponsesMessage(original schemas.ResponsesMessage) schemas.ResponsesMessage {
|
|
copy := schemas.ResponsesMessage{}
|
|
|
|
if original.ID != nil {
|
|
copyID := *original.ID
|
|
copy.ID = ©ID
|
|
}
|
|
|
|
if original.Type != nil {
|
|
copyType := *original.Type
|
|
copy.Type = ©Type
|
|
}
|
|
|
|
if original.Role != nil {
|
|
copyRole := *original.Role
|
|
copy.Role = ©Role
|
|
}
|
|
|
|
if original.Content != nil {
|
|
copy.Content = &schemas.ResponsesMessageContent{}
|
|
|
|
if original.Content.ContentStr != nil {
|
|
copyContentStr := *original.Content.ContentStr
|
|
copy.Content.ContentStr = ©ContentStr
|
|
}
|
|
|
|
if original.Content.ContentBlocks != nil {
|
|
copy.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, len(original.Content.ContentBlocks))
|
|
for i, block := range original.Content.ContentBlocks {
|
|
copy.Content.ContentBlocks[i] = deepCopyResponsesMessageContentBlock(block)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep copy ResponsesReasoning if present
|
|
if original.ResponsesReasoning != nil {
|
|
copy.ResponsesReasoning = &schemas.ResponsesReasoning{}
|
|
|
|
// Deep copy Summary slice
|
|
if original.ResponsesReasoning.Summary != nil {
|
|
copy.ResponsesReasoning.Summary = make([]schemas.ResponsesReasoningSummary, len(original.ResponsesReasoning.Summary))
|
|
for i, summary := range original.ResponsesReasoning.Summary {
|
|
copy.ResponsesReasoning.Summary[i] = schemas.ResponsesReasoningSummary{
|
|
Type: summary.Type,
|
|
Text: summary.Text,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep copy EncryptedContent if present
|
|
if original.ResponsesReasoning.EncryptedContent != nil {
|
|
copyEncrypted := *original.ResponsesReasoning.EncryptedContent
|
|
copy.ResponsesReasoning.EncryptedContent = ©Encrypted
|
|
}
|
|
}
|
|
|
|
if original.ResponsesToolMessage != nil {
|
|
copy.ResponsesToolMessage = &schemas.ResponsesToolMessage{}
|
|
|
|
// Deep copy primitive fields
|
|
if original.ResponsesToolMessage.CallID != nil {
|
|
copyCallID := *original.ResponsesToolMessage.CallID
|
|
copy.ResponsesToolMessage.CallID = ©CallID
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Name != nil {
|
|
copyName := *original.ResponsesToolMessage.Name
|
|
copy.ResponsesToolMessage.Name = ©Name
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Arguments != nil {
|
|
copyArguments := *original.ResponsesToolMessage.Arguments
|
|
copy.ResponsesToolMessage.Arguments = ©Arguments
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Error != nil {
|
|
copyError := *original.ResponsesToolMessage.Error
|
|
copy.ResponsesToolMessage.Error = ©Error
|
|
}
|
|
|
|
// Deep copy Output
|
|
if original.ResponsesToolMessage.Output != nil {
|
|
copy.ResponsesToolMessage.Output = &schemas.ResponsesToolMessageOutputStruct{}
|
|
|
|
if original.ResponsesToolMessage.Output.ResponsesToolCallOutputStr != nil {
|
|
copyStr := *original.ResponsesToolMessage.Output.ResponsesToolCallOutputStr
|
|
copy.ResponsesToolMessage.Output.ResponsesToolCallOutputStr = ©Str
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil {
|
|
copy.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks = make([]schemas.ResponsesMessageContentBlock, len(original.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks))
|
|
for i, block := range original.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks {
|
|
copy.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks[i] = deepCopyResponsesMessageContentBlock(block)
|
|
}
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Output.ResponsesComputerToolCallOutput != nil {
|
|
copyOutput := *original.ResponsesToolMessage.Output.ResponsesComputerToolCallOutput
|
|
copy.ResponsesToolMessage.Output.ResponsesComputerToolCallOutput = ©Output
|
|
}
|
|
}
|
|
|
|
// Deep copy Action
|
|
if original.ResponsesToolMessage.Action != nil {
|
|
copy.ResponsesToolMessage.Action = &schemas.ResponsesToolMessageActionStruct{}
|
|
|
|
if original.ResponsesToolMessage.Action.ResponsesComputerToolCallAction != nil {
|
|
copyAction := *original.ResponsesToolMessage.Action.ResponsesComputerToolCallAction
|
|
// Deep copy Path slice
|
|
if copyAction.Path != nil {
|
|
copyAction.Path = make([]schemas.ResponsesComputerToolCallActionPath, len(copyAction.Path))
|
|
for i, path := range original.ResponsesToolMessage.Action.ResponsesComputerToolCallAction.Path {
|
|
copyAction.Path[i] = path // struct copy is fine for simple structs
|
|
}
|
|
}
|
|
// Deep copy Keys slice
|
|
if copyAction.Keys != nil {
|
|
copyAction.Keys = make([]string, len(copyAction.Keys))
|
|
for i, key := range original.ResponsesToolMessage.Action.ResponsesComputerToolCallAction.Keys {
|
|
copyAction.Keys[i] = key
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.Action.ResponsesComputerToolCallAction = ©Action
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction != nil {
|
|
copyAction := *original.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction
|
|
copy.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction = ©Action
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Action.ResponsesWebFetchToolCallAction != nil {
|
|
copyAction := *original.ResponsesToolMessage.Action.ResponsesWebFetchToolCallAction
|
|
copy.ResponsesToolMessage.Action.ResponsesWebFetchToolCallAction = ©Action
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Action.ResponsesLocalShellToolCallAction != nil {
|
|
copyAction := *original.ResponsesToolMessage.Action.ResponsesLocalShellToolCallAction
|
|
copy.ResponsesToolMessage.Action.ResponsesLocalShellToolCallAction = ©Action
|
|
}
|
|
|
|
if original.ResponsesToolMessage.Action.ResponsesMCPApprovalRequestAction != nil {
|
|
copyAction := *original.ResponsesToolMessage.Action.ResponsesMCPApprovalRequestAction
|
|
copy.ResponsesToolMessage.Action.ResponsesMCPApprovalRequestAction = ©Action
|
|
}
|
|
}
|
|
|
|
// Deep copy embedded tool call structs
|
|
if original.ResponsesToolMessage.ResponsesFileSearchToolCall != nil {
|
|
copyToolCall := *original.ResponsesToolMessage.ResponsesFileSearchToolCall
|
|
// Deep copy Queries slice
|
|
if copyToolCall.Queries != nil {
|
|
copyToolCall.Queries = make([]string, len(copyToolCall.Queries))
|
|
for i, query := range original.ResponsesToolMessage.ResponsesFileSearchToolCall.Queries {
|
|
copyToolCall.Queries[i] = query
|
|
}
|
|
}
|
|
// Deep copy Results slice
|
|
if copyToolCall.Results != nil {
|
|
copyToolCall.Results = make([]schemas.ResponsesFileSearchToolCallResult, len(copyToolCall.Results))
|
|
for i, result := range original.ResponsesToolMessage.ResponsesFileSearchToolCall.Results {
|
|
copyResult := result
|
|
// Deep copy Attributes map if present
|
|
if result.Attributes != nil {
|
|
copyAttrs := make(map[string]any, len(*result.Attributes))
|
|
for k, v := range *result.Attributes {
|
|
copyAttrs[k] = v
|
|
}
|
|
copyResult.Attributes = ©Attrs
|
|
}
|
|
copyToolCall.Results[i] = copyResult
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.ResponsesFileSearchToolCall = ©ToolCall
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesComputerToolCall != nil {
|
|
copyToolCall := *original.ResponsesToolMessage.ResponsesComputerToolCall
|
|
// Deep copy PendingSafetyChecks slice
|
|
if copyToolCall.PendingSafetyChecks != nil {
|
|
copyToolCall.PendingSafetyChecks = make([]schemas.ResponsesComputerToolCallPendingSafetyCheck, len(copyToolCall.PendingSafetyChecks))
|
|
for i, check := range original.ResponsesToolMessage.ResponsesComputerToolCall.PendingSafetyChecks {
|
|
copyToolCall.PendingSafetyChecks[i] = check
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.ResponsesComputerToolCall = ©ToolCall
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesComputerToolCallOutput != nil {
|
|
copyOutput := *original.ResponsesToolMessage.ResponsesComputerToolCallOutput
|
|
// Deep copy AcknowledgedSafetyChecks slice
|
|
if copyOutput.AcknowledgedSafetyChecks != nil {
|
|
copyOutput.AcknowledgedSafetyChecks = make([]schemas.ResponsesComputerToolCallAcknowledgedSafetyCheck, len(copyOutput.AcknowledgedSafetyChecks))
|
|
for i, check := range original.ResponsesToolMessage.ResponsesComputerToolCallOutput.AcknowledgedSafetyChecks {
|
|
copyOutput.AcknowledgedSafetyChecks[i] = check
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.ResponsesComputerToolCallOutput = ©Output
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesCodeInterpreterToolCall != nil {
|
|
copyToolCall := *original.ResponsesToolMessage.ResponsesCodeInterpreterToolCall
|
|
// Deep copy Outputs slice
|
|
if copyToolCall.Outputs != nil {
|
|
copyToolCall.Outputs = make([]schemas.ResponsesCodeInterpreterOutput, len(copyToolCall.Outputs))
|
|
for i, output := range original.ResponsesToolMessage.ResponsesCodeInterpreterToolCall.Outputs {
|
|
copyToolCall.Outputs[i] = output
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.ResponsesCodeInterpreterToolCall = ©ToolCall
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesMCPToolCall != nil {
|
|
copyToolCall := *original.ResponsesToolMessage.ResponsesMCPToolCall
|
|
copy.ResponsesToolMessage.ResponsesMCPToolCall = ©ToolCall
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesCustomToolCall != nil {
|
|
copyToolCall := *original.ResponsesToolMessage.ResponsesCustomToolCall
|
|
copy.ResponsesToolMessage.ResponsesCustomToolCall = ©ToolCall
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesImageGenerationCall != nil {
|
|
copyCall := *original.ResponsesToolMessage.ResponsesImageGenerationCall
|
|
copy.ResponsesToolMessage.ResponsesImageGenerationCall = ©Call
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesMCPListTools != nil {
|
|
copyListTools := *original.ResponsesToolMessage.ResponsesMCPListTools
|
|
// Deep copy Tools slice
|
|
if copyListTools.Tools != nil {
|
|
copyListTools.Tools = make([]schemas.ResponsesMCPTool, len(copyListTools.Tools))
|
|
for i, tool := range original.ResponsesToolMessage.ResponsesMCPListTools.Tools {
|
|
copyListTools.Tools[i] = tool
|
|
}
|
|
}
|
|
copy.ResponsesToolMessage.ResponsesMCPListTools = ©ListTools
|
|
}
|
|
|
|
if original.ResponsesToolMessage.ResponsesMCPApprovalResponse != nil {
|
|
copyApproval := *original.ResponsesToolMessage.ResponsesMCPApprovalResponse
|
|
copy.ResponsesToolMessage.ResponsesMCPApprovalResponse = ©Approval
|
|
}
|
|
}
|
|
|
|
return copy
|
|
}
|
|
|
|
// deepCopyResponsesMessageContentBlock creates a deep copy of a ResponsesMessageContentBlock
|
|
func deepCopyResponsesMessageContentBlock(original schemas.ResponsesMessageContentBlock) schemas.ResponsesMessageContentBlock {
|
|
copy := schemas.ResponsesMessageContentBlock{
|
|
Type: original.Type,
|
|
}
|
|
|
|
if original.Text != nil {
|
|
copyText := *original.Text
|
|
copy.Text = ©Text
|
|
}
|
|
|
|
// Copy other specific content type fields as needed
|
|
if original.ResponsesOutputMessageContentText != nil {
|
|
t := *original.ResponsesOutputMessageContentText
|
|
// Annotations
|
|
if t.Annotations != nil {
|
|
t.Annotations = append([]schemas.ResponsesOutputMessageContentTextAnnotation(nil), t.Annotations...)
|
|
}
|
|
// LogProbs (and their inner slices)
|
|
if t.LogProbs != nil {
|
|
newLP := make([]schemas.ResponsesOutputMessageContentTextLogProb, len(t.LogProbs))
|
|
for i := range t.LogProbs {
|
|
lp := t.LogProbs[i]
|
|
if lp.Bytes != nil {
|
|
lp.Bytes = append([]int(nil), lp.Bytes...)
|
|
}
|
|
if lp.TopLogProbs != nil {
|
|
lp.TopLogProbs = append([]schemas.LogProb(nil), lp.TopLogProbs...)
|
|
}
|
|
newLP[i] = lp
|
|
}
|
|
t.LogProbs = newLP
|
|
}
|
|
copy.ResponsesOutputMessageContentText = &t
|
|
}
|
|
|
|
if original.ResponsesOutputMessageContentRefusal != nil {
|
|
copyRefusal := schemas.ResponsesOutputMessageContentRefusal{
|
|
Refusal: original.ResponsesOutputMessageContentRefusal.Refusal,
|
|
}
|
|
copy.ResponsesOutputMessageContentRefusal = ©Refusal
|
|
}
|
|
|
|
return copy
|
|
}
|
|
|
|
// buildCompleteMessageFromResponsesStreamChunks builds complete messages from accumulated responses stream chunks
|
|
func (a *Accumulator) buildCompleteMessageFromResponsesStreamChunks(chunks []*ResponsesStreamChunk) []schemas.ResponsesMessage {
|
|
var messages []schemas.ResponsesMessage
|
|
|
|
// Sort chunks by chunk index to ensure correct processing order
|
|
sort.Slice(chunks, func(i, j int) bool {
|
|
if chunks[i].StreamResponse == nil || chunks[j].StreamResponse == nil {
|
|
return false
|
|
}
|
|
return chunks[i].ChunkIndex < chunks[j].ChunkIndex
|
|
})
|
|
|
|
for _, chunk := range chunks {
|
|
if chunk.StreamResponse == nil {
|
|
continue
|
|
}
|
|
|
|
resp := chunk.StreamResponse
|
|
switch resp.Type {
|
|
case schemas.ResponsesStreamResponseTypeOutputItemAdded:
|
|
// Always append new items - this fixes multiple function calls issue
|
|
// Deep copy to prevent shared pointer mutation when deltas are appended
|
|
if resp.Item != nil {
|
|
messages = append(messages, deepCopyResponsesMessage(*resp.Item))
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeContentPartAdded:
|
|
// Add content part to the most recent message, create message if none exists
|
|
// Deep copy to prevent shared pointer mutation
|
|
if resp.Part != nil {
|
|
if len(messages) == 0 {
|
|
messages = append(messages, createNewMessage())
|
|
}
|
|
|
|
lastMsg := &messages[len(messages)-1]
|
|
if lastMsg.Content == nil {
|
|
lastMsg.Content = &schemas.ResponsesMessageContent{}
|
|
}
|
|
if lastMsg.Content.ContentBlocks == nil {
|
|
lastMsg.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, 0)
|
|
}
|
|
lastMsg.Content.ContentBlocks = append(lastMsg.Content.ContentBlocks, deepCopyResponsesMessageContentBlock(*resp.Part))
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeOutputTextDelta:
|
|
if len(messages) == 0 {
|
|
messages = append(messages, createNewMessage())
|
|
}
|
|
// Append text delta to the most recent message
|
|
if resp.Delta != nil && resp.ContentIndex != nil && len(messages) > 0 {
|
|
a.appendTextDeltaToResponsesMessage(&messages[len(messages)-1], *resp.Delta, *resp.ContentIndex)
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeRefusalDelta:
|
|
if len(messages) == 0 {
|
|
messages = append(messages, createNewMessage())
|
|
}
|
|
// Append refusal delta to the most recent message
|
|
if resp.Refusal != nil && resp.ContentIndex != nil && len(messages) > 0 {
|
|
a.appendRefusalDeltaToResponsesMessage(&messages[len(messages)-1], *resp.Refusal, *resp.ContentIndex)
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta:
|
|
if len(messages) == 0 {
|
|
messages = append(messages, createNewMessage())
|
|
}
|
|
// Deep copy to prevent shared pointer mutation when arguments are appended
|
|
if resp.Item != nil {
|
|
messages = append(messages, deepCopyResponsesMessage(*resp.Item))
|
|
}
|
|
// Route arguments delta to the correct function call message by ItemID,
|
|
// falling back to last message only when no ItemID is present.
|
|
// If ItemID is present but unmatched, create a new stub message to avoid
|
|
// merging parallel tool call argument deltas into the wrong call.
|
|
if resp.Delta != nil && len(messages) > 0 {
|
|
targetIdx := len(messages) - 1
|
|
if resp.ItemID != nil {
|
|
targetIdx = -1
|
|
for i := len(messages) - 1; i >= 0; i-- {
|
|
if messages[i].ID != nil && *messages[i].ID == *resp.ItemID {
|
|
targetIdx = i
|
|
break
|
|
}
|
|
}
|
|
if targetIdx == -1 {
|
|
// ItemID present but no matching message — create a stub to hold the delta
|
|
id := *resp.ItemID
|
|
messages = append(messages, schemas.ResponsesMessage{
|
|
ID: &id,
|
|
})
|
|
targetIdx = len(messages) - 1
|
|
}
|
|
}
|
|
a.appendFunctionArgumentsDeltaToResponsesMessage(&messages[targetIdx], *resp.Delta)
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeReasoningSummaryTextDelta:
|
|
// Create new reasoning message if none exists, or find existing reasoning message to append delta to
|
|
if (resp.Delta != nil || resp.Signature != nil) && resp.ItemID != nil {
|
|
var targetMessage *schemas.ResponsesMessage
|
|
|
|
// Find the reasoning message by ItemID
|
|
for i := len(messages) - 1; i >= 0; i-- {
|
|
if messages[i].ID != nil && *messages[i].ID == *resp.ItemID {
|
|
targetMessage = &messages[i]
|
|
break
|
|
}
|
|
}
|
|
|
|
// If no message found, create a new reasoning message
|
|
if targetMessage == nil {
|
|
// Deep copy ItemID to prevent shared pointer mutation
|
|
var copyID *string
|
|
if resp.ItemID != nil {
|
|
id := *resp.ItemID
|
|
copyID = &id
|
|
}
|
|
newMessage := schemas.ResponsesMessage{
|
|
ID: copyID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
},
|
|
}
|
|
messages = append(messages, newMessage)
|
|
targetMessage = &messages[len(messages)-1]
|
|
}
|
|
|
|
// Handle text delta
|
|
if resp.Delta != nil {
|
|
a.appendReasoningDeltaToResponsesMessage(targetMessage, *resp.Delta, resp.ContentIndex)
|
|
}
|
|
|
|
// Handle signature delta
|
|
if resp.Signature != nil {
|
|
a.appendReasoningSignatureToResponsesMessage(targetMessage, *resp.Signature, resp.ContentIndex)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return messages
|
|
}
|
|
|
|
func createNewMessage() schemas.ResponsesMessage {
|
|
return schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: make([]schemas.ResponsesMessageContentBlock, 0),
|
|
},
|
|
}
|
|
}
|
|
|
|
// appendTextDeltaToResponsesMessage appends text delta to a responses message
|
|
func (a *Accumulator) appendTextDeltaToResponsesMessage(message *schemas.ResponsesMessage, delta string, contentIndex int) {
|
|
if message.Content == nil {
|
|
message.Content = &schemas.ResponsesMessageContent{}
|
|
}
|
|
|
|
// If we don't have content blocks yet, create them
|
|
if message.Content.ContentBlocks == nil {
|
|
message.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, contentIndex+1)
|
|
}
|
|
|
|
// Ensure we have enough content blocks
|
|
for len(message.Content.ContentBlocks) <= contentIndex {
|
|
message.Content.ContentBlocks = append(message.Content.ContentBlocks, schemas.ResponsesMessageContentBlock{})
|
|
}
|
|
|
|
// Initialize the content block if needed
|
|
if message.Content.ContentBlocks[contentIndex].Type == "" {
|
|
message.Content.ContentBlocks[contentIndex].Type = schemas.ResponsesOutputMessageContentTypeText
|
|
message.Content.ContentBlocks[contentIndex].ResponsesOutputMessageContentText = &schemas.ResponsesOutputMessageContentText{}
|
|
}
|
|
|
|
// Append to existing text or create new text
|
|
if message.Content.ContentBlocks[contentIndex].Text == nil {
|
|
message.Content.ContentBlocks[contentIndex].Text = &delta
|
|
} else {
|
|
*message.Content.ContentBlocks[contentIndex].Text += delta
|
|
}
|
|
}
|
|
|
|
// appendRefusalDeltaToResponsesMessage appends refusal delta to a responses message
|
|
func (a *Accumulator) appendRefusalDeltaToResponsesMessage(message *schemas.ResponsesMessage, refusal string, contentIndex int) {
|
|
if message.Content == nil {
|
|
message.Content = &schemas.ResponsesMessageContent{}
|
|
}
|
|
|
|
// If we don't have content blocks yet, create them
|
|
if message.Content.ContentBlocks == nil {
|
|
message.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, contentIndex+1)
|
|
}
|
|
|
|
// Ensure we have enough content blocks
|
|
for len(message.Content.ContentBlocks) <= contentIndex {
|
|
message.Content.ContentBlocks = append(message.Content.ContentBlocks, schemas.ResponsesMessageContentBlock{})
|
|
}
|
|
|
|
// Initialize the content block if needed
|
|
if message.Content.ContentBlocks[contentIndex].Type == "" {
|
|
message.Content.ContentBlocks[contentIndex].Type = schemas.ResponsesOutputMessageContentTypeRefusal
|
|
message.Content.ContentBlocks[contentIndex].ResponsesOutputMessageContentRefusal = &schemas.ResponsesOutputMessageContentRefusal{}
|
|
}
|
|
|
|
// Append to existing refusal text
|
|
if message.Content.ContentBlocks[contentIndex].ResponsesOutputMessageContentRefusal == nil {
|
|
message.Content.ContentBlocks[contentIndex].ResponsesOutputMessageContentRefusal = &schemas.ResponsesOutputMessageContentRefusal{
|
|
Refusal: refusal,
|
|
}
|
|
} else {
|
|
message.Content.ContentBlocks[contentIndex].ResponsesOutputMessageContentRefusal.Refusal += refusal
|
|
}
|
|
}
|
|
|
|
// appendFunctionArgumentsDeltaToResponsesMessage appends function arguments delta to a responses message
|
|
func (a *Accumulator) appendFunctionArgumentsDeltaToResponsesMessage(message *schemas.ResponsesMessage, arguments string) {
|
|
if message.ResponsesToolMessage == nil {
|
|
message.ResponsesToolMessage = &schemas.ResponsesToolMessage{}
|
|
}
|
|
|
|
if message.ResponsesToolMessage.Arguments == nil {
|
|
message.ResponsesToolMessage.Arguments = &arguments
|
|
} else {
|
|
*message.ResponsesToolMessage.Arguments += arguments
|
|
}
|
|
}
|
|
|
|
// appendReasoningDeltaToResponsesMessage appends reasoning delta to a responses message
|
|
func (a *Accumulator) appendReasoningDeltaToResponsesMessage(message *schemas.ResponsesMessage, delta string, contentIndex *int) {
|
|
// Handle reasoning content in two ways:
|
|
// 1. Content blocks (for reasoning_text content blocks)
|
|
// 2. ResponsesReasoning.Summary (for reasoning summary accumulation)
|
|
|
|
// If we have a content index, this is reasoning content in content blocks
|
|
if contentIndex != nil {
|
|
if message.Content == nil {
|
|
message.Content = &schemas.ResponsesMessageContent{}
|
|
}
|
|
|
|
// If we don't have content blocks yet, create them
|
|
if message.Content.ContentBlocks == nil {
|
|
message.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, *contentIndex+1)
|
|
}
|
|
|
|
// Ensure we have enough content blocks
|
|
for len(message.Content.ContentBlocks) <= *contentIndex {
|
|
message.Content.ContentBlocks = append(message.Content.ContentBlocks, schemas.ResponsesMessageContentBlock{})
|
|
}
|
|
|
|
// Initialize the content block if needed
|
|
if message.Content.ContentBlocks[*contentIndex].Type == "" {
|
|
message.Content.ContentBlocks[*contentIndex].Type = schemas.ResponsesOutputMessageContentTypeReasoning
|
|
}
|
|
|
|
// Append to existing reasoning text or create new text
|
|
if message.Content.ContentBlocks[*contentIndex].Text == nil {
|
|
message.Content.ContentBlocks[*contentIndex].Text = &delta
|
|
} else {
|
|
*message.Content.ContentBlocks[*contentIndex].Text += delta
|
|
}
|
|
} else {
|
|
// No content index - this is reasoning summary accumulation
|
|
if message.ResponsesReasoning == nil {
|
|
message.ResponsesReasoning = &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
}
|
|
}
|
|
|
|
// For now, accumulate into a single summary entry
|
|
// In the future, this could be enhanced to handle multiple summary entries
|
|
if len(message.ResponsesReasoning.Summary) == 0 {
|
|
message.ResponsesReasoning.Summary = append(message.ResponsesReasoning.Summary, schemas.ResponsesReasoningSummary{
|
|
Type: schemas.ResponsesReasoningContentBlockTypeSummaryText,
|
|
Text: delta,
|
|
})
|
|
} else {
|
|
// Append to the first (and typically only) summary entry
|
|
message.ResponsesReasoning.Summary[0].Text += delta
|
|
}
|
|
}
|
|
}
|
|
|
|
// appendReasoningSignatureToResponsesMessage appends reasoning signature to a responses message
|
|
func (a *Accumulator) appendReasoningSignatureToResponsesMessage(message *schemas.ResponsesMessage, signature string, contentIndex *int) {
|
|
// Handle signature content in content blocks or ResponsesReasoning.EncryptedContent
|
|
|
|
// If we have a content index, this is signature content in content blocks
|
|
if contentIndex != nil {
|
|
if message.Content == nil {
|
|
message.Content = &schemas.ResponsesMessageContent{}
|
|
}
|
|
|
|
// If we don't have content blocks yet, create them
|
|
if message.Content.ContentBlocks == nil {
|
|
message.Content.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, *contentIndex+1)
|
|
}
|
|
|
|
// Ensure we have enough content blocks
|
|
for len(message.Content.ContentBlocks) <= *contentIndex {
|
|
message.Content.ContentBlocks = append(message.Content.ContentBlocks, schemas.ResponsesMessageContentBlock{})
|
|
}
|
|
|
|
// Initialize the content block if needed
|
|
if message.Content.ContentBlocks[*contentIndex].Type == "" {
|
|
message.Content.ContentBlocks[*contentIndex].Type = schemas.ResponsesOutputMessageContentTypeReasoning
|
|
}
|
|
|
|
// Set or append signature to the content block
|
|
if message.Content.ContentBlocks[*contentIndex].Signature == nil {
|
|
message.Content.ContentBlocks[*contentIndex].Signature = &signature
|
|
} else {
|
|
*message.Content.ContentBlocks[*contentIndex].Signature += signature
|
|
}
|
|
} else {
|
|
// No content index - this is encrypted content at the reasoning level
|
|
if message.ResponsesReasoning == nil {
|
|
message.ResponsesReasoning = &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
}
|
|
}
|
|
|
|
// Set or append to encrypted content
|
|
if message.ResponsesReasoning.EncryptedContent == nil {
|
|
message.ResponsesReasoning.EncryptedContent = &signature
|
|
} else {
|
|
*message.ResponsesReasoning.EncryptedContent += signature
|
|
}
|
|
}
|
|
}
|
|
|
|
// processAccumulatedResponsesStreamingChunks processes all accumulated responses streaming chunks in order
|
|
func (a *Accumulator) processAccumulatedResponsesStreamingChunks(requestID string, respErr *schemas.BifrostError, isFinalChunk bool) (*AccumulatedData, error) {
|
|
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
|
// Lock the accumulator
|
|
accumulator.mu.Lock()
|
|
defer accumulator.mu.Unlock()
|
|
// Note: Cleanup is handled by CleanupStreamAccumulator when refcount reaches 0
|
|
// This is called from completeDeferredSpan after streaming ends
|
|
|
|
// Calculate Time to First Token (TTFT) in milliseconds
|
|
var ttft int64
|
|
if !accumulator.StartTimestamp.IsZero() && !accumulator.FirstChunkTimestamp.IsZero() {
|
|
ttft = accumulator.FirstChunkTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
|
}
|
|
|
|
// Initialize accumulated data
|
|
data := &AccumulatedData{
|
|
RequestID: requestID,
|
|
Status: "success",
|
|
Stream: true,
|
|
StartTimestamp: accumulator.StartTimestamp,
|
|
EndTimestamp: accumulator.FinalTimestamp,
|
|
Latency: 0,
|
|
TimeToFirstToken: ttft,
|
|
OutputMessages: nil,
|
|
ToolCalls: nil,
|
|
ErrorDetails: respErr,
|
|
TokenUsage: nil,
|
|
CacheDebug: nil,
|
|
Cost: nil,
|
|
}
|
|
|
|
// Build complete messages from accumulated chunks
|
|
completeMessages := a.buildCompleteMessageFromResponsesStreamChunks(accumulator.ResponsesStreamChunks)
|
|
|
|
if !isFinalChunk {
|
|
data.OutputMessages = completeMessages
|
|
return data, nil
|
|
}
|
|
|
|
// Update database with complete messages
|
|
data.Status = "success"
|
|
if respErr != nil {
|
|
data.Status = "error"
|
|
}
|
|
|
|
if accumulator.StartTimestamp.IsZero() || accumulator.FinalTimestamp.IsZero() {
|
|
data.Latency = 0
|
|
} else {
|
|
data.Latency = accumulator.FinalTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
|
}
|
|
|
|
data.EndTimestamp = accumulator.FinalTimestamp
|
|
data.OutputMessages = completeMessages
|
|
|
|
data.ErrorDetails = respErr
|
|
|
|
// Update metadata from the chunk with highest index (contains TokenUsage, Cost, FinishReason)
|
|
if lastChunk := accumulator.getLastResponsesChunkLocked(); lastChunk != nil {
|
|
if lastChunk.TokenUsage != nil {
|
|
data.TokenUsage = lastChunk.TokenUsage
|
|
}
|
|
if lastChunk.SemanticCacheDebug != nil {
|
|
data.CacheDebug = lastChunk.SemanticCacheDebug
|
|
}
|
|
if lastChunk.Cost != nil {
|
|
data.Cost = lastChunk.Cost
|
|
}
|
|
data.FinishReason = lastChunk.FinishReason
|
|
}
|
|
|
|
// Accumulate raw response using strings.Builder to avoid O(n^2) string concatenation
|
|
if len(accumulator.ResponsesStreamChunks) > 0 {
|
|
// Sort chunks by chunk index
|
|
sort.Slice(accumulator.ResponsesStreamChunks, func(i, j int) bool {
|
|
return accumulator.ResponsesStreamChunks[i].ChunkIndex < accumulator.ResponsesStreamChunks[j].ChunkIndex
|
|
})
|
|
var rawBuilder strings.Builder
|
|
for _, chunk := range accumulator.ResponsesStreamChunks {
|
|
if chunk.RawResponse != nil {
|
|
if rawBuilder.Len() > 0 {
|
|
rawBuilder.WriteString("\n\n")
|
|
}
|
|
rawBuilder.WriteString(*chunk.RawResponse)
|
|
}
|
|
}
|
|
if rawBuilder.Len() > 0 {
|
|
s := rawBuilder.String()
|
|
data.RawResponse = &s
|
|
}
|
|
}
|
|
|
|
return data, nil
|
|
}
|
|
|
|
// processResponsesStreamingResponse processes a responses streaming response
|
|
func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostContext, result *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*ProcessedStreamResponse, error) {
|
|
a.logger.Debug("[streaming] processing responses streaming response")
|
|
|
|
// Extract accumulator ID from context
|
|
requestID, ok := getAccumulatorID(ctx)
|
|
if !ok || requestID == "" {
|
|
return nil, fmt.Errorf("accumulator-id not found in context or is empty")
|
|
}
|
|
|
|
_, provider, requestedModel, resolvedModel := bifrost.GetResponseFields(result, bifrostErr)
|
|
|
|
isFinalChunk := bifrost.IsFinalChunk(ctx)
|
|
chunk := a.getResponsesStreamChunk()
|
|
chunk.Timestamp = time.Now()
|
|
chunk.ErrorDetails = bifrostErr
|
|
|
|
if bifrostErr != nil {
|
|
chunk.FinishReason = bifrost.Ptr("error")
|
|
if bifrostErr.ExtraFields.RawResponse != nil {
|
|
if rawBytes, marshalErr := sonic.Marshal(bifrostErr.ExtraFields.RawResponse); marshalErr == nil {
|
|
chunk.RawResponse = bifrost.Ptr(string(rawBytes))
|
|
}
|
|
}
|
|
// Assign a stable trailing index; reuse on duplicate plugin calls so dedup fires correctly.
|
|
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
|
accumulator.mu.Lock()
|
|
if accumulator.TerminalErrorChunkIndex >= 0 {
|
|
chunk.ChunkIndex = accumulator.TerminalErrorChunkIndex
|
|
} else {
|
|
accumulator.MaxResponsesChunkIndex++
|
|
chunk.ChunkIndex = accumulator.MaxResponsesChunkIndex
|
|
accumulator.TerminalErrorChunkIndex = chunk.ChunkIndex
|
|
}
|
|
accumulator.mu.Unlock()
|
|
} else if result != nil && result.ResponsesStreamResponse != nil {
|
|
if result.ResponsesStreamResponse.ExtraFields.RawResponse != nil {
|
|
chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.ResponsesStreamResponse.ExtraFields.RawResponse))
|
|
}
|
|
// Store a deep copy of the stream response to prevent shared data mutation between plugins
|
|
chunk.StreamResponse = deepCopyResponsesStreamResponse(result.ResponsesStreamResponse)
|
|
// Extract token usage from stream response if available
|
|
if result.ResponsesStreamResponse.Response != nil &&
|
|
result.ResponsesStreamResponse.Response.Usage != nil {
|
|
chunk.TokenUsage = result.ResponsesStreamResponse.Response.Usage.ToBifrostLLMUsage()
|
|
}
|
|
chunk.ChunkIndex = result.ResponsesStreamResponse.ExtraFields.ChunkIndex
|
|
if isFinalChunk {
|
|
if a.pricingManager != nil {
|
|
cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
|
|
chunk.Cost = bifrost.Ptr(cost)
|
|
}
|
|
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
|
|
}
|
|
}
|
|
|
|
if addErr := a.addResponsesStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
|
|
return nil, fmt.Errorf("failed to add responses stream chunk for request %s: %w", requestID, addErr)
|
|
}
|
|
|
|
// If this is the final chunk, process accumulated chunks
|
|
// Always return data on final chunk - multiple plugins may need the result
|
|
if isFinalChunk {
|
|
// Get the accumulator and mark as complete (idempotent)
|
|
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
|
accumulator.mu.Lock()
|
|
if !accumulator.IsComplete {
|
|
accumulator.IsComplete = true
|
|
}
|
|
accumulator.mu.Unlock()
|
|
|
|
// Always process and return data on final chunk
|
|
// Multiple plugins can call this - the processing is idempotent
|
|
data, processErr := a.processAccumulatedResponsesStreamingChunks(requestID, bifrostErr, isFinalChunk)
|
|
if processErr != nil {
|
|
a.logger.Error("failed to process accumulated responses chunks for request %s: %v", requestID, processErr)
|
|
return nil, processErr
|
|
}
|
|
|
|
var rawRequest interface{}
|
|
if result != nil && result.ResponsesStreamResponse != nil && result.ResponsesStreamResponse.ExtraFields.RawRequest != nil {
|
|
rawRequest = result.ResponsesStreamResponse.ExtraFields.RawRequest
|
|
}
|
|
|
|
return &ProcessedStreamResponse{
|
|
RequestID: requestID,
|
|
StreamType: StreamTypeResponses,
|
|
Provider: provider,
|
|
RequestedModel: requestedModel,
|
|
ResolvedModel: resolvedModel,
|
|
Data: data,
|
|
RawRequest: &rawRequest,
|
|
}, nil
|
|
}
|
|
|
|
return &ProcessedStreamResponse{
|
|
RequestID: requestID,
|
|
StreamType: StreamTypeResponses,
|
|
Provider: provider,
|
|
RequestedModel: requestedModel,
|
|
ResolvedModel: resolvedModel,
|
|
Data: nil,
|
|
}, nil
|
|
}
|