first commit
This commit is contained in:
583
framework/streaming/chat.go
Normal file
583
framework/streaming/chat.go
Normal file
@@ -0,0 +1,583 @@
|
||||
package streaming
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
bifrost "github.com/maximhq/bifrost/core"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
"github.com/maximhq/bifrost/framework/modelcatalog"
|
||||
)
|
||||
|
||||
// deepCopyChatStreamDelta creates a deep copy of ChatStreamResponseChoiceDelta
|
||||
// to prevent shared data mutation between different chunks
|
||||
func deepCopyChatStreamDelta(original *schemas.ChatStreamResponseChoiceDelta) *schemas.ChatStreamResponseChoiceDelta {
|
||||
if original == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
copy := &schemas.ChatStreamResponseChoiceDelta{}
|
||||
|
||||
if original.Role != nil {
|
||||
copyRole := *original.Role
|
||||
copy.Role = ©Role
|
||||
}
|
||||
|
||||
if original.Content != nil {
|
||||
copyContent := *original.Content
|
||||
copy.Content = ©Content
|
||||
}
|
||||
|
||||
if original.Refusal != nil {
|
||||
copyRefusal := *original.Refusal
|
||||
copy.Refusal = ©Refusal
|
||||
}
|
||||
|
||||
if original.Reasoning != nil {
|
||||
copyReasoning := *original.Reasoning
|
||||
copy.Reasoning = ©Reasoning
|
||||
}
|
||||
|
||||
// Deep copy ReasoningDetails slice
|
||||
if original.ReasoningDetails != nil {
|
||||
copy.ReasoningDetails = make([]schemas.ChatReasoningDetails, len(original.ReasoningDetails))
|
||||
for i, rd := range original.ReasoningDetails {
|
||||
copyRd := schemas.ChatReasoningDetails{
|
||||
Index: rd.Index,
|
||||
Type: rd.Type,
|
||||
}
|
||||
if rd.ID != nil {
|
||||
copyID := *rd.ID
|
||||
copyRd.ID = ©ID
|
||||
}
|
||||
if rd.Text != nil {
|
||||
copyText := *rd.Text
|
||||
copyRd.Text = ©Text
|
||||
}
|
||||
if rd.Signature != nil {
|
||||
copySig := *rd.Signature
|
||||
copyRd.Signature = ©Sig
|
||||
}
|
||||
if rd.Summary != nil {
|
||||
copySummary := *rd.Summary
|
||||
copyRd.Summary = ©Summary
|
||||
}
|
||||
if rd.Data != nil {
|
||||
copyData := *rd.Data
|
||||
copyRd.Data = ©Data
|
||||
}
|
||||
copy.ReasoningDetails[i] = copyRd
|
||||
}
|
||||
}
|
||||
|
||||
// Deep copy ToolCalls slice
|
||||
if original.ToolCalls != nil {
|
||||
copy.ToolCalls = make([]schemas.ChatAssistantMessageToolCall, len(original.ToolCalls))
|
||||
for i, tc := range original.ToolCalls {
|
||||
copyTc := schemas.ChatAssistantMessageToolCall{
|
||||
Index: tc.Index,
|
||||
Function: tc.Function, // struct value, safe to copy directly
|
||||
}
|
||||
if tc.ID != nil {
|
||||
copyID := *tc.ID
|
||||
copyTc.ID = ©ID
|
||||
}
|
||||
if tc.Type != nil {
|
||||
copyType := *tc.Type
|
||||
copyTc.Type = ©Type
|
||||
}
|
||||
// Deep copy Function's Name pointer
|
||||
if tc.Function.Name != nil {
|
||||
copyName := *tc.Function.Name
|
||||
copyTc.Function.Name = ©Name
|
||||
}
|
||||
copy.ToolCalls[i] = copyTc
|
||||
}
|
||||
}
|
||||
|
||||
// Deep copy Audio
|
||||
if original.Audio != nil {
|
||||
copy.Audio = &schemas.ChatAudioMessageAudio{
|
||||
ID: original.Audio.ID,
|
||||
Data: original.Audio.Data,
|
||||
ExpiresAt: original.Audio.ExpiresAt,
|
||||
Transcript: original.Audio.Transcript,
|
||||
}
|
||||
}
|
||||
|
||||
return copy
|
||||
}
|
||||
|
||||
// buildCompleteMessageFromChunks builds a complete message from accumulated chunks.
|
||||
// Uses strings.Builder for O(n) accumulation instead of O(n²) string concatenation.
|
||||
func (a *Accumulator) buildCompleteMessageFromChatStreamChunks(chunks []*ChatStreamChunk) *schemas.ChatMessage {
|
||||
completeMessage := &schemas.ChatMessage{
|
||||
Role: schemas.ChatMessageRoleAssistant,
|
||||
Content: &schemas.ChatMessageContent{},
|
||||
}
|
||||
sort.Slice(chunks, func(i, j int) bool {
|
||||
return chunks[i].ChunkIndex < chunks[j].ChunkIndex
|
||||
})
|
||||
|
||||
// Builders for O(n) accumulation of large text fields
|
||||
var contentBuilder strings.Builder
|
||||
var refusalBuilder strings.Builder
|
||||
var reasoningBuilder strings.Builder
|
||||
var audioDataBuilder strings.Builder
|
||||
var audioTranscriptBuilder strings.Builder
|
||||
hasContent, hasRefusal, hasReasoning := false, false, false
|
||||
|
||||
// Reasoning details builders keyed by detail index
|
||||
type rdAccum struct {
|
||||
text, summary, data strings.Builder
|
||||
hasText, hasSummary, hasData bool
|
||||
typ schemas.BifrostReasoningDetailsType
|
||||
id, signature *string
|
||||
}
|
||||
var rdAccums map[int]*rdAccum
|
||||
|
||||
// Tool call argument builders keyed by delta index
|
||||
type tcAccum struct {
|
||||
id *string
|
||||
typ *string
|
||||
name *string
|
||||
args strings.Builder
|
||||
}
|
||||
var tcAccums map[uint16]*tcAccum
|
||||
|
||||
for _, chunk := range chunks {
|
||||
if chunk == nil || chunk.Delta == nil {
|
||||
continue
|
||||
}
|
||||
// Handle role (usually in first chunk)
|
||||
if chunk.Delta.Role != nil {
|
||||
completeMessage.Role = schemas.ChatMessageRole(*chunk.Delta.Role)
|
||||
}
|
||||
// Append content delta
|
||||
if chunk.Delta.Content != nil && *chunk.Delta.Content != "" {
|
||||
contentBuilder.WriteString(*chunk.Delta.Content)
|
||||
hasContent = true
|
||||
}
|
||||
// Handle refusal delta
|
||||
if chunk.Delta.Refusal != nil && *chunk.Delta.Refusal != "" {
|
||||
refusalBuilder.WriteString(*chunk.Delta.Refusal)
|
||||
hasRefusal = true
|
||||
}
|
||||
// Handle reasoning delta
|
||||
if chunk.Delta.Reasoning != nil && *chunk.Delta.Reasoning != "" {
|
||||
reasoningBuilder.WriteString(*chunk.Delta.Reasoning)
|
||||
hasReasoning = true
|
||||
}
|
||||
// Handle reasoning details delta
|
||||
for _, rd := range chunk.Delta.ReasoningDetails {
|
||||
if rdAccums == nil {
|
||||
rdAccums = make(map[int]*rdAccum)
|
||||
}
|
||||
acc, ok := rdAccums[rd.Index]
|
||||
if !ok {
|
||||
acc = &rdAccum{typ: rd.Type}
|
||||
rdAccums[rd.Index] = acc
|
||||
}
|
||||
if rd.Text != nil && *rd.Text != "" {
|
||||
acc.text.WriteString(*rd.Text)
|
||||
acc.hasText = true
|
||||
}
|
||||
if rd.Summary != nil && *rd.Summary != "" {
|
||||
acc.summary.WriteString(*rd.Summary)
|
||||
acc.hasSummary = true
|
||||
}
|
||||
if rd.Data != nil && *rd.Data != "" {
|
||||
acc.data.WriteString(*rd.Data)
|
||||
acc.hasData = true
|
||||
}
|
||||
if rd.Signature != nil {
|
||||
sigCopy := *rd.Signature
|
||||
acc.signature = &sigCopy
|
||||
}
|
||||
if rd.Type != "" {
|
||||
acc.typ = rd.Type
|
||||
}
|
||||
if rd.ID != nil {
|
||||
idCopy := *rd.ID
|
||||
acc.id = &idCopy
|
||||
}
|
||||
}
|
||||
// Handle audio data
|
||||
if chunk.Delta.Audio != nil {
|
||||
if completeMessage.ChatAssistantMessage == nil {
|
||||
completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
|
||||
}
|
||||
if completeMessage.ChatAssistantMessage.Audio == nil {
|
||||
completeMessage.ChatAssistantMessage.Audio = &schemas.ChatAudioMessageAudio{}
|
||||
}
|
||||
if chunk.Delta.Audio.Data != "" {
|
||||
audioDataBuilder.WriteString(chunk.Delta.Audio.Data)
|
||||
}
|
||||
if chunk.Delta.Audio.Transcript != "" {
|
||||
audioTranscriptBuilder.WriteString(chunk.Delta.Audio.Transcript)
|
||||
}
|
||||
if chunk.Delta.Audio.ID != "" {
|
||||
completeMessage.ChatAssistantMessage.Audio.ID = chunk.Delta.Audio.ID
|
||||
}
|
||||
if chunk.Delta.Audio.ExpiresAt != 0 {
|
||||
completeMessage.ChatAssistantMessage.Audio.ExpiresAt = chunk.Delta.Audio.ExpiresAt
|
||||
}
|
||||
}
|
||||
// Accumulate tool calls by index
|
||||
for _, deltaToolCall := range chunk.Delta.ToolCalls {
|
||||
if tcAccums == nil {
|
||||
tcAccums = make(map[uint16]*tcAccum)
|
||||
}
|
||||
idx := deltaToolCall.Index
|
||||
acc, ok := tcAccums[idx]
|
||||
if !ok {
|
||||
acc = &tcAccum{}
|
||||
tcAccums[idx] = acc
|
||||
}
|
||||
if deltaToolCall.ID != nil {
|
||||
v := *deltaToolCall.ID
|
||||
acc.id = &v
|
||||
}
|
||||
if deltaToolCall.Type != nil {
|
||||
t := *deltaToolCall.Type
|
||||
acc.typ = &t
|
||||
}
|
||||
if deltaToolCall.Function.Name != nil {
|
||||
n := *deltaToolCall.Function.Name
|
||||
acc.name = &n
|
||||
}
|
||||
if args := deltaToolCall.Function.Arguments; args != "" {
|
||||
acc.args.WriteString(args)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize content
|
||||
if hasContent {
|
||||
str := contentBuilder.String()
|
||||
completeMessage.Content.ContentStr = &str
|
||||
}
|
||||
|
||||
// Finalize refusal
|
||||
if hasRefusal {
|
||||
if completeMessage.ChatAssistantMessage == nil {
|
||||
completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
|
||||
}
|
||||
str := refusalBuilder.String()
|
||||
completeMessage.ChatAssistantMessage.Refusal = &str
|
||||
}
|
||||
|
||||
// Finalize reasoning
|
||||
if hasReasoning {
|
||||
if completeMessage.ChatAssistantMessage == nil {
|
||||
completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
|
||||
}
|
||||
str := reasoningBuilder.String()
|
||||
completeMessage.ChatAssistantMessage.Reasoning = &str
|
||||
}
|
||||
|
||||
// Finalize reasoning details
|
||||
if len(rdAccums) > 0 {
|
||||
if completeMessage.ChatAssistantMessage == nil {
|
||||
completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
|
||||
}
|
||||
// Sort by index for deterministic output
|
||||
indices := make([]int, 0, len(rdAccums))
|
||||
for idx := range rdAccums {
|
||||
indices = append(indices, idx)
|
||||
}
|
||||
sort.Ints(indices)
|
||||
for _, idx := range indices {
|
||||
acc := rdAccums[idx]
|
||||
rd := schemas.ChatReasoningDetails{
|
||||
Index: idx,
|
||||
Type: acc.typ,
|
||||
ID: acc.id,
|
||||
Signature: acc.signature,
|
||||
}
|
||||
if acc.hasText {
|
||||
str := acc.text.String()
|
||||
rd.Text = &str
|
||||
}
|
||||
if acc.hasSummary {
|
||||
str := acc.summary.String()
|
||||
rd.Summary = &str
|
||||
}
|
||||
if acc.hasData {
|
||||
str := acc.data.String()
|
||||
rd.Data = &str
|
||||
}
|
||||
completeMessage.ChatAssistantMessage.ReasoningDetails = append(
|
||||
completeMessage.ChatAssistantMessage.ReasoningDetails, rd)
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize audio
|
||||
if completeMessage.ChatAssistantMessage != nil && completeMessage.ChatAssistantMessage.Audio != nil {
|
||||
completeMessage.ChatAssistantMessage.Audio.Data = audioDataBuilder.String()
|
||||
completeMessage.ChatAssistantMessage.Audio.Transcript = audioTranscriptBuilder.String()
|
||||
}
|
||||
|
||||
// Finalize tool calls — sort by original index for deterministic output
|
||||
if len(tcAccums) > 0 {
|
||||
if completeMessage.ChatAssistantMessage == nil {
|
||||
completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
|
||||
}
|
||||
tcIndices := make([]int, 0, len(tcAccums))
|
||||
for idx := range tcAccums {
|
||||
tcIndices = append(tcIndices, int(idx))
|
||||
}
|
||||
sort.Ints(tcIndices)
|
||||
toolCalls := make([]schemas.ChatAssistantMessageToolCall, 0, len(tcIndices))
|
||||
for _, idx := range tcIndices {
|
||||
acc := tcAccums[uint16(idx)]
|
||||
toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{
|
||||
Index: uint16(idx),
|
||||
ID: acc.id,
|
||||
Type: acc.typ,
|
||||
Function: schemas.ChatAssistantMessageToolCallFunction{
|
||||
Name: acc.name,
|
||||
Arguments: acc.args.String(),
|
||||
},
|
||||
})
|
||||
}
|
||||
completeMessage.ChatAssistantMessage.ToolCalls = toolCalls
|
||||
}
|
||||
|
||||
return completeMessage
|
||||
}
|
||||
|
||||
// processAccumulatedChunks processes all accumulated chunks in order
|
||||
func (a *Accumulator) processAccumulatedChatStreamingChunks(requestID string, respErr *schemas.BifrostError, isFinalChunk bool) (*AccumulatedData, error) {
|
||||
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
||||
// Lock the accumulator
|
||||
accumulator.mu.Lock()
|
||||
defer accumulator.mu.Unlock()
|
||||
// Note: Cleanup is handled by CleanupStreamAccumulator when refcount reaches 0
|
||||
// This is called from completeDeferredSpan after streaming ends
|
||||
|
||||
// Calculate Time to First Token (TTFT) in milliseconds
|
||||
var ttft int64
|
||||
if !accumulator.StartTimestamp.IsZero() && !accumulator.FirstChunkTimestamp.IsZero() {
|
||||
ttft = accumulator.FirstChunkTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
||||
}
|
||||
|
||||
// Initialize accumulated data
|
||||
data := &AccumulatedData{
|
||||
RequestID: requestID,
|
||||
Status: "success",
|
||||
Stream: true,
|
||||
StartTimestamp: accumulator.StartTimestamp,
|
||||
EndTimestamp: accumulator.FinalTimestamp,
|
||||
Latency: 0,
|
||||
TimeToFirstToken: ttft,
|
||||
OutputMessage: nil,
|
||||
ToolCalls: nil,
|
||||
ErrorDetails: nil,
|
||||
TokenUsage: nil,
|
||||
CacheDebug: nil,
|
||||
Cost: nil,
|
||||
}
|
||||
// Build complete message from accumulated chunks
|
||||
completeMessage := a.buildCompleteMessageFromChatStreamChunks(accumulator.ChatStreamChunks)
|
||||
if !isFinalChunk {
|
||||
data.OutputMessage = completeMessage
|
||||
return data, nil
|
||||
}
|
||||
// Update database with complete message
|
||||
data.Status = "success"
|
||||
if respErr != nil {
|
||||
data.Status = "error"
|
||||
}
|
||||
if accumulator.StartTimestamp.IsZero() || accumulator.FinalTimestamp.IsZero() {
|
||||
data.Latency = 0
|
||||
} else {
|
||||
data.Latency = accumulator.FinalTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
||||
}
|
||||
data.EndTimestamp = accumulator.FinalTimestamp
|
||||
data.OutputMessage = completeMessage
|
||||
if data.OutputMessage.ChatAssistantMessage != nil && data.OutputMessage.ChatAssistantMessage.ToolCalls != nil {
|
||||
data.ToolCalls = data.OutputMessage.ChatAssistantMessage.ToolCalls
|
||||
}
|
||||
data.ErrorDetails = respErr
|
||||
// Update metadata from the chunk with highest index (contains TokenUsage, Cost, FinishReason)
|
||||
if lastChunk := accumulator.getLastChatChunkLocked(); lastChunk != nil {
|
||||
if lastChunk.TokenUsage != nil {
|
||||
data.TokenUsage = lastChunk.TokenUsage
|
||||
}
|
||||
if lastChunk.SemanticCacheDebug != nil {
|
||||
data.CacheDebug = lastChunk.SemanticCacheDebug
|
||||
}
|
||||
if lastChunk.Cost != nil {
|
||||
data.Cost = lastChunk.Cost
|
||||
}
|
||||
data.FinishReason = lastChunk.FinishReason
|
||||
}
|
||||
// Merge LogProbs from all chunks
|
||||
if len(accumulator.ChatStreamChunks) > 0 {
|
||||
var mergedLogProbs *schemas.BifrostLogProbs
|
||||
for _, chunk := range accumulator.ChatStreamChunks {
|
||||
if chunk.LogProbs != nil {
|
||||
if mergedLogProbs == nil {
|
||||
mergedLogProbs = &schemas.BifrostLogProbs{}
|
||||
}
|
||||
mergedLogProbs.Content = append(mergedLogProbs.Content, chunk.LogProbs.Content...)
|
||||
mergedLogProbs.Refusal = append(mergedLogProbs.Refusal, chunk.LogProbs.Refusal...)
|
||||
if chunk.LogProbs.TextCompletionLogProb != nil {
|
||||
mergedLogProbs.TextCompletionLogProb = chunk.LogProbs.TextCompletionLogProb
|
||||
}
|
||||
}
|
||||
}
|
||||
data.LogProbs = mergedLogProbs
|
||||
}
|
||||
// Accumulate raw response using strings.Builder to avoid O(n^2) string concatenation
|
||||
if len(accumulator.ChatStreamChunks) > 0 {
|
||||
// Sort chunks by chunk index
|
||||
sort.Slice(accumulator.ChatStreamChunks, func(i, j int) bool {
|
||||
return accumulator.ChatStreamChunks[i].ChunkIndex < accumulator.ChatStreamChunks[j].ChunkIndex
|
||||
})
|
||||
var rawBuilder strings.Builder
|
||||
for _, chunk := range accumulator.ChatStreamChunks {
|
||||
if chunk.RawResponse != nil {
|
||||
if rawBuilder.Len() > 0 {
|
||||
rawBuilder.WriteString("\n\n")
|
||||
}
|
||||
rawBuilder.WriteString(*chunk.RawResponse)
|
||||
}
|
||||
}
|
||||
if rawBuilder.Len() > 0 {
|
||||
s := rawBuilder.String()
|
||||
data.RawResponse = &s
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// processChatStreamingResponse processes a chat streaming response
|
||||
func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext, result *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*ProcessedStreamResponse, error) {
|
||||
a.logger.Debug("[streaming] processing chat streaming response")
|
||||
// Extract accumulator ID from context
|
||||
requestID, ok := getAccumulatorID(ctx)
|
||||
if !ok || requestID == "" {
|
||||
// Log error but don't fail the request
|
||||
return nil, fmt.Errorf("accumulator-id not found in context or is empty")
|
||||
}
|
||||
requestType, provider, model, resolvedModel := bifrost.GetResponseFields(result, bifrostErr)
|
||||
|
||||
streamType := StreamTypeChat
|
||||
if requestType == schemas.TextCompletionStreamRequest {
|
||||
streamType = StreamTypeText
|
||||
}
|
||||
|
||||
isFinalChunk := bifrost.IsFinalChunk(ctx)
|
||||
chunk := a.getChatStreamChunk()
|
||||
chunk.Timestamp = time.Now()
|
||||
chunk.ErrorDetails = bifrostErr
|
||||
if bifrostErr != nil {
|
||||
chunk.FinishReason = bifrost.Ptr("error")
|
||||
} else if result != nil && result.TextCompletionResponse != nil {
|
||||
// Handle text completion response directly
|
||||
if len(result.TextCompletionResponse.Choices) > 0 {
|
||||
choice := result.TextCompletionResponse.Choices[0]
|
||||
|
||||
if choice.TextCompletionResponseChoice != nil {
|
||||
deltaCopy := choice.TextCompletionResponseChoice.Text
|
||||
chunk.Delta = &schemas.ChatStreamResponseChoiceDelta{
|
||||
Content: deltaCopy,
|
||||
}
|
||||
chunk.FinishReason = choice.FinishReason
|
||||
chunk.LogProbs = choice.LogProbs
|
||||
}
|
||||
}
|
||||
// Extract token usage
|
||||
if result.TextCompletionResponse.Usage != nil && result.TextCompletionResponse.Usage.TotalTokens > 0 {
|
||||
chunk.TokenUsage = result.TextCompletionResponse.Usage
|
||||
}
|
||||
chunk.ChunkIndex = result.TextCompletionResponse.ExtraFields.ChunkIndex
|
||||
if result.TextCompletionResponse.ExtraFields.RawResponse != nil {
|
||||
chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.TextCompletionResponse.ExtraFields.RawResponse))
|
||||
}
|
||||
if isFinalChunk {
|
||||
if a.pricingManager != nil {
|
||||
cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
|
||||
chunk.Cost = bifrost.Ptr(cost)
|
||||
}
|
||||
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
|
||||
}
|
||||
} else if result != nil && result.ChatResponse != nil {
|
||||
// Extract delta and other information
|
||||
if len(result.ChatResponse.Choices) > 0 {
|
||||
choice := result.ChatResponse.Choices[0]
|
||||
if choice.ChatStreamResponseChoice != nil {
|
||||
// Deep copy delta to prevent shared data mutation between chunks
|
||||
chunk.Delta = deepCopyChatStreamDelta(choice.ChatStreamResponseChoice.Delta)
|
||||
chunk.FinishReason = choice.FinishReason
|
||||
chunk.LogProbs = choice.LogProbs
|
||||
}
|
||||
}
|
||||
// Extract token usage
|
||||
if result.ChatResponse.Usage != nil && result.ChatResponse.Usage.TotalTokens > 0 {
|
||||
chunk.TokenUsage = result.ChatResponse.Usage
|
||||
}
|
||||
chunk.ChunkIndex = result.ChatResponse.ExtraFields.ChunkIndex
|
||||
if result.ChatResponse.ExtraFields.RawResponse != nil {
|
||||
chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.ChatResponse.ExtraFields.RawResponse))
|
||||
}
|
||||
if isFinalChunk {
|
||||
if a.pricingManager != nil {
|
||||
cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
|
||||
chunk.Cost = bifrost.Ptr(cost)
|
||||
}
|
||||
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
|
||||
}
|
||||
}
|
||||
if addErr := a.addChatStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
|
||||
return nil, fmt.Errorf("failed to add stream chunk for request %s: %w", requestID, addErr)
|
||||
}
|
||||
// If this is the final chunk, process accumulated chunks
|
||||
// Always return data on final chunk - multiple plugins may need the result
|
||||
if isFinalChunk {
|
||||
// Get the accumulator and mark as complete (idempotent)
|
||||
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
||||
accumulator.mu.Lock()
|
||||
if !accumulator.IsComplete {
|
||||
accumulator.IsComplete = true
|
||||
}
|
||||
accumulator.mu.Unlock()
|
||||
|
||||
// Always process and return data on final chunk
|
||||
// Multiple plugins can call this - the processing is idempotent
|
||||
data, processErr := a.processAccumulatedChatStreamingChunks(requestID, bifrostErr, isFinalChunk)
|
||||
if processErr != nil {
|
||||
a.logger.Error("failed to process accumulated chunks for request %s: %v", requestID, processErr)
|
||||
return nil, processErr
|
||||
}
|
||||
var rawRequest interface{}
|
||||
if result != nil && result.ChatResponse != nil && result.ChatResponse.ExtraFields.RawRequest != nil {
|
||||
rawRequest = result.ChatResponse.ExtraFields.RawRequest
|
||||
} else if result != nil && result.TextCompletionResponse != nil && result.TextCompletionResponse.ExtraFields.RawRequest != nil {
|
||||
rawRequest = result.TextCompletionResponse.ExtraFields.RawRequest
|
||||
}
|
||||
return &ProcessedStreamResponse{
|
||||
RequestID: requestID,
|
||||
StreamType: streamType,
|
||||
Provider: provider,
|
||||
RequestedModel: model,
|
||||
ResolvedModel: resolvedModel,
|
||||
Data: data,
|
||||
RawRequest: &rawRequest,
|
||||
}, nil
|
||||
}
|
||||
// Non-final chunk: skip expensive rebuild since no consumer uses intermediate data.
|
||||
// Both logging and maxim plugins return early when !isFinalChunk.
|
||||
return &ProcessedStreamResponse{
|
||||
RequestID: requestID,
|
||||
StreamType: streamType,
|
||||
Provider: provider,
|
||||
RequestedModel: model,
|
||||
ResolvedModel: resolvedModel,
|
||||
Data: nil,
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user