first commit
This commit is contained in:
216
framework/streaming/transcription.go
Normal file
216
framework/streaming/transcription.go
Normal file
@@ -0,0 +1,216 @@
|
||||
package streaming
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
bifrost "github.com/maximhq/bifrost/core"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
"github.com/maximhq/bifrost/framework/modelcatalog"
|
||||
)
|
||||
|
||||
// buildCompleteMessageFromTranscriptionStreamChunks builds a complete message from accumulated transcription chunks
|
||||
func (a *Accumulator) buildCompleteMessageFromTranscriptionStreamChunks(chunks []*TranscriptionStreamChunk) *schemas.BifrostTranscriptionResponse {
|
||||
completeMessage := &schemas.BifrostTranscriptionResponse{}
|
||||
finalContent := ""
|
||||
sort.Slice(chunks, func(i, j int) bool {
|
||||
return chunks[i].ChunkIndex < chunks[j].ChunkIndex
|
||||
})
|
||||
for _, chunk := range chunks {
|
||||
if chunk.Delta == nil {
|
||||
continue
|
||||
}
|
||||
if chunk.Delta.Type == schemas.TranscriptionStreamResponseTypeDelta && chunk.Delta.Delta != nil {
|
||||
finalContent += *chunk.Delta.Delta
|
||||
}
|
||||
}
|
||||
// Add final content to the message
|
||||
completeMessage.Text = finalContent
|
||||
return completeMessage
|
||||
}
|
||||
|
||||
// processAccumulatedTranscriptionStreamingChunks processes all accumulated transcription chunks in order
|
||||
func (a *Accumulator) processAccumulatedTranscriptionStreamingChunks(requestID string, bifrostErr *schemas.BifrostError, isFinalChunk bool) (*AccumulatedData, error) {
|
||||
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
||||
// Lock the accumulator
|
||||
accumulator.mu.Lock()
|
||||
defer accumulator.mu.Unlock()
|
||||
// Note: Cleanup is handled by CleanupStreamAccumulator when refcount reaches 0
|
||||
// This is called from completeDeferredSpan after streaming ends
|
||||
|
||||
// Calculate Time to First Token (TTFT) in milliseconds
|
||||
var ttft int64
|
||||
if !accumulator.StartTimestamp.IsZero() && !accumulator.FirstChunkTimestamp.IsZero() {
|
||||
ttft = accumulator.FirstChunkTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
||||
}
|
||||
|
||||
data := &AccumulatedData{
|
||||
RequestID: requestID,
|
||||
Status: "success",
|
||||
Stream: true,
|
||||
StartTimestamp: accumulator.StartTimestamp,
|
||||
EndTimestamp: accumulator.FinalTimestamp,
|
||||
Latency: 0,
|
||||
TimeToFirstToken: ttft,
|
||||
OutputMessage: nil,
|
||||
ToolCalls: nil,
|
||||
ErrorDetails: nil,
|
||||
TokenUsage: nil,
|
||||
CacheDebug: nil,
|
||||
Cost: nil,
|
||||
}
|
||||
// Build complete message from accumulated chunks
|
||||
completeMessage := a.buildCompleteMessageFromTranscriptionStreamChunks(accumulator.TranscriptionStreamChunks)
|
||||
if !isFinalChunk {
|
||||
data.TranscriptionOutput = completeMessage
|
||||
return data, nil
|
||||
}
|
||||
data.Status = "success"
|
||||
if bifrostErr != nil {
|
||||
data.Status = "error"
|
||||
}
|
||||
if accumulator.StartTimestamp.IsZero() || accumulator.FinalTimestamp.IsZero() {
|
||||
data.Latency = 0
|
||||
} else {
|
||||
data.Latency = accumulator.FinalTimestamp.Sub(accumulator.StartTimestamp).Nanoseconds() / 1e6
|
||||
}
|
||||
data.EndTimestamp = accumulator.FinalTimestamp
|
||||
data.TranscriptionOutput = completeMessage
|
||||
data.ErrorDetails = bifrostErr
|
||||
// Update metadata from the chunk with highest index (contains TokenUsage, Cost, CacheDebug)
|
||||
if lastChunk := accumulator.getLastTranscriptionChunkLocked(); lastChunk != nil {
|
||||
if lastChunk.TokenUsage != nil {
|
||||
data.TokenUsage = &schemas.BifrostLLMUsage{}
|
||||
if lastChunk.TokenUsage.InputTokens != nil {
|
||||
data.TokenUsage.PromptTokens = *lastChunk.TokenUsage.InputTokens
|
||||
}
|
||||
if lastChunk.TokenUsage.OutputTokens != nil {
|
||||
data.TokenUsage.CompletionTokens = *lastChunk.TokenUsage.OutputTokens
|
||||
}
|
||||
if lastChunk.TokenUsage.TotalTokens != nil {
|
||||
data.TokenUsage.TotalTokens = *lastChunk.TokenUsage.TotalTokens
|
||||
}
|
||||
}
|
||||
if lastChunk.Cost != nil {
|
||||
data.Cost = lastChunk.Cost
|
||||
}
|
||||
if lastChunk.SemanticCacheDebug != nil {
|
||||
data.CacheDebug = lastChunk.SemanticCacheDebug
|
||||
}
|
||||
}
|
||||
// Accumulate raw response using strings.Builder to avoid O(n^2) string concatenation
|
||||
if len(accumulator.TranscriptionStreamChunks) > 0 {
|
||||
// Sort chunks by chunk index
|
||||
sort.Slice(accumulator.TranscriptionStreamChunks, func(i, j int) bool {
|
||||
return accumulator.TranscriptionStreamChunks[i].ChunkIndex < accumulator.TranscriptionStreamChunks[j].ChunkIndex
|
||||
})
|
||||
var rawBuilder strings.Builder
|
||||
for _, chunk := range accumulator.TranscriptionStreamChunks {
|
||||
if chunk.RawResponse != nil {
|
||||
if rawBuilder.Len() > 0 {
|
||||
rawBuilder.WriteString("\n\n")
|
||||
}
|
||||
rawBuilder.WriteString(*chunk.RawResponse)
|
||||
}
|
||||
}
|
||||
if rawBuilder.Len() > 0 {
|
||||
s := rawBuilder.String()
|
||||
data.RawResponse = &s
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// processTranscriptionStreamingResponse processes a transcription streaming response
|
||||
func (a *Accumulator) processTranscriptionStreamingResponse(ctx *schemas.BifrostContext, result *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*ProcessedStreamResponse, error) {
|
||||
// Extract accumulator ID from context
|
||||
requestID, ok := getAccumulatorID(ctx)
|
||||
if !ok || requestID == "" {
|
||||
// Log error but don't fail the request
|
||||
return nil, fmt.Errorf("accumulator-id not found in context or is empty")
|
||||
}
|
||||
_, provider, requestedModel, resolvedModel := bifrost.GetResponseFields(result, bifrostErr)
|
||||
isFinalChunk := bifrost.IsFinalChunk(ctx)
|
||||
// For audio, all the data comes in the final chunk
|
||||
chunk := a.getTranscriptionStreamChunk()
|
||||
chunk.Timestamp = time.Now()
|
||||
chunk.ErrorDetails = bifrostErr
|
||||
if bifrostErr != nil {
|
||||
chunk.FinishReason = bifrost.Ptr("error")
|
||||
} else if result != nil && result.TranscriptionStreamResponse != nil {
|
||||
// Set delta for all chunks (not just final chunks with usage)
|
||||
// We create a deep copy of the delta to avoid pointing to stack memory
|
||||
var deltaCopy *string
|
||||
if result.TranscriptionStreamResponse.Delta != nil {
|
||||
deltaValue := *result.TranscriptionStreamResponse.Delta
|
||||
deltaCopy = &deltaValue
|
||||
}
|
||||
newDelta := &schemas.BifrostTranscriptionStreamResponse{
|
||||
Type: result.TranscriptionStreamResponse.Type,
|
||||
Delta: deltaCopy,
|
||||
}
|
||||
chunk.Delta = newDelta
|
||||
|
||||
// Set token usage if available (typically only in final chunk)
|
||||
if result.TranscriptionStreamResponse.Usage != nil {
|
||||
chunk.TokenUsage = result.TranscriptionStreamResponse.Usage
|
||||
}
|
||||
chunk.ChunkIndex = result.TranscriptionStreamResponse.ExtraFields.ChunkIndex
|
||||
if result.TranscriptionStreamResponse.ExtraFields.RawResponse != nil {
|
||||
chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.TranscriptionStreamResponse.ExtraFields.RawResponse))
|
||||
}
|
||||
if isFinalChunk {
|
||||
if a.pricingManager != nil {
|
||||
cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
|
||||
chunk.Cost = bifrost.Ptr(cost)
|
||||
}
|
||||
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
|
||||
}
|
||||
}
|
||||
if addErr := a.addTranscriptionStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
|
||||
return nil, fmt.Errorf("failed to add stream chunk for request %s: %w", requestID, addErr)
|
||||
}
|
||||
// Always return data on final chunk - multiple plugins may need the result
|
||||
if isFinalChunk {
|
||||
// Get the accumulator and mark as complete (idempotent)
|
||||
accumulator := a.getOrCreateStreamAccumulator(requestID)
|
||||
accumulator.mu.Lock()
|
||||
if !accumulator.IsComplete {
|
||||
accumulator.IsComplete = true
|
||||
}
|
||||
accumulator.mu.Unlock()
|
||||
|
||||
// Always process and return data on final chunk
|
||||
// Multiple plugins can call this - the processing is idempotent
|
||||
data, processErr := a.processAccumulatedTranscriptionStreamingChunks(requestID, bifrostErr, isFinalChunk)
|
||||
if processErr != nil {
|
||||
a.logger.Error("failed to process accumulated chunks for request %s: %v", requestID, processErr)
|
||||
return nil, processErr
|
||||
}
|
||||
var rawRequest interface{}
|
||||
if result != nil && result.TranscriptionStreamResponse != nil && result.TranscriptionStreamResponse.ExtraFields.RawRequest != nil {
|
||||
rawRequest = result.TranscriptionStreamResponse.ExtraFields.RawRequest
|
||||
}
|
||||
return &ProcessedStreamResponse{
|
||||
RequestID: requestID,
|
||||
StreamType: StreamTypeTranscription,
|
||||
Provider: provider,
|
||||
RequestedModel: requestedModel,
|
||||
ResolvedModel: resolvedModel,
|
||||
Data: data,
|
||||
RawRequest: &rawRequest,
|
||||
}, nil
|
||||
}
|
||||
// Non-final chunk: skip expensive rebuild since no consumer uses intermediate data.
|
||||
// Both logging and maxim plugins return early when !isFinalChunk.
|
||||
return &ProcessedStreamResponse{
|
||||
RequestID: requestID,
|
||||
StreamType: StreamTypeTranscription,
|
||||
Provider: provider,
|
||||
RequestedModel: requestedModel,
|
||||
ResolvedModel: resolvedModel,
|
||||
Data: nil,
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user