first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1,637 @@
package llmtests
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// RunTranscriptionStreamTest executes the streaming transcription test scenario.
//
// The scenario is skipped (with a log line) when
// testConfig.Scenarios.TranscriptionStream is false. Otherwise, for each
// round-trip case it:
//
//  1. synthesizes speech for a known text via client.SpeechRequest, using the
//     external TTS provider/model from testConfig when they are set and the
//     provider under test otherwise;
//  2. streams the generated audio through client.TranscriptionStreamRequest and
//     concatenates every delta chunk it receives within 60 seconds;
//  3. fails unless the number of original words recovered in the transcription
//     is at least half of the original word count.
//
// Errors carried inside the stream are collected and logged, not treated as
// fatal on their own.
func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.TranscriptionStream {
		t.Logf("Transcription streaming not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("TranscriptionStream", func(t *testing.T) {
		ShouldRunParallel(t, testConfig, "Transcription")

		// Generate TTS audio for streaming round-trip validation
		streamRoundTripCases := []struct {
			name           string
			text           string
			voiceType      string
			format         string
			responseFormat *string
		}{
			{
				name:           "StreamRoundTrip_Basic_MP3",
				text:           TTSTestTextBasic,
				voiceType:      "primary",
				format:         "mp3",
				responseFormat: nil, // Default JSON streaming
			},
			{
				name:           "StreamRoundTrip_Medium_MP3",
				text:           TTSTestTextMedium,
				voiceType:      "secondary",
				format:         "mp3",
				responseFormat: bifrost.Ptr("json"),
			},
			{
				name:           "StreamRoundTrip_Technical_MP3",
				text:           TTSTestTextTechnical,
				voiceType:      "tertiary",
				format:         "mp3",
				responseFormat: bifrost.Ptr("json"),
			},
		}

		for _, tc := range streamRoundTripCases {
			// Shadow the loop variable: the subtest closure may outlive this
			// iteration if ShouldRunParallel marks it parallel, and toolchains
			// before Go 1.22 share one loop variable across iterations.
			tc := tc

			t.Run(tc.name, func(t *testing.T) {
				ShouldRunParallel(t, testConfig, "Transcription")

				speechSynthesisProvider := testConfig.Provider
				if testConfig.ExternalTTSProvider != "" {
					speechSynthesisProvider = testConfig.ExternalTTSProvider
				}

				speechSynthesisModel := testConfig.SpeechSynthesisModel
				if testConfig.ExternalTTSModel != "" {
					speechSynthesisModel = testConfig.ExternalTTSModel
				}

				// Step 1: Generate TTS audio
				voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType)
				ttsRequest := &schemas.BifrostSpeechRequest{
					Provider: speechSynthesisProvider,
					Model:    speechSynthesisModel,
					Input: &schemas.SpeechInput{
						Input: tc.text,
					},
					Params: &schemas.SpeechParameters{
						VoiceConfig: &schemas.SpeechVoiceInput{
							Voice: &voice,
						},
						ResponseFormat: tc.format,
					},
					Fallbacks: testConfig.TranscriptionFallbacks,
				}

				// Use retry framework for TTS generation
				ttsRetryConfig := GetTestRetryConfigForScenario("SpeechSynthesis", testConfig)
				ttsRetryContext := TestRetryContext{
					ScenarioName: "TranscriptionStream_TTS",
					ExpectedBehavior: map[string]any{
						"should_generate_audio": true,
					},
					TestMetadata: map[string]any{
						"provider": speechSynthesisProvider,
						"model":    speechSynthesisModel,
					},
				}

				// isStreaming=false, isMultipartRequest=false, isBinaryResponse=true (audio bytes don't have JSON raw response)
				ttsExpectations := ApplyRawExpectations(SpeechExpectations(100), testConfig, false, false, true)
				ttsExpectations = ModifyExpectationsForProvider(ttsExpectations, speechSynthesisProvider)

				ttsSpeechRetryConfig := SpeechRetryConfig{
					MaxAttempts: ttsRetryConfig.MaxAttempts,
					BaseDelay:   ttsRetryConfig.BaseDelay,
					MaxDelay:    ttsRetryConfig.MaxDelay,
					Conditions:  []SpeechRetryCondition{},
					OnRetry:     ttsRetryConfig.OnRetry,
					OnFinalFail: ttsRetryConfig.OnFinalFail,
				}

				ttsResponse, err := WithSpeechTestRetry(t, ttsSpeechRetryConfig, ttsRetryContext, ttsExpectations, "TranscriptionStream_TTS", func() (*schemas.BifrostSpeechResponse, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.SpeechRequest(bfCtx, ttsRequest)
				})
				if err != nil {
					t.Fatalf("❌ TTS generation failed for stream round-trip test after retries: %v", GetErrorMessage(err))
				}
				if ttsResponse == nil || len(ttsResponse.Audio) == 0 {
					t.Fatal("❌ TTS returned invalid or empty audio for stream round-trip test after retries")
				}

				// Save the audio into a per-test temporary directory. t.TempDir
				// gives every subtest its own directory and removes it when the
				// test finishes, so concurrent runs that share a case name
				// cannot overwrite or delete each other's file.
				audioFileName := filepath.Join(t.TempDir(), "stream_roundtrip_"+tc.name+"."+tc.format)
				if writeErr := os.WriteFile(audioFileName, ttsResponse.Audio, 0644); writeErr != nil {
					t.Fatalf("Failed to save temp audio file: %v", writeErr)
				}
				t.Logf("Generated TTS audio for stream round-trip: %s (%d bytes)", audioFileName, len(ttsResponse.Audio))

				// Step 2: Test streaming transcription
				streamRequest := &schemas.BifrostTranscriptionRequest{
					Provider: testConfig.Provider,
					Model:    testConfig.TranscriptionModel,
					Input: &schemas.TranscriptionInput{
						File: ttsResponse.Audio,
					},
					Params: &schemas.TranscriptionParameters{
						Language:       bifrost.Ptr("en"),
						Format:         bifrost.Ptr(tc.format),
						ResponseFormat: tc.responseFormat,
					},
					Fallbacks: testConfig.TranscriptionFallbacks,
				}

				// Use retry framework for streaming transcription
				retryConfig := GetTestRetryConfigForScenario("TranscriptionStream", testConfig)
				retryContext := TestRetryContext{
					ScenarioName: "TranscriptionStream_" + tc.name,
					ExpectedBehavior: map[string]any{
						"transcribe_streaming_audio": true,
						"round_trip_test":            true,
						"original_text":              tc.text,
					},
					TestMetadata: map[string]any{
						"provider":     testConfig.Provider,
						"model":        testConfig.TranscriptionModel,
						"audio_format": tc.format,
						"voice_type":   tc.voiceType,
					},
				}

				responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.TranscriptionStreamRequest(bfCtx, streamRequest)
				})
				RequireNoError(t, err, "Transcription stream initiation failed")
				if responseChannel == nil {
					t.Fatal("Response channel should not be nil")
				}

				// Bound the time spent reading the stream.
				streamCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
				defer cancel()

				var (
					transcript       strings.Builder
					lastResponse     *schemas.BifrostStreamChunk // stays nil until a valid chunk is seen
					streamErrors     []string
					lastTokenLatency int64
				)

				// Read streaming chunks with enhanced validation
			readLoop:
				for {
					select {
					case response, ok := <-responseChannel:
						if !ok {
							// Channel closed, streaming complete
							break readLoop
						}
						if response == nil {
							streamErrors = append(streamErrors, "Received nil stream response")
							continue
						}

						// Check for errors in stream
						if response.BifrostError != nil {
							streamErrors = append(streamErrors, FormatErrorConcise(ParseBifrostError(response.BifrostError)))
							continue
						}

						transcribeData := response.BifrostTranscriptionStreamResponse
						if transcribeData == nil {
							streamErrors = append(streamErrors, "Stream response missing transcription stream payload")
							continue
						}

						// Latency is recorded before the payload check below, so it
						// reflects the last chunk that carried a transcription payload.
						lastTokenLatency = transcribeData.ExtraFields.Latency

						if transcribeData.Text == "" && transcribeData.Delta == nil {
							streamErrors = append(streamErrors, "Stream response missing transcription data")
							continue
						}

						chunkIndex := transcribeData.ExtraFields.ChunkIndex

						// Log latency for each chunk (can be 0 for inter-chunks)
						t.Logf("📊 Transcription chunk %d latency: %d ms", chunkIndex, transcribeData.ExtraFields.Latency)

						// Collect transcription chunks
						if transcribeData.Text != "" {
							t.Logf("✅ Received transcription text chunk %d with latency %d ms: '%s'", chunkIndex, transcribeData.ExtraFields.Latency, transcribeData.Text)
						}

						// Only delta chunks contribute to the accumulated transcript.
						if transcribeData.Delta != nil {
							deltaText := *transcribeData.Delta
							transcript.WriteString(deltaText)
							t.Logf("✅ Received transcription delta chunk %d with latency %d ms: '%s'", chunkIndex, transcribeData.ExtraFields.Latency, deltaText)
						}

						// Validate chunk structure
						if transcribeData.Type != schemas.TranscriptionStreamResponseTypeDelta {
							t.Logf("⚠️ Unexpected object type in stream: %s", transcribeData.Type)
						}

						gotModel := transcribeData.ExtraFields.OriginalModelRequested
						if gotModel == "" {
							t.Fatal("❌ Stream chunk missing extra_fields.original_model_requested")
						}
						if gotModel != testConfig.TranscriptionModel {
							t.Fatalf("❌ Unexpected original_model_requested in stream: got %q want %q", gotModel, testConfig.TranscriptionModel)
						}

						lastResponse = DeepCopyBifrostStreamChunk(response)

					case <-streamCtx.Done():
						streamErrors = append(streamErrors, "Stream reading timed out")
						break readLoop
					}
				}

				// Enhanced validation of streaming results
				if len(streamErrors) > 0 {
					t.Logf("⚠️ Stream errors encountered: %v", streamErrors)
				}
				if lastResponse == nil {
					t.Fatal("Should have received at least one response")
				}

				fullTranscriptionText := transcript.String()
				if fullTranscriptionText == "" {
					t.Fatal("Transcribed text should not be empty")
				}
				if lastTokenLatency == 0 {
					t.Fatal("❌ Last token latency is 0")
				}

				// Step 3: round-trip comparison. Words are lowercased, stripped of
				// surrounding punctuation, and very short words are ignored on
				// both sides. The pass threshold is half of ALL original words.
				originalWordCount := len(strings.Fields(tc.text))
				originalWords := streamRoundTripWords(tc.text)
				transcribedWords := streamRoundTripWords(fullTranscriptionText)

				foundWords := 0
				for _, word := range originalWords {
					if streamRoundTripWordFound(word, transcribedWords) {
						foundWords++
					}
				}

				// Enhanced round-trip validation with better error reporting
				minExpectedWords := originalWordCount / 2
				if foundWords < minExpectedWords {
					t.Logf("❌ Stream round-trip validation failed:")
					t.Logf(" Original: '%s'", tc.text)
					t.Logf(" Transcribed: '%s'", fullTranscriptionText)
					t.Logf(" Found %d/%d words (expected at least %d)", foundWords, originalWordCount, minExpectedWords)

					// Log the first few compared words for debugging, using the
					// same matching rule as the check above.
					t.Logf(" Word comparison:")
					for i, word := range originalWords {
						if i >= 5 {
							break
						}
						status := "❌"
						if streamRoundTripWordFound(word, transcribedWords) {
							status = "✅"
						}
						t.Logf(" %s '%s'", status, word)
					}

					t.Fatalf("Round-trip accuracy too low: got %d/%d words, need at least %d", foundWords, originalWordCount, minExpectedWords)
				}
			})
		}
	})
}

// streamRoundTripWords lowercases text, splits it on whitespace, trims the
// punctuation characters ".,!?;:" from both ends of every word and drops words
// shorter than three bytes. Dropping short and empty words keeps tokens such
// as "" or "a" from matching almost every word under substring comparison.
func streamRoundTripWords(text string) []string {
	const (
		punctuation = ".,!?;:"
		minWordLen  = 3
	)

	fields := strings.Fields(strings.ToLower(text))
	words := make([]string, 0, len(fields))
	for _, field := range fields {
		word := strings.Trim(field, punctuation)
		if len(word) < minWordLen {
			continue
		}
		words = append(words, word)
	}
	return words
}

// streamRoundTripWordFound reports whether word matches any candidate, where a
// match means one of the two strings contains the other.
func streamRoundTripWordFound(word string, candidates []string) bool {
	for _, candidate := range candidates {
		if strings.Contains(candidate, word) || strings.Contains(word, candidate) {
			return true
		}
	}
	return false
}
// RunTranscriptionStreamAdvancedTest executes advanced streaming transcription test scenarios.
//
// The scenarios are skipped (with a log line) when
// testConfig.Scenarios.TranscriptionStream is false. Otherwise three subtests
// run, each generating its own audio with GenerateTTSAudioForTest and streaming
// it through client.TranscriptionStreamRequest:
//
//   - JSONStreaming: requests response format "json" and requires at least one
//     transcription stream payload.
//   - MultipleLanguages_Streaming: one subtest per language hint (currently
//     only "en"); requires a payload and a non-zero last-chunk latency.
//   - WithCustomPrompt_Streaming: sends a prompt with the request; requires at
//     least one chunk with non-empty Text and a non-zero last-chunk latency.
//
// Errors carried inside a stream are collected and logged, not treated as
// fatal on their own.
func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.TranscriptionStream {
		t.Logf("Transcription streaming not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("TranscriptionStreamAdvanced", func(t *testing.T) {
		t.Run("JSONStreaming", func(t *testing.T) {
			ShouldRunParallel(t, testConfig, "Transcription")

			speechSynthesisProvider := testConfig.Provider
			if testConfig.ExternalTTSProvider != "" {
				speechSynthesisProvider = testConfig.ExternalTTSProvider
			}

			speechSynthesisModel := testConfig.SpeechSynthesisModel
			if testConfig.ExternalTTSModel != "" {
				speechSynthesisModel = testConfig.ExternalTTSModel
			}

			// Generate audio for streaming test.
			// NOTE(review): the second return value is discarded; confirm it is
			// not an error that should be checked here.
			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")

			// Test streaming with JSON format
			request := &schemas.BifrostTranscriptionRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.TranscriptionModel,
				Input: &schemas.TranscriptionInput{
					File: audioData,
				},
				Params: &schemas.TranscriptionParameters{
					Language:       bifrost.Ptr("en"),
					Format:         bifrost.Ptr("mp3"),
					ResponseFormat: bifrost.Ptr("json"),
				},
				Fallbacks: testConfig.TranscriptionFallbacks,
			}

			retryConfig := GetTestRetryConfigForScenario("TranscriptionStreamJSON", testConfig)
			retryContext := TestRetryContext{
				ScenarioName: "TranscriptionStream_JSON",
				ExpectedBehavior: map[string]any{
					"transcribe_streaming_audio": true,
					"json_format":                true,
				},
				TestMetadata: map[string]any{
					"provider": testConfig.Provider,
					"model":    testConfig.TranscriptionModel,
					"format":   "json",
				},
			}

			responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.TranscriptionStreamRequest(bfCtx, request)
			})
			RequireNoError(t, err, "JSON streaming failed")
			// Ranging over a nil channel blocks forever; fail fast instead.
			if responseChannel == nil {
				t.Fatal("Response channel should not be nil")
			}

			var (
				receivedResponse bool
				streamErrors     []string
			)

			for response := range responseChannel {
				if response == nil {
					streamErrors = append(streamErrors, "Received nil JSON stream response")
					continue
				}
				if response.BifrostError != nil {
					streamErrors = append(streamErrors, FormatErrorConcise(ParseBifrostError(response.BifrostError)))
					continue
				}

				transcribeData := response.BifrostTranscriptionStreamResponse
				if transcribeData == nil {
					continue
				}
				receivedResponse = true

				// Check for JSON streaming specific fields
				if transcribeData.Type != "" {
					t.Logf("✅ Stream type: %v", transcribeData.Type)
					if transcribeData.Delta != nil {
						t.Logf("✅ Delta: %s", *transcribeData.Delta)
					}
				}
				if transcribeData.Text != "" {
					t.Logf("✅ Received transcription text: %s", transcribeData.Text)
				}
			}

			if len(streamErrors) > 0 {
				t.Logf("⚠️ JSON stream errors: %v", streamErrors)
			}
			if !receivedResponse {
				t.Fatal("Should receive at least one response")
			}
			t.Logf("✅ JSON streaming successful")
		})

		t.Run("MultipleLanguages_Streaming", func(t *testing.T) {
			ShouldRunParallel(t, testConfig, "Transcription")

			speechSynthesisProvider := testConfig.Provider
			if testConfig.ExternalTTSProvider != "" {
				speechSynthesisProvider = testConfig.ExternalTTSProvider
			}

			speechSynthesisModel := testConfig.SpeechSynthesisModel
			if testConfig.ExternalTTSModel != "" {
				speechSynthesisModel = testConfig.ExternalTTSModel
			}

			// Generate audio for language streaming tests.
			// NOTE(review): the second return value is discarded; confirm it is
			// not an error that should be checked here.
			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")

			// Test streaming with different language hints (only English for now)
			languages := []string{"en"}

			for _, lang := range languages {
				// Shadow the loop variable: its address is stored in the request
				// and the subtest closure may outlive this iteration if
				// ShouldRunParallel marks it parallel (matters before Go 1.22).
				lang := lang

				t.Run("StreamLang_"+lang, func(t *testing.T) {
					ShouldRunParallel(t, testConfig, "Transcription")

					request := &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: audioData,
						},
						Params: &schemas.TranscriptionParameters{
							Language: &lang,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}

					retryConfig := GetTestRetryConfigForScenario("TranscriptionStreamLang", testConfig)
					retryContext := TestRetryContext{
						ScenarioName: "TranscriptionStream_Lang_" + lang,
						ExpectedBehavior: map[string]any{
							"transcribe_streaming_audio": true,
							"language":                   lang,
						},
						TestMetadata: map[string]any{
							"provider": testConfig.Provider,
							"language": lang,
						},
					}

					responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
						bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
						return client.TranscriptionStreamRequest(bfCtx, request)
					})
					RequireNoError(t, err, fmt.Sprintf("Streaming failed for language %s", lang))
					// Ranging over a nil channel blocks forever; fail fast instead.
					if responseChannel == nil {
						t.Fatal("Response channel should not be nil")
					}

					var (
						receivedData     bool
						streamErrors     []string
						lastTokenLatency int64
					)

					for response := range responseChannel {
						if response == nil {
							streamErrors = append(streamErrors, fmt.Sprintf("Received nil stream response for language %s", lang))
							continue
						}
						if response.BifrostError != nil {
							streamErrors = append(streamErrors, fmt.Sprintf("Error in stream for language %s: %s", lang, FormatErrorConcise(ParseBifrostError(response.BifrostError))))
							continue
						}
						if response.BifrostTranscriptionStreamResponse == nil {
							continue
						}

						receivedData = true
						t.Logf("✅ Received transcription data for language %s", lang)
						lastTokenLatency = response.BifrostTranscriptionStreamResponse.ExtraFields.Latency
					}

					if len(streamErrors) > 0 {
						t.Logf("⚠️ Stream errors for language %s: %v", lang, streamErrors)
					}
					if !receivedData {
						t.Fatalf("Should receive transcription data for language %s", lang)
					}
					if lastTokenLatency == 0 {
						t.Fatal("❌ Last token latency is 0")
					}
					t.Logf("✅ Streaming successful for language: %s", lang)
				})
			}
		})

		t.Run("WithCustomPrompt_Streaming", func(t *testing.T) {
			ShouldRunParallel(t, testConfig, "Transcription")

			speechSynthesisProvider := testConfig.Provider
			if testConfig.ExternalTTSProvider != "" {
				speechSynthesisProvider = testConfig.ExternalTTSProvider
			}

			speechSynthesisModel := testConfig.SpeechSynthesisModel
			if testConfig.ExternalTTSModel != "" {
				speechSynthesisModel = testConfig.ExternalTTSModel
			}

			// Generate audio for custom prompt streaming test.
			// NOTE(review): the second return value is discarded; confirm it is
			// not an error that should be checked here.
			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextTechnical, "tertiary", "mp3")

			// Test streaming with custom prompt for context
			request := &schemas.BifrostTranscriptionRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.TranscriptionModel,
				Input: &schemas.TranscriptionInput{
					File: audioData,
				},
				Params: &schemas.TranscriptionParameters{
					Language: bifrost.Ptr("en"),
					Prompt:   bifrost.Ptr("This audio contains technical terms, proper nouns, and streaming-related vocabulary."),
				},
				Fallbacks: testConfig.TranscriptionFallbacks,
			}

			retryConfig := GetTestRetryConfigForScenario("TranscriptionStreamPrompt", testConfig)
			retryContext := TestRetryContext{
				ScenarioName: "TranscriptionStream_CustomPrompt",
				ExpectedBehavior: map[string]any{
					"transcribe_streaming_audio": true,
					"custom_prompt":              true,
					"technical_content":          true,
				},
				TestMetadata: map[string]any{
					"provider":   testConfig.Provider,
					"model":      testConfig.TranscriptionModel,
					"has_prompt": true,
				},
			}

			responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.TranscriptionStreamRequest(bfCtx, request)
			})
			RequireNoError(t, err, "Custom prompt streaming failed")
			// Ranging over a nil channel blocks forever; fail fast instead.
			if responseChannel == nil {
				t.Fatal("Response channel should not be nil")
			}

			var (
				chunkCount       int
				streamErrors     []string
				receivedText     strings.Builder
				lastTokenLatency int64
			)

			for response := range responseChannel {
				if response == nil {
					streamErrors = append(streamErrors, "Received nil stream response with custom prompt")
					continue
				}
				if response.BifrostError != nil {
					streamErrors = append(streamErrors, FormatErrorConcise(ParseBifrostError(response.BifrostError)))
					continue
				}

				transcribeData := response.BifrostTranscriptionStreamResponse
				if transcribeData == nil {
					continue
				}
				lastTokenLatency = transcribeData.ExtraFields.Latency

				// Only chunks that carry Text are counted.
				if transcribeData.Text != "" {
					chunkCount++
					receivedText.WriteString(transcribeData.Text)
					t.Logf("✅ Custom prompt chunk %d: '%s'", chunkCount, transcribeData.Text)
				}
			}

			if len(streamErrors) > 0 {
				t.Logf("⚠️ Custom prompt stream errors: %v", streamErrors)
			}
			if chunkCount == 0 {
				t.Fatal("Should receive at least one transcription chunk")
			}

			// chunkCount > 0 guarantees the accumulated text is non-empty,
			// because only chunks with non-empty Text are counted.
			t.Logf("✅ Custom prompt produced transcription: '%s'", receivedText.String())

			if lastTokenLatency == 0 {
				t.Fatal("❌ Last token latency is 0")
			}
			t.Logf("✅ Custom prompt streaming successful: %d chunks received", chunkCount)
		})
	})
}