package llmtests import ( "context" "fmt" "os" "path/filepath" "runtime" "strings" "testing" "github.com/stretchr/testify/require" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" ) // RunTranscriptionTest executes the transcription test scenario func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { if !testConfig.Scenarios.Transcription { t.Logf("Transcription not supported for provider %s", testConfig.Provider) return } t.Run("Transcription", func(t *testing.T) { // First generate TTS audio for round-trip validation roundTripCases := []struct { name string text string voiceType string format string responseFormat *string }{ { name: "RoundTrip_Basic_MP3", text: TTSTestTextBasic, voiceType: "primary", format: GetProviderDefaultFormat(testConfig.Provider), responseFormat: bifrost.Ptr("json"), }, { name: "RoundTrip_Medium_MP3", text: TTSTestTextMedium, voiceType: "secondary", format: GetProviderDefaultFormat(testConfig.Provider), responseFormat: bifrost.Ptr("json"), }, { name: "RoundTrip_Technical_MP3", text: TTSTestTextTechnical, voiceType: "tertiary", format: GetProviderDefaultFormat(testConfig.Provider), responseFormat: bifrost.Ptr("json"), }, } for _, tc := range roundTripCases { t.Run(tc.name, func(t *testing.T) { ShouldRunParallel(t, testConfig, "Transcription") speechSynthesisProvider := testConfig.Provider if testConfig.ExternalTTSProvider != "" { speechSynthesisProvider = testConfig.ExternalTTSProvider } speechSynthesisModel := testConfig.SpeechSynthesisModel if testConfig.ExternalTTSModel != "" { speechSynthesisModel = testConfig.ExternalTTSModel } var transcriptionRequest *schemas.BifrostTranscriptionRequest if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") { // For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav // So we read from a pre-generated mp3 file to avoid format issues _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) filePath := filepath.Join(dir, "scenarios", "media", fmt.Sprintf("%s.mp3", tc.name)) fileContent, err := os.ReadFile(filePath) if err != nil { t.Fatalf("failed to read audio fixture %s: %v", filePath, err) } transcriptionRequest = &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: fileContent, }, Params: &schemas.TranscriptionParameters{ Language: bifrost.Ptr("en"), Format: bifrost.Ptr("mp3"), ResponseFormat: tc.responseFormat, }, Fallbacks: testConfig.TranscriptionFallbacks, } } else { // Step 1: Generate TTS audio voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType) ttsRequest := &schemas.BifrostSpeechRequest{ Provider: speechSynthesisProvider, Model: speechSynthesisModel, Input: &schemas.SpeechInput{ Input: tc.text, }, Params: &schemas.SpeechParameters{ VoiceConfig: &schemas.SpeechVoiceInput{ Voice: &voice, }, ResponseFormat: tc.format, }, Fallbacks: testConfig.SpeechSynthesisFallbacks, } // Use retry framework for TTS generation ttsRetryConfig := GetTestRetryConfigForScenario("SpeechSynthesis", testConfig) ttsRetryContext := TestRetryContext{ ScenarioName: "Transcription_RoundTrip_TTS_" + tc.name, ExpectedBehavior: map[string]interface{}{ "should_generate_audio": true, }, TestMetadata: map[string]interface{}{ "provider": speechSynthesisProvider, "model": speechSynthesisModel, "format": tc.format, }, } // isStreaming=false, isMultipartRequest=false, isBinaryResponse=true (audio bytes don't have JSON raw response) ttsExpectations := ApplyRawExpectations(SpeechExpectations(100), testConfig, false, false, true) // Minimum expected bytes ttsExpectations = ModifyExpectationsForProvider(ttsExpectations, testConfig.Provider) speechRetryConfig := SpeechRetryConfig{ MaxAttempts: ttsRetryConfig.MaxAttempts, BaseDelay: ttsRetryConfig.BaseDelay, MaxDelay: ttsRetryConfig.MaxDelay, Conditions: []SpeechRetryCondition{}, OnRetry: ttsRetryConfig.OnRetry, OnFinalFail: ttsRetryConfig.OnFinalFail, } ttsResponse, err := WithSpeechTestRetry(t, speechRetryConfig, ttsRetryContext, ttsExpectations, "Transcription_RoundTrip_TTS_"+tc.name, func() (*schemas.BifrostSpeechResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.SpeechRequest(bfCtx, ttsRequest) }) if err != nil { t.Fatalf("❌ TTS generation failed for round-trip test after retries: %v", GetErrorMessage(err)) } if ttsResponse == nil || len(ttsResponse.Audio) == 0 { t.Fatal("❌ TTS returned invalid or empty audio for round-trip test after retries") } // Save temp audio file tempDir := os.TempDir() audioFileName := filepath.Join(tempDir, "roundtrip_"+tc.name+"."+tc.format) writeErr := os.WriteFile(audioFileName, ttsResponse.Audio, 0644) require.NoError(t, writeErr, "Failed to save temp audio file") // Register cleanup t.Cleanup(func() { os.Remove(audioFileName) }) t.Logf("Generated TTS audio for round-trip: %s (%d bytes)", audioFileName, len(ttsResponse.Audio)) // Step 2: Transcribe the generated audio transcriptionRequest = &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: ttsResponse.Audio, }, Params: &schemas.TranscriptionParameters{ Language: bifrost.Ptr("en"), Format: schemas.Ptr(tc.format), ResponseFormat: tc.responseFormat, }, Fallbacks: testConfig.TranscriptionFallbacks, } } // Use retry framework for transcription retryConfig := GetTestRetryConfigForScenario("Transcription", testConfig) retryContext := TestRetryContext{ ScenarioName: "Transcription_RoundTrip_" + tc.name, ExpectedBehavior: map[string]interface{}{ "should_transcribe_audio": true, "round_trip_test": true, }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.TranscriptionModel, "format": tc.format, }, } // Enhanced validation for transcription // Note: isMultipartRequest=true because transcription uses multipart form data, not JSON body expectations := ApplyRawExpectations(TranscriptionExpectations(10), testConfig, false, true) // Expect at least some content expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider) // Create Transcription retry config transcriptionRetryConfig := TranscriptionRetryConfig{ MaxAttempts: retryConfig.MaxAttempts, BaseDelay: retryConfig.BaseDelay, MaxDelay: retryConfig.MaxDelay, Conditions: []TranscriptionRetryCondition{}, // Add specific transcription retry conditions as needed OnRetry: retryConfig.OnRetry, OnFinalFail: retryConfig.OnFinalFail, } transcriptionResponse, bifrostErr := WithTranscriptionTestRetry(t, transcriptionRetryConfig, retryContext, expectations, "Transcription_RoundTrip_"+tc.name, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.TranscriptionRequest(bfCtx, transcriptionRequest) }) if bifrostErr != nil { t.Fatalf("❌ Transcription_RoundTrip_"+tc.name+" request failed after retries: %v", GetErrorMessage(bifrostErr)) } // Validate round-trip transcription (complementary to main validation) validateTranscriptionRoundTrip(t, transcriptionResponse, tc.text, tc.name, testConfig) }) } // Additional test cases using the utility function for edge cases t.Run("AdditionalAudioTests", func(t *testing.T) { // Test with custom generated audio for specific scenarios customCases := []struct { name string text string language *string responseFormat *string }{ { name: "Numbers_And_Punctuation", text: "Testing numbers 1, 2, 3 and punctuation marks! Question?", language: bifrost.Ptr("en"), responseFormat: bifrost.Ptr("json"), }, { name: "Technical_Terms", text: "API gateway processes HTTP requests with JSON payloads", language: bifrost.Ptr("en"), responseFormat: bifrost.Ptr("json"), }, } for _, tc := range customCases { t.Run(tc.name, func(t *testing.T) { ShouldRunParallel(t, testConfig, "Transcription") speechSynthesisProvider := testConfig.Provider if testConfig.ExternalTTSProvider != "" { speechSynthesisProvider = testConfig.ExternalTTSProvider } speechSynthesisModel := testConfig.SpeechSynthesisModel if testConfig.ExternalTTSModel != "" { speechSynthesisModel = testConfig.ExternalTTSModel } audioFormat := GetProviderDefaultFormat(testConfig.Provider) var audioData []byte var readErr error if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") { // For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav // So we read from a pre-generated mp3 file to avoid format issues _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) filePath := filepath.Join(dir, "scenarios", "media", fmt.Sprintf("%s.mp3", tc.name)) audioData, readErr = os.ReadFile(filePath) if readErr != nil { t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr) } audioFormat = "mp3" } else { // Use the utility function to generate audio audioData, _ = GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, tc.text, "primary", audioFormat) } // Test transcription request := &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: audioData, }, Params: &schemas.TranscriptionParameters{ Language: tc.language, Format: &audioFormat, ResponseFormat: tc.responseFormat, }, Fallbacks: testConfig.TranscriptionFallbacks, } // Use retry framework for custom transcription customRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig) customRetryContext := TestRetryContext{ ScenarioName: "Transcription_Custom_" + tc.name, ExpectedBehavior: map[string]interface{}{ "should_transcribe_audio": true, }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.TranscriptionModel, }, } customExpectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true) customExpectations = ModifyExpectationsForProvider(customExpectations, testConfig.Provider) customTranscriptionRetryConfig := TranscriptionRetryConfig{ MaxAttempts: customRetryConfig.MaxAttempts, BaseDelay: customRetryConfig.BaseDelay, MaxDelay: customRetryConfig.MaxDelay, Conditions: []TranscriptionRetryCondition{}, OnRetry: customRetryConfig.OnRetry, OnFinalFail: customRetryConfig.OnFinalFail, } response, err := WithTranscriptionTestRetry(t, customTranscriptionRetryConfig, customRetryContext, customExpectations, "Transcription_Custom_"+tc.name, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.TranscriptionRequest(bfCtx, request) }) if err != nil { errorMsg := GetErrorMessage(err) if !strings.Contains(errorMsg, "❌") { errorMsg = fmt.Sprintf("❌ %s", errorMsg) } t.Fatalf("❌ Custom transcription failed after retries: %s", errorMsg) } if response == nil { t.Fatalf("❌ Custom transcription returned nil response after retries") } if response.Text == "" { t.Fatalf("❌ Custom transcription returned empty text after retries") } t.Logf("✅ Custom transcription successful: '%s' → '%s'", tc.text, response.Text) }) } }) }) } // RunTranscriptionAdvancedTest executes advanced transcription test scenarios func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { if !testConfig.Scenarios.Transcription { t.Logf("Transcription not supported for provider %s", testConfig.Provider) return } t.Run("TranscriptionAdvanced", func(t *testing.T) { t.Run("AllResponseFormats", func(t *testing.T) { // Test supported response formats (excluding text to avoid JSON parsing issues) formats := []string{"json"} for _, format := range formats { t.Run("Format_"+format, func(t *testing.T) { ShouldRunParallel(t, testConfig, "Transcription") speechSynthesisProvider := testConfig.Provider if testConfig.ExternalTTSProvider != "" { speechSynthesisProvider = testConfig.ExternalTTSProvider } speechSynthesisModel := testConfig.SpeechSynthesisModel if testConfig.ExternalTTSModel != "" { speechSynthesisModel = testConfig.ExternalTTSModel } audioFormat := GetProviderDefaultFormat(testConfig.Provider) var audioData []byte var readErr error if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") { // For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav // So we read from a pre-generated mp3 file to avoid format issues _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) filePath := filepath.Join(dir, "scenarios", "media", "RoundTrip_Basic_MP3.mp3") audioData, readErr = os.ReadFile(filePath) if readErr != nil { t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr) } audioFormat = "mp3" } else { // Use the utility function to generate audio audioData, _ = GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", audioFormat) } formatCopy := format request := &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: audioData, }, Params: &schemas.TranscriptionParameters{ Format: &audioFormat, ResponseFormat: &formatCopy, }, Fallbacks: testConfig.TranscriptionFallbacks, } // Use retry framework for format test formatRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig) formatRetryContext := TestRetryContext{ ScenarioName: "Transcription_Format_" + format, ExpectedBehavior: map[string]interface{}{ "should_transcribe_audio": true, }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.TranscriptionModel, "format": format, }, } formatExpectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true) formatExpectations = ModifyExpectationsForProvider(formatExpectations, testConfig.Provider) formatTranscriptionRetryConfig := TranscriptionRetryConfig{ MaxAttempts: formatRetryConfig.MaxAttempts, BaseDelay: formatRetryConfig.BaseDelay, MaxDelay: formatRetryConfig.MaxDelay, Conditions: []TranscriptionRetryCondition{}, OnRetry: formatRetryConfig.OnRetry, OnFinalFail: formatRetryConfig.OnFinalFail, } response, err := WithTranscriptionTestRetry(t, formatTranscriptionRetryConfig, formatRetryContext, formatExpectations, "Transcription_Format_"+format, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.TranscriptionRequest(bfCtx, request) }) if err != nil { errorMsg := GetErrorMessage(err) if !strings.Contains(errorMsg, "❌") { errorMsg = fmt.Sprintf("❌ %s", errorMsg) } t.Fatalf("❌ Transcription failed for format %s after retries: %s", format, errorMsg) } if response == nil { t.Fatalf("❌ Transcription returned nil response for format %s after retries", format) } if response.Text == "" { t.Fatalf("❌ Transcription returned empty text for format %s after retries", format) } t.Logf("✅ Format %s successful: '%s'", format, response.Text) }) } }) t.Run("WithCustomParameters", func(t *testing.T) { ShouldRunParallel(t, testConfig, "Transcription") speechSynthesisProvider := testConfig.Provider if testConfig.ExternalTTSProvider != "" { speechSynthesisProvider = testConfig.ExternalTTSProvider } speechSynthesisModel := testConfig.SpeechSynthesisModel if testConfig.ExternalTTSModel != "" { speechSynthesisModel = testConfig.ExternalTTSModel } audioFormat := GetProviderDefaultFormat(testConfig.Provider) var audioData []byte var readErr error if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") { // For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav // So we read from a pre-generated mp3 file to avoid format issues _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) filePath := filepath.Join(dir, "scenarios", "media", "RoundTrip_Medium_MP3.mp3") audioData, readErr = os.ReadFile(filePath) if readErr != nil { t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr) } audioFormat = "mp3" } else { // Generate audio for custom parameters test audioData, _ = GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextMedium, "secondary", audioFormat) } // Test with custom parameters and temperature request := &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: audioData, }, Params: &schemas.TranscriptionParameters{ Language: bifrost.Ptr("en"), Format: &audioFormat, Prompt: bifrost.Ptr("This audio contains technical terminology and proper nouns."), ResponseFormat: bifrost.Ptr("json"), // Use json instead of verbose_json for whisper-1 }, Fallbacks: testConfig.TranscriptionFallbacks, } // Use retry framework for advanced transcription advancedRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig) advancedRetryContext := TestRetryContext{ ScenarioName: "Transcription_Advanced_CustomParams", ExpectedBehavior: map[string]interface{}{ "should_transcribe_audio": true, }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.TranscriptionModel, }, } advancedExpectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true) advancedExpectations = ModifyExpectationsForProvider(advancedExpectations, testConfig.Provider) advancedTranscriptionRetryConfig := TranscriptionRetryConfig{ MaxAttempts: advancedRetryConfig.MaxAttempts, BaseDelay: advancedRetryConfig.BaseDelay, MaxDelay: advancedRetryConfig.MaxDelay, Conditions: []TranscriptionRetryCondition{}, OnRetry: advancedRetryConfig.OnRetry, OnFinalFail: advancedRetryConfig.OnFinalFail, } response, err := WithTranscriptionTestRetry(t, advancedTranscriptionRetryConfig, advancedRetryContext, advancedExpectations, "Transcription_Advanced_CustomParams", func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.TranscriptionRequest(bfCtx, request) }) if err != nil { errorMsg := GetErrorMessage(err) if !strings.Contains(errorMsg, "❌") { errorMsg = fmt.Sprintf("❌ %s", errorMsg) } t.Fatalf("❌ Advanced transcription failed after retries: %s", errorMsg) } if response == nil { t.Fatalf("❌ Advanced transcription returned nil response after retries") } if response.Text == "" { t.Fatalf("❌ Advanced transcription returned empty text after retries") } t.Logf("✅ Advanced transcription successful: '%s'", response.Text) }) t.Run("MultipleLanguages", func(t *testing.T) { // Test with different language hints (only English for now since our TTS is English) languages := []string{"en"} for _, lang := range languages { t.Run("Language_"+lang, func(t *testing.T) { ShouldRunParallel(t, testConfig, "Transcription") speechSynthesisProvider := testConfig.Provider if testConfig.ExternalTTSProvider != "" { speechSynthesisProvider = testConfig.ExternalTTSProvider } speechSynthesisModel := testConfig.SpeechSynthesisModel if testConfig.ExternalTTSModel != "" { speechSynthesisModel = testConfig.ExternalTTSModel } audioFormat := GetProviderDefaultFormat(testConfig.Provider) var audioData []byte var readErr error if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") { // For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav // So we read from a pre-generated mp3 file to avoid format issues _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) filePath := filepath.Join(dir, "scenarios", "media", "RoundTrip_Basic_MP3.mp3") audioData, readErr = os.ReadFile(filePath) if readErr != nil { t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr) } audioFormat = "mp3" } else { // Use the utility function to generate audio audioData, _ = GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", audioFormat) } langCopy := lang request := &schemas.BifrostTranscriptionRequest{ Provider: testConfig.Provider, Model: testConfig.TranscriptionModel, Input: &schemas.TranscriptionInput{ File: audioData, }, Params: &schemas.TranscriptionParameters{ Format: &audioFormat, Language: &langCopy, }, Fallbacks: testConfig.TranscriptionFallbacks, } // Use retry framework for language test langRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig) langRetryContext := TestRetryContext{ ScenarioName: "Transcription_Language_" + lang, ExpectedBehavior: map[string]interface{}{ "should_transcribe_audio": true, }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.TranscriptionModel, "language": lang, }, } langExpectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true) langExpectations = ModifyExpectationsForProvider(langExpectations, testConfig.Provider) langTranscriptionRetryConfig := TranscriptionRetryConfig{ MaxAttempts: langRetryConfig.MaxAttempts, BaseDelay: langRetryConfig.BaseDelay, MaxDelay: langRetryConfig.MaxDelay, Conditions: []TranscriptionRetryCondition{}, OnRetry: langRetryConfig.OnRetry, OnFinalFail: langRetryConfig.OnFinalFail, } response, err := WithTranscriptionTestRetry(t, langTranscriptionRetryConfig, langRetryContext, langExpectations, "Transcription_Language_"+lang, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.TranscriptionRequest(bfCtx, request) }) if err != nil { errorMsg := GetErrorMessage(err) if !strings.Contains(errorMsg, "❌") { errorMsg = fmt.Sprintf("❌ %s", errorMsg) } t.Fatalf("❌ Transcription failed for language %s after retries: %s", lang, errorMsg) } if response == nil { t.Fatalf("❌ Transcription returned nil response for language %s after retries", lang) } if response.Text == "" { t.Fatalf("❌ Transcription returned empty text for language %s after retries", lang) } t.Logf("✅ Language %s transcription successful: '%s'", lang, response.Text) }) } }) }) } // validateTranscriptionRoundTrip performs round-trip validation for transcription responses // This is complementary to the main validation framework and focuses on transcription accuracy func validateTranscriptionRoundTrip(t *testing.T, response *schemas.BifrostTranscriptionResponse, originalText string, testName string, testConfig ComprehensiveTestConfig) { if response == nil || response.Text == "" { t.Fatal("Transcription response missing transcribed text") } transcribedText := response.Text // Normalize for comparison (lowercase, remove punctuation) originalWords := strings.Fields(strings.ToLower(originalText)) transcribedWords := strings.Fields(strings.ToLower(transcribedText)) // Check that at least 50% of original words are found in transcription foundWords := 0 for _, originalWord := range originalWords { // Remove punctuation for comparison cleanOriginal := strings.Trim(originalWord, ".,!?;:") if len(cleanOriginal) < 3 { // Skip very short words continue } for _, transcribedWord := range transcribedWords { cleanTranscribed := strings.Trim(transcribedWord, ".,!?;:") if strings.Contains(cleanTranscribed, cleanOriginal) || strings.Contains(cleanOriginal, cleanTranscribed) { foundWords++ break } } } // Expect at least 50% word match for successful round-trip minExpectedWords := len(originalWords) / 2 if foundWords < minExpectedWords { t.Logf("⚠️ Round-trip validation concern:") t.Logf(" Original: '%s'", originalText) t.Logf(" Transcribed: '%s'", transcribedText) t.Logf(" Found %d/%d words (%.1f%%), expected ≥ %d (50%%)", foundWords, len(originalWords), float64(foundWords)/float64(len(originalWords))*100, minExpectedWords) // Note: Not failing test as this can be provider/model dependent } else { t.Logf("✅ Round-trip validation passed: found %d/%d words (%.1f%%)", foundWords, len(originalWords), float64(foundWords)/float64(len(originalWords))*100) } // Check provider field if response.ExtraFields.Provider != testConfig.Provider { t.Logf("⚠️ Provider mismatch: expected %s, got %s", testConfig.Provider, response.ExtraFields.Provider) } t.Logf("Round-trip test '%s' completed successfully", testName) }