Files
bifrost/core/internal/llmtests/transcription.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

699 lines
27 KiB
Go

package llmtests
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/require"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// RunTranscriptionTest executes the transcription test scenario.
//
// It only logs and returns when testConfig.Scenarios.Transcription is false.
// Otherwise it runs a "Transcription" subtest that contains:
//   - three round-trip cases: text is synthesized to speech, the audio is
//     transcribed, and the transcript is compared with the original text;
//   - an "AdditionalAudioTests" group covering numbers, punctuation and
//     technical vocabulary.
//
// Speech synthesis uses testConfig.ExternalTTSProvider / ExternalTTSModel when
// they are set, and the provider/model under test otherwise. For "fal-ai/"
// transcription models on HuggingFace no audio is synthesized; pre-generated
// mp3 fixtures from the scenarios/media directory next to this source file
// are used instead.
func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.Transcription {
		t.Logf("Transcription not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("Transcription", func(t *testing.T) {
		// First generate TTS audio for round-trip validation
		roundTripCases := []struct {
			name           string
			text           string
			voiceType      string
			format         string
			responseFormat *string
		}{
			{
				name:           "RoundTrip_Basic_MP3",
				text:           TTSTestTextBasic,
				voiceType:      "primary",
				format:         GetProviderDefaultFormat(testConfig.Provider),
				responseFormat: bifrost.Ptr("json"),
			},
			{
				name:           "RoundTrip_Medium_MP3",
				text:           TTSTestTextMedium,
				voiceType:      "secondary",
				format:         GetProviderDefaultFormat(testConfig.Provider),
				responseFormat: bifrost.Ptr("json"),
			},
			{
				name:           "RoundTrip_Technical_MP3",
				text:           TTSTestTextTechnical,
				voiceType:      "tertiary",
				format:         GetProviderDefaultFormat(testConfig.Provider),
				responseFormat: bifrost.Ptr("json"),
			},
		}

		for _, tc := range roundTripCases {
			t.Run(tc.name, func(t *testing.T) {
				ShouldRunParallel(t, testConfig, "Transcription")

				// An external TTS provider/model, when configured, replaces the
				// provider under test for the synthesis step only.
				speechSynthesisProvider := testConfig.Provider
				if testConfig.ExternalTTSProvider != "" {
					speechSynthesisProvider = testConfig.ExternalTTSProvider
				}
				speechSynthesisModel := testConfig.SpeechSynthesisModel
				if testConfig.ExternalTTSModel != "" {
					speechSynthesisModel = testConfig.ExternalTTSModel
				}

				var transcriptionRequest *schemas.BifrostTranscriptionRequest
				if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") {
					// For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav
					// So we read from a pre-generated mp3 file to avoid format issues
					_, filename, _, ok := runtime.Caller(0)
					if !ok {
						t.Fatal("failed to resolve source file location for audio fixtures")
					}
					filePath := filepath.Join(filepath.Dir(filename), "scenarios", "media", fmt.Sprintf("%s.mp3", tc.name))
					fileContent, err := os.ReadFile(filePath)
					if err != nil {
						t.Fatalf("failed to read audio fixture %s: %v", filePath, err)
					}
					transcriptionRequest = &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: fileContent,
						},
						Params: &schemas.TranscriptionParameters{
							Language:       bifrost.Ptr("en"),
							Format:         bifrost.Ptr("mp3"),
							ResponseFormat: tc.responseFormat,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}
				} else {
					// Step 1: Generate TTS audio
					voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType)
					ttsRequest := &schemas.BifrostSpeechRequest{
						Provider: speechSynthesisProvider,
						Model:    speechSynthesisModel,
						Input: &schemas.SpeechInput{
							Input: tc.text,
						},
						Params: &schemas.SpeechParameters{
							VoiceConfig: &schemas.SpeechVoiceInput{
								Voice: &voice,
							},
							ResponseFormat: tc.format,
						},
						Fallbacks: testConfig.SpeechSynthesisFallbacks,
					}

					// Use retry framework for TTS generation
					ttsRetryConfig := GetTestRetryConfigForScenario("SpeechSynthesis", testConfig)
					ttsRetryContext := TestRetryContext{
						ScenarioName: "Transcription_RoundTrip_TTS_" + tc.name,
						ExpectedBehavior: map[string]interface{}{
							"should_generate_audio": true,
						},
						TestMetadata: map[string]interface{}{
							"provider": speechSynthesisProvider,
							"model":    speechSynthesisModel,
							"format":   tc.format,
						},
					}

					// isStreaming=false, isMultipartRequest=false, isBinaryResponse=true (audio bytes don't have JSON raw response)
					ttsExpectations := ApplyRawExpectations(SpeechExpectations(100), testConfig, false, false, true) // Minimum expected bytes
					// NOTE(review): expectations are adjusted for testConfig.Provider even when an
					// external TTS provider performs the synthesis — confirm this is intended.
					ttsExpectations = ModifyExpectationsForProvider(ttsExpectations, testConfig.Provider)

					speechRetryConfig := SpeechRetryConfig{
						MaxAttempts: ttsRetryConfig.MaxAttempts,
						BaseDelay:   ttsRetryConfig.BaseDelay,
						MaxDelay:    ttsRetryConfig.MaxDelay,
						Conditions:  []SpeechRetryCondition{},
						OnRetry:     ttsRetryConfig.OnRetry,
						OnFinalFail: ttsRetryConfig.OnFinalFail,
					}

					ttsResponse, err := WithSpeechTestRetry(t, speechRetryConfig, ttsRetryContext, ttsExpectations, "Transcription_RoundTrip_TTS_"+tc.name, func() (*schemas.BifrostSpeechResponse, *schemas.BifrostError) {
						bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
						return client.SpeechRequest(bfCtx, ttsRequest)
					})
					if err != nil {
						t.Fatalf("❌ TTS generation failed for round-trip test after retries: %v", GetErrorMessage(err))
					}
					if ttsResponse == nil || len(ttsResponse.Audio) == 0 {
						t.Fatal("❌ TTS returned invalid or empty audio for round-trip test after retries")
					}

					// Save the audio into a directory owned by this subtest. t.TempDir gives
					// every (parallel) subtest its own directory and removes it when the
					// subtest finishes, so concurrent runs cannot clobber each other's file.
					audioFileName := filepath.Join(t.TempDir(), "roundtrip_"+tc.name+"."+tc.format)
					writeErr := os.WriteFile(audioFileName, ttsResponse.Audio, 0644)
					require.NoError(t, writeErr, "Failed to save temp audio file")
					t.Logf("Generated TTS audio for round-trip: %s (%d bytes)", audioFileName, len(ttsResponse.Audio))

					// Step 2: Transcribe the generated audio
					transcriptionRequest = &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: ttsResponse.Audio,
						},
						Params: &schemas.TranscriptionParameters{
							Language:       bifrost.Ptr("en"),
							Format:         bifrost.Ptr(tc.format),
							ResponseFormat: tc.responseFormat,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}
				}

				// Use retry framework for transcription
				retryConfig := GetTestRetryConfigForScenario("Transcription", testConfig)
				retryContext := TestRetryContext{
					ScenarioName: "Transcription_RoundTrip_" + tc.name,
					ExpectedBehavior: map[string]interface{}{
						"should_transcribe_audio": true,
						"round_trip_test":         true,
					},
					TestMetadata: map[string]interface{}{
						"provider": testConfig.Provider,
						"model":    testConfig.TranscriptionModel,
						"format":   tc.format,
					},
				}

				// Enhanced validation for transcription
				// Note: isMultipartRequest=true because transcription uses multipart form data, not JSON body
				expectations := ApplyRawExpectations(TranscriptionExpectations(10), testConfig, false, true) // Expect at least some content
				expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)

				// Create Transcription retry config
				transcriptionRetryConfig := TranscriptionRetryConfig{
					MaxAttempts: retryConfig.MaxAttempts,
					BaseDelay:   retryConfig.BaseDelay,
					MaxDelay:    retryConfig.MaxDelay,
					Conditions:  []TranscriptionRetryCondition{}, // Add specific transcription retry conditions as needed
					OnRetry:     retryConfig.OnRetry,
					OnFinalFail: retryConfig.OnFinalFail,
				}

				transcriptionResponse, bifrostErr := WithTranscriptionTestRetry(t, transcriptionRetryConfig, retryContext, expectations, "Transcription_RoundTrip_"+tc.name, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.TranscriptionRequest(bfCtx, transcriptionRequest)
				})
				if bifrostErr != nil {
					// The case name is passed as an argument so it can never be
					// interpreted as part of the format string.
					t.Fatalf("❌ Transcription_RoundTrip_%s request failed after retries: %v", tc.name, GetErrorMessage(bifrostErr))
				}

				// Validate round-trip transcription (complementary to main validation)
				validateTranscriptionRoundTrip(t, transcriptionResponse, tc.text, tc.name, testConfig)
			})
		}

		// Additional test cases using the utility function for edge cases
		t.Run("AdditionalAudioTests", func(t *testing.T) {
			// Test with custom generated audio for specific scenarios
			customCases := []struct {
				name           string
				text           string
				language       *string
				responseFormat *string
			}{
				{
					name:           "Numbers_And_Punctuation",
					text:           "Testing numbers 1, 2, 3 and punctuation marks! Question?",
					language:       bifrost.Ptr("en"),
					responseFormat: bifrost.Ptr("json"),
				},
				{
					name:           "Technical_Terms",
					text:           "API gateway processes HTTP requests with JSON payloads",
					language:       bifrost.Ptr("en"),
					responseFormat: bifrost.Ptr("json"),
				},
			}

			for _, tc := range customCases {
				t.Run(tc.name, func(t *testing.T) {
					ShouldRunParallel(t, testConfig, "Transcription")

					speechSynthesisProvider := testConfig.Provider
					if testConfig.ExternalTTSProvider != "" {
						speechSynthesisProvider = testConfig.ExternalTTSProvider
					}
					speechSynthesisModel := testConfig.SpeechSynthesisModel
					if testConfig.ExternalTTSModel != "" {
						speechSynthesisModel = testConfig.ExternalTTSModel
					}

					audioFormat := GetProviderDefaultFormat(testConfig.Provider)
					var audioData []byte
					if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") {
						// For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav
						// So we read from a pre-generated mp3 file to avoid format issues
						_, filename, _, ok := runtime.Caller(0)
						if !ok {
							t.Fatal("failed to resolve source file location for audio fixtures")
						}
						filePath := filepath.Join(filepath.Dir(filename), "scenarios", "media", fmt.Sprintf("%s.mp3", tc.name))
						var readErr error
						audioData, readErr = os.ReadFile(filePath)
						if readErr != nil {
							t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr)
						}
						audioFormat = "mp3"
					} else {
						// Use the utility function to generate audio. Only the audio bytes are
						// needed here; the second return value is deliberately unused.
						audioData, _ = GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, tc.text, "primary", audioFormat)
						if len(audioData) == 0 {
							t.Fatalf("TTS audio generation returned no data for custom case %s", tc.name)
						}
					}

					// Test transcription
					request := &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: audioData,
						},
						Params: &schemas.TranscriptionParameters{
							Language:       tc.language,
							Format:         &audioFormat,
							ResponseFormat: tc.responseFormat,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}

					// Use retry framework for custom transcription
					customRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig)
					customRetryContext := TestRetryContext{
						ScenarioName: "Transcription_Custom_" + tc.name,
						ExpectedBehavior: map[string]interface{}{
							"should_transcribe_audio": true,
						},
						TestMetadata: map[string]interface{}{
							"provider": testConfig.Provider,
							"model":    testConfig.TranscriptionModel,
						},
					}

					// isMultipartRequest=true: transcription uses multipart form data
					customExpectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true)
					customExpectations = ModifyExpectationsForProvider(customExpectations, testConfig.Provider)

					customTranscriptionRetryConfig := TranscriptionRetryConfig{
						MaxAttempts: customRetryConfig.MaxAttempts,
						BaseDelay:   customRetryConfig.BaseDelay,
						MaxDelay:    customRetryConfig.MaxDelay,
						Conditions:  []TranscriptionRetryCondition{},
						OnRetry:     customRetryConfig.OnRetry,
						OnFinalFail: customRetryConfig.OnFinalFail,
					}

					response, err := WithTranscriptionTestRetry(t, customTranscriptionRetryConfig, customRetryContext, customExpectations, "Transcription_Custom_"+tc.name, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
						bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
						return client.TranscriptionRequest(bfCtx, request)
					})
					if err != nil {
						errorMsg := GetErrorMessage(err)
						if !strings.Contains(errorMsg, "❌") {
							errorMsg = fmt.Sprintf("❌ %s", errorMsg)
						}
						t.Fatalf("❌ Custom transcription failed after retries: %s", errorMsg)
					}
					if response == nil {
						t.Fatalf("❌ Custom transcription returned nil response after retries")
					}
					if response.Text == "" {
						t.Fatalf("❌ Custom transcription returned empty text after retries")
					}
					t.Logf("✅ Custom transcription successful: '%s' → '%s'", tc.text, response.Text)
				})
			}
		})
	})
}
// RunTranscriptionAdvancedTest executes advanced transcription test scenarios.
//
// It only logs and returns when testConfig.Scenarios.Transcription is false.
// Otherwise it runs a "TranscriptionAdvanced" subtest with three groups:
//   - AllResponseFormats: one subtest per supported response format;
//   - WithCustomParameters: language, prompt and response format set together;
//   - MultipleLanguages: one subtest per language hint.
//
// Every scenario obtains its audio through loadAdvancedTranscriptionAudio and
// sends the request through requestAdvancedTranscription, then fails the
// subtest on an error, a nil response, or an empty transcript.
func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.Transcription {
		t.Logf("Transcription not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("TranscriptionAdvanced", func(t *testing.T) {
		t.Run("AllResponseFormats", func(t *testing.T) {
			// Test supported response formats (excluding text to avoid JSON parsing issues)
			formats := []string{"json"}
			for _, format := range formats {
				t.Run("Format_"+format, func(t *testing.T) {
					ShouldRunParallel(t, testConfig, "Transcription")

					audioData, audioFormat := loadAdvancedTranscriptionAudio(ctx, t, client, testConfig, "RoundTrip_Basic_MP3.mp3", TTSTestTextBasic, "primary")

					formatCopy := format
					request := &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: audioData,
						},
						Params: &schemas.TranscriptionParameters{
							Format:         &audioFormat,
							ResponseFormat: &formatCopy,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}

					// Use retry framework for format test
					response, err := requestAdvancedTranscription(ctx, t, client, testConfig, "Transcription_Format_"+format, map[string]interface{}{
						"provider": testConfig.Provider,
						"model":    testConfig.TranscriptionModel,
						"format":   format,
					}, request)
					if err != nil {
						errorMsg := GetErrorMessage(err)
						if !strings.Contains(errorMsg, "❌") {
							errorMsg = fmt.Sprintf("❌ %s", errorMsg)
						}
						t.Fatalf("❌ Transcription failed for format %s after retries: %s", format, errorMsg)
					}
					if response == nil {
						t.Fatalf("❌ Transcription returned nil response for format %s after retries", format)
					}
					if response.Text == "" {
						t.Fatalf("❌ Transcription returned empty text for format %s after retries", format)
					}
					t.Logf("✅ Format %s successful: '%s'", format, response.Text)
				})
			}
		})

		t.Run("WithCustomParameters", func(t *testing.T) {
			ShouldRunParallel(t, testConfig, "Transcription")

			// Generate audio for custom parameters test
			audioData, audioFormat := loadAdvancedTranscriptionAudio(ctx, t, client, testConfig, "RoundTrip_Medium_MP3.mp3", TTSTestTextMedium, "secondary")

			// Test with custom parameters and temperature
			request := &schemas.BifrostTranscriptionRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.TranscriptionModel,
				Input: &schemas.TranscriptionInput{
					File: audioData,
				},
				Params: &schemas.TranscriptionParameters{
					Language:       bifrost.Ptr("en"),
					Format:         &audioFormat,
					Prompt:         bifrost.Ptr("This audio contains technical terminology and proper nouns."),
					ResponseFormat: bifrost.Ptr("json"), // Use json instead of verbose_json for whisper-1
				},
				Fallbacks: testConfig.TranscriptionFallbacks,
			}

			// Use retry framework for advanced transcription
			response, err := requestAdvancedTranscription(ctx, t, client, testConfig, "Transcription_Advanced_CustomParams", map[string]interface{}{
				"provider": testConfig.Provider,
				"model":    testConfig.TranscriptionModel,
			}, request)
			if err != nil {
				errorMsg := GetErrorMessage(err)
				if !strings.Contains(errorMsg, "❌") {
					errorMsg = fmt.Sprintf("❌ %s", errorMsg)
				}
				t.Fatalf("❌ Advanced transcription failed after retries: %s", errorMsg)
			}
			if response == nil {
				t.Fatalf("❌ Advanced transcription returned nil response after retries")
			}
			if response.Text == "" {
				t.Fatalf("❌ Advanced transcription returned empty text after retries")
			}
			t.Logf("✅ Advanced transcription successful: '%s'", response.Text)
		})

		t.Run("MultipleLanguages", func(t *testing.T) {
			// Test with different language hints (only English for now since our TTS is English)
			languages := []string{"en"}
			for _, lang := range languages {
				t.Run("Language_"+lang, func(t *testing.T) {
					ShouldRunParallel(t, testConfig, "Transcription")

					audioData, audioFormat := loadAdvancedTranscriptionAudio(ctx, t, client, testConfig, "RoundTrip_Basic_MP3.mp3", TTSTestTextBasic, "primary")

					langCopy := lang
					request := &schemas.BifrostTranscriptionRequest{
						Provider: testConfig.Provider,
						Model:    testConfig.TranscriptionModel,
						Input: &schemas.TranscriptionInput{
							File: audioData,
						},
						Params: &schemas.TranscriptionParameters{
							Format:   &audioFormat,
							Language: &langCopy,
						},
						Fallbacks: testConfig.TranscriptionFallbacks,
					}

					// Use retry framework for language test
					response, err := requestAdvancedTranscription(ctx, t, client, testConfig, "Transcription_Language_"+lang, map[string]interface{}{
						"provider": testConfig.Provider,
						"model":    testConfig.TranscriptionModel,
						"language": lang,
					}, request)
					if err != nil {
						errorMsg := GetErrorMessage(err)
						if !strings.Contains(errorMsg, "❌") {
							errorMsg = fmt.Sprintf("❌ %s", errorMsg)
						}
						t.Fatalf("❌ Transcription failed for language %s after retries: %s", lang, errorMsg)
					}
					if response == nil {
						t.Fatalf("❌ Transcription returned nil response for language %s after retries", lang)
					}
					if response.Text == "" {
						t.Fatalf("❌ Transcription returned empty text for language %s after retries", lang)
					}
					t.Logf("✅ Language %s transcription successful: '%s'", lang, response.Text)
				})
			}
		})
	})
}

// loadAdvancedTranscriptionAudio returns the audio bytes and the audio format
// name to send in an advanced transcription scenario.
//
// For "fal-ai/" transcription models on HuggingFace it reads fixtureFile from
// the scenarios/media directory next to this source file and reports "mp3".
// Otherwise it synthesizes ttsText with voiceType through
// GenerateTTSAudioForTest, using the external TTS provider/model from
// testConfig when they are set, and reports the default format of the
// provider under test. Any failure aborts the calling test via t.Fatalf.
func loadAdvancedTranscriptionAudio(ctx context.Context, t *testing.T, client *bifrost.Bifrost, testConfig ComprehensiveTestConfig, fixtureFile string, ttsText string, voiceType string) ([]byte, string) {
	t.Helper()

	if testConfig.Provider == schemas.HuggingFace && strings.HasPrefix(testConfig.TranscriptionModel, "fal-ai/") {
		// For Fal-AI models on HuggingFace, we have to use mp3 but fal-ai speech models only return wav
		// So we read from a pre-generated mp3 file to avoid format issues
		_, filename, _, ok := runtime.Caller(0)
		if !ok {
			t.Fatal("failed to resolve source file location for audio fixtures")
		}
		filePath := filepath.Join(filepath.Dir(filename), "scenarios", "media", fixtureFile)
		audioData, readErr := os.ReadFile(filePath)
		if readErr != nil {
			t.Fatalf("failed to read audio fixture %s: %v", filePath, readErr)
		}
		return audioData, "mp3"
	}

	speechSynthesisProvider := testConfig.Provider
	if testConfig.ExternalTTSProvider != "" {
		speechSynthesisProvider = testConfig.ExternalTTSProvider
	}
	speechSynthesisModel := testConfig.SpeechSynthesisModel
	if testConfig.ExternalTTSModel != "" {
		speechSynthesisModel = testConfig.ExternalTTSModel
	}

	audioFormat := GetProviderDefaultFormat(testConfig.Provider)
	// Use the utility function to generate audio. Only the audio bytes are
	// needed here; the second return value is deliberately unused.
	audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, ttsText, voiceType, audioFormat)
	if len(audioData) == 0 {
		t.Fatalf("TTS audio generation returned no data for voice type %s", voiceType)
	}
	return audioData, audioFormat
}

// requestAdvancedTranscription sends request through the transcription retry
// framework and returns whatever WithTranscriptionTestRetry returns.
//
// scenarioName is used both as the retry context's ScenarioName and as the
// scenario label passed to the retry wrapper; testMetadata becomes the retry
// context's TestMetadata. Retry limits and callbacks are copied from the
// "Transcription" scenario retry config. The caller decides how to report
// errors.
func requestAdvancedTranscription(ctx context.Context, t *testing.T, client *bifrost.Bifrost, testConfig ComprehensiveTestConfig, scenarioName string, testMetadata map[string]interface{}, request *schemas.BifrostTranscriptionRequest) (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
	t.Helper()

	baseRetryConfig := GetTestRetryConfigForScenario("Transcription", testConfig)
	retryContext := TestRetryContext{
		ScenarioName: scenarioName,
		ExpectedBehavior: map[string]interface{}{
			"should_transcribe_audio": true,
		},
		TestMetadata: testMetadata,
	}

	// isMultipartRequest=true: transcription uses multipart form data
	expectations := ApplyRawExpectations(TranscriptionExpectations(5), testConfig, false, true)
	expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)

	retryConfig := TranscriptionRetryConfig{
		MaxAttempts: baseRetryConfig.MaxAttempts,
		BaseDelay:   baseRetryConfig.BaseDelay,
		MaxDelay:    baseRetryConfig.MaxDelay,
		Conditions:  []TranscriptionRetryCondition{},
		OnRetry:     baseRetryConfig.OnRetry,
		OnFinalFail: baseRetryConfig.OnFinalFail,
	}

	return WithTranscriptionTestRetry(t, retryConfig, retryContext, expectations, scenarioName, func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
		bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
		return client.TranscriptionRequest(bfCtx, request)
	})
}
// validateTranscriptionRoundTrip performs round-trip validation for transcription responses.
// This is complementary to the main validation framework and focuses on transcription accuracy.
//
// The test fails only when response is nil or carries no text. Accuracy is
// reported through log lines and never fails the test, because word overlap
// can be provider/model dependent.
//
// Comparison rules:
//   - both texts are lowercased, split on whitespace, and stripped of leading
//     and trailing ".,!?;:" characters;
//   - words shorter than three bytes are ignored on BOTH sides, so that empty
//     or one-letter transcribed tokens cannot match every original word
//     through the substring check;
//   - an original word counts as found when it contains, or is contained in,
//     any remaining transcribed word;
//   - the 50% threshold and the reported percentage are computed over the
//     compared words only, with the threshold rounded up.
//
// A mismatch between response.ExtraFields.Provider and testConfig.Provider is
// logged as a warning.
func validateTranscriptionRoundTrip(t *testing.T, response *schemas.BifrostTranscriptionResponse, originalText string, testName string, testConfig ComprehensiveTestConfig) {
	if response == nil || response.Text == "" {
		t.Fatal("Transcription response missing transcribed text")
	}
	transcribedText := response.Text

	const (
		punctuation = ".,!?;:"
		minWordLen  = 3 // Skip very short words
	)

	// Normalize the transcript once instead of re-trimming it for every
	// original word.
	rawTranscribed := strings.Fields(strings.ToLower(transcribedText))
	transcribedWords := make([]string, 0, len(rawTranscribed))
	for _, word := range rawTranscribed {
		if word = strings.Trim(word, punctuation); len(word) >= minWordLen {
			transcribedWords = append(transcribedWords, word)
		}
	}

	// Count how many comparable original words appear in the transcription.
	comparedWords, foundWords := 0, 0
	for _, word := range strings.Fields(strings.ToLower(originalText)) {
		word = strings.Trim(word, punctuation)
		if len(word) < minWordLen {
			continue
		}
		comparedWords++
		for _, candidate := range transcribedWords {
			if strings.Contains(candidate, word) || strings.Contains(word, candidate) {
				foundWords++
				break
			}
		}
	}

	// Expect at least 50% word match for successful round-trip (rounded up so
	// that e.g. 2 of 5 words is not accepted as "at least half").
	minExpectedWords := (comparedWords + 1) / 2
	matchPercent := 0.0
	if comparedWords > 0 { // avoid NaN when nothing was comparable
		matchPercent = float64(foundWords) / float64(comparedWords) * 100
	}

	if foundWords < minExpectedWords {
		t.Logf("⚠️ Round-trip validation concern:")
		t.Logf(" Original: '%s'", originalText)
		t.Logf(" Transcribed: '%s'", transcribedText)
		t.Logf(" Found %d/%d words (%.1f%%), expected ≥ %d (50%%)",
			foundWords, comparedWords, matchPercent, minExpectedWords)
		// Note: Not failing test as this can be provider/model dependent
	} else {
		t.Logf("✅ Round-trip validation passed: found %d/%d words (%.1f%%)",
			foundWords, comparedWords, matchPercent)
	}

	// Check provider field
	if response.ExtraFields.Provider != testConfig.Provider {
		t.Logf("⚠️ Provider mismatch: expected %s, got %s", testConfig.Provider, response.ExtraFields.Provider)
	}
	t.Logf("Round-trip test '%s' completed successfully", testName)
}