package llmtests import ( "encoding/json" "fmt" "strings" "github.com/maximhq/bifrost/core/schemas" ) // ============================================================================= // BASIC RESPONSE CONDITIONS // ============================================================================= // EmptyResponseCondition checks for empty or missing response content type EmptyResponseCondition struct{} func (c *EmptyResponseCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // If there's an error, let the HTTP retry logic handle it if err != nil { return false, "" } // No response at all if response == nil { return true, "response is nil" } // Check if chat completions response exists if response.TextCompletionResponse == nil && response.ChatResponse == nil && response.ResponsesResponse == nil { return true, "response has no chat completions or responses data" } // Check if all choices are empty (no content AND no tool calls) hasContent := false // Check for textual content using the already robust GetResultContent function content := GetResultContent(response) if strings.TrimSpace(content) != "" { hasContent = true } // If no textual content, check for tool calls using the universal ExtractToolCalls function if !hasContent { toolCalls := ExtractToolCalls(response) if len(toolCalls) > 0 { // Validate that at least one tool call has a function name for _, toolCall := range toolCalls { if strings.TrimSpace(toolCall.Name) != "" { hasContent = true break } } } if len(toolCalls) == 0 { return true, "no tool calls found in response" } } if !hasContent { return true, "all choices have empty content and no tool calls" } return false, "" } func (c *EmptyResponseCondition) GetConditionName() string { return "EmptyResponse" } // ============================================================================= // TOOL CALLING CONDITIONS // ============================================================================= // MissingToolCallCondition checks if expected tool call is missing type MissingToolCallCondition struct { ExpectedToolName string // Name of the tool that should have been called } func (c *MissingToolCallCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } expectedTool := c.ExpectedToolName if expectedTool == "" { // Try to get from context if tool, ok := context.ExpectedBehavior["expected_tool_name"].(string); ok { expectedTool = tool } else { return false, "" } } // Extract tool calls from both API formats toolCalls := ExtractToolCalls(response) // Check if any tool call has the expected name for _, toolCall := range toolCalls { if toolCall.Name == expectedTool { return false, "" // Found the expected tool call } } return true, fmt.Sprintf("expected tool call '%s' not found in response", expectedTool) } func (c *MissingToolCallCondition) GetConditionName() string { return "MissingToolCall" } // MalformedToolArgsCondition checks for malformed tool call arguments type MalformedToolArgsCondition struct{} func (c *MalformedToolArgsCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } // Extract tool calls from both API formats toolCalls := ExtractToolCalls(response) // Check all tool calls for malformed arguments for i, toolCall := range toolCalls { if toolCall.Arguments == "" { continue // Skip empty arguments for now } // Try to parse arguments as JSON var args map[string]interface{} if err := json.Unmarshal([]byte(toolCall.Arguments), &args); err != nil { return true, fmt.Sprintf("tool call %d has malformed JSON arguments: %s", i, err.Error()) } // Check for empty arguments only when arguments are explicitly required // Some tools (like get_current_time) legitimately take no arguments requiresArgs := false if context.ExpectedBehavior != nil { // Check if this test expects arguments (default: false, allowing tools with no args) if expectArgs, ok := context.ExpectedBehavior["requires_arguments"].(bool); ok { requiresArgs = expectArgs } } if requiresArgs && len(args) == 0 && toolCall.Name != "" { return true, fmt.Sprintf("tool call %d (%s) has empty arguments but arguments are required", i, toolCall.Name) } } return false, "" } func (c *MalformedToolArgsCondition) GetConditionName() string { return "MalformedToolArgs" } // WrongToolCalledCondition checks if the wrong tool was called type WrongToolCalledCondition struct { ExpectedToolName string ForbiddenTools []string // Tools that should not be called } func (c *WrongToolCalledCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } expectedTool := c.ExpectedToolName if expectedTool == "" { if tool, ok := context.ExpectedBehavior["expected_tool_name"].(string); ok { expectedTool = tool } } // Extract tool calls from both API formats toolCalls := ExtractToolCalls(response) // Check all tool calls for i, toolCall := range toolCalls { toolName := toolCall.Name if toolName == "" { continue } // Check if forbidden tool was called for _, forbidden := range c.ForbiddenTools { if toolName == forbidden { return true, fmt.Sprintf("tool call %d called forbidden tool '%s'", i, toolName) } } // If we have an expected tool and this isn't it if expectedTool != "" && toolName != expectedTool { return true, fmt.Sprintf("tool call %d called '%s' instead of expected '%s'", i, toolName, expectedTool) } } return false, "" } func (c *WrongToolCalledCondition) GetConditionName() string { return "WrongToolCalled" } // ============================================================================= // MULTIPLE TOOL CALL CONDITIONS // ============================================================================= // PartialToolCallCondition checks if we got fewer tool calls than expected type PartialToolCallCondition struct { ExpectedCount int // Expected number of tool calls } func (c *PartialToolCallCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } expectedCount := c.ExpectedCount if expectedCount == 0 { if count, ok := context.ExpectedBehavior["expected_tool_count"].(int); ok { expectedCount = count } else { return false, "" } } // Extract tool calls from both API formats and count them toolCalls := ExtractToolCalls(response) actualCount := len(toolCalls) if actualCount < expectedCount { return true, fmt.Sprintf("got %d tool calls, expected %d", actualCount, expectedCount) } return false, "" } func (c *PartialToolCallCondition) GetConditionName() string { return "PartialToolCall" } // WrongToolSequenceCondition checks if tools were called in wrong order type WrongToolSequenceCondition struct { ExpectedTools []string // Expected sequence of tool names } func (c *WrongToolSequenceCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } expectedTools := c.ExpectedTools if len(expectedTools) == 0 { if tools, ok := context.ExpectedBehavior["expected_tool_sequence"].([]string); ok { expectedTools = tools } else { return false, "" } } // Extract tool calls from both API formats toolCalls := ExtractToolCalls(response) // If we don't have enough tool calls if len(toolCalls) < len(expectedTools) { return true, fmt.Sprintf("got %d tool calls, expected at least %d", len(toolCalls), len(expectedTools)) } // Check sequence for j, expectedTool := range expectedTools { if j >= len(toolCalls) { break } actualTool := toolCalls[j].Name if actualTool != expectedTool { if actualTool == "" { actualTool = "nil" } return true, fmt.Sprintf("position %d: got '%s', expected '%s'", j, actualTool, expectedTool) } } return false, "" } func (c *WrongToolSequenceCondition) GetConditionName() string { return "WrongToolSequence" } // ============================================================================= // IMAGE PROCESSING CONDITIONS // ============================================================================= // ImageNotProcessedCondition checks if image content was actually processed type ImageNotProcessedCondition struct{} func (c *ImageNotProcessedCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } // Get response content content := strings.ToLower(GetResultContent(response)) // Check for generic responses that don't indicate image processing genericPhrases := []string{ "i can't see", "i cannot see", "unable to see", "can't view", "cannot view", "no image", "not able to see", "i don't see", "i cannot process", } for _, phrase := range genericPhrases { if strings.Contains(content, phrase) { return true, fmt.Sprintf("response suggests image was not processed: contains '%s'", phrase) } } // If content is suspiciously short for image analysis if len(strings.TrimSpace(content)) < 20 { return true, "response too short for meaningful image analysis" } return false, "" } func (c *ImageNotProcessedCondition) GetConditionName() string { return "ImageNotProcessed" } // ============================================================================= // FILE/DOCUMENT PROCESSING CONDITIONS // ============================================================================= // FileNotProcessedCondition checks if file/document was not properly processed type FileNotProcessedCondition struct{} func (c *FileNotProcessedCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } // Get response content content := strings.ToLower(GetResultContent(response)) // Check for generic responses that don't indicate file/document processing fileProcessingFailurePhrases := []string{ "i can't read", "i cannot read", "unable to read", "can't access", "cannot access", "no file", "no document", "not able to read", "i don't see", "i cannot process", "unable to process", "can't open", "cannot open", "invalid file", "corrupted", "unsupported format", "failed to load", "no pdf", "cannot view", } for _, phrase := range fileProcessingFailurePhrases { if strings.Contains(content, phrase) { return true, fmt.Sprintf("response suggests file was not processed: contains '%s'", phrase) } } // If content is suspiciously short for document analysis if len(strings.TrimSpace(content)) < 15 { return true, "response too short for meaningful document analysis" } return false, "" } func (c *FileNotProcessedCondition) GetConditionName() string { return "FileNotProcessed" } // GenericResponseCondition checks for generic/template responses type GenericResponseCondition struct{} func (c *GenericResponseCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.TextCompletionResponse == nil && response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } content := strings.ToLower(GetResultContent(response)) // Generic phrases that suggest the model didn't engage with the specific request genericPhrases := []string{ "as an ai", "as a language model", "i'm an ai", "i am an ai", "i'm a language model", "i am a language model", "i can help you with", "how can i assist you", "what would you like to know", "is there anything else", } // Check if response starts with generic phrases (more concerning) for _, phrase := range genericPhrases { if strings.HasPrefix(content, phrase) { return true, fmt.Sprintf("response starts with generic phrase: '%s'", phrase) } } // Check for overly generic responses (short and generic) if len(strings.TrimSpace(content)) < 30 { for _, phrase := range genericPhrases { if strings.Contains(content, phrase) { return true, fmt.Sprintf("short response contains generic phrase: '%s'", phrase) } } } return false, "" } func (c *GenericResponseCondition) GetConditionName() string { return "GenericResponse" } // ============================================================================= // CONTENT VALIDATION CONDITIONS // ============================================================================= // ContentValidationCondition checks if response fails basic content validation // This is crucial for vision tests where the AI might give different descriptions type ContentValidationCondition struct{} func (c *ContentValidationCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.TextCompletionResponse == nil && response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } content := strings.ToLower(GetResultContent(response)) // Skip if response is too short or generic (other conditions will handle these) if len(content) < 10 { return false, "" } // Only check content validation for vision-related scenarios scenarioName := strings.ToLower(context.ScenarioName) if !strings.Contains(scenarioName, "image") && !strings.Contains(scenarioName, "vision") { return false, "" } // Check if this looks like a valid vision response but might be missing keywords // Look for vision-related indicators that suggest the AI processed the image visionIndicators := []string{ "see", "shows", "depicts", "contains", "features", "displays", "appears", "looks", "visible", "image", "picture", "photo", "color", "shape", "object", "animal", "person", "building", "in the", "there is", "there are", "this is", "i can see", } hasVisionContent := false for _, indicator := range visionIndicators { if strings.Contains(content, indicator) { hasVisionContent = true break } } // If it looks like a valid vision response, check if we should retry based on missing expected keywords if hasVisionContent { // Check if this test has expected keywords from the TestRetryContext if testMetadata, exists := context.TestMetadata["expected_keywords"]; exists { if expectedKeywords, ok := testMetadata.([]string); ok && len(expectedKeywords) > 0 { // Check if ANY of the expected keywords are present foundExpectedKeyword := false for _, keyword := range expectedKeywords { if strings.Contains(content, strings.ToLower(keyword)) { foundExpectedKeyword = true break } } // If valid vision response but missing ALL expected keywords, retry // Allow longer responses for complex vision tasks (comparisons, detailed descriptions) if !foundExpectedKeyword && len(content) > 20 && len(content) < 2000 { return true, fmt.Sprintf("valid vision response but missing expected keywords %v, might include them on retry", expectedKeywords) } } } // Fallback: Check expected behavior fields for dynamic validation if expectedAnimal, ok := context.ExpectedBehavior["should_identify_animal"].(string); ok && expectedAnimal != "" { // Parse expected animal from behavior context (e.g., "lion or animal") expectedTerms := strings.Split(strings.ToLower(expectedAnimal), " or ") foundExpected := false for _, term := range expectedTerms { term = strings.TrimSpace(term) if term != "" && strings.Contains(content, term) { foundExpected = true break } } if !foundExpected && len(content) > 20 && len(content) < 1500 { return true, fmt.Sprintf("valid vision response but missing expected animal terms '%s', might get more specific on retry", expectedAnimal) } } if expectedObject, ok := context.ExpectedBehavior["should_identify_object"].(string); ok && expectedObject != "" { // Parse expected object from behavior context (e.g., "ant or insect") expectedTerms := strings.Split(strings.ToLower(expectedObject), " or ") foundExpected := false for _, term := range expectedTerms { term = strings.TrimSpace(term) if term != "" && strings.Contains(content, term) { foundExpected = true break } } if !foundExpected && len(content) > 15 && len(content) < 1500 { return true, fmt.Sprintf("valid vision response but missing expected object terms '%s', might get more specific on retry", expectedObject) } } } return false, "" } func (c *ContentValidationCondition) GetConditionName() string { return "ContentValidation" } // ============================================================================= // STREAMING CONDITIONS // ============================================================================= // StreamErrorCondition checks for streaming-specific errors that should trigger retries type StreamErrorCondition struct{} func (c *StreamErrorCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // Only retry on actual stream errors, not when stream is successful but response is nil if err == nil { return false, "" } // Check for specific streaming errors that indicate retry-worthy conditions // Check both the Message field and nested Error field var errorMsg string if strings.TrimSpace(err.Error.Message) != "" { errorMsg = strings.ToLower(err.Error.Message) } else if err.Error.Error != nil { errorMsg = strings.ToLower(err.Error.Error.Error()) } else { return false, "" } // Retry on connection/timeout issues during streaming if strings.Contains(errorMsg, "connection reset") || strings.Contains(errorMsg, "connection refused") || strings.Contains(errorMsg, "timeout") || strings.Contains(errorMsg, "stream closed") || strings.Contains(errorMsg, "stream interrupted") { return true, fmt.Sprintf("stream connection error: %s", errorMsg) } // Retry on temporary streaming API errors if strings.Contains(errorMsg, "rate limit") || strings.Contains(errorMsg, "quota exceeded") || strings.Contains(errorMsg, "service unavailable") || strings.Contains(errorMsg, "server overloaded") { return true, fmt.Sprintf("temporary API error: %s", errorMsg) } // Don't retry on authentication, invalid request, or other permanent errors return false, "" } func (c *StreamErrorCondition) GetConditionName() string { return "StreamError" } // IncompleteStreamCondition checks for incomplete streaming responses type IncompleteStreamCondition struct{} func (c *IncompleteStreamCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check both Chat Completions and Responses API formats if response.TextCompletionResponse == nil && response.ChatResponse == nil && response.ResponsesResponse == nil { return false, "" } // For Chat Completions API, check finish reasons in choices if response.ChatResponse != nil { for i, choice := range response.ChatResponse.Choices { if choice.FinishReason == nil { return true, fmt.Sprintf("choice %d has no finish reason (stream may be incomplete)", i) } // Check for incomplete finish reasons finishReason := string(*choice.FinishReason) if finishReason == "length" { // This might be okay depending on context, but could indicate truncation singleChoiceResponse := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ Choices: []schemas.BifrostResponseChoice{choice}, }, } choiceContent := GetResultContent(singleChoiceResponse) if len(choiceContent) < 10 { return true, fmt.Sprintf("choice %d finished due to length but content is very short", i) } } } } if response.TextCompletionResponse != nil { for i, choice := range response.TextCompletionResponse.Choices { if choice.FinishReason == nil { return true, fmt.Sprintf("choice %d has no finish reason (stream may be incomplete)", i) } finishReason := string(*choice.FinishReason) if finishReason == "length" { // This might be okay depending on context, but could indicate truncation singleChoiceResponse := &schemas.BifrostResponse{ TextCompletionResponse: &schemas.BifrostTextCompletionResponse{ Choices: []schemas.BifrostResponseChoice{choice}, }, } choiceContent := GetResultContent(singleChoiceResponse) if len(choiceContent) < 10 { return true, fmt.Sprintf("choice %d finished due to length but content is very short", i) } } } } // For Responses API, check completion status in output messages if response.ResponsesResponse != nil { for i, output := range response.ResponsesResponse.Output { if output.Status == nil { return true, fmt.Sprintf("output %d has no status (stream may be incomplete)", i) } status := *output.Status if status == "incomplete" || status == "in_progress" { return true, fmt.Sprintf("output %d has incomplete status: %s", i, status) } } } return false, "" } func (c *IncompleteStreamCondition) GetConditionName() string { return "IncompleteStream" } // ============================================================================= // SPEECH SYNTHESIS CONDITIONS // ============================================================================= // EmptySpeechCondition checks for missing or invalid audio data in speech synthesis responses type EmptySpeechCondition struct{} func (c *EmptySpeechCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // If there's an error, let other conditions handle it if err != nil { return false, "" } // No response at all if response == nil { return true, "response is nil" } // Check if speech response exists if response.SpeechResponse == nil { return true, "response has no speech data" } // Check if audio data exists and is not empty if response.SpeechResponse.Audio == nil { return true, "response has no audio data" } // Check for unreasonably small audio files (likely errors) if len(response.SpeechResponse.Audio) < 100 { return true, fmt.Sprintf("audio data too small (%d bytes), likely an error", len(response.SpeechResponse.Audio)) } return false, "" } func (c *EmptySpeechCondition) GetConditionName() string { return "EmptySpeech" } // ============================================================================= // TRANSCRIPTION CONDITIONS // ============================================================================= // EmptyTranscriptionCondition checks for missing or invalid transcription text type EmptyTranscriptionCondition struct{} func (c *EmptyTranscriptionCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // If there's an error, let other conditions handle it if err != nil { return false, "" } // No response at all if response == nil { return true, "response is nil" } // Check if transcription response exists if response.TranscriptionResponse == nil { return true, "response has no transcription data" } // Check if text exists and is not empty if response.TranscriptionResponse.Text == "" || strings.TrimSpace(response.TranscriptionResponse.Text) == "" { return true, "response has no transcription text" } // Check for unreasonably short transcriptions (likely errors) text := strings.TrimSpace(response.TranscriptionResponse.Text) if len(text) < 3 { return true, fmt.Sprintf("transcription text too short (%d chars): '%s'", len(text), text) } return false, "" } func (c *EmptyTranscriptionCondition) GetConditionName() string { return "EmptyTranscription" } // ============================================================================= // EMBEDDING CONDITIONS // ============================================================================= // EmptyEmbeddingCondition checks for missing or empty embeddings type EmptyEmbeddingCondition struct{} func (c *EmptyEmbeddingCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check if we have embedding data if response.EmbeddingResponse == nil || len(response.EmbeddingResponse.Data) == 0 { return true, "response has no embedding data" } // Check each embedding for i, data := range response.EmbeddingResponse.Data { vec, extractErr := getEmbeddingVector(data) if extractErr != nil { return true, fmt.Sprintf("embedding %d: failed to extract vector: %s", i, extractErr.Error()) } if len(vec) == 0 { return true, fmt.Sprintf("embedding %d: vector is empty", i) } // Check for all-zero vectors (sometimes indicates an error) allZero := true for _, val := range vec { if val != 0.0 { allZero = false break } } if allZero { return true, fmt.Sprintf("embedding %d: vector is all zeros", i) } } return false, "" } func (c *EmptyEmbeddingCondition) GetConditionName() string { return "EmptyEmbedding" } // InvalidEmbeddingDimensionCondition checks for inconsistent embedding dimensions type InvalidEmbeddingDimensionCondition struct { ExpectedDimension int // Expected vector dimension (0 means any) } func (c *InvalidEmbeddingDimensionCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil || response.EmbeddingResponse == nil || len(response.EmbeddingResponse.Data) == 0 { return false, "" } expectedDim := c.ExpectedDimension if expectedDim == 0 { if dim, ok := context.ExpectedBehavior["expected_dimension"].(int); ok { expectedDim = dim } } var firstDimension int // Check each embedding for i, data := range response.EmbeddingResponse.Data { vec, extractErr := getEmbeddingVector(data) if extractErr != nil { return false, "" // Let EmptyEmbeddingCondition handle this } dimension := len(vec) // Set expected dimension from first embedding if not specified if i == 0 { firstDimension = dimension if expectedDim > 0 && dimension != expectedDim { return true, fmt.Sprintf("embedding %d: got dimension %d, expected %d", i, dimension, expectedDim) } } else { // Check consistency with first embedding if dimension != firstDimension { return true, fmt.Sprintf("embedding %d: dimension %d differs from first embedding dimension %d", i, dimension, firstDimension) } } // Check for unreasonably small dimensions (likely an error) if dimension < 50 { return true, fmt.Sprintf("embedding %d: dimension %d seems too small", i, dimension) } } return false, "" } func (c *InvalidEmbeddingDimensionCondition) GetConditionName() string { return "InvalidEmbeddingDimension" } // ============================================================================= // IMAGE CONDITIONS // ============================================================================= // EmptyImageGenerationCondition checks for missing or invalid image data type EmptyImageGenerationCondition struct{} func (c *EmptyImageGenerationCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // If there's an error, let other conditions handle it if err != nil { return false, "" } // No response at all if response == nil { return true, "response is nil" } // Check if both response types are nil if response.ImageGenerationResponse == nil && response.ImageGenerationStreamResponse == nil { return true, "response has no image data" } // Check non-streaming response if response.ImageGenerationResponse != nil { if len(response.ImageGenerationResponse.Data) == 0 { return true, "response has no image data" } // Check each image has either B64JSON or URL for i, img := range response.ImageGenerationResponse.Data { if img.B64JSON == "" && img.URL == "" { return true, fmt.Sprintf("image %d has no B64JSON or URL", i) } } } // Check streaming response if response.ImageGenerationStreamResponse != nil { if response.ImageGenerationStreamResponse.B64JSON == "" && response.ImageGenerationStreamResponse.URL == "" { return true, "stream response has no B64JSON or URL" } } return false, "" } func (c *EmptyImageGenerationCondition) GetConditionName() string { return "EmptyImageGeneration" } // ============================================================================= // COUNT TOKENS CONDITIONS // ============================================================================= // EmptyCountTokensCondition checks for missing or invalid token counts type EmptyCountTokensCondition struct{} func (c *EmptyCountTokensCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { // If there's an error, let other conditions handle it if err != nil { return false, "" } // No response at all if response == nil { return true, "response is nil" } // Check if count tokens response exists if response.CountTokensResponse == nil { return true, "count tokens response is nil" } countTokensResp := response.CountTokensResponse // Check if token counts are valid if countTokensResp.InputTokens <= 0 { return true, "input_tokens is zero or negative" } // Check if total tokens is at least as large as input tokens if countTokensResp.TotalTokens != nil { if *countTokensResp.TotalTokens < countTokensResp.InputTokens { return true, fmt.Sprintf("total_tokens (%d) is less than input_tokens (%d)", *countTokensResp.TotalTokens, countTokensResp.InputTokens) } } return false, "" } func (c *EmptyCountTokensCondition) GetConditionName() string { return "EmptyCountTokens" } // InvalidCountTokensCondition checks for invalid token count data type InvalidCountTokensCondition struct{} func (c *InvalidCountTokensCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } // Check if count tokens response exists if response.CountTokensResponse == nil { return false, "" } countTokensResp := response.CountTokensResponse // Check if model is set if strings.TrimSpace(countTokensResp.Model) == "" { return true, "model field is empty" } // Check if request type is set correctly if countTokensResp.ExtraFields.RequestType != schemas.CountTokensRequest { return true, fmt.Sprintf("invalid request type: got %s, expected %s", countTokensResp.ExtraFields.RequestType, schemas.CountTokensRequest) } return false, "" } func (c *InvalidCountTokensCondition) GetConditionName() string { return "InvalidCountTokens" } // ============================================================================= // RESPONSES API CONDITIONS // These implement ResponsesRetryCondition for use with WithResponsesTestRetry // ============================================================================= // ResponsesEmptyCondition checks for empty Responses API responses type ResponsesEmptyCondition struct{} func (c *ResponsesEmptyCondition) ShouldRetry(response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil { return false, "" } if response == nil { return true, "response is nil" } content := GetResponsesContent(response) if strings.TrimSpace(content) == "" { return true, "response has empty content" } return false, "" } func (c *ResponsesEmptyCondition) GetConditionName() string { return "ResponsesEmpty" } // ResponsesFileNotProcessedCondition checks if file/document was not properly processed in Responses API type ResponsesFileNotProcessedCondition struct{} func (c *ResponsesFileNotProcessedCondition) ShouldRetry(response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } content := strings.ToLower(GetResponsesContent(response)) // Check for generic responses that don't indicate file/document processing fileProcessingFailurePhrases := []string{ "i can't read", "i cannot read", "unable to read", "can't access", "cannot access", "no file", "no document", "not able to read", "i don't see", "i cannot process", "unable to process", "can't open", "cannot open", "invalid file", "corrupted", "unsupported format", "failed to load", "no pdf", "cannot view", } for _, phrase := range fileProcessingFailurePhrases { if strings.Contains(content, phrase) { return true, fmt.Sprintf("response suggests file was not processed: contains '%s'", phrase) } } // If content is suspiciously short for document analysis if len(strings.TrimSpace(content)) < 15 { return true, "response too short for meaningful document analysis" } return false, "" } func (c *ResponsesFileNotProcessedCondition) GetConditionName() string { return "ResponsesFileNotProcessed" } // ResponsesGenericResponseCondition checks for generic/template responses in Responses API type ResponsesGenericResponseCondition struct{} func (c *ResponsesGenericResponseCondition) ShouldRetry(response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } content := strings.ToLower(GetResponsesContent(response)) // Generic phrases that suggest the model didn't engage with the specific request genericPhrases := []string{ "as an ai", "as a language model", "i'm an ai", "i am an ai", "i'm a language model", "i am a language model", "i can help you with", "how can i assist you", "what would you like to know", "is there anything else", } // Check if response starts with generic phrases (more concerning) for _, phrase := range genericPhrases { if strings.HasPrefix(content, phrase) { return true, fmt.Sprintf("response starts with generic phrase: '%s'", phrase) } } // Check for overly generic responses (short and generic) if len(strings.TrimSpace(content)) < 30 { for _, phrase := range genericPhrases { if strings.Contains(content, phrase) { return true, fmt.Sprintf("short response contains generic phrase: '%s'", phrase) } } } return false, "" } func (c *ResponsesGenericResponseCondition) GetConditionName() string { return "ResponsesGenericResponse" } // ResponsesContentValidationCondition checks if response fails basic content validation for Responses API type ResponsesContentValidationCondition struct{} func (c *ResponsesContentValidationCondition) ShouldRetry(response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string) { if err != nil || response == nil { return false, "" } content := strings.ToLower(GetResponsesContent(response)) // Skip if response is too short (other conditions will handle these) if len(content) < 10 { return false, "" } // Check for file/document processing scenarios scenarioName := strings.ToLower(context.ScenarioName) if strings.Contains(scenarioName, "file") || strings.Contains(scenarioName, "document") || strings.Contains(scenarioName, "pdf") { // Check if this test has expected keywords from the TestRetryContext if testMetadata, exists := context.TestMetadata["expected_keywords"]; exists { if expectedKeywords, ok := testMetadata.([]string); ok && len(expectedKeywords) > 0 { // Check if ANY of the expected keywords are present foundExpectedKeyword := false for _, keyword := range expectedKeywords { if strings.Contains(content, strings.ToLower(keyword)) { foundExpectedKeyword = true break } } // If valid response but missing ALL expected keywords, retry if !foundExpectedKeyword && len(content) > 20 && len(content) < 2000 { return true, fmt.Sprintf("response missing expected keywords %v, might include them on retry", expectedKeywords) } } } } return false, "" } func (c *ResponsesContentValidationCondition) GetConditionName() string { return "ResponsesContentValidation" }