first commit
This commit is contained in:
155
core/internal/llmtests/image_url.go
Normal file
155
core/internal/llmtests/image_url.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package llmtests
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
bifrost "github.com/maximhq/bifrost/core"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
// RunImageURLTest executes the image URL test scenario using dual API testing framework
|
||||
func RunImageURLTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
|
||||
if !testConfig.Scenarios.ImageURL {
|
||||
t.Logf("Image URL not supported for provider %s", testConfig.Provider)
|
||||
return
|
||||
}
|
||||
|
||||
t.Run("ImageURL", func(t *testing.T) {
|
||||
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
|
||||
t.Parallel()
|
||||
}
|
||||
|
||||
// Create messages for both APIs using the isResponsesAPI flag
|
||||
chatMessages := []schemas.ChatMessage{
|
||||
CreateImageChatMessage("What do you see in this image?", TestImageURL),
|
||||
}
|
||||
responsesMessages := []schemas.ResponsesMessage{
|
||||
CreateImageResponsesMessage("What do you see in this image?", TestImageURL),
|
||||
}
|
||||
|
||||
// Use retry framework for vision requests (can be flaky)
|
||||
retryConfig := GetTestRetryConfigForScenario("ImageURL", testConfig)
|
||||
retryContext := TestRetryContext{
|
||||
ScenarioName: "ImageURL",
|
||||
ExpectedBehavior: map[string]interface{}{
|
||||
"should_describe_image": true,
|
||||
"should_identify_object": "ant or insect",
|
||||
"vision_processing": true,
|
||||
},
|
||||
TestMetadata: map[string]interface{}{
|
||||
"provider": testConfig.Provider,
|
||||
"model": testConfig.VisionModel,
|
||||
"image_type": "url",
|
||||
"test_image": TestImageURL,
|
||||
"expected_keywords": []string{"ant", "insect", "bug", "arthropod"}, // 🎯 Test-specific retry keywords
|
||||
},
|
||||
}
|
||||
|
||||
// Enhanced validation for vision responses - should identify ant OR insect (same for both APIs)
|
||||
expectations := VisionExpectations([]string{}) // Start with base vision expectations
|
||||
expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
|
||||
expectations.ShouldContainKeywords = nil // Clear strict keyword requirement
|
||||
expectations.ShouldContainAnyOf = []string{"ant", "insect", "bug", "arthropod"} // Accept any valid identification
|
||||
expectations.ShouldNotContainWords = append(expectations.ShouldNotContainWords, []string{"cannot see", "unable to view", "no image"}...) // Vision failure indicators
|
||||
|
||||
// Create operations for both Chat Completions and Responses API
|
||||
chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
|
||||
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
||||
chatReq := &schemas.BifrostChatRequest{
|
||||
Provider: testConfig.Provider,
|
||||
Model: testConfig.VisionModel,
|
||||
Params: &schemas.ChatParameters{
|
||||
MaxCompletionTokens: bifrost.Ptr(200),
|
||||
},
|
||||
Fallbacks: testConfig.Fallbacks,
|
||||
}
|
||||
chatReq.Input = chatMessages
|
||||
return client.ChatCompletionRequest(bfCtx, chatReq)
|
||||
}
|
||||
|
||||
responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
|
||||
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
||||
responsesReq := &schemas.BifrostResponsesRequest{
|
||||
Provider: testConfig.Provider,
|
||||
Model: testConfig.VisionModel,
|
||||
Params: &schemas.ResponsesParameters{
|
||||
MaxOutputTokens: bifrost.Ptr(200),
|
||||
},
|
||||
Fallbacks: testConfig.Fallbacks,
|
||||
}
|
||||
responsesReq.Input = responsesMessages
|
||||
return client.ResponsesRequest(bfCtx, responsesReq)
|
||||
}
|
||||
|
||||
// Execute dual API test - passes only if BOTH APIs succeed
|
||||
result := WithDualAPITestRetry(t,
|
||||
retryConfig,
|
||||
retryContext,
|
||||
expectations,
|
||||
"ImageURL",
|
||||
chatOperation,
|
||||
responsesOperation)
|
||||
|
||||
// Validate both APIs succeeded
|
||||
if !result.BothSucceeded {
|
||||
var errors []string
|
||||
if result.ChatCompletionsError != nil {
|
||||
errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
|
||||
}
|
||||
if result.ResponsesAPIError != nil {
|
||||
errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
|
||||
}
|
||||
if len(errors) == 0 {
|
||||
errors = append(errors, "One or both APIs failed validation (see logs above)")
|
||||
}
|
||||
t.Fatalf("❌ ImageURL dual API test failed: %v", errors)
|
||||
}
|
||||
|
||||
// Additional vision-specific validation using universal content extraction
|
||||
validateChatImageProcessing := func(response *schemas.BifrostChatResponse, apiName string) {
|
||||
content := GetChatContent(response)
|
||||
validateImageProcessingContent(t, content, apiName)
|
||||
}
|
||||
|
||||
validateResponsesImageProcessing := func(response *schemas.BifrostResponsesResponse, apiName string) {
|
||||
content := GetResponsesContent(response)
|
||||
validateImageProcessingContent(t, content, apiName)
|
||||
}
|
||||
|
||||
// Validate both API responses
|
||||
if result.ChatCompletionsResponse != nil {
|
||||
validateChatImageProcessing(result.ChatCompletionsResponse, "Chat Completions")
|
||||
}
|
||||
|
||||
if result.ResponsesAPIResponse != nil {
|
||||
validateResponsesImageProcessing(result.ResponsesAPIResponse, "Responses")
|
||||
}
|
||||
|
||||
t.Logf("🎉 Both Chat Completions and Responses APIs passed ImageURL test!")
|
||||
})
|
||||
}
|
||||
|
||||
// validateImageProcessingContent logs whether a vision response appears to
// describe the test image. It reports only through t.Logf and never fails
// the test, because models may describe the image in different words.
//
// content is the response text to inspect; apiName labels the API in the log
// lines.
//
// "ant" and "insect" are matched as whole words (singular or plural) rather
// than as raw substrings, so text such as "plant" or "elephant" is not
// reported as a successful identification.
func validateImageProcessingContent(t *testing.T, content string, apiName string) {
	t.Helper()

	if imageContentMentionsWord(content, "ant", "insect") {
		t.Logf("✅ %s vision model successfully identified the object in image: %s", apiName, content)
		return
	}

	// Log a warning but don't fail - some models might describe the image differently.
	t.Logf("⚠️ %s vision model may not have explicitly identified 'ant' or 'insect': %s", apiName, content)

	// Looser hints that still suggest a sensible description. "arthropod" is
	// included because RunImageURLTest accepts it as a valid identification.
	lowerContent := strings.ToLower(content)
	for _, hint := range []string{"small", "creature", "animal", "bug", "arthropod"} {
		if strings.Contains(lowerContent, hint) {
			t.Logf("✅ But %s model provided a reasonable description of the image", apiName)
			return
		}
	}

	t.Logf("❌ %s model may have failed to properly process the image", apiName)
}

// imageContentMentionsWord reports whether text contains any of words as a
// whole word, ignoring case and accepting a trailing plural "s". The words
// must be given in lower case. Text is split on every character outside a-z.
func imageContentMentionsWord(text string, words ...string) bool {
	tokens := strings.FieldsFunc(strings.ToLower(text), func(r rune) bool {
		return r < 'a' || r > 'z'
	})
	for _, tok := range tokens {
		for _, w := range words {
			if tok == w || tok == w+"s" {
				return true
			}
		}
	}
	return false
}
|
||||
Reference in New Issue
Block a user