package llmtests import ( "encoding/json" "fmt" "regexp" "strings" "testing" "github.com/maximhq/bifrost/core/schemas" ) // ============================================================================= // RESPONSE VALIDATION FRAMEWORK // ============================================================================= // ResponseExpectations defines what we expect from a response type ResponseExpectations struct { // Basic structure expectations ShouldHaveContent bool // Response should have non-empty content ExpectedChoiceCount int // Expected number of choices (0 = any) ExpectedFinishReason *string // Expected finish reason // Content expectations ShouldContainKeywords []string // Content should contain ALL these keywords (AND logic) ShouldContainAnyOf []string // Content should contain AT LEAST ONE of these keywords (OR logic) ShouldNotContainWords []string // Content should NOT contain these words ContentPattern *regexp.Regexp // Content should match this pattern IsRelevantToPrompt bool // Content should be relevant to the original prompt // Tool calling expectations ExpectedToolCalls []ToolCallExpectation // Expected tool calls ShouldNotHaveFunctionCalls bool // Should not have any function calls // Technical expectations ShouldHaveUsageStats bool // Should have token usage information ShouldHaveTimestamps bool // Should have created timestamp ShouldHaveModel bool // Should have model field ShouldHaveLatency bool // Should have latency information in ExtraFields // Raw request/response expectations ShouldHaveRawRequest bool // Should have non-nil, compact JSON rawRequest in ExtraFields ShouldHaveRawResponse bool // Should have non-nil, compact JSON rawResponse in ExtraFields // Provider-specific expectations ProviderSpecific map[string]interface{} // Provider-specific validation data } // ToolCallExpectation defines expectations for a specific tool call type ToolCallExpectation struct { FunctionName string // Expected function name RequiredArgs []string // Arguments that must be present ForbiddenArgs []string // Arguments that should NOT be present ArgumentTypes map[string]string // Expected types for arguments ("string", "number", "boolean", "array", "object") ArgumentValues map[string]interface{} // Specific expected values for arguments ValidateArgsJSON bool // Whether arguments should be valid JSON } // ValidationResult contains the results of response validation type ValidationResult struct { Passed bool // Overall validation result Errors []string // List of validation errors Warnings []string // List of validation warnings MetricsCollected map[string]interface{} // Collected metrics for analysis } // ============================================================================= // MAIN VALIDATION FUNCTIONS // ============================================================================= // ValidateChatResponse performs comprehensive validation for chat completion responses func ValidateChatResponse(t *testing.T, response *schemas.BifrostChatResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate basic structure validateChatBasicStructure(t, response, expectations, &result, scenarioName) // Validate content validateChatContent(t, response, expectations, &result) // Validate tool calls validateChatToolCalls(t, response, expectations, &result) // Validate technical fields validateChatTechnicalFields(t, response, expectations, &result) // Collect metrics collectChatResponseMetrics(response, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateTextCompletionResponse performs comprehensive validation for text completion responses func ValidateTextCompletionResponse(t *testing.T, response *schemas.BifrostTextCompletionResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate basic structure validateTextCompletionBasicStructure(t, response, expectations, &result) // Validate content validateTextCompletionContent(t, response, expectations, &result) // Validate technical fields validateTextCompletionTechnicalFields(t, response, expectations, &result) // Collect metrics collectTextCompletionResponseMetrics(response, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateResponsesResponse performs comprehensive validation for Responses API responses func ValidateResponsesResponse(t *testing.T, response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate basic structure validateResponsesBasicStructure(response, expectations, &result) // Validate content validateResponsesContent(t, response, expectations, &result) // Validate tool calls validateResponsesToolCalls(t, response, expectations, &result) // Validate technical fields validateResponsesTechnicalFields(t, response, expectations, &result) // Collect metrics collectResponsesResponseMetrics(response, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateSpeechResponse performs comprehensive validation for speech synthesis responses func ValidateSpeechResponse(t *testing.T, response *schemas.BifrostSpeechResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate speech synthesis specific fields validateSpeechSynthesisResponse(t, response, expectations, &result) // Collect metrics collectSpeechResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateImageGenerationResponse performs comprehensive validation for image generation responses func ValidateImageGenerationResponse(t *testing.T, response *schemas.BifrostImageGenerationResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate image generation specific fields validateImageGenerationFields(t, response, expectations, &result) // Collect metrics collectImageGenerationResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateTranscriptionResponse performs comprehensive validation for transcription responses func ValidateTranscriptionResponse(t *testing.T, response *schemas.BifrostTranscriptionResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate transcription specific fields validateTranscriptionFields(t, response, expectations, &result) // Collect metrics collectTranscriptionResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateListModelsResponse performs comprehensive validation for list models responses func ValidateListModelsResponse(t *testing.T, response *schemas.BifrostListModelsResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate list models specific fields validateListModelsFields(t, response, expectations, &result) // Collect metrics collectListModelsResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateEmbeddingResponse performs comprehensive validation for embedding responses func ValidateEmbeddingResponse(t *testing.T, response *schemas.BifrostEmbeddingResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate embedding specific fields validateEmbeddingFields(t, response, expectations, &result) // Collect metrics collectEmbeddingResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) // Log results logValidationResults(t, result, scenarioName) return result } // ValidateCountTokensResponse performs comprehensive validation for count tokens responses func ValidateCountTokensResponse(t *testing.T, response *schemas.BifrostCountTokensResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } // If there's an error when we expected success, that's a failure if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } // If response is nil when we expected success, that's a failure if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } validateCountTokensFields(t, response, expectations, &result) collectCountTokensResponseMetrics(response, &result) // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ============================================================================= // VALIDATION HELPER FUNCTIONS - CHAT RESPONSE // ============================================================================= // validateChatBasicStructure checks the basic structure of the chat response func validateChatBasicStructure(t *testing.T, response *schemas.BifrostChatResponse, expectations ResponseExpectations, result *ValidationResult, scenarioName string) { // Object is a constant bifrost schema marker ("chat.completion" / "chat.completion.chunk"). // For streaming scenarios, per-chunk validation in chat_completion_stream.go covers this — // the aggregated/consolidated response built by the harness is a synthetic construct and // does not carry provider-originating semantics. Skip the check there to avoid asserting // that the harness remembered to copy a constant forward. if !strings.Contains(scenarioName, "Stream") { if response.Object == "" { result.Passed = false result.Errors = append(result.Errors, "Object field is empty in chat completion response") } } // Check choice count if expectations.ExpectedChoiceCount > 0 { actualCount := 0 if response.Choices != nil { actualCount = len(response.Choices) } if actualCount != expectations.ExpectedChoiceCount { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected %d choices, got %d", expectations.ExpectedChoiceCount, actualCount)) } } // Check finish reasons if expectations.ExpectedFinishReason != nil && response.Choices != nil { for i, choice := range response.Choices { if choice.FinishReason == nil { result.Warnings = append(result.Warnings, fmt.Sprintf("Choice %d has no finish reason", i)) } else if *choice.FinishReason != *expectations.ExpectedFinishReason { result.Warnings = append(result.Warnings, fmt.Sprintf("Choice %d has finish reason '%s', expected '%s'", i, *choice.FinishReason, *expectations.ExpectedFinishReason)) } } } } // validateChatContent checks the content of the chat response func validateChatContent(t *testing.T, response *schemas.BifrostChatResponse, expectations ResponseExpectations, result *ValidationResult) { // Skip content validation for responses that don't have text content if !expectations.ShouldHaveContent { return } content := GetChatContent(response) // Check if content exists when expected if expectations.ShouldHaveContent { if strings.TrimSpace(content) == "" { result.Passed = false result.Errors = append(result.Errors, "Expected content but got empty response") return } } // Check required keywords (AND logic - ALL must be present) // Note: Converted to warnings as LLMs are non-deterministic and tests focus on functionality lowerContent := strings.ToLower(content) for _, keyword := range expectations.ShouldContainKeywords { if !strings.Contains(lowerContent, strings.ToLower(keyword)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain keyword '%s' but doesn't (LLMs are non-deterministic). Actual content: %s", keyword, truncateContentForError(content, 200))) } } // Check OR keywords (OR logic - AT LEAST ONE must be present) // Note: Converted to warnings as LLMs are non-deterministic if len(expectations.ShouldContainAnyOf) > 0 { foundAny := false for _, keyword := range expectations.ShouldContainAnyOf { if strings.Contains(lowerContent, strings.ToLower(keyword)) { foundAny = true break } } if !foundAny { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain at least one of these keywords: %v, but doesn't (LLMs are non-deterministic). Actual content: %s", expectations.ShouldContainAnyOf, truncateContentForError(content, 200))) } } // Check forbidden words - Keep as warnings since these are often false positives with LLMs for _, word := range expectations.ShouldNotContainWords { if strings.Contains(lowerContent, strings.ToLower(word)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content contains word '%s' which was not expected (may be false positive with LLMs). Actual content: %s", word, truncateContentForError(content, 200))) } } // Check content pattern - Converted to warnings if expectations.ContentPattern != nil { if !expectations.ContentPattern.MatchString(content) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content doesn't match expected pattern: %s (LLMs are non-deterministic). Actual content: %s", expectations.ContentPattern.String(), truncateContentForError(content, 200))) } } // Store content for metrics result.MetricsCollected["content_word_count"] = len(strings.Fields(content)) } // validateChatToolCalls checks tool calling aspects of chat response func validateChatToolCalls(t *testing.T, response *schemas.BifrostChatResponse, expectations ResponseExpectations, result *ValidationResult) { totalToolCalls := 0 // Count tool calls from Chat Completions API if response.Choices != nil { for _, choice := range response.Choices { if choice.Message.ChatAssistantMessage != nil && choice.Message.ChatAssistantMessage.ToolCalls != nil { totalToolCalls += len(choice.Message.ChatAssistantMessage.ToolCalls) } } } // Check if we should have no function calls if expectations.ShouldNotHaveFunctionCalls && totalToolCalls > 0 { result.Passed = false actualToolNames := extractChatToolCallNames(response) result.Errors = append(result.Errors, fmt.Sprintf("Expected no function calls but found %d: %v", totalToolCalls, actualToolNames)) } // Validate specific tool calls if len(expectations.ExpectedToolCalls) > 0 { validateChatSpecificToolCalls(response, expectations.ExpectedToolCalls, result) } result.MetricsCollected["tool_call_count"] = totalToolCalls } // validateChatTechnicalFields checks technical aspects of the chat response func validateChatTechnicalFields(t *testing.T, response *schemas.BifrostChatResponse, expectations ResponseExpectations, result *ValidationResult) { // Check usage stats if expectations.ShouldHaveUsageStats { if response.Usage == nil { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected usage statistics but not present (provider: %s)", response.ExtraFields.Provider)) } else { // Validate usage makes sense if response.Usage.TotalTokens < response.Usage.PromptTokens { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Total tokens (%d) less than prompt tokens (%d)", response.Usage.TotalTokens, response.Usage.PromptTokens)) } if response.Usage.TotalTokens < response.Usage.CompletionTokens { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Total tokens (%d) less than completion tokens (%d)", response.Usage.TotalTokens, response.Usage.CompletionTokens)) } } } // Check timestamps if expectations.ShouldHaveTimestamps { if response.Created == 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected created timestamp but not present (provider: %s)", response.ExtraFields.Provider)) } } // Check model field if expectations.ShouldHaveModel { if strings.TrimSpace(response.Model) == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected model field but not present or empty (provider: %s)", response.ExtraFields.Provider)) } } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, result) // Check cached tokens percentage (for prompt caching tests) if expectations.ProviderSpecific != nil { if minPercentage, ok := expectations.ProviderSpecific["min_cached_tokens_percentage"].(float64); ok { if response.Usage == nil { result.Passed = false result.Errors = append(result.Errors, "Expected usage statistics for cached tokens validation but not present") } else if response.Usage.PromptTokensDetails == nil { result.Passed = false result.Errors = append(result.Errors, "Expected prompt tokens details for cached tokens validation but not present") } else { cachedTokens := response.Usage.PromptTokensDetails.CachedReadTokens + response.Usage.PromptTokensDetails.CachedWriteTokens promptTokens := response.Usage.PromptTokens if promptTokens > 0 { cachedPercentage := float64(cachedTokens) / float64(promptTokens) result.MetricsCollected["cached_tokens"] = cachedTokens result.MetricsCollected["prompt_tokens"] = promptTokens result.MetricsCollected["cached_percentage"] = cachedPercentage if cachedPercentage < minPercentage { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Cached tokens percentage %.2f%% is below required minimum %.2f%% (cached: %d, prompt: %d)", cachedPercentage*100, minPercentage*100, cachedTokens, promptTokens)) } } else { result.Passed = false result.Errors = append(result.Errors, "Prompt tokens is 0, cannot validate cached tokens percentage") } } } } } // collectChatResponseMetrics collects metrics from the chat response for analysis func collectChatResponseMetrics(response *schemas.BifrostChatResponse, result *ValidationResult) { result.MetricsCollected["choice_count"] = len(response.Choices) result.MetricsCollected["has_usage"] = response.Usage != nil result.MetricsCollected["has_model"] = response.Model != "" result.MetricsCollected["has_timestamp"] = response.Created > 0 if response.Usage != nil { result.MetricsCollected["total_tokens"] = response.Usage.TotalTokens result.MetricsCollected["prompt_tokens"] = response.Usage.PromptTokens result.MetricsCollected["completion_tokens"] = response.Usage.CompletionTokens } } // ============================================================================= // VALIDATION HELPER FUNCTIONS - TEXT COMPLETION RESPONSE // ============================================================================= // validateTextCompletionBasicStructure checks the basic structure of the text completion response func validateTextCompletionBasicStructure(t *testing.T, response *schemas.BifrostTextCompletionResponse, expectations ResponseExpectations, result *ValidationResult) { // Check choice count if expectations.ExpectedChoiceCount > 0 { actualCount := 0 if response.Choices != nil { actualCount = len(response.Choices) } if actualCount != expectations.ExpectedChoiceCount { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected %d choices, got %d", expectations.ExpectedChoiceCount, actualCount)) } } // Check finish reasons if expectations.ExpectedFinishReason != nil && response.Choices != nil { for i, choice := range response.Choices { if choice.FinishReason == nil { result.Warnings = append(result.Warnings, fmt.Sprintf("Choice %d has no finish reason", i)) } else if *choice.FinishReason != *expectations.ExpectedFinishReason { result.Warnings = append(result.Warnings, fmt.Sprintf("Choice %d has finish reason '%s', expected '%s'", i, *choice.FinishReason, *expectations.ExpectedFinishReason)) } } } } // validateTextCompletionContent checks the content of the text completion response func validateTextCompletionContent(t *testing.T, response *schemas.BifrostTextCompletionResponse, expectations ResponseExpectations, result *ValidationResult) { // Skip content validation for responses that don't have text content if !expectations.ShouldHaveContent { return } content := GetTextCompletionContent(response) // Check if content exists when expected if expectations.ShouldHaveContent { if strings.TrimSpace(content) == "" { result.Passed = false result.Errors = append(result.Errors, "Expected content but got empty response") return } } // Check required keywords (AND logic - ALL must be present) // Note: Converted to warnings as LLMs are non-deterministic and tests focus on functionality lowerContent := strings.ToLower(content) for _, keyword := range expectations.ShouldContainKeywords { if !strings.Contains(lowerContent, strings.ToLower(keyword)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain keyword '%s' but doesn't (LLMs are non-deterministic). Actual content: %s", keyword, truncateContentForError(content, 200))) } } // Check OR keywords (OR logic - AT LEAST ONE must be present) // Note: Converted to warnings as LLMs are non-deterministic if len(expectations.ShouldContainAnyOf) > 0 { foundAny := false for _, keyword := range expectations.ShouldContainAnyOf { if strings.Contains(lowerContent, strings.ToLower(keyword)) { foundAny = true break } } if !foundAny { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain at least one of these keywords: %v, but doesn't (LLMs are non-deterministic). Actual content: %s", expectations.ShouldContainAnyOf, truncateContentForError(content, 200))) } } // Check forbidden words - Keep as warnings since these are often false positives with LLMs for _, word := range expectations.ShouldNotContainWords { if strings.Contains(lowerContent, strings.ToLower(word)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content contains word '%s' which was not expected (may be false positive with LLMs). Actual content: %s", word, truncateContentForError(content, 200))) } } // Check content pattern - Converted to warnings if expectations.ContentPattern != nil { if !expectations.ContentPattern.MatchString(content) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content doesn't match expected pattern: %s (LLMs are non-deterministic). Actual content: %s", expectations.ContentPattern.String(), truncateContentForError(content, 200))) } } // Store content for metrics result.MetricsCollected["content_word_count"] = len(strings.Fields(content)) } // validateTextCompletionTechnicalFields checks technical aspects of the text completion response func validateTextCompletionTechnicalFields(t *testing.T, response *schemas.BifrostTextCompletionResponse, expectations ResponseExpectations, result *ValidationResult) { // Check usage stats if expectations.ShouldHaveUsageStats { if response.Usage == nil { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected usage statistics but not present (provider: %s)", response.ExtraFields.Provider)) } else { // Validate usage makes sense if response.Usage.TotalTokens < response.Usage.PromptTokens { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Total tokens (%d) less than prompt tokens (%d)", response.Usage.TotalTokens, response.Usage.PromptTokens)) } if response.Usage.TotalTokens < response.Usage.CompletionTokens { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Total tokens (%d) less than completion tokens (%d)", response.Usage.TotalTokens, response.Usage.CompletionTokens)) } } } // Check timestamps - Text completion responses don't have a Created field in the schema // so we skip timestamp validation for text completions regardless of the expectation // Check model field if expectations.ShouldHaveModel { if strings.TrimSpace(response.Model) == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected model field but not present or empty (provider: %s)", response.ExtraFields.Provider)) } } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, result) } // collectTextCompletionResponseMetrics collects metrics from the text completion response for analysis func collectTextCompletionResponseMetrics(response *schemas.BifrostTextCompletionResponse, result *ValidationResult) { result.MetricsCollected["choice_count"] = len(response.Choices) result.MetricsCollected["has_usage"] = response.Usage != nil result.MetricsCollected["has_model"] = response.Model != "" result.MetricsCollected["has_timestamp"] = false // Text completion responses don't have timestamps if response.Usage != nil { result.MetricsCollected["total_tokens"] = response.Usage.TotalTokens result.MetricsCollected["prompt_tokens"] = response.Usage.PromptTokens result.MetricsCollected["completion_tokens"] = response.Usage.CompletionTokens } } // ============================================================================= // VALIDATION HELPER FUNCTIONS - RESPONSES API // ============================================================================= // validateResponsesBasicStructure checks the basic structure of the Responses API response func validateResponsesBasicStructure(response *schemas.BifrostResponsesResponse, expectations ResponseExpectations, result *ValidationResult) { // Check that Object field is not empty (should be "response") if response.Object == "" { result.Passed = false result.Errors = append(result.Errors, "Object field is empty in responses response") } // Check choice count if expectations.ExpectedChoiceCount > 0 { actualCount := 0 if response.Output != nil { // For Responses API, count "logical choices" instead of raw message count // Group related messages (text + tool calls) as one logical choice actualCount = countLogicalChoicesInResponsesAPI(response.Output) } if actualCount != expectations.ExpectedChoiceCount { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected %d choices, got %d", expectations.ExpectedChoiceCount, actualCount)) } } provider := response.ExtraFields.Provider model := response.ExtraFields.ResolvedModelUsed // Verify top level status is present for OpenAI and Azure with non-Claude models if provider != "" && (provider == schemas.OpenAI || provider == schemas.Azure) && !strings.Contains(strings.ToLower(model), "claude") { if response.Status == nil { result.Passed = false result.Errors = append(result.Errors, "Expected status but not present") } } } // validateResponsesContent checks the content of the Responses API response func validateResponsesContent(t *testing.T, response *schemas.BifrostResponsesResponse, expectations ResponseExpectations, result *ValidationResult) { // Skip content validation for responses that don't have text content if !expectations.ShouldHaveContent { return } content := GetResponsesContent(response) // Check if content exists when expected if expectations.ShouldHaveContent { if strings.TrimSpace(content) == "" { result.Passed = false result.Errors = append(result.Errors, "Expected content but got empty response") return } } // Check required keywords (AND logic - ALL must be present) // Note: Converted to warnings as LLMs are non-deterministic and tests focus on functionality lowerContent := strings.ToLower(content) for _, keyword := range expectations.ShouldContainKeywords { if !strings.Contains(lowerContent, strings.ToLower(keyword)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain keyword '%s' but doesn't (LLMs are non-deterministic). Actual content: %s", keyword, truncateContentForError(content, 200))) } } // Check OR keywords (OR logic - AT LEAST ONE must be present) // Note: Converted to warnings as LLMs are non-deterministic if len(expectations.ShouldContainAnyOf) > 0 { foundAny := false for _, keyword := range expectations.ShouldContainAnyOf { if strings.Contains(lowerContent, strings.ToLower(keyword)) { foundAny = true break } } if !foundAny { result.Warnings = append(result.Warnings, fmt.Sprintf("Content expected to contain at least one of these keywords: %v, but doesn't (LLMs are non-deterministic). Actual content: %s", expectations.ShouldContainAnyOf, truncateContentForError(content, 200))) } } // Check forbidden words - Keep as warnings since these are often false positives with LLMs for _, word := range expectations.ShouldNotContainWords { if strings.Contains(lowerContent, strings.ToLower(word)) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content contains word '%s' which was not expected (may be false positive with LLMs). Actual content: %s", word, truncateContentForError(content, 200))) } } // Check content pattern - Converted to warnings if expectations.ContentPattern != nil { if !expectations.ContentPattern.MatchString(content) { result.Warnings = append(result.Warnings, fmt.Sprintf("Content doesn't match expected pattern: %s (LLMs are non-deterministic). Actual content: %s", expectations.ContentPattern.String(), truncateContentForError(content, 200))) } } // Store content for metrics result.MetricsCollected["content_word_count"] = len(strings.Fields(content)) } // validateResponsesToolCalls checks tool calling aspects of Responses API response func validateResponsesToolCalls(t *testing.T, response *schemas.BifrostResponsesResponse, expectations ResponseExpectations, result *ValidationResult) { totalToolCalls := 0 // Count tool calls from Responses API if response.Output != nil { for _, output := range response.Output { // Check if this message contains tool call data regardless of Type if output.ResponsesToolMessage != nil { totalToolCalls++ } } } // Check if we should have no function calls if expectations.ShouldNotHaveFunctionCalls && totalToolCalls > 0 { result.Passed = false actualToolNames := extractResponsesToolCallNames(response) result.Errors = append(result.Errors, fmt.Sprintf("Expected no function calls but found %d: %v", totalToolCalls, actualToolNames)) } // Validate specific tool calls if len(expectations.ExpectedToolCalls) > 0 { validateResponsesSpecificToolCalls(response, expectations.ExpectedToolCalls, result) } result.MetricsCollected["tool_call_count"] = totalToolCalls } // validateResponsesTechnicalFields checks technical aspects of the Responses API response func validateResponsesTechnicalFields(t *testing.T, response *schemas.BifrostResponsesResponse, expectations ResponseExpectations, result *ValidationResult) { // Check usage stats if expectations.ShouldHaveUsageStats { if response.Usage == nil { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected usage statistics but not present (provider: %s)", response.ExtraFields.Provider)) } } // Check timestamps if expectations.ShouldHaveTimestamps { if response.CreatedAt == 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected created timestamp but not present (provider: %s)", response.ExtraFields.Provider)) } } // Check model field if expectations.ShouldHaveModel { if strings.TrimSpace(response.Model) == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected model field but not present or empty (provider: %s)", response.ExtraFields.Provider)) } } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, result) } // collectResponsesResponseMetrics collects metrics from the Responses API response for analysis func collectResponsesResponseMetrics(response *schemas.BifrostResponsesResponse, result *ValidationResult) { if response.Output != nil { result.MetricsCollected["choice_count"] = len(response.Output) } result.MetricsCollected["has_usage"] = response.Usage != nil result.MetricsCollected["has_timestamp"] = response.CreatedAt > 0 if response.Usage != nil { // Responses API has different usage structure result.MetricsCollected["usage_present"] = true } } // ============================================================================= // VALIDATION HELPER FUNCTIONS - SPEECH RESPONSE // ============================================================================= // validateSpeechSynthesisResponse validates speech synthesis responses func validateSpeechSynthesisResponse(t *testing.T, response *schemas.BifrostSpeechResponse, expectations ResponseExpectations, result *ValidationResult) { // Check if response has speech data if response.Audio == nil { result.Passed = false result.Errors = append(result.Errors, "Speech synthesis response missing Audio field") return } // Check if audio data exists shouldHaveAudio, _ := expectations.ProviderSpecific["should_have_audio"].(bool) if shouldHaveAudio && response.Audio == nil { result.Passed = false result.Errors = append(result.Errors, "Speech synthesis response missing audio data") return } // Check minimum audio bytes if minBytes, ok := expectations.ProviderSpecific["min_audio_bytes"].(int); ok { if response.Audio != nil { actualSize := len(response.Audio) if actualSize < minBytes { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Audio data too small: got %d bytes, expected at least %d", actualSize, minBytes)) } else { result.MetricsCollected["audio_bytes"] = actualSize } } } // Validate audio format if specified if expectedFormat, ok := expectations.ProviderSpecific["expected_format"].(string); ok { // This could be extended to validate actual audio format based on file headers result.MetricsCollected["expected_audio_format"] = expectedFormat } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } result.MetricsCollected["speech_validation"] = "completed" } // collectSpeechResponseMetrics collects metrics from the speech response for analysis func collectSpeechResponseMetrics(response *schemas.BifrostSpeechResponse, result *ValidationResult) { result.MetricsCollected["has_audio"] = response.Audio != nil if response.Audio != nil { result.MetricsCollected["audio_size"] = len(response.Audio) } } // ============================================================================= // VALIDATION HELPER FUNCTIONS - TRANSCRIPTION RESPONSE // ============================================================================= // validateTranscriptionFields validates transcription responses func validateTranscriptionFields(t *testing.T, response *schemas.BifrostTranscriptionResponse, expectations ResponseExpectations, result *ValidationResult) { // Check if transcribed text exists shouldHaveTranscription, _ := expectations.ProviderSpecific["should_have_transcription"].(bool) if shouldHaveTranscription && response.Text == "" { result.Passed = false result.Errors = append(result.Errors, "Transcription response missing transcribed text") return } // Check minimum transcription length if minLength, ok := expectations.ProviderSpecific["min_transcription_length"].(int); ok { actualLength := len(response.Text) if actualLength < minLength { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Transcribed text too short: got %d characters, expected at least %d", actualLength, minLength)) } else { result.MetricsCollected["transcription_length"] = actualLength } } // Check for common transcription failure indicators transcribedText := strings.ToLower(response.Text) for _, errorPhrase := range expectations.ShouldNotContainWords { if strings.Contains(transcribedText, errorPhrase) { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Transcribed text contains error indicator: '%s'", errorPhrase)) } } // Validate additional transcription fields if available if response.Language != nil { result.MetricsCollected["detected_language"] = *response.Language } if response.Duration != nil { result.MetricsCollected["audio_duration"] = *response.Duration } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } result.MetricsCollected["transcription_validation"] = "completed" } // collectTranscriptionResponseMetrics collects metrics from the transcription response for analysis func collectTranscriptionResponseMetrics(response *schemas.BifrostTranscriptionResponse, result *ValidationResult) { result.MetricsCollected["has_text"] = response.Text != "" result.MetricsCollected["text_length"] = len(response.Text) result.MetricsCollected["has_language"] = response.Language != nil result.MetricsCollected["has_duration"] = response.Duration != nil } // ============================================================================= // VALIDATION HELPER FUNCTIONS - IMAGE GENERATION RESPONSE // ============================================================================= func validateImageGenerationFields(t *testing.T, response *schemas.BifrostImageGenerationResponse, expectations ResponseExpectations, result *ValidationResult) { // Check if response has image data if len(response.Data) == 0 { result.Passed = false result.Errors = append(result.Errors, "Image generation response missing image data") return } // Check each image has either B64JSON or URL for i, img := range response.Data { if img.B64JSON == "" && img.URL == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Image %d has no B64JSON or URL", i)) } } // Check minimum number of images if specified if expectations.ProviderSpecific != nil { if minImagesVal, ok := expectations.ProviderSpecific["min_images"]; ok { var minImages int var parseErr error // Use type switch to handle various numeric types switch v := minImagesVal.(type) { case int: minImages = v case int64: minImages = int(v) case float64: minImages = int(v) case json.Number: var parsed int64 parsed, parseErr = v.Int64() if parseErr == nil { minImages = int(parsed) } default: parseErr = fmt.Errorf("unsupported type for min_images: %T", v) } if parseErr != nil { // Skip the min_images check if conversion fails, but record a warning result.Errors = append(result.Errors, fmt.Sprintf("Failed to parse min_images: %v (skipping check)", parseErr)) } else { actualCount := len(response.Data) result.MetricsCollected["image_count"] = actualCount if actualCount < minImages { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Too few images: got %d, expected at least %d", actualCount, minImages)) } } } } // Validate image size if specified if expectedSize, ok := expectations.ProviderSpecific["expected_size"].(string); ok { result.MetricsCollected["expected_size"] = expectedSize // Note: Actual size validation would require downloading/decoding images } // Check model field if expectations.ShouldHaveModel { if strings.TrimSpace(response.Model) == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected model field but not present or empty (provider: %s)", response.ExtraFields.Provider)) } } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } result.MetricsCollected["image_generation_validation"] = "completed" } func collectImageGenerationResponseMetrics(response *schemas.BifrostImageGenerationResponse, result *ValidationResult) { result.MetricsCollected["image_count"] = len(response.Data) result.MetricsCollected["has_images"] = len(response.Data) > 0 // Count images with URLs vs B64JSON urlCount := 0 b64Count := 0 for _, img := range response.Data { if img.URL != "" { urlCount++ } if img.B64JSON != "" { b64Count++ } } result.MetricsCollected["images_with_url"] = urlCount result.MetricsCollected["images_with_b64"] = b64Count if response.Usage != nil { result.MetricsCollected["input_tokens"] = response.Usage.InputTokens result.MetricsCollected["output_tokens"] = response.Usage.OutputTokens result.MetricsCollected["total_tokens"] = response.Usage.TotalTokens } } // ============================================================================= // VALIDATION HELPER FUNCTIONS - EMBEDDING RESPONSE // ============================================================================= // intFromProviderSpecific coerces provider-specific expectation values that may // be int, JSON float64, json.Number, or other numeric types into int. func intFromProviderSpecific(v any) (int, bool) { switch n := v.(type) { case int: return n, true case int8: return int(n), true case int16: return int(n), true case int32: return int(n), true case int64: return int(n), true case uint: return int(n), true case uint8: return int(n), true case uint16: return int(n), true case uint32: return int(n), true case uint64: return int(n), true case float32: return int(n), true case float64: return int(n), true case json.Number: i, err := n.Int64() if err != nil { f, err2 := n.Float64() if err2 != nil { return 0, false } return int(f), true } return int(i), true default: return 0, false } } // validateEmbeddingFields validates embedding responses func validateEmbeddingFields(t *testing.T, response *schemas.BifrostEmbeddingResponse, expectations ResponseExpectations, result *ValidationResult) { // Check if response has embedding data if len(response.Data) == 0 { result.Passed = false result.Errors = append(result.Errors, "Embedding response missing data") return } // Check embedding count matches expected if expectations.ProviderSpecific != nil { if raw, exists := expectations.ProviderSpecific["expected_embedding_count"]; exists { if expectedCount, ok := intFromProviderSpecific(raw); ok { actualCount := len(response.Data) // Also check for 2D arrays (some providers return single embedding with 2D array) if actualCount == 1 && response.Data[0].Embedding.Embedding2DArray != nil { actualCount = len(response.Data[0].Embedding.Embedding2DArray) } if actualCount != expectedCount { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected %d embeddings, got %d", expectedCount, actualCount)) } } } } // Validate each embedding has non-empty vector data for i, embedding := range response.Data { hasData := false if embedding.Embedding.EmbeddingArray != nil && len(embedding.Embedding.EmbeddingArray) > 0 { hasData = true } if embedding.Embedding.Embedding2DArray != nil && len(embedding.Embedding.Embedding2DArray) > 0 { hasData = true } if !hasData { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Embedding %d has no vector data", i)) } } // Check embedding dimensions if expectedDimensions, ok := expectations.ProviderSpecific["expected_dimensions"].(int); ok { for i, embedding := range response.Data { var actualDimensions int if embedding.Embedding.EmbeddingArray != nil { actualDimensions = len(embedding.Embedding.EmbeddingArray) } else if embedding.Embedding.Embedding2DArray != nil { if len(embedding.Embedding.Embedding2DArray) > 0 { actualDimensions = len(embedding.Embedding.Embedding2DArray[0]) } } if actualDimensions != expectedDimensions { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Embedding %d has %d dimensions, expected %d", i, actualDimensions, expectedDimensions)) } } } // Check model field if expectations.ShouldHaveModel { if strings.TrimSpace(response.Model) == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected model field but not present or empty (provider: %s)", response.ExtraFields.Provider)) } } // Check latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } result.MetricsCollected["embedding_validation"] = "completed" } // ============================================================================= // VALIDATION HELPER FUNCTIONS - COUNT TOKENS RESPONSE // ============================================================================= func validateCountTokensFields(t *testing.T, response *schemas.BifrostCountTokensResponse, expectations ResponseExpectations, result *ValidationResult) { _ = t if strings.TrimSpace(response.Model) == "" && expectations.ShouldHaveModel { result.Passed = false result.Errors = append(result.Errors, "Expected model field but got empty") } if response.InputTokens <= 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("input_tokens should be > 0, got %d", response.InputTokens)) } if response.OutputTokens != nil { if *response.OutputTokens < 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("output_tokens should be >= 0, got %d", *response.OutputTokens)) } } if response.TotalTokens != nil { if *response.TotalTokens < response.InputTokens { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("total_tokens (%d) should be >= input_tokens (%d)", *response.TotalTokens, response.InputTokens)) } } if response.ExtraFields.RequestType != schemas.CountTokensRequest { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Request type mismatch: expected %s, got %s", schemas.CountTokensRequest, response.ExtraFields.RequestType)) } if expectations.ProviderSpecific != nil { if expectedProvider, ok := expectations.ProviderSpecific["expected_provider"].(string); ok { if string(response.ExtraFields.Provider) != expectedProvider { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Provider mismatch: expected %s, got %s", expectedProvider, string(response.ExtraFields.Provider))) } } } if expectations.ShouldHaveLatency { if response.ExtraFields.Latency < 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Invalid latency: %d ms (should be non-negative)", response.ExtraFields.Latency)) } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } result.MetricsCollected["count_tokens_validation"] = "completed" } // ============================================================================= // VALIDATION HELPER FUNCTIONS - LIST MODELS RESPONSE // ============================================================================= // validateListModelsFields validates list models responses func validateListModelsFields(t *testing.T, response *schemas.BifrostListModelsResponse, expectations ResponseExpectations, result *ValidationResult) { // Check that we have models in the response if len(response.Data) == 0 { result.Passed = false result.Errors = append(result.Errors, "List models response contains no models") return } // Validate individual model entries validModels := 0 for i, model := range response.Data { if model.ID == "" { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Model at index %d has empty ID", i)) continue } validModels++ } if validModels == 0 { result.Passed = false result.Errors = append(result.Errors, "No valid models found in response") } // Validate extra fields if expectations.ProviderSpecific != nil { if expectedProvider, ok := expectations.ProviderSpecific["expected_provider"].(string); ok { if string(response.ExtraFields.Provider) != expectedProvider { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Provider mismatch: expected %s, got %s", expectedProvider, string(response.ExtraFields.Provider))) } } } // Validate request type if response.ExtraFields.RequestType != schemas.ListModelsRequest { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Request type mismatch: expected %s, got %s", schemas.ListModelsRequest, response.ExtraFields.RequestType)) } // Validate latency field if expectations.ShouldHaveLatency { if response.ExtraFields.Latency < 0 { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Invalid latency: %d ms (should be non-negative)", response.ExtraFields.Latency)) } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Check minimum model count if specified if minModels, ok := expectations.ProviderSpecific["min_model_count"].(int); ok { if len(response.Data) < minModels { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Expected at least %d models, got %d", minModels, len(response.Data))) } } result.MetricsCollected["list_models_validation"] = "completed" } // collectListModelsResponseMetrics collects metrics from the list models response for analysis func collectListModelsResponseMetrics(response *schemas.BifrostListModelsResponse, result *ValidationResult) { result.MetricsCollected["model_count"] = len(response.Data) result.MetricsCollected["has_next_page_token"] = response.NextPageToken != "" result.MetricsCollected["has_provider"] = response.ExtraFields.Provider != "" result.MetricsCollected["has_request_type"] = response.ExtraFields.RequestType != "" result.MetricsCollected["has_latency"] = response.ExtraFields.Latency >= 0 } // collectEmbeddingResponseMetrics collects metrics from the embedding response for analysis func collectEmbeddingResponseMetrics(response *schemas.BifrostEmbeddingResponse, result *ValidationResult) { result.MetricsCollected["has_data"] = response.Data != nil result.MetricsCollected["embedding_count"] = len(response.Data) result.MetricsCollected["has_usage"] = response.Usage != nil if len(response.Data) > 0 { var dimensions int if response.Data[0].Embedding.EmbeddingArray != nil { dimensions = len(response.Data[0].Embedding.EmbeddingArray) } else if len(response.Data[0].Embedding.Embedding2DArray) > 0 { dimensions = len(response.Data[0].Embedding.Embedding2DArray[0]) } result.MetricsCollected["embedding_dimensions"] = dimensions } } func collectCountTokensResponseMetrics(response *schemas.BifrostCountTokensResponse, result *ValidationResult) { result.MetricsCollected["input_tokens"] = response.InputTokens result.MetricsCollected["has_total_tokens"] = response.TotalTokens != nil if response.TotalTokens != nil { result.MetricsCollected["total_tokens"] = *response.TotalTokens } result.MetricsCollected["has_model"] = response.Model != "" result.MetricsCollected["request_type"] = response.ExtraFields.RequestType } // ============================================================================= // BATCH API VALIDATION FUNCTIONS // ============================================================================= // ValidateBatchCreateResponse performs comprehensive validation for batch create responses func ValidateBatchCreateResponse(t *testing.T, response *schemas.BifrostBatchCreateResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate batch ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "Batch ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["batch_id"] = response.ID result.MetricsCollected["status"] = response.Status result.MetricsCollected["has_endpoint"] = response.Endpoint != "" // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateBatchListResponse performs comprehensive validation for batch list responses func ValidateBatchListResponse(t *testing.T, response *schemas.BifrostBatchListResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["batch_count"] = len(response.Data) result.MetricsCollected["has_more"] = response.HasMore // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateBatchRetrieveResponse performs comprehensive validation for batch retrieve responses func ValidateBatchRetrieveResponse(t *testing.T, response *schemas.BifrostBatchRetrieveResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate batch ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "Batch ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["batch_id"] = response.ID result.MetricsCollected["status"] = response.Status result.MetricsCollected["has_request_counts"] = response.RequestCounts.Total > 0 // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateBatchCancelResponse performs comprehensive validation for batch cancel responses func ValidateBatchCancelResponse(t *testing.T, response *schemas.BifrostBatchCancelResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate batch ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "Batch ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["batch_id"] = response.ID result.MetricsCollected["status"] = response.Status // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateBatchResultsResponse performs comprehensive validation for batch results responses func ValidateBatchResultsResponse(t *testing.T, response *schemas.BifrostBatchResultsResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate batch ID is present if response.BatchID == "" { result.Passed = false result.Errors = append(result.Errors, "Batch ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["batch_id"] = response.BatchID result.MetricsCollected["results_count"] = len(response.Results) result.MetricsCollected["has_more"] = response.HasMore // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ============================================================================= // FILE API VALIDATION FUNCTIONS // ============================================================================= // ValidateFileUploadResponse performs comprehensive validation for file upload responses func ValidateFileUploadResponse(t *testing.T, response *schemas.BifrostFileUploadResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate file ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "File ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["file_id"] = response.ID result.MetricsCollected["filename"] = response.Filename result.MetricsCollected["bytes"] = response.Bytes result.MetricsCollected["purpose"] = response.Purpose // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateFileListResponse performs comprehensive validation for file list responses func ValidateFileListResponse(t *testing.T, response *schemas.BifrostFileListResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["file_count"] = len(response.Data) result.MetricsCollected["has_more"] = response.HasMore // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateFileRetrieveResponse performs comprehensive validation for file retrieve responses func ValidateFileRetrieveResponse(t *testing.T, response *schemas.BifrostFileRetrieveResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate file ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "File ID is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["file_id"] = response.ID result.MetricsCollected["filename"] = response.Filename result.MetricsCollected["bytes"] = response.Bytes result.MetricsCollected["status"] = response.Status // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateFileDeleteResponse performs comprehensive validation for file delete responses func ValidateFileDeleteResponse(t *testing.T, response *schemas.BifrostFileDeleteResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate file ID is present if response.ID == "" { result.Passed = false result.Errors = append(result.Errors, "File ID is empty") } // Validate deleted flag if !response.Deleted { result.Passed = false result.Errors = append(result.Errors, "File was not marked as deleted") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["file_id"] = response.ID result.MetricsCollected["deleted"] = response.Deleted // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // ValidateFileContentResponse performs comprehensive validation for file content responses func ValidateFileContentResponse(t *testing.T, response *schemas.BifrostFileContentResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult { result := ValidationResult{ Passed: true, Errors: make([]string, 0), Warnings: make([]string, 0), MetricsCollected: make(map[string]interface{}), } if err != nil { result.Passed = false parsed := ParseBifrostError(err) result.Errors = append(result.Errors, fmt.Sprintf("Got error when expecting success: %s", FormatErrorConcise(parsed))) LogError(t, err, scenarioName) return result } if response == nil { result.Passed = false result.Errors = append(result.Errors, "Response is nil") return result } // Validate file ID is present if response.FileID == "" { result.Passed = false result.Errors = append(result.Errors, "File ID is empty") } // Validate content is present if len(response.Content) == 0 { result.Passed = false result.Errors = append(result.Errors, "File content is empty") } // Validate latency if expected if expectations.ShouldHaveLatency { if response.ExtraFields.Latency <= 0 { result.Passed = false result.Errors = append(result.Errors, "Expected latency information but not present or invalid") } else { result.MetricsCollected["latency_ms"] = response.ExtraFields.Latency } } // Collect metrics result.MetricsCollected["file_id"] = response.FileID result.MetricsCollected["content_length"] = len(response.Content) result.MetricsCollected["content_type"] = response.ContentType // Check raw request/response fields validateRawFields(expectations, response.ExtraFields.RawRequest, response.ExtraFields.RawResponse, &result) logValidationResults(t, result, scenarioName) return result } // extractChatToolCallNames extracts tool call function names from chat response for error messages func extractChatToolCallNames(response *schemas.BifrostChatResponse) []string { var toolNames []string if response.Choices != nil { for _, choice := range response.Choices { if choice.Message.ChatAssistantMessage != nil && choice.Message.ChatAssistantMessage.ToolCalls != nil { for _, toolCall := range choice.Message.ChatAssistantMessage.ToolCalls { if toolCall.Function.Name != nil { toolNames = append(toolNames, *toolCall.Function.Name) } } } } } return toolNames } // extractResponsesToolCallNames extracts tool call function names from Responses API response for error messages func extractResponsesToolCallNames(response *schemas.BifrostResponsesResponse) []string { var toolNames []string if response.Output != nil { for _, output := range response.Output { if output.ResponsesToolMessage != nil && output.Name != nil { toolNames = append(toolNames, *output.Name) } } } return toolNames } // validateChatSpecificToolCalls validates individual tool call expectations for chat response func validateChatSpecificToolCalls(response *schemas.BifrostChatResponse, expectedCalls []ToolCallExpectation, result *ValidationResult) { for _, expected := range expectedCalls { found := false if response.Choices != nil { for _, message := range response.Choices { if message.Message.ChatAssistantMessage != nil && message.Message.ChatAssistantMessage.ToolCalls != nil { for _, toolCall := range message.Message.ChatAssistantMessage.ToolCalls { if toolCall.Function.Name != nil && *toolCall.Function.Name == expected.FunctionName { arguments := toolCall.Function.Arguments found = true validateSingleToolCall(arguments, expected, 0, 0, result) break } } } } } if !found { result.Passed = false actualToolNames := extractChatToolCallNames(response) if len(actualToolNames) == 0 { result.Errors = append(result.Errors, fmt.Sprintf("Expected tool call '%s' not found (no tool calls present)", expected.FunctionName)) } else { result.Errors = append(result.Errors, fmt.Sprintf("Expected tool call '%s' not found. Actual tool calls found: %v", expected.FunctionName, actualToolNames)) } } } } // validateResponsesSpecificToolCalls validates individual tool call expectations for Responses API response func validateResponsesSpecificToolCalls(response *schemas.BifrostResponsesResponse, expectedCalls []ToolCallExpectation, result *ValidationResult) { for _, expected := range expectedCalls { found := false if response.Output != nil { for _, message := range response.Output { if message.ResponsesToolMessage != nil && message.ResponsesToolMessage.Name != nil && *message.ResponsesToolMessage.Name == expected.FunctionName { if message.ResponsesToolMessage.Arguments != nil { arguments := *message.ResponsesToolMessage.Arguments found = true validateSingleToolCall(arguments, expected, 0, 0, result) break } } } } if !found { result.Passed = false actualToolNames := extractResponsesToolCallNames(response) if len(actualToolNames) == 0 { result.Errors = append(result.Errors, fmt.Sprintf("Expected tool call '%s' not found (no tool calls present)", expected.FunctionName)) } else { result.Errors = append(result.Errors, fmt.Sprintf("Expected tool call '%s' not found. Actual tool calls found: %v", expected.FunctionName, actualToolNames)) } } } } // ============================================================================= // UTILITY FUNCTIONS // ============================================================================= // truncateContentForError safely truncates content for error messages func truncateContentForError(content string, maxLength int) string { content = strings.TrimSpace(content) if len(content) <= maxLength { return fmt.Sprintf("'%s'", content) } return fmt.Sprintf("'%s...' (truncated from %d chars)", content[:maxLength], len(content)) } // getJSONType returns the JSON type of a value func getJSONType(value interface{}) string { switch value.(type) { case string: return "string" case float64, int, int64: return "number" case bool: return "boolean" case []interface{}: return "array" case map[string]interface{}: return "object" case nil: return "null" default: return "unknown" } } // validateSingleToolCall validates a specific tool call against expectations func validateSingleToolCall(arguments interface{}, expected ToolCallExpectation, choiceIdx, callIdx int, result *ValidationResult) { // Parse arguments with safe type handling var args map[string]interface{} if expected.ValidateArgsJSON { // Handle nil arguments if arguments == nil { args = nil } else if argsMap, ok := arguments.(map[string]interface{}); ok { // Already a map, use directly args = argsMap } else if argsMapInterface, ok := arguments.(map[interface{}]interface{}); ok { // Convert map[interface{}]interface{} to map[string]interface{} args = make(map[string]interface{}) for k, v := range argsMapInterface { if keyStr, ok := k.(string); ok { args[keyStr] = v } } } else if argsStr, ok := arguments.(string); ok { // String type - unmarshal as JSON if err := json.Unmarshal([]byte(argsStr), &args); err != nil { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s (choice %d, call %d) has invalid JSON arguments: %s", expected.FunctionName, choiceIdx, callIdx, err.Error())) return } } else if argsBytes, ok := arguments.([]byte); ok { // []byte type - unmarshal as JSON if err := json.Unmarshal(argsBytes, &args); err != nil { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s (choice %d, call %d) has invalid JSON arguments: %s", expected.FunctionName, choiceIdx, callIdx, err.Error())) return } } else { // Unsupported type result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s (choice %d, call %d) has unsupported argument type: %T", expected.FunctionName, choiceIdx, callIdx, arguments)) return } } // Check required arguments for _, reqArg := range expected.RequiredArgs { if _, exists := args[reqArg]; !exists { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s missing required argument '%s'", expected.FunctionName, reqArg)) } } // Check forbidden arguments for _, forbiddenArg := range expected.ForbiddenArgs { if _, exists := args[forbiddenArg]; exists { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s has forbidden argument '%s'", expected.FunctionName, forbiddenArg)) } } // Check argument types for argName, expectedType := range expected.ArgumentTypes { if value, exists := args[argName]; exists { actualType := getJSONType(value) if actualType != expectedType { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s argument '%s' is %s, expected %s", expected.FunctionName, argName, actualType, expectedType)) } } } // Check specific argument values for argName, expectedValue := range expected.ArgumentValues { if actualValue, exists := args[argName]; exists { if actualValue != expectedValue { result.Passed = false result.Errors = append(result.Errors, fmt.Sprintf("Tool call %s argument '%s' is %v, expected %v", expected.FunctionName, argName, actualValue, expectedValue)) } } } } // logValidationResults logs the validation results func logValidationResults(t *testing.T, result ValidationResult, scenarioName string) { if result.Passed { t.Logf("✅ Validation passed for %s", scenarioName) } else { // LogF, not ErrorF else later retries will still fail the test t.Logf("❌ Validation failed for %s with %d errors", scenarioName, len(result.Errors)) for _, err := range result.Errors { // Ensure each error line has ❌ prefix for consistency errorMsg := err if !strings.Contains(errorMsg, "❌") { errorMsg = fmt.Sprintf("❌ %s", errorMsg) } t.Logf(" %s", errorMsg) } } if len(result.Warnings) > 0 { t.Logf("⚠️ %d warnings for %s", len(result.Warnings), scenarioName) for _, warning := range result.Warnings { t.Logf(" Warning: %s", warning) } } } // countLogicalChoicesInResponsesAPI collapses a native Responses output array // into the single logical assistant turn expected by shared llmtests. func countLogicalChoicesInResponsesAPI(messages []schemas.ResponsesMessage) int { if len(messages) == 0 { return 0 } hasAssistantTurn := false nonInputItems := 0 for _, msg := range messages { if msg.Role != nil { switch *msg.Role { case schemas.ResponsesInputMessageRoleUser, schemas.ResponsesInputMessageRoleSystem, schemas.ResponsesInputMessageRoleDeveloper: // Native Responses output may include echoed input items; they are not model choices. continue } } nonInputItems++ if msg.Type != nil { switch *msg.Type { case schemas.ResponsesMessageTypeMessage: if msg.Role == nil || *msg.Role == schemas.ResponsesInputMessageRoleAssistant { hasAssistantTurn = true } case schemas.ResponsesMessageTypeReasoning, schemas.ResponsesMessageTypeRefusal, schemas.ResponsesMessageTypeFunctionCall, schemas.ResponsesMessageTypeFileSearchCall, schemas.ResponsesMessageTypeComputerCall, schemas.ResponsesMessageTypeWebSearchCall, schemas.ResponsesMessageTypeWebFetchCall, schemas.ResponsesMessageTypeCodeInterpreterCall, schemas.ResponsesMessageTypeLocalShellCall, schemas.ResponsesMessageTypeMCPCall, schemas.ResponsesMessageTypeCustomToolCall, schemas.ResponsesMessageTypeImageGenerationCall, schemas.ResponsesMessageTypeMCPListTools, schemas.ResponsesMessageTypeMCPApprovalRequest: hasAssistantTurn = true } } } if hasAssistantTurn { return 1 } return nonInputItems }