package llmtests import ( "context" "fmt" "os" "strings" "testing" "time" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" ) // chunkTiming tracks the arrival time of each streaming chunk type chunkTiming struct { index int arrivalTime time.Time timeSincePrev time.Duration } // detectBatchedStream checks if chunks arrived in a batched manner rather than streaming individually // Returns true if streaming appears batched, with an error message func detectBatchedStream(chunkTimings []chunkTiming, minChunks int) (bool, string) { // Require at least 20 chunks to detect batching // Small responses legitimately have few chunks that may arrive quickly if len(chunkTimings) < 20 { return false, "" // Not enough data to determine } // Check if first-to-second chunk has reasonable delay (TTFT indicator) // True streaming usually has >1ms between first and second chunk if len(chunkTimings) >= 2 && chunkTimings[1].timeSincePrev > 50*time.Microsecond { return false, "" // First chunk delay indicates real streaming } var nearInstantCount int threshold := 50 * time.Microsecond // Start from index 1 (skip first chunk - no previous reference) for i := 1; i < len(chunkTimings); i++ { if chunkTimings[i].timeSincePrev < threshold { nearInstantCount++ } } // This goes off for faster models - so disabling it // totalIntervals := len(chunkTimings) - 1 // ratio := float64(nearInstantCount) / float64(totalIntervals) // // Threshold: >80% of chunks arriving near-instantly indicates batching // if ratio > 0.8 { // return true, fmt.Sprintf( // "chunks appear batched: %d/%d (%.0f%%) arrived within %v of each other", // nearInstantCount, totalIntervals, ratio*100, threshold, // ) // } return false, "" } // RunChatCompletionStreamTest executes the chat completion stream test scenario func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { if !testConfig.Scenarios.CompletionStream { t.Logf("Chat completion stream not supported for provider %s", testConfig.Provider) return } t.Run("ChatCompletionStream", func(t *testing.T) { if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { t.Parallel() } messages := []schemas.ChatMessage{ CreateBasicChatMessage("Tell me a short story about a robot learning to paint the city which has the eiffel tower. Keep it under 200 words and include the city's name."), } request := &schemas.BifrostChatRequest{ Provider: testConfig.Provider, Model: testConfig.ChatModel, Input: messages, Params: &schemas.ChatParameters{ MaxCompletionTokens: bifrost.Ptr(150), }, Fallbacks: testConfig.Fallbacks, } // Use retry framework for stream requests retryConfig := StreamingRetryConfig() retryContext := TestRetryContext{ ScenarioName: "ChatCompletionStream", ExpectedBehavior: map[string]interface{}{ "should_stream_content": true, "should_tell_story": true, "topic": "robot painting", }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.ChatModel, }, } // Use proper streaming retry wrapper for the stream request responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.ChatCompletionStreamRequest(bfCtx, request) }) // Enhanced error handling RequireNoError(t, err, "Chat completion stream request failed") if responseChannel == nil { t.Fatal("Response channel should not be nil") } var fullContent strings.Builder var responseCount int var lastResponse *schemas.BifrostStreamChunk // Chunk timing tracking for batch detection var chunkTimings []chunkTiming var lastChunkTime time.Time // Create a timeout context for the stream reading streamCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() t.Logf("📡 Starting to read streaming response...") // Read streaming responses for { select { case response, ok := <-responseChannel: if !ok { // Channel closed, streaming completed t.Logf("✅ Streaming completed. Total chunks received: %d", responseCount) goto streamComplete } if response == nil { t.Fatal("Streaming response should not be nil") } // Record chunk timing now := time.Now() var timeSincePrev time.Duration if responseCount > 0 { timeSincePrev = now.Sub(lastChunkTime) } chunkTimings = append(chunkTimings, chunkTiming{ index: responseCount, arrivalTime: now, timeSincePrev: timeSincePrev, }) lastChunkTime = now lastResponse = DeepCopyBifrostStreamChunk(response) // Basic validation of streaming response structure if response.BifrostChatResponse != nil { if response.BifrostChatResponse.ExtraFields.Provider != testConfig.Provider { t.Logf("⚠️ Warning: Provider mismatch - expected %s, got %s", testConfig.Provider, response.BifrostChatResponse.ExtraFields.Provider) } if response.BifrostChatResponse.ID == "" { t.Logf("⚠️ Warning: Response ID is empty") } // Per-chunk Object validation: bifrost normalizes every streaming chunk // to the OpenAI shape with Object="chat.completion.chunk", whether the // upstream provider natively emits it (OpenAI family) or bifrost // synthesizes it during translation (e.g., Anthropic's type-keyed events). // A missing/wrong Object here indicates a provider translation regression. if response.BifrostChatResponse.Object != "chat.completion.chunk" { t.Errorf("Chunk %d: Object field must be 'chat.completion.chunk', got %q", responseCount+1, response.BifrostChatResponse.Object) } // Log latency for each chunk (can be 0 for inter-chunks) t.Logf("📊 Chunk %d latency: %d ms", responseCount+1, response.BifrostChatResponse.ExtraFields.Latency) // Process each choice in the response for _, choice := range response.BifrostChatResponse.Choices { // Validate that this is a stream response if choice.ChatStreamResponseChoice == nil { t.Logf("⚠️ Warning: Stream response choice is nil for choice %d", choice.Index) continue } if choice.ChatNonStreamResponseChoice != nil { t.Logf("⚠️ Warning: Non-stream response choice should be nil in streaming response") } // Get content from delta if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { delta := choice.ChatStreamResponseChoice.Delta if delta.Content != nil { fullContent.WriteString(*delta.Content) } // Log role if present (usually in first chunk) if delta.Role != nil { t.Logf("🤖 Role: %s", *delta.Role) } // Check finish reason if present if choice.FinishReason != nil { t.Logf("🏁 Finish reason: %s", *choice.FinishReason) } } } } responseCount++ // Safety check to prevent infinite loops in case of issues if responseCount > 500 { t.Fatal("Received too many streaming chunks, something might be wrong") } case <-streamCtx.Done(): t.Fatal("Timeout waiting for streaming response") } } streamComplete: // Check for batched streaming if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched { t.Fatalf("❌ Streaming validation failed: %s", batchMsg) } // Validate final streaming response finalContent := strings.TrimSpace(fullContent.String()) // Create a consolidated response for validation consolidatedResponse := &schemas.BifrostChatResponse{ Choices: []schemas.BifrostResponseChoice{ { Index: 0, ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ Message: &schemas.ChatMessage{ Role: schemas.ChatMessageRoleAssistant, Content: &schemas.ChatMessageContent{ ContentStr: &finalContent, }, }, }, }, }, ExtraFields: schemas.BifrostResponseExtraFields{ Provider: testConfig.Provider, }, } // Copy usage and other metadata from last response if available if lastResponse != nil && lastResponse.BifrostChatResponse != nil { consolidatedResponse.Usage = lastResponse.BifrostChatResponse.Usage consolidatedResponse.Model = lastResponse.BifrostChatResponse.Model consolidatedResponse.ID = lastResponse.BifrostChatResponse.ID consolidatedResponse.Created = lastResponse.BifrostChatResponse.Created // Copy finish reason from last choice if available if len(lastResponse.BifrostChatResponse.Choices) > 0 && lastResponse.BifrostChatResponse.Choices[0].FinishReason != nil { consolidatedResponse.Choices[0].FinishReason = lastResponse.BifrostChatResponse.Choices[0].FinishReason } consolidatedResponse.ExtraFields = lastResponse.BifrostChatResponse.ExtraFields } // Enhanced validation expectations for streaming expectations := GetExpectationsForScenario("ChatCompletionStream", testConfig, map[string]interface{}{}) expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider) expectations.ShouldContainAnyOf = append(expectations.ShouldContainAnyOf, []string{"paris"}...) // Should include story elements // Reasonable upper bound // Validate the consolidated streaming response validationResult := ValidateChatResponse(t, consolidatedResponse, nil, expectations, "ChatCompletionStream") // Basic streaming validation if responseCount == 0 { t.Fatal("Should receive at least one streaming response") } if finalContent == "" { t.Fatal("Final content should not be empty") } if len(finalContent) < 10 { t.Fatal("Final content should be substantial") } if !validationResult.Passed { t.Fatalf("❌ Streaming validation failed: %v", validationResult.Errors) } t.Logf("📊 Streaming metrics: %d chunks, %d chars", responseCount, len(finalContent)) t.Logf("✅ Streaming test completed successfully") t.Logf("📝 Final content (%d chars)", len(finalContent)) }) // Test streaming with tool calls if supported if testConfig.Scenarios.ToolCalls { t.Run("ChatCompletionStreamWithTools", func(t *testing.T) { if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { t.Parallel() } messages := []schemas.ChatMessage{ CreateBasicChatMessage("What's the weather like in San Francisco in celsius? Please use the get_weather function."), } tool := GetSampleChatTool(SampleToolTypeWeather) request := &schemas.BifrostChatRequest{ Provider: testConfig.Provider, Model: testConfig.ChatModel, Input: messages, Params: &schemas.ChatParameters{ MaxCompletionTokens: bifrost.Ptr(150), Tools: []schemas.ChatTool{*tool}, }, Fallbacks: testConfig.Fallbacks, } // Use retry framework for stream requests with tools retryConfig := StreamingRetryConfig() retryContext := TestRetryContext{ ScenarioName: "ChatCompletionStreamWithTools", ExpectedBehavior: map[string]interface{}{ "should_stream_content": true, "should_have_tool_calls": true, "tool_name": "get_weather", }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.ChatModel, "tools": true, }, } // Use validation retry wrapper that includes stream reading and validation validationResult := WithChatStreamValidationRetry( t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.ChatCompletionStreamRequest(bfCtx, request) }, func(responseChannel chan *schemas.BifrostStreamChunk) ChatStreamValidationResult { var toolCallDetected bool var responseCount int var streamErrors []string // Chunk timing tracking for batch detection var chunkTimings []chunkTiming var lastChunkTime time.Time streamCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() t.Logf("🔧 Testing streaming with tool calls...") for { select { case response, ok := <-responseChannel: if !ok { goto toolStreamComplete } if response == nil || response.BifrostChatResponse == nil { streamErrors = append(streamErrors, "❌ Streaming response should not be nil") continue } // Record chunk timing now := time.Now() var timeSincePrev time.Duration if responseCount > 0 { timeSincePrev = now.Sub(lastChunkTime) } chunkTimings = append(chunkTimings, chunkTiming{ index: responseCount, arrivalTime: now, timeSincePrev: timeSincePrev, }) lastChunkTime = now responseCount++ if response.BifrostChatResponse.Choices != nil { for _, choice := range response.BifrostChatResponse.Choices { if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { delta := choice.ChatStreamResponseChoice.Delta // Check for tool calls in delta if len(delta.ToolCalls) > 0 { toolCallDetected = true t.Logf("🔧 Tool call detected in streaming response") for _, toolCall := range delta.ToolCalls { if toolCall.Function.Name != nil { t.Logf("🔧 Tool: %s", *toolCall.Function.Name) if toolCall.Function.Arguments != "" { t.Logf("🔧 Args: %s", toolCall.Function.Arguments) } } } } } } } if responseCount > 100 { goto toolStreamComplete } case <-streamCtx.Done(): streamErrors = append(streamErrors, "❌ Timeout waiting for streaming response with tools") goto toolStreamComplete } } toolStreamComplete: var errors []string if responseCount == 0 { errors = append(errors, "❌ Should receive at least one streaming response") } if !toolCallDetected { errors = append(errors, fmt.Sprintf("❌ Should detect tool calls in streaming response (received %d chunks but no tool calls)", responseCount)) } // Check for batched streaming if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched { errors = append(errors, fmt.Sprintf("❌ Streaming validation failed: %s", batchMsg)) } if len(streamErrors) > 0 { errors = append(errors, streamErrors...) } return ChatStreamValidationResult{ Passed: len(errors) == 0, Errors: errors, ReceivedData: responseCount > 0, StreamErrors: streamErrors, ToolCallDetected: toolCallDetected, ResponseCount: responseCount, } }, ) // Check validation result if !validationResult.Passed { allErrors := append(validationResult.Errors, validationResult.StreamErrors...) t.Fatalf("❌ Chat completion stream with tools validation failed after retries: %s", strings.Join(allErrors, "; ")) } if validationResult.ResponseCount == 0 { t.Fatalf("❌ Should receive at least one streaming response") } if !validationResult.ToolCallDetected { t.Fatalf("❌ Should detect tool calls in streaming response (received %d chunks but no tool calls)", validationResult.ResponseCount) } t.Logf("✅ Streaming with tools test completed successfully") }) } // Test chat completion streaming with reasoning if supported if testConfig.Scenarios.Reasoning && testConfig.ReasoningModel != "" { t.Run("ChatCompletionStreamWithReasoning", func(t *testing.T) { if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { t.Parallel() } problemPrompt := "Solve this step by step: If a train leaves station A at 2 PM traveling at 60 mph, and another train leaves station B at 3 PM traveling at 80 mph toward station A, and the stations are 420 miles apart, when will they meet?" messages := []schemas.ChatMessage{ CreateBasicChatMessage(problemPrompt), } request := &schemas.BifrostChatRequest{ Provider: testConfig.Provider, Model: testConfig.ReasoningModel, Input: messages, Params: &schemas.ChatParameters{ MaxCompletionTokens: bifrost.Ptr(1800), Reasoning: &schemas.ChatReasoning{ Effort: bifrost.Ptr("high"), MaxTokens: bifrost.Ptr(1500), }, }, Fallbacks: testConfig.Fallbacks, } // Use retry framework for stream requests with reasoning retryConfig := StreamingRetryConfig() retryContext := TestRetryContext{ ScenarioName: "ChatCompletionStreamWithReasoning", ExpectedBehavior: map[string]interface{}{ "should_stream_reasoning": true, "should_have_reasoning_events": true, "problem_type": "mathematical", }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.ReasoningModel, "reasoning": true, }, } // Use proper streaming retry wrapper for the stream request responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.ChatCompletionStreamRequest(bfCtx, request) }) RequireNoError(t, err, "Chat completion stream with reasoning failed") if responseChannel == nil { t.Fatal("Response channel should not be nil") } var reasoningDetected bool var reasoningDetailsDetected bool var reasoningTokensDetected bool var responseCount int // Chunk timing tracking for batch detection var chunkTimings []chunkTiming var lastChunkTime time.Time streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second) defer cancel() t.Logf("🧠 Testing chat completion streaming with reasoning...") for { select { case response, ok := <-responseChannel: if !ok { goto reasoningStreamComplete } if response == nil { t.Fatal("Streaming response should not be nil") } // Record chunk timing now := time.Now() var timeSincePrev time.Duration if responseCount > 0 { timeSincePrev = now.Sub(lastChunkTime) } chunkTimings = append(chunkTimings, chunkTiming{ index: responseCount, arrivalTime: now, timeSincePrev: timeSincePrev, }) lastChunkTime = now responseCount++ if response.BifrostChatResponse != nil { chatResp := response.BifrostChatResponse // Check for reasoning in choices if len(chatResp.Choices) > 0 { for _, choice := range chatResp.Choices { if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { delta := choice.ChatStreamResponseChoice.Delta // Check for reasoning content in delta if delta.Reasoning != nil && *delta.Reasoning != "" { reasoningDetected = true t.Logf("🧠 Reasoning content detected: %q", *delta.Reasoning) } // Check for reasoning details in delta if len(delta.ReasoningDetails) > 0 { reasoningDetailsDetected = true t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails)) for _, detail := range delta.ReasoningDetails { t.Logf(" - Type: %s, Index: %d", detail.Type, detail.Index) switch detail.Type { case schemas.BifrostReasoningDetailsTypeText: if detail.Text != nil && *detail.Text != "" { maxLen := 100 text := *detail.Text if len(text) < maxLen { maxLen = len(text) } t.Logf(" Text preview: %q", text[:maxLen]) } case schemas.BifrostReasoningDetailsTypeSummary: if detail.Summary != nil { t.Logf(" Summary length: %d", len(*detail.Summary)) } case schemas.BifrostReasoningDetailsTypeEncrypted: if detail.Data != nil { t.Logf(" Encrypted data length: %d", len(*detail.Data)) } } } } } } } // Check for reasoning tokens in usage (usually in final chunk) if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil { if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 { reasoningTokensDetected = true t.Logf("🔢 Reasoning tokens used: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens) } } } if responseCount > 150 { goto reasoningStreamComplete } case <-streamCtx.Done(): t.Fatal("Timeout waiting for chat completion streaming response with reasoning") } } reasoningStreamComplete: // Check for batched streaming if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched { t.Fatalf("❌ Streaming validation failed: %s", batchMsg) } if responseCount == 0 { t.Fatal("Should receive at least one streaming response") } // At least one of these should be detected for reasoning if !reasoningDetected && !reasoningDetailsDetected && !reasoningTokensDetected { t.Logf("⚠️ Warning: No explicit reasoning indicators found in streaming response") } else { t.Logf("✅ Reasoning indicators detected:") if reasoningDetected { t.Logf(" - Reasoning content found") } if reasoningDetailsDetected { t.Logf(" - Reasoning details found") } if reasoningTokensDetected { t.Logf(" - Reasoning tokens reported") } } t.Logf("✅ Chat completion streaming with reasoning test completed successfully") }) // Additional test with full validation and retry support t.Run("ChatCompletionStreamWithReasoningValidated", func(t *testing.T) { if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { t.Parallel() } if testConfig.Provider == schemas.OpenAI || testConfig.Provider == schemas.Groq { // OpenAI and Groq because reasoning for them in stream is extremely flaky t.Skip("Skipping ChatCompletionStreamWithReasoningValidated test for OpenAI and Groq") return } problemPrompt := "A farmer has 100 chickens and 50 cows. Each chicken lays 5 eggs per week, and each cow produces 20 liters of milk per day. If the farmer sells eggs for $0.25 each and milk for $1.50 per liter, and it costs $2 per week to feed each chicken and $15 per week to feed each cow, what is the farmer's weekly profit?" if testConfig.Provider == schemas.Cerebras { problemPrompt = "Hello how are you, can you search hackernews news regarding maxim ai for me? use your tools for this" } messages := []schemas.ChatMessage{ CreateBasicChatMessage(problemPrompt), } request := &schemas.BifrostChatRequest{ Provider: testConfig.Provider, Model: testConfig.ReasoningModel, Input: messages, Params: &schemas.ChatParameters{ MaxCompletionTokens: bifrost.Ptr(1800), Reasoning: &schemas.ChatReasoning{ Effort: bifrost.Ptr("high"), MaxTokens: bifrost.Ptr(1500), }, }, Fallbacks: testConfig.Fallbacks, } // Use retry framework for stream requests with reasoning and validation retryConfig := StreamingRetryConfig() retryContext := TestRetryContext{ ScenarioName: "ChatCompletionStreamWithReasoningValidated", ExpectedBehavior: map[string]interface{}{ "should_stream_reasoning": true, "should_have_reasoning_indicators": true, "problem_type": "mathematical", }, TestMetadata: map[string]interface{}{ "provider": testConfig.Provider, "model": testConfig.ReasoningModel, "reasoning": true, "validated": true, }, } // Use validation retry wrapper that includes stream reading and validation validationResult := WithChatStreamValidationRetry( t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) return client.ChatCompletionStreamRequest(bfCtx, request) }, func(responseChannel chan *schemas.BifrostStreamChunk) ChatStreamValidationResult { var reasoningDetected bool var reasoningDetailsDetected bool var reasoningTokensDetected bool var responseCount int var streamErrors []string var fullContent strings.Builder // Chunk timing tracking for batch detection var chunkTimings []chunkTiming var lastChunkTime time.Time streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second) defer cancel() t.Logf("🧠 Testing validated chat completion streaming with reasoning...") for { select { case response, ok := <-responseChannel: if !ok { goto validatedReasoningStreamComplete } if response == nil { streamErrors = append(streamErrors, "❌ Streaming response should not be nil") continue } // Record chunk timing now := time.Now() var timeSincePrev time.Duration if responseCount > 0 { timeSincePrev = now.Sub(lastChunkTime) } chunkTimings = append(chunkTimings, chunkTiming{ index: responseCount, arrivalTime: now, timeSincePrev: timeSincePrev, }) lastChunkTime = now responseCount++ if response.BifrostChatResponse != nil { chatResp := response.BifrostChatResponse // Check for reasoning in choices if len(chatResp.Choices) > 0 { for _, choice := range chatResp.Choices { if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { delta := choice.ChatStreamResponseChoice.Delta // Accumulate content if delta.Content != nil { fullContent.WriteString(*delta.Content) t.Logf("📝 Content chunk received (length: %d, total so far: %d)", len(*delta.Content), fullContent.Len()) } // Check for reasoning content in delta if delta.Reasoning != nil && *delta.Reasoning != "" { reasoningDetected = true t.Logf("🧠 Reasoning content detected (length: %d)", len(*delta.Reasoning)) } // Check for reasoning details in delta if len(delta.ReasoningDetails) > 0 { reasoningDetailsDetected = true t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails)) } } } } // Check for reasoning tokens in usage if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil { if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 { reasoningTokensDetected = true t.Logf("🔢 Reasoning tokens: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens) } } } if responseCount > 150 { goto validatedReasoningStreamComplete } case <-streamCtx.Done(): streamErrors = append(streamErrors, "❌ Timeout waiting for streaming response with reasoning") goto validatedReasoningStreamComplete } } validatedReasoningStreamComplete: var errors []string if responseCount == 0 { errors = append(errors, "❌ Should receive at least one streaming response") } // Check for batched streaming if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched { errors = append(errors, fmt.Sprintf("❌ Streaming validation failed: %s", batchMsg)) } // Check if at least one reasoning indicator is present hasAnyReasoningIndicator := reasoningDetected || reasoningDetailsDetected || reasoningTokensDetected if !hasAnyReasoningIndicator { errors = append(errors, fmt.Sprintf("❌ No reasoning indicators found in streaming response (received %d chunks)", responseCount)) } // Check content - for reasoning models, content may come after reasoning or may not be present // If reasoning is detected, we consider it a valid response even without content content := strings.TrimSpace(fullContent.String()) if content == "" && !hasAnyReasoningIndicator { // Only require content if no reasoning indicators were found errors = append(errors, "❌ No content received in streaming response and no reasoning indicators found") } else if content == "" && hasAnyReasoningIndicator { // Log a warning but don't fail if reasoning is present t.Logf("⚠️ Warning: Reasoning detected but no content chunks received (this may be expected for some reasoning models)") } if len(streamErrors) > 0 { errors = append(errors, streamErrors...) } return ChatStreamValidationResult{ Passed: len(errors) == 0, Errors: errors, ReceivedData: responseCount > 0 && (content != "" || hasAnyReasoningIndicator), StreamErrors: streamErrors, ToolCallDetected: false, // Not testing tool calls here ResponseCount: responseCount, } }, ) // Check validation result if !validationResult.Passed { allErrors := append(validationResult.Errors, validationResult.StreamErrors...) t.Fatalf("❌ Chat completion stream with reasoning validation failed after retries: %s", strings.Join(allErrors, "; ")) } if validationResult.ResponseCount == 0 { t.Fatalf("❌ Should receive at least one streaming response") } t.Logf("✅ Validated chat completion streaming with reasoning test completed successfully") }) } }