Files
bifrost/core/internal/llmtests/chat_completion_stream.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

861 lines
30 KiB
Go

package llmtests
import (
"context"
"fmt"
"os"
"strings"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// chunkTiming tracks the arrival time of each streaming chunk.
// One value is recorded per chunk by the stream read loops in this file so
// the gaps between chunks can be inspected once the stream has ended.
type chunkTiming struct {
// index is the zero-based position of the chunk in arrival order.
index int
// arrivalTime is the time at which the chunk was read from the stream channel.
arrivalTime time.Time
// timeSincePrev is the gap since the previous chunk; it stays zero for the first chunk.
timeSincePrev time.Duration
}
// detectBatchedStream checks whether chunks arrived in a batched manner rather
// than streaming individually.
//
// The contract is (true, reason) when the stream looks batched and (false, "")
// otherwise. The ratio-based heuristic is currently disabled because it fired
// for faster models, so every call reports (false, "") today. The guards that
// precede the heuristic and the disabled logic itself are kept so it can be
// re-enabled without re-deriving the thresholds.
//
// chunkTimings holds one entry per received chunk, in arrival order.
// minChunks is accepted for call-site compatibility but is not consulted; the
// function applies its own fixed floor of 20 chunks.
func detectBatchedStream(chunkTimings []chunkTiming, minChunks int) (bool, string) {
	const (
		// Small responses legitimately have few chunks that may arrive quickly,
		// so streams shorter than this are never judged.
		minChunksForDetection = 20
		// Gaps shorter than this count as "arrived together".
		nearInstantThreshold = 50 * time.Microsecond
	)

	if len(chunkTimings) < minChunksForDetection {
		return false, "" // Not enough data to determine
	}

	// A measurable delay between the first and second chunk indicates real
	// streaming. The length guard above guarantees index 1 exists.
	if chunkTimings[1].timeSincePrev > nearInstantThreshold {
		return false, ""
	}

	// Disabled: this goes off for faster models.
	//
	// nearInstantCount := 0
	// for _, ct := range chunkTimings[1:] { // skip first chunk - no previous reference
	// 	if ct.timeSincePrev < nearInstantThreshold {
	// 		nearInstantCount++
	// 	}
	// }
	// totalIntervals := len(chunkTimings) - 1
	// ratio := float64(nearInstantCount) / float64(totalIntervals)
	// // Threshold: >80% of chunks arriving near-instantly indicates batching
	// if ratio > 0.8 {
	// 	return true, fmt.Sprintf(
	// 		"chunks appear batched: %d/%d (%.0f%%) arrived within %v of each other",
	// 		nearInstantCount, totalIntervals, ratio*100, nearInstantThreshold,
	// 	)
	// }
	return false, ""
}
// RunChatCompletionStreamTest executes the chat completion stream test scenario.
//
// It returns immediately (logging only) when testConfig.Scenarios.CompletionStream
// is false. Otherwise it registers these subtests on t:
//
//   - ChatCompletionStream: streams a short story, checks every chunk's Object
//     field, and validates the concatenated content.
//   - ChatCompletionStreamWithTools (when Scenarios.ToolCalls is set): requires
//     at least one tool-call delta in the stream.
//   - ChatCompletionStreamWithReasoning and
//     ChatCompletionStreamWithReasoningValidated (when Scenarios.Reasoning is
//     set and ReasoningModel is non-empty): look for reasoning content,
//     reasoning details or reasoning token usage. The first only warns when
//     none is found; the second fails.
//
// Each subtest runs in parallel unless SKIP_PARALLEL_TESTS=true. ctx is the
// parent context for requests and for the per-stream read timeouts.
func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.CompletionStream {
		t.Logf("Chat completion stream not supported for provider %s", testConfig.Provider)
		return
	}

	// joinErrors renders a validation failure message. The stream validators
	// below already fold their stream-level errors into Errors, so entries of
	// streamErrs that are present in errs are skipped instead of being printed
	// a second time.
	joinErrors := func(errs, streamErrs []string) string {
		known := make(map[string]struct{}, len(errs))
		for _, msg := range errs {
			known[msg] = struct{}{}
		}
		merged := append([]string(nil), errs...)
		for _, msg := range streamErrs {
			if _, dup := known[msg]; !dup {
				merged = append(merged, msg)
			}
		}
		return strings.Join(merged, "; ")
	}

	t.Run("ChatCompletionStream", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		messages := []schemas.ChatMessage{
			CreateBasicChatMessage("Tell me a short story about a robot learning to paint the city which has the eiffel tower. Keep it under 200 words and include the city's name."),
		}
		request := &schemas.BifrostChatRequest{
			Provider: testConfig.Provider,
			Model:    testConfig.ChatModel,
			Input:    messages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(150),
			},
			Fallbacks: testConfig.Fallbacks,
		}

		// Use retry framework for stream requests
		retryConfig := StreamingRetryConfig()
		retryContext := TestRetryContext{
			ScenarioName: "ChatCompletionStream",
			ExpectedBehavior: map[string]interface{}{
				"should_stream_content": true,
				"should_tell_story":     true,
				"topic":                 "robot painting",
			},
			TestMetadata: map[string]interface{}{
				"provider": testConfig.Provider,
				"model":    testConfig.ChatModel,
			},
		}

		// Use proper streaming retry wrapper for the stream request
		responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
			return client.ChatCompletionStreamRequest(bfCtx, request)
		})

		// Enhanced error handling
		RequireNoError(t, err, "Chat completion stream request failed")
		if responseChannel == nil {
			t.Fatal("Response channel should not be nil")
		}

		var fullContent strings.Builder
		var responseCount int
		var lastResponse *schemas.BifrostStreamChunk

		// Chunk timing tracking for batch detection
		var chunkTimings []chunkTiming
		var lastChunkTime time.Time

		// Create a timeout context for the stream reading
		streamCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
		defer cancel()

		t.Logf("📡 Starting to read streaming response...")

		// Read streaming responses
		for {
			select {
			case response, ok := <-responseChannel:
				if !ok {
					// Channel closed, streaming completed
					t.Logf("✅ Streaming completed. Total chunks received: %d", responseCount)
					goto streamComplete
				}
				if response == nil {
					t.Fatal("Streaming response should not be nil")
				}

				// Record chunk timing
				now := time.Now()
				var timeSincePrev time.Duration
				if responseCount > 0 {
					timeSincePrev = now.Sub(lastChunkTime)
				}
				chunkTimings = append(chunkTimings, chunkTiming{
					index:         responseCount,
					arrivalTime:   now,
					timeSincePrev: timeSincePrev,
				})
				lastChunkTime = now
				lastResponse = DeepCopyBifrostStreamChunk(response)

				// Basic validation of streaming response structure
				if response.BifrostChatResponse != nil {
					if response.BifrostChatResponse.ExtraFields.Provider != testConfig.Provider {
						t.Logf("⚠️ Warning: Provider mismatch - expected %s, got %s", testConfig.Provider, response.BifrostChatResponse.ExtraFields.Provider)
					}
					if response.BifrostChatResponse.ID == "" {
						t.Logf("⚠️ Warning: Response ID is empty")
					}

					// Per-chunk Object validation: bifrost normalizes every streaming chunk
					// to the OpenAI shape with Object="chat.completion.chunk", whether the
					// upstream provider natively emits it (OpenAI family) or bifrost
					// synthesizes it during translation (e.g., Anthropic's type-keyed events).
					// A missing/wrong Object here indicates a provider translation regression.
					if response.BifrostChatResponse.Object != "chat.completion.chunk" {
						t.Errorf("Chunk %d: Object field must be 'chat.completion.chunk', got %q", responseCount+1, response.BifrostChatResponse.Object)
					}

					// Log latency for each chunk (can be 0 for inter-chunks)
					t.Logf("📊 Chunk %d latency: %d ms", responseCount+1, response.BifrostChatResponse.ExtraFields.Latency)

					// Process each choice in the response
					for _, choice := range response.BifrostChatResponse.Choices {
						// Validate that this is a stream response
						if choice.ChatStreamResponseChoice == nil {
							t.Logf("⚠️ Warning: Stream response choice is nil for choice %d", choice.Index)
							continue
						}
						if choice.ChatNonStreamResponseChoice != nil {
							t.Logf("⚠️ Warning: Non-stream response choice should be nil in streaming response")
						}

						// Get content from delta; the stream choice is known to be
						// non-nil here because of the continue above.
						if delta := choice.ChatStreamResponseChoice.Delta; delta != nil {
							if delta.Content != nil {
								fullContent.WriteString(*delta.Content)
							}
							// Log role if present (usually in first chunk)
							if delta.Role != nil {
								t.Logf("🤖 Role: %s", *delta.Role)
							}
							// Check finish reason if present
							if choice.FinishReason != nil {
								t.Logf("🏁 Finish reason: %s", *choice.FinishReason)
							}
						}
					}
				}

				responseCount++

				// Safety check to prevent infinite loops in case of issues
				if responseCount > 500 {
					t.Fatal("Received too many streaming chunks, something might be wrong")
				}
			case <-streamCtx.Done():
				t.Fatal("Timeout waiting for streaming response")
			}
		}

	streamComplete:
		// Check for batched streaming
		if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched {
			t.Fatalf("❌ Streaming validation failed: %s", batchMsg)
		}

		// Validate final streaming response
		finalContent := strings.TrimSpace(fullContent.String())

		// Create a consolidated response for validation
		consolidatedResponse := &schemas.BifrostChatResponse{
			Choices: []schemas.BifrostResponseChoice{
				{
					Index: 0,
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: &finalContent,
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider: testConfig.Provider,
			},
		}

		// Copy usage and other metadata from last response if available
		if lastResponse != nil && lastResponse.BifrostChatResponse != nil {
			consolidatedResponse.Usage = lastResponse.BifrostChatResponse.Usage
			consolidatedResponse.Model = lastResponse.BifrostChatResponse.Model
			consolidatedResponse.ID = lastResponse.BifrostChatResponse.ID
			consolidatedResponse.Created = lastResponse.BifrostChatResponse.Created
			// Copy finish reason from last choice if available
			if len(lastResponse.BifrostChatResponse.Choices) > 0 && lastResponse.BifrostChatResponse.Choices[0].FinishReason != nil {
				consolidatedResponse.Choices[0].FinishReason = lastResponse.BifrostChatResponse.Choices[0].FinishReason
			}
			consolidatedResponse.ExtraFields = lastResponse.BifrostChatResponse.ExtraFields
		}

		// Enhanced validation expectations for streaming
		expectations := GetExpectationsForScenario("ChatCompletionStream", testConfig, map[string]interface{}{})
		expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
		// The prompt asks for the city's name, so the story should mention it.
		expectations.ShouldContainAnyOf = append(expectations.ShouldContainAnyOf, "paris")

		// Validate the consolidated streaming response
		validationResult := ValidateChatResponse(t, consolidatedResponse, nil, expectations, "ChatCompletionStream")

		// Basic streaming validation
		if responseCount == 0 {
			t.Fatal("Should receive at least one streaming response")
		}
		if finalContent == "" {
			t.Fatal("Final content should not be empty")
		}
		if len(finalContent) < 10 {
			t.Fatal("Final content should be substantial")
		}
		if !validationResult.Passed {
			t.Fatalf("❌ Streaming validation failed: %v", validationResult.Errors)
		}

		t.Logf("📊 Streaming metrics: %d chunks, %d chars", responseCount, len(finalContent))
		t.Logf("✅ Streaming test completed successfully")
		t.Logf("📝 Final content (%d chars)", len(finalContent))
	})

	// Test streaming with tool calls if supported
	if testConfig.Scenarios.ToolCalls {
		t.Run("ChatCompletionStreamWithTools", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}

			messages := []schemas.ChatMessage{
				CreateBasicChatMessage("What's the weather like in San Francisco in celsius? Please use the get_weather function."),
			}
			tool := GetSampleChatTool(SampleToolTypeWeather)
			request := &schemas.BifrostChatRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.ChatModel,
				Input:    messages,
				Params: &schemas.ChatParameters{
					MaxCompletionTokens: bifrost.Ptr(150),
					Tools:               []schemas.ChatTool{*tool},
				},
				Fallbacks: testConfig.Fallbacks,
			}

			// Use retry framework for stream requests with tools
			retryConfig := StreamingRetryConfig()
			retryContext := TestRetryContext{
				ScenarioName: "ChatCompletionStreamWithTools",
				ExpectedBehavior: map[string]interface{}{
					"should_stream_content":  true,
					"should_have_tool_calls": true,
					"tool_name":              "get_weather",
				},
				TestMetadata: map[string]interface{}{
					"provider": testConfig.Provider,
					"model":    testConfig.ChatModel,
					"tools":    true,
				},
			}

			// Use validation retry wrapper that includes stream reading and validation
			validationResult := WithChatStreamValidationRetry(
				t,
				retryConfig,
				retryContext,
				func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.ChatCompletionStreamRequest(bfCtx, request)
				},
				func(responseChannel chan *schemas.BifrostStreamChunk) ChatStreamValidationResult {
					var toolCallDetected bool
					var responseCount int
					var streamErrors []string

					// Chunk timing tracking for batch detection
					var chunkTimings []chunkTiming
					var lastChunkTime time.Time

					streamCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
					defer cancel()

					t.Logf("🔧 Testing streaming with tool calls...")

					for {
						select {
						case response, ok := <-responseChannel:
							if !ok {
								goto toolStreamComplete
							}
							if response == nil || response.BifrostChatResponse == nil {
								streamErrors = append(streamErrors, "❌ Streaming response should not be nil")
								continue
							}

							// Record chunk timing
							now := time.Now()
							var timeSincePrev time.Duration
							if responseCount > 0 {
								timeSincePrev = now.Sub(lastChunkTime)
							}
							chunkTimings = append(chunkTimings, chunkTiming{
								index:         responseCount,
								arrivalTime:   now,
								timeSincePrev: timeSincePrev,
							})
							lastChunkTime = now
							responseCount++

							for _, choice := range response.BifrostChatResponse.Choices {
								if choice.ChatStreamResponseChoice == nil || choice.ChatStreamResponseChoice.Delta == nil {
									continue
								}
								delta := choice.ChatStreamResponseChoice.Delta
								// Check for tool calls in delta
								if len(delta.ToolCalls) > 0 {
									toolCallDetected = true
									t.Logf("🔧 Tool call detected in streaming response")
									for _, toolCall := range delta.ToolCalls {
										if toolCall.Function.Name != nil {
											t.Logf("🔧 Tool: %s", *toolCall.Function.Name)
											if toolCall.Function.Arguments != "" {
												t.Logf("🔧 Args: %s", toolCall.Function.Arguments)
											}
										}
									}
								}
							}

							// Stop reading after a bounded number of chunks.
							// NOTE(review): this leaves responseChannel undrained; confirm the
							// producer does not block on send once the reader stops.
							if responseCount > 100 {
								goto toolStreamComplete
							}
						case <-streamCtx.Done():
							streamErrors = append(streamErrors, "❌ Timeout waiting for streaming response with tools")
							goto toolStreamComplete
						}
					}

				toolStreamComplete:
					var errors []string
					if responseCount == 0 {
						errors = append(errors, "❌ Should receive at least one streaming response")
					}
					if !toolCallDetected {
						errors = append(errors, fmt.Sprintf("❌ Should detect tool calls in streaming response (received %d chunks but no tool calls)", responseCount))
					}
					// Check for batched streaming
					if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched {
						errors = append(errors, fmt.Sprintf("❌ Streaming validation failed: %s", batchMsg))
					}
					// Stream-level errors count as validation failures too.
					errors = append(errors, streamErrors...)

					return ChatStreamValidationResult{
						Passed:           len(errors) == 0,
						Errors:           errors,
						ReceivedData:     responseCount > 0,
						StreamErrors:     streamErrors,
						ToolCallDetected: toolCallDetected,
						ResponseCount:    responseCount,
					}
				},
			)

			// Check validation result
			if !validationResult.Passed {
				t.Fatalf("❌ Chat completion stream with tools validation failed after retries: %s", joinErrors(validationResult.Errors, validationResult.StreamErrors))
			}
			if validationResult.ResponseCount == 0 {
				t.Fatalf("❌ Should receive at least one streaming response")
			}
			if !validationResult.ToolCallDetected {
				t.Fatalf("❌ Should detect tool calls in streaming response (received %d chunks but no tool calls)", validationResult.ResponseCount)
			}

			t.Logf("✅ Streaming with tools test completed successfully")
		})
	}

	// Test chat completion streaming with reasoning if supported
	if testConfig.Scenarios.Reasoning && testConfig.ReasoningModel != "" {
		t.Run("ChatCompletionStreamWithReasoning", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}

			problemPrompt := "Solve this step by step: If a train leaves station A at 2 PM traveling at 60 mph, and another train leaves station B at 3 PM traveling at 80 mph toward station A, and the stations are 420 miles apart, when will they meet?"
			messages := []schemas.ChatMessage{
				CreateBasicChatMessage(problemPrompt),
			}
			request := &schemas.BifrostChatRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.ReasoningModel,
				Input:    messages,
				Params: &schemas.ChatParameters{
					MaxCompletionTokens: bifrost.Ptr(1800),
					Reasoning: &schemas.ChatReasoning{
						Effort:    bifrost.Ptr("high"),
						MaxTokens: bifrost.Ptr(1500),
					},
				},
				Fallbacks: testConfig.Fallbacks,
			}

			// Use retry framework for stream requests with reasoning
			retryConfig := StreamingRetryConfig()
			retryContext := TestRetryContext{
				ScenarioName: "ChatCompletionStreamWithReasoning",
				ExpectedBehavior: map[string]interface{}{
					"should_stream_reasoning":      true,
					"should_have_reasoning_events": true,
					"problem_type":                 "mathematical",
				},
				TestMetadata: map[string]interface{}{
					"provider":  testConfig.Provider,
					"model":     testConfig.ReasoningModel,
					"reasoning": true,
				},
			}

			// Use proper streaming retry wrapper for the stream request
			responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.ChatCompletionStreamRequest(bfCtx, request)
			})
			RequireNoError(t, err, "Chat completion stream with reasoning failed")
			if responseChannel == nil {
				t.Fatal("Response channel should not be nil")
			}

			var reasoningDetected bool
			var reasoningDetailsDetected bool
			var reasoningTokensDetected bool
			var responseCount int

			// Chunk timing tracking for batch detection
			var chunkTimings []chunkTiming
			var lastChunkTime time.Time

			streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
			defer cancel()

			t.Logf("🧠 Testing chat completion streaming with reasoning...")

			for {
				select {
				case response, ok := <-responseChannel:
					if !ok {
						goto reasoningStreamComplete
					}
					if response == nil {
						t.Fatal("Streaming response should not be nil")
					}

					// Record chunk timing
					now := time.Now()
					var timeSincePrev time.Duration
					if responseCount > 0 {
						timeSincePrev = now.Sub(lastChunkTime)
					}
					chunkTimings = append(chunkTimings, chunkTiming{
						index:         responseCount,
						arrivalTime:   now,
						timeSincePrev: timeSincePrev,
					})
					lastChunkTime = now
					responseCount++

					if response.BifrostChatResponse != nil {
						chatResp := response.BifrostChatResponse

						// Check for reasoning in choices
						for _, choice := range chatResp.Choices {
							if choice.ChatStreamResponseChoice == nil || choice.ChatStreamResponseChoice.Delta == nil {
								continue
							}
							delta := choice.ChatStreamResponseChoice.Delta

							// Check for reasoning content in delta
							if delta.Reasoning != nil && *delta.Reasoning != "" {
								reasoningDetected = true
								t.Logf("🧠 Reasoning content detected: %q", *delta.Reasoning)
							}

							// Check for reasoning details in delta
							if len(delta.ReasoningDetails) > 0 {
								reasoningDetailsDetected = true
								t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails))
								for _, detail := range delta.ReasoningDetails {
									t.Logf(" - Type: %s, Index: %d", detail.Type, detail.Index)
									switch detail.Type {
									case schemas.BifrostReasoningDetailsTypeText:
										if detail.Text != nil && *detail.Text != "" {
											// Log at most the first 100 bytes of the text.
											maxLen := 100
											text := *detail.Text
											if len(text) < maxLen {
												maxLen = len(text)
											}
											t.Logf(" Text preview: %q", text[:maxLen])
										}
									case schemas.BifrostReasoningDetailsTypeSummary:
										if detail.Summary != nil {
											t.Logf(" Summary length: %d", len(*detail.Summary))
										}
									case schemas.BifrostReasoningDetailsTypeEncrypted:
										if detail.Data != nil {
											t.Logf(" Encrypted data length: %d", len(*detail.Data))
										}
									}
								}
							}
						}

						// Check for reasoning tokens in usage (usually in final chunk)
						if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil {
							if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 {
								reasoningTokensDetected = true
								t.Logf("🔢 Reasoning tokens used: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens)
							}
						}
					}

					// Stop reading after a bounded number of chunks.
					// NOTE(review): this leaves responseChannel undrained; confirm the
					// producer does not block on send once the reader stops.
					if responseCount > 150 {
						goto reasoningStreamComplete
					}
				case <-streamCtx.Done():
					t.Fatal("Timeout waiting for chat completion streaming response with reasoning")
				}
			}

		reasoningStreamComplete:
			// Check for batched streaming
			if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched {
				t.Fatalf("❌ Streaming validation failed: %s", batchMsg)
			}
			if responseCount == 0 {
				t.Fatal("Should receive at least one streaming response")
			}

			// At least one of these should be detected for reasoning. This subtest
			// only warns when none is found.
			if !reasoningDetected && !reasoningDetailsDetected && !reasoningTokensDetected {
				t.Logf("⚠️ Warning: No explicit reasoning indicators found in streaming response")
			} else {
				t.Logf("✅ Reasoning indicators detected:")
				if reasoningDetected {
					t.Logf(" - Reasoning content found")
				}
				if reasoningDetailsDetected {
					t.Logf(" - Reasoning details found")
				}
				if reasoningTokensDetected {
					t.Logf(" - Reasoning tokens reported")
				}
			}

			t.Logf("✅ Chat completion streaming with reasoning test completed successfully")
		})

		// Additional test with full validation and retry support
		t.Run("ChatCompletionStreamWithReasoningValidated", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}
			if testConfig.Provider == schemas.OpenAI || testConfig.Provider == schemas.Groq {
				// OpenAI and Groq because reasoning for them in stream is extremely flaky.
				// t.Skip stops this subtest, so nothing after it runs.
				t.Skip("Skipping ChatCompletionStreamWithReasoningValidated test for OpenAI and Groq")
			}

			problemPrompt := "A farmer has 100 chickens and 50 cows. Each chicken lays 5 eggs per week, and each cow produces 20 liters of milk per day. If the farmer sells eggs for $0.25 each and milk for $1.50 per liter, and it costs $2 per week to feed each chicken and $15 per week to feed each cow, what is the farmer's weekly profit?"
			if testConfig.Provider == schemas.Cerebras {
				problemPrompt = "Hello how are you, can you search hackernews news regarding maxim ai for me? use your tools for this"
			}
			messages := []schemas.ChatMessage{
				CreateBasicChatMessage(problemPrompt),
			}
			request := &schemas.BifrostChatRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.ReasoningModel,
				Input:    messages,
				Params: &schemas.ChatParameters{
					MaxCompletionTokens: bifrost.Ptr(1800),
					Reasoning: &schemas.ChatReasoning{
						Effort:    bifrost.Ptr("high"),
						MaxTokens: bifrost.Ptr(1500),
					},
				},
				Fallbacks: testConfig.Fallbacks,
			}

			// Use retry framework for stream requests with reasoning and validation
			retryConfig := StreamingRetryConfig()
			retryContext := TestRetryContext{
				ScenarioName: "ChatCompletionStreamWithReasoningValidated",
				ExpectedBehavior: map[string]interface{}{
					"should_stream_reasoning":          true,
					"should_have_reasoning_indicators": true,
					"problem_type":                     "mathematical",
				},
				TestMetadata: map[string]interface{}{
					"provider":  testConfig.Provider,
					"model":     testConfig.ReasoningModel,
					"reasoning": true,
					"validated": true,
				},
			}

			// Use validation retry wrapper that includes stream reading and validation
			validationResult := WithChatStreamValidationRetry(
				t,
				retryConfig,
				retryContext,
				func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.ChatCompletionStreamRequest(bfCtx, request)
				},
				func(responseChannel chan *schemas.BifrostStreamChunk) ChatStreamValidationResult {
					var reasoningDetected bool
					var reasoningDetailsDetected bool
					var reasoningTokensDetected bool
					var responseCount int
					var streamErrors []string
					var fullContent strings.Builder

					// Chunk timing tracking for batch detection
					var chunkTimings []chunkTiming
					var lastChunkTime time.Time

					streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
					defer cancel()

					t.Logf("🧠 Testing validated chat completion streaming with reasoning...")

					for {
						select {
						case response, ok := <-responseChannel:
							if !ok {
								goto validatedReasoningStreamComplete
							}
							if response == nil {
								streamErrors = append(streamErrors, "❌ Streaming response should not be nil")
								continue
							}

							// Record chunk timing
							now := time.Now()
							var timeSincePrev time.Duration
							if responseCount > 0 {
								timeSincePrev = now.Sub(lastChunkTime)
							}
							chunkTimings = append(chunkTimings, chunkTiming{
								index:         responseCount,
								arrivalTime:   now,
								timeSincePrev: timeSincePrev,
							})
							lastChunkTime = now
							responseCount++

							if response.BifrostChatResponse != nil {
								chatResp := response.BifrostChatResponse

								// Check for reasoning in choices
								for _, choice := range chatResp.Choices {
									if choice.ChatStreamResponseChoice == nil || choice.ChatStreamResponseChoice.Delta == nil {
										continue
									}
									delta := choice.ChatStreamResponseChoice.Delta

									// Accumulate content
									if delta.Content != nil {
										fullContent.WriteString(*delta.Content)
										t.Logf("📝 Content chunk received (length: %d, total so far: %d)", len(*delta.Content), fullContent.Len())
									}
									// Check for reasoning content in delta
									if delta.Reasoning != nil && *delta.Reasoning != "" {
										reasoningDetected = true
										t.Logf("🧠 Reasoning content detected (length: %d)", len(*delta.Reasoning))
									}
									// Check for reasoning details in delta
									if len(delta.ReasoningDetails) > 0 {
										reasoningDetailsDetected = true
										t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails))
									}
								}

								// Check for reasoning tokens in usage
								if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil {
									if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 {
										reasoningTokensDetected = true
										t.Logf("🔢 Reasoning tokens: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens)
									}
								}
							}

							// Stop reading after a bounded number of chunks.
							// NOTE(review): this leaves responseChannel undrained; confirm the
							// producer does not block on send once the reader stops.
							if responseCount > 150 {
								goto validatedReasoningStreamComplete
							}
						case <-streamCtx.Done():
							streamErrors = append(streamErrors, "❌ Timeout waiting for streaming response with reasoning")
							goto validatedReasoningStreamComplete
						}
					}

				validatedReasoningStreamComplete:
					var errors []string
					if responseCount == 0 {
						errors = append(errors, "❌ Should receive at least one streaming response")
					}
					// Check for batched streaming
					if isBatched, batchMsg := detectBatchedStream(chunkTimings, 5); isBatched {
						errors = append(errors, fmt.Sprintf("❌ Streaming validation failed: %s", batchMsg))
					}

					// Check if at least one reasoning indicator is present
					hasAnyReasoningIndicator := reasoningDetected || reasoningDetailsDetected || reasoningTokensDetected
					if !hasAnyReasoningIndicator {
						errors = append(errors, fmt.Sprintf("❌ No reasoning indicators found in streaming response (received %d chunks)", responseCount))
					}

					// Check content - for reasoning models, content may come after reasoning or may not be present
					// If reasoning is detected, we consider it a valid response even without content
					content := strings.TrimSpace(fullContent.String())
					if content == "" {
						if hasAnyReasoningIndicator {
							// Log a warning but don't fail if reasoning is present
							t.Logf("⚠️ Warning: Reasoning detected but no content chunks received (this may be expected for some reasoning models)")
						} else {
							// Only require content if no reasoning indicators were found
							errors = append(errors, "❌ No content received in streaming response and no reasoning indicators found")
						}
					}
					// Stream-level errors count as validation failures too.
					errors = append(errors, streamErrors...)

					return ChatStreamValidationResult{
						Passed:           len(errors) == 0,
						Errors:           errors,
						ReceivedData:     responseCount > 0 && (content != "" || hasAnyReasoningIndicator),
						StreamErrors:     streamErrors,
						ToolCallDetected: false, // Not testing tool calls here
						ResponseCount:    responseCount,
					}
				},
			)

			// Check validation result
			if !validationResult.Passed {
				t.Fatalf("❌ Chat completion stream with reasoning validation failed after retries: %s", joinErrors(validationResult.Errors, validationResult.StreamErrors))
			}
			if validationResult.ResponseCount == 0 {
				t.Fatalf("❌ Should receive at least one streaming response")
			}

			t.Logf("✅ Validated chat completion streaming with reasoning test completed successfully")
		})
	}
}