// bifrost/core/internal/llmtests/structured_outputs.go

package llmtests

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strings"
	"testing"
	"time"

	bifrost "github.com/maximhq/bifrost/core"
	"github.com/maximhq/bifrost/core/schemas"
)

// structuredOutputSchema is the JSON schema shared by every structured-output
// test in this file. It deliberately combines the shapes that were previously
// problematic (and have since been fixed):
//
//   - target_node_id: a multi-type ("string" or "null") field that also carries
//     an enum.
//   - priority: a multi-type ("string" or "integer") field whose enum mixes
//     string labels and integers.
//
// All four properties are required and additional properties are disallowed.
//
// NOTE(review): the enum for target_node_id lists "" but not null, although the
// prompts in this file ask for a null value - confirm this is intentional.
var structuredOutputSchema = map[string]any{
	"type": "object",
	"properties": map[string]any{
		// Plain string enum.
		"action": map[string]any{
			"type":        "string",
			"enum":        []string{"continue", "transition"},
			"description": "The action to take",
		},
		// Nullable string enum.
		"target_node_id": map[string]any{
			"type":        []any{"string", "null"},
			"description": "The ID of the node to transition to. Required when action is 'transition', null/empty when action is 'continue'",
			"enum":        []string{"NODE-0", "NODE-1", "NODE-2", ""},
		},
		// String-or-integer field with a mixed-type enum.
		"priority": map[string]any{
			"type":        []any{"string", "integer"},
			"description": "Priority level - can be a number (1-10) or a string label (low/medium/high)",
			"enum":        []any{"low", "medium", "high", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		},
		// Free-form string.
		"reason": map[string]any{
			"type":        "string",
			"description": "Explanation for the decision",
		},
	},
	"required":             []string{"action", "target_node_id", "priority", "reason"},
	"additionalProperties": false,
}

// RunStructuredOutputChatTest exercises structured outputs through the
// non-streaming Chat Completions API.
//
// When the StructuredOutputs scenario is disabled for the provider, it only
// logs a message and returns. Otherwise it runs the "StructuredOutputChat"
// subtest (in parallel unless SKIP_PARALLEL_TESTS is "true"), which contains
// two cases: one expecting a string target_node_id and one expecting null.
func RunStructuredOutputChatTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if supported := testConfig.Scenarios.StructuredOutputs; !supported {
		t.Logf("Structured outputs not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("StructuredOutputChat", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		cases := []struct {
			name        string
			expectValue bool
		}{
			// target_node_id should have a string value
			{name: "WithTargetNode", expectValue: true},
			// target_node_id should be null
			{name: "WithNullTargetNode", expectValue: false},
		}

		// The inner subtests are not parallel, so each t.Run call finishes
		// before the loop advances.
		for _, tc := range cases {
			wantValue := tc.expectValue
			t.Run(tc.name, func(t *testing.T) {
				testStructuredOutputChatWithValue(t, client, ctx, testConfig, wantValue)
			})
		}
	})
}

// testStructuredOutputChatWithValue runs one non-streaming Chat Completions
// request constrained by structuredOutputSchema and validates the JSON reply.
//
// When expectValue is true the prompt asks for a transition to NODE-1 and
// target_node_id must come back as a non-empty string. When it is false the
// prompt asks for a null target_node_id; a non-null value is only logged as a
// warning and does not fail the test.
//
// The request is issued through WithChatTestRetry. The test fails if the call
// returns an error, if it returns no response at all, or if the content is
// empty, is not valid JSON, or lacks any of the required fields.
func testStructuredOutputChatWithValue(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig, expectValue bool) {
	// Pick the prompt that steers the model towards the expected target_node_id shape.
	prompt := "You are a workflow manager. User says: 'Continue with current task'. Analyze this and return: action='continue', target_node_id=null (must be null, not a string), and priority='medium'. Provide reasoning."
	if expectValue {
		prompt = "You are a workflow manager. User says: 'Transition to NODE-1'. Analyze this and return: action='transition', target_node_id='NODE-1' (NOT null or empty), and priority as number 5. Provide reasoning."
	}
	chatMessages := []schemas.ChatMessage{CreateBasicChatMessage(prompt)}

	// Use retry framework
	retryConfig := GetTestRetryConfigForScenario("StructuredOutputChat", testConfig)
	retryContext := TestRetryContext{
		ScenarioName: "StructuredOutputChat",
		ExpectedBehavior: map[string]any{
			"should_return_valid_json": true,
			"should_match_schema":      true,
		},
		TestMetadata: map[string]any{
			"provider": testConfig.Provider,
			"model":    testConfig.ChatModel,
		},
	}

	chatRetryConfig := ChatRetryConfig{
		MaxAttempts: retryConfig.MaxAttempts,
		BaseDelay:   retryConfig.BaseDelay,
		MaxDelay:    retryConfig.MaxDelay,
		Conditions:  []ChatRetryCondition{},
		OnRetry:     retryConfig.OnRetry,
		OnFinalFail: retryConfig.OnFinalFail,
	}

	// chatOperation builds a fresh context and request for every attempt.
	chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
		// Add Anthropic beta header for structured outputs if model contains "claude"
		// (skipped when the provider is Vertex).
		reqCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
		if strings.Contains(strings.ToLower(testConfig.ChatModel), "claude") && testConfig.Provider != schemas.Vertex {
			extraHeaders := map[string][]string{
				"anthropic-beta": {"structured-outputs-2025-11-13"},
			}
			reqCtx.SetValue(schemas.BifrostContextKeyExtraHeaders, extraHeaders)
		}

		var responseFormat any = map[string]any{
			"type": "json_schema",
			"json_schema": map[string]any{
				"name":   "decision_schema",
				"strict": true,
				"schema": structuredOutputSchema,
			},
		}

		chatReq := &schemas.BifrostChatRequest{
			Provider: testConfig.Provider,
			Model:    testConfig.ChatModel,
			Input:    chatMessages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(5000),
				ResponseFormat:      &responseFormat,
			},
			Fallbacks: testConfig.Fallbacks,
		}
		return client.ChatCompletionRequest(reqCtx, chatReq)
	}

	expectations := GetExpectationsForScenario("StructuredOutputChat", testConfig, map[string]any{})
	expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)

	chatResponse, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "StructuredOutputChat", chatOperation)

	if chatError != nil {
		t.Fatalf("❌ Chat Completions API with structured output failed: %s", GetErrorMessage(chatError))
	}
	// A nil response with a nil error used to skip every assertion below and
	// let the test pass without validating anything.
	if chatResponse == nil {
		t.Fatalf("❌ Chat Completions API with structured output returned a nil response")
	}

	// Validate the response is valid JSON matching our schema
	content := GetChatContent(chatResponse)
	t.Logf("📝 Structured output response: %s", content)

	// Assert content is non-empty
	if content == "" {
		t.Fatalf("❌ Content should not be empty for structured output")
	}

	// For Bedrock: verify no tool calls leaked through (response_format was properly converted)
	if testConfig.Provider == schemas.Bedrock {
		if len(chatResponse.Choices) > 0 {
			choice := chatResponse.Choices[0]
			if choice.ChatNonStreamResponseChoice != nil && choice.Message != nil && choice.Message.ChatAssistantMessage != nil {
				if n := len(choice.Message.ChatAssistantMessage.ToolCalls); n > 0 {
					t.Fatalf("❌ Bedrock: structured output should not contain tool calls, got %d tool calls", n)
				}
			}
		}
		t.Logf("✅ Bedrock: no tool calls in response (response_format properly converted)")
	}

	// Parse and validate the JSON
	var result map[string]any
	if err := json.Unmarshal([]byte(content), &result); err != nil {
		t.Fatalf("❌ Failed to parse structured output as JSON: %v", err)
	}

	// Validate required fields
	action, ok := result["action"].(string)
	if !ok || action == "" {
		t.Fatalf("❌ Missing or invalid 'action' field in structured output")
	}
	t.Logf("✅ Action: %s", action)

	reason, ok := result["reason"].(string)
	if !ok || reason == "" {
		t.Fatalf("❌ Missing or invalid 'reason' field in structured output")
	}
	t.Logf("✅ Reason: %s", reason)

	// target_node_id can be string or null - validate based on expectation
	targetNodeID, hasTargetNode := result["target_node_id"]
	if !hasTargetNode {
		t.Fatalf("❌ Missing 'target_node_id' field in structured output")
	}

	switch {
	case expectValue:
		// Should be a non-empty string
		targetStr, isString := targetNodeID.(string)
		if !isString || targetStr == "" {
			t.Fatalf("❌ Expected 'target_node_id' to be a non-empty string, got: %v (type: %T)", targetNodeID, targetNodeID)
		}
		t.Logf("✅ Target Node ID has value: %s", targetStr)
	case targetNodeID != nil:
		// Should be null, but a non-null value is tolerated and only logged.
		t.Logf("⚠️  Expected 'target_node_id' to be null, got: %v (type: %T) - this is acceptable if provider returns empty string", targetNodeID, targetNodeID)
	default:
		t.Logf("✅ Target Node ID is null (as expected)")
	}

	// priority can be string or integer; only its presence is checked.
	priority, hasPriority := result["priority"]
	if !hasPriority {
		t.Fatalf("❌ Missing 'priority' field in structured output")
	}
	t.Logf("✅ Priority: %v (type: %T)", priority, priority)

	t.Logf("🎉 Chat Completions API with structured output test passed!")
}

// RunStructuredOutputChatStreamTest exercises structured outputs through the
// streaming Chat Completions API.
//
// It logs and returns unless both the StructuredOutputs and CompletionStream
// scenarios are enabled. Otherwise it streams one request constrained by
// structuredOutputSchema, concatenates the content deltas of the first choice
// of every chunk, and validates that the assembled text is JSON containing the
// required fields. The prompt asks for a null target_node_id; a non-null value
// is only logged as a warning.
func RunStructuredOutputChatStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !(testConfig.Scenarios.StructuredOutputs && testConfig.Scenarios.CompletionStream) {
		t.Logf("Structured outputs streaming not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("StructuredOutputChatStream", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		// Test with null target_node_id
		chatMessages := []schemas.ChatMessage{
			CreateBasicChatMessage("You are a workflow manager. User says: 'Continue with current task'. Analyze this and return: action='continue', target_node_id=null (must be null), and priority=3 (as integer). Provide reasoning."),
		}

		// Add Anthropic beta header for structured outputs if model contains "claude"
		// (skipped when the provider is Vertex).
		reqCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
		isClaude := strings.Contains(strings.ToLower(testConfig.ChatModel), "claude")
		if isClaude && testConfig.Provider != schemas.Vertex {
			reqCtx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{
				"anthropic-beta": {"structured-outputs-2025-11-13"},
			})
		}

		var responseFormat any = map[string]any{
			"type": "json_schema",
			"json_schema": map[string]any{
				"name":   "decision_schema",
				"strict": true,
				"schema": structuredOutputSchema,
			},
		}

		request := &schemas.BifrostChatRequest{
			Provider: testConfig.Provider,
			Model:    testConfig.ChatModel,
			Input:    chatMessages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(5000),
				ResponseFormat:      &responseFormat,
			},
			Fallbacks: testConfig.Fallbacks,
		}

		retryConfig := StreamingRetryConfig()
		retryContext := TestRetryContext{
			ScenarioName: "StructuredOutputChatStream",
			ExpectedBehavior: map[string]any{
				"should_stream_json":  true,
				"should_match_schema": true,
			},
			TestMetadata: map[string]any{
				"provider": testConfig.Provider,
				"model":    testConfig.ChatModel,
			},
		}

		startStream := func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
			return client.ChatCompletionStreamRequest(reqCtx, request)
		}
		responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, startStream)

		RequireNoError(t, err, "Chat streaming with structured output failed")
		if responseChannel == nil {
			t.Fatal("Response channel should not be nil")
		}

		var (
			assembled      strings.Builder
			chunkCount     int
			toolCallDeltas int // Track tool calls for Bedrock assertion
		)

		streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
		defer cancel()

		t.Logf("📡 Starting to read structured output streaming response...")

		// Read until the channel closes, more than 500 chunks have arrived, or
		// the timeout fires.
	readLoop:
		for {
			select {
			case chunk, open := <-responseChannel:
				if !open {
					break readLoop
				}

				if chunk == nil {
					t.Fatal("❌ Streaming response should not be nil")
				}
				chunkCount++

				if chatChunk := chunk.BifrostChatResponse; chatChunk != nil && len(chatChunk.Choices) > 0 {
					// Only the first choice of each chunk is inspected.
					if delta := chatChunk.Choices[0].Delta; delta != nil {
						if delta.Content != nil {
							assembled.WriteString(*delta.Content)
						}
						// Track tool calls for Bedrock assertion
						toolCallDeltas += len(delta.ToolCalls)
					}
				}

				if chunkCount > 500 {
					break readLoop
				}

			case <-streamCtx.Done():
				t.Fatal("❌ Timeout waiting for structured output streaming response")
			}
		}

		if chunkCount == 0 {
			t.Fatal("❌ Should receive at least one streaming response")
		}

		finalContent := strings.TrimSpace(assembled.String())
		t.Logf("📝 Assembled structured output (%d chars): %s", len(finalContent), finalContent)

		// Assert content is non-empty
		if finalContent == "" {
			t.Fatalf("❌ Content should not be empty for structured output")
		}

		// For Bedrock: verify no tool calls leaked through (response_format was properly converted)
		if testConfig.Provider == schemas.Bedrock {
			if toolCallDeltas > 0 {
				t.Fatalf("❌ Bedrock: structured output streaming should not contain tool calls, got %d tool call deltas", toolCallDeltas)
			}
			t.Logf("✅ Bedrock: no tool calls in streaming response (response_format properly converted)")
		}

		// Validate the assembled content is valid JSON matching our schema
		var result map[string]any
		if parseErr := json.Unmarshal([]byte(finalContent), &result); parseErr != nil {
			t.Fatalf("❌ Failed to parse assembled structured output as JSON: %v", parseErr)
		}

		// Validate required fields
		action, actionOK := result["action"].(string)
		if !actionOK || action == "" {
			t.Fatalf("❌ Missing or invalid 'action' field in structured output")
		}
		t.Logf("✅ Action: %s", action)

		reason, reasonOK := result["reason"].(string)
		if !reasonOK || reason == "" {
			t.Fatalf("❌ Missing or invalid 'reason' field in structured output")
		}
		t.Logf("✅ Reason: %s", reason)

		// target_node_id validation - should be null for "continue" action;
		// a non-null value is logged but tolerated.
		targetNodeID, hasTargetNode := result["target_node_id"]
		if !hasTargetNode {
			t.Fatalf("❌ Missing 'target_node_id' field in structured output")
		}
		if targetNodeID == nil {
			t.Logf("✅ Target Node ID is null (as expected)")
		} else {
			t.Logf("⚠️  Expected 'target_node_id' to be null, got: %v (type: %T)", targetNodeID, targetNodeID)
		}

		// priority can be string or integer (from JSON unmarshaling, numbers become float64)
		priority, hasPriority := result["priority"]
		if !hasPriority {
			t.Fatalf("❌ Missing 'priority' field in structured output")
		}
		t.Logf("✅ Priority: %v (type: %T)", priority, priority)

		t.Logf("🎉 Chat streaming with structured output test passed!")
	})
}

// RunStructuredOutputResponsesTest exercises structured outputs through the
// non-streaming Responses API.
//
// It logs and returns when the StructuredOutputs scenario is disabled.
// Otherwise it sends one request whose text format is built from
// structuredOutputSchema, asking for a transition to NODE-0, and validates
// that the reply is JSON with the required fields and a non-empty string
// target_node_id.
//
// The request is issued through WithResponsesTestRetry. The test fails if the
// call returns an error or returns no response at all.
func RunStructuredOutputResponsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.StructuredOutputs {
		t.Logf("Structured outputs not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("StructuredOutputResponses", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		// Test with string value for target_node_id
		responsesMessages := []schemas.ResponsesMessage{
			CreateBasicResponsesMessage("You are a workflow manager. User says: 'Transition to the first node'. Analyze this and return: action='transition', target_node_id='NODE-0' (NOT null), priority='high' (as string). Provide reasoning."),
		}

		// Add Anthropic beta header for structured outputs if model contains "claude"
		// (skipped when the provider is Vertex).
		reqCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
		if strings.Contains(strings.ToLower(testConfig.ChatModel), "claude") && testConfig.Provider != schemas.Vertex {
			extraHeaders := map[string][]string{
				"anthropic-beta": {"structured-outputs-2025-11-13"},
			}
			reqCtx.SetValue(schemas.BifrostContextKeyExtraHeaders, extraHeaders)
		}

		retryConfig := GetTestRetryConfigForScenario("StructuredOutputResponses", testConfig)
		retryContext := TestRetryContext{
			ScenarioName: "StructuredOutputResponses",
			ExpectedBehavior: map[string]any{
				"should_return_valid_json": true,
				"should_match_schema":      true,
			},
			TestMetadata: map[string]any{
				"provider": testConfig.Provider,
				"model":    testConfig.ChatModel,
			},
		}

		responsesRetryConfig := ResponsesRetryConfig{
			MaxAttempts: retryConfig.MaxAttempts,
			BaseDelay:   retryConfig.BaseDelay,
			MaxDelay:    retryConfig.MaxDelay,
			Conditions:  []ResponsesRetryCondition{},
			OnRetry:     retryConfig.OnRetry,
			OnFinalFail: retryConfig.OnFinalFail,
		}

		// responsesOperation rebuilds the request from the shared schema on
		// every attempt.
		responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
			typeStr := "object"
			props := structuredOutputSchema["properties"].(map[string]any)
			additionalProps := structuredOutputSchema["additionalProperties"].(bool)
			responsesReq := &schemas.BifrostResponsesRequest{
				Provider: testConfig.Provider,
				Model:    testConfig.ChatModel,
				Input:    responsesMessages,
				Params: &schemas.ResponsesParameters{
					MaxOutputTokens: bifrost.Ptr(5000),
					Text: &schemas.ResponsesTextConfig{
						Format: &schemas.ResponsesTextConfigFormat{
							Type: "json_schema",
							Name: bifrost.Ptr("decision_schema"),
							JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{
								Type:       &typeStr,
								Properties: &props,
								Required:   structuredOutputSchema["required"].([]string),
								AdditionalProperties: &schemas.AdditionalPropertiesStruct{
									AdditionalPropertiesBool: &additionalProps,
								},
							},
						},
					},
				},
				Fallbacks: testConfig.Fallbacks,
			}
			return client.ResponsesRequest(reqCtx, responsesReq)
		}

		expectations := GetExpectationsForScenario("StructuredOutputResponses", testConfig, map[string]any{})
		expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)

		responsesResponse, responsesError := WithResponsesTestRetry(t, responsesRetryConfig, retryContext, expectations, "StructuredOutputResponses", responsesOperation)

		if responsesError != nil {
			t.Fatalf("❌ Responses API with structured output failed: %s", GetErrorMessage(responsesError))
		}
		// A nil response with a nil error used to skip every assertion below
		// and let the test pass without validating anything.
		if responsesResponse == nil {
			t.Fatalf("❌ Responses API with structured output returned a nil response")
		}

		// Validate the response is valid JSON matching our schema
		content := GetResponsesContent(responsesResponse)
		t.Logf("📝 Structured output response: %s", content)

		// Assert content is non-empty
		if content == "" {
			t.Fatalf("❌ Content should not be empty for structured output")
		}

		// For Bedrock: verify no function_call items leaked through (response_format was properly converted)
		if testConfig.Provider == schemas.Bedrock {
			for _, outputItem := range responsesResponse.Output {
				if outputItem.Type != nil && *outputItem.Type == schemas.ResponsesMessageTypeFunctionCall {
					t.Fatalf("❌ Bedrock: structured output should not contain function_call items")
				}
			}
			t.Logf("✅ Bedrock: no function_call items in response (response_format properly converted)")
		}

		// Parse and validate the JSON
		var result map[string]any
		if err := json.Unmarshal([]byte(content), &result); err != nil {
			t.Fatalf("❌ Failed to parse structured output as JSON: %v", err)
		}

		// Validate required fields
		action, ok := result["action"].(string)
		if !ok || action == "" {
			t.Fatalf("❌ Missing or invalid 'action' field in structured output")
		}
		t.Logf("✅ Action: %s", action)

		reason, ok := result["reason"].(string)
		if !ok || reason == "" {
			t.Fatalf("❌ Missing or invalid 'reason' field in structured output")
		}
		t.Logf("✅ Reason: %s", reason)

		// target_node_id validation - should be a string value for "transition" action
		targetNodeID, hasTargetNode := result["target_node_id"]
		if !hasTargetNode {
			t.Fatalf("❌ Missing 'target_node_id' field in structured output")
		}
		targetStr, ok := targetNodeID.(string)
		if !ok || targetStr == "" {
			t.Fatalf("❌ Expected 'target_node_id' to be a non-empty string, got: %v (type: %T)", targetNodeID, targetNodeID)
		}
		t.Logf("✅ Target Node ID has value: %s", targetStr)

		// priority can be string or integer; only its presence is checked.
		priority, hasPriority := result["priority"]
		if !hasPriority {
			t.Fatalf("❌ Missing 'priority' field in structured output")
		}
		t.Logf("✅ Priority: %v (type: %T)", priority, priority)

		t.Logf("🎉 Responses API with structured output test passed!")
	})
}

// RunStructuredOutputResponsesStreamTest exercises structured outputs through
// the streaming Responses API.
//
// It logs and returns unless both the StructuredOutputs and CompletionStream
// scenarios are enabled. Otherwise it streams one request whose text format is
// built from structuredOutputSchema and hands the stream to
// WithResponsesStreamValidationRetry together with a validation callback.
//
// The callback assembles text from output-text deltas, added output items and
// added content parts, then checks that the result is JSON with the required
// fields. Failures are reported through ResponsesStreamValidationResult rather
// than by failing the test directly; the test fails only if the final result
// did not pass. The prompt asks for a null target_node_id; a non-null value is
// only logged as a warning.
func RunStructuredOutputResponsesStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
	if !testConfig.Scenarios.StructuredOutputs || !testConfig.Scenarios.CompletionStream {
		t.Logf("Structured outputs streaming not supported for provider %s", testConfig.Provider)
		return
	}

	t.Run("StructuredOutputResponsesStream", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		// Test with null target_node_id
		responsesMessages := []schemas.ResponsesMessage{
			{
				Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
				Content: &schemas.ResponsesMessageContent{
					ContentStr: schemas.Ptr("You are a workflow manager. User says: 'Continue current task'. Analyze this and return: action='continue', target_node_id=null (must be null), priority=7 (as integer). Provide reasoning."),
				},
			},
		}

		// Add Anthropic beta header for structured outputs if model contains "claude"
		// (skipped when the provider is Vertex).
		reqCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
		if strings.Contains(strings.ToLower(testConfig.ChatModel), "claude") && testConfig.Provider != schemas.Vertex {
			extraHeaders := map[string][]string{
				"anthropic-beta": {"structured-outputs-2025-11-13"},
			}
			reqCtx.SetValue(schemas.BifrostContextKeyExtraHeaders, extraHeaders)
		}

		typeStr := "object"
		props := structuredOutputSchema["properties"].(map[string]any)
		additionalProps := structuredOutputSchema["additionalProperties"].(bool)
		request := &schemas.BifrostResponsesRequest{
			Provider: testConfig.Provider,
			Model:    testConfig.ChatModel,
			Input:    responsesMessages,
			Params: &schemas.ResponsesParameters{
				MaxOutputTokens: bifrost.Ptr(5000),
				Text: &schemas.ResponsesTextConfig{
					Format: &schemas.ResponsesTextConfigFormat{
						Type: "json_schema",
						Name: bifrost.Ptr("decision_schema"),
						JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{
							Type:       &typeStr,
							Properties: &props,
							Required:   structuredOutputSchema["required"].([]string),
							AdditionalProperties: &schemas.AdditionalPropertiesStruct{
								AdditionalPropertiesBool: &additionalProps,
							},
						},
					},
				},
			},
			Fallbacks: testConfig.Fallbacks,
		}

		retryConfig := StreamingRetryConfig()
		retryContext := TestRetryContext{
			ScenarioName: "StructuredOutputResponsesStream",
			ExpectedBehavior: map[string]any{
				"should_stream_json":  true,
				"should_match_schema": true,
			},
			TestMetadata: map[string]any{
				"provider": testConfig.Provider,
				"model":    testConfig.ChatModel,
			},
		}

		// Use validation retry wrapper
		validationResult := WithResponsesStreamValidationRetry(t, retryConfig, retryContext,
			func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
				return client.ResponsesStreamRequest(reqCtx, request)
			},
			func(responseChannel chan *schemas.BifrostStreamChunk) ResponsesStreamValidationResult {
				var fullContent strings.Builder
				var responseCount int
				var functionCallEventCount int // Track function call events for Bedrock assertion

				streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
				defer cancel()

				t.Logf("📡 Starting to read structured output streaming response...")

				// Read until the channel closes, more than 500 chunks have
				// arrived, or the timeout fires.
			readLoop:
				for {
					select {
					case response, ok := <-responseChannel:
						if !ok {
							if responseCount == 0 {
								return ResponsesStreamValidationResult{
									Passed:       false,
									Errors:       []string{"❌ Stream closed without receiving any data"},
									ReceivedData: false,
								}
							}
							break readLoop
						}

						if response == nil {
							return ResponsesStreamValidationResult{
								Passed: false,
								Errors: []string{"❌ Streaming response should not be nil"},
							}
						}
						responseCount++

						if response.BifrostResponsesStreamResponse != nil {
							streamResp := response.BifrostResponsesStreamResponse

							// Track function call events for Bedrock assertion
							if streamResp.Type == schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta ||
								streamResp.Type == schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone {
								functionCallEventCount++
							}

							switch streamResp.Type {
							case schemas.ResponsesStreamResponseTypeOutputTextDelta:
								if streamResp.Delta != nil {
									fullContent.WriteString(*streamResp.Delta)
								}

							case schemas.ResponsesStreamResponseTypeOutputItemAdded:
								if streamResp.Item != nil && streamResp.Item.Content != nil {
									// Check ContentBlocks first
									if len(streamResp.Item.Content.ContentBlocks) > 0 {
										for _, block := range streamResp.Item.Content.ContentBlocks {
											if block.Type == schemas.ResponsesOutputMessageContentTypeText && block.Text != nil {
												fullContent.WriteString(*block.Text)
											}
										}
									} else if streamResp.Item.Content.ContentStr != nil {
										// Fallback to ContentStr
										fullContent.WriteString(*streamResp.Item.Content.ContentStr)
									}
								}
								// Track function call output items for Bedrock assertion
								if streamResp.Item != nil && streamResp.Item.Type != nil && *streamResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall {
									functionCallEventCount++
								}

							case schemas.ResponsesStreamResponseTypeContentPartAdded:
								if streamResp.Part != nil && streamResp.Part.Text != nil {
									fullContent.WriteString(*streamResp.Part.Text)
								}

							case schemas.ResponsesStreamResponseTypeError:
								errorMsg := "unknown error"
								if streamResp.Message != nil {
									errorMsg = *streamResp.Message
								}
								return ResponsesStreamValidationResult{
									Passed: false,
									Errors: []string{fmt.Sprintf("❌ Error in streaming: %s", errorMsg)},
								}
							}
						}

						if responseCount > 500 {
							break readLoop
						}

					case <-streamCtx.Done():
						return ResponsesStreamValidationResult{
							Passed:       false,
							Errors:       []string{"❌ Timeout waiting for structured output streaming response"},
							ReceivedData: responseCount > 0,
						}
					}
				}

				finalContent := strings.TrimSpace(fullContent.String())
				t.Logf("📝 Assembled structured output (%d chars): %s", len(finalContent), finalContent)

				// Assert content is non-empty
				if finalContent == "" {
					return ResponsesStreamValidationResult{
						Passed:       false,
						Errors:       []string{"❌ Content should not be empty for structured output"},
						ReceivedData: responseCount > 0,
					}
				}

				// For Bedrock: verify no function_call events leaked through (response_format was properly converted)
				if testConfig.Provider == schemas.Bedrock {
					if functionCallEventCount > 0 {
						return ResponsesStreamValidationResult{
							Passed:       false,
							Errors:       []string{fmt.Sprintf("❌ Bedrock: structured output streaming should not contain function_call events, got %d", functionCallEventCount)},
							ReceivedData: responseCount > 0,
						}
					}
					t.Logf("✅ Bedrock: no function_call events in streaming response (response_format properly converted)")
				}

				// Validate the assembled content is valid JSON matching our schema
				var result map[string]any
				if err := json.Unmarshal([]byte(finalContent), &result); err != nil {
					// Chunks were received by this point, so report that like
					// the other post-stream failure paths do.
					return ResponsesStreamValidationResult{
						Passed:       false,
						Errors:       []string{fmt.Sprintf("❌ Failed to parse assembled structured output as JSON: %v", err)},
						ReceivedData: responseCount > 0,
					}
				}

				// Validate required fields, collecting every problem instead
				// of stopping at the first one.
				var validationErrors []string

				if action, ok := result["action"].(string); !ok || action == "" {
					validationErrors = append(validationErrors, "❌ Missing or invalid 'action' field in structured output")
				} else {
					t.Logf("✅ Action: %s", action)
				}

				if reason, ok := result["reason"].(string); !ok || reason == "" {
					validationErrors = append(validationErrors, "❌ Missing or invalid 'reason' field in structured output")
				} else {
					t.Logf("✅ Reason: %s", reason)
				}

				// target_node_id validation - should be null for "continue" action;
				// a non-null value is logged but tolerated.
				targetNodeID, hasTargetNode := result["target_node_id"]
				switch {
				case !hasTargetNode:
					validationErrors = append(validationErrors, "❌ Missing 'target_node_id' field in structured output")
				case targetNodeID != nil:
					t.Logf("⚠️  Expected 'target_node_id' to be null, got: %v (type: %T)", targetNodeID, targetNodeID)
				default:
					t.Logf("✅ Target Node ID is null (as expected)")
				}

				if priority, ok := result["priority"]; !ok {
					validationErrors = append(validationErrors, "❌ Missing 'priority' field in structured output")
				} else {
					t.Logf("✅ Priority: %v (type: %T)", priority, priority)
				}

				if len(validationErrors) > 0 {
					return ResponsesStreamValidationResult{
						Passed:       false,
						Errors:       validationErrors,
						ReceivedData: responseCount > 0,
					}
				}

				return ResponsesStreamValidationResult{
					Passed:       true,
					ReceivedData: responseCount > 0,
				}
			})

		if !validationResult.Passed {
			// Build the combined list in a fresh slice so the append cannot
			// write into the backing array of validationResult.Errors.
			allErrors := make([]string, 0, len(validationResult.Errors)+len(validationResult.StreamErrors))
			allErrors = append(allErrors, validationResult.Errors...)
			allErrors = append(allErrors, validationResult.StreamErrors...)
			errorMsg := strings.Join(allErrors, "; ")
			if !strings.Contains(errorMsg, "❌") {
				errorMsg = fmt.Sprintf("❌ %s", errorMsg)
			}
			t.Fatalf("❌ Responses streaming with structured output validation failed: %s", errorMsg)
		}

		t.Logf("🎉 Responses streaming with structured output test passed!")
	})
}