first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/core/internal/llmtests/reasoning_opus.go
+++ b/core/internal/llmtests/reasoning_opus.go
@@ -0,0 +1,643 @@
+package llmtests
+
+import (
+	"context"
+	"os"
+	"testing"
+
+	bifrost "github.com/maximhq/bifrost/core"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// OpusReasoningTestConfig holds configuration for Opus-specific reasoning tests.
+//
+// One value describes a single provider: which model identifiers to use for
+// the Opus 4.5 and Opus 4.6 suites, which fallbacks to attach to every request,
+// and whether either suite should be skipped.
+type OpusReasoningTestConfig struct {
+	Provider    schemas.ModelProvider // Provider set on every request built by the tests
+	Opus45Model string                // Opus 4.5 model identifier; empty skips the Opus 4.5 tests
+	Opus46Model string                // Opus 4.6 model identifier; empty skips the Opus 4.6 tests
+	Fallbacks   []schemas.Fallback    // Fallbacks copied onto every request built by the tests
+	SkipOpus45  bool                  // Skip Opus 4.5 tests
+	SkipOpus46  bool                  // Skip Opus 4.6 tests
+	SkipReason  string                // Reason for skipping, included in the skip message
+}
+
+// GetOpusReasoningTestConfigs returns test configurations for Opus reasoning across providers.
+//
+// The result lists Anthropic, Bedrock, Azure and Vertex in that order. Every
+// entry carries an empty, non-nil fallback list and leaves the skip flags at
+// their zero values.
+func GetOpusReasoningTestConfigs() []OpusReasoningTestConfig {
+	// newConfig pairs a provider with its two Opus model identifiers.
+	newConfig := func(provider schemas.ModelProvider, opus45, opus46 string) OpusReasoningTestConfig {
+		return OpusReasoningTestConfig{
+			Provider:    provider,
+			Opus45Model: opus45,
+			Opus46Model: opus46,
+			Fallbacks:   []schemas.Fallback{},
+		}
+	}
+
+	return []OpusReasoningTestConfig{
+		newConfig(schemas.Anthropic, "claude-opus-4-5-20251101", "claude-opus-4-6-20260210"),
+		newConfig(schemas.Bedrock, "global.anthropic.claude-opus-4-5-20251101-v1:0", "global.anthropic.claude-opus-4-6-v1"),
+		// The Azure and Vertex entries use deployment names rather than dated model IDs.
+		newConfig(schemas.Azure, "claude-opus-4-5", "claude-opus-4-6"),
+		newConfig(schemas.Vertex, "claude-opus-4-5", "claude-opus-4-6"),
+	}
+}
+
+// RunOpus45ReasoningTest tests extended thinking with Opus 4.5 (budget_tokens mode).
+//
+// The test is skipped when config.SkipOpus45 is set or config.Opus45Model is
+// empty. Otherwise the same word problem is sent through the Responses API and
+// the Chat Completions API, each in its own subtest wrapped by the retry
+// helpers. Only the Chat Completions request sets an explicit reasoning token
+// budget (MaxTokens); the Responses request specifies effort only.
+//
+// Subtests call t.Parallel unless the SKIP_PARALLEL_TESTS environment variable
+// is "true". A request error that survives the retries is fatal, empty content
+// is reported as an error, and a missing reasoning structure is only logged as
+// a warning. The success message is logged only when the subtest has not
+// recorded a failure.
+func RunOpus45ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
+	if config.SkipOpus45 {
+		t.Skipf("Skipping Opus 4.5 test: %s", config.SkipReason)
+		return
+	}
+
+	if config.Opus45Model == "" {
+		t.Skip("No Opus 4.5 model configured")
+		return
+	}
+
+	t.Run("Opus45_ExtendedThinking", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		// Complex reasoning problem shared by both API subtests
+		problemPrompt := "Solve this step by step: A train leaves station A at 9:00 AM traveling at 60 mph. Another train leaves station B (300 miles away) at 10:00 AM traveling towards station A at 80 mph. At what time will they meet, and how far from station A?"
+
+		// Test config handed to the retry and expectation helpers
+		testConfig := ComprehensiveTestConfig{
+			Provider:       config.Provider,
+			ReasoningModel: config.Opus45Model,
+			Scenarios: TestScenarios{
+				Reasoning: true,
+			},
+			Fallbacks: config.Fallbacks,
+		}
+
+		// Test via Responses API
+		t.Run("ResponsesAPI", func(t *testing.T) {
+			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+				t.Parallel()
+			}
+
+			responsesMessages := []schemas.ResponsesMessage{
+				CreateBasicResponsesMessage(problemPrompt),
+			}
+
+			responsesReq := &schemas.BifrostResponsesRequest{
+				Provider: config.Provider,
+				Model:    config.Opus45Model,
+				Input:    responsesMessages,
+				Params: &schemas.ResponsesParameters{
+					MaxOutputTokens: bifrost.Ptr(4000),
+					Reasoning: &schemas.ResponsesParametersReasoning{
+						Effort: bifrost.Ptr("high"),
+					},
+					Include: []string{"reasoning.encrypted_content"},
+				},
+				Fallbacks: config.Fallbacks,
+			}
+
+			// Use retry framework with enhanced validation for reasoning
+			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
+			retryContext := TestRetryContext{
+				ScenarioName: "Opus45_Reasoning_Responses",
+				ExpectedBehavior: map[string]interface{}{
+					"should_show_reasoning": true,
+					"mathematical_problem":  true,
+					"step_by_step":          true,
+					"model_version":         "opus-4.5",
+					"thinking_mode":         "budget_tokens",
+				},
+				TestMetadata: map[string]interface{}{
+					"provider":          config.Provider,
+					"model":             config.Opus45Model,
+					"problem_type":      "mathematical",
+					"complexity":        "high",
+					"expects_reasoning": true,
+				},
+			}
+			// Carry the scenario's retry settings over to the Responses-specific
+			// config; no additional retry conditions are registered.
+			responsesRetryConfig := ResponsesRetryConfig{
+				MaxAttempts: retryConfig.MaxAttempts,
+				BaseDelay:   retryConfig.BaseDelay,
+				MaxDelay:    retryConfig.MaxDelay,
+				Conditions:  []ResponsesRetryCondition{},
+				OnRetry:     retryConfig.OnRetry,
+				OnFinalFail: retryConfig.OnFinalFail,
+			}
+
+			// Enhanced validation for reasoning scenarios
+			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
+				"requires_reasoning": true,
+			})
+			expectations = ModifyExpectationsForProvider(expectations, config.Provider)
+
+			response, responsesError := WithResponsesTestRetry(t, responsesRetryConfig, retryContext, expectations, "Opus45_Reasoning_Responses", func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+				// A new Bifrost context is built each time the operation is invoked.
+				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+				return client.ResponsesRequest(bfCtx, responsesReq)
+			})
+
+			if responsesError != nil {
+				t.Fatalf("❌ Opus 4.5 Responses API reasoning test failed after retries: %v", GetErrorMessage(responsesError))
+			}
+
+			// Validate response has content
+			content := GetResponsesContent(response)
+			if content == "" {
+				t.Error("Expected non-empty response content")
+			} else {
+				t.Logf("✅ Opus 4.5 reasoning response (first 200 chars): %s", truncateString(content, 200))
+			}
+
+			// Check for reasoning indicators
+			reasoningDetected := validateResponsesAPIReasoning(t, response)
+			if !reasoningDetected {
+				t.Logf("⚠️ No explicit reasoning indicators found in response structure")
+			} else {
+				t.Logf("🧠 Reasoning structure detected in response")
+			}
+
+			// Only announce success when nothing above recorded a failure.
+			if !t.Failed() {
+				t.Log("🎉 Opus 4.5 Responses API reasoning test passed!")
+			}
+		})
+
+		// Test via Chat Completions API
+		t.Run("ChatCompletionsAPI", func(t *testing.T) {
+			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+				t.Parallel()
+			}
+
+			chatMessages := []schemas.ChatMessage{
+				CreateBasicChatMessage(problemPrompt),
+			}
+
+			chatReq := &schemas.BifrostChatRequest{
+				Provider: config.Provider,
+				Model:    config.Opus45Model,
+				Input:    chatMessages,
+				Params: &schemas.ChatParameters{
+					MaxCompletionTokens: bifrost.Ptr(4000),
+					Reasoning: &schemas.ChatReasoning{
+						Effort:    bifrost.Ptr("high"),
+						MaxTokens: bifrost.Ptr(2000), // Budget tokens for Opus 4.5
+					},
+				},
+				Fallbacks: config.Fallbacks,
+			}
+
+			// Use retry framework with enhanced validation for reasoning
+			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
+			retryContext := TestRetryContext{
+				ScenarioName: "Opus45_Reasoning_Chat",
+				ExpectedBehavior: map[string]interface{}{
+					"should_show_reasoning": true,
+					"mathematical_problem":  true,
+					"step_by_step":          true,
+					"model_version":         "opus-4.5",
+					"thinking_mode":         "budget_tokens",
+				},
+				TestMetadata: map[string]interface{}{
+					"provider":          config.Provider,
+					"model":             config.Opus45Model,
+					"problem_type":      "mathematical",
+					"complexity":        "high",
+					"expects_reasoning": true,
+				},
+			}
+			// Carry the scenario's retry settings over to the Chat-specific
+			// config; no additional retry conditions are registered.
+			chatRetryConfig := ChatRetryConfig{
+				MaxAttempts: retryConfig.MaxAttempts,
+				BaseDelay:   retryConfig.BaseDelay,
+				MaxDelay:    retryConfig.MaxDelay,
+				Conditions:  []ChatRetryCondition{},
+				OnRetry:     retryConfig.OnRetry,
+				OnFinalFail: retryConfig.OnFinalFail,
+			}
+
+			// Enhanced validation for reasoning scenarios
+			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
+				"requires_reasoning": true,
+			})
+			expectations = ModifyExpectationsForProvider(expectations, config.Provider)
+
+			response, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "Opus45_Reasoning_Chat", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+				// A new Bifrost context is built each time the operation is invoked.
+				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+				return client.ChatCompletionRequest(bfCtx, chatReq)
+			})
+
+			if chatError != nil {
+				t.Fatalf("❌ Opus 4.5 Chat Completions API reasoning test failed after retries: %v", GetErrorMessage(chatError))
+			}
+
+			// Validate response has content
+			content := GetChatContent(response)
+			if content == "" {
+				t.Error("Expected non-empty response content")
+			} else {
+				t.Logf("✅ Opus 4.5 reasoning response (first 200 chars): %s", truncateString(content, 200))
+			}
+
+			// Check for reasoning indicators
+			reasoningDetected := validateChatCompletionReasoning(t, response)
+			if !reasoningDetected {
+				t.Logf("⚠️ No explicit reasoning indicators found in response structure")
+			} else {
+				t.Logf("🧠 Reasoning structure detected in response")
+			}
+
+			// Only announce success when nothing above recorded a failure.
+			if !t.Failed() {
+				t.Log("🎉 Opus 4.5 Chat Completions API reasoning test passed!")
+			}
+		})
+	})
+}
+
+// RunOpus46ReasoningTest tests adaptive thinking with Opus 4.6 (adaptive mode + effort).
+//
+// The test is skipped when config.SkipOpus46 is set or config.Opus46Model is
+// empty. Otherwise a logic puzzle is sent through the Responses API once per
+// effort level ("low", "medium", "high") and once through the Chat Completions
+// API with effort "high". None of the requests set a reasoning token budget.
+//
+// Subtests call t.Parallel unless the SKIP_PARALLEL_TESTS environment variable
+// is "true". A request error that survives the retries is fatal, empty content
+// is reported as an error, and a missing reasoning structure is only logged as
+// a warning. The success message is logged only when the subtest has not
+// recorded a failure.
+func RunOpus46ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
+	if config.SkipOpus46 {
+		t.Skipf("Skipping Opus 4.6 test: %s", config.SkipReason)
+		return
+	}
+
+	if config.Opus46Model == "" {
+		t.Skip("No Opus 4.6 model configured")
+		return
+	}
+
+	t.Run("Opus46_AdaptiveThinking", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		// Complex reasoning problem that benefits from adaptive thinking
+		problemPrompt := "Analyze this logic puzzle: Five people (A, B, C, D, E) are sitting in a row. A is not at either end. B is somewhere to the left of C. D is not next to E. E is at one of the ends. In how many different valid arrangements can they sit? Show your reasoning."
+
+		// Test config handed to the retry and expectation helpers
+		testConfig := ComprehensiveTestConfig{
+			Provider:       config.Provider,
+			ReasoningModel: config.Opus46Model,
+			Scenarios: TestScenarios{
+				Reasoning: true,
+			},
+			Fallbacks: config.Fallbacks,
+		}
+
+		// Test via Responses API with different effort levels
+		effortLevels := []string{"low", "medium", "high"}
+
+		for _, effort := range effortLevels {
+			effort := effort // capture range variable for the parallel subtest closure
+			t.Run("ResponsesAPI_Effort_"+effort, func(t *testing.T) {
+				if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+					t.Parallel()
+				}
+
+				responsesMessages := []schemas.ResponsesMessage{
+					CreateBasicResponsesMessage(problemPrompt),
+				}
+
+				responsesReq := &schemas.BifrostResponsesRequest{
+					Provider: config.Provider,
+					Model:    config.Opus46Model,
+					Input:    responsesMessages,
+					Params: &schemas.ResponsesParameters{
+						MaxOutputTokens: bifrost.Ptr(4000),
+						Reasoning: &schemas.ResponsesParametersReasoning{
+							Effort: bifrost.Ptr(effort), // Adaptive thinking uses effort parameter
+						},
+						Include: []string{"reasoning.encrypted_content"},
+					},
+					Fallbacks: config.Fallbacks,
+				}
+
+				// Use retry framework with enhanced validation for reasoning
+				retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
+				retryContext := TestRetryContext{
+					ScenarioName: "Opus46_Reasoning_Responses_" + effort,
+					ExpectedBehavior: map[string]interface{}{
+						"should_show_reasoning": true,
+						"logic_puzzle":          true,
+						"step_by_step":          true,
+						"model_version":         "opus-4.6",
+						"thinking_mode":         "adaptive",
+						"effort_level":          effort,
+					},
+					TestMetadata: map[string]interface{}{
+						"provider":          config.Provider,
+						"model":             config.Opus46Model,
+						"problem_type":      "logic_puzzle",
+						"complexity":        "high",
+						"expects_reasoning": true,
+						"effort":            effort,
+					},
+				}
+				// Carry the scenario's retry settings over to the Responses-specific
+				// config; no additional retry conditions are registered.
+				responsesRetryConfig := ResponsesRetryConfig{
+					MaxAttempts: retryConfig.MaxAttempts,
+					BaseDelay:   retryConfig.BaseDelay,
+					MaxDelay:    retryConfig.MaxDelay,
+					Conditions:  []ResponsesRetryCondition{},
+					OnRetry:     retryConfig.OnRetry,
+					OnFinalFail: retryConfig.OnFinalFail,
+				}
+
+				// Enhanced validation for reasoning scenarios
+				expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
+					"requires_reasoning": true,
+				})
+				expectations = ModifyExpectationsForProvider(expectations, config.Provider)
+
+				response, responsesError := WithResponsesTestRetry(t, responsesRetryConfig, retryContext, expectations, "Opus46_Reasoning_Responses_"+effort, func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+					// A new Bifrost context is built each time the operation is invoked.
+					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+					return client.ResponsesRequest(bfCtx, responsesReq)
+				})
+
+				if responsesError != nil {
+					t.Fatalf("❌ Opus 4.6 Responses API (effort=%s) reasoning test failed after retries: %v", effort, GetErrorMessage(responsesError))
+				}
+
+				// Validate response has content
+				content := GetResponsesContent(response)
+				if content == "" {
+					t.Errorf("Expected non-empty response content for effort=%s", effort)
+				} else {
+					t.Logf("✅ Opus 4.6 (effort=%s) response (first 200 chars): %s", effort, truncateString(content, 200))
+				}
+
+				// Check for reasoning indicators
+				reasoningDetected := validateResponsesAPIReasoning(t, response)
+				if !reasoningDetected {
+					t.Logf("⚠️ No explicit reasoning indicators found in response structure")
+				} else {
+					t.Logf("🧠 Reasoning structure detected in response")
+				}
+
+				// Only announce success when nothing above recorded a failure.
+				if !t.Failed() {
+					t.Logf("🎉 Opus 4.6 Responses API (effort=%s) reasoning test passed!", effort)
+				}
+			})
+		}
+
+		// Test via Chat Completions API
+		t.Run("ChatCompletionsAPI", func(t *testing.T) {
+			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+				t.Parallel()
+			}
+
+			chatMessages := []schemas.ChatMessage{
+				CreateBasicChatMessage(problemPrompt),
+			}
+
+			chatReq := &schemas.BifrostChatRequest{
+				Provider: config.Provider,
+				Model:    config.Opus46Model,
+				Input:    chatMessages,
+				Params: &schemas.ChatParameters{
+					MaxCompletionTokens: bifrost.Ptr(4000),
+					Reasoning: &schemas.ChatReasoning{
+						Effort: bifrost.Ptr("high"), // Opus 4.6 uses adaptive thinking with effort
+						// Note: MaxTokens (budget_tokens) is NOT used for Opus 4.6
+					},
+				},
+				Fallbacks: config.Fallbacks,
+			}
+
+			// Use retry framework with enhanced validation for reasoning
+			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
+			retryContext := TestRetryContext{
+				ScenarioName: "Opus46_Reasoning_Chat",
+				ExpectedBehavior: map[string]interface{}{
+					"should_show_reasoning": true,
+					"logic_puzzle":          true,
+					"step_by_step":          true,
+					"model_version":         "opus-4.6",
+					"thinking_mode":         "adaptive",
+				},
+				TestMetadata: map[string]interface{}{
+					"provider":          config.Provider,
+					"model":             config.Opus46Model,
+					"problem_type":      "logic_puzzle",
+					"complexity":        "high",
+					"expects_reasoning": true,
+				},
+			}
+			// Carry the scenario's retry settings over to the Chat-specific
+			// config; no additional retry conditions are registered.
+			chatRetryConfig := ChatRetryConfig{
+				MaxAttempts: retryConfig.MaxAttempts,
+				BaseDelay:   retryConfig.BaseDelay,
+				MaxDelay:    retryConfig.MaxDelay,
+				Conditions:  []ChatRetryCondition{},
+				OnRetry:     retryConfig.OnRetry,
+				OnFinalFail: retryConfig.OnFinalFail,
+			}
+
+			// Enhanced validation for reasoning scenarios
+			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
+				"requires_reasoning": true,
+			})
+			expectations = ModifyExpectationsForProvider(expectations, config.Provider)
+
+			response, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "Opus46_Reasoning_Chat", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+				// A new Bifrost context is built each time the operation is invoked.
+				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+				return client.ChatCompletionRequest(bfCtx, chatReq)
+			})
+
+			if chatError != nil {
+				t.Fatalf("❌ Opus 4.6 Chat Completions API reasoning test failed after retries: %v", GetErrorMessage(chatError))
+			}
+
+			// Validate response has content
+			content := GetChatContent(response)
+			if content == "" {
+				t.Error("Expected non-empty response content")
+			} else {
+				t.Logf("✅ Opus 4.6 reasoning response (first 200 chars): %s", truncateString(content, 200))
+			}
+
+			// Check for reasoning indicators
+			reasoningDetected := validateChatCompletionReasoning(t, response)
+			if !reasoningDetected {
+				t.Logf("⚠️ No explicit reasoning indicators found in response structure")
+			} else {
+				t.Logf("🧠 Reasoning structure detected in response")
+			}
+
+			// Only announce success when nothing above recorded a failure.
+			if !t.Failed() {
+				t.Log("🎉 Opus 4.6 Chat Completions API reasoning test passed!")
+			}
+		})
+	})
+}
+
+// RunOpus46MultiTurnReasoningTest tests multi-turn conversations with reasoning content passthrough.
+// This verifies that reasoning details (text + signature) from assistant messages are correctly
+// passed back to the model in follow-up turns.
+//
+// The test is skipped when config.SkipOpus46 is set or config.Opus46Model is
+// empty. Step 1 sends a short arithmetic prompt with effort "low" and collects
+// the reasoning details of the first choice. Step 2 replays the prompt, the
+// assistant answer (with those reasoning details attached) and a follow-up
+// question. Both steps share one retry config and one set of expectations.
+//
+// A request error in either step, or empty content in step 1, is fatal; empty
+// content in step 2 is reported as an error. If step 1 yields no reasoning
+// details the test still runs step 2 but logs a warning, because the
+// passthrough path is then not exercised. The success message is logged only
+// when no failure has been recorded.
+func RunOpus46MultiTurnReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
+	if config.SkipOpus46 {
+		t.Skipf("Skipping Opus 4.6 multi-turn test: %s", config.SkipReason)
+		return
+	}
+
+	if config.Opus46Model == "" {
+		t.Skip("No Opus 4.6 model configured")
+		return
+	}
+
+	t.Run("Opus46_MultiTurnReasoning", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		// Test config handed to the retry and expectation helpers
+		testConfig := ComprehensiveTestConfig{
+			Provider:       config.Provider,
+			ReasoningModel: config.Opus46Model,
+			Scenarios:      TestScenarios{Reasoning: true},
+			Fallbacks:      config.Fallbacks,
+		}
+
+		// Step 1: Send initial reasoning request
+		initialPrompt := "What is 15 * 17? Think step by step."
+		chatMessages := []schemas.ChatMessage{
+			CreateBasicChatMessage(initialPrompt),
+		}
+
+		chatReq := &schemas.BifrostChatRequest{
+			Provider: config.Provider,
+			Model:    config.Opus46Model,
+			Input:    chatMessages,
+			Params: &schemas.ChatParameters{
+				MaxCompletionTokens: bifrost.Ptr(4000),
+				Reasoning: &schemas.ChatReasoning{
+					Effort: bifrost.Ptr("low"),
+				},
+			},
+			Fallbacks: config.Fallbacks,
+		}
+
+		retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
+		retryContext := TestRetryContext{
+			ScenarioName: "Opus46_MultiTurn_Step1",
+			ExpectedBehavior: map[string]interface{}{
+				"should_show_reasoning": true,
+				"model_version":         "opus-4.6",
+				"thinking_mode":         "adaptive",
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": config.Provider,
+				"model":    config.Opus46Model,
+				"step":     "initial",
+			},
+		}
+		// Carry the scenario's retry settings over to the Chat-specific config;
+		// no additional retry conditions are registered. Reused for step 2.
+		chatRetryConfig := ChatRetryConfig{
+			MaxAttempts: retryConfig.MaxAttempts,
+			BaseDelay:   retryConfig.BaseDelay,
+			MaxDelay:    retryConfig.MaxDelay,
+			Conditions:  []ChatRetryCondition{},
+			OnRetry:     retryConfig.OnRetry,
+			OnFinalFail: retryConfig.OnFinalFail,
+		}
+		expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
+			"requires_reasoning": true,
+		})
+		expectations = ModifyExpectationsForProvider(expectations, config.Provider)
+
+		firstResponse, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "Opus46_MultiTurn_Step1", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			// A new Bifrost context is built each time the operation is invoked.
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			return client.ChatCompletionRequest(bfCtx, chatReq)
+		})
+
+		if chatError != nil {
+			t.Fatalf("Step 1 failed: %v", GetErrorMessage(chatError))
+		}
+
+		// Step 2 replays this text as the assistant turn, so it must be present.
+		firstContent := GetChatContent(firstResponse)
+		if firstContent == "" {
+			t.Fatal("Step 1: Expected non-empty response content")
+		}
+		t.Logf("Step 1 response (first 200 chars): %s", truncateString(firstContent, 200))
+
+		// Extract reasoning details from the first choice of the first response
+		var reasoningDetails []schemas.ChatReasoningDetails
+		if len(firstResponse.Choices) > 0 {
+			choice := firstResponse.Choices[0]
+			if choice.ChatNonStreamResponseChoice != nil &&
+				choice.ChatNonStreamResponseChoice.Message != nil &&
+				choice.ChatNonStreamResponseChoice.Message.ChatAssistantMessage != nil {
+				reasoningDetails = choice.ChatNonStreamResponseChoice.Message.ChatAssistantMessage.ReasoningDetails
+			}
+		}
+
+		t.Logf("Step 1: Found %d reasoning detail entries", len(reasoningDetails))
+		if len(reasoningDetails) == 0 {
+			// Without reasoning details the follow-up request carries nothing to
+			// pass through; make that visible instead of passing silently.
+			t.Log("⚠️ Step 1 returned no reasoning details; Step 2 will not exercise reasoning passthrough")
+		}
+
+		// Step 2: Build multi-turn conversation with reasoning details passed back
+		multiTurnMessages := []schemas.ChatMessage{
+			CreateBasicChatMessage(initialPrompt),
+			{
+				Role: schemas.ChatMessageRoleAssistant,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: &firstContent,
+				},
+				ChatAssistantMessage: &schemas.ChatAssistantMessage{
+					ReasoningDetails: reasoningDetails,
+				},
+			},
+			CreateBasicChatMessage("Now multiply that result by 2."),
+		}
+
+		multiTurnReq := &schemas.BifrostChatRequest{
+			Provider: config.Provider,
+			Model:    config.Opus46Model,
+			Input:    multiTurnMessages,
+			Params: &schemas.ChatParameters{
+				MaxCompletionTokens: bifrost.Ptr(4000),
+				Reasoning: &schemas.ChatReasoning{
+					Effort: bifrost.Ptr("low"),
+				},
+			},
+			Fallbacks: config.Fallbacks,
+		}
+
+		retryContext2 := TestRetryContext{
+			ScenarioName: "Opus46_MultiTurn_Step2",
+			ExpectedBehavior: map[string]interface{}{
+				"multi_turn":    true,
+				"model_version": "opus-4.6",
+				"thinking_mode": "adaptive",
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": config.Provider,
+				"model":    config.Opus46Model,
+				"step":     "follow_up",
+			},
+		}
+
+		secondResponse, chatError2 := WithChatTestRetry(t, chatRetryConfig, retryContext2, expectations, "Opus46_MultiTurn_Step2", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			// A new Bifrost context is built each time the operation is invoked.
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			return client.ChatCompletionRequest(bfCtx, multiTurnReq)
+		})
+
+		if chatError2 != nil {
+			t.Fatalf("Step 2 (multi-turn with reasoning passthrough) failed: %v", GetErrorMessage(chatError2))
+		}
+
+		secondContent := GetChatContent(secondResponse)
+		if secondContent == "" {
+			t.Error("Step 2: Expected non-empty response content")
+		} else {
+			t.Logf("Step 2 response (first 200 chars): %s", truncateString(secondContent, 200))
+		}
+
+		// Only announce success when nothing above recorded a failure.
+		if !t.Failed() {
+			t.Log("Multi-turn reasoning passthrough test passed!")
+		}
+	})
+}
+
+// RunAllOpusReasoningTests runs Opus 4.5 and 4.6 reasoning tests for a given provider
+func RunAllOpusReasoningTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
+	t.Run(string(config.Provider)+"_OpusReasoning", func(t *testing.T) {
+		t.Run("Opus45", func(t *testing.T) {
+			RunOpus45ReasoningTest(t, client, ctx, config)
+		})
+		t.Run("Opus46", func(t *testing.T) {
+			RunOpus46ReasoningTest(t, client, ctx, config)
+		})
+		t.Run("Opus46_MultiTurn", func(t *testing.T) {
+			RunOpus46MultiTurnReasoningTest(t, client, ctx, config)
+		})
+	})
+}