bifrost/core/internal/llmtests/reasoning_opus.go

package llmtests

import (
	"context"
	"os"
	"testing"

	bifrost "github.com/maximhq/bifrost/core"
	"github.com/maximhq/bifrost/core/schemas"
)

// OpusReasoningTestConfig holds the configuration for Opus-specific reasoning
// tests against a single provider: the model identifiers to exercise, the
// fallbacks attached to each request, and per-generation skip switches.
type OpusReasoningTestConfig struct {
	Provider    schemas.ModelProvider // Provider the test requests are sent to
	Opus45Model string                // Opus 4.5 model identifier; Opus 4.5 tests skip when empty
	Opus46Model string                // Opus 4.6 model identifier; Opus 4.6 tests skip when empty
	Fallbacks   []schemas.Fallback    // Fallbacks forwarded on every test request
	SkipOpus45  bool                  // Skip Opus 4.5 tests
	SkipOpus46  bool                  // Skip Opus 4.6 tests
	SkipReason  string                // Reason reported when a skip switch is set
}

// GetOpusReasoningTestConfigs returns one Opus reasoning test configuration
// per supported provider, in the order Anthropic, Bedrock, Azure, Vertex.
// Every entry carries a non-nil, empty fallback list and leaves the skip
// switches at their zero values.
func GetOpusReasoningTestConfigs() []OpusReasoningTestConfig {
	// One row per provider: the provider followed by its Opus 4.5 and
	// Opus 4.6 model identifiers.
	targets := []struct {
		provider schemas.ModelProvider
		opus45   string
		opus46   string
	}{
		{schemas.Anthropic, "claude-opus-4-5-20251101", "claude-opus-4-6-20260210"},
		{schemas.Bedrock, "global.anthropic.claude-opus-4-5-20251101-v1:0", "global.anthropic.claude-opus-4-6-v1"},
		// Azure identifiers are deployment names.
		{schemas.Azure, "claude-opus-4-5", "claude-opus-4-6"},
		// Vertex identifiers are deployment names.
		{schemas.Vertex, "claude-opus-4-5", "claude-opus-4-6"},
	}

	configs := make([]OpusReasoningTestConfig, 0, len(targets))
	for _, target := range targets {
		configs = append(configs, OpusReasoningTestConfig{
			Provider:    target.provider,
			Opus45Model: target.opus45,
			Opus46Model: target.opus46,
			Fallbacks:   []schemas.Fallback{},
		})
	}
	return configs
}

// RunOpus45ReasoningTest tests extended thinking with Opus 4.5 (budget_tokens mode).
//
// The same multi-step word problem is sent through the Responses API and the
// Chat Completions API, each in its own subtest. Both requests go through the
// retry helpers using the "Reasoning" scenario retry settings and
// expectations. A subtest fails when the request still returns an error after
// retries or when the response content is empty; missing reasoning indicators
// are only logged.
//
// The test is skipped when config.SkipOpus45 is set or config.Opus45Model is
// empty. Subtests run in parallel unless the SKIP_PARALLEL_TESTS environment
// variable equals "true".
func RunOpus45ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
	// t.Skip and t.Skipf stop the calling test immediately, so no explicit
	// return is needed after them.
	if config.SkipOpus45 {
		t.Skipf("Skipping Opus 4.5 test: %s", config.SkipReason)
	}
	if config.Opus45Model == "" {
		t.Skip("No Opus 4.5 model configured")
	}

	t.Run("Opus45_ExtendedThinking", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		// Complex reasoning problem
		problemPrompt := "Solve this step by step: A train leaves station A at 9:00 AM traveling at 60 mph. Another train leaves station B (300 miles away) at 10:00 AM traveling towards station A at 80 mph. At what time will they meet, and how far from station A?"

		// Create a test config for retry framework
		testConfig := ComprehensiveTestConfig{
			Provider:       config.Provider,
			ReasoningModel: config.Opus45Model,
			Scenarios: TestScenarios{
				Reasoning: true,
			},
			Fallbacks: config.Fallbacks,
		}

		// Test via Responses API
		t.Run("ResponsesAPI", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}

			const scenarioName = "Opus45_Reasoning_Responses"

			responsesMessages := []schemas.ResponsesMessage{
				CreateBasicResponsesMessage(problemPrompt),
			}

			responsesReq := &schemas.BifrostResponsesRequest{
				Provider: config.Provider,
				Model:    config.Opus45Model,
				Input:    responsesMessages,
				Params: &schemas.ResponsesParameters{
					MaxOutputTokens: bifrost.Ptr(4000),
					Reasoning: &schemas.ResponsesParametersReasoning{
						Effort: bifrost.Ptr("high"),
					},
					Include: []string{"reasoning.encrypted_content"},
				},
				Fallbacks: config.Fallbacks,
			}

			// Use retry framework with enhanced validation for reasoning
			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
			retryContext := TestRetryContext{
				ScenarioName: scenarioName,
				ExpectedBehavior: map[string]interface{}{
					"should_show_reasoning": true,
					"mathematical_problem":  true,
					"step_by_step":          true,
					"model_version":         "opus-4.5",
					"thinking_mode":         "budget_tokens",
				},
				TestMetadata: map[string]interface{}{
					"provider":          config.Provider,
					"model":             config.Opus45Model,
					"problem_type":      "mathematical",
					"complexity":        "high",
					"expects_reasoning": true,
				},
			}
			// Carry over the scenario's timing and callbacks; no extra retry
			// conditions are registered here.
			responsesRetryConfig := ResponsesRetryConfig{
				MaxAttempts: retryConfig.MaxAttempts,
				BaseDelay:   retryConfig.BaseDelay,
				MaxDelay:    retryConfig.MaxDelay,
				Conditions:  []ResponsesRetryCondition{},
				OnRetry:     retryConfig.OnRetry,
				OnFinalFail: retryConfig.OnFinalFail,
			}

			// Enhanced validation for reasoning scenarios
			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
				"requires_reasoning": true,
			})
			expectations = ModifyExpectationsForProvider(expectations, config.Provider)

			response, responsesError := WithResponsesTestRetry(t, responsesRetryConfig, retryContext, expectations, scenarioName, func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.ResponsesRequest(bfCtx, responsesReq)
			})

			if responsesError != nil {
				t.Fatalf("❌ Opus 4.5 Responses API reasoning test failed after retries: %v", GetErrorMessage(responsesError))
			}

			// Validate response has content
			content := GetResponsesContent(response)
			if content == "" {
				t.Error("Expected non-empty response content")
			} else {
				t.Logf("✅ Opus 4.5 reasoning response (first 200 chars): %s", truncateString(content, 200))
			}

			// Check for reasoning indicators (informational only)
			if validateResponsesAPIReasoning(t, response) {
				t.Log("🧠 Reasoning structure detected in response")
			} else {
				t.Log("⚠️ No explicit reasoning indicators found in response structure")
			}

			// Only announce success when nothing above recorded a failure.
			if !t.Failed() {
				t.Log("🎉 Opus 4.5 Responses API reasoning test passed!")
			}
		})

		// Test via Chat Completions API
		t.Run("ChatCompletionsAPI", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}

			const scenarioName = "Opus45_Reasoning_Chat"

			chatMessages := []schemas.ChatMessage{
				CreateBasicChatMessage(problemPrompt),
			}

			chatReq := &schemas.BifrostChatRequest{
				Provider: config.Provider,
				Model:    config.Opus45Model,
				Input:    chatMessages,
				Params: &schemas.ChatParameters{
					MaxCompletionTokens: bifrost.Ptr(4000),
					Reasoning: &schemas.ChatReasoning{
						Effort:    bifrost.Ptr("high"),
						MaxTokens: bifrost.Ptr(2000), // Budget tokens for Opus 4.5
					},
				},
				Fallbacks: config.Fallbacks,
			}

			// Use retry framework with enhanced validation for reasoning
			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
			retryContext := TestRetryContext{
				ScenarioName: scenarioName,
				ExpectedBehavior: map[string]interface{}{
					"should_show_reasoning": true,
					"mathematical_problem":  true,
					"step_by_step":          true,
					"model_version":         "opus-4.5",
					"thinking_mode":         "budget_tokens",
				},
				TestMetadata: map[string]interface{}{
					"provider":          config.Provider,
					"model":             config.Opus45Model,
					"problem_type":      "mathematical",
					"complexity":        "high",
					"expects_reasoning": true,
				},
			}
			// Carry over the scenario's timing and callbacks; no extra retry
			// conditions are registered here.
			chatRetryConfig := ChatRetryConfig{
				MaxAttempts: retryConfig.MaxAttempts,
				BaseDelay:   retryConfig.BaseDelay,
				MaxDelay:    retryConfig.MaxDelay,
				Conditions:  []ChatRetryCondition{},
				OnRetry:     retryConfig.OnRetry,
				OnFinalFail: retryConfig.OnFinalFail,
			}

			// Enhanced validation for reasoning scenarios
			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
				"requires_reasoning": true,
			})
			expectations = ModifyExpectationsForProvider(expectations, config.Provider)

			response, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, scenarioName, func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.ChatCompletionRequest(bfCtx, chatReq)
			})

			if chatError != nil {
				t.Fatalf("❌ Opus 4.5 Chat Completions API reasoning test failed after retries: %v", GetErrorMessage(chatError))
			}

			// Validate response has content
			content := GetChatContent(response)
			if content == "" {
				t.Error("Expected non-empty response content")
			} else {
				t.Logf("✅ Opus 4.5 reasoning response (first 200 chars): %s", truncateString(content, 200))
			}

			// Check for reasoning indicators (informational only)
			if validateChatCompletionReasoning(t, response) {
				t.Log("🧠 Reasoning structure detected in response")
			} else {
				t.Log("⚠️ No explicit reasoning indicators found in response structure")
			}

			// Only announce success when nothing above recorded a failure.
			if !t.Failed() {
				t.Log("🎉 Opus 4.5 Chat Completions API reasoning test passed!")
			}
		})
	})
}

// RunOpus46ReasoningTest tests adaptive thinking with Opus 4.6 (adaptive mode + effort).
//
// A logic puzzle is sent through the Responses API once per effort level
// ("low", "medium", "high") and once through the Chat Completions API with
// effort "high"; each request runs in its own subtest. All requests go
// through the retry helpers using the "Reasoning" scenario retry settings and
// expectations. A subtest fails when the request still returns an error after
// retries or when the response content is empty; missing reasoning indicators
// are only logged.
//
// The test is skipped when config.SkipOpus46 is set or config.Opus46Model is
// empty. Subtests run in parallel unless the SKIP_PARALLEL_TESTS environment
// variable equals "true".
func RunOpus46ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
	// t.Skip and t.Skipf stop the calling test immediately, so no explicit
	// return is needed after them.
	if config.SkipOpus46 {
		t.Skipf("Skipping Opus 4.6 test: %s", config.SkipReason)
	}
	if config.Opus46Model == "" {
		t.Skip("No Opus 4.6 model configured")
	}

	t.Run("Opus46_AdaptiveThinking", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		// Complex reasoning problem that benefits from adaptive thinking
		problemPrompt := "Analyze this logic puzzle: Five people (A, B, C, D, E) are sitting in a row. A is not at either end. B is somewhere to the left of C. D is not next to E. E is at one of the ends. In how many different valid arrangements can they sit? Show your reasoning."

		// Create a test config for retry framework
		testConfig := ComprehensiveTestConfig{
			Provider:       config.Provider,
			ReasoningModel: config.Opus46Model,
			Scenarios: TestScenarios{
				Reasoning: true,
			},
			Fallbacks: config.Fallbacks,
		}

		// Test via Responses API with different effort levels
		effortLevels := []string{"low", "medium", "high"}

		for _, effort := range effortLevels {
			effort := effort // capture range variable
			t.Run("ResponsesAPI_Effort_"+effort, func(t *testing.T) {
				if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
					t.Parallel()
				}

				scenarioName := "Opus46_Reasoning_Responses_" + effort

				responsesMessages := []schemas.ResponsesMessage{
					CreateBasicResponsesMessage(problemPrompt),
				}

				responsesReq := &schemas.BifrostResponsesRequest{
					Provider: config.Provider,
					Model:    config.Opus46Model,
					Input:    responsesMessages,
					Params: &schemas.ResponsesParameters{
						MaxOutputTokens: bifrost.Ptr(4000),
						Reasoning: &schemas.ResponsesParametersReasoning{
							Effort: bifrost.Ptr(effort), // Adaptive thinking uses effort parameter
						},
						Include: []string{"reasoning.encrypted_content"},
					},
					Fallbacks: config.Fallbacks,
				}

				// Use retry framework with enhanced validation for reasoning
				retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
				retryContext := TestRetryContext{
					ScenarioName: scenarioName,
					ExpectedBehavior: map[string]interface{}{
						"should_show_reasoning": true,
						"logic_puzzle":          true,
						"step_by_step":          true,
						"model_version":         "opus-4.6",
						"thinking_mode":         "adaptive",
						"effort_level":          effort,
					},
					TestMetadata: map[string]interface{}{
						"provider":          config.Provider,
						"model":             config.Opus46Model,
						"problem_type":      "logic_puzzle",
						"complexity":        "high",
						"expects_reasoning": true,
						"effort":            effort,
					},
				}
				// Carry over the scenario's timing and callbacks; no extra
				// retry conditions are registered here.
				responsesRetryConfig := ResponsesRetryConfig{
					MaxAttempts: retryConfig.MaxAttempts,
					BaseDelay:   retryConfig.BaseDelay,
					MaxDelay:    retryConfig.MaxDelay,
					Conditions:  []ResponsesRetryCondition{},
					OnRetry:     retryConfig.OnRetry,
					OnFinalFail: retryConfig.OnFinalFail,
				}

				// Enhanced validation for reasoning scenarios
				expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
					"requires_reasoning": true,
				})
				expectations = ModifyExpectationsForProvider(expectations, config.Provider)

				response, responsesError := WithResponsesTestRetry(t, responsesRetryConfig, retryContext, expectations, scenarioName, func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
					bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
					return client.ResponsesRequest(bfCtx, responsesReq)
				})

				if responsesError != nil {
					t.Fatalf("❌ Opus 4.6 Responses API (effort=%s) reasoning test failed after retries: %v", effort, GetErrorMessage(responsesError))
				}

				// Validate response has content
				content := GetResponsesContent(response)
				if content == "" {
					t.Errorf("Expected non-empty response content for effort=%s", effort)
				} else {
					t.Logf("✅ Opus 4.6 (effort=%s) response (first 200 chars): %s", effort, truncateString(content, 200))
				}

				// Check for reasoning indicators (informational only)
				if validateResponsesAPIReasoning(t, response) {
					t.Log("🧠 Reasoning structure detected in response")
				} else {
					t.Log("⚠️ No explicit reasoning indicators found in response structure")
				}

				// Only announce success when nothing above recorded a failure.
				if !t.Failed() {
					t.Logf("🎉 Opus 4.6 Responses API (effort=%s) reasoning test passed!", effort)
				}
			})
		}

		// Test via Chat Completions API
		t.Run("ChatCompletionsAPI", func(t *testing.T) {
			if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
				t.Parallel()
			}

			const scenarioName = "Opus46_Reasoning_Chat"

			chatMessages := []schemas.ChatMessage{
				CreateBasicChatMessage(problemPrompt),
			}

			chatReq := &schemas.BifrostChatRequest{
				Provider: config.Provider,
				Model:    config.Opus46Model,
				Input:    chatMessages,
				Params: &schemas.ChatParameters{
					MaxCompletionTokens: bifrost.Ptr(4000),
					Reasoning: &schemas.ChatReasoning{
						Effort: bifrost.Ptr("high"), // Opus 4.6 uses adaptive thinking with effort
						// Note: MaxTokens (budget_tokens) is NOT used for Opus 4.6
					},
				},
				Fallbacks: config.Fallbacks,
			}

			// Use retry framework with enhanced validation for reasoning
			retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
			retryContext := TestRetryContext{
				ScenarioName: scenarioName,
				ExpectedBehavior: map[string]interface{}{
					"should_show_reasoning": true,
					"logic_puzzle":          true,
					"step_by_step":          true,
					"model_version":         "opus-4.6",
					"thinking_mode":         "adaptive",
				},
				TestMetadata: map[string]interface{}{
					"provider":          config.Provider,
					"model":             config.Opus46Model,
					"problem_type":      "logic_puzzle",
					"complexity":        "high",
					"expects_reasoning": true,
				},
			}
			// Carry over the scenario's timing and callbacks; no extra retry
			// conditions are registered here.
			chatRetryConfig := ChatRetryConfig{
				MaxAttempts: retryConfig.MaxAttempts,
				BaseDelay:   retryConfig.BaseDelay,
				MaxDelay:    retryConfig.MaxDelay,
				Conditions:  []ChatRetryCondition{},
				OnRetry:     retryConfig.OnRetry,
				OnFinalFail: retryConfig.OnFinalFail,
			}

			// Enhanced validation for reasoning scenarios
			expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
				"requires_reasoning": true,
			})
			expectations = ModifyExpectationsForProvider(expectations, config.Provider)

			response, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, scenarioName, func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
				bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
				return client.ChatCompletionRequest(bfCtx, chatReq)
			})

			if chatError != nil {
				t.Fatalf("❌ Opus 4.6 Chat Completions API reasoning test failed after retries: %v", GetErrorMessage(chatError))
			}

			// Validate response has content
			content := GetChatContent(response)
			if content == "" {
				t.Error("Expected non-empty response content")
			} else {
				t.Logf("✅ Opus 4.6 reasoning response (first 200 chars): %s", truncateString(content, 200))
			}

			// Check for reasoning indicators (informational only)
			if validateChatCompletionReasoning(t, response) {
				t.Log("🧠 Reasoning structure detected in response")
			} else {
				t.Log("⚠️ No explicit reasoning indicators found in response structure")
			}

			// Only announce success when nothing above recorded a failure.
			if !t.Failed() {
				t.Log("🎉 Opus 4.6 Chat Completions API reasoning test passed!")
			}
		})
	})
}

// RunOpus46MultiTurnReasoningTest tests multi-turn conversations with reasoning content passthrough.
// This verifies that reasoning details from assistant messages can be passed
// back to the model in a follow-up turn.
//
// Step 1 sends a short arithmetic prompt through the Chat Completions API and
// collects the ReasoningDetails from the first choice's assistant message.
// Step 2 replays the conversation (original prompt, the assistant reply with
// those reasoning details attached, and a follow-up question) and requires
// the request to succeed with non-empty content. Both steps share the same
// retry configuration and expectations.
//
// The test is skipped when config.SkipOpus46 is set or config.Opus46Model is
// empty. It runs in parallel unless the SKIP_PARALLEL_TESTS environment
// variable equals "true".
func RunOpus46MultiTurnReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
	// t.Skip and t.Skipf stop the calling test immediately, so no explicit
	// return is needed after them.
	if config.SkipOpus46 {
		t.Skipf("Skipping Opus 4.6 multi-turn test: %s", config.SkipReason)
	}
	if config.Opus46Model == "" {
		t.Skip("No Opus 4.6 model configured")
	}

	t.Run("Opus46_MultiTurnReasoning", func(t *testing.T) {
		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
			t.Parallel()
		}

		testConfig := ComprehensiveTestConfig{
			Provider:       config.Provider,
			ReasoningModel: config.Opus46Model,
			Scenarios:      TestScenarios{Reasoning: true},
			Fallbacks:      config.Fallbacks,
		}

		// Step 1: Send initial reasoning request
		initialPrompt := "What is 15 * 17? Think step by step."
		chatMessages := []schemas.ChatMessage{
			CreateBasicChatMessage(initialPrompt),
		}

		chatReq := &schemas.BifrostChatRequest{
			Provider: config.Provider,
			Model:    config.Opus46Model,
			Input:    chatMessages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(4000),
				Reasoning: &schemas.ChatReasoning{
					Effort: bifrost.Ptr("low"),
				},
			},
			Fallbacks: config.Fallbacks,
		}

		retryConfig := GetTestRetryConfigForScenario("Reasoning", testConfig)
		retryContext := TestRetryContext{
			ScenarioName: "Opus46_MultiTurn_Step1",
			ExpectedBehavior: map[string]interface{}{
				"should_show_reasoning": true,
				"model_version":         "opus-4.6",
				"thinking_mode":         "adaptive",
			},
			TestMetadata: map[string]interface{}{
				"provider": config.Provider,
				"model":    config.Opus46Model,
				"step":     "initial",
			},
		}
		// Carry over the scenario's timing and callbacks; no extra retry
		// conditions are registered here. Reused for both steps.
		chatRetryConfig := ChatRetryConfig{
			MaxAttempts: retryConfig.MaxAttempts,
			BaseDelay:   retryConfig.BaseDelay,
			MaxDelay:    retryConfig.MaxDelay,
			Conditions:  []ChatRetryCondition{},
			OnRetry:     retryConfig.OnRetry,
			OnFinalFail: retryConfig.OnFinalFail,
		}
		expectations := GetExpectationsForScenario("Reasoning", testConfig, map[string]interface{}{
			"requires_reasoning": true,
		})
		expectations = ModifyExpectationsForProvider(expectations, config.Provider)

		firstResponse, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "Opus46_MultiTurn_Step1", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
			return client.ChatCompletionRequest(bfCtx, chatReq)
		})

		if chatError != nil {
			t.Fatalf("Step 1 failed: %v", GetErrorMessage(chatError))
		}

		// Step 2 needs the assistant text to replay, so empty content is fatal.
		firstContent := GetChatContent(firstResponse)
		if firstContent == "" {
			t.Fatal("Step 1: Expected non-empty response content")
		}
		t.Logf("Step 1 response (first 200 chars): %s", truncateString(firstContent, 200))

		// Extract reasoning details from first response
		var reasoningDetails []schemas.ChatReasoningDetails
		if len(firstResponse.Choices) > 0 {
			choice := firstResponse.Choices[0]
			if choice.ChatNonStreamResponseChoice != nil &&
				choice.ChatNonStreamResponseChoice.Message != nil &&
				choice.ChatNonStreamResponseChoice.Message.ChatAssistantMessage != nil {
				reasoningDetails = choice.ChatNonStreamResponseChoice.Message.ChatAssistantMessage.ReasoningDetails
			}
		}

		t.Logf("Step 1: Found %d reasoning detail entries", len(reasoningDetails))
		if len(reasoningDetails) == 0 {
			// Not treated as a failure (the single-turn tests likewise only
			// log when reasoning indicators are missing), but called out so a
			// green run is not mistaken for passthrough coverage.
			t.Log("⚠️ Step 1 returned no reasoning details; Step 2 will not exercise reasoning passthrough")
		}

		// Step 2: Build multi-turn conversation with reasoning details passed back
		multiTurnMessages := []schemas.ChatMessage{
			CreateBasicChatMessage(initialPrompt),
			{
				Role: schemas.ChatMessageRoleAssistant,
				Content: &schemas.ChatMessageContent{
					ContentStr: &firstContent,
				},
				ChatAssistantMessage: &schemas.ChatAssistantMessage{
					ReasoningDetails: reasoningDetails,
				},
			},
			CreateBasicChatMessage("Now multiply that result by 2."),
		}

		multiTurnReq := &schemas.BifrostChatRequest{
			Provider: config.Provider,
			Model:    config.Opus46Model,
			Input:    multiTurnMessages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(4000),
				Reasoning: &schemas.ChatReasoning{
					Effort: bifrost.Ptr("low"),
				},
			},
			Fallbacks: config.Fallbacks,
		}

		retryContext2 := TestRetryContext{
			ScenarioName: "Opus46_MultiTurn_Step2",
			ExpectedBehavior: map[string]interface{}{
				"multi_turn":    true,
				"model_version": "opus-4.6",
				"thinking_mode": "adaptive",
			},
			TestMetadata: map[string]interface{}{
				"provider": config.Provider,
				"model":    config.Opus46Model,
				"step":     "follow_up",
			},
		}

		secondResponse, chatError2 := WithChatTestRetry(t, chatRetryConfig, retryContext2, expectations, "Opus46_MultiTurn_Step2", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
			return client.ChatCompletionRequest(bfCtx, multiTurnReq)
		})

		if chatError2 != nil {
			t.Fatalf("Step 2 (multi-turn with reasoning passthrough) failed: %v", GetErrorMessage(chatError2))
		}

		secondContent := GetChatContent(secondResponse)
		if secondContent == "" {
			t.Error("Step 2: Expected non-empty response content")
		} else {
			t.Logf("Step 2 response (first 200 chars): %s", truncateString(secondContent, 200))
		}

		// Only announce success when nothing above recorded a failure.
		if !t.Failed() {
			t.Log("Multi-turn reasoning passthrough test passed!")
		}
	})
}

// RunAllOpusReasoningTests runs the Opus 4.5, Opus 4.6, and Opus 4.6
// multi-turn reasoning suites for one provider. The suites are nested under a
// "<provider>_OpusReasoning" subtest and started in that order, each in its
// own named subtest.
func RunAllOpusReasoningTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig) {
	type suiteFunc func(*testing.T, *bifrost.Bifrost, context.Context, OpusReasoningTestConfig)

	suites := []struct {
		name string
		run  suiteFunc
	}{
		{"Opus45", RunOpus45ReasoningTest},
		{"Opus46", RunOpus46ReasoningTest},
		{"Opus46_MultiTurn", RunOpus46MultiTurnReasoningTest},
	}

	groupName := string(config.Provider) + "_OpusReasoning"
	t.Run(groupName, func(t *testing.T) {
		for _, suite := range suites {
			suite := suite // per-iteration copy for the closure below
			t.Run(suite.name, func(t *testing.T) {
				suite.run(t, client, ctx, config)
			})
		}
	})
}