bifrost/core/internal/mcptests/agent_basic_test.go

package mcptests

import (
	"testing"
	"time"

	"github.com/maximhq/bifrost/core/schemas"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// =============================================================================
// MOCK LLM FOR AGENT TESTS
// =============================================================================

// MockLLMCaller provides controlled LLM responses for testing agent mode
type MockLLMCaller struct {
	chatResponses      []*schemas.BifrostChatResponse
	responsesResponses []*schemas.BifrostResponsesResponse
	chatCallCount      int
	responsesCallCount int
}

func (m *MockLLMCaller) MakeChatRequest(ctx *schemas.BifrostContext, req *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) {
	if m.chatCallCount >= len(m.chatResponses) {
		return nil, &schemas.BifrostError{
			IsBifrostError: false,
			Error: &schemas.ErrorField{
				Message: "no more mock chat responses available",
			},
		}
	}

	response := m.chatResponses[m.chatCallCount]
	m.chatCallCount++
	return response, nil
}

func (m *MockLLMCaller) MakeResponsesRequest(ctx *schemas.BifrostContext, req *schemas.BifrostResponsesRequest) (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
	if m.responsesCallCount >= len(m.responsesResponses) {
		return nil, &schemas.BifrostError{
			IsBifrostError: false,
			Error: &schemas.ErrorField{
				Message: "no more mock responses api responses available",
			},
		}
	}

	response := m.responsesResponses[m.responsesCallCount]
	m.responsesCallCount++
	return response, nil
}

// =============================================================================
// BASIC AGENT LOOP TESTS
// =============================================================================

func TestAgent_BasicLoop(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// Setup MCP manager with auto-executable tools
	err = SetInternalClientAutoExecute(manager, []string{"echo"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	// Setup mock LLM with 2 responses:
	// 1. First response: LLM wants to call echo tool
	// 2. Second response: LLM finishes with text
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			// First call: return tool call
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "Hello from agent"),
			}),
			// Second call: return final text
			CreateChatResponseWithText("The echo tool returned your message successfully"),
		},
	}

	// Initial LLM response with tool call
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1 // Start from second response for subsequent calls

	// Create mock request
	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Please echo hello"),
				},
			},
		},
	}

	// Execute agent mode
	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			// Use real tool execution
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr, "agent loop should complete successfully")
	require.NotNil(t, result)

	t.Logf("Agent completed with %d LLM calls total", mockLLM.chatCallCount)

	// Verify final response
	assert.NotEmpty(t, result.Choices)
	assert.Equal(t, "stop", *result.Choices[0].FinishReason, "should finish with stop reason")

	// Verify the agent executed at least one tool
	assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "should have made follow-up LLM call")
}

func TestAgent_BasicLoop_ChatFormat(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterCalculatorTool(manager)
	require.NoError(t, err, "should register calculator tool")

	// Set auto-execute for calculator
	err = SetInternalClientAutoExecute(manager, []string{"calculator"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleCalculatorToolCall("call-1", "add", 5, 3),
			}),
			CreateChatResponseWithText("The result is 8"),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Calculate 5+3"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	content := result.Choices[0].ChatNonStreamResponseChoice.Message.Content
	assert.NotNil(t, content)
	t.Logf("Final response: %s", *content.ContentStr)
}

func TestAgent_BasicLoop_ResponsesFormat(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// Set auto-execute for echo
	err = SetInternalClientAutoExecute(manager, []string{"echo"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		responsesResponses: []*schemas.BifrostResponsesResponse{
			CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
				{
					CallID:    schemas.Ptr("call-1"),
					Name: schemas.Ptr("bifrostInternal-echo"),
					Arguments: schemas.Ptr(`{"message": "testing responses format"}`),
				},
			}),
			CreateResponsesResponseWithText("Successfully echoed your message"),
		},
	}

	initialResponse := mockLLM.responsesResponses[0]
	mockLLM.responsesCallCount = 1

	originalReq := &schemas.BifrostResponsesRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ResponsesMessage{
			{
				Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
				Content: &schemas.ResponsesMessageContent{
					ContentStr: schemas.Ptr("Echo a message"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForResponsesRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeResponsesRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)
	assert.NotEmpty(t, result.Output)
	t.Logf("Agent completed with %d messages in output", len(result.Output))
}

// =============================================================================
// AGENT ITERATIONS TESTS
// =============================================================================

func TestAgent_SingleIteration(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// Set auto-execute for all tools
	err = SetInternalClientAutoExecute(manager, []string{"*"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	// LLM returns one tool call, then stops
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "single iteration test"),
			}),
			// Immediately finish after tool execution
			CreateChatResponseWithText("Done after one tool call"),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Should have made exactly 2 LLM calls total (initial + 1 follow-up after tool execution)
	assert.Equal(t, 2, mockLLM.chatCallCount, "should have exactly one iteration")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	// Verify no more tool calls in final response
	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
	if finalMessage.ChatAssistantMessage != nil {
		assert.Empty(t, finalMessage.ChatAssistantMessage.ToolCalls, "final response should have no tool calls")
	}
}

func TestAgent_MultipleIterations(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")
	err = RegisterCalculatorTool(manager)
	require.NoError(t, err, "should register calculator tool")
	err = RegisterWeatherTool(manager)
	require.NoError(t, err, "should register weather tool")

	// Set auto-execute for all tools
	err = SetInternalClientAutoExecute(manager, []string{"*"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	// LLM returns tool calls for 3 iterations, then stops
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			// Iteration 1: echo tool
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "iteration 1"),
			}),
			// Iteration 2: calculator tool
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleCalculatorToolCall("call-2", "add", 10, 20),
			}),
			// Iteration 3: weather tool
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleWeatherToolCall("call-3", "New York", ""),
			}),
			// Final: stop
			CreateChatResponseWithText("Completed all 3 tool calls"),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Multi-step task"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Should have made 4 LLM calls total (initial + 3 follow-ups for each tool execution)
	assert.Equal(t, 4, mockLLM.chatCallCount, "should have 4 calls total (3 iterations + final)")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	t.Logf("Completed agent loop with 3 iterations")
}

func TestAgent_NoToolCalls(t *testing.T) {
	t.Parallel()

	// Use InProcess tools (even though we won't call them)
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// Set auto-execute for all tools
	err = SetInternalClientAutoExecute(manager, []string{"*"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	// LLM returns response with no tool calls (immediate stop)
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithText("I don't need to use any tools for this"),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 0 // No calls should be made

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Simple question"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Should have made NO additional LLM calls
	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make any LLM calls when no tool calls")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	// Should return the original response unchanged
	assert.Equal(t, initialResponse, result, "should return original response when no tool calls")
}

// =============================================================================
// MIXED AUTO AND NON-AUTO TOOLS TESTS
// =============================================================================

func TestAgent_MixedAutoAndNonAutoTools(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")
	err = RegisterCalculatorTool(manager)
	require.NoError(t, err, "should register calculator tool")

	// Configure only "echo" as auto-executable, other tools require approval
	err = SetInternalClientAutoExecute(manager, []string{"echo"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	// LLM returns both auto and non-auto tools
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "auto tool"),
				GetSampleCalculatorToolCall("call-2", "add", 5, 3), // Not auto-executable
			}),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 0 // Should not make additional calls

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test mixed tools"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Agent should stop and return non-auto tools
	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make additional calls when non-auto tool present")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	// Verify response contains the non-auto tool (calculator) for user approval
	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
	require.NotNil(t, finalMessage.ChatAssistantMessage)
	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)

	// Should have the calculator tool call (non-auto)
	found := false
	for _, tc := range finalMessage.ChatAssistantMessage.ToolCalls {
		if *tc.Function.Name == "bifrostInternal-calculator" {
			found = true
			break
		}
	}
	assert.True(t, found, "response should contain the non-auto-executable calculator tool")

	// Response should also include results of auto-executed tools in content
	assert.NotNil(t, finalMessage.Content)
	t.Logf("Response content: %s", *finalMessage.Content.ContentStr)
}

func TestAgent_OnlyAutoTools(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// All tools auto-executable
	err = SetInternalClientAutoExecute(manager, []string{"*"})
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			// Multiple auto-executable tools
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "first"),
				GetSampleEchoToolCall("call-2", "second"),
			}),
			// Continue loop
			CreateChatResponseWithText("All tools executed"),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test only auto tools"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Agent should execute all tools and continue loop
	assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "should make follow-up LLM call")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	// Final response should have no pending tool calls
	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
	if finalMessage.ChatAssistantMessage != nil {
		assert.Empty(t, finalMessage.ChatAssistantMessage.ToolCalls, "no pending tool calls")
	}
}

func TestAgent_OnlyNonAutoTools(t *testing.T) {
	t.Parallel()

	// Use InProcess tools - no external server needed
	manager := setupMCPManager(t)
	err := RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	// No tools are auto-executable
	err = SetInternalClientAutoExecute(manager, []string{}) // No auto-executable tools
	require.NoError(t, err, "should set auto-execute for internal client")

	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-1", "needs approval"),
			}),
		},
	}

	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 0 // Should not make additional calls

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test non-auto tools"),
				},
			},
		},
	}

	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, bifrostErr)
	require.NotNil(t, result)

	// Agent should stop immediately and return tools to user
	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make any LLM calls")
	assert.Equal(t, "tool_calls", *result.Choices[0].FinishReason, "should return with tool_calls since tools need approval")

	// Verify response contains the non-auto tools
	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
	require.NotNil(t, finalMessage.ChatAssistantMessage)
	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
	assert.Equal(t, "bifrostInternal-echo", *finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name)
}

// =============================================================================
// AGENT WITH REAL LLM TESTS
// =============================================================================

func TestAgent_WithRealLLM_Simple(t *testing.T) {
	t.Parallel()

	testConfig := GetTestConfig(t)
	if !testConfig.UseRealLLM {
		t.Skip("Real LLM not configured")
	}

	// Setup MCP with auto-executable calculator tool using InProcess
	manager := setupMCPManager(t)
	err := RegisterCalculatorTool(manager)
	require.NoError(t, err, "should register calculator tool")

	err = SetInternalClientAutoExecute(manager, []string{"calculator"})
	require.NoError(t, err, "should set auto-execute for internal client")

	// Setup bifrost with real LLM
	bifrost := setupBifrost(t)
	bifrost.SetMCPManager(manager)

	// Create context with timeout for real API call
	ctx, cancel := createTestContextWithTimeout(30 * time.Second)
	defer cancel()

	// Ask LLM to use calculator tool
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Calculate 25 + 17 using the calculator tool"),
				},
			},
		},
	}

	// Make request - agent mode will activate if LLM returns tool calls
	result, bifrostErr := bifrost.ChatCompletionRequest(ctx, req)
	if bifrostErr != nil {
		// Skip if there's an API error (likely missing/invalid API key or config issue)
		t.Skipf("Skipping real LLM test due to API error: %v", bifrostErr.Error)
	}
	require.NotNil(t, result)

	// Verify we got a response
	assert.NotEmpty(t, result.Choices)
	t.Logf("Real LLM agent response: %s", *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr)

	// Check if response mentions the result (42)
	responseText := *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr
	// Don't assert exact match due to LLM variability, just log
	t.Logf("Response contains calculation result")
	_ = responseText
}

func TestAgent_WithRealLLM_MultiStep(t *testing.T) {
	t.Parallel()

	testConfig := GetTestConfig(t)
	if !testConfig.UseRealLLM {
		t.Skip("Real LLM not configured")
	}

	// Setup MCP with auto-executable tools using InProcess
	manager := setupMCPManager(t)
	err := RegisterCalculatorTool(manager)
	require.NoError(t, err, "should register calculator tool")
	err = RegisterEchoTool(manager)
	require.NoError(t, err, "should register echo tool")

	err = SetInternalClientAutoExecute(manager, []string{"calculator", "echo"})
	require.NoError(t, err, "should set auto-execute for internal client")

	bifrost := setupBifrost(t)
	bifrost.SetMCPManager(manager)

	// Create context with timeout for real API call
	ctx, cancel := createTestContextWithTimeout(30 * time.Second)
	defer cancel()

	// Ask LLM to perform multi-step task
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("First calculate 10 + 5, then echo the result"),
				},
			},
		},
	}

	result, bifrostErr := bifrost.ChatCompletionRequest(ctx, req)
	if bifrostErr != nil {
		// Skip if there's an API error (likely missing/invalid API key or config issue)
		t.Skipf("Skipping real LLM test due to API error: %v", bifrostErr.Error)
	}
	require.NotNil(t, result)

	assert.NotEmpty(t, result.Choices)
	t.Logf("Multi-step agent response: %s", *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr)

	// Response should mention both operations
	// Due to LLM variability, we just log the result
	t.Logf("Multi-step task completed")
}