// bifrost/core/internal/mcptests/codemode_agent_test.go

package mcptests

import (
	"fmt"
	"testing"
	"time"

	"github.com/maximhq/bifrost/core/schemas"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// =============================================================================
// CODE MODE + AGENT BASIC TESTS
// =============================================================================

// TestCodeModeAgent_Basic drives the agent loop for a chat request whose
// initial response is a single executeToolCode call that invokes the
// auto-executable echo tool. It expects exactly one follow-up LLM call and a
// final response that finishes with "stop".
func TestCodeModeAgent_Basic(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// Setup code mode client with agent enabled + HTTP client with tools
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "mcpserver"
	httpClient.ToolsToExecute = []string{"*"}
	httpClient.ToolsToAutoExecute = []string{"echo"}

	manager := setupMCPManager(t, codeModeClient, httpClient)
	ctx := createTestContext()

	// Mock LLM with 2 responses:
	// 1. First response: executeToolCode that calls echo
	// 2. Second response: Final text
	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"),
			}),
			CreateChatResponseWithText("Execution complete"),
		},
	}

	// The first scripted response is handed to the agent directly, so the
	// mock's counter is advanced past it.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1 // Start from second response

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test code mode agent"),
				},
			},
		},
	}

	// Execute agent mode
	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err, "agent loop should complete successfully")
	require.NotNil(t, result)

	// Verify final response. These are hard requirements: indexing an empty
	// Choices slice or dereferencing a nil FinishReason would panic instead of
	// reporting a test failure.
	require.NotEmpty(t, result.Choices)
	require.NotNil(t, result.Choices[0].FinishReason, "final choice should carry a finish reason")
	assert.Equal(t, "stop", *result.Choices[0].FinishReason, "should finish with stop reason")

	// Verify the agent executed code and made follow-up LLM call
	assert.Equal(t, 2, mockLLM.chatCallCount, "should have made 2 total LLM calls (initial + follow-up)")

	t.Logf("Agent completed with %d LLM calls total", mockLLM.chatCallCount)
}

// TestCodeModeAgent_NonAutoToolInCode scripts an executeToolCode call followed
// by an echo tool call while no HTTP tools are configured for auto-execution.
// It expects the agent to return after one follow-up LLM call with the echo
// tool call still present in the final assistant message.
func TestCodeModeAgent_NonAutoToolInCode(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// Code returns result, then LLM returns non-auto tool
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "mcpserver"
	httpClient.ToolsToExecute = []string{"*"}
	httpClient.ToolsToAutoExecute = []string{} // No auto tools (except code mode)

	manager := setupMCPManager(t, codeModeClient, httpClient)
	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = 'code result'"),
			}),
			// After code execution, LLM returns non-auto tool
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-2", "needs approval"),
			}),
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test non-auto tool"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)

	// Guard the index and pointer dereferences below so a malformed result is
	// reported as a test failure rather than a panic.
	require.NotEmpty(t, result.Choices)
	choice := result.Choices[0]

	// Agent should stop when it encounters non-auto tool
	assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up before stopping)")
	require.NotNil(t, choice.FinishReason)
	assert.Equal(t, "stop", *choice.FinishReason)

	// Verify response contains the non-auto tool (awaiting approval)
	require.NotNil(t, choice.ChatNonStreamResponseChoice)
	finalMessage := choice.ChatNonStreamResponseChoice.Message
	require.NotNil(t, finalMessage)
	require.NotNil(t, finalMessage.ChatAssistantMessage)
	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
	require.NotNil(t, finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name)
	// Tool name could be either "echo" or with prefix like "bifrostInternal-echo"
	toolName := *finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name
	assert.True(t, toolName == "echo" || toolName == "bifrostInternal-echo",
		fmt.Sprintf("expected echo tool, got %s", toolName))

	t.Logf("Agent correctly stopped at non-auto tool")
}

// TestCodeModeAgent_AutoToolInCode scripts two consecutive executeToolCode
// calls followed by a text response, with echo configured as auto-executable.
// It expects the agent to run both iterations (three LLM calls in total) and
// finish with "stop".
func TestCodeModeAgent_AutoToolInCode(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// Code calls tool, agent continues loop
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "mcpserver"
	httpClient.ToolsToExecute = []string{"*"}
	httpClient.ToolsToAutoExecute = []string{"echo"}

	manager := setupMCPManager(t, codeModeClient, httpClient)
	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='test')\nresult = 'done'"),
			}),
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-2", "result = 'second iteration'"),
			}),
			CreateChatResponseWithText("All done"),
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Multi-iteration test"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)

	// Agent should execute both code iterations and then finish
	assert.Equal(t, 3, mockLLM.chatCallCount, "should have 3 total LLM calls (initial + 2 follow-ups)")

	// Guard the index and dereference so a malformed result fails the test
	// instead of panicking.
	require.NotEmpty(t, result.Choices)
	require.NotNil(t, result.Choices[0].FinishReason)
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	t.Logf("Agent completed 2 iterations successfully")
}

// TestCodeModeAgent_MixedToolsInCode scripts an executeToolCode call followed
// by a response carrying both an echo call (configured as auto-executable) and
// a calculator call (not auto-executable). It expects the agent to return
// after one follow-up LLM call with the calculator call present in the final
// assistant message and non-nil message content.
func TestCodeModeAgent_MixedToolsInCode(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// After code execution, LLM returns mixed auto/non-auto tools
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "mcpserver"
	httpClient.ToolsToExecute = []string{"*"}
	httpClient.ToolsToAutoExecute = []string{"echo"} // Only echo is auto

	manager := setupMCPManager(t, codeModeClient, httpClient)
	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = 'step 1'"),
			}),
			// After code, LLM returns mixed tools
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				GetSampleEchoToolCall("call-2", "auto"),
				GetSampleCalculatorToolCall("call-3", "add", 5, 3), // Non-auto
			}),
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Mixed tools test"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)

	// Guard the index and pointer dereferences below so a malformed result is
	// reported as a test failure rather than a panic.
	require.NotEmpty(t, result.Choices)
	choice := result.Choices[0]

	// Agent should execute echo, then stop at calculator
	assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)")
	require.NotNil(t, choice.FinishReason)
	assert.Equal(t, "stop", *choice.FinishReason)

	// Verify response contains the non-auto calculator tool
	require.NotNil(t, choice.ChatNonStreamResponseChoice)
	finalMessage := choice.ChatNonStreamResponseChoice.Message
	require.NotNil(t, finalMessage)
	require.NotNil(t, finalMessage.ChatAssistantMessage)
	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)

	// Should have calculator tool call (non-auto)
	found := false
	for _, tc := range finalMessage.ChatAssistantMessage.ToolCalls {
		if tc.Function.Name == nil {
			// A tool call without a name cannot be the calculator; skip it
			// rather than dereferencing nil.
			continue
		}
		toolName := *tc.Function.Name
		if toolName == "calculator" || toolName == "bifrostInternal-calculator" {
			found = true
			break
		}
	}
	assert.True(t, found, "response should contain the non-auto-executable calculator tool")

	// Response should also include results of auto-executed tools in content
	assert.NotNil(t, finalMessage.Content)
	t.Logf("Mixed tools handled correctly")
}

// TestCodeModeAgent_NoToolCallsInCode registers only the code mode client and
// scripts an executeToolCode call whose code invokes no tools, followed by a
// text response. It expects one follow-up LLM call and a "stop" finish reason.
func TestCodeModeAgent_NoToolCallsInCode(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// Code mode call is final step (no follow-up)
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	manager := setupMCPManager(t, codeModeClient)
	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = 'final result'"),
			}),
			CreateChatResponseWithText("Done, no more tools"),
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Simple code test"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)

	// Agent should execute code, then finish
	assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)")

	// Guard the index and dereference so a malformed result fails the test
	// instead of panicking.
	require.NotEmpty(t, result.Choices)
	require.NotNil(t, result.Choices[0].FinishReason)
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	t.Logf("Code execution with no follow-up tools completed")
}

// =============================================================================
// FILTERING IN CODE MODE AGENT
// =============================================================================

// TestCodeModeAgent_FilteringInCode restricts the HTTP client's ToolsToExecute
// to echo and scripts code that calls the calculator tool. It checks that the
// agent returns without error and that the mock LLM counter reached at least 1.
func TestCodeModeAgent_FilteringInCode(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// ToolsToExecute filtering is expected to apply to tools invoked from code:
	// only echo is listed, so the calculator call below is not allowed.
	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	toolClient := GetSampleHTTPClientConfigNoSpaces(cfg.HTTPServerURL)
	toolClient.ID = "mcpserver"
	toolClient.ToolsToExecute = []string{"echo"}
	toolClient.ToolsToAutoExecute = []string{}

	mgr := setupMCPManager(t, agentClient, toolClient)
	bfCtx := createTestContext()

	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "result = mcpserver.calculator(operation='add', x=5, y=3)"),
		}),
		// Scripted reply for the follow-up call that carries the tool execution error.
		CreateChatResponseWithText("Tool was blocked by filtering"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 0}
	first := scripted[0]

	prompt := schemas.Ptr("Test filtering")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// The code runs but the calculator call is expected to fail; the agent
	// should then make a follow-up call that includes the error.
	assert.GreaterOrEqual(t, llm.chatCallCount, 1, "agent should handle tool filtering")

	t.Log("Filtering in code mode validated")
}

// TestCodeModeAgent_AutoExecuteFiltering configures the HTTP client with every
// tool executable but none auto-executable, then scripts code that calls echo.
// It expects exactly one follow-up LLM call and a "stop" finish reason.
func TestCodeModeAgent_AutoExecuteFiltering(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// ToolsToAutoExecute doesn't apply to tools called from within code
	// Tools called from code only need to be in ToolsToExecute
	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "mcpserver"
	httpClient.ToolsToExecute = []string{"*"}  // All tools can execute
	httpClient.ToolsToAutoExecute = []string{} // No auto tools (agent-level)

	manager := setupMCPManager(t, codeModeClient, httpClient)
	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"),
			}),
			CreateChatResponseWithText("Complete"),
			CreateChatResponseWithText("Error handled"), // For code execution error follow-up
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Test auto-execute filtering"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)

	// Code should execute (executeToolCode is auto)
	// Echo should be called from code (ToolsToExecute allows it)
	// But mcpserver is not bound in code, so it will fail
	// Agent will make follow-up call with error
	// Auto-execute filtering only applies to agent-level tool calls
	// NOTE(review): the lines above disagree on whether the echo call made from
	// code succeeds; the assertions below only pin the call count and the
	// finish reason — confirm the intended behavior.
	assert.Equal(t, 2, mockLLM.chatCallCount, "should make follow-up call for error handling")

	// Guard the index and dereference so a malformed result fails the test
	// instead of panicking.
	require.NotEmpty(t, result.Choices)
	require.NotNil(t, result.Choices[0].FinishReason)
	assert.Equal(t, "stop", *result.Choices[0].FinishReason)

	t.Logf("Auto-execute filtering correctly applies only to agent-level calls")
}

// =============================================================================
// MAX DEPTH IN CODE MODE
// =============================================================================

// TestCodeModeAgent_MaxDepth sets MaxAgentDepth to 3 and scripts three
// consecutive executeToolCode responses followed by a text response, then
// checks that the mock LLM counter does not exceed 4.
func TestCodeModeAgent_MaxDepth(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// The depth limit is expected to bound code mode iterations.
	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	toolClient := GetSampleHTTPClientConfigNoSpaces(cfg.HTTPServerURL)
	toolClient.ID = "mcpserver"
	toolClient.ToolsToExecute = []string{"*"}
	toolClient.ToolsToAutoExecute = []string{"echo"}

	mgr := setupMCPManager(t, agentClient, toolClient)
	limits := &schemas.MCPToolManagerConfig{
		MaxAgentDepth:        3,
		ToolExecutionTimeout: 30 * time.Second,
	}
	mgr.UpdateToolManagerConfig(limits)

	bfCtx := createTestContext()

	// Three code iterations, each calling echo, then a text reply that the
	// original author did not expect to be reached.
	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='iter 1')\nresult = 'done1'"),
		}),
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-2", "mcpserver.echo(message='iter 2')\nresult = 'done2'"),
		}),
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-3", "mcpserver.echo(message='iter 3')\nresult = 'done3'"),
		}),
		CreateChatResponseWithText("Should not reach - max depth hit"),
	}
	// The first scripted response doubles as the initial response, so the
	// counter starts at 1.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 1}
	first := scripted[0]

	prompt := schemas.Ptr("Max depth test")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// Upper bound only: the initial call plus at most 3 iterations gives 4.
	assert.LessOrEqual(t, llm.chatCallCount, 4, "max depth 3 should limit iterations (initial + 3 iterations)")
	t.Logf("Agent stopped at depth limit with %d calls", llm.chatCallCount)
}

// TestCodeModeAgent_MaxDepth_ChatFormat sets MaxAgentDepth to 2 and runs the
// chat-format agent loop over two scripted executeToolCode responses plus a
// text response. It checks that at most 3 LLM calls are counted and that the
// result still has a choice with a finish reason.
func TestCodeModeAgent_MaxDepth_ChatFormat(t *testing.T) {
	t.Parallel()

	config := GetTestConfig(t)
	if config.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
	httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
	httpClient.ID = "server"
	httpClient.ToolsToExecute = []string{"*"}
	httpClient.ToolsToAutoExecute = []string{"*"}

	manager := setupMCPManager(t, codeModeClient, httpClient)
	manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
		MaxAgentDepth:        2,
		ToolExecutionTimeout: 30 * time.Second,
	})

	ctx := createTestContext()

	mockLLM := &MockLLMCaller{
		chatResponses: []*schemas.BifrostChatResponse{
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-1", "result = 'test1'"),
			}),
			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
				CreateExecuteToolCodeCall("call-2", "result = 'test2'"),
			}),
			CreateChatResponseWithText("Done"),
		},
	}

	// The first scripted response is passed in as the initial response, so the
	// mock's counter starts at the second one.
	initialResponse := mockLLM.chatResponses[0]
	mockLLM.chatCallCount = 1

	originalReq := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: schemas.Ptr("Chat format max depth"),
				},
			},
		},
	}

	result, err := manager.CheckAndExecuteAgentForChatRequest(
		ctx,
		originalReq,
		initialResponse,
		mockLLM.MakeChatRequest,
		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
			return manager.ExecuteToolCall(ctx, request)
		},
	)

	require.Nil(t, err)
	require.NotNil(t, result)
	// Initial call + up to 2 iterations = max 3 LLM calls
	assert.LessOrEqual(t, mockLLM.chatCallCount, 3, "max depth 2 in Chat format (initial + 2 iterations)")

	// Verify Chat response structure is maintained. NotEmpty is a hard
	// requirement because the next line indexes Choices[0], which would panic
	// on an empty slice.
	require.NotEmpty(t, result.Choices)
	assert.NotNil(t, result.Choices[0].FinishReason)
	t.Logf("Chat format max depth enforced")
}

// TestCodeModeAgent_MaxDepth_ResponsesFormat sets MaxAgentDepth to 2 and runs
// the Responses-format agent loop over two scripted executeToolCode responses
// plus a text response. It checks that at most 3 LLM calls are counted and that
// the result has non-empty Output.
func TestCodeModeAgent_MaxDepth_ResponsesFormat(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	toolClient := GetSampleHTTPClientConfigNoSpaces(cfg.HTTPServerURL)
	toolClient.ID = "server"
	toolClient.ToolsToExecute = []string{"*"}
	toolClient.ToolsToAutoExecute = []string{"*"}

	mgr := setupMCPManager(t, agentClient, toolClient)
	limits := &schemas.MCPToolManagerConfig{
		MaxAgentDepth:        2,
		ToolExecutionTimeout: 30 * time.Second,
	}
	mgr.UpdateToolManagerConfig(limits)

	bfCtx := createTestContext()

	scripted := []*schemas.BifrostResponsesResponse{
		CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
			CreateExecuteToolCodeCallResponses("call-1", "return 'test1';"),
		}),
		CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
			CreateExecuteToolCodeCallResponses("call-2", "return 'test2';"),
		}),
		CreateResponsesResponseWithText("Done"),
	}
	// The first scripted response doubles as the initial response, so the
	// counter starts at 1.
	llm := &MockLLMCaller{responsesResponses: scripted, responsesCallCount: 1}
	first := scripted[0]

	userMessage := schemas.ResponsesMessage{
		Type:    schemas.Ptr(schemas.ResponsesMessageTypeMessage),
		Role:    schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
		Content: &schemas.ResponsesMessageContent{ContentStr: schemas.Ptr("Responses format max depth")},
	}
	req := &schemas.BifrostResponsesRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input:    []schemas.ResponsesMessage{userMessage},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForResponsesRequest(bfCtx, req, first, llm.MakeResponsesRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)
	// Upper bound only: the initial call plus at most 2 iterations gives 3.
	assert.LessOrEqual(t, llm.responsesCallCount, 3, "max depth 2 in Responses format (initial + 2 iterations)")

	// The Responses-format structure should be preserved.
	assert.NotEmpty(t, out.Output)
	t.Log("Responses format max depth enforced")
}

// =============================================================================
// TIMEOUT IN CODE MODE
// =============================================================================

// TestCodeModeAgent_Timeout sets a 2-second ToolExecutionTimeout and scripts
// code containing a loop that never exits on its own. It checks that the agent
// returns without error and that the mock LLM counter reached at least 1.
func TestCodeModeAgent_Timeout(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	mgr := setupMCPManager(t, agentClient)
	limits := &schemas.MCPToolManagerConfig{
		MaxAgentDepth:        10,
		ToolExecutionTimeout: 2 * time.Second, // deliberately short
	}
	mgr.UpdateToolManagerConfig(limits)

	bfCtx := createTestContext()

	// The scripted code spins in `while True`, so it is expected to be cut off
	// by the tool execution timeout.
	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "def loop():\n    while True:\n        pass\n    return 'timeout'\nresult = loop()"),
		}),
		// Scripted reply for the follow-up call that carries the timeout error.
		CreateChatResponseWithText("Code execution timed out"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 0}
	first := scripted[0]

	prompt := schemas.Ptr("Timeout test")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// A timeout must not surface as an agent error.
	assert.GreaterOrEqual(t, llm.chatCallCount, 1, "agent should handle timeout gracefully")

	t.Log("Timeout handled gracefully")
}

// TestCodeModeAgent_Timeout_ChatFormat sets a 1-second ToolExecutionTimeout,
// scripts code containing a loop that never exits on its own, and checks that
// the chat-format result comes back without error and with non-empty Choices.
func TestCodeModeAgent_Timeout_ChatFormat(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	mgr := setupMCPManager(t, agentClient)
	limits := &schemas.MCPToolManagerConfig{
		MaxAgentDepth:        10,
		ToolExecutionTimeout: 1 * time.Second,
	}
	mgr.UpdateToolManagerConfig(limits)

	bfCtx := createTestContext()

	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "def loop():\n    while True:\n        pass\n    return 'timeout'\nresult = loop()"),
		}),
		// Scripted reply for the follow-up call that carries the timeout error.
		CreateChatResponseWithText("Code execution timed out"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 0}
	first := scripted[0]

	prompt := schemas.Ptr("Chat timeout test")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// The chat response structure should survive the timeout.
	assert.NotEmpty(t, out.Choices)
	t.Log("Chat format timeout handled")
}

// TestCodeModeAgent_Timeout_ResponsesFormat sets a 1-second
// ToolExecutionTimeout, scripts code containing a `while(true)` loop, and
// checks that the Responses-format result comes back without error and with
// non-empty Output.
func TestCodeModeAgent_Timeout_ResponsesFormat(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	mgr := setupMCPManager(t, agentClient)
	limits := &schemas.MCPToolManagerConfig{
		MaxAgentDepth:        10,
		ToolExecutionTimeout: 1 * time.Second,
	}
	mgr.UpdateToolManagerConfig(limits)

	bfCtx := createTestContext()

	scripted := []*schemas.BifrostResponsesResponse{
		CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
			CreateExecuteToolCodeCallResponses("call-1", "while(true) {}; return 'timeout';"),
		}),
		// Scripted reply for the follow-up call that carries the timeout error.
		CreateResponsesResponseWithText("Code execution timed out"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{responsesResponses: scripted, responsesCallCount: 0}
	first := scripted[0]

	userMessage := schemas.ResponsesMessage{
		Type:    schemas.Ptr(schemas.ResponsesMessageTypeMessage),
		Role:    schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
		Content: &schemas.ResponsesMessageContent{ContentStr: schemas.Ptr("Responses timeout test")},
	}
	req := &schemas.BifrostResponsesRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input:    []schemas.ResponsesMessage{userMessage},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForResponsesRequest(bfCtx, req, first, llm.MakeResponsesRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// The Responses-format structure should survive the timeout.
	assert.NotEmpty(t, out.Output)
	t.Log("Responses format timeout handled")
}

// =============================================================================
// ERROR HANDLING IN CODE MODE AGENT
// =============================================================================

// TestCodeModeAgent_ErrorInCode scripts code that calls fail('intentional
// error') and checks that the agent returns a result without error and that
// the mock LLM counter reached at least 1.
func TestCodeModeAgent_ErrorInCode(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// A runtime error raised inside the executed code should be handled
	// gracefully by the agent.
	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	mgr := setupMCPManager(t, agentClient)
	bfCtx := createTestContext()

	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "fail('intentional error')"),
		}),
		// Scripted reply for the follow-up call that carries the error.
		CreateChatResponseWithText("Error occurred during execution"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 0}
	first := scripted[0]

	prompt := schemas.Ptr("Error test")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// The agent may make a follow-up call to summarize the error.
	assert.GreaterOrEqual(t, llm.chatCallCount, 1, "agent should handle error gracefully")

	t.Log("Error in code handled gracefully")
}

// TestCodeModeAgent_ToolErrorInCode scripts code that calls the calculator
// tool with operation='invalid' and checks that the agent returns a result
// without error and that the mock LLM counter reached at least 1.
func TestCodeModeAgent_ToolErrorInCode(t *testing.T) {
	t.Parallel()

	cfg := GetTestConfig(t)
	if cfg.HTTPServerURL == "" {
		t.Skip("MCP_HTTP_URL not set")
	}

	// Errors returned by a tool invoked from code are expected to propagate.
	agentClient := GetSampleCodeModeAgentClientConfig(t, cfg.HTTPServerURL)
	toolClient := GetSampleHTTPClientConfigNoSpaces(cfg.HTTPServerURL)
	toolClient.ID = "mcpserver"
	toolClient.ToolsToExecute = []string{"*"}
	toolClient.ToolsToAutoExecute = []string{"*"}

	mgr := setupMCPManager(t, agentClient, toolClient)
	bfCtx := createTestContext()

	// The calculator is invoked with invalid arguments to provoke a tool error.
	scripted := []*schemas.BifrostChatResponse{
		CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
			CreateExecuteToolCodeCall("call-1", "mcpserver.calculator(operation='invalid', x=1, y=2)\nresult = 'done'"),
		}),
		// Scripted reply for the follow-up call that carries the tool error.
		CreateChatResponseWithText("Tool error occurred"),
	}
	// NOTE(review): the counter starts at 0 here, whereas several sibling tests
	// in this file start it at 1 to skip the initial response — confirm intent.
	llm := &MockLLMCaller{chatResponses: scripted, chatCallCount: 0}
	first := scripted[0]

	prompt := schemas.Ptr("Tool error test")
	req := &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4",
		Input: []schemas.ChatMessage{{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: prompt},
		}},
	}

	// Tool execution is delegated to the manager under test.
	runTool := func(c *schemas.BifrostContext, r *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
		return mgr.ExecuteToolCall(c, r)
	}
	out, err := mgr.CheckAndExecuteAgentForChatRequest(bfCtx, req, first, llm.MakeChatRequest, runTool)

	require.Nil(t, err)
	require.NotNil(t, out)

	// The agent may make a follow-up call after the tool error.
	assert.GreaterOrEqual(t, llm.chatCallCount, 1, "agent should handle tool error gracefully")

	t.Log("Tool error from code handled")
}