package mcptests import ( "fmt" "testing" "time" "github.com/maximhq/bifrost/core/schemas" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) // ============================================================================= // CODE MODE + AGENT BASIC TESTS // ============================================================================= func TestCodeModeAgent_Basic(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Setup code mode client with agent enabled + HTTP client with tools codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"echo"} manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() // Mock LLM with 2 responses: // 1. First response: executeToolCode that calls echo // 2. Second response: Final text mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"), }), CreateChatResponseWithText("Execution complete"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 // Start from second response originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Test code mode agent"), }, }, }, } // Execute agent mode result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err, "agent loop should complete successfully") require.NotNil(t, result) // Verify final response assert.NotEmpty(t, result.Choices) assert.Equal(t, "stop", *result.Choices[0].FinishReason, "should finish with stop reason") // Verify the agent executed code and made follow-up LLM call assert.Equal(t, 2, mockLLM.chatCallCount, "should have made 2 total LLM calls (initial + follow-up)") t.Logf("Agent completed with %d LLM calls total", mockLLM.chatCallCount) } func TestCodeModeAgent_NonAutoToolInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Code returns result, then LLM returns non-auto tool codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{} // No auto tools (except code mode) manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = 'code result'"), }), // After code execution, LLM returns non-auto tool CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ GetSampleEchoToolCall("call-2", "needs approval"), }), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Test non-auto tool"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should stop when it encounters non-auto tool assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up before stopping)") assert.Equal(t, "stop", *result.Choices[0].FinishReason) // Verify response contains the non-auto tool (awaiting approval) finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message require.NotNil(t, finalMessage.ChatAssistantMessage) require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls) // Tool name could be either "echo" or with prefix like "bifrostInternal-echo" toolName := *finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name assert.True(t, toolName == "echo" || toolName == "bifrostInternal-echo", fmt.Sprintf("expected echo tool, got %s", toolName)) t.Logf("Agent correctly stopped at non-auto tool") } func TestCodeModeAgent_AutoToolInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Code calls tool, agent continues loop codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"echo"} manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='test')\nresult = 'done'"), }), CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-2", "result = 'second iteration'"), }), CreateChatResponseWithText("All done"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Multi-iteration test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should execute both code iterations and then finish assert.Equal(t, 3, mockLLM.chatCallCount, "should have 3 total LLM calls (initial + 2 follow-ups)") assert.Equal(t, "stop", *result.Choices[0].FinishReason) t.Logf("Agent completed 2 iterations successfully") } func TestCodeModeAgent_MixedToolsInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // After code execution, LLM returns mixed auto/non-auto tools codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"echo"} // Only echo is auto manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = 'step 1'"), }), // After code, LLM returns mixed tools CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ GetSampleEchoToolCall("call-2", "auto"), GetSampleCalculatorToolCall("call-3", "add", 5, 3), // Non-auto }), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Mixed tools test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should execute echo, then stop at calculator assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)") assert.Equal(t, "stop", *result.Choices[0].FinishReason) // Verify response contains the non-auto calculator tool finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message require.NotNil(t, finalMessage.ChatAssistantMessage) require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls) // Should have calculator tool call (non-auto) found := false for _, tc := range finalMessage.ChatAssistantMessage.ToolCalls { toolName := *tc.Function.Name if toolName == "calculator" || toolName == "bifrostInternal-calculator" { found = true break } } assert.True(t, found, "response should contain the non-auto-executable calculator tool") // Response should also include results of auto-executed tools in content assert.NotNil(t, finalMessage.Content) t.Logf("Mixed tools handled correctly") } func TestCodeModeAgent_NoToolCallsInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Code mode call is final step (no follow-up) codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) manager := setupMCPManager(t, codeModeClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = 'final result'"), }), CreateChatResponseWithText("Done, no more tools"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Simple code test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should execute code, then finish assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)") assert.Equal(t, "stop", *result.Choices[0].FinishReason) t.Logf("Code execution with no follow-up tools completed") } // ============================================================================= // FILTERING IN CODE MODE AGENT // ============================================================================= func TestCodeModeAgent_FilteringInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // ToolsToExecute filtering applies to tools called from code codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"echo"} // Only echo allowed, calculator blocked httpClient.ToolsToAutoExecute = []string{} manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = mcpserver.calculator(operation='add', x=5, y=3)"), }), // Agent makes follow-up call with tool execution error CreateChatResponseWithText("Tool was blocked by filtering"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 0 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Test filtering"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Code should execute but calculator call should fail // The agent should make a follow-up call with the error assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle tool filtering") t.Logf("Filtering in code mode validated") } func TestCodeModeAgent_AutoExecuteFiltering(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // ToolsToAutoExecute doesn't apply to tools called from within code // Tools called from code only need to be in ToolsToExecute codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} // All tools can execute httpClient.ToolsToAutoExecute = []string{} // No auto tools (agent-level) manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"), }), CreateChatResponseWithText("Complete"), CreateChatResponseWithText("Error handled"), // For code execution error follow-up }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Test auto-execute filtering"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Code should execute (executeToolCode is auto) // Echo should be called from code (ToolsToExecute allows it) // But mcpserver is not bound in code, so it will fail // Agent will make follow-up call with error // Auto-execute filtering only applies to agent-level tool calls assert.Equal(t, 2, mockLLM.chatCallCount, "should make follow-up call for error handling") assert.Equal(t, "stop", *result.Choices[0].FinishReason) t.Logf("Auto-execute filtering correctly applies only to agent-level calls") } // ============================================================================= // MAX DEPTH IN CODE MODE // ============================================================================= func TestCodeModeAgent_MaxDepth(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Max depth applies to code mode iterations codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"echo"} manager := setupMCPManager(t, codeModeClient, httpClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 3, ToolExecutionTimeout: 30 * time.Second, }) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='iter 1')\nresult = 'done1'"), }), CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-2", "mcpserver.echo(message='iter 2')\nresult = 'done2'"), }), CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-3", "mcpserver.echo(message='iter 3')\nresult = 'done3'"), }), CreateChatResponseWithText("Should not reach - max depth hit"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Max depth test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Max depth should be enforced // Initial call + up to 3 iterations = max 4 LLM calls assert.LessOrEqual(t, mockLLM.chatCallCount, 4, "max depth 3 should limit iterations (initial + 3 iterations)") t.Logf("Agent stopped at depth limit with %d calls", mockLLM.chatCallCount) } func TestCodeModeAgent_MaxDepth_ChatFormat(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "server" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"*"} manager := setupMCPManager(t, codeModeClient, httpClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 2, ToolExecutionTimeout: 30 * time.Second, }) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "result = 'test1'"), }), CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-2", "result = 'test2'"), }), CreateChatResponseWithText("Done"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 1 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Chat format max depth"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Initial call + up to 2 iterations = max 3 LLM calls assert.LessOrEqual(t, mockLLM.chatCallCount, 3, "max depth 2 in Chat format (initial + 2 iterations)") // Verify Chat response structure is maintained assert.NotEmpty(t, result.Choices) assert.NotNil(t, result.Choices[0].FinishReason) t.Logf("Chat format max depth enforced") } func TestCodeModeAgent_MaxDepth_ResponsesFormat(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "server" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"*"} manager := setupMCPManager(t, codeModeClient, httpClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 2, ToolExecutionTimeout: 30 * time.Second, }) ctx := createTestContext() mockLLM := &MockLLMCaller{ responsesResponses: []*schemas.BifrostResponsesResponse{ CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{ CreateExecuteToolCodeCallResponses("call-1", "return 'test1';"), }), CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{ CreateExecuteToolCodeCallResponses("call-2", "return 'test2';"), }), CreateResponsesResponseWithText("Done"), }, } initialResponse := mockLLM.responsesResponses[0] mockLLM.responsesCallCount = 1 originalReq := &schemas.BifrostResponsesRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ResponsesMessage{ { Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), Content: &schemas.ResponsesMessageContent{ ContentStr: schemas.Ptr("Responses format max depth"), }, }, }, } result, err := manager.CheckAndExecuteAgentForResponsesRequest( ctx, originalReq, initialResponse, mockLLM.MakeResponsesRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Initial call + up to 2 iterations = max 3 LLM calls assert.LessOrEqual(t, mockLLM.responsesCallCount, 3, "max depth 2 in Responses format (initial + 2 iterations)") // Verify Responses format structure is maintained assert.NotEmpty(t, result.Output) t.Logf("Responses format max depth enforced") } // ============================================================================= // TIMEOUT IN CODE MODE // ============================================================================= func TestCodeModeAgent_Timeout(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) manager := setupMCPManager(t, codeModeClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 10, ToolExecutionTimeout: 2 * time.Second, // Short timeout }) ctx := createTestContext() // Code that will timeout (infinite loop simulation) mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "def loop():\n while True:\n pass\n return 'timeout'\nresult = loop()"), }), // Agent makes follow-up call with timeout error CreateChatResponseWithText("Code execution timed out"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 0 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Timeout test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should handle timeout gracefully assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle timeout gracefully") t.Logf("Timeout handled gracefully") } func TestCodeModeAgent_Timeout_ChatFormat(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) manager := setupMCPManager(t, codeModeClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 10, ToolExecutionTimeout: 1 * time.Second, }) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "def loop():\n while True:\n pass\n return 'timeout'\nresult = loop()"), }), // Agent makes follow-up call with timeout error CreateChatResponseWithText("Code execution timed out"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 0 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Chat timeout test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Verify Chat response structure with error assert.NotEmpty(t, result.Choices) t.Logf("Chat format timeout handled") } func TestCodeModeAgent_Timeout_ResponsesFormat(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) manager := setupMCPManager(t, codeModeClient) manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{ MaxAgentDepth: 10, ToolExecutionTimeout: 1 * time.Second, }) ctx := createTestContext() mockLLM := &MockLLMCaller{ responsesResponses: []*schemas.BifrostResponsesResponse{ CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{ CreateExecuteToolCodeCallResponses("call-1", "while(true) {}; return 'timeout';"), }), // Agent makes follow-up call with timeout error CreateResponsesResponseWithText("Code execution timed out"), }, } initialResponse := mockLLM.responsesResponses[0] mockLLM.responsesCallCount = 0 originalReq := &schemas.BifrostResponsesRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ResponsesMessage{ { Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), Content: &schemas.ResponsesMessageContent{ ContentStr: schemas.Ptr("Responses timeout test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForResponsesRequest( ctx, originalReq, initialResponse, mockLLM.MakeResponsesRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Verify Responses format structure with error assert.NotEmpty(t, result.Output) t.Logf("Responses format timeout handled") } // ============================================================================= // ERROR HANDLING IN CODE MODE AGENT // ============================================================================= func TestCodeModeAgent_ErrorInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Runtime errors in code are handled gracefully codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) manager := setupMCPManager(t, codeModeClient) ctx := createTestContext() mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "fail('intentional error')"), }), // Agent makes follow-up call with error CreateChatResponseWithText("Error occurred during execution"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 0 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Error test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should handle error gracefully and may make a follow-up call to summarize assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle error gracefully") t.Logf("Error in code handled gracefully") } func TestCodeModeAgent_ToolErrorInCode(t *testing.T) { t.Parallel() config := GetTestConfig(t) if config.HTTPServerURL == "" { t.Skip("MCP_HTTP_URL not set") } // Tool errors from code are propagated codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL) httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL) httpClient.ID = "mcpserver" httpClient.ToolsToExecute = []string{"*"} httpClient.ToolsToAutoExecute = []string{"*"} manager := setupMCPManager(t, codeModeClient, httpClient) ctx := createTestContext() // Call calculator with invalid arguments to trigger tool error mockLLM := &MockLLMCaller{ chatResponses: []*schemas.BifrostChatResponse{ CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{ CreateExecuteToolCodeCall("call-1", "mcpserver.calculator(operation='invalid', x=1, y=2)\nresult = 'done'"), }), // Agent makes follow-up call with tool error CreateChatResponseWithText("Tool error occurred"), }, } initialResponse := mockLLM.chatResponses[0] mockLLM.chatCallCount = 0 originalReq := &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: schemas.Ptr("Tool error test"), }, }, }, } result, err := manager.CheckAndExecuteAgentForChatRequest( ctx, originalReq, initialResponse, mockLLM.MakeChatRequest, func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) { return manager.ExecuteToolCall(ctx, request) }, ) require.Nil(t, err) require.NotNil(t, result) // Agent should handle tool error appropriately and may make a follow-up call assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle tool error gracefully") t.Logf("Tool error from code handled") }