1056 lines
32 KiB
Go
1056 lines
32 KiB
Go
package mcptests
|
|
|
|
import (
|
|
"fmt"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// =============================================================================
|
|
// CODE MODE + AGENT BASIC TESTS
|
|
// =============================================================================
|
|
|
|
func TestCodeModeAgent_Basic(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
// Setup code mode client with agent enabled + HTTP client with tools
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"echo"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
// Mock LLM with 2 responses:
|
|
// 1. First response: executeToolCode that calls echo
|
|
// 2. Second response: Final text
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"),
|
|
}),
|
|
CreateChatResponseWithText("Execution complete"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1 // Start from second response
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Test code mode agent"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Execute agent mode
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err, "agent loop should complete successfully")
|
|
require.NotNil(t, result)
|
|
|
|
// Verify final response
|
|
assert.NotEmpty(t, result.Choices)
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason, "should finish with stop reason")
|
|
|
|
// Verify the agent executed code and made follow-up LLM call
|
|
assert.Equal(t, 2, mockLLM.chatCallCount, "should have made 2 total LLM calls (initial + follow-up)")
|
|
|
|
t.Logf("Agent completed with %d LLM calls total", mockLLM.chatCallCount)
|
|
}
|
|
|
|
func TestCodeModeAgent_NonAutoToolInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Code returns result, then LLM returns non-auto tool
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{} // No auto tools (except code mode)
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = 'code result'"),
|
|
}),
|
|
// After code execution, LLM returns non-auto tool
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
GetSampleEchoToolCall("call-2", "needs approval"),
|
|
}),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Test non-auto tool"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should stop when it encounters non-auto tool
|
|
assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up before stopping)")
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason)
|
|
|
|
// Verify response contains the non-auto tool (awaiting approval)
|
|
finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
|
|
require.NotNil(t, finalMessage.ChatAssistantMessage)
|
|
require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
|
|
// Tool name could be either "echo" or with prefix like "bifrostInternal-echo"
|
|
toolName := *finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name
|
|
assert.True(t, toolName == "echo" || toolName == "bifrostInternal-echo",
|
|
fmt.Sprintf("expected echo tool, got %s", toolName))
|
|
|
|
t.Logf("Agent correctly stopped at non-auto tool")
|
|
}
|
|
|
|
func TestCodeModeAgent_AutoToolInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Code calls tool, agent continues loop
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"echo"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='test')\nresult = 'done'"),
|
|
}),
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-2", "result = 'second iteration'"),
|
|
}),
|
|
CreateChatResponseWithText("All done"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Multi-iteration test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should execute both code iterations and then finish
|
|
assert.Equal(t, 3, mockLLM.chatCallCount, "should have 3 total LLM calls (initial + 2 follow-ups)")
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason)
|
|
|
|
t.Logf("Agent completed 2 iterations successfully")
|
|
}
|
|
|
|
func TestCodeModeAgent_MixedToolsInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// After code execution, LLM returns mixed auto/non-auto tools
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"echo"} // Only echo is auto
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = 'step 1'"),
|
|
}),
|
|
// After code, LLM returns mixed tools
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
GetSampleEchoToolCall("call-2", "auto"),
|
|
GetSampleCalculatorToolCall("call-3", "add", 5, 3), // Non-auto
|
|
}),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Mixed tools test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should execute echo, then stop at calculator
|
|
assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)")
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason)
|
|
|
|
// Verify response contains the non-auto calculator tool
|
|
finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
|
|
require.NotNil(t, finalMessage.ChatAssistantMessage)
|
|
require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
|
|
|
|
// Should have calculator tool call (non-auto)
|
|
found := false
|
|
for _, tc := range finalMessage.ChatAssistantMessage.ToolCalls {
|
|
toolName := *tc.Function.Name
|
|
if toolName == "calculator" || toolName == "bifrostInternal-calculator" {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
assert.True(t, found, "response should contain the non-auto-executable calculator tool")
|
|
|
|
// Response should also include results of auto-executed tools in content
|
|
assert.NotNil(t, finalMessage.Content)
|
|
t.Logf("Mixed tools handled correctly")
|
|
}
|
|
|
|
func TestCodeModeAgent_NoToolCallsInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Code mode call is final step (no follow-up)
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
manager := setupMCPManager(t, codeModeClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = 'final result'"),
|
|
}),
|
|
CreateChatResponseWithText("Done, no more tools"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Simple code test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should execute code, then finish
|
|
assert.Equal(t, 2, mockLLM.chatCallCount, "should make 2 total LLM calls (initial + follow-up)")
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason)
|
|
|
|
t.Logf("Code execution with no follow-up tools completed")
|
|
}
|
|
|
|
// =============================================================================
|
|
// FILTERING IN CODE MODE AGENT
|
|
// =============================================================================
|
|
|
|
func TestCodeModeAgent_FilteringInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// ToolsToExecute filtering applies to tools called from code
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"echo"} // Only echo allowed, calculator blocked
|
|
httpClient.ToolsToAutoExecute = []string{}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = mcpserver.calculator(operation='add', x=5, y=3)"),
|
|
}),
|
|
// Agent makes follow-up call with tool execution error
|
|
CreateChatResponseWithText("Tool was blocked by filtering"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Test filtering"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Code should execute but calculator call should fail
|
|
// The agent should make a follow-up call with the error
|
|
assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle tool filtering")
|
|
|
|
t.Logf("Filtering in code mode validated")
|
|
}
|
|
|
|
func TestCodeModeAgent_AutoExecuteFiltering(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// ToolsToAutoExecute doesn't apply to tools called from within code
|
|
// Tools called from code only need to be in ToolsToExecute
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"} // All tools can execute
|
|
httpClient.ToolsToAutoExecute = []string{} // No auto tools (agent-level)
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = mcpserver.echo(message='test')"),
|
|
}),
|
|
CreateChatResponseWithText("Complete"),
|
|
CreateChatResponseWithText("Error handled"), // For code execution error follow-up
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Test auto-execute filtering"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Code should execute (executeToolCode is auto)
|
|
// Echo should be called from code (ToolsToExecute allows it)
|
|
// But mcpserver is not bound in code, so it will fail
|
|
// Agent will make follow-up call with error
|
|
// Auto-execute filtering only applies to agent-level tool calls
|
|
assert.Equal(t, 2, mockLLM.chatCallCount, "should make follow-up call for error handling")
|
|
assert.Equal(t, "stop", *result.Choices[0].FinishReason)
|
|
|
|
t.Logf("Auto-execute filtering correctly applies only to agent-level calls")
|
|
}
|
|
|
|
// =============================================================================
|
|
// MAX DEPTH IN CODE MODE
|
|
// =============================================================================
|
|
|
|
func TestCodeModeAgent_MaxDepth(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Max depth applies to code mode iterations
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"echo"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 3,
|
|
ToolExecutionTimeout: 30 * time.Second,
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "mcpserver.echo(message='iter 1')\nresult = 'done1'"),
|
|
}),
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-2", "mcpserver.echo(message='iter 2')\nresult = 'done2'"),
|
|
}),
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-3", "mcpserver.echo(message='iter 3')\nresult = 'done3'"),
|
|
}),
|
|
CreateChatResponseWithText("Should not reach - max depth hit"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Max depth test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Max depth should be enforced
|
|
// Initial call + up to 3 iterations = max 4 LLM calls
|
|
assert.LessOrEqual(t, mockLLM.chatCallCount, 4, "max depth 3 should limit iterations (initial + 3 iterations)")
|
|
t.Logf("Agent stopped at depth limit with %d calls", mockLLM.chatCallCount)
|
|
}
|
|
|
|
func TestCodeModeAgent_MaxDepth_ChatFormat(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "server"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"*"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 2,
|
|
ToolExecutionTimeout: 30 * time.Second,
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "result = 'test1'"),
|
|
}),
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-2", "result = 'test2'"),
|
|
}),
|
|
CreateChatResponseWithText("Done"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Chat format max depth"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
// Initial call + up to 2 iterations = max 3 LLM calls
|
|
assert.LessOrEqual(t, mockLLM.chatCallCount, 3, "max depth 2 in Chat format (initial + 2 iterations)")
|
|
|
|
// Verify Chat response structure is maintained
|
|
assert.NotEmpty(t, result.Choices)
|
|
assert.NotNil(t, result.Choices[0].FinishReason)
|
|
t.Logf("Chat format max depth enforced")
|
|
}
|
|
|
|
func TestCodeModeAgent_MaxDepth_ResponsesFormat(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "server"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"*"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 2,
|
|
ToolExecutionTimeout: 30 * time.Second,
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
responsesResponses: []*schemas.BifrostResponsesResponse{
|
|
CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
|
|
CreateExecuteToolCodeCallResponses("call-1", "return 'test1';"),
|
|
}),
|
|
CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
|
|
CreateExecuteToolCodeCallResponses("call-2", "return 'test2';"),
|
|
}),
|
|
CreateResponsesResponseWithText("Done"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.responsesResponses[0]
|
|
mockLLM.responsesCallCount = 1
|
|
|
|
originalReq := &schemas.BifrostResponsesRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ResponsesMessage{
|
|
{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentStr: schemas.Ptr("Responses format max depth"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForResponsesRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeResponsesRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
// Initial call + up to 2 iterations = max 3 LLM calls
|
|
assert.LessOrEqual(t, mockLLM.responsesCallCount, 3, "max depth 2 in Responses format (initial + 2 iterations)")
|
|
|
|
// Verify Responses format structure is maintained
|
|
assert.NotEmpty(t, result.Output)
|
|
t.Logf("Responses format max depth enforced")
|
|
}
|
|
|
|
// =============================================================================
|
|
// TIMEOUT IN CODE MODE
|
|
// =============================================================================
|
|
|
|
func TestCodeModeAgent_Timeout(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
manager := setupMCPManager(t, codeModeClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 10,
|
|
ToolExecutionTimeout: 2 * time.Second, // Short timeout
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
// Code that will timeout (infinite loop simulation)
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "def loop():\n while True:\n pass\n return 'timeout'\nresult = loop()"),
|
|
}),
|
|
// Agent makes follow-up call with timeout error
|
|
CreateChatResponseWithText("Code execution timed out"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Timeout test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should handle timeout gracefully
|
|
assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle timeout gracefully")
|
|
|
|
t.Logf("Timeout handled gracefully")
|
|
}
|
|
|
|
func TestCodeModeAgent_Timeout_ChatFormat(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
manager := setupMCPManager(t, codeModeClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 10,
|
|
ToolExecutionTimeout: 1 * time.Second,
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "def loop():\n while True:\n pass\n return 'timeout'\nresult = loop()"),
|
|
}),
|
|
// Agent makes follow-up call with timeout error
|
|
CreateChatResponseWithText("Code execution timed out"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Chat timeout test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Verify Chat response structure with error
|
|
assert.NotEmpty(t, result.Choices)
|
|
t.Logf("Chat format timeout handled")
|
|
}
|
|
|
|
func TestCodeModeAgent_Timeout_ResponsesFormat(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
manager := setupMCPManager(t, codeModeClient)
|
|
manager.UpdateToolManagerConfig(&schemas.MCPToolManagerConfig{
|
|
MaxAgentDepth: 10,
|
|
ToolExecutionTimeout: 1 * time.Second,
|
|
})
|
|
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
responsesResponses: []*schemas.BifrostResponsesResponse{
|
|
CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
|
|
CreateExecuteToolCodeCallResponses("call-1", "while(true) {}; return 'timeout';"),
|
|
}),
|
|
// Agent makes follow-up call with timeout error
|
|
CreateResponsesResponseWithText("Code execution timed out"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.responsesResponses[0]
|
|
mockLLM.responsesCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostResponsesRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ResponsesMessage{
|
|
{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentStr: schemas.Ptr("Responses timeout test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForResponsesRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeResponsesRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Verify Responses format structure with error
|
|
assert.NotEmpty(t, result.Output)
|
|
t.Logf("Responses format timeout handled")
|
|
}
|
|
|
|
// =============================================================================
|
|
// ERROR HANDLING IN CODE MODE AGENT
|
|
// =============================================================================
|
|
|
|
func TestCodeModeAgent_ErrorInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Runtime errors in code are handled gracefully
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
manager := setupMCPManager(t, codeModeClient)
|
|
ctx := createTestContext()
|
|
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "fail('intentional error')"),
|
|
}),
|
|
// Agent makes follow-up call with error
|
|
CreateChatResponseWithText("Error occurred during execution"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Error test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should handle error gracefully and may make a follow-up call to summarize
|
|
assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle error gracefully")
|
|
|
|
t.Logf("Error in code handled gracefully")
|
|
}
|
|
|
|
func TestCodeModeAgent_ToolErrorInCode(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
config := GetTestConfig(t)
|
|
if config.HTTPServerURL == "" {
|
|
t.Skip("MCP_HTTP_URL not set")
|
|
}
|
|
|
|
|
|
// Tool errors from code are propagated
|
|
codeModeClient := GetSampleCodeModeAgentClientConfig(t, config.HTTPServerURL)
|
|
httpClient := GetSampleHTTPClientConfigNoSpaces(config.HTTPServerURL)
|
|
httpClient.ID = "mcpserver"
|
|
httpClient.ToolsToExecute = []string{"*"}
|
|
httpClient.ToolsToAutoExecute = []string{"*"}
|
|
|
|
manager := setupMCPManager(t, codeModeClient, httpClient)
|
|
ctx := createTestContext()
|
|
|
|
// Call calculator with invalid arguments to trigger tool error
|
|
mockLLM := &MockLLMCaller{
|
|
chatResponses: []*schemas.BifrostChatResponse{
|
|
CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
|
|
CreateExecuteToolCodeCall("call-1", "mcpserver.calculator(operation='invalid', x=1, y=2)\nresult = 'done'"),
|
|
}),
|
|
// Agent makes follow-up call with tool error
|
|
CreateChatResponseWithText("Tool error occurred"),
|
|
},
|
|
}
|
|
|
|
initialResponse := mockLLM.chatResponses[0]
|
|
mockLLM.chatCallCount = 0
|
|
|
|
originalReq := &schemas.BifrostChatRequest{
|
|
Provider: schemas.OpenAI,
|
|
Model: "gpt-4",
|
|
Input: []schemas.ChatMessage{
|
|
{
|
|
Role: schemas.ChatMessageRoleUser,
|
|
Content: &schemas.ChatMessageContent{
|
|
ContentStr: schemas.Ptr("Tool error test"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
result, err := manager.CheckAndExecuteAgentForChatRequest(
|
|
ctx,
|
|
originalReq,
|
|
initialResponse,
|
|
mockLLM.MakeChatRequest,
|
|
func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
|
|
return manager.ExecuteToolCall(ctx, request)
|
|
},
|
|
)
|
|
|
|
require.Nil(t, err)
|
|
require.NotNil(t, result)
|
|
|
|
// Agent should handle tool error appropriately and may make a follow-up call
|
|
assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "agent should handle tool error gracefully")
|
|
|
|
t.Logf("Tool error from code handled")
|
|
}
|