first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/core/internal/mcptests/agent_basic_test.go
+++ b/core/internal/mcptests/agent_basic_test.go
@@ -0,0 +1,772 @@
+package mcptests
+
+import (
+	"testing"
+	"time"
+
+	"github.com/maximhq/bifrost/core/schemas"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// =============================================================================
+// MOCK LLM FOR AGENT TESTS
+// =============================================================================
+
+// MockLLMCaller provides controlled LLM responses for testing agent mode
+type MockLLMCaller struct {
+	chatResponses      []*schemas.BifrostChatResponse
+	responsesResponses []*schemas.BifrostResponsesResponse
+	chatCallCount      int
+	responsesCallCount int
+}
+
+func (m *MockLLMCaller) MakeChatRequest(ctx *schemas.BifrostContext, req *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+	if m.chatCallCount >= len(m.chatResponses) {
+		return nil, &schemas.BifrostError{
+			IsBifrostError: false,
+			Error: &schemas.ErrorField{
+				Message: "no more mock chat responses available",
+			},
+		}
+	}
+
+	response := m.chatResponses[m.chatCallCount]
+	m.chatCallCount++
+	return response, nil
+}
+
+func (m *MockLLMCaller) MakeResponsesRequest(ctx *schemas.BifrostContext, req *schemas.BifrostResponsesRequest) (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+	if m.responsesCallCount >= len(m.responsesResponses) {
+		return nil, &schemas.BifrostError{
+			IsBifrostError: false,
+			Error: &schemas.ErrorField{
+				Message: "no more mock responses api responses available",
+			},
+		}
+	}
+
+	response := m.responsesResponses[m.responsesCallCount]
+	m.responsesCallCount++
+	return response, nil
+}
+
+// =============================================================================
+// BASIC AGENT LOOP TESTS
+// =============================================================================
+
+func TestAgent_BasicLoop(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// Setup MCP manager with auto-executable tools
+	err = SetInternalClientAutoExecute(manager, []string{"echo"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	// Setup mock LLM with 2 responses:
+	// 1. First response: LLM wants to call echo tool
+	// 2. Second response: LLM finishes with text
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			// First call: return tool call
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "Hello from agent"),
+			}),
+			// Second call: return final text
+			CreateChatResponseWithText("The echo tool returned your message successfully"),
+		},
+	}
+
+	// Initial LLM response with tool call
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 1 // Start from second response for subsequent calls
+
+	// Create mock request
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Please echo hello"),
+				},
+			},
+		},
+	}
+
+	// Execute agent mode
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			// Use real tool execution
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr, "agent loop should complete successfully")
+	require.NotNil(t, result)
+
+	t.Logf("Agent completed with %d LLM calls total", mockLLM.chatCallCount)
+
+	// Verify final response
+	assert.NotEmpty(t, result.Choices)
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason, "should finish with stop reason")
+
+	// Verify the agent executed at least one tool
+	assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "should have made follow-up LLM call")
+}
+
+func TestAgent_BasicLoop_ChatFormat(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterCalculatorTool(manager)
+	require.NoError(t, err, "should register calculator tool")
+
+	// Set auto-execute for calculator
+	err = SetInternalClientAutoExecute(manager, []string{"calculator"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleCalculatorToolCall("call-1", "add", 5, 3),
+			}),
+			CreateChatResponseWithText("The result is 8"),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 1
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Calculate 5+3"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	content := result.Choices[0].ChatNonStreamResponseChoice.Message.Content
+	assert.NotNil(t, content)
+	t.Logf("Final response: %s", *content.ContentStr)
+}
+
+func TestAgent_BasicLoop_ResponsesFormat(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// Set auto-execute for echo
+	err = SetInternalClientAutoExecute(manager, []string{"echo"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	mockLLM := &MockLLMCaller{
+		responsesResponses: []*schemas.BifrostResponsesResponse{
+			CreateResponsesResponseWithToolCalls([]schemas.ResponsesToolMessage{
+				{
+					CallID:    schemas.Ptr("call-1"),
+					Name: schemas.Ptr("bifrostInternal-echo"),
+					Arguments: schemas.Ptr(`{"message": "testing responses format"}`),
+				},
+			}),
+			CreateResponsesResponseWithText("Successfully echoed your message"),
+		},
+	}
+
+	initialResponse := mockLLM.responsesResponses[0]
+	mockLLM.responsesCallCount = 1
+
+	originalReq := &schemas.BifrostResponsesRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ResponsesMessage{
+			{
+				Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+				Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
+				Content: &schemas.ResponsesMessageContent{
+					ContentStr: schemas.Ptr("Echo a message"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForResponsesRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeResponsesRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Output)
+	t.Logf("Agent completed with %d messages in output", len(result.Output))
+}
+
+// =============================================================================
+// AGENT ITERATIONS TESTS
+// =============================================================================
+
+func TestAgent_SingleIteration(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// Set auto-execute for all tools
+	err = SetInternalClientAutoExecute(manager, []string{"*"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	// LLM returns one tool call, then stops
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "single iteration test"),
+			}),
+			// Immediately finish after tool execution
+			CreateChatResponseWithText("Done after one tool call"),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 1
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Test"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Should have made exactly 2 LLM calls total (initial + 1 follow-up after tool execution)
+	assert.Equal(t, 2, mockLLM.chatCallCount, "should have exactly one iteration")
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	// Verify no more tool calls in final response
+	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
+	if finalMessage.ChatAssistantMessage != nil {
+		assert.Empty(t, finalMessage.ChatAssistantMessage.ToolCalls, "final response should have no tool calls")
+	}
+}
+
+func TestAgent_MultipleIterations(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+	err = RegisterCalculatorTool(manager)
+	require.NoError(t, err, "should register calculator tool")
+	err = RegisterWeatherTool(manager)
+	require.NoError(t, err, "should register weather tool")
+
+	// Set auto-execute for all tools
+	err = SetInternalClientAutoExecute(manager, []string{"*"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	// LLM returns tool calls for 3 iterations, then stops
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			// Iteration 1: echo tool
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "iteration 1"),
+			}),
+			// Iteration 2: calculator tool
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleCalculatorToolCall("call-2", "add", 10, 20),
+			}),
+			// Iteration 3: weather tool
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleWeatherToolCall("call-3", "New York", ""),
+			}),
+			// Final: stop
+			CreateChatResponseWithText("Completed all 3 tool calls"),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 1
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Multi-step task"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Should have made 4 LLM calls total (initial + 3 follow-ups for each tool execution)
+	assert.Equal(t, 4, mockLLM.chatCallCount, "should have 4 calls total (3 iterations + final)")
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	t.Logf("Completed agent loop with 3 iterations")
+}
+
+func TestAgent_NoToolCalls(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools (even though we won't call them)
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// Set auto-execute for all tools
+	err = SetInternalClientAutoExecute(manager, []string{"*"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	// LLM returns response with no tool calls (immediate stop)
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			CreateChatResponseWithText("I don't need to use any tools for this"),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 0 // No calls should be made
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Simple question"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Should have made NO additional LLM calls
+	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make any LLM calls when no tool calls")
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	// Should return the original response unchanged
+	assert.Equal(t, initialResponse, result, "should return original response when no tool calls")
+}
+
+// =============================================================================
+// MIXED AUTO AND NON-AUTO TOOLS TESTS
+// =============================================================================
+
+func TestAgent_MixedAutoAndNonAutoTools(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+	err = RegisterCalculatorTool(manager)
+	require.NoError(t, err, "should register calculator tool")
+
+	// Configure only "echo" as auto-executable, other tools require approval
+	err = SetInternalClientAutoExecute(manager, []string{"echo"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	// LLM returns both auto and non-auto tools
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "auto tool"),
+				GetSampleCalculatorToolCall("call-2", "add", 5, 3), // Not auto-executable
+			}),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 0 // Should not make additional calls
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Test mixed tools"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Agent should stop and return non-auto tools
+	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make additional calls when non-auto tool present")
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	// Verify response contains the non-auto tool (calculator) for user approval
+	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
+	require.NotNil(t, finalMessage.ChatAssistantMessage)
+	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
+
+	// Should have the calculator tool call (non-auto)
+	found := false
+	for _, tc := range finalMessage.ChatAssistantMessage.ToolCalls {
+		if *tc.Function.Name == "bifrostInternal-calculator" {
+			found = true
+			break
+		}
+	}
+	assert.True(t, found, "response should contain the non-auto-executable calculator tool")
+
+	// Response should also include results of auto-executed tools in content
+	assert.NotNil(t, finalMessage.Content)
+	t.Logf("Response content: %s", *finalMessage.Content.ContentStr)
+}
+
+func TestAgent_OnlyAutoTools(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// All tools auto-executable
+	err = SetInternalClientAutoExecute(manager, []string{"*"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			// Multiple auto-executable tools
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "first"),
+				GetSampleEchoToolCall("call-2", "second"),
+			}),
+			// Continue loop
+			CreateChatResponseWithText("All tools executed"),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 1
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Test only auto tools"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Agent should execute all tools and continue loop
+	assert.GreaterOrEqual(t, mockLLM.chatCallCount, 1, "should make follow-up LLM call")
+	assert.Equal(t, "stop", *result.Choices[0].FinishReason)
+
+	// Final response should have no pending tool calls
+	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
+	if finalMessage.ChatAssistantMessage != nil {
+		assert.Empty(t, finalMessage.ChatAssistantMessage.ToolCalls, "no pending tool calls")
+	}
+}
+
+func TestAgent_OnlyNonAutoTools(t *testing.T) {
+	t.Parallel()
+
+	// Use InProcess tools - no external server needed
+	manager := setupMCPManager(t)
+	err := RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	// No tools are auto-executable
+	err = SetInternalClientAutoExecute(manager, []string{}) // No auto-executable tools
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	ctx := createTestContext()
+
+	mockLLM := &MockLLMCaller{
+		chatResponses: []*schemas.BifrostChatResponse{
+			CreateChatResponseWithToolCalls([]schemas.ChatAssistantMessageToolCall{
+				GetSampleEchoToolCall("call-1", "needs approval"),
+			}),
+		},
+	}
+
+	initialResponse := mockLLM.chatResponses[0]
+	mockLLM.chatCallCount = 0 // Should not make additional calls
+
+	originalReq := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Test non-auto tools"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := manager.CheckAndExecuteAgentForChatRequest(
+		ctx,
+		originalReq,
+		initialResponse,
+		mockLLM.MakeChatRequest,
+		func(ctx *schemas.BifrostContext, request *schemas.BifrostMCPRequest) (*schemas.BifrostMCPResponse, error) {
+			return manager.ExecuteToolCall(ctx, request)
+		},
+	)
+
+	require.Nil(t, bifrostErr)
+	require.NotNil(t, result)
+
+	// Agent should stop immediately and return tools to user
+	assert.Equal(t, 0, mockLLM.chatCallCount, "should not make any LLM calls")
+	assert.Equal(t, "tool_calls", *result.Choices[0].FinishReason, "should return with tool_calls since tools need approval")
+
+	// Verify response contains the non-auto tools
+	finalMessage := result.Choices[0].ChatNonStreamResponseChoice.Message
+	require.NotNil(t, finalMessage.ChatAssistantMessage)
+	require.NotEmpty(t, finalMessage.ChatAssistantMessage.ToolCalls)
+	assert.Equal(t, "bifrostInternal-echo", *finalMessage.ChatAssistantMessage.ToolCalls[0].Function.Name)
+}
+
+// =============================================================================
+// AGENT WITH REAL LLM TESTS
+// =============================================================================
+
+func TestAgent_WithRealLLM_Simple(t *testing.T) {
+	t.Parallel()
+
+	testConfig := GetTestConfig(t)
+	if !testConfig.UseRealLLM {
+		t.Skip("Real LLM not configured")
+	}
+
+	// Setup MCP with auto-executable calculator tool using InProcess
+	manager := setupMCPManager(t)
+	err := RegisterCalculatorTool(manager)
+	require.NoError(t, err, "should register calculator tool")
+
+	err = SetInternalClientAutoExecute(manager, []string{"calculator"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	// Setup bifrost with real LLM
+	bifrost := setupBifrost(t)
+	bifrost.SetMCPManager(manager)
+
+	// Create context with timeout for real API call
+	ctx, cancel := createTestContextWithTimeout(30 * time.Second)
+	defer cancel()
+
+	// Ask LLM to use calculator tool
+	req := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4o",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("Calculate 25 + 17 using the calculator tool"),
+				},
+			},
+		},
+	}
+
+	// Make request - agent mode will activate if LLM returns tool calls
+	result, bifrostErr := bifrost.ChatCompletionRequest(ctx, req)
+	if bifrostErr != nil {
+		// Skip if there's an API error (likely missing/invalid API key or config issue)
+		t.Skipf("Skipping real LLM test due to API error: %v", bifrostErr.Error)
+	}
+	require.NotNil(t, result)
+
+	// Verify we got a response
+	assert.NotEmpty(t, result.Choices)
+	t.Logf("Real LLM agent response: %s", *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr)
+
+	// Check if response mentions the result (42)
+	responseText := *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr
+	// Don't assert exact match due to LLM variability, just log
+	t.Logf("Response contains calculation result")
+	_ = responseText
+}
+
+func TestAgent_WithRealLLM_MultiStep(t *testing.T) {
+	t.Parallel()
+
+	testConfig := GetTestConfig(t)
+	if !testConfig.UseRealLLM {
+		t.Skip("Real LLM not configured")
+	}
+
+	// Setup MCP with auto-executable tools using InProcess
+	manager := setupMCPManager(t)
+	err := RegisterCalculatorTool(manager)
+	require.NoError(t, err, "should register calculator tool")
+	err = RegisterEchoTool(manager)
+	require.NoError(t, err, "should register echo tool")
+
+	err = SetInternalClientAutoExecute(manager, []string{"calculator", "echo"})
+	require.NoError(t, err, "should set auto-execute for internal client")
+
+	bifrost := setupBifrost(t)
+	bifrost.SetMCPManager(manager)
+
+	// Create context with timeout for real API call
+	ctx, cancel := createTestContextWithTimeout(30 * time.Second)
+	defer cancel()
+
+	// Ask LLM to perform multi-step task
+	req := &schemas.BifrostChatRequest{
+		Provider: schemas.OpenAI,
+		Model:    "gpt-4o",
+		Input: []schemas.ChatMessage{
+			{
+				Role: schemas.ChatMessageRoleUser,
+				Content: &schemas.ChatMessageContent{
+					ContentStr: schemas.Ptr("First calculate 10 + 5, then echo the result"),
+				},
+			},
+		},
+	}
+
+	result, bifrostErr := bifrost.ChatCompletionRequest(ctx, req)
+	if bifrostErr != nil {
+		// Skip if there's an API error (likely missing/invalid API key or config issue)
+		t.Skipf("Skipping real LLM test due to API error: %v", bifrostErr.Error)
+	}
+	require.NotNil(t, result)
+
+	assert.NotEmpty(t, result.Choices)
+	t.Logf("Multi-step agent response: %s", *result.Choices[0].ChatNonStreamResponseChoice.Message.Content.ContentStr)
+
+	// Response should mention both operations
+	// Due to LLM variability, we just log the result
+	t.Logf("Multi-step task completed")
+}