first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/core/internal/llmtests/tool_calls.go
+++ b/core/internal/llmtests/tool_calls.go
@@ -0,0 +1,426 @@
+package llmtests
+
+import (
+	"context"
+	"encoding/json"
+	"os"
+	"strings"
+	"testing"
+
+	bifrost "github.com/maximhq/bifrost/core"
+	"github.com/maximhq/bifrost/core/schemas"
+	"github.com/stretchr/testify/require"
+)
+
+// RunToolCallsTest executes the tool calls test scenario using dual API testing framework
+func RunToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
+	if !testConfig.Scenarios.ToolCalls {
+		t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
+		return
+	}
+
+	t.Run("ToolCalls", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		chatMessages := []schemas.ChatMessage{
+			CreateBasicChatMessage("What's the weather like in New York? answer in celsius"),
+		}
+		responsesMessages := []schemas.ResponsesMessage{
+			CreateBasicResponsesMessage("What's the weather like in New York? answer in celsius"),
+		}
+
+		// Get tools for both APIs using the new GetSampleTool function
+		chatTool := GetSampleChatTool(SampleToolTypeWeather)           // Chat Completions API
+		responsesTool := GetSampleResponsesTool(SampleToolTypeWeather) // Responses API
+
+		// Use specialized tool call retry configuration
+		retryConfig := ToolCallRetryConfig(string(SampleToolTypeWeather))
+		retryContext := TestRetryContext{
+			ScenarioName: "ToolCalls",
+			ExpectedBehavior: map[string]interface{}{
+				"expected_tool_name": string(SampleToolTypeWeather),
+				"required_location":  "new york",
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": testConfig.Provider,
+				"model":    testConfig.ChatModel,
+			},
+		}
+
+		// Enhanced tool call validation (same for both APIs)
+		expectations := ToolCallExpectations(string(SampleToolTypeWeather), []string{"location"})
+		expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
+
+		// Add additional tool-specific validations
+		expectations.ExpectedToolCalls[0].ArgumentTypes = map[string]string{
+			"location": "string",
+		}
+
+		// Create operations for both Chat Completions and Responses API
+		chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			chatReq := &schemas.BifrostChatRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    chatMessages,
+				Params: &schemas.ChatParameters{
+					MaxCompletionTokens: bifrost.Ptr(150),
+					Tools:               []schemas.ChatTool{*chatTool},
+				},
+				Fallbacks: testConfig.Fallbacks,
+			}
+			return client.ChatCompletionRequest(bfCtx, chatReq)
+		}
+
+		responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			responsesReq := &schemas.BifrostResponsesRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    responsesMessages,
+				Params: &schemas.ResponsesParameters{
+					Tools: []schemas.ResponsesTool{*responsesTool},
+				},
+			}
+			return client.ResponsesRequest(bfCtx, responsesReq)
+		}
+
+		// Execute dual API test - passes only if BOTH APIs succeed
+		result := WithDualAPITestRetry(t,
+			retryConfig,
+			retryContext,
+			expectations,
+			"ToolCalls",
+			chatOperation,
+			responsesOperation)
+
+		// Validate both APIs succeeded
+		if !result.BothSucceeded {
+			var errors []string
+			if result.ChatCompletionsError != nil {
+				errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
+			}
+			if result.ResponsesAPIError != nil {
+				errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
+			}
+			if len(errors) == 0 {
+				errors = append(errors, "One or both APIs failed validation (see logs above)")
+			}
+			t.Fatalf("❌ ToolCalls dual API test failed: %v", errors)
+		}
+
+		// Verify location argument mentions New York using universal tool extraction
+		validateLocationInChatToolCalls := func(response *schemas.BifrostChatResponse, apiName string) {
+			toolCalls := ExtractChatToolCalls(response)
+			validateLocationInToolCalls(t, toolCalls, apiName)
+		}
+
+		validateLocationInResponsesToolCalls := func(response *schemas.BifrostResponsesResponse, apiName string) {
+			toolCalls := ExtractResponsesToolCalls(response)
+			validateLocationInToolCalls(t, toolCalls, apiName)
+		}
+
+		// Validate both API responses
+		if result.ChatCompletionsResponse != nil {
+			validateLocationInChatToolCalls(result.ChatCompletionsResponse, "Chat Completions")
+		}
+
+		if result.ResponsesAPIResponse != nil {
+			validateLocationInResponsesToolCalls(result.ResponsesAPIResponse, "Responses")
+		}
+
+		t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCalls test!")
+	})
+}
+
+func validateLocationInToolCalls(t *testing.T, toolCalls []ToolCallInfo, apiName string) {
+	locationFound := false
+
+	for _, toolCall := range toolCalls {
+		if toolCall.Name == string(SampleToolTypeWeather) {
+			var args map[string]interface{}
+			if json.Unmarshal([]byte(toolCall.Arguments), &args) == nil {
+				if location, exists := args["location"].(string); exists {
+					lowerLocation := strings.ToLower(location)
+					if strings.Contains(lowerLocation, "new york") || strings.Contains(lowerLocation, "nyc") {
+						locationFound = true
+						t.Logf("✅ %s tool call has correct location: %s", apiName, location)
+						break
+					}
+				}
+			}
+		}
+	}
+
+	require.True(t, locationFound, "%s API tool call should specify New York as the location", apiName)
+}
+
+// RunToolCallsWithEmptyPropertiesTest tests tool calls with explicitly empty properties ({})
+func RunToolCallsWithEmptyPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
+	if !testConfig.Scenarios.ToolCalls {
+		t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
+		return
+	}
+
+	t.Run("ToolCallsWithEmptyProperties", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		chatMessages := []schemas.ChatMessage{
+			CreateBasicChatMessage("Call the ping tool"),
+		}
+		responsesMessages := []schemas.ResponsesMessage{
+			CreateBasicResponsesMessage("Call the ping tool"),
+		}
+
+		// Get tools using the sample tool helper functions
+		chatTool := GetSampleChatTool(SampleToolTypePingWithEmpty)
+		responsesTool := GetSampleResponsesTool(SampleToolTypePingWithEmpty)
+
+		retryConfig := ToolCallRetryConfig("ping")
+		retryContext := TestRetryContext{
+			ScenarioName: "ToolCallsWithEmptyProperties",
+			ExpectedBehavior: map[string]interface{}{
+				"expected_tool_name": "ping",
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": testConfig.Provider,
+				"model":    testConfig.ChatModel,
+			},
+		}
+
+		expectations := ToolCallExpectations("ping", []string{}) // No required arguments
+		expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
+
+		chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			chatReq := &schemas.BifrostChatRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    chatMessages,
+				Params: &schemas.ChatParameters{
+					MaxCompletionTokens: bifrost.Ptr(150),
+					Tools:               []schemas.ChatTool{*chatTool},
+					ToolChoice: &schemas.ChatToolChoice{
+						ChatToolChoiceStr: bifrost.Ptr("required"),
+					},
+				},
+				Fallbacks: testConfig.Fallbacks,
+			}
+			return client.ChatCompletionRequest(bfCtx, chatReq)
+		}
+
+		responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			responsesReq := &schemas.BifrostResponsesRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    responsesMessages,
+				Params: &schemas.ResponsesParameters{
+					Tools: []schemas.ResponsesTool{*responsesTool},
+					ToolChoice: &schemas.ResponsesToolChoice{
+						ResponsesToolChoiceStr: bifrost.Ptr("required"),
+					},
+				},
+			}
+			return client.ResponsesRequest(bfCtx, responsesReq)
+		}
+
+		result := WithDualAPITestRetry(t,
+			retryConfig,
+			retryContext,
+			expectations,
+			"ToolCallsWithEmptyProperties",
+			chatOperation,
+			responsesOperation)
+
+		if !result.BothSucceeded {
+			var errors []string
+			if result.ChatCompletionsError != nil {
+				errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
+			}
+			if result.ResponsesAPIError != nil {
+				errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
+			}
+			if len(errors) == 0 {
+				errors = append(errors, "One or both APIs failed validation (see logs above)")
+			}
+			t.Fatalf("❌ ToolCallsWithEmptyProperties dual API test failed: %v", errors)
+		}
+
+		validatePingToolCall := func(response *schemas.BifrostChatResponse, apiName string) {
+			toolCalls := ExtractChatToolCalls(response)
+			require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
+			pingFound := false
+			for _, toolCall := range toolCalls {
+				if toolCall.Name == "ping" {
+					pingFound = true
+					t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
+					break
+				}
+			}
+			require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
+		}
+
+		validatePingResponsesToolCall := func(response *schemas.BifrostResponsesResponse, apiName string) {
+			toolCalls := ExtractResponsesToolCalls(response)
+			require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
+			pingFound := false
+			for _, toolCall := range toolCalls {
+				if toolCall.Name == "ping" {
+					pingFound = true
+					t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
+					break
+				}
+			}
+			require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
+		}
+
+		if result.ChatCompletionsResponse != nil {
+			validatePingToolCall(result.ChatCompletionsResponse, "Chat Completions")
+		}
+
+		if result.ResponsesAPIResponse != nil {
+			validatePingResponsesToolCall(result.ResponsesAPIResponse, "Responses")
+		}
+
+		t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCallsWithEmptyProperties test!")
+	})
+}
+
+// RunToolCallsWithNilPropertiesTest tests tool calls with nil properties (not defined)
+func RunToolCallsWithNilPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
+	if !testConfig.Scenarios.ToolCalls {
+		t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
+		return
+	}
+
+	t.Run("ToolCallsWithNilProperties", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		chatMessages := []schemas.ChatMessage{
+			CreateBasicChatMessage("Call the ping tool"),
+		}
+		responsesMessages := []schemas.ResponsesMessage{
+			CreateBasicResponsesMessage("Call the ping tool"),
+		}
+
+		// Get tools using the sample tool helper functions
+		chatTool := GetSampleChatTool(SampleToolTypePingWithNil)
+		responsesTool := GetSampleResponsesTool(SampleToolTypePingWithNil)
+
+		retryConfig := ToolCallRetryConfig("ping")
+		retryContext := TestRetryContext{
+			ScenarioName: "ToolCallsWithNilProperties",
+			ExpectedBehavior: map[string]interface{}{
+				"expected_tool_name": "ping",
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": testConfig.Provider,
+				"model":    testConfig.ChatModel,
+			},
+		}
+
+		expectations := ToolCallExpectations("ping", []string{}) // No required arguments
+		expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
+
+		chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			chatReq := &schemas.BifrostChatRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    chatMessages,
+				Params: &schemas.ChatParameters{
+					MaxCompletionTokens: bifrost.Ptr(150),
+					Tools:               []schemas.ChatTool{*chatTool},
+					ToolChoice: &schemas.ChatToolChoice{
+						ChatToolChoiceStr: bifrost.Ptr("required"),
+					},
+				},
+				Fallbacks: testConfig.Fallbacks,
+			}
+			return client.ChatCompletionRequest(bfCtx, chatReq)
+		}
+
+		responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
+			bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
+			responsesReq := &schemas.BifrostResponsesRequest{
+				Provider: testConfig.Provider,
+				Model:    testConfig.ChatModel,
+				Input:    responsesMessages,
+				Params: &schemas.ResponsesParameters{
+					Tools: []schemas.ResponsesTool{*responsesTool},
+					ToolChoice: &schemas.ResponsesToolChoice{
+						ResponsesToolChoiceStr: bifrost.Ptr("required"),
+					},
+				},
+			}
+			return client.ResponsesRequest(bfCtx, responsesReq)
+		}
+
+		result := WithDualAPITestRetry(t,
+			retryConfig,
+			retryContext,
+			expectations,
+			"ToolCallsWithNilProperties",
+			chatOperation,
+			responsesOperation)
+
+		if !result.BothSucceeded {
+			var errors []string
+			if result.ChatCompletionsError != nil {
+				errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
+			}
+			if result.ResponsesAPIError != nil {
+				errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
+			}
+			if len(errors) == 0 {
+				errors = append(errors, "One or both APIs failed validation (see logs above)")
+			}
+			t.Fatalf("❌ ToolCallsWithNilProperties dual API test failed: %v", errors)
+		}
+
+		validatePingToolCall := func(response *schemas.BifrostChatResponse, apiName string) {
+			toolCalls := ExtractChatToolCalls(response)
+			require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
+			pingFound := false
+			for _, toolCall := range toolCalls {
+				if toolCall.Name == "ping" {
+					pingFound = true
+					t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
+					break
+				}
+			}
+			require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
+		}
+
+		validatePingResponsesToolCall := func(response *schemas.BifrostResponsesResponse, apiName string) {
+			toolCalls := ExtractResponsesToolCalls(response)
+			require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
+			pingFound := false
+			for _, toolCall := range toolCalls {
+				if toolCall.Name == "ping" {
+					pingFound = true
+					t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
+					break
+				}
+			}
+			require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
+		}
+
+		if result.ChatCompletionsResponse != nil {
+			validatePingToolCall(result.ChatCompletionsResponse, "Chat Completions")
+		}
+
+		if result.ResponsesAPIResponse != nil {
+			validatePingResponsesToolCall(result.ResponsesAPIResponse, "Responses")
+		}
+
+		t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCallsWithNilProperties test!")
+	})
+}