427 lines
14 KiB
Go
427 lines
14 KiB
Go
package llmtests
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
|
|
bifrost "github.com/maximhq/bifrost/core"
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// RunToolCallsTest executes the tool calls test scenario using dual API testing framework
|
|
func RunToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
|
|
if !testConfig.Scenarios.ToolCalls {
|
|
t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
|
|
return
|
|
}
|
|
|
|
t.Run("ToolCalls", func(t *testing.T) {
|
|
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
|
|
t.Parallel()
|
|
}
|
|
|
|
chatMessages := []schemas.ChatMessage{
|
|
CreateBasicChatMessage("What's the weather like in New York? answer in celsius"),
|
|
}
|
|
responsesMessages := []schemas.ResponsesMessage{
|
|
CreateBasicResponsesMessage("What's the weather like in New York? answer in celsius"),
|
|
}
|
|
|
|
// Get tools for both APIs using the new GetSampleTool function
|
|
chatTool := GetSampleChatTool(SampleToolTypeWeather) // Chat Completions API
|
|
responsesTool := GetSampleResponsesTool(SampleToolTypeWeather) // Responses API
|
|
|
|
// Use specialized tool call retry configuration
|
|
retryConfig := ToolCallRetryConfig(string(SampleToolTypeWeather))
|
|
retryContext := TestRetryContext{
|
|
ScenarioName: "ToolCalls",
|
|
ExpectedBehavior: map[string]interface{}{
|
|
"expected_tool_name": string(SampleToolTypeWeather),
|
|
"required_location": "new york",
|
|
},
|
|
TestMetadata: map[string]interface{}{
|
|
"provider": testConfig.Provider,
|
|
"model": testConfig.ChatModel,
|
|
},
|
|
}
|
|
|
|
// Enhanced tool call validation (same for both APIs)
|
|
expectations := ToolCallExpectations(string(SampleToolTypeWeather), []string{"location"})
|
|
expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
|
|
|
|
// Add additional tool-specific validations
|
|
expectations.ExpectedToolCalls[0].ArgumentTypes = map[string]string{
|
|
"location": "string",
|
|
}
|
|
|
|
// Create operations for both Chat Completions and Responses API
|
|
chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
chatReq := &schemas.BifrostChatRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: chatMessages,
|
|
Params: &schemas.ChatParameters{
|
|
MaxCompletionTokens: bifrost.Ptr(150),
|
|
Tools: []schemas.ChatTool{*chatTool},
|
|
},
|
|
Fallbacks: testConfig.Fallbacks,
|
|
}
|
|
return client.ChatCompletionRequest(bfCtx, chatReq)
|
|
}
|
|
|
|
responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
responsesReq := &schemas.BifrostResponsesRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: responsesMessages,
|
|
Params: &schemas.ResponsesParameters{
|
|
Tools: []schemas.ResponsesTool{*responsesTool},
|
|
},
|
|
}
|
|
return client.ResponsesRequest(bfCtx, responsesReq)
|
|
}
|
|
|
|
// Execute dual API test - passes only if BOTH APIs succeed
|
|
result := WithDualAPITestRetry(t,
|
|
retryConfig,
|
|
retryContext,
|
|
expectations,
|
|
"ToolCalls",
|
|
chatOperation,
|
|
responsesOperation)
|
|
|
|
// Validate both APIs succeeded
|
|
if !result.BothSucceeded {
|
|
var errors []string
|
|
if result.ChatCompletionsError != nil {
|
|
errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
|
|
}
|
|
if result.ResponsesAPIError != nil {
|
|
errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
|
|
}
|
|
if len(errors) == 0 {
|
|
errors = append(errors, "One or both APIs failed validation (see logs above)")
|
|
}
|
|
t.Fatalf("❌ ToolCalls dual API test failed: %v", errors)
|
|
}
|
|
|
|
// Verify location argument mentions New York using universal tool extraction
|
|
validateLocationInChatToolCalls := func(response *schemas.BifrostChatResponse, apiName string) {
|
|
toolCalls := ExtractChatToolCalls(response)
|
|
validateLocationInToolCalls(t, toolCalls, apiName)
|
|
}
|
|
|
|
validateLocationInResponsesToolCalls := func(response *schemas.BifrostResponsesResponse, apiName string) {
|
|
toolCalls := ExtractResponsesToolCalls(response)
|
|
validateLocationInToolCalls(t, toolCalls, apiName)
|
|
}
|
|
|
|
// Validate both API responses
|
|
if result.ChatCompletionsResponse != nil {
|
|
validateLocationInChatToolCalls(result.ChatCompletionsResponse, "Chat Completions")
|
|
}
|
|
|
|
if result.ResponsesAPIResponse != nil {
|
|
validateLocationInResponsesToolCalls(result.ResponsesAPIResponse, "Responses")
|
|
}
|
|
|
|
t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCalls test!")
|
|
})
|
|
}
|
|
|
|
func validateLocationInToolCalls(t *testing.T, toolCalls []ToolCallInfo, apiName string) {
|
|
locationFound := false
|
|
|
|
for _, toolCall := range toolCalls {
|
|
if toolCall.Name == string(SampleToolTypeWeather) {
|
|
var args map[string]interface{}
|
|
if json.Unmarshal([]byte(toolCall.Arguments), &args) == nil {
|
|
if location, exists := args["location"].(string); exists {
|
|
lowerLocation := strings.ToLower(location)
|
|
if strings.Contains(lowerLocation, "new york") || strings.Contains(lowerLocation, "nyc") {
|
|
locationFound = true
|
|
t.Logf("✅ %s tool call has correct location: %s", apiName, location)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
require.True(t, locationFound, "%s API tool call should specify New York as the location", apiName)
|
|
}
|
|
|
|
// RunToolCallsWithEmptyPropertiesTest tests tool calls with explicitly empty properties ({})
|
|
func RunToolCallsWithEmptyPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
|
|
if !testConfig.Scenarios.ToolCalls {
|
|
t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
|
|
return
|
|
}
|
|
|
|
t.Run("ToolCallsWithEmptyProperties", func(t *testing.T) {
|
|
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
|
|
t.Parallel()
|
|
}
|
|
|
|
chatMessages := []schemas.ChatMessage{
|
|
CreateBasicChatMessage("Call the ping tool"),
|
|
}
|
|
responsesMessages := []schemas.ResponsesMessage{
|
|
CreateBasicResponsesMessage("Call the ping tool"),
|
|
}
|
|
|
|
// Get tools using the sample tool helper functions
|
|
chatTool := GetSampleChatTool(SampleToolTypePingWithEmpty)
|
|
responsesTool := GetSampleResponsesTool(SampleToolTypePingWithEmpty)
|
|
|
|
retryConfig := ToolCallRetryConfig("ping")
|
|
retryContext := TestRetryContext{
|
|
ScenarioName: "ToolCallsWithEmptyProperties",
|
|
ExpectedBehavior: map[string]interface{}{
|
|
"expected_tool_name": "ping",
|
|
},
|
|
TestMetadata: map[string]interface{}{
|
|
"provider": testConfig.Provider,
|
|
"model": testConfig.ChatModel,
|
|
},
|
|
}
|
|
|
|
expectations := ToolCallExpectations("ping", []string{}) // No required arguments
|
|
expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
|
|
|
|
chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
chatReq := &schemas.BifrostChatRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: chatMessages,
|
|
Params: &schemas.ChatParameters{
|
|
MaxCompletionTokens: bifrost.Ptr(150),
|
|
Tools: []schemas.ChatTool{*chatTool},
|
|
ToolChoice: &schemas.ChatToolChoice{
|
|
ChatToolChoiceStr: bifrost.Ptr("required"),
|
|
},
|
|
},
|
|
Fallbacks: testConfig.Fallbacks,
|
|
}
|
|
return client.ChatCompletionRequest(bfCtx, chatReq)
|
|
}
|
|
|
|
responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
responsesReq := &schemas.BifrostResponsesRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: responsesMessages,
|
|
Params: &schemas.ResponsesParameters{
|
|
Tools: []schemas.ResponsesTool{*responsesTool},
|
|
ToolChoice: &schemas.ResponsesToolChoice{
|
|
ResponsesToolChoiceStr: bifrost.Ptr("required"),
|
|
},
|
|
},
|
|
}
|
|
return client.ResponsesRequest(bfCtx, responsesReq)
|
|
}
|
|
|
|
result := WithDualAPITestRetry(t,
|
|
retryConfig,
|
|
retryContext,
|
|
expectations,
|
|
"ToolCallsWithEmptyProperties",
|
|
chatOperation,
|
|
responsesOperation)
|
|
|
|
if !result.BothSucceeded {
|
|
var errors []string
|
|
if result.ChatCompletionsError != nil {
|
|
errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
|
|
}
|
|
if result.ResponsesAPIError != nil {
|
|
errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
|
|
}
|
|
if len(errors) == 0 {
|
|
errors = append(errors, "One or both APIs failed validation (see logs above)")
|
|
}
|
|
t.Fatalf("❌ ToolCallsWithEmptyProperties dual API test failed: %v", errors)
|
|
}
|
|
|
|
validatePingToolCall := func(response *schemas.BifrostChatResponse, apiName string) {
|
|
toolCalls := ExtractChatToolCalls(response)
|
|
require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
|
|
pingFound := false
|
|
for _, toolCall := range toolCalls {
|
|
if toolCall.Name == "ping" {
|
|
pingFound = true
|
|
t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
|
|
break
|
|
}
|
|
}
|
|
require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
|
|
}
|
|
|
|
validatePingResponsesToolCall := func(response *schemas.BifrostResponsesResponse, apiName string) {
|
|
toolCalls := ExtractResponsesToolCalls(response)
|
|
require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
|
|
pingFound := false
|
|
for _, toolCall := range toolCalls {
|
|
if toolCall.Name == "ping" {
|
|
pingFound = true
|
|
t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
|
|
break
|
|
}
|
|
}
|
|
require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
|
|
}
|
|
|
|
if result.ChatCompletionsResponse != nil {
|
|
validatePingToolCall(result.ChatCompletionsResponse, "Chat Completions")
|
|
}
|
|
|
|
if result.ResponsesAPIResponse != nil {
|
|
validatePingResponsesToolCall(result.ResponsesAPIResponse, "Responses")
|
|
}
|
|
|
|
t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCallsWithEmptyProperties test!")
|
|
})
|
|
}
|
|
|
|
// RunToolCallsWithNilPropertiesTest tests tool calls with nil properties (not defined)
|
|
func RunToolCallsWithNilPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
|
|
if !testConfig.Scenarios.ToolCalls {
|
|
t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
|
|
return
|
|
}
|
|
|
|
t.Run("ToolCallsWithNilProperties", func(t *testing.T) {
|
|
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
|
|
t.Parallel()
|
|
}
|
|
|
|
chatMessages := []schemas.ChatMessage{
|
|
CreateBasicChatMessage("Call the ping tool"),
|
|
}
|
|
responsesMessages := []schemas.ResponsesMessage{
|
|
CreateBasicResponsesMessage("Call the ping tool"),
|
|
}
|
|
|
|
// Get tools using the sample tool helper functions
|
|
chatTool := GetSampleChatTool(SampleToolTypePingWithNil)
|
|
responsesTool := GetSampleResponsesTool(SampleToolTypePingWithNil)
|
|
|
|
retryConfig := ToolCallRetryConfig("ping")
|
|
retryContext := TestRetryContext{
|
|
ScenarioName: "ToolCallsWithNilProperties",
|
|
ExpectedBehavior: map[string]interface{}{
|
|
"expected_tool_name": "ping",
|
|
},
|
|
TestMetadata: map[string]interface{}{
|
|
"provider": testConfig.Provider,
|
|
"model": testConfig.ChatModel,
|
|
},
|
|
}
|
|
|
|
expectations := ToolCallExpectations("ping", []string{}) // No required arguments
|
|
expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider)
|
|
|
|
chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
chatReq := &schemas.BifrostChatRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: chatMessages,
|
|
Params: &schemas.ChatParameters{
|
|
MaxCompletionTokens: bifrost.Ptr(150),
|
|
Tools: []schemas.ChatTool{*chatTool},
|
|
ToolChoice: &schemas.ChatToolChoice{
|
|
ChatToolChoiceStr: bifrost.Ptr("required"),
|
|
},
|
|
},
|
|
Fallbacks: testConfig.Fallbacks,
|
|
}
|
|
return client.ChatCompletionRequest(bfCtx, chatReq)
|
|
}
|
|
|
|
responsesOperation := func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
|
|
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
|
|
responsesReq := &schemas.BifrostResponsesRequest{
|
|
Provider: testConfig.Provider,
|
|
Model: testConfig.ChatModel,
|
|
Input: responsesMessages,
|
|
Params: &schemas.ResponsesParameters{
|
|
Tools: []schemas.ResponsesTool{*responsesTool},
|
|
ToolChoice: &schemas.ResponsesToolChoice{
|
|
ResponsesToolChoiceStr: bifrost.Ptr("required"),
|
|
},
|
|
},
|
|
}
|
|
return client.ResponsesRequest(bfCtx, responsesReq)
|
|
}
|
|
|
|
result := WithDualAPITestRetry(t,
|
|
retryConfig,
|
|
retryContext,
|
|
expectations,
|
|
"ToolCallsWithNilProperties",
|
|
chatOperation,
|
|
responsesOperation)
|
|
|
|
if !result.BothSucceeded {
|
|
var errors []string
|
|
if result.ChatCompletionsError != nil {
|
|
errors = append(errors, "Chat Completions: "+GetErrorMessage(result.ChatCompletionsError))
|
|
}
|
|
if result.ResponsesAPIError != nil {
|
|
errors = append(errors, "Responses API: "+GetErrorMessage(result.ResponsesAPIError))
|
|
}
|
|
if len(errors) == 0 {
|
|
errors = append(errors, "One or both APIs failed validation (see logs above)")
|
|
}
|
|
t.Fatalf("❌ ToolCallsWithNilProperties dual API test failed: %v", errors)
|
|
}
|
|
|
|
validatePingToolCall := func(response *schemas.BifrostChatResponse, apiName string) {
|
|
toolCalls := ExtractChatToolCalls(response)
|
|
require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
|
|
pingFound := false
|
|
for _, toolCall := range toolCalls {
|
|
if toolCall.Name == "ping" {
|
|
pingFound = true
|
|
t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
|
|
break
|
|
}
|
|
}
|
|
require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
|
|
}
|
|
|
|
validatePingResponsesToolCall := func(response *schemas.BifrostResponsesResponse, apiName string) {
|
|
toolCalls := ExtractResponsesToolCalls(response)
|
|
require.True(t, len(toolCalls) > 0, "%s API should have tool calls", apiName)
|
|
pingFound := false
|
|
for _, toolCall := range toolCalls {
|
|
if toolCall.Name == "ping" {
|
|
pingFound = true
|
|
t.Logf("✅ %s tool call found: %s", apiName, toolCall.Name)
|
|
break
|
|
}
|
|
}
|
|
require.True(t, pingFound, "%s API tool call should include ping tool", apiName)
|
|
}
|
|
|
|
if result.ChatCompletionsResponse != nil {
|
|
validatePingToolCall(result.ChatCompletionsResponse, "Chat Completions")
|
|
}
|
|
|
|
if result.ResponsesAPIResponse != nil {
|
|
validatePingResponsesToolCall(result.ResponsesAPIResponse, "Responses")
|
|
}
|
|
|
|
t.Logf("🎉 Both Chat Completions and Responses APIs passed ToolCallsWithNilProperties test!")
|
|
})
|
|
}
|