Files
bifrost/core/internal/llmtests/tool_calls_streaming.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

782 lines
27 KiB
Go

package llmtests
import (
"context"
"encoding/json"
"fmt"
"os"
"sort"
"strings"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// StreamingToolCallAccumulator accumulates tool call fragments from streaming responses
type StreamingToolCallAccumulator struct {
// For Chat Completions: map of tool call index -> accumulated tool call
ChatToolCalls map[int]*schemas.ChatAssistantMessageToolCall
// For Responses API: map of call ID or item ID -> accumulated tool call info
ResponsesToolCalls map[string]*ResponsesToolCallInfo
// Map itemID to the key used in ResponsesToolCalls for quick lookup
ItemIDToKey map[string]string
}
// ResponsesToolCallInfo accumulates tool call information from Responses API streaming
type ResponsesToolCallInfo struct {
ID string
Name string
Arguments string
}
// NewStreamingToolCallAccumulator creates a new accumulator
func NewStreamingToolCallAccumulator() *StreamingToolCallAccumulator {
return &StreamingToolCallAccumulator{
ChatToolCalls: make(map[int]*schemas.ChatAssistantMessageToolCall),
ResponsesToolCalls: make(map[string]*ResponsesToolCallInfo),
ItemIDToKey: make(map[string]string),
}
}
// AccumulateChatToolCall accumulates a tool call from a Chat Completions streaming chunk
func (acc *StreamingToolCallAccumulator) AccumulateChatToolCall(choiceIndex int, toolCall schemas.ChatAssistantMessageToolCall) {
// Prefer ID as key if available, otherwise use index
key := -1
var found bool
if toolCall.ID != nil && *toolCall.ID != "" {
// Try to find existing tool call by ID first
for k, existing := range acc.ChatToolCalls {
if existing.ID != nil && *existing.ID == *toolCall.ID {
key = k
found = true
break
}
}
// If not found by ID, use index
if !found {
key = int(toolCall.Index)
}
} else {
// Use the tool call index as the key
key = int(toolCall.Index)
}
existing, exists := acc.ChatToolCalls[key]
if !exists {
// First chunk for this tool call - initialize
acc.ChatToolCalls[key] = &schemas.ChatAssistantMessageToolCall{
Index: toolCall.Index,
Type: toolCall.Type,
ID: toolCall.ID,
Function: schemas.ChatAssistantMessageToolCallFunction{},
}
existing = acc.ChatToolCalls[key]
}
// Accumulate name if present
if toolCall.Function.Name != nil && *toolCall.Function.Name != "" {
existing.Function.Name = toolCall.Function.Name
}
// Accumulate ID if present (may come in later chunks)
if toolCall.ID != nil && *toolCall.ID != "" {
existing.ID = toolCall.ID
}
// Accumulate arguments (they come incrementally)
if toolCall.Function.Arguments != "" {
existing.Function.Arguments += toolCall.Function.Arguments
}
}
// AccumulateResponsesToolCall accumulates a tool call from a Responses API streaming chunk
func (acc *StreamingToolCallAccumulator) AccumulateResponsesToolCall(callID *string, name *string, arguments *string, itemID *string) {
// First, try to find existing tool call by itemID (most reliable for matching)
key := "default"
if itemID != nil && *itemID != "" {
itemIDStr := *itemID
// Check if we have a mapping for this itemID
if mappedKey, exists := acc.ItemIDToKey[itemIDStr]; exists {
key = mappedKey
} else {
// Try to find by itemID in keys (with or without prefix)
for k := range acc.ResponsesToolCalls {
if k == itemIDStr || k == "item:"+itemIDStr {
key = k
acc.ItemIDToKey[itemIDStr] = key
break
}
}
// If not found, use itemID as key
if key == "default" {
key = "item:" + itemIDStr
acc.ItemIDToKey[itemIDStr] = key
}
}
} else if callID != nil && *callID != "" {
// Use callID as key if no itemID
key = *callID
} else if name != nil && *name != "" {
// Try to find existing tool call by name if we don't have callID or itemID yet
for k, existing := range acc.ResponsesToolCalls {
if existing.Name == *name && existing.ID == "" {
key = k
break
}
}
// If not found, use name as temporary key
if key == "default" {
key = "name:" + *name
}
}
existing, exists := acc.ResponsesToolCalls[key]
if !exists {
existing = &ResponsesToolCallInfo{}
acc.ResponsesToolCalls[key] = existing
}
// Track the final key that will be used for this entry
finalKey := key
// Update fields if present
if callID != nil && *callID != "" {
existing.ID = *callID
// If we were using a temporary key, migrate to callID-based key
if key != *callID {
acc.ResponsesToolCalls[*callID] = existing
finalKey = *callID
// Update itemID mapping if we have one
if itemID != nil && *itemID != "" {
acc.ItemIDToKey[*itemID] = *callID
}
if key != "default" && key != *callID {
delete(acc.ResponsesToolCalls, key)
}
}
}
if name != nil && *name != "" {
existing.Name = *name
}
if arguments != nil && *arguments != "" {
// If we're getting complete arguments (from done event), replace instead of append
// Check if this looks like complete JSON (starts with { and ends with })
argsStr := *arguments
if len(argsStr) > 0 && argsStr[0] == '{' && argsStr[len(argsStr)-1] == '}' && existing.Arguments != "" {
// This looks like complete arguments, but only replace if we already have partial args
// Otherwise, this might be the first chunk which happens to be complete
existing.Arguments = argsStr
} else {
// Incremental chunk, append
existing.Arguments += argsStr
}
}
// Update itemID mapping if we have itemID but haven't mapped it yet
// Use finalKey which is the actual key where the entry is stored
if itemID != nil && *itemID != "" {
if _, exists := acc.ItemIDToKey[*itemID]; !exists {
acc.ItemIDToKey[*itemID] = finalKey
}
}
}
// GetFinalChatToolCalls returns the final accumulated tool calls for Chat Completions
func (acc *StreamingToolCallAccumulator) GetFinalChatToolCalls() []ToolCallInfo {
keys := make([]int, 0, len(acc.ChatToolCalls))
for k := range acc.ChatToolCalls {
keys = append(keys, k)
}
sort.Ints(keys)
var result []ToolCallInfo
for _, key := range keys {
toolCall := acc.ChatToolCalls[key]
info := ToolCallInfo{
Index: key,
}
if toolCall.ID != nil {
info.ID = *toolCall.ID
}
if toolCall.Function.Name != nil {
info.Name = *toolCall.Function.Name
}
info.Arguments = toolCall.Function.Arguments
result = append(result, info)
}
return result
}
// GetFinalResponsesToolCalls returns the final accumulated tool calls for Responses API
func (acc *StreamingToolCallAccumulator) GetFinalResponsesToolCalls() []ToolCallInfo {
var result []ToolCallInfo
for _, toolCall := range acc.ResponsesToolCalls {
result = append(result, ToolCallInfo{
ID: toolCall.ID,
Name: toolCall.Name,
Arguments: toolCall.Arguments,
})
}
return result
}
// RunToolCallsStreamingTest executes the tool calls streaming test scenario
func RunToolCallsStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
if !testConfig.Scenarios.ToolCallsStreaming {
t.Logf("Tool calls streaming not supported for provider %s", testConfig.Provider)
return
}
// Test Chat Completions streaming with tool calls
t.Run("ToolCallsStreamingChatCompletions", func(t *testing.T) {
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
t.Parallel()
}
chatMessages := []schemas.ChatMessage{
CreateBasicChatMessage("What's the weather like in New York? answer in celsius"),
}
chatTool := GetSampleChatTool(SampleToolTypeWeather)
request := &schemas.BifrostChatRequest{
Provider: testConfig.Provider,
Model: testConfig.ChatModel,
Input: chatMessages,
Params: &schemas.ChatParameters{
MaxCompletionTokens: bifrost.Ptr(150),
Tools: []schemas.ChatTool{*chatTool},
},
Fallbacks: testConfig.Fallbacks,
}
// Use retry framework for stream requests with tools
retryConfig := StreamingRetryConfig()
retryContext := TestRetryContext{
ScenarioName: "ToolCallsStreamingChatCompletions",
ExpectedBehavior: map[string]interface{}{
"should_stream_content": true,
"should_have_tool_calls": true,
"tool_name": "get_weather",
},
TestMetadata: map[string]interface{}{
"provider": testConfig.Provider,
"model": testConfig.ChatModel,
"tools": true,
},
}
responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
return client.ChatCompletionStreamRequest(bfCtx, request)
})
RequireNoError(t, err, "Chat completion stream with tools failed")
if responseChannel == nil {
t.Fatal("Response channel should not be nil")
}
accumulator := NewStreamingToolCallAccumulator()
var responseCount int
t.Logf("🔧 Testing Chat Completions streaming with tool calls...")
for response := range responseChannel {
if response == nil || response.BifrostChatResponse == nil {
t.Fatal("Streaming response should not be nil")
}
responseCount++
// Process tool calls from this chunk
if response.BifrostChatResponse.Choices != nil {
for _, choice := range response.BifrostChatResponse.Choices {
if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil {
delta := choice.ChatStreamResponseChoice.Delta
// Check for tool calls in delta
if len(delta.ToolCalls) > 0 {
for _, toolCall := range delta.ToolCalls {
// Debug logging: what fields are present in this chunk
chunkType := "ChatCompletions.Delta.ToolCalls"
hasID := toolCall.ID != nil && *toolCall.ID != ""
hasName := toolCall.Function.Name != nil && *toolCall.Function.Name != ""
hasArgs := toolCall.Function.Arguments != ""
t.Logf("📊 [%s] Chunk fields: ID=%v (field: toolCall.ID), Name=%v (field: toolCall.Function.Name), Args=%v (field: toolCall.Function.Arguments, len=%d)",
chunkType, hasID, hasName, hasArgs, len(toolCall.Function.Arguments))
if hasID {
t.Logf(" ✅ ID found in %s: %s", chunkType, *toolCall.ID)
}
if hasName {
t.Logf(" ✅ Name found in %s: %s", chunkType, *toolCall.Function.Name)
}
if hasArgs {
t.Logf(" ✅ Arguments found in %s: %s", chunkType, toolCall.Function.Arguments)
}
accumulator.AccumulateChatToolCall(choice.Index, toolCall)
t.Logf("🔧 Accumulated tool call chunk: index=%d, id=%v, name=%v, args_len=%d",
choice.Index,
toolCall.ID,
toolCall.Function.Name,
len(toolCall.Function.Arguments))
}
}
}
}
}
if responseCount > 500 {
break
}
}
if responseCount == 0 {
t.Fatal("Should receive at least one streaming response")
}
// Validate final tool calls
finalToolCalls := accumulator.GetFinalChatToolCalls()
if len(finalToolCalls) == 0 {
t.Fatal("❌ No tool calls found in streaming response")
}
for i, toolCall := range finalToolCalls {
if toolCall.ID == "" || toolCall.Name == "" || toolCall.Arguments == "" {
t.Fatalf("❌ Tool call %d missing required fields: ID=%v, Name=%v, Arguments=%v",
i, toolCall.ID != "", toolCall.Name != "", toolCall.Arguments != "")
}
}
if err := validateStreamingToolCalls(finalToolCalls, "Chat Completions"); err != nil {
t.Fatalf("❌ %v", err)
}
t.Logf("✅ Chat Completions streaming with tools test completed successfully")
})
// Test Responses API streaming with tool calls
t.Run("ToolCallsStreamingResponses", func(t *testing.T) {
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
t.Parallel()
}
responsesMessages := []schemas.ResponsesMessage{
CreateBasicResponsesMessage("What's the weather like in New York? answer in celsius"),
}
responsesTool := GetSampleResponsesTool(SampleToolTypeWeather)
request := &schemas.BifrostResponsesRequest{
Provider: testConfig.Provider,
Model: testConfig.ChatModel,
Input: responsesMessages,
Params: &schemas.ResponsesParameters{
Tools: []schemas.ResponsesTool{*responsesTool},
},
Fallbacks: testConfig.Fallbacks,
}
// Use retry framework for stream requests with tools
retryConfig := StreamingRetryConfig()
retryContext := TestRetryContext{
ScenarioName: "ToolCallsStreamingResponses",
ExpectedBehavior: map[string]interface{}{
"should_stream_content": true,
"should_have_tool_calls": true,
"tool_name": "get_weather",
},
TestMetadata: map[string]interface{}{
"provider": testConfig.Provider,
"model": testConfig.ChatModel,
"tools": true,
},
}
// Use validation retry wrapper that validates tool calls and retries on validation failures
validationResult := WithResponsesStreamValidationRetry(t, retryConfig, retryContext,
func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline)
return client.ResponsesStreamRequest(bfCtx, request)
},
func(responseChannel chan *schemas.BifrostStreamChunk) ResponsesStreamValidationResult {
accumulator := NewStreamingToolCallAccumulator()
var responseCount int
t.Logf("🔧 Testing Responses API streaming with tool calls...")
// Create a timeout context for the stream reading
streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
defer cancel()
for {
select {
case response, ok := <-responseChannel:
if !ok {
// Channel closed, streaming completed
t.Logf("✅ Responses streaming completed. Total chunks received: %d", responseCount)
goto streamComplete
}
if response == nil {
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{"❌ Streaming response should not be nil"},
}
}
responseCount++
if response.BifrostResponsesStreamResponse != nil {
streamResp := response.BifrostResponsesStreamResponse
// Check for function call events
switch streamResp.Type {
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta:
// Arguments are being streamed - check both Delta and Arguments fields
// Delta is used by most providers (Anthropic, Cohere, Bedrock, OpenAI)
// Arguments is used by some providers (OpenAI-compatible via mux)
chunkType := string(streamResp.Type)
var arguments *string
argsField := "<none>"
if streamResp.Delta != nil {
arguments = streamResp.Delta
argsField = "streamResp.Delta"
} else if streamResp.Arguments != nil {
arguments = streamResp.Arguments
argsField = "streamResp.Arguments"
}
if arguments != nil {
// Try to get call ID, name, and item ID
var callID *string
var name *string
var itemID *string
callIDField := "<none>"
nameField := "<none>"
itemIDField := "<none>"
// Item ID is often in the delta event itself (for OpenAI)
if streamResp.ItemID != nil {
itemID = streamResp.ItemID
itemIDField = "streamResp.ItemID"
}
// Try to get call ID and name from item if available
if streamResp.Item != nil && streamResp.Item.ResponsesToolMessage != nil {
if streamResp.Item.ResponsesToolMessage.CallID != nil {
callID = streamResp.Item.ResponsesToolMessage.CallID
callIDField = "streamResp.Item.ResponsesToolMessage.CallID"
}
if streamResp.Item.ResponsesToolMessage.Name != nil {
name = streamResp.Item.ResponsesToolMessage.Name
nameField = "streamResp.Item.ResponsesToolMessage.Name"
}
}
// Also check if item has an ID
if streamResp.Item != nil && streamResp.Item.ID != nil {
itemID = streamResp.Item.ID
itemIDField = "streamResp.Item.ID"
}
// Debug logging: what fields are present in this chunk
hasID := callID != nil && *callID != ""
hasName := name != nil && *name != ""
hasArgs := *arguments != ""
hasItemID := itemID != nil && *itemID != ""
t.Logf("📊 [%s] Chunk fields: ID=%v (%s), Name=%v (%s), Args=%v (%s, len=%d), ItemID=%v (%s)",
chunkType, hasID, callIDField, hasName, nameField, hasArgs, argsField, len(*arguments), hasItemID, itemIDField)
if hasID {
t.Logf(" ✅ ID found in %s: %s", chunkType, *callID)
}
if hasName {
t.Logf(" ✅ Name found in %s: %s", chunkType, *name)
}
if hasArgs {
t.Logf(" ✅ Arguments found in %s: %s", chunkType, *arguments)
}
if hasItemID {
t.Logf(" ✅ ItemID found in %s: %s", chunkType, *itemID)
}
accumulator.AccumulateResponsesToolCall(callID, name, arguments, itemID)
callIDStr := "<nil>"
if callID != nil {
callIDStr = *callID
}
nameStr := "<nil>"
if name != nil {
nameStr = *name
}
itemIDStr := "<nil>"
if itemID != nil {
itemIDStr = *itemID
}
t.Logf("🔧 Accumulated function call arguments chunk: callID=%s, name=%s, itemID=%s, args_len=%d",
callIDStr, nameStr, itemIDStr, len(*arguments))
}
case schemas.ResponsesStreamResponseTypeOutputItemAdded:
// A new function call item was added
if streamResp.Item != nil && streamResp.Item.Type != nil {
if *streamResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall {
chunkType := string(streamResp.Type)
var callID *string
var name *string
var itemID *string
callIDField := "<none>"
nameField := "<none>"
itemIDField := "<none>"
// Extract itemID first, before any accumulation calls
if streamResp.Item.ID != nil {
itemID = streamResp.Item.ID
itemIDField = "streamResp.Item.ID"
}
if streamResp.Item.ResponsesToolMessage != nil {
if streamResp.Item.ResponsesToolMessage.CallID != nil {
callID = streamResp.Item.ResponsesToolMessage.CallID
callIDField = "streamResp.Item.ResponsesToolMessage.CallID"
}
if streamResp.Item.ResponsesToolMessage.Name != nil {
name = streamResp.Item.ResponsesToolMessage.Name
nameField = "streamResp.Item.ResponsesToolMessage.Name"
}
if streamResp.Item.ResponsesToolMessage.Arguments != nil {
argsField := "streamResp.Item.ResponsesToolMessage.Arguments"
t.Logf("📊 [%s] Arguments also found in item: %s (len=%d)", chunkType, argsField, len(*streamResp.Item.ResponsesToolMessage.Arguments))
// Accumulate arguments if found in item
accumulator.AccumulateResponsesToolCall(callID, name, streamResp.Item.ResponsesToolMessage.Arguments, itemID)
}
}
// Debug logging: what fields are present in this chunk
hasID := callID != nil && *callID != ""
hasName := name != nil && *name != ""
hasItemID := itemID != nil && *itemID != ""
t.Logf("📊 [%s] Chunk fields: ID=%v (%s), Name=%v (%s), ItemID=%v (%s)",
chunkType, hasID, callIDField, hasName, nameField, hasItemID, itemIDField)
if hasID {
t.Logf(" ✅ ID found in %s: %s", chunkType, *callID)
}
if hasName {
t.Logf(" ✅ Name found in %s: %s", chunkType, *name)
}
if hasItemID {
t.Logf(" ✅ ItemID found in %s: %s", chunkType, *itemID)
}
// Initialize or update the tool call (only if Arguments not already accumulated)
if streamResp.Item.ResponsesToolMessage == nil || streamResp.Item.ResponsesToolMessage.Arguments == nil {
accumulator.AccumulateResponsesToolCall(callID, name, nil, itemID)
}
callIDStr := "<nil>"
if callID != nil {
callIDStr = *callID
}
nameStr := "<nil>"
if name != nil {
nameStr = *name
}
itemIDStr := "<nil>"
if itemID != nil {
itemIDStr = *itemID
}
t.Logf("🔧 Function call item added: callID=%s, name=%s, itemID=%s",
callIDStr, nameStr, itemIDStr)
}
}
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone:
// Function call arguments are complete - use the complete arguments
if streamResp.Arguments != nil {
chunkType := string(streamResp.Type)
var callID *string
var name *string
var itemID *string
callIDField := "<none>"
nameField := "<none>"
itemIDField := "<none>"
argsField := "streamResp.Arguments"
if streamResp.ItemID != nil {
itemID = streamResp.ItemID
itemIDField = "streamResp.ItemID"
}
if streamResp.Item != nil && streamResp.Item.ResponsesToolMessage != nil {
if streamResp.Item.ResponsesToolMessage.CallID != nil {
callID = streamResp.Item.ResponsesToolMessage.CallID
callIDField = "streamResp.Item.ResponsesToolMessage.CallID"
}
if streamResp.Item.ResponsesToolMessage.Name != nil {
name = streamResp.Item.ResponsesToolMessage.Name
nameField = "streamResp.Item.ResponsesToolMessage.Name"
}
}
if streamResp.Item != nil && streamResp.Item.ID != nil {
itemID = streamResp.Item.ID
itemIDField = "streamResp.Item.ID"
}
// Debug logging: what fields are present in this chunk
hasID := callID != nil && *callID != ""
hasName := name != nil && *name != ""
hasArgs := streamResp.Arguments != nil && *streamResp.Arguments != ""
hasItemID := itemID != nil && *itemID != ""
t.Logf("📊 [%s] Chunk fields: ID=%v (%s), Name=%v (%s), Args=%v (%s, len=%d), ItemID=%v (%s)",
chunkType, hasID, callIDField, hasName, nameField, hasArgs, argsField, len(*streamResp.Arguments), hasItemID, itemIDField)
if hasID {
t.Logf(" ✅ ID found in %s: %s", chunkType, *callID)
}
if hasName {
t.Logf(" ✅ Name found in %s: %s", chunkType, *name)
}
if hasArgs {
t.Logf(" ✅ Complete Arguments found in %s: %s", chunkType, *streamResp.Arguments)
}
if hasItemID {
t.Logf(" ✅ ItemID found in %s: %s", chunkType, *itemID)
}
// Use the complete arguments from the done event
accumulator.AccumulateResponsesToolCall(callID, name, streamResp.Arguments, itemID)
callIDStr := "<nil>"
if callID != nil {
callIDStr = *callID
}
nameStr := "<nil>"
if name != nil {
nameStr = *name
}
itemIDStr := "<nil>"
if itemID != nil {
itemIDStr = *itemID
}
t.Logf("🔧 Function call arguments done: callID=%s, name=%s, itemID=%s, complete_args=%s",
callIDStr, nameStr, itemIDStr, *streamResp.Arguments)
}
}
}
// Safety check to prevent infinite loops
if responseCount > 500 {
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{"❌ Received too many streaming chunks, something might be wrong"},
}
}
case <-streamCtx.Done():
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{"❌ Timeout waiting for responses streaming response"},
ReceivedData: responseCount > 0,
}
}
}
streamComplete:
if responseCount == 0 {
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{"❌ Stream closed without receiving any data"},
ReceivedData: false,
}
}
// Validate final tool calls
finalToolCalls := accumulator.GetFinalResponsesToolCalls()
if len(finalToolCalls) == 0 {
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{"❌ No tool calls found in streaming response"},
ReceivedData: responseCount > 0,
}
}
// Check for missing required fields
var validationErrors []string
for i, toolCall := range finalToolCalls {
if toolCall.ID == "" || toolCall.Name == "" || toolCall.Arguments == "" {
validationErrors = append(validationErrors, fmt.Sprintf("Tool call %d missing required fields: ID=%v, Name=%v, Arguments=%v",
i, toolCall.ID != "", toolCall.Name != "", toolCall.Arguments != ""))
}
}
if len(validationErrors) > 0 {
return ResponsesStreamValidationResult{
Passed: false,
Errors: validationErrors,
ReceivedData: responseCount > 0,
}
}
if err := validateStreamingToolCalls(finalToolCalls, "Responses API"); err != nil {
return ResponsesStreamValidationResult{
Passed: false,
Errors: []string{fmt.Sprintf("❌ %v", err)},
ReceivedData: responseCount > 0,
}
}
return ResponsesStreamValidationResult{
Passed: true,
ReceivedData: responseCount > 0,
}
})
// Check validation result and fail test if validation failed after all retries
if !validationResult.Passed {
allErrors := append(validationResult.Errors, validationResult.StreamErrors...)
errorMsg := strings.Join(allErrors, "; ")
if !strings.Contains(errorMsg, "❌") {
errorMsg = fmt.Sprintf("❌ %s", errorMsg)
}
t.Fatalf("❌ Responses streaming tool calls validation failed after retries: %s", errorMsg)
}
t.Logf("✅ Responses API streaming with tools test completed successfully")
})
}
// validateStreamingToolCalls validates that all tool calls have ID, name, and arguments.
func validateStreamingToolCalls(toolCalls []ToolCallInfo, apiName string) error {
if len(toolCalls) == 0 {
return fmt.Errorf("%s: no tool calls found in streaming response", apiName)
}
for i, toolCall := range toolCalls {
if toolCall.ID == "" {
return fmt.Errorf("%s: tool call %d missing ID", apiName, i)
}
if toolCall.Name == "" {
return fmt.Errorf("%s: tool call %d missing name", apiName, i)
}
if toolCall.Arguments == "" {
return fmt.Errorf("%s: tool call %d missing arguments", apiName, i)
}
// Try to parse arguments as JSON to ensure they're valid
var args map[string]interface{}
if err := json.Unmarshal([]byte(toolCall.Arguments), &args); err != nil {
// Don't fail on invalid JSON - some providers might send partial JSON during streaming
// But we should at least have some content
if strings.TrimSpace(toolCall.Arguments) == "" {
return fmt.Errorf("%s: tool call %d has empty arguments", apiName, i)
}
}
}
return nil
}