Files
bifrost/core/providers/gemini/responses.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

3653 lines
122 KiB
Go

package gemini
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"strings"
"sync"
"time"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)
// ToBifrostResponsesRequest translates a Gemini generation request into the
// Bifrost Responses request shape.
//
// The system instruction (when it has parts) becomes the first input message,
// followed by the converted contents. Tools and tool choice are mapped onto the
// Responses parameters, and Gemini-only fields (safety settings, cached
// content) are carried in ExtraParams under "safety_settings" and
// "cached_content". A nil receiver yields nil.
func (request *GeminiGenerationRequest) ToBifrostResponsesRequest(ctx *schemas.BifrostContext) *schemas.BifrostResponsesRequest {
	if request == nil {
		return nil
	}

	defaultProvider := providerUtils.CheckAndSetDefaultProvider(ctx, schemas.Gemini)
	provider, model := schemas.ParseModelString(request.Model, defaultProvider)

	params := request.convertGenerationConfigToResponsesParameters()

	// System instruction goes first so it precedes the conversation turns.
	var messages []schemas.ResponsesMessage
	if sys := request.SystemInstruction; sys != nil && len(sys.Parts) > 0 {
		if converted := convertGeminiSystemInstructionToResponsesMessage(sys); converted != nil {
			messages = append(messages, *converted)
		}
	}
	if len(request.Contents) > 0 {
		if turns := convertGeminiContentsToResponsesMessages(request.Contents); len(turns) > 0 {
			messages = append(messages, turns...)
		}
	}

	if len(request.Tools) > 0 {
		params.Tools = convertGeminiToolsToResponsesTools(request.Tools)
	}
	if cfg := request.ToolConfig; cfg != nil && cfg.FunctionCallingConfig != nil {
		params.ToolChoice = convertGeminiToolConfigToToolChoice(cfg)
	}

	// Gemini-specific settings have no first-class Responses field.
	if request.SafetySettings != nil {
		params.ExtraParams["safety_settings"] = request.SafetySettings
	}
	if request.CachedContent != "" {
		params.ExtraParams["cached_content"] = request.CachedContent
	}

	bifrostReq := &schemas.BifrostResponsesRequest{
		Provider: provider,
		Model:    model,
	}
	if len(messages) > 0 {
		bifrostReq.Input = messages
	}
	bifrostReq.Params = params
	return bifrostReq
}
// ToGeminiResponsesRequest converts a Bifrost Responses request into a Gemini
// generation request.
//
// Parameters are mapped to the generation config, tools and tool choice are
// converted (tool choice only when tools are present), and input messages
// become Gemini contents plus an optional system instruction. When the input
// carries no system message, Params.Instructions is used as the system
// instruction instead. The "safety_settings" and "cached_content" extra params
// are lifted into their dedicated Gemini fields and removed from the request's
// ExtraParams.
//
// The caller's Params.ExtraParams map is never modified: the Gemini request
// receives its own shallow copy, so converting the same Bifrost request again
// (for example on a retry) still sees every extra param.
//
// Returns (nil, nil) for a nil request, and a non-nil error when parameter or
// message conversion fails.
func ToGeminiResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) (*GeminiGenerationRequest, error) {
	if bifrostReq == nil {
		return nil, nil
	}

	// Create the base Gemini generation request
	geminiReq := &GeminiGenerationRequest{
		Model: bifrostReq.Model,
	}

	// Convert parameters to generation config
	if bifrostReq.Params != nil {
		var err error
		geminiReq.GenerationConfig, err = geminiReq.convertParamsToGenerationConfigResponses(bifrostReq.Params)
		if err != nil {
			return nil, err
		}

		// Shallow-copy the extra params: keys are deleted from this map below,
		// and deleting through a shared reference would corrupt the caller's
		// request.
		if src := bifrostReq.Params.ExtraParams; src != nil {
			extra := make(map[string]any, len(src))
			for key, value := range src {
				extra[key] = value
			}
			geminiReq.ExtraParams = extra
		}

		// Handle tool-related parameters
		if len(bifrostReq.Params.Tools) > 0 {
			geminiReq.Tools = convertResponsesToolsToGemini(bifrostReq.Params.Tools)
			// Convert tool choice if present
			if bifrostReq.Params.ToolChoice != nil {
				geminiReq.ToolConfig = convertResponsesToolChoiceToGemini(bifrostReq.Params.ToolChoice)
			}
		}
	}

	// Convert ResponsesInput messages to Gemini contents
	if bifrostReq.Input != nil {
		contents, systemInstruction, err := convertResponsesMessagesToGeminiContents(bifrostReq.Input)
		if err != nil {
			return nil, err
		}
		geminiReq.Contents = contents
		if systemInstruction != nil {
			geminiReq.SystemInstruction = systemInstruction
		}
	}

	if bifrostReq.Params == nil {
		return geminiReq, nil
	}

	// Instructions only apply when the input did not already provide a system
	// instruction.
	if bifrostReq.Params.Instructions != nil && geminiReq.SystemInstruction == nil {
		geminiReq.SystemInstruction = &Content{
			Parts: []*Part{
				{Text: *bifrostReq.Params.Instructions},
			},
		}
	}

	if bifrostReq.Params.ExtraParams != nil {
		// Read from the caller's map, delete from our private copy.
		if safetySettings, ok := schemas.SafeExtractFromMap(bifrostReq.Params.ExtraParams, "safety_settings"); ok {
			delete(geminiReq.ExtraParams, "safety_settings")
			if settings, ok := SafeExtractSafetySettings(safetySettings); ok {
				geminiReq.SafetySettings = settings
			}
		}
		if cachedContent, ok := schemas.SafeExtractString(bifrostReq.Params.ExtraParams["cached_content"]); ok {
			delete(geminiReq.ExtraParams, "cached_content")
			geminiReq.CachedContent = cachedContent
		}
	}

	return geminiReq, nil
}
// ToResponsesBifrostResponsesResponse builds a BifrostResponsesResponse from a
// Gemini GenerateContentResponse.
//
// A fresh "resp_"-prefixed ID and the current Unix time are assigned, usage is
// converted, and the finish reason of the first candidate (if set and not
// unspecified) is stored as the canonical stop reason. When that finish reason
// is an error reason, the response is marked "failed", an error is attached
// (using the finish message, or the finish reason when the message is empty)
// and no output is converted. A nil receiver yields nil.
func (response *GenerateContentResponse) ToResponsesBifrostResponsesResponse() *schemas.BifrostResponsesResponse {
	if response == nil {
		return nil
	}

	result := &schemas.BifrostResponsesResponse{
		ID:        schemas.Ptr("resp_" + providerUtils.GetRandomString(50)),
		CreatedAt: int(time.Now().Unix()),
		Model:     response.ModelVersion,
	}
	result.Usage = ConvertGeminiUsageMetadataToResponsesUsage(response.UsageMetadata)

	if len(response.Candidates) == 0 {
		return result
	}

	first := response.Candidates[0]
	if reason := first.FinishReason; reason != "" && reason != FinishReasonUnspecified {
		canonical := ConvertGeminiFinishReasonToBifrost(reason)
		result.StopReason = &canonical

		if isErrorFinishReason(reason) {
			status := "failed"
			result.Status = &status

			message := first.FinishMessage
			if message == "" {
				message = string(reason)
			}
			result.Error = &schemas.ResponsesResponseError{
				Code:    canonical,
				Message: message,
			}
			// Error finishes carry no usable output.
			return result
		}
	}

	if output := convertGeminiCandidatesToResponsesOutput(response.Candidates); len(output) > 0 {
		result.Output = output
	}
	return result
}
// ToGeminiResponsesResponse converts a BifrostResponsesResponse into a Gemini
// GenerateContentResponse.
//
// Output messages are walked in order and their content is flattened into
// Gemini parts. Consecutive messages with the same role ("user" or, by
// default, "model") share one candidate; a role change starts a new candidate.
// web_search_call messages and messages holding rendered content are not
// emitted as parts; they are folded into the grounding metadata of the last
// candidate. Only the last candidate receives a finish reason and grounding
// metadata.
//
// Returns nil for a nil input.
func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *GenerateContentResponse {
if bifrostResp == nil {
return nil
}
geminiResp := &GenerateContentResponse{
ModelVersion: bifrostResp.Model,
}
// Set response ID if available
if bifrostResp.ID != nil {
geminiResp.ResponseID = *bifrostResp.ID
}
// Set creation time
if bifrostResp.CreatedAt > 0 {
geminiResp.CreateTime = time.Unix(int64(bifrostResp.CreatedAt), 0)
}
// Convert output messages to candidates
if len(bifrostResp.Output) > 0 {
candidates := []*Candidate{}
// Group messages by their role to create candidates
var currentParts []*Part
var currentRole string
// Track which message indices must be skipped by the main loop: web search
// calls, rendered-content messages, and reasoning messages consumed as
// thought signatures
consumedIndices := make(map[int]bool)
// Find last web_search_call and collect annotations and rendered_content for grounding metadata
var lastWebSearchCall *schemas.ResponsesMessage
var webSearchAnnotations []schemas.ResponsesOutputMessageContentTextAnnotation
var lastRenderedContent *string
// First pass: gather grounding inputs. Indexing (rather than ranging by value)
// keeps lastWebSearchCall pointing at the element inside bifrostResp.Output.
for i := range bifrostResp.Output {
msg := &bifrostResp.Output[i]
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeWebSearchCall {
lastWebSearchCall = msg
consumedIndices[i] = true
}
// Collect annotations (typically in message after web search)
if msg.Content != nil && msg.Content.ContentBlocks != nil {
for _, block := range msg.Content.ContentBlocks {
if block.ResponsesOutputMessageContentText != nil && len(block.ResponsesOutputMessageContentText.Annotations) > 0 {
webSearchAnnotations = append(webSearchAnnotations, block.ResponsesOutputMessageContentText.Annotations...)
}
// Collect rendered_content
if block.Type == schemas.ResponsesOutputMessageContentTypeRenderedContent &&
block.ResponsesOutputMessageContentRenderedContent != nil &&
block.ResponsesOutputMessageContentRenderedContent.RenderedContent != "" {
lastRenderedContent = &block.ResponsesOutputMessageContentRenderedContent.RenderedContent
consumedIndices[i] = true // Mark this message as consumed
}
}
}
}
// Second pass: convert the remaining messages into parts.
for i, msg := range bifrostResp.Output {
// Skip messages already consumed: web search calls, rendered content, and
// reasoning messages used as the thought signature of a preceding function call
if consumedIndices[i] {
continue
}
// Determine the role
role := "model" // default
if msg.Role != nil {
if *msg.Role == schemas.ResponsesInputMessageRoleUser {
role = "user"
}
}
// If we're starting a new candidate (role changed), save the previous one
// NOTE: candidates flushed here carry no finish reason or grounding metadata.
if currentRole != "" && currentRole != role && len(currentParts) > 0 {
candidates = append(candidates, &Candidate{
Index: int32(len(candidates)),
Content: &Content{
Parts: currentParts,
Role: currentRole,
},
})
currentParts = []*Part{}
}
currentRole = role
// Convert message content to parts
if msg.Content != nil {
// Handle string content
if msg.Content.ContentStr != nil && *msg.Content.ContentStr != "" {
currentParts = append(currentParts, &Part{
Text: *msg.Content.ContentStr,
})
}
// Handle content blocks
if msg.Content.ContentBlocks != nil {
for _, block := range msg.Content.ContentBlocks {
// Blocks that fail to convert (or convert to nil) are dropped silently.
part, err := convertContentBlockToGeminiPart(block)
if err == nil && part != nil {
currentParts = append(currentParts, part)
}
}
}
}
// Handle tool calls (function calls)
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCall && msg.ResponsesToolMessage != nil {
// Default to an empty JSON object; it is kept when arguments are missing,
// blank, or not valid JSON (json.Compact fails).
argsRaw := json.RawMessage("{}")
if msg.ResponsesToolMessage.Arguments != nil {
rawArgs := strings.TrimSpace(*msg.ResponsesToolMessage.Arguments)
if rawArgs == "" {
rawArgs = "{}"
}
var buf bytes.Buffer
if err := json.Compact(&buf, []byte(rawArgs)); err == nil {
argsRaw = buf.Bytes()
}
}
functionCall := &FunctionCall{
Args: argsRaw,
}
if msg.ResponsesToolMessage.Name != nil {
functionCall.Name = *msg.ResponsesToolMessage.Name
}
// Extract thought signature from CallID if present
var thoughtSignature []byte
if msg.ResponsesToolMessage.CallID != nil {
callID := *msg.ResponsesToolMessage.CallID
// Check if the ID contains a thought signature (format: "ToolName_ts_base64signature")
if strings.Contains(callID, thoughtSignatureSeparator) {
parts := strings.SplitN(callID, thoughtSignatureSeparator, 2)
if len(parts) == 2 {
// Try to decode the signature part (unpadded URL-safe base64);
// a decode failure leaves the signature empty
if decodedSig, err := base64.RawURLEncoding.DecodeString(parts[1]); err == nil {
thoughtSignature = decodedSig
}
}
}
// The full CallID, including any embedded signature, is kept as the function call ID.
functionCall.ID = callID
}
part := &Part{
FunctionCall: functionCall,
}
// Use thought signature from CallID if we extracted one
if len(thoughtSignature) > 0 {
part.ThoughtSignature = thoughtSignature
} else {
// Otherwise, look ahead to see if the next message is a reasoning message with encrypted content
// (thought signature for this function call)
if i+1 < len(bifrostResp.Output) {
nextMsg := bifrostResp.Output[i+1]
if nextMsg.Type != nil && *nextMsg.Type == schemas.ResponsesMessageTypeReasoning &&
nextMsg.ResponsesReasoning != nil && nextMsg.ResponsesReasoning.EncryptedContent != nil {
// Here the signature is standard (padded) base64, unlike the CallID form above.
decodedSig, err := base64.StdEncoding.DecodeString(*nextMsg.ResponsesReasoning.EncryptedContent)
if err == nil {
part.ThoughtSignature = decodedSig
// Mark this reasoning message as consumed
consumedIndices[i+1] = true
}
}
}
}
currentParts = append(currentParts, part)
}
// Handle function responses (function call outputs)
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCallOutput && msg.ResponsesToolMessage != nil {
responseMap := make(map[string]any)
if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr != nil {
output := *msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr
// Valid JSON output is embedded as raw JSON; anything else is embedded as a string.
if json.Valid([]byte(output)) {
responseMap["output"] = json.RawMessage(output)
} else {
responseMap["output"] = output
}
}
// Prefer the tool name; fall back to the call ID when the name is missing or blank.
funcName := ""
if msg.ResponsesToolMessage.Name != nil && strings.TrimSpace(*msg.ResponsesToolMessage.Name) != "" {
funcName = *msg.ResponsesToolMessage.Name
} else if msg.ResponsesToolMessage.CallID != nil {
funcName = *msg.ResponsesToolMessage.CallID
}
// NOTE(review): the marshal error is discarded; on failure Response would be
// whatever bytes MarshalSorted returned — confirm this is acceptable.
responseBytes, _ := providerUtils.MarshalSorted(responseMap)
functionResponse := &FunctionResponse{
Name: funcName,
Response: json.RawMessage(responseBytes),
}
if msg.ResponsesToolMessage.CallID != nil {
functionResponse.ID = *msg.ResponsesToolMessage.CallID
}
currentParts = append(currentParts, &Part{
FunctionResponse: functionResponse,
})
}
// Handle reasoning messages
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeReasoning && msg.ResponsesReasoning != nil {
// Skip this reasoning message if it was already consumed as a thought signature
// NOTE(review): consumed indices are already skipped at the top of the loop and
// this iteration only ever marks i+1, so this check appears to be redundant.
if consumedIndices[i] {
continue
}
// Reasoning content is in the Summary array
if len(msg.ResponsesReasoning.Summary) > 0 {
for _, summaryBlock := range msg.ResponsesReasoning.Summary {
if summaryBlock.Text != "" {
currentParts = append(currentParts, &Part{
Text: summaryBlock.Text,
Thought: true,
})
}
}
}
// Encrypted content becomes a standalone part carrying only the thought signature.
if msg.ResponsesReasoning.EncryptedContent != nil {
decodedSig, err := base64.StdEncoding.DecodeString(*msg.ResponsesReasoning.EncryptedContent)
if err == nil {
currentParts = append(currentParts, &Part{
ThoughtSignature: decodedSig,
})
}
}
}
}
// Add the last candidate if we have parts
if len(currentParts) > 0 {
candidate := &Candidate{
Index: int32(len(candidates)),
Content: &Content{
Parts: currentParts,
Role: currentRole,
},
}
// Determine finish reason: prefer StopReason (Bifrost canonical), fall back to IncompleteDetails
if bifrostResp.StopReason != nil {
candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.StopReason)
} else if bifrostResp.IncompleteDetails != nil {
switch bifrostResp.IncompleteDetails.Reason {
case "max_tokens":
candidate.FinishReason = FinishReasonMaxTokens
case "content_filter":
candidate.FinishReason = FinishReasonSafety
default:
candidate.FinishReason = FinishReasonOther
}
} else {
candidate.FinishReason = FinishReasonStop
}
// Attach grounding metadata if web search was used
if lastWebSearchCall != nil {
candidate.GroundingMetadata = buildGroundingMetadataFromWebSearch(lastWebSearchCall, webSearchAnnotations, lastRenderedContent)
}
candidates = append(candidates, candidate)
}
geminiResp.Candidates = candidates
}
// Convert usage metadata
if bifrostResp.Usage != nil {
geminiResp.UsageMetadata = ConvertBifrostResponsesUsageToGeminiUsageMetadata(bifrostResp.Usage)
}
return geminiResp
}
// BifrostToGeminiStreamState tracks state when converting Bifrost streams to Gemini format.
// It buffers web-search data seen across stream events so it can be attached
// as grounding metadata on the completion event, and remembers function call
// names and IDs by output index for events that do not repeat them.
type BifrostToGeminiStreamState struct {
// Web search buffering
WebSearchCall *schemas.ResponsesMessage // Buffered web_search_call
Annotations []schemas.ResponsesOutputMessageContentTextAnnotation // Buffered annotations
RenderedContent *string // Buffered rendered content from search entry point
HasWebSearch bool // Whether we've seen web search
// Tool call tracking (for FunctionCallArgumentsDone events that don't include Item)
ToolCallNames map[int]string // Maps output_index to tool name
ToolCallIDs map[int]string // Maps output_index to tool call ID
}
// NewBifrostToGeminiStreamState returns an empty Bifrost→Gemini streaming state
// with its annotation buffer and tool-call lookup maps ready for use.
func NewBifrostToGeminiStreamState() *BifrostToGeminiStreamState {
	fresh := new(BifrostToGeminiStreamState)
	fresh.Annotations = []schemas.ResponsesOutputMessageContentTextAnnotation{}
	fresh.ToolCallNames = map[int]string{}
	fresh.ToolCallIDs = map[int]string{}
	return fresh
}
// ToGeminiResponsesStreamResponse converts one Bifrost Responses stream event
// into a Gemini streaming chunk.
//
// state carries data across events (buffered web search information and
// function call names/IDs keyed by output index); when nil, a fresh state is
// created for this call only, so nothing is remembered between calls.
//
// The function returns nil when the event produces no Gemini chunk: the input
// is nil, the event is buffered (web search call, annotation, rendered
// content), the event is a lifecycle or "done" event with no Gemini
// counterpart, or the resulting chunk would have no parts, usage metadata,
// prompt feedback or finish reason.
func ToGeminiResponsesStreamResponse(bifrostResp *schemas.BifrostResponsesStreamResponse, state *BifrostToGeminiStreamState) *GenerateContentResponse {
if bifrostResp == nil {
return nil
}
// Initialize state if not provided (backward compatibility)
if state == nil {
state = NewBifrostToGeminiStreamState()
}
// Buffer web search call
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputItemDone &&
bifrostResp.Item != nil &&
bifrostResp.Item.Type != nil &&
*bifrostResp.Item.Type == schemas.ResponsesMessageTypeWebSearchCall {
state.WebSearchCall = bifrostResp.Item
state.HasWebSearch = true
return nil // Don't emit yet, wait for completion
}
// Buffer annotations
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputTextAnnotationAdded &&
bifrostResp.Annotation != nil {
state.Annotations = append(state.Annotations, *bifrostResp.Annotation)
return nil // Don't emit yet, wait for completion
}
// Buffer rendered_content messages
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputItemDone &&
bifrostResp.Item != nil &&
bifrostResp.Item.Content != nil &&
bifrostResp.Item.Content.ContentBlocks != nil {
for _, block := range bifrostResp.Item.Content.ContentBlocks {
if block.Type == schemas.ResponsesOutputMessageContentTypeRenderedContent &&
block.ResponsesOutputMessageContentRenderedContent != nil &&
block.ResponsesOutputMessageContentRenderedContent.RenderedContent != "" {
state.RenderedContent = &block.ResponsesOutputMessageContentRenderedContent.RenderedContent
return nil // Don't emit yet, wait for completion
}
}
}
// Skip lifecycle events that don't have corresponding Gemini equivalents
switch bifrostResp.Type {
case schemas.ResponsesStreamResponseTypePing,
schemas.ResponsesStreamResponseTypeCreated,
schemas.ResponsesStreamResponseTypeInProgress,
schemas.ResponsesStreamResponseTypeReasoningSummaryPartAdded,
schemas.ResponsesStreamResponseTypeQueued,
// Skip web search lifecycle events - buffered above
schemas.ResponsesStreamResponseTypeWebSearchCallInProgress,
schemas.ResponsesStreamResponseTypeWebSearchCallSearching,
schemas.ResponsesStreamResponseTypeWebSearchCallCompleted,
schemas.ResponsesStreamResponseTypeWebSearchCallResultsAdded,
schemas.ResponsesStreamResponseTypeWebSearchCallResultsCompleted:
// These are lifecycle events with no Gemini equivalent or are buffered
return nil
}
// Every emitted chunk has exactly one "model" candidate; the cases below fill it in.
streamResp := &GenerateContentResponse{
Candidates: []*Candidate{
{
Content: &Content{
Parts: []*Part{},
Role: "model",
},
},
},
}
candidate := streamResp.Candidates[0]
switch bifrostResp.Type {
case schemas.ResponsesStreamResponseTypeOutputTextDelta:
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
Text: *bifrostResp.Delta,
})
}
case schemas.ResponsesStreamResponseTypeReasoningSummaryTextDelta:
// Reasoning deltas are emitted as text parts flagged as thoughts.
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
Text: *bifrostResp.Delta,
Thought: true,
})
}
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta:
// Partial arguments are not forwarded; the complete call is emitted
// by the FunctionCallArgumentsDone case below
return nil
// Function call completed
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone:
// Handle arguments from either Item.ResponsesToolMessage or directly from Arguments field
var argsStr *string
var name *string
var callID *string
if bifrostResp.Item != nil && bifrostResp.Item.ResponsesToolMessage != nil {
argsStr = bifrostResp.Item.ResponsesToolMessage.Arguments
name = bifrostResp.Item.ResponsesToolMessage.Name
callID = bifrostResp.Item.ResponsesToolMessage.CallID
}
if argsStr == nil && bifrostResp.Arguments != nil {
// Some providers (e.g., Anthropic) send Arguments directly on the response
argsStr = bifrostResp.Arguments
// Try to get name and callID from state if available
// NOTE(review): state was defaulted to a non-nil value at the top of the
// function, so this check is always true here.
if state != nil {
// A missing output index is treated as index 0.
outputIndex := 0
if bifrostResp.OutputIndex != nil {
outputIndex = *bifrostResp.OutputIndex
}
if name == nil {
if n, ok := state.ToolCallNames[outputIndex]; ok {
name = &n
}
}
if callID == nil {
if id, ok := state.ToolCallIDs[outputIndex]; ok {
callID = &id
}
}
}
}
if argsStr != nil {
rawArgs := strings.TrimSpace(*argsStr)
if rawArgs == "" {
rawArgs = "{}"
}
// Compact the arguments; invalid JSON falls back to an empty object.
var argsRaw json.RawMessage
var buf bytes.Buffer
if err := json.Compact(&buf, []byte(rawArgs)); err == nil {
argsRaw = buf.Bytes()
} else {
argsRaw = json.RawMessage("{}")
}
functionCall := &FunctionCall{
Name: "",
Args: argsRaw,
}
if name != nil {
functionCall.Name = *name
}
var thoughtSig string
if callID != nil {
// Extract thought signature from CallID if present
if strings.Contains(*callID, thoughtSignatureSeparator) {
parts := strings.SplitN(*callID, thoughtSignatureSeparator, 2)
if len(parts) == 2 {
thoughtSig = parts[1]
}
}
// The full CallID (signature included) is kept as the function call ID.
functionCall.ID = *callID
}
functionCallPart := &Part{
FunctionCall: functionCall,
}
// The embedded signature is unpadded URL-safe base64; a decode failure
// leaves the part without a thought signature.
if thoughtSig != "" {
if decodedSig, err := base64.RawURLEncoding.DecodeString(thoughtSig); err == nil {
functionCallPart.ThoughtSignature = decodedSig
}
}
candidate.Content.Parts = append(candidate.Content.Parts, functionCallPart)
}
case schemas.ResponsesStreamResponseTypeOutputTextDone:
// Text was already streamed via OutputTextDelta chunks, skip to avoid duplication
return nil
case schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
schemas.ResponsesStreamResponseTypeReasoningSummaryPartDone:
// Already handled via deltas, skip
return nil
case schemas.ResponsesStreamResponseTypeOutputItemAdded:
// NOTE(review): this case always ends in `return nil`, so the thought-signature
// part appended here is never emitted. The encrypted content is also used as
// raw bytes here rather than being base64-decoded as in the function-call
// path above — confirm the intended behavior.
if bifrostResp.Item != nil && bifrostResp.Item.ResponsesReasoning != nil && bifrostResp.Item.EncryptedContent != nil {
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
ThoughtSignature: []byte(*bifrostResp.Item.ResponsesReasoning.EncryptedContent),
})
}
// Track function call metadata for later use in FunctionCallArgumentsDone
if bifrostResp.Item != nil && bifrostResp.Item.Type != nil &&
*bifrostResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall &&
bifrostResp.Item.ResponsesToolMessage != nil {
outputIndex := 0
if bifrostResp.OutputIndex != nil {
outputIndex = *bifrostResp.OutputIndex
}
if bifrostResp.Item.ResponsesToolMessage.Name != nil {
state.ToolCallNames[outputIndex] = *bifrostResp.Item.ResponsesToolMessage.Name
}
if bifrostResp.Item.ResponsesToolMessage.CallID != nil {
state.ToolCallIDs[outputIndex] = *bifrostResp.Item.ResponsesToolMessage.CallID
}
}
return nil
case schemas.ResponsesStreamResponseTypeOutputItemDone:
// Item-done events that were not buffered above produce no chunk.
return nil
case schemas.ResponsesStreamResponseTypeContentPartAdded:
// Handle content parts that contain images, audio, or files
if bifrostResp.Part != nil {
// Parts that fail to convert are dropped silently.
part, err := convertContentBlockToGeminiPart(*bifrostResp.Part)
if err == nil && part != nil {
candidate.Content.Parts = append(candidate.Content.Parts, part)
}
}
case schemas.ResponsesStreamResponseTypeContentPartDone:
// Already handled via ContentPartAdded
return nil
case schemas.ResponsesStreamResponseTypeCompleted:
if bifrostResp.Response != nil {
// Set model version if available
if bifrostResp.Response.Model != "" {
streamResp.ModelVersion = bifrostResp.Response.Model
}
// Convert usage metadata if available
if bifrostResp.Response.Usage != nil {
streamResp.UsageMetadata = ConvertBifrostResponsesUsageToGeminiUsageMetadata(bifrostResp.Response.Usage)
}
// Derive finish reason from StopReason when present
if bifrostResp.Response.StopReason != nil {
candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.Response.StopReason)
} else {
candidate.FinishReason = FinishReasonStop
}
// Attach grounding metadata if we buffered web search data
if state.HasWebSearch && state.WebSearchCall != nil {
candidate.GroundingMetadata = buildGroundingMetadataFromWebSearch(state.WebSearchCall, state.Annotations, state.RenderedContent)
}
}
// Response failed
case schemas.ResponsesStreamResponseTypeFailed:
candidate.FinishReason = FinishReasonOther
// The error message, when present, is surfaced through prompt feedback.
if bifrostResp.Response != nil && bifrostResp.Response.Error != nil {
streamResp.PromptFeedback = &GenerateContentResponsePromptFeedback{
BlockReason: "ERROR",
BlockReasonMessage: bifrostResp.Response.Error.Message,
}
}
// Refusal
case schemas.ResponsesStreamResponseTypeRefusalDelta:
// Refusal text is forwarded as ordinary text parts.
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
Text: *bifrostResp.Delta,
})
}
case schemas.ResponsesStreamResponseTypeRefusalDone:
if bifrostResp.Refusal != nil && *bifrostResp.Refusal != "" {
candidate.FinishReason = FinishReasonSafety
}
default:
// For any other event types we don't explicitly handle, return nil
return nil
}
// If we didn't add any parts and there's no metadata, return nil
if len(candidate.Content.Parts) == 0 && streamResp.UsageMetadata == nil &&
streamResp.PromptFeedback == nil && candidate.FinishReason == "" {
return nil
}
return streamResp
}
// GeminiResponsesStreamState tracks state during streaming conversion for responses API.
// One value accumulates everything needed to turn a sequence of Gemini stream
// chunks into Responses lifecycle events: which lifecycle events were already
// sent, output index allocation, per-item IDs, tool call bookkeeping and
// buffered text.
type GeminiResponsesStreamState struct {
// Lifecycle flags
HasEmittedCreated bool // Whether response.created has been sent
HasEmittedInProgress bool // Whether response.in_progress has been sent
HasEmittedCompleted bool // Whether response.completed has been sent
// Item tracking
CurrentOutputIndex int // Current output index counter
TextOutputIndex int // Output index of the current text item (cached for reuse)
ItemIDs map[int]string // Maps output_index to item ID
TextItemClosed bool // Whether text item has been closed
// Tool call tracking
ToolCallIDs map[int]string // Maps output_index to tool call ID
ToolCallNames map[int]string // Maps output_index to tool name
ToolArgumentBuffers map[int]string // Accumulates tool arguments as JSON
// Response metadata
MessageID *string // Generated message ID
Model *string // Model version
CreatedAt int // Timestamp for consistency
ResponseID *string // Gemini's responseId
// Content tracking
HasStartedText bool // Whether we've started text content
HasStartedToolCall bool // Whether we've started a tool call
TextBuffer strings.Builder // Accumulates text deltas for output_text.done
// Web search tracking
HasEmittedWebSearch bool // Whether web_search_call events have been emitted
}
// geminiResponsesStreamStatePool recycles GeminiResponsesStreamState values.
// Newly created states start with empty maps, no cached text output index (-1)
// and the current Unix time as their creation timestamp; every other field is
// left at its zero value.
var geminiResponsesStreamStatePool = sync.Pool{
	New: func() any {
		fresh := new(GeminiResponsesStreamState)
		fresh.ItemIDs = map[int]string{}
		fresh.ToolCallIDs = map[int]string{}
		fresh.ToolCallNames = map[int]string{}
		fresh.ToolArgumentBuffers = map[int]string{}
		fresh.TextOutputIndex = -1
		fresh.CreatedAt = int(time.Now().Unix())
		return fresh
	},
}
// acquireGeminiResponsesStreamState takes a stream state from the pool and
// resets it before handing it to the caller.
func acquireGeminiResponsesStreamState() *GeminiResponsesStreamState {
	pooled := geminiResponsesStreamStatePool.Get()
	st := pooled.(*GeminiResponsesStreamState)
	st.flush()
	return st
}
// releaseGeminiResponsesStreamState resets the given stream state and puts it
// back into the pool. A nil state is ignored.
func releaseGeminiResponsesStreamState(state *GeminiResponsesStreamState) {
	if state == nil {
		return
	}
	state.flush()
	geminiResponsesStreamStatePool.Put(state)
}
// flush resets the state to the condition of a freshly created one so it can
// be reused for a new stream.
//
// Maps are emptied in place (or allocated when nil) so their storage is
// reused, counters and flags return to their initial values, response metadata
// is dropped, the creation timestamp is refreshed and the text buffer is
// cleared. Every flag must be reset here: states are pooled, so anything left
// behind leaks into the next stream that picks up this value.
func (state *GeminiResponsesStreamState) flush() {
	// Clear maps
	if state.ItemIDs == nil {
		state.ItemIDs = make(map[int]string)
	} else {
		clear(state.ItemIDs)
	}
	if state.ToolCallIDs == nil {
		state.ToolCallIDs = make(map[int]string)
	} else {
		clear(state.ToolCallIDs)
	}
	if state.ToolCallNames == nil {
		state.ToolCallNames = make(map[int]string)
	} else {
		clear(state.ToolCallNames)
	}
	if state.ToolArgumentBuffers == nil {
		state.ToolArgumentBuffers = make(map[int]string)
	} else {
		clear(state.ToolArgumentBuffers)
	}

	// Item tracking
	state.CurrentOutputIndex = 0
	state.TextOutputIndex = -1 // no text item yet
	state.TextItemClosed = false

	// Response metadata
	state.MessageID = nil
	state.Model = nil
	state.ResponseID = nil
	state.CreatedAt = int(time.Now().Unix())

	// Lifecycle flags
	state.HasEmittedCreated = false
	state.HasEmittedInProgress = false
	state.HasEmittedCompleted = false

	// Content tracking
	state.HasStartedText = false
	state.HasStartedToolCall = false
	state.TextBuffer.Reset()

	// Web search tracking: without this reset a reused state would suppress
	// web search events for the next stream.
	state.HasEmittedWebSearch = false
}
// closeTextItemIfOpen emits the closing events for the text item when one has
// been started and not yet closed. It returns nil when there is nothing to
// close.
func (state *GeminiResponsesStreamState) closeTextItemIfOpen(sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	if !state.HasStartedText || state.TextItemClosed {
		return nil
	}
	return closeGeminiTextItem(state, sequenceNumber)
}
// nextOutputIndex hands out the current output index and advances the counter
// by one for the following item.
func (state *GeminiResponsesStreamState) nextOutputIndex() int {
	assigned := state.CurrentOutputIndex
	state.CurrentOutputIndex = assigned + 1
	return assigned
}
// generateItemID builds an item ID from the message ID, the given suffix and
// the output index. Without a message ID it degrades to "<suffix>_<index>".
func (state *GeminiResponsesStreamState) generateItemID(suffix string, outputIndex int) string {
	if state.MessageID == nil {
		return fmt.Sprintf("%s_%d", suffix, outputIndex)
	}
	return fmt.Sprintf("msg_%s_%s_%d", *state.MessageID, suffix, outputIndex)
}
// ToBifrostResponsesStream turns one Gemini stream chunk into the Bifrost
// Responses stream events it implies.
//
// On the first chunk for a given state, response.created and
// response.in_progress are emitted before anything else. Each part of the
// first candidate is then converted via processGeminiPart. When the candidate
// carries a finish reason and at least one item has been opened, pending web
// search events are emitted (once) and all open items are closed.
//
// Events are numbered consecutively starting at sequenceNumber. The returned
// error is always nil.
func (response *GenerateContentResponse) ToBifrostResponsesStream(sequenceNumber int, state *GeminiResponsesStreamState) ([]*schemas.BifrostResponsesStreamResponse, *schemas.BifrostError) {
	var events []*schemas.BifrostResponsesStreamResponse

	if !state.HasEmittedCreated {
		if state.MessageID == nil {
			generatedID := fmt.Sprintf("msg_%d", state.CreatedAt)
			state.MessageID = &generatedID
		}
		// Capture model and response ID from the first chunk that has them.
		if response.ModelVersion != "" && state.Model == nil {
			state.Model = &response.ModelVersion
		}
		if response.ResponseID != "" && state.ResponseID == nil {
			state.ResponseID = &response.ResponseID
		}

		// Both lifecycle events carry the same minimal response snapshot, each
		// in its own object.
		snapshot := func() *schemas.BifrostResponsesResponse {
			r := &schemas.BifrostResponsesResponse{
				ID:        state.MessageID,
				CreatedAt: state.CreatedAt,
			}
			if state.Model != nil {
				r.Model = *state.Model
			}
			return r
		}

		events = append(events, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeCreated,
			SequenceNumber: sequenceNumber + len(events),
			Response:       snapshot(),
		})
		state.HasEmittedCreated = true

		events = append(events, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeInProgress,
			SequenceNumber: sequenceNumber + len(events),
			Response:       snapshot(),
		})
		state.HasEmittedInProgress = true
	}

	if len(response.Candidates) == 0 {
		return events, nil
	}
	candidate := response.Candidates[0]

	if content := candidate.Content; content != nil {
		for _, part := range content.Parts {
			fromPart := processGeminiPart(part, state, sequenceNumber+len(events))
			events = append(events, fromPart...)
		}
	}

	// A finish reason ends generation, but only once some item was opened;
	// chunks that carry nothing but a finish reason emit no closing events.
	if candidate.FinishReason == "" || len(state.ItemIDs) == 0 {
		return events, nil
	}

	// Web search events derived from grounding metadata go out before closing.
	if candidate.GroundingMetadata != nil && !state.HasEmittedWebSearch {
		fromSearch := emitWebSearchFromGroundingMetadata(
			candidate.GroundingMetadata,
			state,
			sequenceNumber+len(events),
		)
		events = append(events, fromSearch...)
	}

	closing := closeGeminiOpenItems(state, candidate.GroundingMetadata, response.UsageMetadata, sequenceNumber+len(events), candidate.FinishReason, candidate.FinishMessage)
	events = append(events, closing...)

	return events, nil
}
// processGeminiPart dispatches one Gemini part to the handler matching its
// content and returns the stream events that handler produced. The first
// matching kind wins, checked in this order: thought text, regular text,
// function call, thought signature, function response, inline data, file
// data. Parts matching none of these yield no events.
func processGeminiPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var produced []*schemas.BifrostResponsesStreamResponse

	hasText := part.Text != ""
	switch {
	case hasText && part.Thought:
		// Reasoning/thinking content
		produced = processGeminiThoughtPart(part, state, sequenceNumber)
	case hasText:
		// Regular text content (Thought is necessarily false here)
		produced = processGeminiTextPart(part, state, sequenceNumber)
	case part.FunctionCall != nil:
		produced = processGeminiFunctionCallPart(part, state, sequenceNumber)
	case part.ThoughtSignature != nil:
		// Encrypted reasoning content (thoughtSignature)
		produced = processGeminiThoughtSignaturePart(part, state, sequenceNumber)
	case part.FunctionResponse != nil:
		// Tool result
		produced = processGeminiFunctionResponsePart(part, state, sequenceNumber)
	case part.InlineData != nil:
		produced = processGeminiInlineDataPart(part, state, sequenceNumber)
	case part.FileData != nil:
		produced = processGeminiFileDataPart(part, state, sequenceNumber)
	}

	// Copy into a fresh slice so the result never aliases the handler's slice.
	var events []*schemas.BifrostResponsesStreamResponse
	return append(events, produced...)
}
// processGeminiTextPart handles a regular (non-thought) text part.
//
// On the first text part of the stream (state.HasStartedText is false) it
// reserves a new output index, caches it in state.TextOutputIndex, registers a
// generated item ID, and emits output_item.added followed by
// content_part.added. Later text parts reuse the cached output index.
//
// When part.Text is non-empty, the text is appended to state.TextBuffer and an
// output_text.delta event is emitted; if the part carries a thought signature
// it is attached to that delta as a standard-base64 string.
//
// Each event is stamped with sequenceNumber plus the number of events already
// queued by this call.
func processGeminiTextPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	// Default to the cached text item; overridden below when this is the
	// first text part and a new item has to be opened.
	outputIndex := state.TextOutputIndex

	if !state.HasStartedText {
		outputIndex = state.nextOutputIndex()
		state.TextOutputIndex = outputIndex
		openedItemID := state.generateItemID("item", outputIndex)
		state.ItemIDs[outputIndex] = openedItemID

		// output_item.added: an in-progress assistant message with no content yet.
		emit(&schemas.BifrostResponsesStreamResponse{
			Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
			OutputIndex: &outputIndex,
			ItemID:      &openedItemID,
			Item: &schemas.ResponsesMessage{
				ID:     &openedItemID,
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("in_progress"),
				Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{},
				},
			},
		})

		// content_part.added: an empty text part that the deltas will fill.
		openedContentIndex := 0
		emit(&schemas.BifrostResponsesStreamResponse{
			Type:         schemas.ResponsesStreamResponseTypeContentPartAdded,
			OutputIndex:  &outputIndex,
			ContentIndex: &openedContentIndex,
			ItemID:       &openedItemID,
			Part: &schemas.ResponsesMessageContentBlock{
				Type: schemas.ResponsesOutputMessageContentTypeText,
				Text: schemas.Ptr(""),
				ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
					LogProbs:    []schemas.ResponsesOutputMessageContentTextLogProb{},
					Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
				},
			},
		})

		state.HasStartedText = true
	}

	// Nothing to stream for an empty text payload.
	if part.Text == "" {
		return events
	}

	deltaItemID := state.ItemIDs[outputIndex]
	deltaContentIndex := 0
	deltaText := part.Text

	// Keep the running text so the eventual output_text.done can carry it.
	state.TextBuffer.WriteString(deltaText)

	deltaEvent := &schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeOutputTextDelta,
		OutputIndex:  &outputIndex,
		ContentIndex: &deltaContentIndex,
		ItemID:       &deltaItemID,
		Delta:        &deltaText,
		LogProbs:     []schemas.ResponsesOutputMessageContentTextLogProb{},
	}
	if len(part.ThoughtSignature) > 0 {
		encodedSignature := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
		deltaEvent.Signature = &encodedSignature
	}
	emit(deltaEvent)

	return events
}
// processGeminiThoughtPart handles a reasoning/thought part.
//
// Any open text item is closed first. The thought is then emitted as a
// self-contained reasoning item on a fresh output index: output_item.added,
// reasoning summary part added, a reasoning summary text delta carrying
// part.Text (only when non-empty), reasoning summary text done, reasoning
// summary part done, and finally output_item.done with status "completed".
// The closing item carries an empty Summary slice; the thought text itself is
// only delivered through the delta event.
//
// Each event created here is stamped with sequenceNumber plus the number of
// events already queued by this call.
func processGeminiThoughtPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	// A thought interrupts any in-flight text item, so close that first.
	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	reasoningIndex := state.nextOutputIndex()
	reasoningID := state.generateItemID("reasoning", reasoningIndex)
	state.ItemIDs[reasoningIndex] = reasoningID

	// Open the reasoning item.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &reasoningIndex,
		ItemID:      &reasoningID,
		Item: &schemas.ResponsesMessage{
			ID:   &reasoningID,
			Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
			Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
		},
	})

	// Open the summary part.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeReasoningSummaryPartAdded,
		OutputIndex: &reasoningIndex,
		ItemID:      &reasoningID,
	})

	// Gemini delivers the thought as one chunk, so a single delta carries it all.
	if part.Text != "" {
		thoughtText := part.Text
		emit(&schemas.BifrostResponsesStreamResponse{
			Type:        schemas.ResponsesStreamResponseTypeReasoningSummaryTextDelta,
			OutputIndex: &reasoningIndex,
			ItemID:      &reasoningID,
			Delta:       &thoughtText,
		})
	}

	// Close the summary text, then the summary part.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
		OutputIndex: &reasoningIndex,
		ItemID:      &reasoningID,
	})
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeReasoningSummaryPartDone,
		OutputIndex: &reasoningIndex,
		ItemID:      &reasoningID,
	})

	// Close the reasoning item.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &reasoningIndex,
		ItemID:      &reasoningID,
		Item: &schemas.ResponsesMessage{
			ID:     &reasoningID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
			Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Status: schemas.Ptr("completed"),
			ResponsesReasoning: &schemas.ResponsesReasoning{
				Summary: []schemas.ResponsesReasoningSummary{},
			},
		},
	})

	return events
}
// processGeminiThoughtSignaturePart handles a part whose payload is a thought
// signature (encrypted reasoning content).
//
// Any open text item is closed first. The signature is standard-base64
// encoded and emitted as a reasoning item on a fresh output index: an
// output_item.added immediately followed by an output_item.done with status
// "completed". Both items carry the encoded signature as EncryptedContent and
// an empty Summary.
func processGeminiThoughtSignaturePart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	reasoningIndex := state.nextOutputIndex()
	reasoningID := state.generateItemID("reasoning", reasoningIndex)
	state.ItemIDs[reasoningIndex] = reasoningID

	encodedSignature := base64.StdEncoding.EncodeToString(part.ThoughtSignature)

	// reasoningItem builds the item payload; status is nil for the "added"
	// event and points at "completed" for the "done" event.
	reasoningItem := func(status *string) *schemas.ResponsesMessage {
		return &schemas.ResponsesMessage{
			ID:     &reasoningID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
			Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Status: status,
			ResponsesReasoning: &schemas.ResponsesReasoning{
				Summary:          []schemas.ResponsesReasoningSummary{},
				EncryptedContent: &encodedSignature,
			},
		}
	}

	// output_item.added for the encrypted reasoning item.
	events = append(events, &schemas.BifrostResponsesStreamResponse{
		Type:           schemas.ResponsesStreamResponseTypeOutputItemAdded,
		SequenceNumber: sequenceNumber + len(events),
		OutputIndex:    &reasoningIndex,
		ItemID:         &reasoningID,
		Item:           reasoningItem(nil),
	})

	// The signature arrives whole, so the item is completed right away.
	events = append(events, &schemas.BifrostResponsesStreamResponse{
		Type:           schemas.ResponsesStreamResponseTypeOutputItemDone,
		SequenceNumber: sequenceNumber + len(events),
		OutputIndex:    &reasoningIndex,
		ItemID:         &reasoningID,
		Item:           reasoningItem(schemas.Ptr("completed")),
	})

	return events
}
// processGeminiFunctionCallPart handles a function call part.
//
// Any open text item is closed first. The call is then emitted as a complete
// function-call item on a fresh output index: output_item.added (in progress,
// empty arguments), synthetic argument deltas when the call has arguments,
// function_call_arguments.done, and output_item.done (completed, full
// arguments).
//
// ID handling: the call ID falls back to the function name when Gemini sends
// none. state.ItemIDs and state.ToolCallIDs record that ID as-is; when the
// part carries a thought signature (and the ID does not already contain
// thoughtSignatureSeparator) the emitted events use the ID suffixed with the
// separator and the raw-URL-base64 signature.
//
// The argument buffer entry written for this output index is removed again
// before returning, and state.HasStartedToolCall is set.
func processGeminiFunctionCallPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	call := part.FunctionCall
	outputIndex := state.nextOutputIndex()

	callID := call.ID
	if callID == "" {
		// Gemini may omit the ID; the function name stands in for it.
		callID = call.Name
	}
	state.ItemIDs[outputIndex] = callID
	state.ToolCallIDs[outputIndex] = callID
	state.ToolCallNames[outputIndex] = call.Name

	var arguments string
	if len(call.Args) > 0 {
		arguments = string(call.Args)
	}
	state.ToolArgumentBuffers[outputIndex] = arguments

	// From here on the emitted ID may carry the thought signature; the state
	// maps above intentionally hold the value computed before this step.
	if len(part.ThoughtSignature) > 0 && !strings.Contains(callID, thoughtSignatureSeparator) {
		encodedSignature := base64.RawURLEncoding.EncodeToString(part.ThoughtSignature)
		callID = fmt.Sprintf("%s%s%s", callID, thoughtSignatureSeparator, encodedSignature)
	}

	// output_item.added: the call starts out in progress with empty arguments.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &outputIndex,
		ItemID:      &callID,
		Item: &schemas.ResponsesMessage{
			ID:     &callID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
			Status: schemas.Ptr("in_progress"),
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				CallID:    &callID,
				Name:      &call.Name,
				Arguments: schemas.Ptr(""),
			},
		},
	})

	// Gemini sends the arguments in one piece; synthetic deltas mimic streaming.
	if arguments != "" {
		events = append(events, generateSyntheticFunctionCallArgumentDeltas(
			arguments,
			&outputIndex,
			&callID,
			sequenceNumber+len(events),
		)...)
	}

	// function_call_arguments.done with the full argument string.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone,
		OutputIndex: &outputIndex,
		ItemID:      &callID,
		Arguments:   &arguments,
		Item: &schemas.ResponsesMessage{
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				CallID: &callID,
				Name:   &call.Name,
			},
		},
	})

	// output_item.done: the completed call.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &outputIndex,
		ItemID:      &callID,
		Item: &schemas.ResponsesMessage{
			ID:     &callID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
			Status: schemas.Ptr("completed"),
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				CallID:    &callID,
				Name:      &call.Name,
				Arguments: &arguments,
			},
		},
	})

	// The call is fully closed, so it must not be closed again at stream end.
	delete(state.ToolArgumentBuffers, outputIndex)
	state.HasStartedToolCall = true

	return events
}
// processGeminiFunctionResponsePart handles a function response (tool result)
// part.
//
// Any open text item is closed first. The response is emitted on a fresh
// output index as a function-call-output item with status "completed": an
// output_item.added immediately followed by an output_item.done carrying an
// equivalent item. The call ID is the response ID, falling back to the
// function name; the item ID is "func_resp_" followed by that call ID. The
// tool name is attached only when it is non-blank after trimming whitespace.
func processGeminiFunctionResponsePart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	fr := part.FunctionResponse
	output := extractFunctionResponseOutput(fr)

	outputIndex := state.nextOutputIndex()
	responseID := fr.ID
	if responseID == "" {
		responseID = fr.Name
	}
	itemID := fmt.Sprintf("func_resp_%s", responseID)
	state.ItemIDs[outputIndex] = itemID

	status := "completed"
	toolName := strings.TrimSpace(fr.Name)

	// newItem builds a fresh item payload so the two events do not share a
	// message struct.
	newItem := func() *schemas.ResponsesMessage {
		msg := &schemas.ResponsesMessage{
			ID:     &itemID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
			Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Status: &status,
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				CallID: &responseID,
				Output: &schemas.ResponsesToolMessageOutputStruct{
					ResponsesToolCallOutputStr: &output,
				},
			},
		}
		if toolName != "" {
			msg.ResponsesToolMessage.Name = schemas.Ptr(toolName)
		}
		return msg
	}

	// output_item.added for the tool result.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item:        newItem(),
	})

	// Function responses arrive whole, so the item is closed immediately.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item:        newItem(),
	})

	return events
}
// processGeminiInlineDataPart handles an inline data part.
//
// Any open text item is closed first. The blob is converted with
// convertGeminiInlineDataToContentBlock; when that yields nil only the
// text-closing events (if any) are returned and no output index is consumed.
// Otherwise the block is emitted as a complete assistant message item on a
// fresh output index: output_item.added (containing the block),
// content_part.added, content_part.done, and output_item.done with status
// "completed". The closing item carries an empty ContentBlocks slice.
func processGeminiInlineDataPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	dataBlock := convertGeminiInlineDataToContentBlock(part.InlineData)
	if dataBlock == nil {
		return events
	}

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	outputIndex := state.nextOutputIndex()
	itemID := state.generateItemID("item", outputIndex)
	state.ItemIDs[outputIndex] = itemID
	contentIndex := 0

	// output_item.added: the message already contains the data block.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item: &schemas.ResponsesMessage{
			ID:   &itemID,
			Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
			Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Content: &schemas.ResponsesMessageContent{
				ContentBlocks: []schemas.ResponsesMessageContentBlock{*dataBlock},
			},
		},
	})

	// content_part.added and content_part.done both reference the same block.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeContentPartAdded,
		OutputIndex:  &outputIndex,
		ContentIndex: &contentIndex,
		ItemID:       &itemID,
		Part:         dataBlock,
	})
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeContentPartDone,
		OutputIndex:  &outputIndex,
		ContentIndex: &contentIndex,
		ItemID:       &itemID,
		Part:         dataBlock,
	})

	// output_item.done: the item is complete as soon as it is announced.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item: &schemas.ResponsesMessage{
			ID:     &itemID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
			Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Status: schemas.Ptr("completed"),
			Content: &schemas.ResponsesMessageContent{
				ContentBlocks: []schemas.ResponsesMessageContentBlock{},
			},
		},
	})

	return events
}
// processGeminiFileDataPart handles a file data part.
//
// Any open text item is closed first. The file reference is converted with
// convertGeminiFileDataToContentBlock; when that yields nil only the
// text-closing events (if any) are returned and no output index is consumed.
// Otherwise the block is emitted as a complete assistant message item on a
// fresh output index: output_item.added (containing the block),
// content_part.added, content_part.done, and output_item.done with status
// "completed". The closing item carries an empty ContentBlocks slice.
func processGeminiFileDataPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	if closed := state.closeTextItemIfOpen(sequenceNumber); closed != nil {
		events = append(events, closed...)
	}

	fileBlock := convertGeminiFileDataToContentBlock(part.FileData)
	if fileBlock == nil {
		return events
	}

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	outputIndex := state.nextOutputIndex()
	itemID := state.generateItemID("item", outputIndex)
	state.ItemIDs[outputIndex] = itemID
	contentIndex := 0

	// output_item.added: the message already contains the file block.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item: &schemas.ResponsesMessage{
			ID:   &itemID,
			Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
			Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Content: &schemas.ResponsesMessageContent{
				ContentBlocks: []schemas.ResponsesMessageContentBlock{*fileBlock},
			},
		},
	})

	// content_part.added and content_part.done both reference the same block.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeContentPartAdded,
		OutputIndex:  &outputIndex,
		ContentIndex: &contentIndex,
		ItemID:       &itemID,
		Part:         fileBlock,
	})
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeContentPartDone,
		OutputIndex:  &outputIndex,
		ContentIndex: &contentIndex,
		ItemID:       &itemID,
		Part:         fileBlock,
	})

	// output_item.done: the item is complete as soon as it is announced.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item: &schemas.ResponsesMessage{
			ID:     &itemID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
			Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
			Status: schemas.Ptr("completed"),
			Content: &schemas.ResponsesMessageContent{
				ContentBlocks: []schemas.ResponsesMessageContentBlock{},
			},
		},
	})

	return events
}
// closeGeminiTextItem closes the text item tracked by state.TextOutputIndex.
//
// It emits, in order: output_text.done carrying everything accumulated in
// state.TextBuffer, content_part.done with an empty text part, and
// output_item.done with a completed assistant message whose ContentBlocks is
// empty. The closing item's ID is set only when an item ID is registered for
// the text output index. state.TextItemClosed is set before returning.
//
// The function does not check whether a text item is actually open; callers
// are expected to decide that.
func closeGeminiTextItem(state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	var events []*schemas.BifrostResponsesStreamResponse

	// emit stamps the event with the next sequence number and queues it.
	emit := func(ev *schemas.BifrostResponsesStreamResponse) {
		ev.SequenceNumber = sequenceNumber + len(events)
		events = append(events, ev)
	}

	textIndex := state.TextOutputIndex
	textItemID := state.ItemIDs[textIndex]
	contentIndex := 0
	accumulated := state.TextBuffer.String()

	// output_text.done with the complete text.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeOutputTextDone,
		OutputIndex:  &textIndex,
		ContentIndex: &contentIndex,
		ItemID:       &textItemID,
		Text:         &accumulated,
		LogProbs:     []schemas.ResponsesOutputMessageContentTextLogProb{},
	})

	// content_part.done with an empty text part.
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:         schemas.ResponsesStreamResponseTypeContentPartDone,
		OutputIndex:  &textIndex,
		ContentIndex: &contentIndex,
		ItemID:       &textItemID,
		Part: &schemas.ResponsesMessageContentBlock{
			Type: schemas.ResponsesOutputMessageContentTypeText,
			Text: schemas.Ptr(""),
			ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
				LogProbs:    []schemas.ResponsesOutputMessageContentTextLogProb{},
				Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
			},
		},
	})

	// output_item.done with the completed message.
	closedItem := &schemas.ResponsesMessage{
		Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
		Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
		Status: schemas.Ptr("completed"),
		Content: &schemas.ResponsesMessageContent{
			ContentBlocks: []schemas.ResponsesMessageContentBlock{},
		},
	}
	if textItemID != "" {
		closedItem.ID = &textItemID
	}
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &textIndex,
		ItemID:      &textItemID,
		Item:        closedItem,
	})

	state.TextItemClosed = true
	return events
}
// closeGeminiOpenItems closes whatever is still open on the stream and emits
// the terminal completed event. Once state.HasEmittedCompleted is set it
// returns nil, so repeated calls do not produce a second completed event.
//
// Events are produced in this order:
//  1. the events returned by state.closeTextItemIfOpen, if any;
//  2. the events returned by emitAnnotationsFromGroundingSupports when
//     groundingMetadata has grounding supports and a text output index exists;
//  3. an output_item.done for every tool call still present in
//     state.ToolArgumentBuffers, in ascending output-index order;
//  4. for an error finish reason with a non-empty finishMessage, a complete
//     synthetic assistant message item whose text is
//     "Error: <finishReason> - <finishMessage>";
//  5. the completed event carrying usage, the model (when known), the stop
//     reason derived from finishReason, and status "failed" for error finish
//     reasons.
//
// Every event created here is stamped with sequenceNumber plus the number of
// events already queued by this call.
func closeGeminiOpenItems(state *GeminiResponsesStreamState, groundingMetadata *GroundingMetadata, usage *GenerateContentResponseUsageMetadata, sequenceNumber int, finishReason FinishReason, finishMessage string) []*schemas.BifrostResponsesStreamResponse {
	if state.HasEmittedCompleted {
		return nil
	}

	var responses []*schemas.BifrostResponsesStreamResponse

	// Close text item if still open
	if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
		responses = append(responses, closeResponses...)
	}

	// Emit annotations from grounding supports if present
	if groundingMetadata != nil && len(groundingMetadata.GroundingSupports) > 0 && state.TextOutputIndex >= 0 {
		annotationResponses := emitAnnotationsFromGroundingSupports(
			groundingMetadata,
			state,
			sequenceNumber+len(responses),
		)
		responses = append(responses, annotationResponses...)
	}

	// Close any open tool calls.
	//
	// Map iteration order is unspecified, so ranging over the buffer map
	// directly would hand out sequence numbers in a different order on every
	// run. Collect the indexes and walk them in ascending order instead.
	openToolIndexes := make([]int, 0, len(state.ToolArgumentBuffers))
	for idx := range state.ToolArgumentBuffers {
		openToolIndexes = append(openToolIndexes, idx)
	}
	// Insertion sort: the set is tiny, and this keeps the block free of any
	// import the file does not already have.
	for i := 1; i < len(openToolIndexes); i++ {
		for j := i; j > 0 && openToolIndexes[j-1] > openToolIndexes[j]; j-- {
			openToolIndexes[j-1], openToolIndexes[j] = openToolIndexes[j], openToolIndexes[j-1]
		}
	}
	for _, idx := range openToolIndexes {
		// Per-iteration copy: its address is stored in the event, so it must
		// not be a variable shared between iterations (pre-Go 1.22 semantics).
		outputIndex := idx
		itemID := state.ItemIDs[outputIndex]
		toolCallID := state.ToolCallIDs[outputIndex]
		toolName := state.ToolCallNames[outputIndex]
		toolArgs := state.ToolArgumentBuffers[outputIndex]
		if strings.TrimSpace(toolName) == "" {
			// No usable name recorded; fall back to the call ID.
			toolName = toolCallID
		}

		// Emit output_item.done for tool call
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeOutputItemDone,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ItemID:         &itemID,
			Item: &schemas.ResponsesMessage{
				ID:     &itemID,
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
				Status: schemas.Ptr("completed"),
				ResponsesToolMessage: &schemas.ResponsesToolMessage{
					CallID:    &toolCallID,
					Name:      &toolName,
					Arguments: &toolArgs,
				},
			},
		})
	}

	// For error finish reasons with a finish message, emit the error as text
	// content BEFORE the completed event so the message is visible to the client.
	if isErrorFinishReason(finishReason) && finishMessage != "" {
		errorText := fmt.Sprintf("Error: %s - %s", finishReason, finishMessage)
		outputIndex := state.nextOutputIndex()
		itemID := state.generateItemID("error", outputIndex)
		state.ItemIDs[outputIndex] = itemID
		contentIndex := 0

		// Emit output_item.added for error message
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeOutputItemAdded,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ItemID:         &itemID,
			Item: &schemas.ResponsesMessage{
				ID:     &itemID,
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("in_progress"),
				Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{},
				},
			},
		})

		// Emit content_part.added
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeContentPartAdded,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ContentIndex:   &contentIndex,
			ItemID:         &itemID,
			Part: &schemas.ResponsesMessageContentBlock{
				Type: schemas.ResponsesOutputMessageContentTypeText,
				Text: schemas.Ptr(""),
			},
		})

		// Emit output_text.delta with the error message
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeOutputTextDelta,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ContentIndex:   &contentIndex,
			ItemID:         &itemID,
			Delta:          &errorText,
		})

		// Emit output_text.done
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeOutputTextDone,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ContentIndex:   &contentIndex,
			ItemID:         &itemID,
			Text:           &errorText,
		})

		// Emit content_part.done
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeContentPartDone,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ContentIndex:   &contentIndex,
			ItemID:         &itemID,
			Part: &schemas.ResponsesMessageContentBlock{
				Type: schemas.ResponsesOutputMessageContentTypeText,
				Text: &errorText,
			},
		})

		// Emit output_item.done for error message
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeOutputItemDone,
			SequenceNumber: sequenceNumber + len(responses),
			OutputIndex:    &outputIndex,
			ItemID:         &itemID,
			Item: &schemas.ResponsesMessage{
				ID:     &itemID,
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("completed"),
				Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{
						{
							Type: schemas.ResponsesOutputMessageContentTypeText,
							Text: &errorText,
						},
					},
				},
			},
		})
	}

	// Emit response.completed with usage
	bifrostUsage := ConvertGeminiUsageMetadataToResponsesUsage(usage)
	completedResp := &schemas.BifrostResponsesResponse{
		ID:        state.MessageID,
		CreatedAt: state.CreatedAt,
		Usage:     bifrostUsage,
	}
	if state.Model != nil {
		completedResp.Model = *state.Model
	}

	// Set stop reason from finish reason
	if finishReason != "" {
		stopReason := ConvertGeminiFinishReasonToBifrost(finishReason)
		completedResp.StopReason = &stopReason
		// For error finish reasons, set status to failed
		if isErrorFinishReason(finishReason) {
			failedStatus := "failed"
			completedResp.Status = &failedStatus
		}
	}

	responses = append(responses, &schemas.BifrostResponsesStreamResponse{
		Type:           schemas.ResponsesStreamResponseTypeCompleted,
		SequenceNumber: sequenceNumber + len(responses),
		Response:       completedResp,
	})

	// Latch so later calls (e.g. from stream finalization) become no-ops.
	state.HasEmittedCompleted = true
	return responses
}
// FinalizeGeminiResponsesStream finalizes the stream by delegating to
// closeGeminiOpenItems with no grounding metadata, no finish reason and no
// finish message. Events are numbered starting at sequenceNumber; usage is
// forwarded for the completed event.
func FinalizeGeminiResponsesStream(state *GeminiResponsesStreamState, usage *GenerateContentResponseUsageMetadata, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	// Finalization happens without a candidate, so none of the
	// candidate-derived inputs are available.
	var noGrounding *GroundingMetadata
	const (
		noFinishReason  FinishReason = ""
		noFinishMessage string       = ""
	)
	return closeGeminiOpenItems(state, noGrounding, usage, sequenceNumber, noFinishReason, noFinishMessage)
}
// convertGeminiSystemInstructionToResponsesMessage converts a Gemini
// SystemInstruction into a single system-role message.
//
// Only parts with non-empty text are used. It returns nil when the
// instruction is nil, has no parts, or has no text-bearing part. A single text
// part is returned as ContentStr; several are returned as ContentBlocks of
// input-text type, in their original order.
func convertGeminiSystemInstructionToResponsesMessage(systemInstruction *Content) *schemas.ResponsesMessage {
	if systemInstruction == nil || len(systemInstruction.Parts) == 0 {
		return nil
	}

	var textBlocks []schemas.ResponsesMessageContentBlock
	for _, part := range systemInstruction.Parts {
		if part.Text == "" {
			continue
		}
		textBlocks = append(textBlocks, schemas.ResponsesMessageContentBlock{
			Type: schemas.ResponsesInputMessageContentBlockTypeText,
			Text: &part.Text,
		})
	}

	content := &schemas.ResponsesMessageContent{}
	switch len(textBlocks) {
	case 0:
		// Nothing textual to carry over.
		return nil
	case 1:
		// A lone text block collapses to the plain-string form.
		content.ContentStr = textBlocks[0].Text
	default:
		content.ContentBlocks = textBlocks
	}

	return &schemas.ResponsesMessage{
		Role:    schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
		Content: content,
	}
}
// convertGeminiContentsToResponsesMessages flattens Gemini contents into
// Responses messages, one message per recognised part.
//
// Role mapping: "model" becomes assistant; "user" and any other role become
// user. For each part the first matching rule applies:
//   - function call: a function_call message (arguments default to "{}",
//     call ID falls back to the function name), followed by a separate
//     reasoning message when the part carries a thought signature;
//   - function response: a function_call_output message whose call ID is the
//     response ID, else the ID of the most recent call with the same function
//     name, else the function name. The output is the JSON "output" field
//     when present, otherwise the raw response;
//   - thought text: a reasoning message with one reasoning content block;
//   - regular text: a message with one text block (output-text type for
//     "model" contents, input-text type otherwise), carrying the
//     base64-encoded thought signature when present;
//   - inline data / file data: a message with the converted block, skipped
//     when conversion yields nil.
//
// Parts matching none of these are dropped.
func convertGeminiContentsToResponsesMessages(contents []Content) []schemas.ResponsesMessage {
	var out []schemas.ResponsesMessage

	// Latest call ID seen per function name, used to pair ID-less function
	// responses with their call.
	callIDByName := make(map[string]string)

	for _, content := range contents {
		// Role and text block type depend only on the content, not on the part.
		var role *schemas.ResponsesMessageRoleType
		var textBlockType schemas.ResponsesMessageContentBlockType
		if content.Role == "model" {
			role = schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant)
			textBlockType = schemas.ResponsesOutputMessageContentTypeText
		} else {
			// "user" and unknown roles are both treated as user input.
			role = schemas.Ptr(schemas.ResponsesInputMessageRoleUser)
			textBlockType = schemas.ResponsesInputMessageContentBlockTypeText
		}

		for _, part := range content.Parts {
			switch {
			case part.FunctionCall != nil:
				call := part.FunctionCall

				arguments := "{}"
				if len(call.Args) > 0 {
					arguments = string(call.Args)
				}
				callID := call.ID
				if callID == "" {
					callID = call.Name
				}
				callIDByName[call.Name] = callID

				out = append(out, schemas.ResponsesMessage{
					Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
					ResponsesToolMessage: &schemas.ResponsesToolMessage{
						CallID:    &callID,
						Name:      &call.Name,
						Arguments: &arguments,
					},
				})

				// A signature on the call travels as its own reasoning message.
				if len(part.ThoughtSignature) > 0 {
					encodedSignature := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
					out = append(out, schemas.ResponsesMessage{
						Role: role,
						Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
						ResponsesReasoning: &schemas.ResponsesReasoning{
							Summary:          []schemas.ResponsesReasoningSummary{},
							EncryptedContent: &encodedSignature,
						},
					})
				}

			case part.FunctionResponse != nil:
				fr := part.FunctionResponse

				responseID := fr.ID
				if responseID == "" {
					// Prefer the ID of the matching call; otherwise use the name.
					responseID = fr.Name
					if knownID, ok := callIDByName[fr.Name]; ok {
						responseID = knownID
					}
				}

				// Prefer the "output" field of the response JSON when it exists.
				var outputText string
				if fr.Response != nil {
					outputField := providerUtils.GetJSONField(fr.Response, "output")
					if outputField.Exists() {
						outputText = outputField.String()
					} else {
						outputText = string(fr.Response)
					}
				}

				out = append(out, schemas.ResponsesMessage{
					Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
					ResponsesToolMessage: &schemas.ResponsesToolMessage{
						CallID: &responseID,
						Output: &schemas.ResponsesToolMessageOutputStruct{
							ResponsesToolCallOutputStr: &outputText,
						},
					},
				})

			case part.Thought && part.Text != "":
				out = append(out, schemas.ResponsesMessage{
					Role: role,
					Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{
							{
								Type: schemas.ResponsesOutputMessageContentTypeReasoning,
								Text: &part.Text,
							},
						},
					},
				})

			case part.Text != "":
				textBlock := schemas.ResponsesMessageContentBlock{
					Type: textBlockType,
					Text: &part.Text,
				}
				if len(part.ThoughtSignature) > 0 {
					encodedSignature := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
					textBlock.Signature = &encodedSignature
				}
				out = append(out, schemas.ResponsesMessage{
					Role: role,
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{textBlock},
					},
				})

			case part.InlineData != nil:
				// Inline data (images, audio, files); unconvertible blobs are skipped.
				inlineBlock := convertGeminiInlineDataToContentBlock(part.InlineData)
				if inlineBlock == nil {
					continue
				}
				out = append(out, schemas.ResponsesMessage{
					Role: role,
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{*inlineBlock},
					},
				})

			case part.FileData != nil:
				// URI-based file data; unconvertible references are skipped.
				fileBlock := convertGeminiFileDataToContentBlock(part.FileData)
				if fileBlock == nil {
					continue
				}
				out = append(out, schemas.ResponsesMessage{
					Role: role,
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{*fileBlock},
					},
				})
			}
		}
	}

	return out
}
// convertGeminiInlineDataToContentBlock converts Gemini inline data (blob) to a content block.
//
// The block type is chosen from the blob's MIME type:
//   - MIME types accepted by isImageMimeType become an image block whose
//     ImageURL is a base64 data URL,
//   - "audio/*" becomes an audio block whose Format is the MIME subtype,
//   - everything else becomes a file block whose FileData is a base64 data URL.
//
// It returns nil when blob is nil or carries no MIME type.
func convertGeminiInlineDataToContentBlock(blob *Blob) *schemas.ResponsesMessageContentBlock {
	if blob == nil || blob.MIMEType == "" {
		return nil
	}
	mimeType := blob.MIMEType

	// Images: wrap the payload in a data URL.
	if isImageMimeType(mimeType) {
		imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, blob.Data)
		return &schemas.ResponsesMessageContentBlock{
			Type: schemas.ResponsesInputMessageContentBlockTypeImage,
			ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{
				ImageURL: &imageURL,
			},
		}
	}

	// Audio: the payload is passed through as-is; the format is the MIME subtype
	// (e.g. "audio/wav" -> "wav").
	if strings.HasPrefix(mimeType, "audio/") {
		return &schemas.ResponsesMessageContentBlock{
			Type: schemas.ResponsesInputMessageContentBlockTypeAudio,
			Audio: &schemas.ResponsesInputMessageContentBlockAudio{
				Format: strings.TrimPrefix(mimeType, "audio/"),
				Data:   blob.Data,
			},
		}
	}

	// Any other MIME type is treated as a generic file.
	filename := blob.DisplayName
	if filename == "" {
		filename = "unnamed_file"
	}
	// Only wrap the payload when it is not already a data URL.
	fileDataURL := blob.Data
	if !strings.HasPrefix(fileDataURL, "data:") {
		fileDataURL = fmt.Sprintf("data:%s;base64,%s", mimeType, fileDataURL)
	}
	return &schemas.ResponsesMessageContentBlock{
		Type: schemas.ResponsesInputMessageContentBlockTypeFile,
		ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
			FileData: &fileDataURL,
			FileType: &mimeType,
			Filename: &filename,
		},
	}
}
// convertGeminiFileDataToContentBlock converts Gemini file data (URI) to a content block.
//
// A missing MIME type is treated as "application/pdf". MIME types accepted by
// isImageMimeType yield an image block whose ImageURL is the file URI; all
// other types yield a file block with FileURL and FileType set.
// It returns nil when fileData is nil or has an empty URI.
func convertGeminiFileDataToContentBlock(fileData *FileData) *schemas.ResponsesMessageContentBlock {
	if fileData == nil {
		return nil
	}
	if fileData.FileURI == "" {
		return nil
	}

	fileType := fileData.MIMEType
	if fileType == "" {
		fileType = "application/pdf"
	}

	// Non-image content is referenced as a file; FileType is always populated
	// (with the default above when Gemini supplied none).
	if !isImageMimeType(fileType) {
		return &schemas.ResponsesMessageContentBlock{
			Type: schemas.ResponsesInputMessageContentBlockTypeFile,
			ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
				FileURL:  &fileData.FileURI,
				FileType: &fileType,
			},
		}
	}

	// Images are referenced directly by URI.
	return &schemas.ResponsesMessageContentBlock{
		Type: schemas.ResponsesInputMessageContentBlockTypeImage,
		ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{
			ImageURL: &fileData.FileURI,
		},
	}
}
// convertGeminiToolsToResponsesTools converts Gemini tools to Responses tools.
//
// A tool with GoogleSearch set becomes a single web_search tool (its function
// declarations, if any, are ignored because the two cannot be used together).
// Otherwise each function declaration becomes one function tool.
func convertGeminiToolsToResponsesTools(tools []Tool) []schemas.ResponsesTool {
	var converted []schemas.ResponsesTool

	for _, geminiTool := range tools {
		search := geminiTool.GoogleSearch

		// No Google Search on this tool: emit its function declarations.
		if search == nil {
			for _, decl := range geminiTool.FunctionDeclarations {
				fnTool := schemas.ResponsesTool{
					Type:                  schemas.ResponsesToolTypeFunction,
					Name:                  schemas.Ptr(decl.Name),
					Description:           schemas.Ptr(decl.Description),
					ResponsesToolFunction: &schemas.ResponsesToolFunction{},
				}
				// The parameter schema is optional.
				if decl.Parameters != nil {
					fnParams := convertSchemaToFunctionParameters(decl.Parameters)
					fnTool.ResponsesToolFunction.Parameters = &fnParams
				}
				converted = append(converted, fnTool)
			}
			continue
		}

		// Google Search: filters are attached only when at least one is configured.
		webSearch := &schemas.ResponsesToolWebSearch{}
		if search.TimeRangeFilter != nil || len(search.ExcludeDomains) > 0 {
			filters := &schemas.ResponsesToolWebSearchFilters{
				BlockedDomains: search.ExcludeDomains,
			}
			if search.TimeRangeFilter != nil {
				filters.TimeRangeFilter = &schemas.Interval{
					StartTime: search.TimeRangeFilter.StartTime,
					EndTime:   search.TimeRangeFilter.EndTime,
				}
			}
			webSearch.Filters = filters
		}
		converted = append(converted, schemas.ResponsesTool{
			Type:                   schemas.ResponsesToolTypeWebSearch,
			ResponsesToolWebSearch: webSearch,
		})
	}

	return converted
}
// convertGeminiToolConfigToToolChoice converts a Gemini tool config to a Responses tool choice.
//
// Mode mapping:
//   - FunctionCallingConfigModeNone -> "none"
//   - FunctionCallingConfigModeAny  -> "required" (mirrors convertResponsesToolChoiceToGemini,
//     which maps "required" back to FunctionCallingConfigModeAny)
//   - FunctionCallingConfigModeAuto and any other value -> "auto"
//
// Every allowed function name is carried over as an allowed function tool.
// It returns nil when toolConfig or its FunctionCallingConfig is nil.
func convertGeminiToolConfigToToolChoice(toolConfig *ToolConfig) *schemas.ResponsesToolChoice {
	if toolConfig == nil || toolConfig.FunctionCallingConfig == nil {
		return nil
	}
	fnConfig := toolConfig.FunctionCallingConfig

	toolChoice := &schemas.ResponsesToolChoiceStruct{
		Type: schemas.ResponsesToolChoiceTypeFunction,
	}

	switch fnConfig.Mode {
	case FunctionCallingConfigModeNone:
		toolChoice.Mode = schemas.Ptr("none")
	case FunctionCallingConfigModeAny:
		// Previously fell through to "auto", which silently dropped the
		// "must call a function" requirement.
		toolChoice.Mode = schemas.Ptr("required")
	default:
		// FunctionCallingConfigModeAuto and unspecified/unknown modes.
		toolChoice.Mode = schemas.Ptr("auto")
	}

	for _, functionName := range fnConfig.AllowedFunctionNames {
		toolChoice.Tools = append(toolChoice.Tools, schemas.ResponsesToolChoiceAllowedToolDef{
			Type: string(schemas.ResponsesToolTypeFunction),
			Name: schemas.Ptr(functionName),
		})
	}

	return &schemas.ResponsesToolChoice{
		ResponsesToolChoiceStruct: toolChoice,
	}
}
// Helper functions for Responses conversion
// convertGeminiCandidatesToResponsesOutput converts Gemini candidates to Responses output messages.
//
// Every part of every candidate becomes its own message; the first matching rule wins:
//   - thought with text       -> reasoning message
//   - plain text              -> assistant message (a thought signature, if present, is
//     attached to the text block, standard base64)
//   - function call           -> function_call message (a thought signature, if present, is
//     appended to the call ID, raw URL-safe base64)
//   - function response       -> function_call_output message
//   - inline data / file data -> message carrying an image, audio or file block
//   - code execution result   -> code_interpreter_call message
//   - executable code         -> message with the code in a fenced block
//   - bare thought signature  -> reasoning message holding the signature as encrypted content
//
// When a candidate carries grounding metadata, a web_search_call message, an optional
// annotations message and an optional rendered-content message follow its parts.
// Nil candidates, candidates without content (including their grounding metadata)
// and nil parts are skipped.
func convertGeminiCandidatesToResponsesOutput(candidates []*Candidate) []schemas.ResponsesMessage {
	var messages []schemas.ResponsesMessage

	for _, candidate := range candidates {
		// Guard against nil entries so a sparse candidate list cannot panic.
		if candidate == nil || candidate.Content == nil || len(candidate.Content.Parts) == 0 {
			continue
		}

		for _, part := range candidate.Content.Parts {
			if part == nil {
				continue
			}

			// Handle different types of parts
			switch {
			case part.Thought && part.Text != "":
				// Thinking/reasoning message. Thought parts without text fall through to
				// the remaining cases so any other payload they carry is not dropped.
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{
							{
								Type: schemas.ResponsesOutputMessageContentTypeReasoning,
								Text: &part.Text,
							},
						},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
				}
				messages = append(messages, msg)

			case part.Text != "":
				// Regular text message
				msg := schemas.ResponsesMessage{
					ID:     schemas.Ptr("msg_" + providerUtils.GetRandomString(50)),
					Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Status: schemas.Ptr("completed"),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{
							{
								Type: schemas.ResponsesOutputMessageContentTypeText,
								Text: &part.Text,
								ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
									LogProbs:    []schemas.ResponsesOutputMessageContentTextLogProb{},
									Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
								},
							},
						},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				}
				// add signature to above text content block if present
				if len(part.ThoughtSignature) > 0 {
					thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
					msg.Content.ContentBlocks[len(msg.Content.ContentBlocks)-1].Signature = &thoughtSig
				}
				messages = append(messages, msg)

			case part.FunctionCall != nil:
				// Function call message; Args are forwarded as the raw JSON text.
				argumentsStr := ""
				if len(part.FunctionCall.Args) > 0 {
					argumentsStr = string(part.FunctionCall.Args)
				}

				// Fall back to the function name when Gemini supplied no call ID.
				callID := part.FunctionCall.ID
				if strings.TrimSpace(callID) == "" {
					callID = part.FunctionCall.Name
				}

				// Attach thought signature to callID (same as streaming path)
				if len(part.ThoughtSignature) > 0 && !strings.Contains(callID, thoughtSignatureSeparator) {
					thoughtSig := base64.RawURLEncoding.EncodeToString(part.ThoughtSignature)
					callID = fmt.Sprintf("%s%s%s", callID, thoughtSignatureSeparator, thoughtSig)
				}

				name := part.FunctionCall.Name
				msg := schemas.ResponsesMessage{
					ID:     schemas.Ptr("fc_" + providerUtils.GetRandomString(50)),
					Role:   schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
					Status: schemas.Ptr("completed"),
					ResponsesToolMessage: &schemas.ResponsesToolMessage{
						CallID:    &callID,
						Name:      &name,
						Arguments: &argumentsStr,
					},
				}
				messages = append(messages, msg)

			case part.FunctionResponse != nil:
				// Function response message
				output := extractFunctionResponseOutput(part.FunctionResponse)
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
					ResponsesToolMessage: &schemas.ResponsesToolMessage{
						CallID: schemas.Ptr(part.FunctionResponse.ID),
						Output: &schemas.ResponsesToolMessageOutputStruct{
							ResponsesToolCallOutputStr: &output,
						},
					},
				}

				// Also set the tool name if present (Gemini associates on name);
				// otherwise derive it from the call ID, dropping any thought
				// signature suffix that follows the separator.
				if name := strings.TrimSpace(part.FunctionResponse.Name); name != "" {
					msg.ResponsesToolMessage.Name = schemas.Ptr(name)
				} else {
					nameFromID, _, _ := strings.Cut(part.FunctionResponse.ID, thoughtSignatureSeparator)
					msg.ResponsesToolMessage.Name = schemas.Ptr(nameFromID)
				}
				messages = append(messages, msg)

			case part.InlineData != nil:
				// Handle inline data (images, audio, etc.)
				// NOTE(review): blobs that are neither image nor audio end up as a text-typed
				// block with no payload — confirm whether they should become file blocks.
				mimeType := part.InlineData.MIMEType
				block := schemas.ResponsesMessageContentBlock{
					Type: schemas.ResponsesInputMessageContentBlockTypeText,
				}
				switch {
				case strings.HasPrefix(mimeType, "image/"):
					block.Type = schemas.ResponsesInputMessageContentBlockTypeImage
					block.ResponsesInputMessageContentBlockImage = &schemas.ResponsesInputMessageContentBlockImage{
						ImageURL: schemas.Ptr("data:" + mimeType + ";base64," + part.InlineData.Data),
					}
				case strings.HasPrefix(mimeType, "audio/"):
					block.Type = schemas.ResponsesInputMessageContentBlockTypeAudio
					block.Audio = &schemas.ResponsesInputMessageContentBlockAudio{
						// Extract format from MIME type (e.g., "audio/wav" -> "wav")
						Format: strings.TrimPrefix(mimeType, "audio/"),
						Data:   part.InlineData.Data,
					}
				}
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{block},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				}
				messages = append(messages, msg)

			case part.FileData != nil:
				// Handle file data: a file block by default; image MIME types switch the
				// block type to image and additionally carry the URI as the image URL.
				block := schemas.ResponsesMessageContentBlock{
					Type: schemas.ResponsesInputMessageContentBlockTypeFile,
					ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
						FileURL: schemas.Ptr(part.FileData.FileURI),
					},
				}
				if strings.HasPrefix(part.FileData.MIMEType, "image/") {
					block.Type = schemas.ResponsesInputMessageContentBlockTypeImage
					block.ResponsesInputMessageContentBlockImage = &schemas.ResponsesInputMessageContentBlockImage{
						ImageURL: schemas.Ptr(part.FileData.FileURI),
					}
				}
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{block},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				}
				messages = append(messages, msg)

			case part.CodeExecutionResult != nil:
				// Handle code execution results; non-OK outcomes are prefixed with "Error: ".
				output := part.CodeExecutionResult.Output
				if part.CodeExecutionResult.Outcome != OutcomeOK {
					output = "Error: " + output
				}
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{
							{
								Type: schemas.ResponsesOutputMessageContentTypeText,
								Text: &output,
							},
						},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeCodeInterpreterCall),
				}
				messages = append(messages, msg)

			case part.ExecutableCode != nil:
				// Handle executable code: rendered as a fenced code block.
				codeContent := "```" + part.ExecutableCode.Language + "\n" + part.ExecutableCode.Code + "\n```"
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Content: &schemas.ResponsesMessageContent{
						ContentBlocks: []schemas.ResponsesMessageContentBlock{
							{
								Type: schemas.ResponsesOutputMessageContentTypeText,
								Text: &codeContent,
							},
						},
					},
					Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				}
				messages = append(messages, msg)

			case len(part.ThoughtSignature) > 0:
				// Handle a bare thought signature. The length check (rather than != nil)
				// matches the other signature checks in this function and avoids emitting
				// a reasoning message with empty encrypted content.
				thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
				msg := schemas.ResponsesMessage{
					Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
					Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
					ResponsesReasoning: &schemas.ResponsesReasoning{
						Summary:          []schemas.ResponsesReasoningSummary{},
						EncryptedContent: &thoughtSig,
					},
				}
				messages = append(messages, msg)
			}
		}

		// check if gemini used google search tool
		grounding := candidate.GroundingMetadata
		if grounding == nil {
			continue
		}

		searchAction := &schemas.ResponsesWebSearchToolCallAction{
			Type:    "search",
			Queries: grounding.WebSearchQueries,
		}
		// The first query doubles as the primary query.
		if len(grounding.WebSearchQueries) > 0 {
			searchAction.Query = schemas.Ptr(grounding.WebSearchQueries[0])
		}
		// Only web-backed grounding chunks are reported as sources.
		var sources []schemas.ResponsesWebSearchToolCallActionSearchSource
		for _, source := range grounding.GroundingChunks {
			if source.Web != nil {
				sources = append(sources, schemas.ResponsesWebSearchToolCallActionSearchSource{
					Type:  "url",
					Title: schemas.Ptr(source.Web.Title),
					URL:   source.Web.URI,
				})
			}
		}
		if len(sources) > 0 {
			searchAction.Sources = sources
		}
		messages = append(messages, schemas.ResponsesMessage{
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
			Status: schemas.Ptr("completed"),
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				Action: &schemas.ResponsesToolMessageActionStruct{
					ResponsesWebSearchToolCallAction: searchAction,
				},
			},
		})

		// create a annotations message for grounding supports
		if len(grounding.GroundingSupports) > 0 {
			annotations := []schemas.ResponsesOutputMessageContentTextAnnotation{}
			for _, support := range grounding.GroundingSupports {
				if support.Segment == nil {
					continue
				}
				annotation := schemas.ResponsesOutputMessageContentTextAnnotation{
					Type:       "url_citation",
					Text:       schemas.Ptr(support.Segment.Text),
					StartIndex: schemas.Ptr(int(support.Segment.StartIndex)),
					EndIndex:   schemas.Ptr(int(support.Segment.EndIndex)),
				}
				// Look up URL from grounding chunks (first referenced chunk only).
				if len(support.GroundingChunkIndices) > 0 {
					chunkIdx := support.GroundingChunkIndices[0]
					if chunkIdx >= 0 && int(chunkIdx) < len(grounding.GroundingChunks) {
						chunk := grounding.GroundingChunks[chunkIdx]
						if chunk.Web != nil {
							annotation.URL = schemas.Ptr(chunk.Web.URI)
							if chunk.Web.Title != "" {
								annotation.Title = schemas.Ptr(chunk.Web.Title)
							}
						}
					}
				}
				// Supports whose URL could not be resolved are dropped.
				if annotation.URL != nil {
					annotations = append(annotations, annotation)
				}
			}
			messages = append(messages, schemas.ResponsesMessage{
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("completed"),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{
						{
							Type: schemas.ResponsesOutputMessageContentTypeText,
							Text: schemas.Ptr(""),
							ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
								Annotations: annotations,
							},
						},
					},
				},
			})
		}

		// Emit rendered content if present
		if grounding.SearchEntryPoint != nil && grounding.SearchEntryPoint.RenderedContent != "" {
			messages = append(messages, schemas.ResponsesMessage{
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("completed"),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{
						{
							Type: schemas.ResponsesOutputMessageContentTypeRenderedContent,
							ResponsesOutputMessageContentRenderedContent: &schemas.ResponsesOutputMessageContentRenderedContent{
								RenderedContent: grounding.SearchEntryPoint.RenderedContent,
							},
						},
					},
				},
			})
		}
	}

	return messages
}
// convertTextConfigToGenerationConfig applies a ResponsesTextConfig's output format
// to Gemini's GenerationConfig.
//
// "text" selects the text/plain response MIME type; "json_object" and "json_schema"
// select application/json, and "json_schema" additionally sets ResponseJSONSchema when
// a schema can be reconstructed. Other format types leave config untouched, as do a
// nil textConfig, nil config or nil Format.
func convertTextConfigToGenerationConfig(textConfig *schemas.ResponsesTextConfig, config *GenerationConfig) {
	if textConfig == nil || config == nil || textConfig.Format == nil {
		return
	}
	format := textConfig.Format

	switch format.Type {
	case "text":
		config.ResponseMIMEType = "text/plain"
	case "json_object":
		config.ResponseMIMEType = "application/json"
	case "json_schema":
		config.ResponseMIMEType = "application/json"
		if format.JSONSchema == nil {
			return
		}
		// Without a usable schema only the MIME type is set.
		if rebuilt := reconstructSchemaFromJSONSchema(format.JSONSchema); rebuilt != nil {
			config.ResponseJSONSchema = rebuilt
		}
	}
}
// reconstructSchemaFromJSONSchema rebuilds a schema from ResponsesTextConfigFormatJSONSchema.
//
// When the Schema field is set it wins: a map value is normalized for Gemini and
// returned, any other value is returned unchanged. Otherwise the schema is assembled
// from the individual fields (Name is emitted as "title") and normalized; nil is
// returned when none of those fields is populated.
func reconstructSchemaFromJSONSchema(jsonSchema *schemas.ResponsesTextConfigFormatJSONSchema) interface{} {
	// Pre-built schema supplied directly.
	if raw := jsonSchema.Schema; raw != nil {
		asMap, isMap := (*raw).(map[string]interface{})
		if !isMap {
			return *raw
		}
		// Normalize the schema for Gemini compatibility (handle union types, etc.)
		return normalizeSchemaForGemini(asMap)
	}

	// Schema spread across individual fields.
	rebuilt := map[string]interface{}{}
	if jsonSchema.Defs != nil {
		rebuilt["$defs"] = *jsonSchema.Defs
	}
	if jsonSchema.Type != nil {
		rebuilt["type"] = *jsonSchema.Type
	}
	if jsonSchema.Properties != nil {
		rebuilt["properties"] = *jsonSchema.Properties
	}
	if len(jsonSchema.Required) > 0 {
		rebuilt["required"] = jsonSchema.Required
	}
	if jsonSchema.Description != nil {
		rebuilt["description"] = *jsonSchema.Description
	}
	if jsonSchema.AdditionalProperties != nil {
		rebuilt["additionalProperties"] = *jsonSchema.AdditionalProperties
	}
	if jsonSchema.Name != nil {
		rebuilt["title"] = *jsonSchema.Name
	}
	if len(jsonSchema.PropertyOrdering) > 0 {
		rebuilt["propertyOrdering"] = jsonSchema.PropertyOrdering
	}

	// Nothing to send when no field was populated.
	if len(rebuilt) == 0 {
		return nil
	}
	return normalizeSchemaForGemini(rebuilt)
}
// convertParamsToGenerationConfigResponses converts ResponsesParameters to a Gemini
// GenerationConfig.
//
// Reasoning is translated only when supportsThinkingConfig reports support for r.Model:
//   - max_tokens present: it is used as the thinking budget and effort is ignored
//     (0 disables thoughts, DynamicReasoningBudget is passed through, any other
//     value must pass validateThinkingBudget);
//   - effort "none": thoughts are disabled with a zero budget;
//   - any other effort: thinkingLevel when isGemini3Plus reports true for r.Model,
//     otherwise a budget derived from the effort. A failed derivation leaves the
//     budget unset.
//
// The top_k, frequency_penalty, presence_penalty and stop_sequences entries are
// removed from params.ExtraParams as they are consumed.
// An error is returned only when validateThinkingBudget rejects the budget.
func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(params *schemas.ResponsesParameters) (GenerationConfig, error) {
	var config GenerationConfig

	if temperature := params.Temperature; temperature != nil {
		config.Temperature = schemas.Ptr(float64(*temperature))
	}
	if topP := params.TopP; topP != nil {
		config.TopP = schemas.Ptr(float64(*topP))
	}
	if maxOutput := params.MaxOutputTokens; maxOutput != nil {
		config.MaxOutputTokens = int32(*maxOutput)
	}

	// Only set ThinkingConfig if the model actually supports thinking
	if reasoning := params.Reasoning; reasoning != nil && supportsThinkingConfig(r.Model) {
		thinking := &GenerationConfigThinkingConfig{
			IncludeThoughts: true,
		}
		config.ThinkingConfig = thinking
		levelCapable := isGemini3Plus(r.Model) // Check if model is 3.0+

		switch {
		case reasoning.MaxTokens != nil:
			// An explicit budget always wins over effort, so only thinkingBudget is sent.
			switch tokenBudget := *reasoning.MaxTokens; tokenBudget {
			case 0:
				thinking.IncludeThoughts = false
				thinking.ThinkingBudget = schemas.Ptr(int32(0))
			case DynamicReasoningBudget: // Special case: -1 means dynamic budget
				thinking.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
			default:
				if err := validateThinkingBudget(r.Model, tokenBudget); err != nil {
					return config, err
				}
				thinking.ThinkingBudget = schemas.Ptr(int32(tokenBudget))
			}

		case reasoning.Effort == nil:
			// Neither budget nor effort given: keep the defaults set above.

		case *reasoning.Effort == "none":
			thinking.IncludeThoughts = false
			thinking.ThinkingBudget = schemas.Ptr(int32(0))

		case levelCapable:
			// Gemini 3.0+ - use thinkingLevel (more native)
			thinking.ThinkingLevel = schemas.Ptr(effortToThinkingLevel(*reasoning.Effort, r.Model))

		default:
			// Gemini < 3.0 - must convert effort to budget
			outputLimit := providerUtils.GetMaxOutputTokensOrDefault(r.Model, DefaultCompletionMaxTokens)
			if config.MaxOutputTokens > 0 {
				outputLimit = int(config.MaxOutputTokens)
			}
			limits := getThinkingBudgetRange(r.Model, outputLimit)
			derived, err := providerUtils.GetBudgetTokensFromReasoningEffort(
				*reasoning.Effort,
				limits.Min,
				limits.Max,
			)
			if err == nil {
				thinking.ThinkingBudget = schemas.Ptr(int32(derived))
			}
		}
	}

	if params.Text != nil {
		convertTextConfigToGenerationConfig(params.Text, &config)
	}

	// Gemini-specific knobs travel in ExtraParams; each key is removed once seen,
	// whether or not its value could be extracted.
	if extras := params.ExtraParams; extras != nil {
		if raw, found := extras["top_k"]; found {
			delete(extras, "top_k")
			if topK, ok := schemas.SafeExtractInt(raw); ok {
				config.TopK = schemas.Ptr(topK)
			}
		}
		if raw, found := extras["frequency_penalty"]; found {
			delete(extras, "frequency_penalty")
			if penalty, ok := schemas.SafeExtractFloat64(raw); ok {
				config.FrequencyPenalty = schemas.Ptr(penalty)
			}
		}
		if raw, found := extras["presence_penalty"]; found {
			delete(extras, "presence_penalty")
			if penalty, ok := schemas.SafeExtractFloat64(raw); ok {
				config.PresencePenalty = schemas.Ptr(penalty)
			}
		}
		if raw, found := extras["stop_sequences"]; found {
			delete(extras, "stop_sequences")
			if sequences, ok := schemas.SafeExtractStringSlice(raw); ok {
				config.StopSequences = sequences
			}
		}
	}

	return config, nil
}
// convertResponsesToolsToGemini converts Responses tools to Gemini tools.
//
// All supported tools are folded into a single Gemini Tool. Function declarations
// and Google Search cannot be combined, so when any web_search tool is present the
// function tools are dropped. Function tools lacking a name or function payload are
// skipped. An empty, non-nil slice is returned when nothing was converted.
func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) []Tool {
	// First pass: find out whether web search was requested at all.
	webSearchRequested := false
	for i := range tools {
		if tools[i].Type == schemas.ResponsesToolTypeWebSearch {
			webSearchRequested = true
			break
		}
	}

	var combined Tool
	for _, tool := range tools {
		switch tool.Type {
		case schemas.ResponsesToolTypeFunction:
			// you cant use function declarations and google search together
			if webSearchRequested || tool.ResponsesToolFunction == nil || tool.Name == nil {
				continue
			}
			decl := &FunctionDeclaration{Name: *tool.Name}
			if tool.Description != nil {
				decl.Description = *tool.Description
			}
			if tool.ResponsesToolFunction.Parameters != nil {
				decl.Parameters = convertFunctionParametersToSchema(*tool.ResponsesToolFunction.Parameters)
			}
			combined.FunctionDeclarations = append(combined.FunctionDeclarations, decl)

		case schemas.ResponsesToolTypeWebSearch:
			// Each web search tool replaces the previous Google Search configuration.
			search := &GoogleSearch{}
			combined.GoogleSearch = search
			if tool.ResponsesToolWebSearch == nil || tool.ResponsesToolWebSearch.Filters == nil {
				continue
			}
			filters := tool.ResponsesToolWebSearch.Filters
			if filters.TimeRangeFilter != nil {
				search.TimeRangeFilter = &Interval{
					StartTime: filters.TimeRangeFilter.StartTime,
					EndTime:   filters.TimeRangeFilter.EndTime,
				}
			}
			if len(filters.BlockedDomains) > 0 {
				search.ExcludeDomains = filters.BlockedDomains
			}
		}
	}

	if len(combined.FunctionDeclarations) == 0 && combined.GoogleSearch == nil {
		return []Tool{}
	}
	return []Tool{combined}
}
// convertResponsesToolChoiceToGemini converts a Responses tool choice to a Gemini tool config.
//
// Structured form:
//   - Mode "auto" / "none" map to the matching Gemini mode; "required" and "any"
//     map to FunctionCallingConfigModeAny ("any" is accepted for parity with the
//     string form).
//   - Name forces FunctionCallingConfigModeAny restricted to that single function.
//   - Otherwise, when the mode is FunctionCallingConfigModeAny, named function
//     entries in Tools become AllowedFunctionNames. This is the inverse of
//     convertGeminiToolConfigToToolChoice. The list is applied only in ANY mode
//     because Gemini documents allowedFunctionNames as valid only with a
//     restricting mode — see the Gemini function-calling reference.
//
// String form: "none" -> None, "required"/"any" -> Any, anything else -> Auto.
//
// The result is never nil; a nil or empty toolChoice yields an empty ToolConfig.
func convertResponsesToolChoiceToGemini(toolChoice *schemas.ResponsesToolChoice) *ToolConfig {
	config := &ToolConfig{}
	if toolChoice == nil {
		return config
	}

	if ext := toolChoice.ResponsesToolChoiceStruct; ext != nil {
		funcConfig := &FunctionCallingConfig{}

		if ext.Mode != nil {
			switch *ext.Mode {
			case "auto":
				funcConfig.Mode = FunctionCallingConfigModeAuto
			case "required", "any":
				funcConfig.Mode = FunctionCallingConfigModeAny
			case "none":
				funcConfig.Mode = FunctionCallingConfigModeNone
			}
		}

		switch {
		case ext.Name != nil:
			// A specific function was requested: force a call to exactly that function.
			funcConfig.Mode = FunctionCallingConfigModeAny
			funcConfig.AllowedFunctionNames = []string{*ext.Name}
		case funcConfig.Mode == FunctionCallingConfigModeAny:
			// Carry over the allow-list of function tools, skipping unnamed or
			// non-function entries.
			for _, allowed := range ext.Tools {
				if allowed.Name == nil || allowed.Type != string(schemas.ResponsesToolTypeFunction) {
					continue
				}
				if strings.TrimSpace(*allowed.Name) == "" {
					continue
				}
				funcConfig.AllowedFunctionNames = append(funcConfig.AllowedFunctionNames, *allowed.Name)
			}
		}

		config.FunctionCallingConfig = funcConfig
		return config
	}

	// Handle string-based tool choice modes
	if toolChoice.ResponsesToolChoiceStr != nil {
		funcConfig := &FunctionCallingConfig{}
		switch *toolChoice.ResponsesToolChoiceStr {
		case "none":
			funcConfig.Mode = FunctionCallingConfigModeNone
		case "required", "any":
			funcConfig.Mode = FunctionCallingConfigModeAny
		default: // "auto" or any other value
			funcConfig.Mode = FunctionCallingConfigModeAuto
		}
		config.FunctionCallingConfig = funcConfig
	}

	return config
}
// convertResponsesMessagesToGeminiContents converts Responses messages to Gemini contents
func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessage) ([]Content, *Content, error) {
var contents []Content
var systemInstruction *Content
// Build a map from callID → function name by scanning function_call messages.
callIDToName := make(map[string]string)
for i := range messages {
m := &messages[i]
if m.Type != nil && *m.Type == schemas.ResponsesMessageTypeFunctionCall &&
m.ResponsesToolMessage != nil &&
m.ResponsesToolMessage.CallID != nil &&
m.ResponsesToolMessage.Name != nil {
if name := strings.TrimSpace(*m.ResponsesToolMessage.Name); name != "" {
callIDToName[*m.ResponsesToolMessage.CallID] = name
}
}
}
// Track consecutive function call output messages to group them for parallel function calling
// According to Gemini docs, all function responses must be in a single message
var pendingFunctionResponseParts []*Part
for i, msg := range messages {
// Skip standalone reasoning messages (they're handled as part of function calls)
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeReasoning && msg.ResponsesReasoning != nil {
continue
}
// Handle system messages separately
if msg.Role != nil && *msg.Role == schemas.ResponsesInputMessageRoleSystem {
if systemInstruction == nil {
systemInstruction = &Content{}
}
// Convert system message content
if msg.Content != nil {
if msg.Content.ContentStr != nil {
systemInstruction.Parts = append(systemInstruction.Parts, &Part{
Text: *msg.Content.ContentStr,
})
}
if msg.Content.ContentBlocks != nil {
for _, block := range msg.Content.ContentBlocks {
part, err := convertContentBlockToGeminiPart(block)
if err != nil {
return nil, nil, fmt.Errorf("failed to convert system message content block: %w", err)
}
if part != nil {
systemInstruction.Parts = append(systemInstruction.Parts, part)
}
}
}
}
continue
}
// Check if this is a function call output message
isFunctionOutput := msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCallOutput && msg.ResponsesToolMessage != nil
// If we have pending function responses and current message is NOT a function output,
// flush the pending responses as a single Content (for parallel function calling)
if len(pendingFunctionResponseParts) > 0 && !isFunctionOutput {
contents = append(contents, Content{
Parts: pendingFunctionResponseParts,
Role: "model", // Function responses use "model" role in Gemini
})
pendingFunctionResponseParts = nil
}
// Handle regular messages
content := Content{}
if msg.Role != nil {
// Map Responses roles to Gemini roles (Gemini only supports "user" and "model")
switch *msg.Role {
case schemas.ResponsesInputMessageRoleAssistant:
content.Role = "model"
case schemas.ResponsesInputMessageRoleUser, schemas.ResponsesInputMessageRoleDeveloper:
content.Role = "user"
default:
// Default to "user" for input messages (any instructions/context)
content.Role = "user"
}
}
// Handle tool calls/responses
if msg.ResponsesToolMessage != nil && msg.Type != nil {
switch *msg.Type {
case schemas.ResponsesMessageTypeFunctionCall:
// Convert function call to Gemini FunctionCall
if msg.ResponsesToolMessage.Name != nil {
var argsRaw json.RawMessage
if msg.ResponsesToolMessage.Arguments != nil {
rawArgs := strings.TrimSpace(*msg.ResponsesToolMessage.Arguments)
if rawArgs == "" {
rawArgs = "{}"
}
var buf bytes.Buffer
if err := json.Compact(&buf, []byte(rawArgs)); err != nil {
return nil, nil, fmt.Errorf("failed to decode function call arguments: %w", err)
}
argsRaw = buf.Bytes()
}
if argsRaw == nil {
argsRaw = json.RawMessage("{}")
}
var thoughtSig string
part := &Part{
FunctionCall: &FunctionCall{
Name: *msg.ResponsesToolMessage.Name,
Args: argsRaw,
},
}
if msg.ResponsesToolMessage.CallID != nil {
if strings.Contains(*msg.ResponsesToolMessage.CallID, thoughtSignatureSeparator) {
parts := strings.SplitN(*msg.ResponsesToolMessage.CallID, thoughtSignatureSeparator, 2)
if len(parts) == 2 {
thoughtSig = parts[1] // Extract signature (after separator)
}
}
// Keep the full CallID as-is (don't strip thought signature)
part.FunctionCall.ID = *msg.ResponsesToolMessage.CallID
}
if thoughtSig != "" {
var err error
part.ThoughtSignature, err = base64.RawURLEncoding.DecodeString(thoughtSig)
if err != nil {
// Silently ignore decode errors - ID will be used without signature
thoughtSig = ""
}
}
// Preserve thought signature from ResponsesReasoning message (required for Gemini 3 Pro)
// Look ahead to see if the next message is a reasoning message with encrypted content
if i+1 < len(messages) {
nextMsg := messages[i+1]
if nextMsg.Type != nil && *nextMsg.Type == schemas.ResponsesMessageTypeReasoning &&
nextMsg.ResponsesReasoning != nil && nextMsg.ResponsesReasoning.EncryptedContent != nil {
decodedSig, err := base64.StdEncoding.DecodeString(*nextMsg.ResponsesReasoning.EncryptedContent)
if err == nil {
part.ThoughtSignature = decodedSig
}
}
}
content.Parts = append(content.Parts, part)
}
case schemas.ResponsesMessageTypeFunctionCallOutput:
// Convert function response - collect for grouping
// According to Gemini parallel function calling docs, multiple function responses
// must be sent in a single message with only functionResponse parts (no text/content parts)
if msg.ResponsesToolMessage.CallID != nil {
responseMap := make(map[string]any)
// Extract output from ResponsesToolMessage.Output
if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr != nil {
output := *msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr
if json.Valid([]byte(output)) {
responseMap["output"] = json.RawMessage(output)
} else {
responseMap["output"] = output
}
} else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil {
// Handle structured output blocks (e.g. from Anthropic Responses API format
// where output is an array of content blocks like [{"type":"input_text","text":"..."}])
var textParts []string
for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks {
if block.Text != nil && *block.Text != "" {
textParts = append(textParts, *block.Text)
}
}
if len(textParts) > 0 {
combined := strings.Join(textParts, "\n")
if json.Valid([]byte(combined)) {
responseMap["output"] = json.RawMessage(combined)
} else {
responseMap["output"] = combined
}
} else {
// Fallback for non-text blocks (e.g. images, files): marshal the raw blocks
// so responseMap["output"] is never left empty when blocks are present
rawBlocks, err := providerUtils.MarshalSorted(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks)
if err == nil && len(rawBlocks) > 0 {
responseMap["output"] = json.RawMessage(rawBlocks)
}
}
} else if msg.Content != nil && msg.Content.ContentStr != nil {
// Fallback to Content.ContentStr for backward compatibility
output := *msg.Content.ContentStr
if json.Valid([]byte(output)) {
responseMap["output"] = json.RawMessage(output)
} else {
responseMap["output"] = output
}
}
// Prefer the declared tool name; fallback to callIDToName lookup, then raw CallID
funcName := ""
if msg.ResponsesToolMessage.Name != nil && strings.TrimSpace(*msg.ResponsesToolMessage.Name) != "" {
funcName = *msg.ResponsesToolMessage.Name
} else if name, ok := callIDToName[*msg.ResponsesToolMessage.CallID]; ok && strings.TrimSpace(name) != "" {
funcName = name
} else {
funcName = *msg.ResponsesToolMessage.CallID
}
responseBytes, _ := providerUtils.MarshalSorted(responseMap)
part := &Part{
FunctionResponse: &FunctionResponse{
Name: funcName,
Response: json.RawMessage(responseBytes),
ID: *msg.ResponsesToolMessage.CallID,
},
}
pendingFunctionResponseParts = append(pendingFunctionResponseParts, part)
// If this is the last message, flush pending responses
if i == len(messages)-1 && len(pendingFunctionResponseParts) > 0 {
contents = append(contents, Content{
Parts: pendingFunctionResponseParts,
Role: "model",
})
pendingFunctionResponseParts = nil
}
continue // Skip normal content handling
}
}
}
// For non-function-output messages, convert message content normally
if !isFunctionOutput {
// Convert message content
if msg.Content != nil {
if msg.Content.ContentStr != nil {
content.Parts = append(content.Parts, &Part{
Text: *msg.Content.ContentStr,
})
}
if msg.Content.ContentBlocks != nil {
for _, block := range msg.Content.ContentBlocks {
part, err := convertContentBlockToGeminiPart(block)
if err != nil {
return nil, nil, fmt.Errorf("failed to convert message content block: %w", err)
}
if part != nil {
content.Parts = append(content.Parts, part)
}
}
}
}
}
if len(content.Parts) > 0 {
contents = append(contents, content)
}
}
return contents, systemInstruction, nil
}
// convertContentBlockToGeminiPart maps a single Responses content block onto the
// equivalent Gemini Part.
//
// A nil Part together with a nil error means the block carried nothing that can be
// converted (unhandled type, empty text, missing payload). An error is returned only
// when an image URL cannot be sanitized or when base64 image/audio data cannot be
// decoded.
func convertContentBlockToGeminiPart(block schemas.ResponsesMessageContentBlock) (*Part, error) {
	switch block.Type {
	case schemas.ResponsesInputMessageContentBlockTypeText,
		schemas.ResponsesOutputMessageContentTypeText:
		if block.Text == nil || *block.Text == "" {
			return nil, nil
		}
		textPart := &Part{Text: *block.Text}
		if block.Signature != nil {
			// Best effort: a signature that is not valid standard base64 is dropped
			// and the text is still forwarded.
			if sig, decodeErr := base64.StdEncoding.DecodeString(*block.Signature); decodeErr == nil {
				textPart.ThoughtSignature = sig
			}
		}
		return textPart, nil

	case schemas.ResponsesOutputMessageContentTypeReasoning:
		if block.Text == nil || *block.Text == "" {
			return nil, nil
		}
		return &Part{Text: *block.Text, Thought: true}, nil

	case schemas.ResponsesOutputMessageContentTypeRefusal:
		// Gemini has no refusal part, so the refusal is forwarded as plain text.
		refusal := block.ResponsesOutputMessageContentRefusal
		if refusal == nil {
			return nil, nil
		}
		return &Part{Text: refusal.Refusal}, nil

	case schemas.ResponsesOutputMessageContentTypeCompaction:
		// Compaction is Anthropic-specific; its summary is forwarded as plain text.
		compaction := block.ResponsesOutputMessageContentCompaction
		if compaction == nil {
			return nil, nil
		}
		trimmed := strings.TrimSpace(compaction.Summary)
		if trimmed == "" {
			return nil, nil
		}
		return &Part{Text: trimmed}, nil

	case schemas.ResponsesInputMessageContentBlockTypeImage:
		image := block.ResponsesInputMessageContentBlockImage
		if image == nil || image.ImageURL == nil {
			return nil, nil
		}
		cleanURL, err := schemas.SanitizeImageURL(*image.ImageURL)
		if err != nil {
			return nil, fmt.Errorf("failed to sanitize image URL: %w", err)
		}
		info := schemas.ExtractURLTypeInfo(cleanURL)
		mediaType := "image/jpeg" // used when the URL does not declare a media type
		if info.MediaType != nil {
			mediaType = *info.MediaType
		}
		if info.Type != schemas.ImageContentTypeBase64 {
			// Anything that is not inline base64 is referenced by URI.
			return &Part{
				FileData: &FileData{
					MIMEType: mediaType,
					FileURI:  cleanURL,
				},
			}, nil
		}
		encoded := ""
		if info.DataURLWithoutPrefix != nil {
			encoded = *info.DataURLWithoutPrefix
		}
		// Decode and re-encode so the payload ends up in the encoding produced by
		// encodeBytesToBase64String, whichever base64 flavor the input used.
		rawImage, err := decodeBase64StringToBytes(encoded)
		if err != nil {
			return nil, fmt.Errorf("failed to decode base64 image data: %w", err)
		}
		return &Part{
			InlineData: &Blob{
				MIMEType: mediaType,
				Data:     encodeBytesToBase64String(rawImage),
			},
		}, nil

	case schemas.ResponsesInputMessageContentBlockTypeAudio:
		if block.Audio == nil {
			return nil, nil
		}
		rawAudio, err := decodeBase64StringToBytes(block.Audio.Data)
		if err != nil {
			return nil, fmt.Errorf("failed to decode base64 audio data: %w", err)
		}
		// Normalize the declared format into a MIME type: empty means audio/mpeg,
		// an existing "audio/" prefix is kept, anything else gets the prefix added.
		format := strings.ToLower(strings.TrimSpace(block.Audio.Format))
		var audioMIME string
		switch {
		case format == "":
			audioMIME = "audio/mpeg"
		case strings.HasPrefix(format, "audio/"):
			audioMIME = format
		default:
			audioMIME = "audio/" + format
		}
		return &Part{
			InlineData: &Blob{
				MIMEType: audioMIME,
				Data:     encodeBytesToBase64String(rawAudio),
			},
		}, nil

	case schemas.ResponsesInputMessageContentBlockTypeFile:
		file := block.ResponsesInputMessageContentBlockFile
		if file == nil {
			return nil, nil
		}
		fileMIME := "application/pdf" // used when the block does not declare a file type
		if file.FileType != nil {
			fileMIME = *file.FileType
		}
		// A URL reference takes precedence over inline data.
		if file.FileURL != nil {
			return &Part{
				FileData: &FileData{
					MIMEType: fileMIME,
					FileURI:  *file.FileURL,
				},
			}, nil
		}
		if file.FileData == nil {
			return nil, nil
		}
		payload, detectedMIME := convertFileDataToBytes(*file.FileData)
		if detectedMIME != "" {
			// A MIME type reported by the helper overrides the declared one.
			fileMIME = detectedMIME
		}
		if len(payload) == 0 {
			return nil, nil
		}
		return &Part{
			InlineData: &Blob{
				MIMEType: fileMIME,
				Data:     encodeBytesToBase64String(payload),
			},
		}, nil
	}
	return nil, nil
}
// buildGroundingMetadataFromWebSearch turns a Bifrost web_search_call message, plus
// the url_citation annotations and optional rendered content that accompany it, into
// Gemini GroundingMetadata.
//
// It returns nil when the message has no web search action, or when the action yields
// neither search queries nor any source with a non-empty URL.
func buildGroundingMetadataFromWebSearch(webSearchCall *schemas.ResponsesMessage, annotations []schemas.ResponsesOutputMessageContentTextAnnotation, renderedContent *string) *GroundingMetadata {
	if webSearchCall == nil || webSearchCall.ResponsesToolMessage == nil {
		return nil
	}
	toolAction := webSearchCall.ResponsesToolMessage.Action
	if toolAction == nil || toolAction.ResponsesWebSearchToolCallAction == nil {
		return nil
	}
	search := toolAction.ResponsesWebSearchToolCallAction

	result := &GroundingMetadata{}

	// Queries: the list form wins; a lone Query is wrapped into a one-element list.
	switch {
	case len(search.Queries) > 0:
		result.WebSearchQueries = search.Queries
	case search.Query != nil:
		result.WebSearchQueries = []string{*search.Query}
	}

	// Sources become grounding chunks. The URL -> chunk index map is kept so that
	// annotations can later be tied back to the chunk they cite.
	chunkIndexByURL := make(map[string]int32)
	for _, src := range search.Sources {
		if src.URL == "" {
			continue
		}
		label := src.URL // the URL doubles as the title when none is supplied
		if src.Title != nil && *src.Title != "" {
			label = *src.Title
		}
		chunkIndexByURL[src.URL] = int32(len(result.GroundingChunks))
		result.GroundingChunks = append(result.GroundingChunks, &GroundingChunk{
			Web: &GroundingChunkWeb{
				URI:   src.URL,
				Title: label,
			},
		})
	}

	// Without queries or chunks there is nothing meaningful to report.
	if len(result.WebSearchQueries) == 0 && len(result.GroundingChunks) == 0 {
		return nil
	}

	if renderedContent != nil && *renderedContent != "" {
		result.SearchEntryPoint = &SearchEntryPoint{
			RenderedContent: *renderedContent,
		}
	}

	// url_citation annotations become grounding supports.
	for i := range annotations {
		ann := &annotations[i]
		if ann.Type != "url_citation" {
			continue
		}
		segment := &Segment{}
		if ann.Text != nil {
			segment.Text = *ann.Text
		}
		if ann.StartIndex != nil {
			segment.StartIndex = int32(*ann.StartIndex)
		}
		if ann.EndIndex != nil {
			segment.EndIndex = int32(*ann.EndIndex)
		}
		support := &GroundingSupport{Segment: segment}
		if ann.URL != nil {
			if idx, known := chunkIndexByURL[*ann.URL]; known {
				support.GroundingChunkIndices = []int32{idx}
			}
		}
		// A support with neither segment text nor a resolved chunk is dropped.
		if segment.Text == "" && len(support.GroundingChunkIndices) == 0 {
			continue
		}
		result.GroundingSupports = append(result.GroundingSupports, support)
	}

	return result
}
// emitWebSearchFromGroundingMetadata converts Gemini grounding metadata into the
// stream events describing a web search call.
//
// Events are produced in this order:
//  1. output_item.added (web_search_call, status "in_progress", action without sources)
//  2. web_search_call.in_progress
//  3. web_search_call.searching
//  4. web_search_call.completed
//  5. output_item.done (status "completed", action with queries and sources)
//
// When metadata.SearchEntryPoint carries rendered content, an output_item.added /
// output_item.done pair for a message item holding a rendered-content block follows.
//
// Sequence numbers start at sequenceNumber and grow by one per returned event.
// state is mutated: output indexes are taken from nextOutputIndex, the generated item
// IDs are recorded in ItemIDs, and HasEmittedWebSearch is set to true.
//
// Returns nil when metadata is nil or contains no web search queries.
func emitWebSearchFromGroundingMetadata(
	metadata *GroundingMetadata,
	state *GeminiResponsesStreamState,
	sequenceNumber int,
) []*schemas.BifrostResponsesStreamResponse {
	if metadata == nil || len(metadata.WebSearchQueries) == 0 {
		return nil
	}

	var responses []*schemas.BifrostResponsesStreamResponse
	// emit stamps the next sequence number on the event and queues it, keeping the
	// numbering logic in one place.
	emit := func(event *schemas.BifrostResponsesStreamResponse) {
		event.SequenceNumber = sequenceNumber + len(responses)
		responses = append(responses, event)
	}

	outputIndex := state.nextOutputIndex()
	itemID := state.generateItemID("ws", outputIndex)
	state.ItemIDs[outputIndex] = itemID

	// Build the web search action. The guard above guarantees at least one query,
	// so the first one can be exposed as the singular Query unconditionally.
	action := &schemas.ResponsesWebSearchToolCallAction{
		Type:    "search",
		Queries: metadata.WebSearchQueries,
		Query:   &metadata.WebSearchQueries[0],
	}

	// Convert grounding chunks to sources. Nil entries and entries without a web
	// URI are skipped rather than dereferenced.
	var sources []schemas.ResponsesWebSearchToolCallActionSearchSource
	for _, chunk := range metadata.GroundingChunks {
		if chunk == nil || chunk.Web == nil || chunk.Web.URI == "" {
			continue
		}
		source := schemas.ResponsesWebSearchToolCallActionSearchSource{
			Type:  "url",
			URL:   chunk.Web.URI,
			Title: &chunk.Web.URI, // fall back to the URI when no title is given
		}
		if chunk.Web.Title != "" {
			source.Title = &chunk.Web.Title
		}
		sources = append(sources, source)
	}
	action.Sources = sources

	// 1. output_item.added (web_search_call, in_progress)
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
		OutputIndex: &outputIndex,
		Item: &schemas.ResponsesMessage{
			ID:     &itemID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
			Status: schemas.Ptr("in_progress"),
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				Action: &schemas.ResponsesToolMessageActionStruct{
					ResponsesWebSearchToolCallAction: &schemas.ResponsesWebSearchToolCallAction{
						Type: "search",
					},
				},
			},
		},
	})

	// 2. web_search_call.in_progress
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeWebSearchCallInProgress,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
	})

	// 3. web_search_call.searching
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeWebSearchCallSearching,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
	})

	// 4. web_search_call.completed
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeWebSearchCallCompleted,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
	})

	// 5. output_item.done (with full action including sources)
	emit(&schemas.BifrostResponsesStreamResponse{
		Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
		OutputIndex: &outputIndex,
		ItemID:      &itemID,
		Item: &schemas.ResponsesMessage{
			ID:     &itemID,
			Type:   schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
			Status: schemas.Ptr("completed"),
			ResponsesToolMessage: &schemas.ResponsesToolMessage{
				Action: &schemas.ResponsesToolMessageActionStruct{
					ResponsesWebSearchToolCallAction: action,
				},
			},
		},
	})

	state.HasEmittedWebSearch = true

	// Emit rendered content, if present, as its own output item.
	if metadata.SearchEntryPoint != nil && metadata.SearchEntryPoint.RenderedContent != "" {
		renderedIndex := state.nextOutputIndex()
		renderedItemID := state.generateItemID("rc", renderedIndex)
		state.ItemIDs[renderedIndex] = renderedItemID

		// output_item.added with rendered_content
		emit(&schemas.BifrostResponsesStreamResponse{
			Type:        schemas.ResponsesStreamResponseTypeOutputItemAdded,
			OutputIndex: &renderedIndex,
			Item: &schemas.ResponsesMessage{
				ID:     &renderedItemID,
				Type:   schemas.Ptr(schemas.ResponsesMessageTypeMessage),
				Status: schemas.Ptr("completed"),
				Content: &schemas.ResponsesMessageContent{
					ContentBlocks: []schemas.ResponsesMessageContentBlock{
						{
							Type: schemas.ResponsesOutputMessageContentTypeRenderedContent,
							ResponsesOutputMessageContentRenderedContent: &schemas.ResponsesOutputMessageContentRenderedContent{
								RenderedContent: metadata.SearchEntryPoint.RenderedContent,
							},
						},
					},
				},
			},
		})

		// output_item.done for rendered content
		emit(&schemas.BifrostResponsesStreamResponse{
			Type:        schemas.ResponsesStreamResponseTypeOutputItemDone,
			OutputIndex: &renderedIndex,
			ItemID:      &renderedItemID,
		})
	}

	return responses
}
// emitAnnotationsFromGroundingSupports converts Gemini grounding supports into
// output_text.annotation.added events attached to the current text output item.
//
// One url_citation event is produced for every support that has a segment and whose
// first grounding chunk index resolves to a web chunk; all other supports are skipped.
// Sequence numbers start at sequenceNumber and annotation indexes start at 0, both
// increasing by one per emitted event.
//
// Returns nil when metadata is nil, has no grounding supports, or when
// state.TextOutputIndex is negative.
func emitAnnotationsFromGroundingSupports(
	metadata *GroundingMetadata,
	state *GeminiResponsesStreamState,
	sequenceNumber int,
) []*schemas.BifrostResponsesStreamResponse {
	if metadata == nil || len(metadata.GroundingSupports) == 0 || state.TextOutputIndex < 0 {
		return nil
	}

	// Snapshot the output index: the events hold a pointer to it, and pointing at the
	// state field itself would let later state changes rewrite events already emitted.
	textOutputIndex := state.TextOutputIndex
	itemID := state.ItemIDs[textOutputIndex]

	var responses []*schemas.BifrostResponsesStreamResponse
	for _, support := range metadata.GroundingSupports {
		if support == nil || support.Segment == nil {
			continue
		}

		annotation := schemas.ResponsesOutputMessageContentTextAnnotation{
			Type:       "url_citation",
			StartIndex: schemas.Ptr(int(support.Segment.StartIndex)),
			EndIndex:   schemas.Ptr(int(support.Segment.EndIndex)),
		}
		if support.Segment.Text != "" {
			annotation.Text = &support.Segment.Text
		}

		// Resolve URL and title from the first referenced chunk. The index comes from
		// the provider, so it is bounds-checked on both sides before use.
		if len(support.GroundingChunkIndices) > 0 {
			chunkIdx := support.GroundingChunkIndices[0]
			if chunkIdx >= 0 && int(chunkIdx) < len(metadata.GroundingChunks) {
				if chunk := metadata.GroundingChunks[chunkIdx]; chunk != nil && chunk.Web != nil {
					annotation.URL = &chunk.Web.URI
					if chunk.Web.Title != "" {
						annotation.Title = &chunk.Web.Title
					}
				}
			}
		}
		// A citation without a URL is not emitted.
		if annotation.URL == nil {
			continue
		}

		// Exactly one event is appended per emitted annotation, so the number of
		// events so far is this annotation's index. Each event gets its own pointer;
		// sharing one counter variable would make every event report the final count.
		responses = append(responses, &schemas.BifrostResponsesStreamResponse{
			Type:            schemas.ResponsesStreamResponseTypeOutputTextAnnotationAdded,
			SequenceNumber:  sequenceNumber + len(responses),
			OutputIndex:     &textOutputIndex,
			ItemID:          &itemID,
			ContentIndex:    schemas.Ptr(0),
			Annotation:      &annotation,
			AnnotationIndex: schemas.Ptr(len(responses)),
		})
	}
	return responses
}
// generateSyntheticFunctionCallArgumentDeltas splits a complete arguments JSON string
// into a series of FunctionCallArgumentsDelta events, imitating incremental streaming
// for a provider that delivers function call arguments all at once.
//
// The string is cut into pieces of at most eight runes, so multi-byte characters are
// never split. Event k (zero-based) carries sequence number baseSequenceNumber+k and
// shares the given outputIndex and itemID pointers. An empty argumentsJSON yields nil.
func generateSyntheticFunctionCallArgumentDeltas(argumentsJSON string, outputIndex *int, itemID *string, baseSequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
	// Small pieces so the synthetic stream resembles real token-by-token output.
	const runesPerDelta = 8

	var deltas []*schemas.BifrostResponsesStreamResponse
	for remaining := []rune(argumentsJSON); len(remaining) > 0; {
		take := min(runesPerDelta, len(remaining))
		// piece is declared inside the loop body so every event points at its own string.
		piece := string(remaining[:take])
		remaining = remaining[take:]

		deltas = append(deltas, &schemas.BifrostResponsesStreamResponse{
			Type:           schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta,
			SequenceNumber: baseSequenceNumber + len(deltas),
			OutputIndex:    outputIndex,
			ItemID:         itemID,
			Delta:          &piece,
		})
	}
	return deltas
}