3653 lines
122 KiB
Go
3653 lines
122 KiB
Go
package gemini
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
)
|
|
|
|
func (request *GeminiGenerationRequest) ToBifrostResponsesRequest(ctx *schemas.BifrostContext) *schemas.BifrostResponsesRequest {
|
|
if request == nil {
|
|
return nil
|
|
}
|
|
|
|
provider, model := schemas.ParseModelString(request.Model, providerUtils.CheckAndSetDefaultProvider(ctx, schemas.Gemini))
|
|
|
|
// Create the BifrostResponsesRequest
|
|
bifrostReq := &schemas.BifrostResponsesRequest{
|
|
Provider: provider,
|
|
Model: model,
|
|
}
|
|
|
|
params := request.convertGenerationConfigToResponsesParameters()
|
|
|
|
// Convert SystemInstruction to system messages first
|
|
var inputMessages []schemas.ResponsesMessage
|
|
if request.SystemInstruction != nil && len(request.SystemInstruction.Parts) > 0 {
|
|
systemMsg := convertGeminiSystemInstructionToResponsesMessage(request.SystemInstruction)
|
|
if systemMsg != nil {
|
|
inputMessages = append(inputMessages, *systemMsg)
|
|
}
|
|
}
|
|
|
|
// Convert Contents to Input messages
|
|
if len(request.Contents) > 0 {
|
|
contentsMessages := convertGeminiContentsToResponsesMessages(request.Contents)
|
|
if len(contentsMessages) > 0 {
|
|
inputMessages = append(inputMessages, contentsMessages...)
|
|
}
|
|
}
|
|
|
|
if len(inputMessages) > 0 {
|
|
bifrostReq.Input = inputMessages
|
|
}
|
|
|
|
if len(request.Tools) > 0 {
|
|
params.Tools = convertGeminiToolsToResponsesTools(request.Tools)
|
|
}
|
|
|
|
if request.ToolConfig != nil && request.ToolConfig.FunctionCallingConfig != nil {
|
|
params.ToolChoice = convertGeminiToolConfigToToolChoice(request.ToolConfig)
|
|
}
|
|
|
|
if request.SafetySettings != nil {
|
|
params.ExtraParams["safety_settings"] = request.SafetySettings
|
|
}
|
|
|
|
if request.CachedContent != "" {
|
|
params.ExtraParams["cached_content"] = request.CachedContent
|
|
}
|
|
|
|
bifrostReq.Params = params
|
|
|
|
return bifrostReq
|
|
|
|
}
|
|
|
|
func ToGeminiResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) (*GeminiGenerationRequest, error) {
|
|
if bifrostReq == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
// Create the base Gemini generation request
|
|
geminiReq := &GeminiGenerationRequest{
|
|
Model: bifrostReq.Model,
|
|
}
|
|
|
|
// Convert parameters to generation config
|
|
if bifrostReq.Params != nil {
|
|
var err error
|
|
geminiReq.GenerationConfig, err = geminiReq.convertParamsToGenerationConfigResponses(bifrostReq.Params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
geminiReq.ExtraParams = bifrostReq.Params.ExtraParams
|
|
// Handle tool-related parameters
|
|
if len(bifrostReq.Params.Tools) > 0 {
|
|
geminiReq.Tools = convertResponsesToolsToGemini(bifrostReq.Params.Tools)
|
|
|
|
// Convert tool choice if present
|
|
if bifrostReq.Params.ToolChoice != nil {
|
|
geminiReq.ToolConfig = convertResponsesToolChoiceToGemini(bifrostReq.Params.ToolChoice)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert ResponsesInput messages to Gemini contents
|
|
if bifrostReq.Input != nil {
|
|
contents, systemInstruction, err := convertResponsesMessagesToGeminiContents(bifrostReq.Input)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
geminiReq.Contents = contents
|
|
|
|
if systemInstruction != nil {
|
|
geminiReq.SystemInstruction = systemInstruction
|
|
}
|
|
}
|
|
|
|
if bifrostReq.Params != nil {
|
|
if bifrostReq.Params.Instructions != nil {
|
|
// check if system instruction is already set
|
|
if geminiReq.SystemInstruction == nil {
|
|
geminiReq.SystemInstruction = &Content{
|
|
Parts: []*Part{
|
|
{Text: *bifrostReq.Params.Instructions},
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
if bifrostReq.Params.ExtraParams != nil {
|
|
if safetySettings, ok := schemas.SafeExtractFromMap(bifrostReq.Params.ExtraParams, "safety_settings"); ok {
|
|
delete(geminiReq.ExtraParams, "safety_settings")
|
|
if settings, ok := SafeExtractSafetySettings(safetySettings); ok {
|
|
geminiReq.SafetySettings = settings
|
|
}
|
|
}
|
|
if cachedContent, ok := schemas.SafeExtractString(bifrostReq.Params.ExtraParams["cached_content"]); ok {
|
|
delete(geminiReq.ExtraParams, "cached_content")
|
|
geminiReq.CachedContent = cachedContent
|
|
}
|
|
}
|
|
}
|
|
|
|
return geminiReq, nil
|
|
}
|
|
|
|
// ToResponsesBifrostResponsesResponse converts a Gemini GenerateContentResponse to a BifrostResponsesResponse
|
|
func (response *GenerateContentResponse) ToResponsesBifrostResponsesResponse() *schemas.BifrostResponsesResponse {
|
|
if response == nil {
|
|
return nil
|
|
}
|
|
|
|
// Create the BifrostResponse with Responses structure
|
|
bifrostResp := &schemas.BifrostResponsesResponse{
|
|
ID: schemas.Ptr("resp_" + providerUtils.GetRandomString(50)),
|
|
CreatedAt: int(time.Now().Unix()),
|
|
Model: response.ModelVersion,
|
|
}
|
|
|
|
// Convert usage information
|
|
bifrostResp.Usage = ConvertGeminiUsageMetadataToResponsesUsage(response.UsageMetadata)
|
|
|
|
// Convert candidates to Responses output messages
|
|
if len(response.Candidates) > 0 {
|
|
candidate := response.Candidates[0]
|
|
|
|
// Persist finish reason as Bifrost canonical stop_reason
|
|
if candidate.FinishReason != "" && candidate.FinishReason != FinishReasonUnspecified {
|
|
stopReason := ConvertGeminiFinishReasonToBifrost(candidate.FinishReason)
|
|
bifrostResp.StopReason = &stopReason
|
|
|
|
if isErrorFinishReason(candidate.FinishReason) {
|
|
failedStatus := "failed"
|
|
bifrostResp.Status = &failedStatus
|
|
|
|
errMsg := candidate.FinishMessage
|
|
if errMsg == "" {
|
|
errMsg = string(candidate.FinishReason)
|
|
}
|
|
bifrostResp.Error = &schemas.ResponsesResponseError{
|
|
Code: stopReason,
|
|
Message: errMsg,
|
|
}
|
|
|
|
return bifrostResp
|
|
}
|
|
}
|
|
|
|
outputMessages := convertGeminiCandidatesToResponsesOutput(response.Candidates)
|
|
if len(outputMessages) > 0 {
|
|
bifrostResp.Output = outputMessages
|
|
}
|
|
}
|
|
|
|
return bifrostResp
|
|
}
|
|
|
|
func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *GenerateContentResponse {
|
|
if bifrostResp == nil {
|
|
return nil
|
|
}
|
|
|
|
geminiResp := &GenerateContentResponse{
|
|
ModelVersion: bifrostResp.Model,
|
|
}
|
|
|
|
// Set response ID if available
|
|
if bifrostResp.ID != nil {
|
|
geminiResp.ResponseID = *bifrostResp.ID
|
|
}
|
|
|
|
// Set creation time
|
|
if bifrostResp.CreatedAt > 0 {
|
|
geminiResp.CreateTime = time.Unix(int64(bifrostResp.CreatedAt), 0)
|
|
}
|
|
|
|
// Convert output messages to candidates
|
|
if len(bifrostResp.Output) > 0 {
|
|
candidates := []*Candidate{}
|
|
|
|
// Group messages by their role to create candidates
|
|
var currentParts []*Part
|
|
var currentRole string
|
|
|
|
// Track which message indices have been consumed as thought signatures
|
|
consumedIndices := make(map[int]bool)
|
|
|
|
// Find last web_search_call and collect annotations and rendered_content for grounding metadata
|
|
var lastWebSearchCall *schemas.ResponsesMessage
|
|
var webSearchAnnotations []schemas.ResponsesOutputMessageContentTextAnnotation
|
|
var lastRenderedContent *string
|
|
for i := range bifrostResp.Output {
|
|
msg := &bifrostResp.Output[i]
|
|
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeWebSearchCall {
|
|
lastWebSearchCall = msg
|
|
consumedIndices[i] = true
|
|
}
|
|
// Collect annotations (typically in message after web search)
|
|
if msg.Content != nil && msg.Content.ContentBlocks != nil {
|
|
for _, block := range msg.Content.ContentBlocks {
|
|
if block.ResponsesOutputMessageContentText != nil && len(block.ResponsesOutputMessageContentText.Annotations) > 0 {
|
|
webSearchAnnotations = append(webSearchAnnotations, block.ResponsesOutputMessageContentText.Annotations...)
|
|
}
|
|
// Collect rendered_content
|
|
if block.Type == schemas.ResponsesOutputMessageContentTypeRenderedContent &&
|
|
block.ResponsesOutputMessageContentRenderedContent != nil &&
|
|
block.ResponsesOutputMessageContentRenderedContent.RenderedContent != "" {
|
|
lastRenderedContent = &block.ResponsesOutputMessageContentRenderedContent.RenderedContent
|
|
consumedIndices[i] = true // Mark this message as consumed
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for i, msg := range bifrostResp.Output {
|
|
// Skip web_search_call messages as they're converted to grounding metadata
|
|
if consumedIndices[i] {
|
|
continue
|
|
}
|
|
|
|
// Determine the role
|
|
role := "model" // default
|
|
if msg.Role != nil {
|
|
if *msg.Role == schemas.ResponsesInputMessageRoleUser {
|
|
role = "user"
|
|
}
|
|
}
|
|
|
|
// If we're starting a new candidate (role changed), save the previous one
|
|
if currentRole != "" && currentRole != role && len(currentParts) > 0 {
|
|
candidates = append(candidates, &Candidate{
|
|
Index: int32(len(candidates)),
|
|
Content: &Content{
|
|
Parts: currentParts,
|
|
Role: currentRole,
|
|
},
|
|
})
|
|
currentParts = []*Part{}
|
|
}
|
|
currentRole = role
|
|
|
|
// Convert message content to parts
|
|
if msg.Content != nil {
|
|
// Handle string content
|
|
if msg.Content.ContentStr != nil && *msg.Content.ContentStr != "" {
|
|
currentParts = append(currentParts, &Part{
|
|
Text: *msg.Content.ContentStr,
|
|
})
|
|
}
|
|
|
|
// Handle content blocks
|
|
if msg.Content.ContentBlocks != nil {
|
|
for _, block := range msg.Content.ContentBlocks {
|
|
part, err := convertContentBlockToGeminiPart(block)
|
|
if err == nil && part != nil {
|
|
currentParts = append(currentParts, part)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Handle tool calls (function calls)
|
|
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCall && msg.ResponsesToolMessage != nil {
|
|
argsRaw := json.RawMessage("{}")
|
|
if msg.ResponsesToolMessage.Arguments != nil {
|
|
rawArgs := strings.TrimSpace(*msg.ResponsesToolMessage.Arguments)
|
|
if rawArgs == "" {
|
|
rawArgs = "{}"
|
|
}
|
|
var buf bytes.Buffer
|
|
if err := json.Compact(&buf, []byte(rawArgs)); err == nil {
|
|
argsRaw = buf.Bytes()
|
|
}
|
|
}
|
|
functionCall := &FunctionCall{
|
|
Args: argsRaw,
|
|
}
|
|
if msg.ResponsesToolMessage.Name != nil {
|
|
functionCall.Name = *msg.ResponsesToolMessage.Name
|
|
}
|
|
|
|
// Extract thought signature from CallID if present
|
|
var thoughtSignature []byte
|
|
if msg.ResponsesToolMessage.CallID != nil {
|
|
callID := *msg.ResponsesToolMessage.CallID
|
|
// Check if the ID contains a thought signature (format: "ToolName_ts_base64signature")
|
|
if strings.Contains(callID, thoughtSignatureSeparator) {
|
|
parts := strings.SplitN(callID, thoughtSignatureSeparator, 2)
|
|
if len(parts) == 2 {
|
|
// Try to decode the signature part
|
|
if decodedSig, err := base64.RawURLEncoding.DecodeString(parts[1]); err == nil {
|
|
thoughtSignature = decodedSig
|
|
}
|
|
}
|
|
}
|
|
functionCall.ID = callID
|
|
}
|
|
|
|
part := &Part{
|
|
FunctionCall: functionCall,
|
|
}
|
|
|
|
// Use thought signature from CallID if we extracted one
|
|
if len(thoughtSignature) > 0 {
|
|
part.ThoughtSignature = thoughtSignature
|
|
} else {
|
|
// Otherwise, look ahead to see if the next message is a reasoning message with encrypted content
|
|
// (thought signature for this function call)
|
|
if i+1 < len(bifrostResp.Output) {
|
|
nextMsg := bifrostResp.Output[i+1]
|
|
if nextMsg.Type != nil && *nextMsg.Type == schemas.ResponsesMessageTypeReasoning &&
|
|
nextMsg.ResponsesReasoning != nil && nextMsg.ResponsesReasoning.EncryptedContent != nil {
|
|
decodedSig, err := base64.StdEncoding.DecodeString(*nextMsg.ResponsesReasoning.EncryptedContent)
|
|
if err == nil {
|
|
part.ThoughtSignature = decodedSig
|
|
// Mark this reasoning message as consumed
|
|
consumedIndices[i+1] = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
currentParts = append(currentParts, part)
|
|
}
|
|
|
|
// Handle function responses (function call outputs)
|
|
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCallOutput && msg.ResponsesToolMessage != nil {
|
|
responseMap := make(map[string]any)
|
|
|
|
if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr != nil {
|
|
output := *msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr
|
|
if json.Valid([]byte(output)) {
|
|
responseMap["output"] = json.RawMessage(output)
|
|
} else {
|
|
responseMap["output"] = output
|
|
}
|
|
}
|
|
funcName := ""
|
|
if msg.ResponsesToolMessage.Name != nil && strings.TrimSpace(*msg.ResponsesToolMessage.Name) != "" {
|
|
funcName = *msg.ResponsesToolMessage.Name
|
|
} else if msg.ResponsesToolMessage.CallID != nil {
|
|
funcName = *msg.ResponsesToolMessage.CallID
|
|
}
|
|
|
|
responseBytes, _ := providerUtils.MarshalSorted(responseMap)
|
|
functionResponse := &FunctionResponse{
|
|
Name: funcName,
|
|
Response: json.RawMessage(responseBytes),
|
|
}
|
|
if msg.ResponsesToolMessage.CallID != nil {
|
|
functionResponse.ID = *msg.ResponsesToolMessage.CallID
|
|
}
|
|
|
|
currentParts = append(currentParts, &Part{
|
|
FunctionResponse: functionResponse,
|
|
})
|
|
}
|
|
|
|
// Handle reasoning messages
|
|
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeReasoning && msg.ResponsesReasoning != nil {
|
|
// Skip this reasoning message if it was already consumed as a thought signature
|
|
if consumedIndices[i] {
|
|
continue
|
|
}
|
|
|
|
// Reasoning content is in the Summary array
|
|
if len(msg.ResponsesReasoning.Summary) > 0 {
|
|
for _, summaryBlock := range msg.ResponsesReasoning.Summary {
|
|
if summaryBlock.Text != "" {
|
|
currentParts = append(currentParts, &Part{
|
|
Text: summaryBlock.Text,
|
|
Thought: true,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
if msg.ResponsesReasoning.EncryptedContent != nil {
|
|
decodedSig, err := base64.StdEncoding.DecodeString(*msg.ResponsesReasoning.EncryptedContent)
|
|
if err == nil {
|
|
currentParts = append(currentParts, &Part{
|
|
ThoughtSignature: decodedSig,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Add the last candidate if we have parts
|
|
if len(currentParts) > 0 {
|
|
candidate := &Candidate{
|
|
Index: int32(len(candidates)),
|
|
Content: &Content{
|
|
Parts: currentParts,
|
|
Role: currentRole,
|
|
},
|
|
}
|
|
|
|
// Determine finish reason: prefer StopReason (Bifrost canonical), fall back to IncompleteDetails
|
|
if bifrostResp.StopReason != nil {
|
|
candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.StopReason)
|
|
} else if bifrostResp.IncompleteDetails != nil {
|
|
switch bifrostResp.IncompleteDetails.Reason {
|
|
case "max_tokens":
|
|
candidate.FinishReason = FinishReasonMaxTokens
|
|
case "content_filter":
|
|
candidate.FinishReason = FinishReasonSafety
|
|
default:
|
|
candidate.FinishReason = FinishReasonOther
|
|
}
|
|
} else {
|
|
candidate.FinishReason = FinishReasonStop
|
|
}
|
|
|
|
// Attach grounding metadata if web search was used
|
|
if lastWebSearchCall != nil {
|
|
candidate.GroundingMetadata = buildGroundingMetadataFromWebSearch(lastWebSearchCall, webSearchAnnotations, lastRenderedContent)
|
|
}
|
|
|
|
candidates = append(candidates, candidate)
|
|
}
|
|
|
|
geminiResp.Candidates = candidates
|
|
}
|
|
|
|
// Convert usage metadata
|
|
if bifrostResp.Usage != nil {
|
|
geminiResp.UsageMetadata = ConvertBifrostResponsesUsageToGeminiUsageMetadata(bifrostResp.Usage)
|
|
}
|
|
|
|
return geminiResp
|
|
}
|
|
|
|
// BifrostToGeminiStreamState tracks state when converting Bifrost streams to Gemini format
|
|
type BifrostToGeminiStreamState struct {
|
|
// Web search buffering
|
|
WebSearchCall *schemas.ResponsesMessage // Buffered web_search_call
|
|
Annotations []schemas.ResponsesOutputMessageContentTextAnnotation // Buffered annotations
|
|
RenderedContent *string // Buffered rendered content from search entry point
|
|
HasWebSearch bool // Whether we've seen web search
|
|
|
|
// Tool call tracking (for FunctionCallArgumentsDone events that don't include Item)
|
|
ToolCallNames map[int]string // Maps output_index to tool name
|
|
ToolCallIDs map[int]string // Maps output_index to tool call ID
|
|
}
|
|
|
|
// NewBifrostToGeminiStreamState creates a new state for Bifrost→Gemini streaming
|
|
func NewBifrostToGeminiStreamState() *BifrostToGeminiStreamState {
|
|
return &BifrostToGeminiStreamState{
|
|
Annotations: make([]schemas.ResponsesOutputMessageContentTextAnnotation, 0),
|
|
ToolCallNames: make(map[int]string),
|
|
ToolCallIDs: make(map[int]string),
|
|
}
|
|
}
|
|
|
|
func ToGeminiResponsesStreamResponse(bifrostResp *schemas.BifrostResponsesStreamResponse, state *BifrostToGeminiStreamState) *GenerateContentResponse {
|
|
if bifrostResp == nil {
|
|
return nil
|
|
}
|
|
|
|
// Initialize state if not provided (backward compatibility)
|
|
if state == nil {
|
|
state = NewBifrostToGeminiStreamState()
|
|
}
|
|
|
|
// Buffer web search call
|
|
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputItemDone &&
|
|
bifrostResp.Item != nil &&
|
|
bifrostResp.Item.Type != nil &&
|
|
*bifrostResp.Item.Type == schemas.ResponsesMessageTypeWebSearchCall {
|
|
state.WebSearchCall = bifrostResp.Item
|
|
state.HasWebSearch = true
|
|
return nil // Don't emit yet, wait for completion
|
|
}
|
|
|
|
// Buffer annotations
|
|
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputTextAnnotationAdded &&
|
|
bifrostResp.Annotation != nil {
|
|
state.Annotations = append(state.Annotations, *bifrostResp.Annotation)
|
|
return nil // Don't emit yet, wait for completion
|
|
}
|
|
|
|
// Buffer rendered_content messages
|
|
if bifrostResp.Type == schemas.ResponsesStreamResponseTypeOutputItemDone &&
|
|
bifrostResp.Item != nil &&
|
|
bifrostResp.Item.Content != nil &&
|
|
bifrostResp.Item.Content.ContentBlocks != nil {
|
|
for _, block := range bifrostResp.Item.Content.ContentBlocks {
|
|
if block.Type == schemas.ResponsesOutputMessageContentTypeRenderedContent &&
|
|
block.ResponsesOutputMessageContentRenderedContent != nil &&
|
|
block.ResponsesOutputMessageContentRenderedContent.RenderedContent != "" {
|
|
state.RenderedContent = &block.ResponsesOutputMessageContentRenderedContent.RenderedContent
|
|
return nil // Don't emit yet, wait for completion
|
|
}
|
|
}
|
|
}
|
|
|
|
// Skip lifecycle events that don't have corresponding Gemini equivalents
|
|
switch bifrostResp.Type {
|
|
case schemas.ResponsesStreamResponseTypePing,
|
|
schemas.ResponsesStreamResponseTypeCreated,
|
|
schemas.ResponsesStreamResponseTypeInProgress,
|
|
schemas.ResponsesStreamResponseTypeReasoningSummaryPartAdded,
|
|
schemas.ResponsesStreamResponseTypeQueued,
|
|
// Skip web search lifecycle events - buffered above
|
|
schemas.ResponsesStreamResponseTypeWebSearchCallInProgress,
|
|
schemas.ResponsesStreamResponseTypeWebSearchCallSearching,
|
|
schemas.ResponsesStreamResponseTypeWebSearchCallCompleted,
|
|
schemas.ResponsesStreamResponseTypeWebSearchCallResultsAdded,
|
|
schemas.ResponsesStreamResponseTypeWebSearchCallResultsCompleted:
|
|
// These are lifecycle events with no Gemini equivalent or are buffered
|
|
return nil
|
|
}
|
|
|
|
streamResp := &GenerateContentResponse{
|
|
Candidates: []*Candidate{
|
|
{
|
|
Content: &Content{
|
|
Parts: []*Part{},
|
|
Role: "model",
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
candidate := streamResp.Candidates[0]
|
|
|
|
switch bifrostResp.Type {
|
|
case schemas.ResponsesStreamResponseTypeOutputTextDelta:
|
|
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
|
|
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
|
|
Text: *bifrostResp.Delta,
|
|
})
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeReasoningSummaryTextDelta:
|
|
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
|
|
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
|
|
Text: *bifrostResp.Delta,
|
|
Thought: true,
|
|
})
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta:
|
|
// For streaming, we'll accumulate these, but Gemini typically sends complete calls
|
|
// We'll return nil here and let the done event handle it
|
|
return nil
|
|
|
|
// Function call completed
|
|
case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone:
|
|
// Handle arguments from either Item.ResponsesToolMessage or directly from Arguments field
|
|
var argsStr *string
|
|
var name *string
|
|
var callID *string
|
|
|
|
if bifrostResp.Item != nil && bifrostResp.Item.ResponsesToolMessage != nil {
|
|
argsStr = bifrostResp.Item.ResponsesToolMessage.Arguments
|
|
name = bifrostResp.Item.ResponsesToolMessage.Name
|
|
callID = bifrostResp.Item.ResponsesToolMessage.CallID
|
|
}
|
|
if argsStr == nil && bifrostResp.Arguments != nil {
|
|
// Some providers (e.g., Anthropic) send Arguments directly on the response
|
|
argsStr = bifrostResp.Arguments
|
|
// Try to get name and callID from state if available
|
|
if state != nil {
|
|
outputIndex := 0
|
|
if bifrostResp.OutputIndex != nil {
|
|
outputIndex = *bifrostResp.OutputIndex
|
|
}
|
|
if name == nil {
|
|
if n, ok := state.ToolCallNames[outputIndex]; ok {
|
|
name = &n
|
|
}
|
|
}
|
|
if callID == nil {
|
|
if id, ok := state.ToolCallIDs[outputIndex]; ok {
|
|
callID = &id
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if argsStr != nil {
|
|
rawArgs := strings.TrimSpace(*argsStr)
|
|
if rawArgs == "" {
|
|
rawArgs = "{}"
|
|
}
|
|
var argsRaw json.RawMessage
|
|
var buf bytes.Buffer
|
|
if err := json.Compact(&buf, []byte(rawArgs)); err == nil {
|
|
argsRaw = buf.Bytes()
|
|
} else {
|
|
argsRaw = json.RawMessage("{}")
|
|
}
|
|
functionCall := &FunctionCall{
|
|
Name: "",
|
|
Args: argsRaw,
|
|
}
|
|
if name != nil {
|
|
functionCall.Name = *name
|
|
}
|
|
|
|
var thoughtSig string
|
|
if callID != nil {
|
|
// Extract thought signature from CallID if present
|
|
if strings.Contains(*callID, thoughtSignatureSeparator) {
|
|
parts := strings.SplitN(*callID, thoughtSignatureSeparator, 2)
|
|
if len(parts) == 2 {
|
|
thoughtSig = parts[1]
|
|
}
|
|
}
|
|
functionCall.ID = *callID
|
|
}
|
|
functionCallPart := &Part{
|
|
FunctionCall: functionCall,
|
|
}
|
|
if thoughtSig != "" {
|
|
if decodedSig, err := base64.RawURLEncoding.DecodeString(thoughtSig); err == nil {
|
|
functionCallPart.ThoughtSignature = decodedSig
|
|
}
|
|
}
|
|
candidate.Content.Parts = append(candidate.Content.Parts, functionCallPart)
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeOutputTextDone:
|
|
// Text was already streamed via OutputTextDelta chunks, skip to avoid duplication
|
|
return nil
|
|
|
|
case schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
|
|
schemas.ResponsesStreamResponseTypeReasoningSummaryPartDone:
|
|
// Already handled via deltas, skip
|
|
return nil
|
|
case schemas.ResponsesStreamResponseTypeOutputItemAdded:
|
|
if bifrostResp.Item != nil && bifrostResp.Item.ResponsesReasoning != nil && bifrostResp.Item.EncryptedContent != nil {
|
|
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
|
|
ThoughtSignature: []byte(*bifrostResp.Item.ResponsesReasoning.EncryptedContent),
|
|
})
|
|
}
|
|
// Track function call metadata for later use in FunctionCallArgumentsDone
|
|
if bifrostResp.Item != nil && bifrostResp.Item.Type != nil &&
|
|
*bifrostResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall &&
|
|
bifrostResp.Item.ResponsesToolMessage != nil {
|
|
outputIndex := 0
|
|
if bifrostResp.OutputIndex != nil {
|
|
outputIndex = *bifrostResp.OutputIndex
|
|
}
|
|
if bifrostResp.Item.ResponsesToolMessage.Name != nil {
|
|
state.ToolCallNames[outputIndex] = *bifrostResp.Item.ResponsesToolMessage.Name
|
|
}
|
|
if bifrostResp.Item.ResponsesToolMessage.CallID != nil {
|
|
state.ToolCallIDs[outputIndex] = *bifrostResp.Item.ResponsesToolMessage.CallID
|
|
}
|
|
}
|
|
return nil
|
|
|
|
case schemas.ResponsesStreamResponseTypeOutputItemDone:
|
|
return nil
|
|
|
|
case schemas.ResponsesStreamResponseTypeContentPartAdded:
|
|
// Handle content parts that contain images, audio, or files
|
|
if bifrostResp.Part != nil {
|
|
part, err := convertContentBlockToGeminiPart(*bifrostResp.Part)
|
|
if err == nil && part != nil {
|
|
candidate.Content.Parts = append(candidate.Content.Parts, part)
|
|
}
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeContentPartDone:
|
|
// Already handled via ContentPartAdded
|
|
return nil
|
|
|
|
case schemas.ResponsesStreamResponseTypeCompleted:
|
|
if bifrostResp.Response != nil {
|
|
// Set model version if available
|
|
if bifrostResp.Response.Model != "" {
|
|
streamResp.ModelVersion = bifrostResp.Response.Model
|
|
}
|
|
|
|
// Convert usage metadata if available
|
|
if bifrostResp.Response.Usage != nil {
|
|
streamResp.UsageMetadata = ConvertBifrostResponsesUsageToGeminiUsageMetadata(bifrostResp.Response.Usage)
|
|
}
|
|
|
|
// Derive finish reason from StopReason when present
|
|
if bifrostResp.Response.StopReason != nil {
|
|
candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.Response.StopReason)
|
|
} else {
|
|
candidate.FinishReason = FinishReasonStop
|
|
}
|
|
|
|
// Attach grounding metadata if we buffered web search data
|
|
if state.HasWebSearch && state.WebSearchCall != nil {
|
|
candidate.GroundingMetadata = buildGroundingMetadataFromWebSearch(state.WebSearchCall, state.Annotations, state.RenderedContent)
|
|
}
|
|
}
|
|
|
|
// Response failed
|
|
case schemas.ResponsesStreamResponseTypeFailed:
|
|
candidate.FinishReason = FinishReasonOther
|
|
if bifrostResp.Response != nil && bifrostResp.Response.Error != nil {
|
|
streamResp.PromptFeedback = &GenerateContentResponsePromptFeedback{
|
|
BlockReason: "ERROR",
|
|
BlockReasonMessage: bifrostResp.Response.Error.Message,
|
|
}
|
|
}
|
|
|
|
// Refusal
|
|
case schemas.ResponsesStreamResponseTypeRefusalDelta:
|
|
if bifrostResp.Delta != nil && *bifrostResp.Delta != "" {
|
|
candidate.Content.Parts = append(candidate.Content.Parts, &Part{
|
|
Text: *bifrostResp.Delta,
|
|
})
|
|
}
|
|
|
|
case schemas.ResponsesStreamResponseTypeRefusalDone:
|
|
if bifrostResp.Refusal != nil && *bifrostResp.Refusal != "" {
|
|
candidate.FinishReason = FinishReasonSafety
|
|
}
|
|
|
|
default:
|
|
// For any other event types we don't explicitly handle, return nil
|
|
return nil
|
|
}
|
|
|
|
// If we didn't add any parts and there's no metadata, return nil
|
|
if len(candidate.Content.Parts) == 0 && streamResp.UsageMetadata == nil &&
|
|
streamResp.PromptFeedback == nil && candidate.FinishReason == "" {
|
|
return nil
|
|
}
|
|
|
|
return streamResp
|
|
}
|
|
|
|
// GeminiResponsesStreamState tracks state during streaming conversion for responses API
type GeminiResponsesStreamState struct {
	// Lifecycle flags
	HasEmittedCreated    bool // Whether response.created has been sent
	HasEmittedInProgress bool // Whether response.in_progress has been sent
	HasEmittedCompleted  bool // Whether response.completed has been sent

	// Item tracking
	CurrentOutputIndex int            // Current output index counter
	TextOutputIndex    int            // Output index of the current text item (cached for reuse); -1 after a reset
	ItemIDs            map[int]string // Maps output_index to item ID
	TextItemClosed     bool           // Whether text item has been closed

	// Tool call tracking
	ToolCallIDs         map[int]string // Maps output_index to tool call ID
	ToolCallNames       map[int]string // Maps output_index to tool name
	ToolArgumentBuffers map[int]string // Accumulates tool arguments as JSON

	// Response metadata
	MessageID  *string // Generated message ID
	Model      *string // Model version
	CreatedAt  int     // Timestamp for consistency
	ResponseID *string // Gemini's responseId

	// Content tracking
	HasStartedText     bool            // Whether we've started text content
	HasStartedToolCall bool            // Whether we've started a tool call
	TextBuffer         strings.Builder // Accumulates text deltas for output_text.done

	// Web search tracking
	HasEmittedWebSearch bool // Whether web_search_call events have been emitted
}

// geminiResponsesStreamStatePool provides a pool for Gemini responses stream state objects.
var geminiResponsesStreamStatePool = sync.Pool{
	New: func() any {
		// flush establishes the initial values (allocated maps,
		// TextOutputIndex = -1, CreatedAt = now), so new and recycled states
		// start out identical.
		state := &GeminiResponsesStreamState{}
		state.flush()
		return state
	},
}

// acquireGeminiResponsesStreamState gets a Gemini responses stream state from
// the pool. The returned state is always reset, whether new or recycled.
func acquireGeminiResponsesStreamState() *GeminiResponsesStreamState {
	state := geminiResponsesStreamStatePool.Get().(*GeminiResponsesStreamState)
	state.flush()
	return state
}

// releaseGeminiResponsesStreamState resets a Gemini responses stream state and
// returns it to the pool. A nil state is ignored.
func releaseGeminiResponsesStreamState(state *GeminiResponsesStreamState) {
	if state != nil {
		state.flush()
		geminiResponsesStreamStatePool.Put(state)
	}
}

// flush resets the state to its initial values so it can serve a new stream.
// Maps are cleared in place (or allocated when nil), CreatedAt is set to the
// current Unix time, and every flag is cleared. HasEmittedWebSearch is
// included: left set on a pooled state, it would suppress web search events in
// the next stream that reuses the object.
func (state *GeminiResponsesStreamState) flush() {
	// Clear maps
	if state.ItemIDs == nil {
		state.ItemIDs = make(map[int]string)
	} else {
		clear(state.ItemIDs)
	}
	if state.ToolCallIDs == nil {
		state.ToolCallIDs = make(map[int]string)
	} else {
		clear(state.ToolCallIDs)
	}
	if state.ToolCallNames == nil {
		state.ToolCallNames = make(map[int]string)
	} else {
		clear(state.ToolCallNames)
	}
	if state.ToolArgumentBuffers == nil {
		state.ToolArgumentBuffers = make(map[int]string)
	} else {
		clear(state.ToolArgumentBuffers)
	}

	// Item tracking and response metadata
	state.CurrentOutputIndex = 0
	state.TextOutputIndex = -1
	state.MessageID = nil
	state.Model = nil
	state.ResponseID = nil
	state.CreatedAt = int(time.Now().Unix())

	// Flags
	state.HasEmittedCreated = false
	state.HasEmittedCompleted = false
	state.HasEmittedInProgress = false
	state.TextItemClosed = false
	state.HasStartedText = false
	state.HasStartedToolCall = false
	state.HasEmittedWebSearch = false

	state.TextBuffer.Reset()
}
|
|
|
|
// closeTextItemIfOpen closes the text item if it's open and returns the responses.
|
|
// Returns nil if no text item was open.
|
|
func (state *GeminiResponsesStreamState) closeTextItemIfOpen(sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
if state.HasStartedText && !state.TextItemClosed {
|
|
return closeGeminiTextItem(state, sequenceNumber)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// nextOutputIndex returns the current output index and increments it for the next use.
|
|
func (state *GeminiResponsesStreamState) nextOutputIndex() int {
|
|
index := state.CurrentOutputIndex
|
|
state.CurrentOutputIndex++
|
|
return index
|
|
}
|
|
|
|
// generateItemID creates a unique item ID with the given suffix.
|
|
// Falls back to index-based ID if MessageID is nil.
|
|
func (state *GeminiResponsesStreamState) generateItemID(suffix string, outputIndex int) string {
|
|
if state.MessageID != nil {
|
|
return fmt.Sprintf("msg_%s_%s_%d", *state.MessageID, suffix, outputIndex)
|
|
}
|
|
return fmt.Sprintf("%s_%d", suffix, outputIndex)
|
|
}
|
|
|
|
// ToBifrostResponsesStream converts a Gemini stream event to Bifrost Responses Stream responses
|
|
func (response *GenerateContentResponse) ToBifrostResponsesStream(sequenceNumber int, state *GeminiResponsesStreamState) ([]*schemas.BifrostResponsesStreamResponse, *schemas.BifrostError) {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// First event: Emit response.created and response.in_progress
|
|
if !state.HasEmittedCreated {
|
|
// Generate message ID
|
|
if state.MessageID == nil {
|
|
messageID := fmt.Sprintf("msg_%d", state.CreatedAt)
|
|
state.MessageID = &messageID
|
|
}
|
|
|
|
// Set model and response ID from Gemini
|
|
if response.ModelVersion != "" && state.Model == nil {
|
|
state.Model = &response.ModelVersion
|
|
}
|
|
if response.ResponseID != "" && state.ResponseID == nil {
|
|
state.ResponseID = &response.ResponseID
|
|
}
|
|
|
|
// Emit response.created
|
|
createdResp := &schemas.BifrostResponsesResponse{
|
|
ID: state.MessageID,
|
|
CreatedAt: state.CreatedAt,
|
|
}
|
|
if state.Model != nil {
|
|
createdResp.Model = *state.Model
|
|
}
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeCreated,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
Response: createdResp,
|
|
})
|
|
state.HasEmittedCreated = true
|
|
|
|
// Emit response.in_progress
|
|
inProgressResp := &schemas.BifrostResponsesResponse{
|
|
ID: state.MessageID,
|
|
CreatedAt: state.CreatedAt,
|
|
}
|
|
if state.Model != nil {
|
|
inProgressResp.Model = *state.Model
|
|
}
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeInProgress,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
Response: inProgressResp,
|
|
})
|
|
state.HasEmittedInProgress = true
|
|
}
|
|
|
|
// Process candidates
|
|
if len(response.Candidates) > 0 {
|
|
candidate := response.Candidates[0]
|
|
|
|
if candidate.Content != nil && len(candidate.Content.Parts) > 0 {
|
|
for _, part := range candidate.Content.Parts {
|
|
partResponses := processGeminiPart(part, state, sequenceNumber+len(responses))
|
|
responses = append(responses, partResponses...)
|
|
}
|
|
}
|
|
|
|
// Check for finish reason (indicates end of generation)
|
|
// Only close if we've actually started emitting content (text, tool calls, etc.)
|
|
// This prevents emitting response.completed for empty chunks with just finishReason
|
|
if candidate.FinishReason != "" && len(state.ItemIDs) > 0 {
|
|
// Check for grounding metadata (web search results)
|
|
if candidate.GroundingMetadata != nil && !state.HasEmittedWebSearch {
|
|
// Emit web search events before closing
|
|
webSearchResponses := emitWebSearchFromGroundingMetadata(
|
|
candidate.GroundingMetadata,
|
|
state,
|
|
sequenceNumber+len(responses),
|
|
)
|
|
responses = append(responses, webSearchResponses...)
|
|
}
|
|
|
|
// Close any open items
|
|
closeResponses := closeGeminiOpenItems(state, candidate.GroundingMetadata, response.UsageMetadata, sequenceNumber+len(responses), candidate.FinishReason, candidate.FinishMessage)
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
}
|
|
|
|
return responses, nil
|
|
}
|
|
|
|
// processGeminiPart processes a single Gemini part and returns appropriate lifecycle events
|
|
func processGeminiPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
switch {
|
|
case part.Thought && part.Text != "":
|
|
// Reasoning/thinking content
|
|
responses = append(responses, processGeminiThoughtPart(part, state, sequenceNumber)...)
|
|
case part.Text != "" && !part.Thought:
|
|
// Regular text content
|
|
responses = append(responses, processGeminiTextPart(part, state, sequenceNumber)...)
|
|
|
|
case part.FunctionCall != nil:
|
|
// Function call
|
|
responses = append(responses, processGeminiFunctionCallPart(part, state, sequenceNumber)...)
|
|
|
|
case part.ThoughtSignature != nil:
|
|
// Encrypted reasoning content (thoughtSignature)
|
|
responses = append(responses, processGeminiThoughtSignaturePart(part, state, sequenceNumber)...)
|
|
|
|
case part.FunctionResponse != nil:
|
|
// Function response (tool result)
|
|
responses = append(responses, processGeminiFunctionResponsePart(part, state, sequenceNumber)...)
|
|
case part.InlineData != nil:
|
|
// Inline data
|
|
responses = append(responses, processGeminiInlineDataPart(part, state, sequenceNumber)...)
|
|
case part.FileData != nil:
|
|
// File data
|
|
responses = append(responses, processGeminiFileDataPart(part, state, sequenceNumber)...)
|
|
}
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiTextPart handles regular text parts
|
|
func processGeminiTextPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
var outputIndex int
|
|
// If this is the first text, emit output_item.added and content_part.added
|
|
if !state.HasStartedText {
|
|
outputIndex = state.nextOutputIndex()
|
|
state.TextOutputIndex = outputIndex // Cache the text item's output index
|
|
itemID := state.generateItemID("item", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Emit output_item.added
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("in_progress"),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{},
|
|
},
|
|
},
|
|
})
|
|
|
|
// Emit content_part.added
|
|
contentIndex := 0
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: schemas.Ptr(""),
|
|
ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
|
|
LogProbs: []schemas.ResponsesOutputMessageContentTextLogProb{},
|
|
Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
|
|
},
|
|
},
|
|
})
|
|
|
|
state.HasStartedText = true
|
|
} else {
|
|
// Text already started, reuse the cached text item's output index
|
|
outputIndex = state.TextOutputIndex
|
|
}
|
|
|
|
// Emit output_text.delta for the text content
|
|
if part.Text != "" {
|
|
itemID := state.ItemIDs[outputIndex]
|
|
contentIndex := 0
|
|
text := part.Text
|
|
|
|
// Accumulate text for output_text.done
|
|
state.TextBuffer.WriteString(text)
|
|
|
|
streamResponse := &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputTextDelta,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Delta: &text,
|
|
LogProbs: []schemas.ResponsesOutputMessageContentTextLogProb{},
|
|
}
|
|
if len(part.ThoughtSignature) > 0 {
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
streamResponse.Signature = &thoughtSig
|
|
}
|
|
|
|
responses = append(responses, streamResponse)
|
|
}
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiThoughtPart handles reasoning/thought parts
|
|
func processGeminiThoughtPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// For Gemini thoughts/reasoning, we emit them as reasoning summary text deltas
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("reasoning", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Emit output_item.added for reasoning
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
},
|
|
})
|
|
|
|
// Emit reasoning summary part added
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeReasoningSummaryPartAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// Emit reasoning summary text delta with the thought content
|
|
if part.Text != "" {
|
|
text := part.Text
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeReasoningSummaryTextDelta,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Delta: &text,
|
|
})
|
|
}
|
|
|
|
// Emit reasoning summary text done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// Emit reasoning summary part done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeReasoningSummaryPartDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// Emit output_item.done for reasoning
|
|
statusCompleted := "completed"
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &statusCompleted,
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
},
|
|
},
|
|
})
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiThoughtSignaturePart handles encrypted reasoning content (thoughtSignature)
|
|
func processGeminiThoughtSignaturePart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Create a new reasoning item for the thought signature
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("reasoning", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Convert thoughtSignature to string
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
|
|
// Emit output_item.added for reasoning with encrypted content
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
EncryptedContent: &thoughtSig,
|
|
},
|
|
},
|
|
})
|
|
|
|
// Emit output_item.done for reasoning (thought signature is complete)
|
|
statusCompleted := "completed"
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &statusCompleted,
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
EncryptedContent: &thoughtSig,
|
|
},
|
|
},
|
|
})
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiFunctionCallPart handles function call parts
|
|
func processGeminiFunctionCallPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Start new function call item
|
|
outputIndex := state.nextOutputIndex()
|
|
|
|
toolUseID := part.FunctionCall.ID
|
|
if toolUseID == "" {
|
|
toolUseID = part.FunctionCall.Name // Fallback to name as ID
|
|
}
|
|
|
|
state.ItemIDs[outputIndex] = toolUseID
|
|
state.ToolCallIDs[outputIndex] = toolUseID
|
|
state.ToolCallNames[outputIndex] = part.FunctionCall.Name
|
|
|
|
// Convert args to JSON string
|
|
argsJSON := ""
|
|
if len(part.FunctionCall.Args) > 0 {
|
|
argsJSON = string(part.FunctionCall.Args)
|
|
}
|
|
state.ToolArgumentBuffers[outputIndex] = argsJSON
|
|
|
|
// Attach thought signature to ID if present
|
|
if len(part.ThoughtSignature) > 0 && !strings.Contains(toolUseID, thoughtSignatureSeparator) {
|
|
encoded := base64.RawURLEncoding.EncodeToString(part.ThoughtSignature)
|
|
toolUseID = fmt.Sprintf("%s%s%s", toolUseID, thoughtSignatureSeparator, encoded)
|
|
}
|
|
|
|
// Emit output_item.added for function call
|
|
status := "in_progress"
|
|
addedEvent := &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &toolUseID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &toolUseID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
|
|
Status: &status,
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &toolUseID,
|
|
Name: &part.FunctionCall.Name,
|
|
Arguments: schemas.Ptr(""),
|
|
},
|
|
},
|
|
}
|
|
|
|
responses = append(responses, addedEvent)
|
|
|
|
// Generate synthetic argument deltas to simulate streaming behavior
|
|
if argsJSON != "" {
|
|
deltaEvents := generateSyntheticFunctionCallArgumentDeltas(
|
|
argsJSON,
|
|
&outputIndex,
|
|
&toolUseID,
|
|
sequenceNumber+len(responses),
|
|
)
|
|
responses = append(responses, deltaEvents...)
|
|
}
|
|
|
|
// Gemini sends complete function calls, so emit done event after synthetic deltas
|
|
doneEvent := &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &toolUseID,
|
|
Arguments: &argsJSON,
|
|
Item: &schemas.ResponsesMessage{
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &toolUseID,
|
|
Name: &part.FunctionCall.Name,
|
|
},
|
|
},
|
|
}
|
|
|
|
responses = append(responses, doneEvent)
|
|
|
|
outputItemDone := &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &toolUseID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &toolUseID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
|
|
Status: schemas.Ptr("completed"),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &toolUseID,
|
|
Name: &part.FunctionCall.Name,
|
|
Arguments: &argsJSON,
|
|
},
|
|
},
|
|
}
|
|
|
|
responses = append(responses, outputItemDone)
|
|
|
|
delete(state.ToolArgumentBuffers, outputIndex)
|
|
|
|
state.HasStartedToolCall = true
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiFunctionResponsePart handles function response (tool result) parts
|
|
func processGeminiFunctionResponsePart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Extract output from function response
|
|
output := extractFunctionResponseOutput(part.FunctionResponse)
|
|
|
|
// Create new output item for the function response
|
|
outputIndex := state.nextOutputIndex()
|
|
|
|
responseID := part.FunctionResponse.ID
|
|
if responseID == "" {
|
|
responseID = part.FunctionResponse.Name // Fallback to name
|
|
}
|
|
|
|
itemID := fmt.Sprintf("func_resp_%s", responseID)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Emit output_item.added for function call output
|
|
status := "completed"
|
|
item := &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &status,
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &responseID,
|
|
Output: &schemas.ResponsesToolMessageOutputStruct{
|
|
ResponsesToolCallOutputStr: &output,
|
|
},
|
|
},
|
|
}
|
|
|
|
// Set tool name if present
|
|
if name := strings.TrimSpace(part.FunctionResponse.Name); name != "" {
|
|
item.ResponsesToolMessage.Name = schemas.Ptr(name)
|
|
}
|
|
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: item,
|
|
})
|
|
|
|
// Immediately emit output_item.done since function responses are complete
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &status,
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &responseID,
|
|
Output: &schemas.ResponsesToolMessageOutputStruct{
|
|
ResponsesToolCallOutputStr: &output,
|
|
},
|
|
},
|
|
},
|
|
})
|
|
// Add tool name if present
|
|
if name := strings.TrimSpace(part.FunctionResponse.Name); name != "" {
|
|
last := responses[len(responses)-1]
|
|
if last.Item != nil && last.Item.ResponsesToolMessage != nil {
|
|
last.Item.ResponsesToolMessage.Name = schemas.Ptr(name)
|
|
}
|
|
}
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiInlineDataPart handles inline data parts
|
|
func processGeminiInlineDataPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Convert inline data to content block
|
|
block := convertGeminiInlineDataToContentBlock(part.InlineData)
|
|
if block == nil {
|
|
return responses
|
|
}
|
|
|
|
// Create new output item for the inline data
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("item", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Emit output_item.added with the inline data content block
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{*block},
|
|
},
|
|
},
|
|
})
|
|
|
|
// Emit content_part.added
|
|
contentIndex := 0
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: block,
|
|
})
|
|
|
|
// Emit content_part.done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: block,
|
|
})
|
|
|
|
// Emit output_item.done
|
|
statusCompleted := "completed"
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &statusCompleted,
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{},
|
|
},
|
|
},
|
|
})
|
|
|
|
return responses
|
|
}
|
|
|
|
// processGeminiFileDataPart handles file data parts
|
|
func processGeminiFileDataPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Convert file data to content block
|
|
block := convertGeminiFileDataToContentBlock(part.FileData)
|
|
if block == nil {
|
|
return responses
|
|
}
|
|
|
|
// Create new output item for the file data
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("item", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Emit output_item.added with the file data content block
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{*block},
|
|
},
|
|
},
|
|
})
|
|
|
|
// Emit content_part.added
|
|
contentIndex := 0
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: block,
|
|
})
|
|
|
|
// Emit content_part.done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: block,
|
|
})
|
|
|
|
// Emit output_item.done
|
|
statusCompleted := "completed"
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: &statusCompleted,
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{},
|
|
},
|
|
},
|
|
})
|
|
|
|
return responses
|
|
}
|
|
|
|
// closeGeminiTextItem closes the text item and emits appropriate done events
|
|
func closeGeminiTextItem(state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
outputIndex := state.TextOutputIndex
|
|
itemID := state.ItemIDs[outputIndex]
|
|
contentIndex := 0
|
|
|
|
// Emit output_text.done
|
|
fullText := state.TextBuffer.String()
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputTextDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Text: &fullText,
|
|
LogProbs: []schemas.ResponsesOutputMessageContentTextLogProb{},
|
|
})
|
|
|
|
// Emit content_part.done
|
|
part := &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: schemas.Ptr(""),
|
|
ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
|
|
LogProbs: []schemas.ResponsesOutputMessageContentTextLogProb{},
|
|
Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
|
|
},
|
|
}
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: part,
|
|
})
|
|
|
|
// Emit output_item.done
|
|
doneItem := &schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: schemas.Ptr("completed"),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{},
|
|
},
|
|
}
|
|
if itemID != "" {
|
|
doneItem.ID = &itemID
|
|
}
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: doneItem,
|
|
})
|
|
|
|
state.TextItemClosed = true
|
|
|
|
return responses
|
|
}
|
|
|
|
// closeGeminiOpenItems closes any open items and emits the final completed event
|
|
func closeGeminiOpenItems(state *GeminiResponsesStreamState, groundingMetadata *GroundingMetadata, usage *GenerateContentResponseUsageMetadata, sequenceNumber int, finishReason FinishReason, finishMessage string) []*schemas.BifrostResponsesStreamResponse {
|
|
if state.HasEmittedCompleted {
|
|
return nil
|
|
}
|
|
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Close text item if still open
|
|
if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil {
|
|
responses = append(responses, closeResponses...)
|
|
}
|
|
|
|
// Emit annotations from grounding supports if present
|
|
if groundingMetadata != nil && len(groundingMetadata.GroundingSupports) > 0 && state.TextOutputIndex >= 0 {
|
|
annotationResponses := emitAnnotationsFromGroundingSupports(
|
|
groundingMetadata,
|
|
state,
|
|
sequenceNumber+len(responses),
|
|
)
|
|
responses = append(responses, annotationResponses...)
|
|
}
|
|
|
|
// Close any open tool calls
|
|
for outputIndex := range state.ToolArgumentBuffers {
|
|
itemID := state.ItemIDs[outputIndex]
|
|
toolCallID := state.ToolCallIDs[outputIndex]
|
|
toolName := state.ToolCallNames[outputIndex]
|
|
toolArgs := state.ToolArgumentBuffers[outputIndex]
|
|
if strings.TrimSpace(toolName) == "" {
|
|
toolName = toolCallID
|
|
}
|
|
|
|
// Emit output_item.done for tool call
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
|
|
Status: schemas.Ptr("completed"),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &toolCallID,
|
|
Name: &toolName,
|
|
Arguments: &toolArgs,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
|
|
// For error finish reasons with a finish message, emit the error as text content BEFORE completed event
|
|
// This ensures the error message is visible to the client
|
|
if isErrorFinishReason(finishReason) && finishMessage != "" {
|
|
errorText := fmt.Sprintf("Error: %s - %s", finishReason, finishMessage)
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("error", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
contentIndex := 0
|
|
|
|
// Emit output_item.added for error message
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("in_progress"),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{},
|
|
},
|
|
},
|
|
})
|
|
|
|
// Emit content_part.added
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: schemas.Ptr(""),
|
|
},
|
|
})
|
|
|
|
// Emit output_text.delta with the error message
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputTextDelta,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Delta: &errorText,
|
|
})
|
|
|
|
// Emit output_text.done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputTextDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Text: &errorText,
|
|
})
|
|
|
|
// Emit content_part.done
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeContentPartDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: &contentIndex,
|
|
ItemID: &itemID,
|
|
Part: &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: &errorText,
|
|
},
|
|
})
|
|
|
|
// Emit output_item.done for error message
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("completed"),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: &errorText,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
})
|
|
}
|
|
|
|
// Emit response.completed with usage
|
|
bifrostUsage := ConvertGeminiUsageMetadataToResponsesUsage(usage)
|
|
|
|
completedResp := &schemas.BifrostResponsesResponse{
|
|
ID: state.MessageID,
|
|
CreatedAt: state.CreatedAt,
|
|
Usage: bifrostUsage,
|
|
}
|
|
if state.Model != nil {
|
|
completedResp.Model = *state.Model
|
|
}
|
|
|
|
// Set stop reason from finish reason
|
|
if finishReason != "" {
|
|
stopReason := ConvertGeminiFinishReasonToBifrost(finishReason)
|
|
completedResp.StopReason = &stopReason
|
|
|
|
// For error finish reasons, set status to failed
|
|
if isErrorFinishReason(finishReason) {
|
|
failedStatus := "failed"
|
|
completedResp.Status = &failedStatus
|
|
}
|
|
}
|
|
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeCompleted,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
Response: completedResp,
|
|
})
|
|
|
|
state.HasEmittedCompleted = true
|
|
|
|
return responses
|
|
}
|
|
|
|
// FinalizeGeminiResponsesStream finalizes the stream by closing any open items and emitting completed event
|
|
func FinalizeGeminiResponsesStream(state *GeminiResponsesStreamState, usage *GenerateContentResponseUsageMetadata, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
return closeGeminiOpenItems(state, nil, usage, sequenceNumber, "", "")
|
|
}
|
|
|
|
// convertGeminiSystemInstructionToResponsesMessage converts Gemini SystemInstruction to a system role message
|
|
func convertGeminiSystemInstructionToResponsesMessage(systemInstruction *Content) *schemas.ResponsesMessage {
|
|
if systemInstruction == nil || len(systemInstruction.Parts) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var contentBlocks []schemas.ResponsesMessageContentBlock
|
|
var hasTextContent bool
|
|
|
|
for _, part := range systemInstruction.Parts {
|
|
if part.Text != "" {
|
|
contentBlocks = append(contentBlocks, schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeText,
|
|
Text: &part.Text,
|
|
})
|
|
hasTextContent = true
|
|
}
|
|
}
|
|
|
|
if !hasTextContent {
|
|
return nil
|
|
}
|
|
|
|
// If single text block, use ContentStr
|
|
if len(contentBlocks) == 1 {
|
|
return &schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentStr: contentBlocks[0].Text,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Multiple blocks, use ContentBlocks
|
|
return &schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: contentBlocks,
|
|
},
|
|
}
|
|
}
|
|
|
|
func convertGeminiContentsToResponsesMessages(contents []Content) []schemas.ResponsesMessage {
|
|
var messages []schemas.ResponsesMessage
|
|
// Track function call IDs by name to match with responses
|
|
functionCallIDs := make(map[string]string)
|
|
|
|
for _, content := range contents {
|
|
// Determine the role for all messages from this Content
|
|
var role *schemas.ResponsesMessageRoleType
|
|
switch content.Role {
|
|
case "model":
|
|
role = schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant)
|
|
case "user":
|
|
role = schemas.Ptr(schemas.ResponsesInputMessageRoleUser)
|
|
default:
|
|
// Default to user for unknown roles
|
|
role = schemas.Ptr(schemas.ResponsesInputMessageRoleUser)
|
|
}
|
|
|
|
// Process each part - each part can become a separate message
|
|
for _, part := range content.Parts {
|
|
switch {
|
|
case part.FunctionCall != nil:
|
|
// Function call message
|
|
argsJSON := "{}"
|
|
if len(part.FunctionCall.Args) > 0 {
|
|
argsJSON = string(part.FunctionCall.Args)
|
|
}
|
|
|
|
callID := part.FunctionCall.ID
|
|
if callID == "" {
|
|
callID = part.FunctionCall.Name
|
|
}
|
|
|
|
// Track this function call ID by name for later matching with responses
|
|
functionCallIDs[part.FunctionCall.Name] = callID
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &callID,
|
|
Name: &part.FunctionCall.Name,
|
|
Arguments: &argsJSON,
|
|
},
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
// If this part also has a thought signature, create a separate reasoning message
|
|
if len(part.ThoughtSignature) > 0 {
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
reasoningMsg := schemas.ResponsesMessage{
|
|
Role: role,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
EncryptedContent: &thoughtSig,
|
|
},
|
|
}
|
|
messages = append(messages, reasoningMsg)
|
|
}
|
|
|
|
case part.FunctionResponse != nil:
|
|
// Function response message
|
|
responseID := part.FunctionResponse.ID
|
|
if responseID == "" {
|
|
// Try to find the matching function call ID by name
|
|
if callID, ok := functionCallIDs[part.FunctionResponse.Name]; ok {
|
|
responseID = callID
|
|
} else {
|
|
// Fallback to function name if no matching call found
|
|
responseID = part.FunctionResponse.Name
|
|
}
|
|
}
|
|
|
|
// Convert response to string — extract output field if present
|
|
responseStr := ""
|
|
if part.FunctionResponse.Response != nil {
|
|
if r := providerUtils.GetJSONField(part.FunctionResponse.Response, "output"); r.Exists() {
|
|
responseStr = r.String()
|
|
} else {
|
|
responseStr = string(part.FunctionResponse.Response)
|
|
}
|
|
}
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: &responseID,
|
|
Output: &schemas.ResponsesToolMessageOutputStruct{
|
|
ResponsesToolCallOutputStr: &responseStr,
|
|
},
|
|
},
|
|
}
|
|
|
|
messages = append(messages, msg)
|
|
|
|
case part.Thought && part.Text != "":
|
|
// Thought/reasoning text content
|
|
msg := schemas.ResponsesMessage{
|
|
Role: role,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeReasoning,
|
|
Text: &part.Text,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.Text != "":
|
|
// Regular text message
|
|
msg := schemas.ResponsesMessage{
|
|
Role: role,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: func() schemas.ResponsesMessageContentBlockType {
|
|
if content.Role == "model" {
|
|
return schemas.ResponsesOutputMessageContentTypeText
|
|
}
|
|
return schemas.ResponsesInputMessageContentBlockTypeText
|
|
}(),
|
|
Text: &part.Text,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
// add signature to above text content block if present
|
|
if len(part.ThoughtSignature) > 0 {
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
msg.Content.ContentBlocks[len(msg.Content.ContentBlocks)-1].Signature = &thoughtSig
|
|
}
|
|
|
|
messages = append(messages, msg)
|
|
|
|
case part.InlineData != nil:
|
|
// Handle inline data (images, audio, files)
|
|
block := convertGeminiInlineDataToContentBlock(part.InlineData)
|
|
if block != nil {
|
|
msg := schemas.ResponsesMessage{
|
|
Role: role,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{*block},
|
|
},
|
|
}
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
case part.FileData != nil:
|
|
// Handle file data (URI-based)
|
|
block := convertGeminiFileDataToContentBlock(part.FileData)
|
|
if block != nil {
|
|
msg := schemas.ResponsesMessage{
|
|
Role: role,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{*block},
|
|
},
|
|
}
|
|
messages = append(messages, msg)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return messages
|
|
}
|
|
|
|
// convertGeminiInlineDataToContentBlock converts Gemini inline data (blob) to content block
|
|
func convertGeminiInlineDataToContentBlock(blob *Blob) *schemas.ResponsesMessageContentBlock {
|
|
if blob == nil {
|
|
return nil
|
|
}
|
|
|
|
// Determine content type based on MIME type
|
|
mimeType := blob.MIMEType
|
|
if mimeType == "" {
|
|
return nil
|
|
}
|
|
|
|
// Handle images
|
|
if isImageMimeType(mimeType) {
|
|
// Convert to base64 data URL
|
|
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, blob.Data)
|
|
return &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeImage,
|
|
ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{
|
|
ImageURL: &imageURL,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Handle audio
|
|
if strings.HasPrefix(mimeType, "audio/") {
|
|
encodedData := blob.Data
|
|
format := mimeType
|
|
if strings.HasPrefix(mimeType, "audio/") {
|
|
format = mimeType[6:] // Remove "audio/" prefix
|
|
}
|
|
|
|
return &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeAudio,
|
|
Audio: &schemas.ResponsesInputMessageContentBlockAudio{
|
|
Format: format,
|
|
Data: encodedData,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Handle other files - format as data URL
|
|
mimeTypeForFile := mimeType
|
|
if mimeTypeForFile == "" {
|
|
mimeTypeForFile = "application/pdf"
|
|
}
|
|
|
|
filename := blob.DisplayName
|
|
if filename == "" {
|
|
filename = "unnamed_file"
|
|
}
|
|
|
|
fileDataURL := blob.Data
|
|
if !strings.HasPrefix(fileDataURL, "data:") {
|
|
fileDataURL = fmt.Sprintf("data:%s;base64,%s", mimeTypeForFile, fileDataURL)
|
|
}
|
|
return &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeFile,
|
|
ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
|
|
FileData: &fileDataURL,
|
|
FileType: &mimeTypeForFile,
|
|
Filename: &filename,
|
|
},
|
|
}
|
|
}
|
|
|
|
// convertGeminiFileDataToContentBlock converts Gemini file data (URI) to content block
|
|
func convertGeminiFileDataToContentBlock(fileData *FileData) *schemas.ResponsesMessageContentBlock {
|
|
if fileData == nil || fileData.FileURI == "" {
|
|
return nil
|
|
}
|
|
|
|
mimeType := fileData.MIMEType
|
|
if mimeType == "" {
|
|
mimeType = "application/pdf"
|
|
}
|
|
|
|
// Handle images
|
|
if isImageMimeType(mimeType) {
|
|
return &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeImage,
|
|
ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{
|
|
ImageURL: &fileData.FileURI,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Handle other files
|
|
block := &schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeFile,
|
|
ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
|
|
FileURL: &fileData.FileURI,
|
|
},
|
|
}
|
|
|
|
// Set FileType if available
|
|
block.ResponsesInputMessageContentBlockFile.FileType = &mimeType
|
|
|
|
return block
|
|
}
|
|
|
|
func convertGeminiToolsToResponsesTools(tools []Tool) []schemas.ResponsesTool {
|
|
var responsesTools []schemas.ResponsesTool
|
|
|
|
for _, tool := range tools {
|
|
// you cant use function declarations and google search together
|
|
if tool.GoogleSearch != nil {
|
|
responsesTool := schemas.ResponsesTool{
|
|
Type: schemas.ResponsesToolTypeWebSearch,
|
|
}
|
|
responsesTool.ResponsesToolWebSearch = &schemas.ResponsesToolWebSearch{}
|
|
if tool.GoogleSearch.TimeRangeFilter != nil || len(tool.GoogleSearch.ExcludeDomains) > 0 {
|
|
filters := &schemas.ResponsesToolWebSearchFilters{
|
|
BlockedDomains: tool.GoogleSearch.ExcludeDomains,
|
|
}
|
|
if tool.GoogleSearch.TimeRangeFilter != nil {
|
|
filters.TimeRangeFilter = &schemas.Interval{
|
|
StartTime: tool.GoogleSearch.TimeRangeFilter.StartTime,
|
|
EndTime: tool.GoogleSearch.TimeRangeFilter.EndTime,
|
|
}
|
|
}
|
|
responsesTool.ResponsesToolWebSearch.Filters = filters
|
|
}
|
|
responsesTools = append(responsesTools, responsesTool)
|
|
} else if len(tool.FunctionDeclarations) > 0 {
|
|
for _, fn := range tool.FunctionDeclarations {
|
|
responsesTool := schemas.ResponsesTool{
|
|
Type: schemas.ResponsesToolTypeFunction,
|
|
Name: schemas.Ptr(fn.Name),
|
|
Description: schemas.Ptr(fn.Description),
|
|
ResponsesToolFunction: &schemas.ResponsesToolFunction{},
|
|
}
|
|
// Convert parameters schema if present
|
|
if fn.Parameters != nil {
|
|
params := convertSchemaToFunctionParameters(fn.Parameters)
|
|
responsesTool.ResponsesToolFunction.Parameters = ¶ms
|
|
}
|
|
responsesTools = append(responsesTools, responsesTool)
|
|
}
|
|
}
|
|
}
|
|
|
|
return responsesTools
|
|
}
|
|
|
|
func convertGeminiToolConfigToToolChoice(toolConfig *ToolConfig) *schemas.ResponsesToolChoice {
|
|
if toolConfig == nil || toolConfig.FunctionCallingConfig == nil {
|
|
return nil
|
|
}
|
|
|
|
toolChoice := &schemas.ResponsesToolChoiceStruct{
|
|
Type: schemas.ResponsesToolChoiceTypeFunction,
|
|
}
|
|
|
|
switch toolConfig.FunctionCallingConfig.Mode {
|
|
case FunctionCallingConfigModeAuto:
|
|
toolChoice.Mode = schemas.Ptr("auto")
|
|
case FunctionCallingConfigModeNone:
|
|
toolChoice.Mode = schemas.Ptr("none")
|
|
default:
|
|
toolChoice.Mode = schemas.Ptr("auto")
|
|
}
|
|
|
|
if toolConfig.FunctionCallingConfig.AllowedFunctionNames != nil {
|
|
for _, functionName := range toolConfig.FunctionCallingConfig.AllowedFunctionNames {
|
|
toolChoice.Tools = append(toolChoice.Tools, schemas.ResponsesToolChoiceAllowedToolDef{
|
|
Type: string(schemas.ResponsesToolTypeFunction),
|
|
Name: schemas.Ptr(functionName),
|
|
})
|
|
}
|
|
}
|
|
|
|
return &schemas.ResponsesToolChoice{
|
|
ResponsesToolChoiceStruct: toolChoice,
|
|
}
|
|
}
|
|
|
|
// Helper functions for Responses conversion
|
|
// convertGeminiCandidatesToResponsesOutput converts Gemini candidates to Responses output messages
|
|
func convertGeminiCandidatesToResponsesOutput(candidates []*Candidate) []schemas.ResponsesMessage {
|
|
var messages []schemas.ResponsesMessage
|
|
|
|
for _, candidate := range candidates {
|
|
if candidate.Content == nil || len(candidate.Content.Parts) == 0 {
|
|
continue
|
|
}
|
|
|
|
for _, part := range candidate.Content.Parts {
|
|
// Handle different types of parts
|
|
switch {
|
|
case part.Thought:
|
|
// Thinking/reasoning message
|
|
if part.Text != "" {
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeReasoning,
|
|
Text: &part.Text,
|
|
},
|
|
},
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
}
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
case part.Text != "":
|
|
// Regular text message
|
|
msg := schemas.ResponsesMessage{
|
|
ID: schemas.Ptr("msg_" + providerUtils.GetRandomString(50)),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Status: schemas.Ptr("completed"),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: &part.Text,
|
|
ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
|
|
LogProbs: []schemas.ResponsesOutputMessageContentTextLogProb{},
|
|
Annotations: []schemas.ResponsesOutputMessageContentTextAnnotation{},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
}
|
|
// add signature to above text content block if present
|
|
if len(part.ThoughtSignature) > 0 {
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
msg.Content.ContentBlocks[len(msg.Content.ContentBlocks)-1].Signature = &thoughtSig
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.FunctionCall != nil:
|
|
// Function call message
|
|
// Convert Args to JSON string if it's not already a string
|
|
argumentsStr := ""
|
|
if len(part.FunctionCall.Args) > 0 {
|
|
argumentsStr = string(part.FunctionCall.Args)
|
|
}
|
|
|
|
callID := part.FunctionCall.ID
|
|
if strings.TrimSpace(callID) == "" {
|
|
callID = part.FunctionCall.Name
|
|
}
|
|
|
|
// Attach thought signature to callID (same as streaming path)
|
|
if len(part.ThoughtSignature) > 0 && !strings.Contains(callID, thoughtSignatureSeparator) {
|
|
thoughtSig := base64.RawURLEncoding.EncodeToString(part.ThoughtSignature)
|
|
callID = fmt.Sprintf("%s%s%s", callID, thoughtSignatureSeparator, thoughtSig)
|
|
}
|
|
|
|
name := part.FunctionCall.Name
|
|
toolMsg := &schemas.ResponsesToolMessage{
|
|
CallID: &callID,
|
|
Name: &name,
|
|
Arguments: &argumentsStr,
|
|
}
|
|
msg := schemas.ResponsesMessage{
|
|
ID: schemas.Ptr("fc_" + providerUtils.GetRandomString(50)),
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
|
|
Status: schemas.Ptr("completed"),
|
|
ResponsesToolMessage: toolMsg,
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.FunctionResponse != nil:
|
|
// Function response message
|
|
output := extractFunctionResponseOutput(part.FunctionResponse)
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
CallID: schemas.Ptr(part.FunctionResponse.ID),
|
|
Output: &schemas.ResponsesToolMessageOutputStruct{
|
|
ResponsesToolCallOutputStr: &output,
|
|
},
|
|
},
|
|
}
|
|
|
|
// Also set the tool name if present (Gemini associates on name)
|
|
if name := strings.TrimSpace(part.FunctionResponse.Name); name != "" {
|
|
msg.ResponsesToolMessage.Name = schemas.Ptr(name)
|
|
} else {
|
|
// set name from call id
|
|
// if it contains a thought signature, remove it
|
|
if strings.Contains(part.FunctionResponse.ID, thoughtSignatureSeparator) {
|
|
parts := strings.SplitN(part.FunctionResponse.ID, thoughtSignatureSeparator, 2)
|
|
if len(parts) == 2 {
|
|
name := parts[0]
|
|
msg.ResponsesToolMessage.Name = schemas.Ptr(name)
|
|
}
|
|
} else {
|
|
msg.ResponsesToolMessage.Name = schemas.Ptr(part.FunctionResponse.ID)
|
|
}
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.InlineData != nil:
|
|
// Handle inline data (images, audio, etc.)
|
|
contentBlocks := []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: func() schemas.ResponsesMessageContentBlockType {
|
|
if strings.HasPrefix(part.InlineData.MIMEType, "image/") {
|
|
return schemas.ResponsesInputMessageContentBlockTypeImage
|
|
} else if strings.HasPrefix(part.InlineData.MIMEType, "audio/") {
|
|
return schemas.ResponsesInputMessageContentBlockTypeAudio
|
|
}
|
|
return schemas.ResponsesInputMessageContentBlockTypeText
|
|
}(),
|
|
ResponsesInputMessageContentBlockImage: func() *schemas.ResponsesInputMessageContentBlockImage {
|
|
if strings.HasPrefix(part.InlineData.MIMEType, "image/") {
|
|
return &schemas.ResponsesInputMessageContentBlockImage{
|
|
ImageURL: schemas.Ptr("data:" + part.InlineData.MIMEType + ";base64," + part.InlineData.Data),
|
|
}
|
|
}
|
|
return nil
|
|
}(),
|
|
Audio: func() *schemas.ResponsesInputMessageContentBlockAudio {
|
|
if strings.HasPrefix(part.InlineData.MIMEType, "audio/") {
|
|
// Extract format from MIME type (e.g., "audio/wav" -> "wav")
|
|
format := strings.TrimPrefix(part.InlineData.MIMEType, "audio/")
|
|
return &schemas.ResponsesInputMessageContentBlockAudio{
|
|
Format: format,
|
|
Data: part.InlineData.Data,
|
|
}
|
|
}
|
|
return nil
|
|
}(),
|
|
},
|
|
}
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: contentBlocks,
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.FileData != nil:
|
|
// Handle file data
|
|
block := schemas.ResponsesMessageContentBlock{
|
|
Type: schemas.ResponsesInputMessageContentBlockTypeFile,
|
|
ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{
|
|
FileURL: schemas.Ptr(part.FileData.FileURI),
|
|
},
|
|
}
|
|
if strings.HasPrefix(part.FileData.MIMEType, "image/") {
|
|
block.Type = schemas.ResponsesInputMessageContentBlockTypeImage
|
|
block.ResponsesInputMessageContentBlockImage = &schemas.ResponsesInputMessageContentBlockImage{
|
|
ImageURL: schemas.Ptr(part.FileData.FileURI),
|
|
}
|
|
}
|
|
contentBlocks := []schemas.ResponsesMessageContentBlock{block}
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: contentBlocks,
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.CodeExecutionResult != nil:
|
|
// Handle code execution results
|
|
output := part.CodeExecutionResult.Output
|
|
if part.CodeExecutionResult.Outcome != OutcomeOK {
|
|
output = "Error: " + output
|
|
}
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: &output,
|
|
},
|
|
},
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeCodeInterpreterCall),
|
|
}
|
|
messages = append(messages, msg)
|
|
|
|
case part.ExecutableCode != nil:
|
|
// Handle executable code
|
|
codeContent := "```" + part.ExecutableCode.Language + "\n" + part.ExecutableCode.Code + "\n```"
|
|
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: &codeContent,
|
|
},
|
|
},
|
|
},
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
}
|
|
messages = append(messages, msg)
|
|
case part.ThoughtSignature != nil:
|
|
// Handle thought signature
|
|
thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature)
|
|
msg := schemas.ResponsesMessage{
|
|
Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant),
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning),
|
|
ResponsesReasoning: &schemas.ResponsesReasoning{
|
|
Summary: []schemas.ResponsesReasoningSummary{},
|
|
EncryptedContent: &thoughtSig,
|
|
},
|
|
}
|
|
messages = append(messages, msg)
|
|
}
|
|
}
|
|
|
|
// check if gemini used google search tool
|
|
if candidate.GroundingMetadata != nil {
|
|
webSearchmessage := schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
|
|
Status: schemas.Ptr("completed"),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
Action: &schemas.ResponsesToolMessageActionStruct{
|
|
ResponsesWebSearchToolCallAction: &schemas.ResponsesWebSearchToolCallAction{
|
|
Type: "search",
|
|
Queries: candidate.GroundingMetadata.WebSearchQueries,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
if len(candidate.GroundingMetadata.WebSearchQueries) > 0 {
|
|
webSearchmessage.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction.Query =
|
|
schemas.Ptr(candidate.GroundingMetadata.WebSearchQueries[0])
|
|
}
|
|
|
|
sources := []schemas.ResponsesWebSearchToolCallActionSearchSource{}
|
|
for _, source := range candidate.GroundingMetadata.GroundingChunks {
|
|
if source.Web != nil {
|
|
sources = append(sources, schemas.ResponsesWebSearchToolCallActionSearchSource{
|
|
Type: "url",
|
|
Title: schemas.Ptr(source.Web.Title),
|
|
URL: source.Web.URI,
|
|
})
|
|
}
|
|
}
|
|
|
|
if len(sources) > 0 {
|
|
webSearchmessage.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction.Sources = sources
|
|
}
|
|
|
|
messages = append(messages, webSearchmessage)
|
|
|
|
// create a annotations message for grounding supports
|
|
if len(candidate.GroundingMetadata.GroundingSupports) > 0 {
|
|
annotations := []schemas.ResponsesOutputMessageContentTextAnnotation{}
|
|
for _, support := range candidate.GroundingMetadata.GroundingSupports {
|
|
if support.Segment != nil {
|
|
annotation := schemas.ResponsesOutputMessageContentTextAnnotation{
|
|
Type: "url_citation",
|
|
Text: schemas.Ptr(support.Segment.Text),
|
|
StartIndex: schemas.Ptr(int(support.Segment.StartIndex)),
|
|
EndIndex: schemas.Ptr(int(support.Segment.EndIndex)),
|
|
}
|
|
|
|
// Look up URL from grounding chunks
|
|
if len(support.GroundingChunkIndices) > 0 {
|
|
chunkIdx := support.GroundingChunkIndices[0]
|
|
if chunkIdx >= 0 && int(chunkIdx) < len(candidate.GroundingMetadata.GroundingChunks) {
|
|
chunk := candidate.GroundingMetadata.GroundingChunks[chunkIdx]
|
|
if chunk.Web != nil {
|
|
annotation.URL = schemas.Ptr(chunk.Web.URI)
|
|
if chunk.Web.Title != "" {
|
|
annotation.Title = schemas.Ptr(chunk.Web.Title)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if annotation.URL != nil {
|
|
annotations = append(annotations, annotation)
|
|
}
|
|
}
|
|
}
|
|
annotationsMessage := schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("completed"),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeText,
|
|
Text: schemas.Ptr(""),
|
|
ResponsesOutputMessageContentText: &schemas.ResponsesOutputMessageContentText{
|
|
Annotations: annotations,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
messages = append(messages, annotationsMessage)
|
|
}
|
|
|
|
// Emit rendered content if present
|
|
if candidate.GroundingMetadata.SearchEntryPoint != nil &&
|
|
candidate.GroundingMetadata.SearchEntryPoint.RenderedContent != "" {
|
|
renderedContentMessage := schemas.ResponsesMessage{
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("completed"),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeRenderedContent,
|
|
ResponsesOutputMessageContentRenderedContent: &schemas.ResponsesOutputMessageContentRenderedContent{
|
|
RenderedContent: candidate.GroundingMetadata.SearchEntryPoint.RenderedContent,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
messages = append(messages, renderedContentMessage)
|
|
}
|
|
}
|
|
}
|
|
|
|
return messages
|
|
}
|
|
|
|
// convertTextConfigToGenerationConfig converts ResponsesTextConfig to Gemini's GenerationConfig fields
|
|
func convertTextConfigToGenerationConfig(textConfig *schemas.ResponsesTextConfig, config *GenerationConfig) {
|
|
if textConfig == nil || config == nil {
|
|
return
|
|
}
|
|
|
|
if textConfig.Format == nil {
|
|
return
|
|
}
|
|
|
|
switch textConfig.Format.Type {
|
|
case "json_schema":
|
|
config.ResponseMIMEType = "application/json"
|
|
if textConfig.Format.JSONSchema != nil {
|
|
if schema := reconstructSchemaFromJSONSchema(textConfig.Format.JSONSchema); schema != nil {
|
|
config.ResponseJSONSchema = schema
|
|
}
|
|
// no schema, mime type remains as is
|
|
}
|
|
|
|
case "json_object":
|
|
config.ResponseMIMEType = "application/json"
|
|
|
|
case "text":
|
|
config.ResponseMIMEType = "text/plain"
|
|
}
|
|
}
|
|
|
|
// reconstructSchemaFromJSONSchema rebuilds a schema map from ResponsesTextConfigFormatJSONSchema
|
|
func reconstructSchemaFromJSONSchema(jsonSchema *schemas.ResponsesTextConfigFormatJSONSchema) interface{} {
|
|
var schema map[string]interface{}
|
|
|
|
if jsonSchema.Schema != nil {
|
|
// If Schema field is set, use it directly
|
|
schemaMap, ok := (*jsonSchema.Schema).(map[string]interface{})
|
|
if !ok {
|
|
return *jsonSchema.Schema
|
|
}
|
|
schema = schemaMap
|
|
} else {
|
|
// New format: Schema is spread across individual fields
|
|
schema = make(map[string]interface{})
|
|
|
|
if jsonSchema.Defs != nil {
|
|
schema["$defs"] = *jsonSchema.Defs
|
|
}
|
|
|
|
if jsonSchema.Type != nil {
|
|
schema["type"] = *jsonSchema.Type
|
|
}
|
|
|
|
if jsonSchema.Properties != nil {
|
|
schema["properties"] = *jsonSchema.Properties
|
|
}
|
|
|
|
if len(jsonSchema.Required) > 0 {
|
|
schema["required"] = jsonSchema.Required
|
|
}
|
|
|
|
if jsonSchema.Description != nil {
|
|
schema["description"] = *jsonSchema.Description
|
|
}
|
|
|
|
if jsonSchema.AdditionalProperties != nil {
|
|
schema["additionalProperties"] = *jsonSchema.AdditionalProperties
|
|
}
|
|
|
|
if jsonSchema.Name != nil {
|
|
schema["title"] = *jsonSchema.Name
|
|
}
|
|
|
|
if len(jsonSchema.PropertyOrdering) > 0 {
|
|
schema["propertyOrdering"] = jsonSchema.PropertyOrdering
|
|
}
|
|
|
|
// Return nil if no fields were populated
|
|
if len(schema) == 0 {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Normalize the schema for Gemini compatibility (handle union types, etc.)
|
|
return normalizeSchemaForGemini(schema)
|
|
}
|
|
|
|
// convertParamsToGenerationConfigResponses converts ResponsesParameters to a Gemini GenerationConfig
|
|
func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(params *schemas.ResponsesParameters) (GenerationConfig, error) {
|
|
config := GenerationConfig{}
|
|
|
|
if params.Temperature != nil {
|
|
config.Temperature = schemas.Ptr(float64(*params.Temperature))
|
|
}
|
|
if params.TopP != nil {
|
|
config.TopP = schemas.Ptr(float64(*params.TopP))
|
|
}
|
|
if params.MaxOutputTokens != nil {
|
|
config.MaxOutputTokens = int32(*params.MaxOutputTokens)
|
|
}
|
|
// Only set ThinkingConfig if the model actually supports thinking
|
|
if params.Reasoning != nil && supportsThinkingConfig(r.Model) {
|
|
config.ThinkingConfig = &GenerationConfigThinkingConfig{
|
|
IncludeThoughts: true,
|
|
}
|
|
|
|
hasMaxTokens := params.Reasoning.MaxTokens != nil
|
|
hasEffort := params.Reasoning.Effort != nil
|
|
supportsLevel := isGemini3Plus(r.Model) // Check if model is 3.0+
|
|
|
|
// PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget)
|
|
// This ensures we send only thinkingBudget to Gemini, not thinkingLevel
|
|
|
|
// Handle "none" effort explicitly (only if max_tokens not present)
|
|
if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" {
|
|
config.ThinkingConfig.IncludeThoughts = false
|
|
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
|
|
} else if hasMaxTokens {
|
|
// User provided max_tokens - use thinkingBudget (all Gemini models support this)
|
|
// If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens
|
|
budget := *params.Reasoning.MaxTokens
|
|
switch budget {
|
|
case 0:
|
|
config.ThinkingConfig.IncludeThoughts = false
|
|
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
|
|
case DynamicReasoningBudget: // Special case: -1 means dynamic budget
|
|
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
|
|
default:
|
|
if err := validateThinkingBudget(r.Model, budget); err != nil {
|
|
return config, err
|
|
}
|
|
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget))
|
|
}
|
|
} else if hasEffort {
|
|
// User provided effort only (no max_tokens)
|
|
if supportsLevel {
|
|
// Gemini 3.0+ - use thinkingLevel (more native)
|
|
config.ThinkingConfig.ThinkingLevel = schemas.Ptr(effortToThinkingLevel(*params.Reasoning.Effort, r.Model))
|
|
} else {
|
|
maxTokens := providerUtils.GetMaxOutputTokensOrDefault(r.Model, DefaultCompletionMaxTokens)
|
|
if config.MaxOutputTokens > 0 {
|
|
maxTokens = int(config.MaxOutputTokens)
|
|
}
|
|
budgetRange := getThinkingBudgetRange(r.Model, maxTokens)
|
|
// Gemini < 3.0 - must convert effort to budget
|
|
budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(
|
|
*params.Reasoning.Effort,
|
|
budgetRange.Min,
|
|
budgetRange.Max,
|
|
)
|
|
if err == nil {
|
|
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if params.Text != nil {
|
|
convertTextConfigToGenerationConfig(params.Text, &config)
|
|
}
|
|
|
|
if params.ExtraParams != nil {
|
|
if topK, ok := params.ExtraParams["top_k"]; ok {
|
|
delete(params.ExtraParams, "top_k")
|
|
if val, success := schemas.SafeExtractInt(topK); success {
|
|
config.TopK = schemas.Ptr(val)
|
|
}
|
|
}
|
|
if frequencyPenalty, ok := params.ExtraParams["frequency_penalty"]; ok {
|
|
delete(params.ExtraParams, "frequency_penalty")
|
|
if val, success := schemas.SafeExtractFloat64(frequencyPenalty); success {
|
|
config.FrequencyPenalty = schemas.Ptr(val)
|
|
}
|
|
}
|
|
if presencePenalty, ok := params.ExtraParams["presence_penalty"]; ok {
|
|
delete(params.ExtraParams, "presence_penalty")
|
|
if val, success := schemas.SafeExtractFloat64(presencePenalty); success {
|
|
config.PresencePenalty = schemas.Ptr(val)
|
|
}
|
|
}
|
|
if stopSequences, ok := params.ExtraParams["stop_sequences"]; ok {
|
|
delete(params.ExtraParams, "stop_sequences")
|
|
if val, success := schemas.SafeExtractStringSlice(stopSequences); success {
|
|
config.StopSequences = val
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
return config, nil
|
|
}
|
|
|
|
// convertResponsesToolsToGemini converts Responses tools to Gemini tools
|
|
func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) []Tool {
|
|
geminiTool := Tool{}
|
|
|
|
hasWebSearchTool := false
|
|
|
|
for _, tool := range tools {
|
|
if tool.Type == schemas.ResponsesToolTypeWebSearch {
|
|
hasWebSearchTool = true
|
|
break
|
|
}
|
|
}
|
|
|
|
for _, tool := range tools {
|
|
// you cant use function declarations and google search together
|
|
if tool.Type == schemas.ResponsesToolTypeFunction && !hasWebSearchTool {
|
|
// Extract function information from ResponsesExtendedTool
|
|
if tool.ResponsesToolFunction != nil {
|
|
if tool.Name != nil && tool.ResponsesToolFunction != nil {
|
|
funcDecl := &FunctionDeclaration{
|
|
Name: *tool.Name,
|
|
Description: func() string {
|
|
if tool.Description != nil {
|
|
return *tool.Description
|
|
}
|
|
return ""
|
|
}(),
|
|
Parameters: func() *Schema {
|
|
if tool.ResponsesToolFunction.Parameters != nil {
|
|
return convertFunctionParametersToSchema(*tool.ResponsesToolFunction.Parameters)
|
|
}
|
|
return nil
|
|
}(),
|
|
}
|
|
geminiTool.FunctionDeclarations = append(geminiTool.FunctionDeclarations, funcDecl)
|
|
}
|
|
}
|
|
}
|
|
if tool.Type == schemas.ResponsesToolTypeWebSearch {
|
|
geminiTool.GoogleSearch = &GoogleSearch{}
|
|
if tool.ResponsesToolWebSearch != nil && tool.ResponsesToolWebSearch.Filters != nil {
|
|
if tool.ResponsesToolWebSearch.Filters.TimeRangeFilter != nil {
|
|
geminiTool.GoogleSearch.TimeRangeFilter = &Interval{
|
|
StartTime: tool.ResponsesToolWebSearch.Filters.TimeRangeFilter.StartTime,
|
|
EndTime: tool.ResponsesToolWebSearch.Filters.TimeRangeFilter.EndTime,
|
|
}
|
|
}
|
|
if len(tool.ResponsesToolWebSearch.Filters.BlockedDomains) > 0 {
|
|
geminiTool.GoogleSearch.ExcludeDomains = tool.ResponsesToolWebSearch.Filters.BlockedDomains
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(geminiTool.FunctionDeclarations) > 0 || geminiTool.GoogleSearch != nil {
|
|
return []Tool{geminiTool}
|
|
}
|
|
return []Tool{}
|
|
}
|
|
|
|
// convertResponsesToolChoiceToGemini converts Responses tool choice to Gemini tool config
|
|
func convertResponsesToolChoiceToGemini(toolChoice *schemas.ResponsesToolChoice) *ToolConfig {
|
|
config := &ToolConfig{}
|
|
|
|
if toolChoice.ResponsesToolChoiceStruct != nil {
|
|
funcConfig := &FunctionCallingConfig{}
|
|
ext := toolChoice.ResponsesToolChoiceStruct
|
|
|
|
if ext.Mode != nil {
|
|
switch *ext.Mode {
|
|
case "auto":
|
|
funcConfig.Mode = FunctionCallingConfigModeAuto
|
|
case "required":
|
|
funcConfig.Mode = FunctionCallingConfigModeAny
|
|
case "none":
|
|
funcConfig.Mode = FunctionCallingConfigModeNone
|
|
}
|
|
}
|
|
|
|
if ext.Name != nil {
|
|
funcConfig.Mode = FunctionCallingConfigModeAny
|
|
funcConfig.AllowedFunctionNames = []string{*ext.Name}
|
|
}
|
|
|
|
config.FunctionCallingConfig = funcConfig
|
|
return config
|
|
}
|
|
|
|
// Handle string-based tool choice modes
|
|
if toolChoice.ResponsesToolChoiceStr != nil {
|
|
funcConfig := &FunctionCallingConfig{}
|
|
switch *toolChoice.ResponsesToolChoiceStr {
|
|
case "none":
|
|
funcConfig.Mode = FunctionCallingConfigModeNone
|
|
case "required", "any":
|
|
funcConfig.Mode = FunctionCallingConfigModeAny
|
|
default: // "auto" or any other value
|
|
funcConfig.Mode = FunctionCallingConfigModeAuto
|
|
}
|
|
config.FunctionCallingConfig = funcConfig
|
|
}
|
|
|
|
return config
|
|
}
|
|
|
|
// convertResponsesMessagesToGeminiContents converts Responses messages to Gemini contents
|
|
func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessage) ([]Content, *Content, error) {
|
|
var contents []Content
|
|
var systemInstruction *Content
|
|
|
|
// Build a map from callID → function name by scanning function_call messages.
|
|
callIDToName := make(map[string]string)
|
|
for i := range messages {
|
|
m := &messages[i]
|
|
if m.Type != nil && *m.Type == schemas.ResponsesMessageTypeFunctionCall &&
|
|
m.ResponsesToolMessage != nil &&
|
|
m.ResponsesToolMessage.CallID != nil &&
|
|
m.ResponsesToolMessage.Name != nil {
|
|
if name := strings.TrimSpace(*m.ResponsesToolMessage.Name); name != "" {
|
|
callIDToName[*m.ResponsesToolMessage.CallID] = name
|
|
}
|
|
}
|
|
}
|
|
|
|
// Track consecutive function call output messages to group them for parallel function calling
|
|
// According to Gemini docs, all function responses must be in a single message
|
|
var pendingFunctionResponseParts []*Part
|
|
|
|
for i, msg := range messages {
|
|
// Skip standalone reasoning messages (they're handled as part of function calls)
|
|
if msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeReasoning && msg.ResponsesReasoning != nil {
|
|
continue
|
|
}
|
|
|
|
// Handle system messages separately
|
|
if msg.Role != nil && *msg.Role == schemas.ResponsesInputMessageRoleSystem {
|
|
if systemInstruction == nil {
|
|
systemInstruction = &Content{}
|
|
}
|
|
|
|
// Convert system message content
|
|
if msg.Content != nil {
|
|
if msg.Content.ContentStr != nil {
|
|
systemInstruction.Parts = append(systemInstruction.Parts, &Part{
|
|
Text: *msg.Content.ContentStr,
|
|
})
|
|
}
|
|
if msg.Content.ContentBlocks != nil {
|
|
for _, block := range msg.Content.ContentBlocks {
|
|
part, err := convertContentBlockToGeminiPart(block)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("failed to convert system message content block: %w", err)
|
|
}
|
|
if part != nil {
|
|
systemInstruction.Parts = append(systemInstruction.Parts, part)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
continue
|
|
}
|
|
|
|
// Check if this is a function call output message
|
|
isFunctionOutput := msg.Type != nil && *msg.Type == schemas.ResponsesMessageTypeFunctionCallOutput && msg.ResponsesToolMessage != nil
|
|
|
|
// If we have pending function responses and current message is NOT a function output,
|
|
// flush the pending responses as a single Content (for parallel function calling)
|
|
if len(pendingFunctionResponseParts) > 0 && !isFunctionOutput {
|
|
contents = append(contents, Content{
|
|
Parts: pendingFunctionResponseParts,
|
|
Role: "model", // Function responses use "model" role in Gemini
|
|
})
|
|
pendingFunctionResponseParts = nil
|
|
}
|
|
|
|
// Handle regular messages
|
|
content := Content{}
|
|
|
|
if msg.Role != nil {
|
|
// Map Responses roles to Gemini roles (Gemini only supports "user" and "model")
|
|
switch *msg.Role {
|
|
case schemas.ResponsesInputMessageRoleAssistant:
|
|
content.Role = "model"
|
|
case schemas.ResponsesInputMessageRoleUser, schemas.ResponsesInputMessageRoleDeveloper:
|
|
content.Role = "user"
|
|
default:
|
|
// Default to "user" for input messages (any instructions/context)
|
|
content.Role = "user"
|
|
}
|
|
}
|
|
|
|
// Handle tool calls/responses
|
|
if msg.ResponsesToolMessage != nil && msg.Type != nil {
|
|
switch *msg.Type {
|
|
case schemas.ResponsesMessageTypeFunctionCall:
|
|
// Convert function call to Gemini FunctionCall
|
|
if msg.ResponsesToolMessage.Name != nil {
|
|
var argsRaw json.RawMessage
|
|
if msg.ResponsesToolMessage.Arguments != nil {
|
|
rawArgs := strings.TrimSpace(*msg.ResponsesToolMessage.Arguments)
|
|
if rawArgs == "" {
|
|
rawArgs = "{}"
|
|
}
|
|
var buf bytes.Buffer
|
|
if err := json.Compact(&buf, []byte(rawArgs)); err != nil {
|
|
return nil, nil, fmt.Errorf("failed to decode function call arguments: %w", err)
|
|
}
|
|
argsRaw = buf.Bytes()
|
|
}
|
|
if argsRaw == nil {
|
|
argsRaw = json.RawMessage("{}")
|
|
}
|
|
|
|
var thoughtSig string
|
|
part := &Part{
|
|
FunctionCall: &FunctionCall{
|
|
Name: *msg.ResponsesToolMessage.Name,
|
|
Args: argsRaw,
|
|
},
|
|
}
|
|
if msg.ResponsesToolMessage.CallID != nil {
|
|
if strings.Contains(*msg.ResponsesToolMessage.CallID, thoughtSignatureSeparator) {
|
|
parts := strings.SplitN(*msg.ResponsesToolMessage.CallID, thoughtSignatureSeparator, 2)
|
|
if len(parts) == 2 {
|
|
thoughtSig = parts[1] // Extract signature (after separator)
|
|
}
|
|
}
|
|
// Keep the full CallID as-is (don't strip thought signature)
|
|
part.FunctionCall.ID = *msg.ResponsesToolMessage.CallID
|
|
}
|
|
if thoughtSig != "" {
|
|
var err error
|
|
part.ThoughtSignature, err = base64.RawURLEncoding.DecodeString(thoughtSig)
|
|
if err != nil {
|
|
// Silently ignore decode errors - ID will be used without signature
|
|
thoughtSig = ""
|
|
}
|
|
}
|
|
|
|
// Preserve thought signature from ResponsesReasoning message (required for Gemini 3 Pro)
|
|
// Look ahead to see if the next message is a reasoning message with encrypted content
|
|
if i+1 < len(messages) {
|
|
nextMsg := messages[i+1]
|
|
if nextMsg.Type != nil && *nextMsg.Type == schemas.ResponsesMessageTypeReasoning &&
|
|
nextMsg.ResponsesReasoning != nil && nextMsg.ResponsesReasoning.EncryptedContent != nil {
|
|
decodedSig, err := base64.StdEncoding.DecodeString(*nextMsg.ResponsesReasoning.EncryptedContent)
|
|
if err == nil {
|
|
part.ThoughtSignature = decodedSig
|
|
}
|
|
}
|
|
}
|
|
|
|
content.Parts = append(content.Parts, part)
|
|
}
|
|
|
|
case schemas.ResponsesMessageTypeFunctionCallOutput:
|
|
// Convert function response - collect for grouping
|
|
// According to Gemini parallel function calling docs, multiple function responses
|
|
// must be sent in a single message with only functionResponse parts (no text/content parts)
|
|
if msg.ResponsesToolMessage.CallID != nil {
|
|
responseMap := make(map[string]any)
|
|
|
|
// Extract output from ResponsesToolMessage.Output
|
|
if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr != nil {
|
|
output := *msg.ResponsesToolMessage.Output.ResponsesToolCallOutputStr
|
|
if json.Valid([]byte(output)) {
|
|
responseMap["output"] = json.RawMessage(output)
|
|
} else {
|
|
responseMap["output"] = output
|
|
}
|
|
} else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil {
|
|
// Handle structured output blocks (e.g. from Anthropic Responses API format
|
|
// where output is an array of content blocks like [{"type":"input_text","text":"..."}])
|
|
var textParts []string
|
|
for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks {
|
|
if block.Text != nil && *block.Text != "" {
|
|
textParts = append(textParts, *block.Text)
|
|
}
|
|
}
|
|
if len(textParts) > 0 {
|
|
combined := strings.Join(textParts, "\n")
|
|
if json.Valid([]byte(combined)) {
|
|
responseMap["output"] = json.RawMessage(combined)
|
|
} else {
|
|
responseMap["output"] = combined
|
|
}
|
|
} else {
|
|
// Fallback for non-text blocks (e.g. images, files): marshal the raw blocks
|
|
// so responseMap["output"] is never left empty when blocks are present
|
|
rawBlocks, err := providerUtils.MarshalSorted(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks)
|
|
if err == nil && len(rawBlocks) > 0 {
|
|
responseMap["output"] = json.RawMessage(rawBlocks)
|
|
}
|
|
}
|
|
} else if msg.Content != nil && msg.Content.ContentStr != nil {
|
|
// Fallback to Content.ContentStr for backward compatibility
|
|
output := *msg.Content.ContentStr
|
|
if json.Valid([]byte(output)) {
|
|
responseMap["output"] = json.RawMessage(output)
|
|
} else {
|
|
responseMap["output"] = output
|
|
}
|
|
}
|
|
|
|
// Prefer the declared tool name; fallback to callIDToName lookup, then raw CallID
|
|
funcName := ""
|
|
if msg.ResponsesToolMessage.Name != nil && strings.TrimSpace(*msg.ResponsesToolMessage.Name) != "" {
|
|
funcName = *msg.ResponsesToolMessage.Name
|
|
} else if name, ok := callIDToName[*msg.ResponsesToolMessage.CallID]; ok && strings.TrimSpace(name) != "" {
|
|
funcName = name
|
|
} else {
|
|
funcName = *msg.ResponsesToolMessage.CallID
|
|
}
|
|
|
|
responseBytes, _ := providerUtils.MarshalSorted(responseMap)
|
|
part := &Part{
|
|
FunctionResponse: &FunctionResponse{
|
|
Name: funcName,
|
|
Response: json.RawMessage(responseBytes),
|
|
ID: *msg.ResponsesToolMessage.CallID,
|
|
},
|
|
}
|
|
pendingFunctionResponseParts = append(pendingFunctionResponseParts, part)
|
|
|
|
// If this is the last message, flush pending responses
|
|
if i == len(messages)-1 && len(pendingFunctionResponseParts) > 0 {
|
|
contents = append(contents, Content{
|
|
Parts: pendingFunctionResponseParts,
|
|
Role: "model",
|
|
})
|
|
pendingFunctionResponseParts = nil
|
|
}
|
|
|
|
continue // Skip normal content handling
|
|
}
|
|
}
|
|
}
|
|
|
|
// For non-function-output messages, convert message content normally
|
|
if !isFunctionOutput {
|
|
// Convert message content
|
|
if msg.Content != nil {
|
|
if msg.Content.ContentStr != nil {
|
|
content.Parts = append(content.Parts, &Part{
|
|
Text: *msg.Content.ContentStr,
|
|
})
|
|
}
|
|
|
|
if msg.Content.ContentBlocks != nil {
|
|
for _, block := range msg.Content.ContentBlocks {
|
|
part, err := convertContentBlockToGeminiPart(block)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("failed to convert message content block: %w", err)
|
|
}
|
|
if part != nil {
|
|
content.Parts = append(content.Parts, part)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(content.Parts) > 0 {
|
|
contents = append(contents, content)
|
|
}
|
|
}
|
|
|
|
return contents, systemInstruction, nil
|
|
}
|
|
|
|
// convertContentBlockToGeminiPart converts a content block to Gemini part
|
|
func convertContentBlockToGeminiPart(block schemas.ResponsesMessageContentBlock) (*Part, error) {
|
|
switch block.Type {
|
|
case schemas.ResponsesInputMessageContentBlockTypeText,
|
|
schemas.ResponsesOutputMessageContentTypeText:
|
|
if block.Text != nil && *block.Text != "" {
|
|
part := &Part{
|
|
Text: *block.Text,
|
|
}
|
|
if block.Signature != nil {
|
|
decodedSig, err := base64.StdEncoding.DecodeString(*block.Signature)
|
|
if err == nil {
|
|
part.ThoughtSignature = decodedSig
|
|
}
|
|
}
|
|
return part, nil
|
|
}
|
|
|
|
case schemas.ResponsesOutputMessageContentTypeReasoning:
|
|
if block.Text != nil && *block.Text != "" {
|
|
return &Part{
|
|
Text: *block.Text,
|
|
Thought: true,
|
|
}, nil
|
|
}
|
|
|
|
case schemas.ResponsesOutputMessageContentTypeRefusal:
|
|
// Refusals are treated as regular text in Gemini
|
|
if block.ResponsesOutputMessageContentRefusal != nil {
|
|
return &Part{
|
|
Text: block.ResponsesOutputMessageContentRefusal.Refusal,
|
|
}, nil
|
|
}
|
|
|
|
case schemas.ResponsesOutputMessageContentTypeCompaction:
|
|
// Convert compaction to text block for Gemini (compaction is Anthropic-specific)
|
|
if block.ResponsesOutputMessageContentCompaction != nil {
|
|
if summary := strings.TrimSpace(block.ResponsesOutputMessageContentCompaction.Summary); summary != "" {
|
|
return &Part{Text: summary}, nil
|
|
}
|
|
}
|
|
|
|
case schemas.ResponsesInputMessageContentBlockTypeImage:
|
|
if block.ResponsesInputMessageContentBlockImage != nil && block.ResponsesInputMessageContentBlockImage.ImageURL != nil {
|
|
imageURL := *block.ResponsesInputMessageContentBlockImage.ImageURL
|
|
|
|
// Use existing utility functions to handle URL parsing
|
|
sanitizedURL, err := schemas.SanitizeImageURL(imageURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to sanitize image URL: %w", err)
|
|
}
|
|
|
|
urlInfo := schemas.ExtractURLTypeInfo(sanitizedURL)
|
|
mimeType := "image/jpeg" // default
|
|
if urlInfo.MediaType != nil {
|
|
mimeType = *urlInfo.MediaType
|
|
}
|
|
|
|
if urlInfo.Type == schemas.ImageContentTypeBase64 {
|
|
data := ""
|
|
if urlInfo.DataURLWithoutPrefix != nil {
|
|
data = *urlInfo.DataURLWithoutPrefix
|
|
}
|
|
|
|
// Decode base64 data (handles both standard and URL-safe base64)
|
|
decodedData, err := decodeBase64StringToBytes(data)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to decode base64 image data: %w", err)
|
|
}
|
|
|
|
return &Part{
|
|
InlineData: &Blob{
|
|
MIMEType: mimeType,
|
|
Data: encodeBytesToBase64String(decodedData),
|
|
},
|
|
}, nil
|
|
} else {
|
|
return &Part{
|
|
FileData: &FileData{
|
|
MIMEType: mimeType,
|
|
FileURI: sanitizedURL,
|
|
},
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
case schemas.ResponsesInputMessageContentBlockTypeAudio:
|
|
if block.Audio != nil {
|
|
// Decode base64 audio data (handles both standard and URL-safe base64)
|
|
decodedData, err := decodeBase64StringToBytes(block.Audio.Data)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to decode base64 audio data: %w", err)
|
|
}
|
|
|
|
return &Part{
|
|
InlineData: &Blob{
|
|
MIMEType: func() string {
|
|
f := strings.ToLower(strings.TrimSpace(block.Audio.Format))
|
|
if f == "" {
|
|
return "audio/mpeg"
|
|
}
|
|
if strings.HasPrefix(f, "audio/") {
|
|
return f
|
|
}
|
|
return "audio/" + f
|
|
}(),
|
|
Data: encodeBytesToBase64String(decodedData),
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
case schemas.ResponsesInputMessageContentBlockTypeFile:
|
|
if block.ResponsesInputMessageContentBlockFile != nil {
|
|
fileBlock := block.ResponsesInputMessageContentBlockFile
|
|
|
|
// Handle FileURL (URI-based file)
|
|
if fileBlock.FileURL != nil {
|
|
mimeType := "application/pdf"
|
|
if fileBlock.FileType != nil {
|
|
mimeType = *fileBlock.FileType
|
|
}
|
|
|
|
part := &Part{
|
|
FileData: &FileData{
|
|
MIMEType: mimeType,
|
|
FileURI: *fileBlock.FileURL,
|
|
},
|
|
}
|
|
|
|
return part, nil
|
|
}
|
|
|
|
// Handle FileData (inline file data)
|
|
if fileBlock.FileData != nil {
|
|
mimeType := "application/pdf"
|
|
if fileBlock.FileType != nil {
|
|
mimeType = *fileBlock.FileType
|
|
}
|
|
|
|
// Convert file data to bytes using the helper function
|
|
dataBytes, extractedMimeType := convertFileDataToBytes(*fileBlock.FileData)
|
|
if extractedMimeType != "" {
|
|
mimeType = extractedMimeType
|
|
}
|
|
|
|
if len(dataBytes) > 0 {
|
|
part := &Part{
|
|
InlineData: &Blob{
|
|
MIMEType: mimeType,
|
|
Data: encodeBytesToBase64String(dataBytes),
|
|
},
|
|
}
|
|
|
|
return part, nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
// buildGroundingMetadataFromWebSearch converts a Bifrost web_search_call message to Gemini GroundingMetadata
|
|
func buildGroundingMetadataFromWebSearch(webSearchCall *schemas.ResponsesMessage, annotations []schemas.ResponsesOutputMessageContentTextAnnotation, renderedContent *string) *GroundingMetadata {
|
|
if webSearchCall == nil || webSearchCall.ResponsesToolMessage == nil || webSearchCall.ResponsesToolMessage.Action == nil {
|
|
return nil
|
|
}
|
|
|
|
action := webSearchCall.ResponsesToolMessage.Action.ResponsesWebSearchToolCallAction
|
|
if action == nil {
|
|
return nil
|
|
}
|
|
|
|
groundingMetadata := &GroundingMetadata{}
|
|
|
|
// Add SearchEntryPoint with rendered content if provided
|
|
if renderedContent != nil && *renderedContent != "" {
|
|
groundingMetadata.SearchEntryPoint = &SearchEntryPoint{
|
|
RenderedContent: *renderedContent,
|
|
}
|
|
}
|
|
|
|
// Extract web search queries
|
|
if len(action.Queries) > 0 {
|
|
groundingMetadata.WebSearchQueries = action.Queries
|
|
} else if action.Query != nil {
|
|
groundingMetadata.WebSearchQueries = []string{*action.Query}
|
|
}
|
|
|
|
// Extract grounding chunks from sources
|
|
var groundingChunks []*GroundingChunk
|
|
urlToIndexMap := make(map[string]int32) // Map URL to chunk index for annotation processing
|
|
|
|
for _, source := range action.Sources {
|
|
if source.URL == "" {
|
|
continue
|
|
}
|
|
|
|
title := source.URL // Use URL as fallback
|
|
if source.Title != nil && *source.Title != "" {
|
|
title = *source.Title
|
|
}
|
|
|
|
chunk := &GroundingChunk{
|
|
Web: &GroundingChunkWeb{
|
|
URI: source.URL,
|
|
Title: title,
|
|
},
|
|
}
|
|
groundingChunks = append(groundingChunks, chunk)
|
|
urlToIndexMap[source.URL] = int32(len(groundingChunks) - 1)
|
|
}
|
|
|
|
if len(groundingChunks) > 0 {
|
|
groundingMetadata.GroundingChunks = groundingChunks
|
|
}
|
|
|
|
// Convert annotations to grounding supports
|
|
var groundingSupports []*GroundingSupport
|
|
for _, annotation := range annotations {
|
|
if annotation.Type != "url_citation" {
|
|
continue
|
|
}
|
|
|
|
support := &GroundingSupport{
|
|
Segment: &Segment{},
|
|
}
|
|
|
|
// Set segment text
|
|
if annotation.Text != nil {
|
|
support.Segment.Text = *annotation.Text
|
|
}
|
|
|
|
// Set segment indices
|
|
if annotation.StartIndex != nil {
|
|
support.Segment.StartIndex = int32(*annotation.StartIndex)
|
|
}
|
|
if annotation.EndIndex != nil {
|
|
support.Segment.EndIndex = int32(*annotation.EndIndex)
|
|
}
|
|
|
|
// Map annotation URL to chunk indices
|
|
if annotation.URL != nil {
|
|
if chunkIdx, exists := urlToIndexMap[*annotation.URL]; exists {
|
|
support.GroundingChunkIndices = []int32{chunkIdx}
|
|
}
|
|
}
|
|
|
|
// Only add support if we have valid segment or chunk indices
|
|
if support.Segment.Text != "" || len(support.GroundingChunkIndices) > 0 {
|
|
groundingSupports = append(groundingSupports, support)
|
|
}
|
|
}
|
|
|
|
if len(groundingSupports) > 0 {
|
|
groundingMetadata.GroundingSupports = groundingSupports
|
|
}
|
|
|
|
// Return nil if no meaningful data was extracted
|
|
if len(groundingMetadata.WebSearchQueries) == 0 && len(groundingMetadata.GroundingChunks) == 0 {
|
|
return nil
|
|
}
|
|
|
|
return groundingMetadata
|
|
}
|
|
|
|
// emitWebSearchFromGroundingMetadata converts grounding metadata to web search event stream
|
|
func emitWebSearchFromGroundingMetadata(
|
|
metadata *GroundingMetadata,
|
|
state *GeminiResponsesStreamState,
|
|
sequenceNumber int,
|
|
) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
if metadata == nil || len(metadata.WebSearchQueries) == 0 {
|
|
return responses
|
|
}
|
|
|
|
outputIndex := state.nextOutputIndex()
|
|
itemID := state.generateItemID("ws", outputIndex)
|
|
state.ItemIDs[outputIndex] = itemID
|
|
|
|
// Build web search action
|
|
action := &schemas.ResponsesWebSearchToolCallAction{
|
|
Type: "search",
|
|
Queries: metadata.WebSearchQueries,
|
|
}
|
|
if len(metadata.WebSearchQueries) > 0 {
|
|
action.Query = &metadata.WebSearchQueries[0]
|
|
}
|
|
|
|
// Convert groundingChunks to sources
|
|
var sources []schemas.ResponsesWebSearchToolCallActionSearchSource
|
|
for _, chunk := range metadata.GroundingChunks {
|
|
if chunk.Web != nil && chunk.Web.URI != "" {
|
|
source := schemas.ResponsesWebSearchToolCallActionSearchSource{
|
|
Type: "url",
|
|
URL: chunk.Web.URI,
|
|
}
|
|
if chunk.Web.Title != "" {
|
|
source.Title = &chunk.Web.Title
|
|
} else {
|
|
source.Title = &chunk.Web.URI // Fallback to URI
|
|
}
|
|
sources = append(sources, source)
|
|
}
|
|
}
|
|
action.Sources = sources
|
|
|
|
// 1. output_item.added (web_search_call, in_progress)
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
|
|
Status: schemas.Ptr("in_progress"),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
Action: &schemas.ResponsesToolMessageActionStruct{
|
|
ResponsesWebSearchToolCallAction: &schemas.ResponsesWebSearchToolCallAction{
|
|
Type: "search",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
})
|
|
|
|
// 2. web_search_call.in_progress
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeWebSearchCallInProgress,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// 3. web_search_call.searching
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeWebSearchCallSearching,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// 4. web_search_call.completed
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeWebSearchCallCompleted,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
})
|
|
|
|
// 5. output_item.done (with full action including sources)
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &outputIndex,
|
|
ItemID: &itemID,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &itemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeWebSearchCall),
|
|
Status: schemas.Ptr("completed"),
|
|
ResponsesToolMessage: &schemas.ResponsesToolMessage{
|
|
Action: &schemas.ResponsesToolMessageActionStruct{
|
|
ResponsesWebSearchToolCallAction: action,
|
|
},
|
|
},
|
|
},
|
|
})
|
|
|
|
state.HasEmittedWebSearch = true
|
|
|
|
// Emit rendered content if present
|
|
if metadata.SearchEntryPoint != nil && metadata.SearchEntryPoint.RenderedContent != "" {
|
|
renderedIndex := state.nextOutputIndex()
|
|
renderedItemID := state.generateItemID("rc", renderedIndex)
|
|
state.ItemIDs[renderedIndex] = renderedItemID
|
|
|
|
// output_item.added with rendered_content
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &renderedIndex,
|
|
Item: &schemas.ResponsesMessage{
|
|
ID: &renderedItemID,
|
|
Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
|
|
Status: schemas.Ptr("completed"),
|
|
Content: &schemas.ResponsesMessageContent{
|
|
ContentBlocks: []schemas.ResponsesMessageContentBlock{
|
|
{
|
|
Type: schemas.ResponsesOutputMessageContentTypeRenderedContent,
|
|
ResponsesOutputMessageContentRenderedContent: &schemas.ResponsesOutputMessageContentRenderedContent{
|
|
RenderedContent: metadata.SearchEntryPoint.RenderedContent,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
})
|
|
|
|
// output_item.done for rendered content
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputItemDone,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &renderedIndex,
|
|
ItemID: &renderedItemID,
|
|
})
|
|
}
|
|
|
|
return responses
|
|
}
|
|
|
|
// emitAnnotationsFromGroundingSupports converts grounding supports to annotation events
|
|
func emitAnnotationsFromGroundingSupports(
|
|
metadata *GroundingMetadata,
|
|
state *GeminiResponsesStreamState,
|
|
sequenceNumber int,
|
|
) []*schemas.BifrostResponsesStreamResponse {
|
|
var responses []*schemas.BifrostResponsesStreamResponse
|
|
|
|
if metadata == nil || len(metadata.GroundingSupports) == 0 || state.TextOutputIndex < 0 {
|
|
return responses
|
|
}
|
|
|
|
itemID := state.ItemIDs[state.TextOutputIndex]
|
|
|
|
emmitedIndex := 0
|
|
// Convert each grounding support to an annotation event
|
|
for _, support := range metadata.GroundingSupports {
|
|
if support.Segment == nil {
|
|
continue
|
|
}
|
|
|
|
annotation := schemas.ResponsesOutputMessageContentTextAnnotation{
|
|
Type: "url_citation",
|
|
}
|
|
|
|
// Set text and indices
|
|
if support.Segment.Text != "" {
|
|
annotation.Text = &support.Segment.Text
|
|
}
|
|
annotation.StartIndex = schemas.Ptr(int(support.Segment.StartIndex))
|
|
annotation.EndIndex = schemas.Ptr(int(support.Segment.EndIndex))
|
|
|
|
// Find URL and title from chunk indices
|
|
if len(support.GroundingChunkIndices) > 0 {
|
|
chunkIdx := support.GroundingChunkIndices[0]
|
|
if int(chunkIdx) < len(metadata.GroundingChunks) {
|
|
chunk := metadata.GroundingChunks[chunkIdx]
|
|
if chunk.Web != nil {
|
|
annotation.URL = &chunk.Web.URI
|
|
if chunk.Web.Title != "" {
|
|
annotation.Title = &chunk.Web.Title
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if annotation.URL == nil {
|
|
continue
|
|
}
|
|
|
|
// Emit annotation.added event
|
|
responses = append(responses, &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeOutputTextAnnotationAdded,
|
|
SequenceNumber: sequenceNumber + len(responses),
|
|
OutputIndex: &state.TextOutputIndex,
|
|
ItemID: &itemID,
|
|
ContentIndex: schemas.Ptr(0),
|
|
Annotation: &annotation,
|
|
AnnotationIndex: &emmitedIndex,
|
|
})
|
|
emmitedIndex++
|
|
}
|
|
|
|
return responses
|
|
}
|
|
|
|
// generateSyntheticFunctionCallArgumentDeltas creates synthetic FunctionCallArgumentsDelta events
|
|
// from complete JSON arguments to simulate streaming behavior for providers that don't natively stream
|
|
func generateSyntheticFunctionCallArgumentDeltas(argumentsJSON string, outputIndex *int, itemID *string, baseSequenceNumber int) []*schemas.BifrostResponsesStreamResponse {
|
|
var events []*schemas.BifrostResponsesStreamResponse
|
|
|
|
// Chunk size for synthetic streaming (matching realistic streaming patterns)
|
|
chunkSize := 8 // Small chunks to simulate realistic streaming
|
|
|
|
// Break the JSON into chunks
|
|
runes := []rune(argumentsJSON)
|
|
for i := 0; i < len(runes); i += chunkSize {
|
|
end := min(i+chunkSize, len(runes))
|
|
|
|
chunk := string(runes[i:end])
|
|
deltaEvent := &schemas.BifrostResponsesStreamResponse{
|
|
Type: schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta,
|
|
SequenceNumber: baseSequenceNumber + len(events),
|
|
OutputIndex: outputIndex,
|
|
ItemID: itemID,
|
|
Delta: &chunk,
|
|
}
|
|
events = append(events, deltaEvent)
|
|
}
|
|
|
|
return events
|
|
}
|