234 lines
6.5 KiB
Go
234 lines
6.5 KiB
Go
package gemini
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/maximhq/bifrost/core/providers/utils"
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
)
|
|
|
|
// ToBifrostTranscriptionRequest converts a GeminiGenerationRequest to a BifrostTranscriptionRequest
|
|
func (request *GeminiGenerationRequest) ToBifrostTranscriptionRequest(ctx *schemas.BifrostContext) (*schemas.BifrostTranscriptionRequest, error) {
|
|
provider, model := schemas.ParseModelString(request.Model, utils.CheckAndSetDefaultProvider(ctx, schemas.Gemini))
|
|
|
|
bifrostReq := &schemas.BifrostTranscriptionRequest{
|
|
Provider: provider,
|
|
Model: model,
|
|
}
|
|
|
|
// Extract audio data and prompt from contents
|
|
var promptText string
|
|
var audioData []byte
|
|
var audioMimeType string
|
|
|
|
for _, content := range request.Contents {
|
|
for _, part := range content.Parts {
|
|
// Extract text prompt
|
|
if part.Text != "" {
|
|
if promptText != "" {
|
|
promptText += " "
|
|
}
|
|
promptText += part.Text
|
|
}
|
|
|
|
// Extract audio data from inline data
|
|
if part.InlineData != nil && strings.HasPrefix(strings.ToLower(part.InlineData.MIMEType), "audio/") {
|
|
decodedData, err := decodeBase64StringToBytes(part.InlineData.Data)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to decode base64 audio data: %v", err)
|
|
}
|
|
audioData = append(audioData, decodedData...)
|
|
if audioMimeType == "" {
|
|
audioMimeType = part.InlineData.MIMEType
|
|
}
|
|
}
|
|
|
|
// Extract audio data from file data (would need to be fetched separately in real scenario)
|
|
// For now, we just note the file URI in extra params
|
|
if part.FileData != nil && strings.HasPrefix(strings.ToLower(part.FileData.MIMEType), "audio/") {
|
|
if bifrostReq.Params == nil {
|
|
bifrostReq.Params = &schemas.TranscriptionParameters{}
|
|
}
|
|
if bifrostReq.Params.ExtraParams == nil {
|
|
bifrostReq.Params.ExtraParams = make(map[string]interface{})
|
|
}
|
|
bifrostReq.Params.ExtraParams["file_uri"] = part.FileData.FileURI
|
|
if audioMimeType == "" {
|
|
audioMimeType = part.FileData.MIMEType
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set the audio input
|
|
bifrostReq.Input = &schemas.TranscriptionInput{
|
|
File: audioData,
|
|
}
|
|
|
|
// Set parameters
|
|
if bifrostReq.Params == nil {
|
|
bifrostReq.Params = &schemas.TranscriptionParameters{}
|
|
}
|
|
|
|
// Set prompt if provided
|
|
if promptText != "" {
|
|
bifrostReq.Params.Prompt = &promptText
|
|
}
|
|
|
|
// Handle safety settings from request
|
|
if len(request.SafetySettings) > 0 {
|
|
if bifrostReq.Params.ExtraParams == nil {
|
|
bifrostReq.Params.ExtraParams = make(map[string]interface{})
|
|
}
|
|
bifrostReq.Params.ExtraParams["safety_settings"] = request.SafetySettings
|
|
}
|
|
|
|
// Handle cached content
|
|
if request.CachedContent != "" {
|
|
if bifrostReq.Params.ExtraParams == nil {
|
|
bifrostReq.Params.ExtraParams = make(map[string]interface{})
|
|
}
|
|
bifrostReq.Params.ExtraParams["cached_content"] = request.CachedContent
|
|
}
|
|
|
|
// Handle labels
|
|
if len(request.Labels) > 0 {
|
|
if bifrostReq.Params.ExtraParams == nil {
|
|
bifrostReq.Params.ExtraParams = make(map[string]interface{})
|
|
}
|
|
bifrostReq.Params.ExtraParams["labels"] = request.Labels
|
|
}
|
|
|
|
return bifrostReq, nil
|
|
}
|
|
|
|
func ToGeminiTranscriptionRequest(bifrostReq *schemas.BifrostTranscriptionRequest) *GeminiGenerationRequest {
|
|
if bifrostReq == nil {
|
|
return nil
|
|
}
|
|
|
|
// Create the base Gemini generation request
|
|
geminiReq := &GeminiGenerationRequest{
|
|
Model: bifrostReq.Model,
|
|
}
|
|
|
|
// Convert parameters to generation config
|
|
if bifrostReq.Params != nil {
|
|
geminiReq.ExtraParams = bifrostReq.Params.ExtraParams
|
|
// Handle extra parameters
|
|
if bifrostReq.Params.ExtraParams != nil {
|
|
// Safety settings
|
|
if safetySettings, ok := schemas.SafeExtractFromMap(bifrostReq.Params.ExtraParams, "safety_settings"); ok {
|
|
delete(geminiReq.ExtraParams, "safety_settings")
|
|
if settings, ok := SafeExtractSafetySettings(safetySettings); ok {
|
|
geminiReq.SafetySettings = settings
|
|
}
|
|
}
|
|
|
|
// Cached content
|
|
if cachedContent, ok := schemas.SafeExtractString(bifrostReq.Params.ExtraParams["cached_content"]); ok {
|
|
delete(geminiReq.ExtraParams, "cached_content")
|
|
geminiReq.CachedContent = cachedContent
|
|
}
|
|
|
|
// Labels
|
|
if labels, ok := schemas.SafeExtractFromMap(bifrostReq.Params.ExtraParams, "labels"); ok {
|
|
if labelMap, ok := schemas.SafeExtractStringMap(labels); ok {
|
|
delete(geminiReq.ExtraParams, "labels")
|
|
geminiReq.Labels = labelMap
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Determine the prompt text
|
|
var prompt string
|
|
if bifrostReq.Params != nil && bifrostReq.Params.Prompt != nil {
|
|
prompt = *bifrostReq.Params.Prompt
|
|
} else {
|
|
prompt = "Generate a transcript of the speech."
|
|
}
|
|
|
|
// Create parts for the transcription request
|
|
parts := []*Part{
|
|
{
|
|
Text: prompt,
|
|
},
|
|
}
|
|
|
|
// Add audio file if present
|
|
if len(bifrostReq.Input.File) > 0 {
|
|
parts = append(parts, &Part{
|
|
InlineData: &Blob{
|
|
MIMEType: utils.DetectAudioMimeType(bifrostReq.Input.File),
|
|
Data: encodeBytesToBase64String(bifrostReq.Input.File),
|
|
},
|
|
})
|
|
}
|
|
|
|
geminiReq.Contents = []Content{
|
|
{
|
|
Parts: parts,
|
|
},
|
|
}
|
|
|
|
return geminiReq
|
|
}
|
|
|
|
// ToBifrostTranscriptionResponse converts a GenerateContentResponse to a BifrostTranscriptionResponse
|
|
func (response *GenerateContentResponse) ToBifrostTranscriptionResponse() *schemas.BifrostTranscriptionResponse {
|
|
bifrostResp := &schemas.BifrostTranscriptionResponse{}
|
|
|
|
// Process candidates to extract text content
|
|
if len(response.Candidates) > 0 {
|
|
candidate := response.Candidates[0]
|
|
if candidate.Content != nil && len(candidate.Content.Parts) > 0 {
|
|
var textContent string
|
|
|
|
// Extract text content from all parts
|
|
for _, part := range candidate.Content.Parts {
|
|
if part.Text != "" {
|
|
textContent += part.Text
|
|
}
|
|
}
|
|
|
|
if textContent != "" {
|
|
bifrostResp.Text = textContent
|
|
bifrostResp.Task = schemas.Ptr("transcribe")
|
|
|
|
// Set usage information with modality details
|
|
bifrostResp.Usage = convertGeminiUsageMetadataToTranscriptionUsage(response.UsageMetadata)
|
|
}
|
|
}
|
|
}
|
|
|
|
return bifrostResp
|
|
}
|
|
|
|
// ToGeminiTranscriptionResponse converts a BifrostTranscriptionResponse to Gemini's GenerateContentResponse
|
|
func ToGeminiTranscriptionResponse(bifrostResp *schemas.BifrostTranscriptionResponse) *GenerateContentResponse {
|
|
if bifrostResp == nil {
|
|
return nil
|
|
}
|
|
|
|
genaiResp := &GenerateContentResponse{}
|
|
|
|
candidate := &Candidate{
|
|
Content: &Content{
|
|
Parts: []*Part{
|
|
{
|
|
Text: bifrostResp.Text,
|
|
},
|
|
},
|
|
Role: string(RoleModel),
|
|
},
|
|
}
|
|
|
|
// Set usage metadata from transcription usage with modality details
|
|
genaiResp.UsageMetadata = convertBifrostTranscriptionUsageToGeminiUsageMetadata(bifrostResp.Usage)
|
|
|
|
genaiResp.Candidates = []*Candidate{candidate}
|
|
return genaiResp
|
|
}
|