first commit
This commit is contained in:
933
core/providers/elevenlabs/elevenlabs.go
Normal file
933
core/providers/elevenlabs/elevenlabs.go
Normal file
@@ -0,0 +1,933 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
|
||||
schemas "github.com/maximhq/bifrost/core/schemas"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
type ElevenlabsProvider struct {
|
||||
logger schemas.Logger // Logger for provider operations
|
||||
client *fasthttp.Client // HTTP client for unary API requests (ReadTimeout bounds overall response)
|
||||
streamingClient *fasthttp.Client // HTTP client for streaming API requests (no ReadTimeout; idle governed by NewIdleTimeoutReader)
|
||||
networkConfig schemas.NetworkConfig // Network configuration including extra headers
|
||||
sendBackRawRequest bool // Whether to include raw request in BifrostResponse
|
||||
sendBackRawResponse bool // Whether to include raw response in BifrostResponse
|
||||
customProviderConfig *schemas.CustomProviderConfig // Custom provider config
|
||||
}
|
||||
|
||||
// NewElevenlabsProvider creates a new Elevenlabs provider instance.
|
||||
// It initializes the HTTP client with the provided configuration.
|
||||
// The client is configured with timeouts, concurrency limits, and optional proxy settings.
|
||||
func NewElevenlabsProvider(config *schemas.ProviderConfig, logger schemas.Logger) *ElevenlabsProvider {
|
||||
config.CheckAndSetDefaults()
|
||||
|
||||
requestTimeout := time.Second * time.Duration(config.NetworkConfig.DefaultRequestTimeoutInSeconds)
|
||||
client := &fasthttp.Client{
|
||||
ReadTimeout: requestTimeout,
|
||||
WriteTimeout: requestTimeout,
|
||||
MaxConnsPerHost: config.NetworkConfig.MaxConnsPerHost,
|
||||
MaxIdleConnDuration: 30 * time.Second,
|
||||
MaxConnWaitTimeout: requestTimeout,
|
||||
MaxConnDuration: time.Second * time.Duration(schemas.DefaultMaxConnDurationInSeconds),
|
||||
ConnPoolStrategy: fasthttp.FIFO,
|
||||
}
|
||||
|
||||
// Configure proxy and retry policy
|
||||
client = providerUtils.ConfigureProxy(client, config.ProxyConfig, logger)
|
||||
client = providerUtils.ConfigureDialer(client)
|
||||
client = providerUtils.ConfigureTLS(client, config.NetworkConfig, logger)
|
||||
streamingClient := providerUtils.BuildStreamingClient(client)
|
||||
// Set default BaseURL if not provided
|
||||
if config.NetworkConfig.BaseURL == "" {
|
||||
config.NetworkConfig.BaseURL = "https://api.elevenlabs.io"
|
||||
}
|
||||
config.NetworkConfig.BaseURL = strings.TrimRight(config.NetworkConfig.BaseURL, "/")
|
||||
|
||||
return &ElevenlabsProvider{
|
||||
logger: logger,
|
||||
client: client,
|
||||
streamingClient: streamingClient,
|
||||
networkConfig: config.NetworkConfig,
|
||||
customProviderConfig: config.CustomProviderConfig,
|
||||
sendBackRawRequest: config.SendBackRawRequest,
|
||||
sendBackRawResponse: config.SendBackRawResponse,
|
||||
}
|
||||
}
|
||||
|
||||
// GetProviderKey returns the provider identifier for Elevenlabs.
|
||||
func (provider *ElevenlabsProvider) GetProviderKey() schemas.ModelProvider {
|
||||
return providerUtils.GetProviderName(schemas.Elevenlabs, provider.customProviderConfig)
|
||||
}
|
||||
|
||||
// listModelsByKey performs a list models request for a single key.
|
||||
// Returns the response and latency, or an error if the request fails.
|
||||
func (provider *ElevenlabsProvider) listModelsByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) {
|
||||
// Create request
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
// Set any extra headers from network config
|
||||
providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
|
||||
|
||||
// Build URL using centralized URL construction
|
||||
req.SetRequestURI(provider.networkConfig.BaseURL + providerUtils.GetPathFromContext(ctx, "/v1/models"))
|
||||
req.Header.SetMethod(http.MethodGet)
|
||||
req.Header.SetContentType("application/json")
|
||||
|
||||
if key.Value.GetValue() != "" {
|
||||
req.Header.Set("xi-api-key", key.Value.GetValue())
|
||||
}
|
||||
|
||||
// Make request
|
||||
latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
|
||||
defer wait()
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
// Extract and set provider response headers so they're available on error paths
|
||||
ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))
|
||||
if resp.StatusCode() != fasthttp.StatusOK {
|
||||
return nil, parseElevenlabsError(resp)
|
||||
}
|
||||
|
||||
var elevenlabsResponse ElevenlabsListModelsResponse
|
||||
rawRequest, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(resp.Body(), &elevenlabsResponse, nil, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse))
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
|
||||
response := elevenlabsResponse.ToBifrostListModelsResponse(provider.GetProviderKey(), key.Models, key.BlacklistedModels, key.Aliases, request.Unfiltered)
|
||||
|
||||
response.ExtraFields.Latency = latency.Milliseconds()
|
||||
response.ExtraFields.ProviderResponseHeaders = providerUtils.ExtractProviderResponseHeaders(resp)
|
||||
|
||||
// Set raw request if enabled
|
||||
if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
|
||||
response.ExtraFields.RawRequest = rawRequest
|
||||
}
|
||||
|
||||
// Set raw response if enabled
|
||||
if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
|
||||
response.ExtraFields.RawResponse = rawResponse
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// ListModels performs a list models request to Elevenlabs' API.
|
||||
// Requests are made concurrently for improved performance.
|
||||
func (provider *ElevenlabsProvider) ListModels(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) {
|
||||
if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.ListModelsRequest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return providerUtils.HandleMultipleListModelsRequests(
|
||||
ctx,
|
||||
keys,
|
||||
request,
|
||||
provider.listModelsByKey,
|
||||
)
|
||||
}
|
||||
|
||||
// TextCompletion is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) TextCompletion(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostTextCompletionRequest) (*schemas.BifrostTextCompletionResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// TextCompletionStream is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) TextCompletionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostTextCompletionRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ChatCompletion is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) ChatCompletion(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ChatCompletionRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ChatCompletionStream is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) ChatCompletionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostChatRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ChatCompletionStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// Responses is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) Responses(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostResponsesRequest) (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ResponsesRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ResponsesStream is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) ResponsesStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostResponsesRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ResponsesStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// Embedding is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) Embedding(ctx *schemas.BifrostContext, key schemas.Key, input *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.EmbeddingRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// Speech performs a text to speech request
|
||||
func (provider *ElevenlabsProvider) Speech(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostSpeechRequest) (*schemas.BifrostSpeechResponse, *schemas.BifrostError) {
|
||||
if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.SpeechRequest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create request
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
// Set any extra headers from network config
|
||||
providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
|
||||
|
||||
withTimestampsRequest := request.Params != nil && request.Params.WithTimestamps != nil && *request.Params.WithTimestamps
|
||||
|
||||
var endpoint string
|
||||
if request.Params != nil && request.Params.VoiceConfig != nil && request.Params.VoiceConfig.Voice != nil {
|
||||
voice := *request.Params.VoiceConfig.Voice
|
||||
// Determine if timestamps are requested
|
||||
if withTimestampsRequest {
|
||||
endpoint = "/v1/text-to-speech/" + voice + "/with-timestamps"
|
||||
} else {
|
||||
endpoint = "/v1/text-to-speech/" + voice
|
||||
}
|
||||
} else {
|
||||
return nil, providerUtils.NewBifrostOperationError("voice parameter is required", nil)
|
||||
}
|
||||
|
||||
requestURL := provider.buildBaseSpeechRequestURL(ctx, endpoint, schemas.SpeechRequest, request)
|
||||
req.SetRequestURI(requestURL)
|
||||
|
||||
req.Header.SetMethod(http.MethodPost)
|
||||
req.Header.SetContentType("application/json")
|
||||
if key.Value.GetValue() != "" {
|
||||
req.Header.Set("xi-api-key", key.Value.GetValue())
|
||||
}
|
||||
|
||||
jsonData, bifrostErr := providerUtils.CheckContextAndGetRequestBody(
|
||||
ctx,
|
||||
request,
|
||||
func() (providerUtils.RequestBodyWithExtraParams, error) {
|
||||
return ToElevenlabsSpeechRequest(request), nil
|
||||
})
|
||||
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
|
||||
if !providerUtils.ApplyLargePayloadRequestBody(ctx, req) {
|
||||
req.SetBody(jsonData)
|
||||
}
|
||||
|
||||
// Make request
|
||||
latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
|
||||
defer wait()
|
||||
if bifrostErr != nil {
|
||||
return nil, providerUtils.EnrichError(ctx, bifrostErr, jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
// Extract and set provider response headers so they're available on error paths
|
||||
ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))
|
||||
|
||||
// Handle error response
|
||||
if resp.StatusCode() != fasthttp.StatusOK {
|
||||
return nil, providerUtils.EnrichError(ctx, parseElevenlabsError(resp), jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
|
||||
// Get the response body
|
||||
body, err := providerUtils.CheckAndDecodeBody(resp)
|
||||
if err != nil {
|
||||
return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err), jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
|
||||
// Create response based on whether timestamps were requested
|
||||
bifrostResponse := &schemas.BifrostSpeechResponse{
|
||||
ExtraFields: schemas.BifrostResponseExtraFields{
|
||||
Latency: latency.Milliseconds(),
|
||||
ProviderResponseHeaders: providerUtils.ExtractProviderResponseHeaders(resp),
|
||||
},
|
||||
}
|
||||
|
||||
if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
|
||||
providerUtils.ParseAndSetRawRequest(&bifrostResponse.ExtraFields, jsonData)
|
||||
}
|
||||
|
||||
if withTimestampsRequest {
|
||||
var timestampResponse ElevenlabsSpeechWithTimestampsResponse
|
||||
if err := sonic.Unmarshal(body, ×tampResponse); err != nil {
|
||||
return nil, providerUtils.NewBifrostOperationError("failed to parse with-timestamps response", err)
|
||||
}
|
||||
|
||||
bifrostResponse.AudioBase64 = ×tampResponse.AudioBase64
|
||||
|
||||
if timestampResponse.Alignment != nil {
|
||||
bifrostResponse.Alignment = &schemas.SpeechAlignment{
|
||||
CharStartTimesMs: timestampResponse.Alignment.CharStartTimesMs,
|
||||
CharEndTimesMs: timestampResponse.Alignment.CharEndTimesMs,
|
||||
Characters: timestampResponse.Alignment.Characters,
|
||||
}
|
||||
}
|
||||
|
||||
if timestampResponse.NormalizedAlignment != nil {
|
||||
bifrostResponse.NormalizedAlignment = &schemas.SpeechAlignment{
|
||||
CharStartTimesMs: timestampResponse.NormalizedAlignment.CharStartTimesMs,
|
||||
CharEndTimesMs: timestampResponse.NormalizedAlignment.CharEndTimesMs,
|
||||
Characters: timestampResponse.NormalizedAlignment.Characters,
|
||||
}
|
||||
}
|
||||
|
||||
return bifrostResponse, nil
|
||||
}
|
||||
|
||||
bifrostResponse.Audio = body
|
||||
return bifrostResponse, nil
|
||||
}
|
||||
|
||||
// Rerank is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) Rerank(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostRerankRequest) (*schemas.BifrostRerankResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.RerankRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// OCR is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) OCR(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostOCRRequest) (*schemas.BifrostOCRResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.OCRRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// SpeechStream performs a text to speech stream request
|
||||
func (provider *ElevenlabsProvider) SpeechStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostSpeechRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.SpeechStreamRequest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jsonBody, bifrostErr := providerUtils.CheckContextAndGetRequestBody(
|
||||
ctx,
|
||||
request,
|
||||
func() (providerUtils.RequestBodyWithExtraParams, error) {
|
||||
return ToElevenlabsSpeechRequest(request), nil
|
||||
})
|
||||
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
|
||||
// Create HTTP request for streaming
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
resp.StreamBody = true
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
|
||||
// Set any extra headers from network config
|
||||
providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
|
||||
|
||||
if request.Params == nil || request.Params.VoiceConfig == nil || request.Params.VoiceConfig.Voice == nil {
|
||||
return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError("voice parameter is required", nil), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
|
||||
req.SetRequestURI(provider.buildBaseSpeechRequestURL(ctx, "/v1/text-to-speech/"+*request.Params.VoiceConfig.Voice+"/stream", schemas.SpeechStreamRequest, request))
|
||||
|
||||
req.Header.SetMethod(http.MethodPost)
|
||||
req.Header.SetContentType("application/json")
|
||||
if key.Value.GetValue() != "" {
|
||||
req.Header.Set("xi-api-key", key.Value.GetValue())
|
||||
}
|
||||
|
||||
if !providerUtils.ApplyLargePayloadRequestBody(ctx, req) {
|
||||
req.SetBody(jsonBody)
|
||||
}
|
||||
|
||||
// Make request
|
||||
startTime := time.Now()
|
||||
err := provider.streamingClient.Do(req, resp)
|
||||
if err != nil {
|
||||
defer providerUtils.ReleaseStreamingResponse(resp)
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil, providerUtils.EnrichError(ctx, &schemas.BifrostError{
|
||||
IsBifrostError: false,
|
||||
Error: &schemas.ErrorField{
|
||||
Type: schemas.Ptr(schemas.RequestCancelled),
|
||||
Message: schemas.ErrRequestCancelled,
|
||||
Error: err,
|
||||
},
|
||||
}, jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostTimeoutError(schemas.ErrProviderRequestTimedOut, err), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError(schemas.ErrProviderDoRequest, err), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
|
||||
// Extract provider response headers before status check so error responses also forward them
|
||||
ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))
|
||||
|
||||
// Check for HTTP errors
|
||||
if resp.StatusCode() != fasthttp.StatusOK {
|
||||
defer providerUtils.ReleaseStreamingResponse(resp)
|
||||
return nil, providerUtils.EnrichError(ctx, parseElevenlabsError(resp), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
|
||||
}
|
||||
|
||||
// Create response channel
|
||||
responseChan := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize)
|
||||
|
||||
providerUtils.SetStreamIdleTimeoutIfEmpty(ctx, provider.networkConfig.StreamIdleTimeoutInSeconds)
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
if ctx.Err() == context.Canceled {
|
||||
providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.logger, postHookSpanFinalizer)
|
||||
} else if ctx.Err() == context.DeadlineExceeded {
|
||||
providerUtils.HandleStreamTimeout(ctx, postHookRunner, responseChan, provider.logger, postHookSpanFinalizer)
|
||||
}
|
||||
close(responseChan)
|
||||
}()
|
||||
defer providerUtils.ReleaseStreamingResponse(resp)
|
||||
// Decompress gzip-encoded streams transparently (no-op for non-gzip)
|
||||
reader, releaseGzip := providerUtils.DecompressStreamBody(resp)
|
||||
defer releaseGzip()
|
||||
|
||||
// Wrap reader with idle timeout to detect stalled streams.
|
||||
reader, stopIdleTimeout := providerUtils.NewIdleTimeoutReader(reader, resp.BodyStream(), providerUtils.GetStreamIdleTimeout(ctx))
|
||||
defer stopIdleTimeout()
|
||||
|
||||
// Setup cancellation handler to close the raw network stream on ctx cancellation,
|
||||
// which immediately unblocks any in-progress read (including reads blocked inside a gzip decompression layer).
|
||||
stopCancellation := providerUtils.SetupStreamCancellation(ctx, resp.BodyStream(), provider.logger)
|
||||
defer stopCancellation()
|
||||
defer providerUtils.EnsureStreamFinalizerCalled(ctx, postHookSpanFinalizer)
|
||||
|
||||
// read binary audio chunks from the stream
|
||||
// 4KB buffer for reading chunks
|
||||
buffer := make([]byte, 4096)
|
||||
bodyStream := reader
|
||||
chunkIndex := -1
|
||||
lastChunkTime := time.Now()
|
||||
|
||||
for {
|
||||
// If context was cancelled/timed out, let defer handle it
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
n, err := bodyStream.Read(buffer)
|
||||
if err != nil {
|
||||
// If context was cancelled/timed out, let defer handle it
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
|
||||
provider.logger.Warn("Error reading stream: %v", err)
|
||||
providerUtils.ProcessAndSendError(ctx, postHookRunner, err, responseChan, provider.logger, postHookSpanFinalizer)
|
||||
return
|
||||
}
|
||||
|
||||
if n > 0 {
|
||||
chunkIndex++
|
||||
audioChunk := make([]byte, n)
|
||||
copy(audioChunk, buffer[:n])
|
||||
|
||||
response := &schemas.BifrostSpeechStreamResponse{
|
||||
Type: schemas.SpeechStreamResponseTypeDelta,
|
||||
Audio: audioChunk,
|
||||
ExtraFields: schemas.BifrostResponseExtraFields{
|
||||
ChunkIndex: chunkIndex,
|
||||
Latency: time.Since(lastChunkTime).Milliseconds(),
|
||||
},
|
||||
}
|
||||
|
||||
lastChunkTime = time.Now()
|
||||
|
||||
if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
|
||||
response.ExtraFields.RawResponse = audioChunk
|
||||
}
|
||||
|
||||
providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, response, nil, nil), responseChan, postHookSpanFinalizer)
|
||||
}
|
||||
}
|
||||
|
||||
// Send final response after natural loop termination (similar to Gemini pattern)
|
||||
finalResponse := &schemas.BifrostSpeechStreamResponse{
|
||||
Type: schemas.SpeechStreamResponseTypeDone,
|
||||
Audio: []byte{},
|
||||
ExtraFields: schemas.BifrostResponseExtraFields{
|
||||
ChunkIndex: chunkIndex + 1,
|
||||
Latency: time.Since(startTime).Milliseconds(),
|
||||
},
|
||||
}
|
||||
|
||||
// Set raw request if enabled
|
||||
if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
|
||||
providerUtils.ParseAndSetRawRequest(&finalResponse.ExtraFields, jsonBody)
|
||||
}
|
||||
ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
|
||||
providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, finalResponse, nil, nil), responseChan, postHookSpanFinalizer)
|
||||
}()
|
||||
|
||||
return responseChan, nil
|
||||
}
|
||||
|
||||
// Transcription performs a transcription request
|
||||
func (provider *ElevenlabsProvider) Transcription(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostTranscriptionRequest) (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
|
||||
if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.TranscriptionRequest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reqBody := ToElevenlabsTranscriptionRequest(request)
|
||||
if reqBody == nil {
|
||||
return nil, providerUtils.NewBifrostOperationError("transcription request is not provided", nil)
|
||||
}
|
||||
|
||||
hasFile := len(reqBody.File) > 0
|
||||
hasURL := reqBody.CloudStorageURL != nil && strings.TrimSpace(*reqBody.CloudStorageURL) != ""
|
||||
if hasFile && hasURL {
|
||||
return nil, providerUtils.NewBifrostOperationError("provide either a file or cloud_storage_url, not both", nil)
|
||||
}
|
||||
if !hasFile && !hasURL {
|
||||
return nil, providerUtils.NewBifrostOperationError("either a transcription file or cloud_storage_url must be provided", nil)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
if bifrostErr := writeTranscriptionMultipart(writer, reqBody); bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
|
||||
contentType := writer.FormDataContentType()
|
||||
if err := writer.Close(); err != nil {
|
||||
return nil, providerUtils.NewBifrostOperationError("failed to finalize multipart transcription request", err)
|
||||
}
|
||||
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
|
||||
|
||||
requestPath, isCompleteURL := providerUtils.GetRequestPath(ctx, "/v1/speech-to-text", provider.customProviderConfig, schemas.TranscriptionRequest)
|
||||
if isCompleteURL {
|
||||
req.SetRequestURI(requestPath)
|
||||
} else {
|
||||
req.SetRequestURI(provider.networkConfig.BaseURL + requestPath)
|
||||
}
|
||||
req.Header.SetMethod(http.MethodPost)
|
||||
req.Header.SetContentType(contentType)
|
||||
if key.Value.GetValue() != "" {
|
||||
req.Header.Set("xi-api-key", key.Value.GetValue())
|
||||
}
|
||||
req.SetBody(body.Bytes())
|
||||
|
||||
latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
|
||||
defer wait()
|
||||
if bifrostErr != nil {
|
||||
return nil, bifrostErr
|
||||
}
|
||||
// Extract and set provider response headers so they're available on error paths
|
||||
ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))
|
||||
if resp.StatusCode() != fasthttp.StatusOK {
|
||||
return nil, parseElevenlabsError(resp)
|
||||
}
|
||||
|
||||
responseBody, err := providerUtils.CheckAndDecodeBody(resp)
|
||||
if err != nil {
|
||||
return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err)
|
||||
}
|
||||
|
||||
// Check for empty response
|
||||
trimmed := strings.TrimSpace(string(responseBody))
|
||||
if len(trimmed) == 0 {
|
||||
return nil, &schemas.BifrostError{
|
||||
IsBifrostError: true,
|
||||
Error: &schemas.ErrorField{
|
||||
Message: schemas.ErrProviderResponseEmpty,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
chunks, err := parseTranscriptionResponse(responseBody)
|
||||
if err != nil {
|
||||
return nil, providerUtils.NewBifrostOperationError(err.Error(), nil)
|
||||
}
|
||||
|
||||
if len(chunks) == 0 {
|
||||
return nil, providerUtils.NewBifrostOperationError("no chunks found in transcription response", nil)
|
||||
}
|
||||
|
||||
response := ToBifrostTranscriptionResponse(chunks)
|
||||
response.ExtraFields = schemas.BifrostResponseExtraFields{
|
||||
Latency: latency.Milliseconds(),
|
||||
ProviderResponseHeaders: providerUtils.ExtractProviderResponseHeaders(resp),
|
||||
}
|
||||
|
||||
if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
|
||||
var rawResponse interface{}
|
||||
if err := sonic.Unmarshal(responseBody, &rawResponse); err != nil {
|
||||
rawResponse = string(responseBody)
|
||||
}
|
||||
response.ExtraFields.RawResponse = rawResponse
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func writeTranscriptionMultipart(writer *multipart.Writer, reqBody *ElevenlabsTranscriptionRequest) *schemas.BifrostError {
|
||||
if err := writer.WriteField("model_id", reqBody.ModelID); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write model_id field", err)
|
||||
}
|
||||
|
||||
if len(reqBody.File) > 0 {
|
||||
filename := reqBody.Filename
|
||||
if filename == "" {
|
||||
filename = providerUtils.AudioFilenameFromBytes(reqBody.File)
|
||||
}
|
||||
fileWriter, err := writer.CreateFormFile("file", filename)
|
||||
if err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to create file field", err)
|
||||
}
|
||||
if _, err := fileWriter.Write(reqBody.File); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write file data", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.CloudStorageURL != nil && strings.TrimSpace(*reqBody.CloudStorageURL) != "" {
|
||||
if err := writer.WriteField("cloud_storage_url", *reqBody.CloudStorageURL); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write cloud_storage_url field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.LanguageCode != nil && strings.TrimSpace(*reqBody.LanguageCode) != "" {
|
||||
if err := writer.WriteField("language_code", *reqBody.LanguageCode); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write language_code field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.TagAudioEvents != nil {
|
||||
if err := writer.WriteField("tag_audio_events", strconv.FormatBool(*reqBody.TagAudioEvents)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write tag_audio_events field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.NumSpeakers != nil && *reqBody.NumSpeakers > 0 {
|
||||
if err := writer.WriteField("num_speakers", strconv.Itoa(*reqBody.NumSpeakers)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write num_speakers field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.TimestampsGranularity != nil && *reqBody.TimestampsGranularity != "" {
|
||||
if err := writer.WriteField("timestamps_granularity", string(*reqBody.TimestampsGranularity)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write timestamps_granularity field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.Diarize != nil {
|
||||
if err := writer.WriteField("diarize", strconv.FormatBool(*reqBody.Diarize)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write diarize field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.DiarizationThreshold != nil {
|
||||
if err := writer.WriteField("diarization_threshold", strconv.FormatFloat(*reqBody.DiarizationThreshold, 'f', -1, 64)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write diarization_threshold field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(reqBody.AdditionalFormats) > 0 {
|
||||
payload, err := providerUtils.MarshalSorted(reqBody.AdditionalFormats)
|
||||
if err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to marshal additional_formats", err)
|
||||
}
|
||||
if err := writer.WriteField("additional_formats", string(payload)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write additional_formats field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.FileFormat != nil && *reqBody.FileFormat != "" {
|
||||
if err := writer.WriteField("file_format", string(*reqBody.FileFormat)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write file_format field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.Webhook != nil {
|
||||
if err := writer.WriteField("webhook", strconv.FormatBool(*reqBody.Webhook)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write webhook field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.WebhookID != nil && strings.TrimSpace(*reqBody.WebhookID) != "" {
|
||||
if err := writer.WriteField("webhook_id", *reqBody.WebhookID); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write webhook_id field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.Temperature != nil {
|
||||
if err := writer.WriteField("temperature", strconv.FormatFloat(*reqBody.Temperature, 'f', -1, 64)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write temperature field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.Seed != nil {
|
||||
if err := writer.WriteField("seed", strconv.Itoa(*reqBody.Seed)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write seed field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.UseMultiChannel != nil {
|
||||
if err := writer.WriteField("use_multi_channel", strconv.FormatBool(*reqBody.UseMultiChannel)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write use_multi_channel field", err)
|
||||
}
|
||||
}
|
||||
|
||||
if reqBody.WebhookMetadata != nil {
|
||||
switch v := reqBody.WebhookMetadata.(type) {
|
||||
case string:
|
||||
if strings.TrimSpace(v) != "" {
|
||||
if err := writer.WriteField("webhook_metadata", v); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write webhook_metadata field", err)
|
||||
}
|
||||
}
|
||||
default:
|
||||
payload, err := providerUtils.MarshalSorted(v)
|
||||
if err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to marshal webhook_metadata", err)
|
||||
}
|
||||
if err := writer.WriteField("webhook_metadata", string(payload)); err != nil {
|
||||
return providerUtils.NewBifrostOperationError("failed to write webhook_metadata field", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TranscriptionStream is not supported by the Elevenlabs provider
|
||||
func (provider *ElevenlabsProvider) TranscriptionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostTranscriptionRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.TranscriptionStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ImageGeneration is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ImageGeneration(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageGenerationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ImageGenerationRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ImageGenerationStream is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ImageGenerationStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostImageGenerationRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ImageGenerationStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ImageEdit is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ImageEdit(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageEditRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ImageEditRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ImageEditStream is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ImageEditStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostImageEditRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ImageEditStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ImageVariation is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ImageVariation(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageVariationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ImageVariationRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoGeneration is not supported by the ElevenLabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoGeneration(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoGenerationRequest) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoGenerationRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoRetrieve is not supported by the ElevenLabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoRetrieve(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoRetrieveRequest) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoRetrieveRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoDownload is not supported by the ElevenLabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoDownload(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoDownloadRequest) (*schemas.BifrostVideoDownloadResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoDownloadRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoDelete is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoDelete(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoDeleteRequest) (*schemas.BifrostVideoDeleteResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoDeleteRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoList is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoList(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoListRequest) (*schemas.BifrostVideoListResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoListRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// VideoRemix is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) VideoRemix(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoRemixRequest) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoRemixRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// buildSpeechRequestURL constructs the full request URL using the provider's configuration for speech.
|
||||
func (provider *ElevenlabsProvider) buildBaseSpeechRequestURL(ctx *schemas.BifrostContext, defaultPath string, requestType schemas.RequestType, request *schemas.BifrostSpeechRequest) string {
|
||||
baseURL := provider.networkConfig.BaseURL
|
||||
requestPath, isCompleteURL := providerUtils.GetRequestPath(ctx, defaultPath, provider.customProviderConfig, requestType)
|
||||
|
||||
var finalURL string
|
||||
if isCompleteURL {
|
||||
finalURL = requestPath
|
||||
} else {
|
||||
u, parseErr := url.Parse(baseURL)
|
||||
if parseErr != nil {
|
||||
finalURL = baseURL + requestPath
|
||||
} else {
|
||||
u.Path = path.Join(u.Path, requestPath)
|
||||
finalURL = u.String()
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the final URL to add query parameters
|
||||
u, parseErr := url.Parse(finalURL)
|
||||
if parseErr != nil {
|
||||
return finalURL
|
||||
}
|
||||
|
||||
q := u.Query()
|
||||
|
||||
if request.Params != nil {
|
||||
if request.Params.EnableLogging != nil {
|
||||
q.Set("enable_logging", strconv.FormatBool(*request.Params.EnableLogging))
|
||||
}
|
||||
|
||||
convertedFormat := ConvertBifrostSpeechFormatToElevenlabs(request.Params.ResponseFormat)
|
||||
if convertedFormat != "" {
|
||||
q.Set("output_format", convertedFormat)
|
||||
}
|
||||
|
||||
if request.Params.OptimizeStreamingLatency != nil {
|
||||
q.Set("optimize_streaming_latency", strconv.FormatBool(*request.Params.OptimizeStreamingLatency))
|
||||
}
|
||||
}
|
||||
|
||||
u.RawQuery = q.Encode()
|
||||
return u.String()
|
||||
}
|
||||
|
||||
// BatchCreate is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchCreate(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostBatchCreateRequest) (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchCreateRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// BatchList is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchList(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostBatchListRequest) (*schemas.BifrostBatchListResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchListRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// BatchRetrieve is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchRetrieve(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostBatchRetrieveRequest) (*schemas.BifrostBatchRetrieveResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchRetrieveRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// BatchCancel is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchCancel(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostBatchCancelRequest) (*schemas.BifrostBatchCancelResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchCancelRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// BatchDelete is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchDelete(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostBatchDeleteRequest) (*schemas.BifrostBatchDeleteResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchDeleteRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// BatchResults is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) BatchResults(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostBatchResultsRequest) (*schemas.BifrostBatchResultsResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchResultsRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// FileUpload is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) FileUpload(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostFileUploadRequest) (*schemas.BifrostFileUploadResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.FileUploadRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// FileList is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) FileList(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostFileListRequest) (*schemas.BifrostFileListResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.FileListRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// FileRetrieve is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) FileRetrieve(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostFileRetrieveRequest) (*schemas.BifrostFileRetrieveResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.FileRetrieveRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// FileDelete is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) FileDelete(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostFileDeleteRequest) (*schemas.BifrostFileDeleteResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.FileDeleteRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// FileContent is not supported by Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) FileContent(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostFileContentRequest) (*schemas.BifrostFileContentResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.FileContentRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// CountTokens is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) CountTokens(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostResponsesRequest) (*schemas.BifrostCountTokensResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.CountTokensRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerCreate is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerCreate(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostContainerCreateRequest) (*schemas.BifrostContainerCreateResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerCreateRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerList is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerList(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerListRequest) (*schemas.BifrostContainerListResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerListRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerRetrieve is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerRetrieve(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerRetrieveRequest) (*schemas.BifrostContainerRetrieveResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerRetrieveRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerDelete is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerDelete(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerDeleteRequest) (*schemas.BifrostContainerDeleteResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerDeleteRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerFileCreate is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerFileCreate(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostContainerFileCreateRequest) (*schemas.BifrostContainerFileCreateResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileCreateRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerFileList is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerFileList(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerFileListRequest) (*schemas.BifrostContainerFileListResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileListRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerFileRetrieve is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerFileRetrieve(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerFileRetrieveRequest) (*schemas.BifrostContainerFileRetrieveResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileRetrieveRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerFileContent is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerFileContent(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerFileContentRequest) (*schemas.BifrostContainerFileContentResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileContentRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// ContainerFileDelete is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) ContainerFileDelete(_ *schemas.BifrostContext, _ []schemas.Key, _ *schemas.BifrostContainerFileDeleteRequest) (*schemas.BifrostContainerFileDeleteResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileDeleteRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
// Passthrough is not supported by the Elevenlabs provider.
|
||||
func (provider *ElevenlabsProvider) Passthrough(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostPassthroughRequest) (*schemas.BifrostPassthroughResponse, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.PassthroughRequest, provider.GetProviderKey())
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) PassthroughStream(_ *schemas.BifrostContext, _ schemas.PostHookRunner, _ func(context.Context), _ schemas.Key, _ *schemas.BifrostPassthroughRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
|
||||
return nil, providerUtils.NewUnsupportedOperationError(schemas.PassthroughStreamRequest, provider.GetProviderKey())
|
||||
}
|
||||
62
core/providers/elevenlabs/elevenlabs_test.go
Normal file
62
core/providers/elevenlabs/elevenlabs_test.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package elevenlabs_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/maximhq/bifrost/core/internal/llmtests"
|
||||
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
func TestElevenlabs(t *testing.T) {
|
||||
t.Parallel()
|
||||
if strings.TrimSpace(os.Getenv("ELEVENLABS_API_KEY")) == "" {
|
||||
t.Skip("Skipping Elevenlabs tests because ELEVENLABS_API_KEY is not set")
|
||||
}
|
||||
|
||||
client, ctx, cancel, err := llmtests.SetupTest()
|
||||
if err != nil {
|
||||
t.Fatalf("Error initializing test setup: %v", err)
|
||||
}
|
||||
defer cancel()
|
||||
defer client.Shutdown()
|
||||
|
||||
realtimeAgentID := strings.TrimSpace(os.Getenv("ELEVENLABS_AGENT_ID"))
|
||||
hasRealtimeAgent := false
|
||||
|
||||
testConfig := llmtests.ComprehensiveTestConfig{
|
||||
Provider: schemas.Elevenlabs,
|
||||
SpeechSynthesisModel: "eleven_turbo_v2_5",
|
||||
TranscriptionModel: "scribe_v1",
|
||||
RealtimeModel: realtimeAgentID,
|
||||
Scenarios: llmtests.TestScenarios{
|
||||
TextCompletion: false,
|
||||
TextCompletionStream: false,
|
||||
SimpleChat: false,
|
||||
CompletionStream: false,
|
||||
MultiTurnConversation: false,
|
||||
ToolCalls: false,
|
||||
MultipleToolCalls: false,
|
||||
End2EndToolCalling: false,
|
||||
AutomaticFunctionCall: false,
|
||||
ImageURL: false,
|
||||
ImageBase64: false,
|
||||
MultipleImages: false,
|
||||
CompleteEnd2End: false,
|
||||
SpeechSynthesis: true,
|
||||
SpeechSynthesisStream: true,
|
||||
Transcription: true,
|
||||
TranscriptionStream: false,
|
||||
Embedding: false,
|
||||
Reasoning: false,
|
||||
ListModels: false,
|
||||
Realtime: hasRealtimeAgent,
|
||||
},
|
||||
}
|
||||
|
||||
t.Run("ElevenlabsTests", func(t *testing.T) {
|
||||
llmtests.RunAllComprehensiveTests(t, client, ctx, testConfig)
|
||||
})
|
||||
}
|
||||
90
core/providers/elevenlabs/errors.go
Normal file
90
core/providers/elevenlabs/errors.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/valyala/fasthttp"
|
||||
|
||||
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
|
||||
schemas "github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
func parseElevenlabsError(resp *fasthttp.Response) *schemas.BifrostError {
|
||||
var errorResp ElevenlabsError
|
||||
bifrostErr := providerUtils.HandleProviderAPIError(resp, &errorResp)
|
||||
if errorResp.Detail != nil {
|
||||
var message string
|
||||
// Handle validation errors (array format)
|
||||
if len(errorResp.Detail.ValidationErrors) > 0 {
|
||||
var messages []string
|
||||
var locations []string
|
||||
var errorTypes []string
|
||||
|
||||
for _, validationErr := range errorResp.Detail.ValidationErrors {
|
||||
// Get message from either Message or Msg field
|
||||
msg := validationErr.Message
|
||||
if msg == "" {
|
||||
msg = validationErr.Msg
|
||||
}
|
||||
if msg != "" {
|
||||
messages = append(messages, msg)
|
||||
}
|
||||
|
||||
// Collect location if available
|
||||
if len(validationErr.Loc) > 0 {
|
||||
locations = append(locations, strings.Join(validationErr.Loc, "."))
|
||||
}
|
||||
|
||||
// Collect error type if available
|
||||
if validationErr.Type != "" {
|
||||
errorTypes = append(errorTypes, validationErr.Type)
|
||||
}
|
||||
}
|
||||
|
||||
// Build combined message
|
||||
if len(messages) > 0 {
|
||||
message = strings.Join(messages, "; ")
|
||||
}
|
||||
if len(locations) > 0 {
|
||||
locationStr := strings.Join(locations, ", ")
|
||||
message = message + " [" + locationStr + "]"
|
||||
}
|
||||
|
||||
errorType := ""
|
||||
if len(errorTypes) > 0 {
|
||||
errorType = strings.Join(errorTypes, ", ")
|
||||
}
|
||||
|
||||
if message != "" {
|
||||
result := &schemas.BifrostError{
|
||||
IsBifrostError: false,
|
||||
StatusCode: schemas.Ptr(resp.StatusCode()),
|
||||
Error: &schemas.ErrorField{
|
||||
Type: schemas.Ptr(errorType),
|
||||
Message: message,
|
||||
},
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// Handle non-validation errors (single object format)
|
||||
if errorResp.Detail.Message != nil {
|
||||
message = *errorResp.Detail.Message
|
||||
}
|
||||
|
||||
errorType := ""
|
||||
if errorResp.Detail.Status != nil {
|
||||
errorType = *errorResp.Detail.Status
|
||||
}
|
||||
|
||||
if message != "" {
|
||||
if bifrostErr.Error == nil {
|
||||
bifrostErr.Error = &schemas.ErrorField{}
|
||||
}
|
||||
bifrostErr.Error.Type = schemas.Ptr(errorType)
|
||||
bifrostErr.Error.Message = message
|
||||
}
|
||||
}
|
||||
return bifrostErr
|
||||
}
|
||||
51
core/providers/elevenlabs/models.go
Normal file
51
core/providers/elevenlabs/models.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
func (response *ElevenlabsListModelsResponse) ToBifrostListModelsResponse(providerKey schemas.ModelProvider, allowedModels schemas.WhiteList, blacklistedModels schemas.BlackList, aliases map[string]string, unfiltered bool) *schemas.BifrostListModelsResponse {
|
||||
if response == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
bifrostResponse := &schemas.BifrostListModelsResponse{
|
||||
Data: make([]schemas.Model, 0, len(*response)),
|
||||
}
|
||||
|
||||
pipeline := &providerUtils.ListModelsPipeline{
|
||||
AllowedModels: allowedModels,
|
||||
BlacklistedModels: blacklistedModels,
|
||||
Aliases: aliases,
|
||||
Unfiltered: unfiltered,
|
||||
ProviderKey: providerKey,
|
||||
MatchFns: providerUtils.DefaultMatchFns(),
|
||||
}
|
||||
if pipeline.ShouldEarlyExit() {
|
||||
return bifrostResponse
|
||||
}
|
||||
|
||||
included := make(map[string]bool)
|
||||
|
||||
for _, model := range *response {
|
||||
for _, result := range pipeline.FilterModel(model.ModelID) {
|
||||
entry := schemas.Model{
|
||||
ID: string(providerKey) + "/" + result.ResolvedID,
|
||||
Name: schemas.Ptr(model.Name),
|
||||
}
|
||||
if result.AliasValue != "" {
|
||||
entry.Alias = schemas.Ptr(result.AliasValue)
|
||||
}
|
||||
bifrostResponse.Data = append(bifrostResponse.Data, entry)
|
||||
included[strings.ToLower(result.ResolvedID)] = true
|
||||
}
|
||||
}
|
||||
|
||||
bifrostResponse.Data = append(bifrostResponse.Data,
|
||||
pipeline.BackfillModels(included)...)
|
||||
|
||||
return bifrostResponse
|
||||
}
|
||||
257
core/providers/elevenlabs/realtime.go
Normal file
257
core/providers/elevenlabs/realtime.go
Normal file
@@ -0,0 +1,257 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"net/url"
	"strings"

	"github.com/maximhq/bifrost/core/schemas"

	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
)
|
||||
|
||||
// SupportsRealtimeAPI returns true since ElevenLabs supports Conversational AI via WebSocket.
|
||||
func (provider *ElevenlabsProvider) SupportsRealtimeAPI() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// RealtimeWebSocketURL returns the WSS URL for the ElevenLabs Conversational AI endpoint.
|
||||
// The model parameter is used as the agent_id query parameter.
|
||||
// Format: wss://api.elevenlabs.io/v1/convai/conversation?agent_id=<model>
|
||||
func (provider *ElevenlabsProvider) RealtimeWebSocketURL(key schemas.Key, model string) string {
|
||||
base := provider.networkConfig.BaseURL
|
||||
base = strings.Replace(base, "https://", "wss://", 1)
|
||||
base = strings.Replace(base, "http://", "ws://", 1)
|
||||
return base + "/v1/convai/conversation?agent_id=" + model
|
||||
}
|
||||
|
||||
// RealtimeHeaders returns the headers required for the ElevenLabs Conversational AI WebSocket.
|
||||
func (provider *ElevenlabsProvider) RealtimeHeaders(key schemas.Key) map[string]string {
|
||||
headers := map[string]string{
|
||||
"xi-api-key": key.Value.GetValue(),
|
||||
}
|
||||
for k, v := range provider.networkConfig.ExtraHeaders {
|
||||
if strings.EqualFold(k, "xi-api-key") {
|
||||
continue
|
||||
}
|
||||
headers[k] = v
|
||||
}
|
||||
return headers
|
||||
}
|
||||
|
||||
// SupportsRealtimeWebRTC returns false — ElevenLabs WebRTC SDP exchange is not yet implemented.
|
||||
func (provider *ElevenlabsProvider) SupportsRealtimeWebRTC() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ExchangeRealtimeWebRTCSDP is not yet implemented for ElevenLabs.
|
||||
func (provider *ElevenlabsProvider) ExchangeRealtimeWebRTCSDP(_ *schemas.BifrostContext, _ schemas.Key, _ string, _ string, _ json.RawMessage) (string, *schemas.BifrostError) {
|
||||
return "", &schemas.BifrostError{
|
||||
IsBifrostError: true,
|
||||
StatusCode: schemas.Ptr(400),
|
||||
Error: &schemas.ErrorField{Type: schemas.Ptr("invalid_request_error"), Message: "WebRTC SDP exchange is not yet implemented for ElevenLabs"},
|
||||
}
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) ShouldStartRealtimeTurn(event *schemas.BifrostRealtimeEvent) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) RealtimeTurnFinalEvent() schemas.RealtimeEventType {
|
||||
return schemas.RTEventResponseDone
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) RealtimeWebRTCDataChannelLabel() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) RealtimeWebSocketSubprotocol() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) ShouldForwardRealtimeEvent(event *schemas.BifrostRealtimeEvent) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (provider *ElevenlabsProvider) ShouldAccumulateRealtimeOutput(eventType schemas.RealtimeEventType) bool {
|
||||
return eventType == schemas.RTEventResponseDone
|
||||
}
|
||||
|
||||
// ElevenLabs Conversational AI WebSocket event types, as they appear in the
// "type" field of an event.
const (
	// Event types translated by ToBifrostRealtimeEvent.
	elConversationInitMetadata = "conversation_initiation_metadata"
	elPing                     = "ping"
	elAudio                    = "audio"
	elUserTranscript           = "user_transcript"
	elAgentResponse            = "agent_response"
	elAgentResponseCorrection  = "agent_response_correction"
	elInterruption             = "interruption"
	elClientToolCall           = "client_tool_call"

	// Remaining event types; presumably the client-to-server direction
	// (ToProviderRealtimeEvent emits user_audio_chunk) — TODO confirm.
	elUserAudioChunk   = "user_audio_chunk"
	elPong             = "pong"
	elClientToolResult = "client_tool_result"
	elContextualUpdate = "contextual_update"
)
|
||||
|
||||
// elevenlabsEvent is the raw envelope of an ElevenLabs Conversational AI
// WebSocket event. Type selects the event kind; each payload is kept as raw
// JSON so it can be decoded only when its event type is handled.
type elevenlabsEvent struct {
	Type string `json:"type"`

	// Server event payloads.
	ConversationInitMetadata json.RawMessage `json:"conversation_initiation_metadata_event,omitempty"`
	Audio                    json.RawMessage `json:"audio_event,omitempty"`
	UserTranscript           json.RawMessage `json:"user_transcription_event,omitempty"`
	AgentResponse            json.RawMessage `json:"agent_response_event,omitempty"`
	AgentResponseCorrection  json.RawMessage `json:"agent_response_correction_event,omitempty"`
	ClientToolCall           json.RawMessage `json:"client_tool_call,omitempty"`
	PingEvent                json.RawMessage `json:"ping_event,omitempty"`

	// Client event payloads.
	UserAudioChunk json.RawMessage `json:"user_audio_chunk,omitempty"`
}
|
||||
|
||||
// elevenlabsAudioEvent is the payload of an ElevenLabs audio event.
// Audio holds the "audio_base_64" field; Alignment is kept as raw JSON.
type elevenlabsAudioEvent struct {
	Audio     string          `json:"audio_base_64,omitempty"`
	Alignment json.RawMessage `json:"alignment,omitempty"`
}
|
||||
|
||||
// elevenlabsTranscriptEvent is the shared payload shape used to decode both the
// user transcript event and the agent response event from ElevenLabs.
type elevenlabsTranscriptEvent struct {
	UserTranscript  string `json:"user_transcript,omitempty"`
	AgentResponse   string `json:"agent_response,omitempty"`
	AgentResponseID string `json:"agent_response_id,omitempty"`
}
|
||||
|
||||
// elevenlabsCorrectionEvent is the payload of an ElevenLabs agent response
// correction event, carrying the original and the corrected agent response text.
type elevenlabsCorrectionEvent struct {
	OriginalAgentResponse  string `json:"original_agent_response,omitempty"`
	CorrectedAgentResponse string `json:"corrected_agent_response,omitempty"`
}
|
||||
|
||||
// ToBifrostRealtimeEvent converts an ElevenLabs Conversational AI event to the unified Bifrost format.
|
||||
func (provider *ElevenlabsProvider) ToBifrostRealtimeEvent(providerEvent json.RawMessage) (*schemas.BifrostRealtimeEvent, error) {
|
||||
var raw elevenlabsEvent
|
||||
if err := json.Unmarshal(providerEvent, &raw); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal ElevenLabs realtime event: %w", err)
|
||||
}
|
||||
|
||||
event := &schemas.BifrostRealtimeEvent{
|
||||
RawData: providerEvent,
|
||||
}
|
||||
|
||||
switch raw.Type {
|
||||
case elConversationInitMetadata:
|
||||
event.Type = schemas.RTEventSessionCreated
|
||||
event.Session = &schemas.RealtimeSession{}
|
||||
|
||||
case elPing:
|
||||
event.Type = schemas.RealtimeEventType("ping")
|
||||
|
||||
case elAudio:
|
||||
event.Type = schemas.RTEventResponseAudioDelta
|
||||
if raw.Audio != nil {
|
||||
var audioEvt elevenlabsAudioEvent
|
||||
if err := json.Unmarshal(raw.Audio, &audioEvt); err == nil {
|
||||
event.Delta = &schemas.RealtimeDelta{
|
||||
Audio: audioEvt.Audio,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case elUserTranscript:
|
||||
event.Type = schemas.RTEventInputAudioTransCompleted
|
||||
if raw.UserTranscript != nil {
|
||||
var transcript elevenlabsTranscriptEvent
|
||||
if err := json.Unmarshal(raw.UserTranscript, &transcript); err == nil {
|
||||
event.Delta = &schemas.RealtimeDelta{
|
||||
Transcript: transcript.UserTranscript,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case elAgentResponse:
|
||||
event.Type = schemas.RTEventResponseDone
|
||||
if raw.AgentResponse != nil {
|
||||
var agentResp elevenlabsTranscriptEvent
|
||||
if err := json.Unmarshal(raw.AgentResponse, &agentResp); err == nil {
|
||||
event.Delta = &schemas.RealtimeDelta{
|
||||
Text: agentResp.AgentResponse,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case elAgentResponseCorrection:
|
||||
event.Type = schemas.RTEventResponseTextDelta
|
||||
if raw.AgentResponseCorrection != nil {
|
||||
var correction elevenlabsCorrectionEvent
|
||||
if err := json.Unmarshal(raw.AgentResponseCorrection, &correction); err == nil {
|
||||
event.Delta = &schemas.RealtimeDelta{
|
||||
Text: correction.CorrectedAgentResponse,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case elInterruption:
|
||||
event.Type = schemas.RTEventResponseCancel
|
||||
|
||||
case elClientToolCall:
|
||||
event.Type = schemas.RealtimeEventType("client_tool_call")
|
||||
if raw.ClientToolCall != nil {
|
||||
var toolCall struct {
|
||||
ToolName string `json:"tool_name"`
|
||||
Parameters json.RawMessage `json:"parameters"`
|
||||
ToolCallID string `json:"tool_call_id"`
|
||||
}
|
||||
if err := json.Unmarshal(raw.ClientToolCall, &toolCall); err == nil {
|
||||
args := string(toolCall.Parameters)
|
||||
if len(toolCall.Parameters) > 0 {
|
||||
var parsed interface{}
|
||||
if err := json.Unmarshal(toolCall.Parameters, &parsed); err == nil {
|
||||
if sorted, err := providerUtils.MarshalSorted(parsed); err == nil {
|
||||
args = string(sorted)
|
||||
}
|
||||
}
|
||||
}
|
||||
event.Item = &schemas.RealtimeItem{
|
||||
Type: "function_call",
|
||||
Name: toolCall.ToolName,
|
||||
CallID: toolCall.ToolCallID,
|
||||
Arguments: args,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
event.Type = schemas.RealtimeEventType(raw.Type)
|
||||
}
|
||||
|
||||
return event, nil
|
||||
}
|
||||
|
||||
// ToProviderRealtimeEvent converts a unified Bifrost Realtime event to ElevenLabs' native JSON.
|
||||
func (provider *ElevenlabsProvider) ToProviderRealtimeEvent(bifrostEvent *schemas.BifrostRealtimeEvent) (json.RawMessage, error) {
|
||||
switch bifrostEvent.Type {
|
||||
case schemas.RTEventInputAudioAppend:
|
||||
if bifrostEvent.Delta == nil {
|
||||
return nil, fmt.Errorf("delta must be set for input_audio_buffer.append events")
|
||||
}
|
||||
out := map[string]interface{}{
|
||||
"type": elUserAudioChunk,
|
||||
"user_audio_chunk": bifrostEvent.Delta.Audio,
|
||||
}
|
||||
return schemas.MarshalSorted(out)
|
||||
|
||||
case schemas.RealtimeEventType("pong"):
|
||||
return schemas.MarshalSorted(map[string]interface{}{
|
||||
"type": "pong",
|
||||
})
|
||||
|
||||
default:
|
||||
out := map[string]interface{}{
|
||||
"type": string(bifrostEvent.Type),
|
||||
}
|
||||
return schemas.MarshalSorted(out)
|
||||
}
|
||||
}
|
||||
102
core/providers/elevenlabs/speech.go
Normal file
102
core/providers/elevenlabs/speech.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
func ToElevenlabsSpeechRequest(bifrostReq *schemas.BifrostSpeechRequest) *ElevenlabsSpeechRequest {
|
||||
if bifrostReq == nil || bifrostReq.Input == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
elevenlabsReq := &ElevenlabsSpeechRequest{
|
||||
ModelID: bifrostReq.Model,
|
||||
Text: bifrostReq.Input.Input,
|
||||
}
|
||||
|
||||
if bifrostReq.Params != nil {
|
||||
elevenlabsReq.ExtraParams = bifrostReq.Params.ExtraParams
|
||||
voiceSettings := ElevenlabsVoiceSettings{}
|
||||
hasVoiceSettings := false
|
||||
|
||||
if bifrostReq.Params.Speed != nil {
|
||||
voiceSettings.Speed = *bifrostReq.Params.Speed
|
||||
hasVoiceSettings = true
|
||||
}
|
||||
|
||||
if bifrostReq.Params.ExtraParams != nil {
|
||||
if stability, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["stability"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "stability")
|
||||
voiceSettings.Stability = *stability
|
||||
hasVoiceSettings = true
|
||||
}
|
||||
if useSpeakerBoost, ok := schemas.SafeExtractBoolPointer(bifrostReq.Params.ExtraParams["use_speaker_boost"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "use_speaker_boost")
|
||||
voiceSettings.UseSpeakerBoost = *useSpeakerBoost
|
||||
hasVoiceSettings = true
|
||||
}
|
||||
if similarityBoost, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["similarity_boost"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "similarity_boost")
|
||||
voiceSettings.SimilarityBoost = *similarityBoost
|
||||
hasVoiceSettings = true
|
||||
}
|
||||
if style, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["style"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "style")
|
||||
voiceSettings.Style = *style
|
||||
hasVoiceSettings = true
|
||||
}
|
||||
if seed, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["seed"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "seed")
|
||||
elevenlabsReq.Seed = seed
|
||||
}
|
||||
if previousText, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["previous_text"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "previous_text")
|
||||
elevenlabsReq.PreviousText = previousText
|
||||
}
|
||||
if nextText, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["next_text"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "next_text")
|
||||
elevenlabsReq.NextText = nextText
|
||||
}
|
||||
if previousRequestIDs, ok := schemas.SafeExtractStringSlice(bifrostReq.Params.ExtraParams["previous_request_ids"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "previous_request_ids")
|
||||
elevenlabsReq.PreviousRequestIDs = previousRequestIDs
|
||||
}
|
||||
if nextRequestIDs, ok := schemas.SafeExtractStringSlice(bifrostReq.Params.ExtraParams["next_request_ids"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "next_request_ids")
|
||||
elevenlabsReq.NextRequestIDs = nextRequestIDs
|
||||
}
|
||||
if applyTextNormalization, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["apply_text_normalization"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "apply_text_normalization")
|
||||
elevenlabsReq.ApplyTextNormalization = applyTextNormalization
|
||||
}
|
||||
if applyLanguageTextNormalization, ok := schemas.SafeExtractBoolPointer(bifrostReq.Params.ExtraParams["apply_language_text_normalization"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "apply_language_text_normalization")
|
||||
elevenlabsReq.ApplyLanguageTextNormalization = applyLanguageTextNormalization
|
||||
}
|
||||
if usePVCAsIVC, ok := schemas.SafeExtractBoolPointer(bifrostReq.Params.ExtraParams["use_pvc_as_ivc"]); ok {
|
||||
delete(elevenlabsReq.ExtraParams, "use_pvc_as_ivc")
|
||||
elevenlabsReq.UsePVCAsIVC = usePVCAsIVC
|
||||
}
|
||||
}
|
||||
|
||||
if hasVoiceSettings {
|
||||
elevenlabsReq.VoiceSettings = &voiceSettings
|
||||
}
|
||||
|
||||
if bifrostReq.Params.LanguageCode != nil {
|
||||
elevenlabsReq.LanguageCode = bifrostReq.Params.LanguageCode
|
||||
}
|
||||
|
||||
if len(bifrostReq.Params.PronunciationDictionaryLocators) > 0 {
|
||||
elevenlabsReq.PronunciationDictionaryLocators = make([]ElevenlabsPronunciationDictionaryLocator, len(bifrostReq.Params.PronunciationDictionaryLocators))
|
||||
for i, locator := range bifrostReq.Params.PronunciationDictionaryLocators {
|
||||
elevenlabsReq.PronunciationDictionaryLocators[i] = ElevenlabsPronunciationDictionaryLocator{
|
||||
PronunciationDictionaryID: locator.PronunciationDictionaryID,
|
||||
VersionID: locator.VersionID,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return elevenlabsReq
|
||||
}
|
||||
269
core/providers/elevenlabs/transcription.go
Normal file
269
core/providers/elevenlabs/transcription.go
Normal file
@@ -0,0 +1,269 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
func ToElevenlabsTranscriptionRequest(bifrostReq *schemas.BifrostTranscriptionRequest) *ElevenlabsTranscriptionRequest {
|
||||
if bifrostReq == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
req := &ElevenlabsTranscriptionRequest{
|
||||
ModelID: bifrostReq.Model,
|
||||
}
|
||||
|
||||
if bifrostReq.Input != nil && len(bifrostReq.Input.File) > 0 {
|
||||
req.File = bifrostReq.Input.File
|
||||
req.Filename = bifrostReq.Input.Filename
|
||||
}
|
||||
|
||||
if bifrostReq.Params == nil {
|
||||
return req
|
||||
}
|
||||
|
||||
params := bifrostReq.Params
|
||||
|
||||
if params.Language != nil {
|
||||
req.LanguageCode = params.Language
|
||||
}
|
||||
|
||||
if params.ExtraParams != nil {
|
||||
if tagAudioEvents, ok := schemas.SafeExtractBoolPointer(params.ExtraParams["tag_audio_events"]); ok {
|
||||
delete(params.ExtraParams, "tag_audio_events")
|
||||
req.TagAudioEvents = tagAudioEvents
|
||||
}
|
||||
if numSpeakers, ok := schemas.SafeExtractIntPointer(params.ExtraParams["num_speakers"]); ok {
|
||||
delete(params.ExtraParams, "num_speakers")
|
||||
req.NumSpeakers = numSpeakers
|
||||
}
|
||||
if timestampsGranularity, ok := schemas.SafeExtractStringPointer(params.ExtraParams["timestamps_granularity"]); ok {
|
||||
granularity := ElevenlabsTimestampsGranularity(*timestampsGranularity)
|
||||
delete(params.ExtraParams, "timestamps_granularity")
|
||||
req.TimestampsGranularity = &granularity
|
||||
}
|
||||
if diarize, ok := schemas.SafeExtractBoolPointer(params.ExtraParams["diarize"]); ok {
|
||||
delete(params.ExtraParams, "diarize")
|
||||
req.Diarize = diarize
|
||||
}
|
||||
if diarizationThreshold, ok := schemas.SafeExtractFloat64Pointer(params.ExtraParams["diarization_threshold"]); ok {
|
||||
delete(params.ExtraParams, "diarization_threshold")
|
||||
req.DiarizationThreshold = diarizationThreshold
|
||||
}
|
||||
if fileFormat, ok := schemas.SafeExtractStringPointer(params.ExtraParams["file_format"]); ok {
|
||||
fileFormat := ElevenlabsFileFormat(*fileFormat)
|
||||
delete(params.ExtraParams, "file_format")
|
||||
req.FileFormat = &fileFormat
|
||||
}
|
||||
if cloudStorageURL, ok := schemas.SafeExtractStringPointer(params.ExtraParams["cloud_storage_url"]); ok {
|
||||
delete(params.ExtraParams, "cloud_storage_url")
|
||||
req.CloudStorageURL = cloudStorageURL
|
||||
}
|
||||
if webhook, ok := schemas.SafeExtractBoolPointer(params.ExtraParams["webhook"]); ok {
|
||||
delete(params.ExtraParams, "webhook")
|
||||
req.Webhook = webhook
|
||||
}
|
||||
if webhookID, ok := schemas.SafeExtractStringPointer(params.ExtraParams["webhook_id"]); ok {
|
||||
delete(params.ExtraParams, "webhook_id")
|
||||
req.WebhookID = webhookID
|
||||
}
|
||||
if temperature, ok := schemas.SafeExtractFloat64Pointer(params.ExtraParams["temperature"]); ok {
|
||||
delete(params.ExtraParams, "temperature")
|
||||
req.Temperature = temperature
|
||||
}
|
||||
if seed, ok := schemas.SafeExtractIntPointer(params.ExtraParams["seed"]); ok {
|
||||
delete(params.ExtraParams, "seed")
|
||||
req.Seed = seed
|
||||
}
|
||||
if useMultiChannel, ok := schemas.SafeExtractBoolPointer(params.ExtraParams["use_multi_channel"]); ok {
|
||||
delete(params.ExtraParams, "use_multi_channel")
|
||||
req.UseMultiChannel = useMultiChannel
|
||||
}
|
||||
req.ExtraParams = bifrostReq.Params.ExtraParams
|
||||
}
|
||||
|
||||
if len(params.AdditionalFormats) > 0 {
|
||||
additionalFormats := make([]ElevenlabsAdditionalFormat, 0, len(params.AdditionalFormats))
|
||||
for _, format := range params.AdditionalFormats {
|
||||
if converted, ok := convertAdditionalFormat(format); ok {
|
||||
additionalFormats = append(additionalFormats, converted)
|
||||
}
|
||||
}
|
||||
if len(additionalFormats) > 0 {
|
||||
req.AdditionalFormats = additionalFormats
|
||||
}
|
||||
}
|
||||
|
||||
if params.WebhookMetadata != nil {
|
||||
if metadataMap, ok := params.WebhookMetadata.(map[string]interface{}); ok {
|
||||
if len(metadataMap) > 0 {
|
||||
req.WebhookMetadata = metadataMap
|
||||
}
|
||||
} else {
|
||||
req.WebhookMetadata = params.WebhookMetadata
|
||||
}
|
||||
}
|
||||
|
||||
return req
|
||||
}
|
||||
|
||||
func ToBifrostTranscriptionResponse(chunks []ElevenlabsSpeechToTextChunkResponse) *schemas.BifrostTranscriptionResponse {
|
||||
if len(chunks) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
textParts := make([]string, 0, len(chunks))
|
||||
allWords := make([]schemas.TranscriptionWord, 0)
|
||||
allLogProbs := make([]schemas.TranscriptionLogProb, 0)
|
||||
|
||||
var language *string
|
||||
var overallDuration *float64
|
||||
|
||||
for _, chunk := range chunks {
|
||||
textParts = append(textParts, chunk.Text)
|
||||
|
||||
words, logProbs, chunkDuration := convertWords(chunk.Words)
|
||||
allWords = append(allWords, words...)
|
||||
allLogProbs = append(allLogProbs, logProbs...)
|
||||
|
||||
if language == nil && chunk.LanguageCode != "" {
|
||||
lc := chunk.LanguageCode
|
||||
language = &lc
|
||||
}
|
||||
|
||||
if chunkDuration != nil {
|
||||
if overallDuration == nil || *chunkDuration > *overallDuration {
|
||||
val := *chunkDuration
|
||||
overallDuration = &val
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
text := strings.Join(textParts, "\n")
|
||||
|
||||
response := &schemas.BifrostTranscriptionResponse{
|
||||
Text: text,
|
||||
Words: allWords,
|
||||
LogProbs: allLogProbs,
|
||||
}
|
||||
|
||||
if language != nil {
|
||||
response.Language = language
|
||||
}
|
||||
|
||||
if overallDuration != nil {
|
||||
response.Duration = overallDuration
|
||||
}
|
||||
|
||||
return response
|
||||
|
||||
}
|
||||
|
||||
func convertAdditionalFormat(format schemas.TranscriptionAdditionalFormat) (ElevenlabsAdditionalFormat, bool) {
|
||||
if format.Format == "" {
|
||||
return ElevenlabsAdditionalFormat{}, false
|
||||
}
|
||||
|
||||
converted := ElevenlabsAdditionalFormat{
|
||||
Format: ElevenlabsExportOptions(format.Format),
|
||||
}
|
||||
|
||||
if format.IncludeSpeakers != nil {
|
||||
converted.IncludeSpeakers = format.IncludeSpeakers
|
||||
}
|
||||
|
||||
if format.IncludeTimestamps != nil {
|
||||
converted.IncludeTimestamps = format.IncludeTimestamps
|
||||
}
|
||||
|
||||
if format.SegmentOnSilenceLongerThanS != nil {
|
||||
converted.SegmentOnSilenceLongerThanS = format.SegmentOnSilenceLongerThanS
|
||||
}
|
||||
|
||||
if format.MaxSegmentDurationS != nil {
|
||||
converted.MaxSegmentDurationS = format.MaxSegmentDurationS
|
||||
}
|
||||
|
||||
if format.MaxSegmentChars != nil {
|
||||
converted.MaxSegmentChars = format.MaxSegmentChars
|
||||
}
|
||||
|
||||
if format.MaxCharactersPerLine != nil {
|
||||
converted.MaxCharactersPerLine = format.MaxCharactersPerLine
|
||||
}
|
||||
|
||||
return converted, true
|
||||
}
|
||||
|
||||
func convertWords(words []ElevenlabsSpeechToTextWord) ([]schemas.TranscriptionWord, []schemas.TranscriptionLogProb, *float64) {
|
||||
if len(words) == 0 {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
convertedWords := make([]schemas.TranscriptionWord, 0, len(words))
|
||||
logProbs := make([]schemas.TranscriptionLogProb, 0, len(words))
|
||||
|
||||
var maxEnd float64
|
||||
var hasEnd bool
|
||||
|
||||
for _, word := range words {
|
||||
trimmed := strings.TrimSpace(word.Text)
|
||||
if word.Type == "spacing" && trimmed == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
transcriptionWord := schemas.TranscriptionWord{
|
||||
Word: word.Text,
|
||||
}
|
||||
|
||||
if word.Start != nil {
|
||||
transcriptionWord.Start = *word.Start
|
||||
}
|
||||
|
||||
if word.End != nil {
|
||||
transcriptionWord.End = *word.End
|
||||
if !hasEnd || *word.End > maxEnd {
|
||||
maxEnd = *word.End
|
||||
hasEnd = true
|
||||
}
|
||||
}
|
||||
|
||||
convertedWords = append(convertedWords, transcriptionWord)
|
||||
logProbs = append(logProbs, schemas.TranscriptionLogProb{
|
||||
Token: word.Text,
|
||||
LogProb: word.LogProb,
|
||||
})
|
||||
}
|
||||
|
||||
if !hasEnd {
|
||||
return convertedWords, logProbs, nil
|
||||
}
|
||||
|
||||
duration := maxEnd
|
||||
return convertedWords, logProbs, &duration
|
||||
}
|
||||
|
||||
func parseTranscriptionResponse(body []byte) ([]ElevenlabsSpeechToTextChunkResponse, error) {
|
||||
var multichannel ElevenlabsMultichannelSpeechToTextResponse
|
||||
if err := sonic.Unmarshal(body, &multichannel); err == nil && len(multichannel.Transcripts) > 0 {
|
||||
return multichannel.Transcripts, nil
|
||||
}
|
||||
|
||||
var single ElevenlabsSpeechToTextChunkResponse
|
||||
if err := sonic.Unmarshal(body, &single); err == nil {
|
||||
if single.LanguageCode != "" || single.Text != "" || len(single.Words) > 0 {
|
||||
return []ElevenlabsSpeechToTextChunkResponse{single}, nil
|
||||
}
|
||||
}
|
||||
|
||||
var webhook ElevenlabsSpeechToTextWebhookResponse
|
||||
if err := sonic.Unmarshal(body, &webhook); err == nil && strings.TrimSpace(webhook.Message) != "" {
|
||||
return nil, errors.New(webhook.Message)
|
||||
}
|
||||
|
||||
return nil, errors.New("unexpected Elevenlabs transcription response format")
|
||||
}
|
||||
289
core/providers/elevenlabs/types.go
Normal file
289
core/providers/elevenlabs/types.go
Normal file
@@ -0,0 +1,289 @@
|
||||
package elevenlabs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
)
|
||||
|
||||
// SPEECH TYPES
|
||||
|
||||
type ElevenlabsSpeechRequest struct {
|
||||
Text string `json:"text"`
|
||||
ModelID string `json:"model_id"` // defaults to "eleven_multilingual_v2"
|
||||
LanguageCode *string `json:"language_code,omitempty"`
|
||||
VoiceSettings *ElevenlabsVoiceSettings `json:"voice_settings,omitempty"`
|
||||
PronunciationDictionaryLocators []ElevenlabsPronunciationDictionaryLocator `json:"pronunciation_dictionary_locators"`
|
||||
Seed *int `json:"seed,omitempty"`
|
||||
PreviousText *string `json:"previous_text,omitempty"`
|
||||
NextText *string `json:"next_text,omitempty"`
|
||||
PreviousRequestIDs []string `json:"previous_request_ids"`
|
||||
NextRequestIDs []string `json:"next_request_ids"`
|
||||
ApplyTextNormalization *string `json:"apply_text_normalization,omitempty"`
|
||||
ApplyLanguageTextNormalization *bool `json:"apply_language_text_normalization,omitempty"`
|
||||
UsePVCAsIVC *bool `json:"use_pvc_as_ivc,omitempty"` // deprecated
|
||||
ExtraParams map[string]interface{} `json:"-"`
|
||||
}
|
||||
|
||||
// GetExtraParams implements the providerUtils.RequestBodyWithExtraParams interface.
|
||||
func (r *ElevenlabsSpeechRequest) GetExtraParams() map[string]interface{} {
|
||||
return r.ExtraParams
|
||||
}
|
||||
|
||||
// ElevenlabsSpeechWithTimestampsResponse is the JSON payload returned by the
// text-to-speech "with-timestamps" endpoint: base64 audio plus optional
// alignment data for the original and the normalized text.
type ElevenlabsSpeechWithTimestampsResponse struct {
	// AudioBase64 is the generated audio, base64-encoded.
	AudioBase64 string `json:"audio_base64"`
	// Alignment is the timing information for the input text, when present.
	Alignment *ElevenlabsAlignment `json:"alignment,omitempty"`
	// NormalizedAlignment is the timing information for the normalized text, when present.
	NormalizedAlignment *ElevenlabsAlignment `json:"normalized_alignment,omitempty"`
}

// ElevenlabsAlignment holds character-level timing information as three
// parallel lists: the characters and their start/end times in milliseconds.
type ElevenlabsAlignment struct {
	CharStartTimesMs []float64 `json:"char_start_times_ms"`
	CharEndTimesMs   []float64 `json:"char_end_times_ms"`
	Characters       []string  `json:"characters"`
}
|
||||
|
||||
// ElevenlabsVoiceSettings carries the per-request voice parameters of a
// text-to-speech call. All fields are plain values without omitempty, so every
// field is always present in the encoded JSON.
type ElevenlabsVoiceSettings struct {
	Stability       float64 `json:"stability"`         // 0-1, default 0.5
	UseSpeakerBoost bool    `json:"use_speaker_boost"` // default true
	SimilarityBoost float64 `json:"similarity_boost"`  // 0-1, default 0.75
	Style           float64 `json:"style"`             // default 0
	Speed           float64 `json:"speed"`             // default 1
}
|
||||
|
||||
// ElevenlabsPronunciationDictionaryLocator identifies a pronunciation
// dictionary by ID and, optionally, a specific version of it.
type ElevenlabsPronunciationDictionaryLocator struct {
	// PronunciationDictionaryID is the dictionary identifier.
	PronunciationDictionaryID string `json:"pronunciation_dictionary_id"`
	// VersionID is the optional dictionary version; omitted from JSON when nil.
	VersionID *string `json:"version_id,omitempty"`
}
|
||||
|
||||
// TRANSCRIPTION TYPES
|
||||
type ElevenlabsTranscriptionRequest struct {
|
||||
ModelID string `json:"model_id"`
|
||||
File []byte `json:"-"`
|
||||
Filename string `json:"-"` // Original filename, used to preserve file format extension
|
||||
LanguageCode *string `json:"language_code,omitempty"`
|
||||
TagAudioEvents *bool `json:"tag_audio_events,omitempty"`
|
||||
NumSpeakers *int `json:"num_speakers,omitempty"`
|
||||
TimestampsGranularity *ElevenlabsTimestampsGranularity `json:"timestamps_granularity,omitempty"`
|
||||
Diarize *bool `json:"diarize,omitempty"`
|
||||
DiarizationThreshold *float64 `json:"diarization_threshold,omitempty"`
|
||||
AdditionalFormats []ElevenlabsAdditionalFormat `json:"additional_formats,omitempty"`
|
||||
FileFormat *ElevenlabsFileFormat `json:"file_format,omitempty"`
|
||||
CloudStorageURL *string `json:"cloud_storage_url,omitempty"`
|
||||
Webhook *bool `json:"webhook,omitempty"`
|
||||
WebhookID *string `json:"webhook_id,omitempty"`
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
Seed *int `json:"seed,omitempty"`
|
||||
UseMultiChannel *bool `json:"use_multi_channel,omitempty"`
|
||||
WebhookMetadata interface{} `json:"webhook_metadata,omitempty"`
|
||||
ExtraParams map[string]interface{} `json:"-"`
|
||||
}
|
||||
|
||||
// GetExtraParams implements the RequestBodyWithExtraParams interface
|
||||
func (req *ElevenlabsTranscriptionRequest) GetExtraParams() map[string]interface{} {
|
||||
return req.ExtraParams
|
||||
}
|
||||
|
||||
// ElevenlabsTimestampsGranularity is the string enum used for the
// "timestamps_granularity" transcription option.
type ElevenlabsTimestampsGranularity string

// Known timestamp granularity values.
const (
	ElevenlabsTimestampsGranularityNone      ElevenlabsTimestampsGranularity = "none"
	ElevenlabsTimestampsGranularityWord      ElevenlabsTimestampsGranularity = "word"
	ElevenlabsTimestampsGranularityCharacter ElevenlabsTimestampsGranularity = "character"
)
|
||||
|
||||
// ElevenlabsFileFormat is the string enum used for the "file_format"
// transcription option.
type ElevenlabsFileFormat string

// Known file format values.
const (
	ElevenlabsFileFormatPcmS16le16 ElevenlabsFileFormat = "pcm_s16le_16"
	ElevenlabsFileFormatOther      ElevenlabsFileFormat = "other"
)
|
||||
|
||||
type ElevenlabsAdditionalFormat struct {
|
||||
Format ElevenlabsExportOptions `json:"format"`
|
||||
IncludeSpeakers *bool `json:"include_speakers,omitempty"`
|
||||
IncludeTimestamps *bool `json:"include_timestamps,omitempty"`
|
||||
SegmentOnSilenceLongerThanS *float64 `json:"segment_on_silence_longer_than_s,omitempty"`
|
||||
MaxSegmentDurationS *float64 `json:"max_segment_duration_s,omitempty"`
|
||||
MaxSegmentChars *int `json:"max_segment_chars,omitempty"`
|
||||
MaxCharactersPerLine *int `json:"max_characters_per_line,omitempty"`
|
||||
}
|
||||
|
||||
// ElevenlabsExportOptions is the string enum naming an additional export
// format for a transcription.
type ElevenlabsExportOptions string

// Known export format values.
const (
	ElevenlabsExportOptionsSegmentedJson ElevenlabsExportOptions = "segmented_json"
	ElevenlabsExportOptionsDocx          ElevenlabsExportOptions = "docx"
	ElevenlabsExportOptionsPdf           ElevenlabsExportOptions = "pdf"
	ElevenlabsExportOptionsTxt           ElevenlabsExportOptions = "txt"
	ElevenlabsExportOptionsHtml          ElevenlabsExportOptions = "html"
	ElevenlabsExportOptionsSrt           ElevenlabsExportOptions = "srt"
)
|
||||
|
||||
// ElevenlabsSpeechToTextChunkResponse is one transcript as returned by the
// speech-to-text API: the detected language, the full text, the word list and
// optional per-channel / export-format data.
type ElevenlabsSpeechToTextChunkResponse struct {
	LanguageCode        string                                `json:"language_code"`
	LanguageProbability *float64                              `json:"language_probability,omitempty"`
	Text                string                                `json:"text"`
	Words               []ElevenlabsSpeechToTextWord          `json:"words"`
	ChannelIndex        *int                                  `json:"channel_index,omitempty"`
	AdditionalFormats   []*ElevenlabsAdditionalFormatResponse `json:"additional_formats,omitempty"`
	TranscriptionID     *string                               `json:"transcription_id,omitempty"`
}

// ElevenlabsSpeechToTextWord is one entry of a transcript's word list. Type
// distinguishes entry kinds (the converter in this package treats "spacing"
// specially); Start and End are optional timestamps.
type ElevenlabsSpeechToTextWord struct {
	Text       string                            `json:"text"`
	Start      *float64                          `json:"start,omitempty"`
	End        *float64                          `json:"end,omitempty"`
	Type       string                            `json:"type"`
	SpeakerID  *string                           `json:"speaker_id,omitempty"`
	LogProb    float64                           `json:"logprob"`
	Characters []ElevenlabsSpeechToTextCharacter `json:"characters,omitempty"`
}

// ElevenlabsSpeechToTextCharacter is one character of a word with optional
// start and end timestamps.
type ElevenlabsSpeechToTextCharacter struct {
	Text  string   `json:"text"`
	Start *float64 `json:"start,omitempty"`
	End   *float64 `json:"end,omitempty"`
}

// ElevenlabsAdditionalFormatResponse is one exported transcript rendition
// returned alongside a transcript.
type ElevenlabsAdditionalFormatResponse struct {
	RequestedFormat string `json:"requested_format"`
	FileExtension   string `json:"file_extension"`
	ContentType     string `json:"content_type"`
	// IsBase64Encoded reports whether Content is base64-encoded.
	IsBase64Encoded bool   `json:"is_base64_encoded"`
	Content         string `json:"content"`
}

// ElevenlabsMultichannelSpeechToTextResponse is the response shape that
// carries a list of transcripts under "transcripts".
type ElevenlabsMultichannelSpeechToTextResponse struct {
	Transcripts     []ElevenlabsSpeechToTextChunkResponse `json:"transcripts"`
	TranscriptionID *string                               `json:"transcription_id,omitempty"`
}

// ElevenlabsSpeechToTextWebhookResponse is the response shape that carries a
// message and request ID instead of a transcript.
type ElevenlabsSpeechToTextWebhookResponse struct {
	Message         string  `json:"message"`
	RequestID       string  `json:"request_id"`
	TranscriptionID *string `json:"transcription_id,omitempty"`
}
|
||||
|
||||
// ERROR TYPES
|
||||
type ElevenlabsError struct {
|
||||
Detail *ElevenlabsErrorDetail `json:"detail,omitempty"`
|
||||
}
|
||||
|
||||
// ElevenlabsErrorDetail handles both single object (non-validation errors) and
|
||||
// array of objects (validation errors) formats from ElevenLabs API.
|
||||
type ElevenlabsErrorDetail struct {
|
||||
// Non-validation error fields (when detail is a single object)
|
||||
Status *string `json:"status,omitempty"`
|
||||
Message *string `json:"message,omitempty"`
|
||||
|
||||
// Validation error fields (when detail is an array)
|
||||
ValidationErrors []ElevenlabsValidationError `json:"-"`
|
||||
}
|
||||
|
||||
// ElevenlabsValidationError is one entry of a validation-error list.
type ElevenlabsValidationError struct {
	// Loc is the location path of the offending value.
	Loc []string `json:"loc"`
	// Msg is the human-readable description under the "msg" key.
	Msg string `json:"msg"`
	// Message is the same description for payloads that use "message" instead of "msg".
	Message string `json:"message"`
	// Type is the error type identifier.
	Type string `json:"type"`
}
|
||||
|
||||
// UnmarshalJSON implements custom JSON unmarshaling to handle both
|
||||
// single object and array formats from ElevenLabs API.
|
||||
func (d *ElevenlabsErrorDetail) UnmarshalJSON(data []byte) error {
|
||||
// First, try to unmarshal as an array (validation errors)
|
||||
// Check if it's an array by looking at the first non-whitespace character
|
||||
trimmed := strings.TrimSpace(string(data))
|
||||
if len(trimmed) > 0 && trimmed[0] == '[' {
|
||||
var validationErrors []ElevenlabsValidationError
|
||||
if err := sonic.Unmarshal(data, &validationErrors); err != nil {
|
||||
return err
|
||||
}
|
||||
d.ValidationErrors = validationErrors
|
||||
// Extract message from first validation error if available
|
||||
if len(validationErrors) > 0 {
|
||||
if validationErrors[0].Message != "" {
|
||||
d.Message = &validationErrors[0].Message
|
||||
} else if validationErrors[0].Msg != "" {
|
||||
d.Message = &validationErrors[0].Msg
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// If not an array, try to unmarshal as a single object (non-validation error)
|
||||
var obj struct {
|
||||
Type *string `json:"type,omitempty"`
|
||||
Loc []string `json:"loc,omitempty"`
|
||||
Message *string `json:"message,omitempty"`
|
||||
Status *string `json:"status,omitempty"`
|
||||
Msg *string `json:"msg,omitempty"` // Some APIs use "msg" instead of "message"
|
||||
}
|
||||
if err := sonic.Unmarshal(data, &obj); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Populate non-validation error fields
|
||||
d.Status = obj.Status
|
||||
if obj.Message != nil {
|
||||
d.Message = obj.Message
|
||||
} else if obj.Msg != nil {
|
||||
d.Message = obj.Msg
|
||||
}
|
||||
|
||||
// If this object has validation-like fields (Loc, Type), treat it as a single validation error
|
||||
if len(obj.Loc) > 0 || obj.Type != nil {
|
||||
validationErr := ElevenlabsValidationError{
|
||||
Loc: obj.Loc,
|
||||
Type: func() string {
|
||||
if obj.Type != nil {
|
||||
return *obj.Type
|
||||
}
|
||||
return ""
|
||||
}(),
|
||||
}
|
||||
if obj.Message != nil {
|
||||
validationErr.Message = *obj.Message
|
||||
} else if obj.Msg != nil {
|
||||
validationErr.Msg = *obj.Msg
|
||||
validationErr.Message = *obj.Msg
|
||||
}
|
||||
d.ValidationErrors = []ElevenlabsValidationError{validationErr}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MODEL TYPES
|
||||
// ElevenlabsModel describes one model entry of the list-models response:
// identity, capability flags, supported languages and request limits.
type ElevenlabsModel struct {
	// Identity.
	ModelID     string `json:"model_id"`
	Name        string `json:"name"`
	Description string `json:"description"`

	ServesProVoices bool    `json:"serves_pro_voices"`
	TokenCostFactor float64 `json:"token_cost_factor"`

	// Capability flags.
	CanBeFinetuned       bool `json:"can_be_finetuned"`
	CanDoTextToSpeech    bool `json:"can_do_text_to_speech"`
	CanDoVoiceConversion bool `json:"can_do_voice_conversion"`
	CanUseStyle          bool `json:"can_use_style"`
	CanUseSpeakerBoost   bool `json:"can_use_speaker_boost"`

	Languages           []ElevenlabsLanguage `json:"languages"`
	RequiresAlphaAccess bool                 `json:"requires_alpha_access"`

	// Request size limits.
	MaxCharactersRequestFreeUser       int `json:"max_characters_request_free_user"`
	MaxCharactersRequestSubscribedUser int `json:"max_characters_request_subscribed_user"`
	MaxTextLengthPerRequest            int `json:"maximum_text_length_per_request"`

	ModelRates       ElevenlabsModelRate `json:"model_rates"`
	ConcurrencyGroup string              `json:"concurrency_group"`
}

// ElevenlabsLanguage is one language supported by a model.
type ElevenlabsLanguage struct {
	LanguageID string `json:"language_id"`
	Name       string `json:"name"`
}

// ElevenlabsModelRate carries a model's pricing multiplier.
type ElevenlabsModelRate struct {
	CharacterCostMultiplier float64 `json:"character_cost_multiplier"`
}

// ElevenlabsListModelsResponse is the list-models response: a JSON array of models.
type ElevenlabsListModelsResponse []ElevenlabsModel
|
||||
35
core/providers/elevenlabs/utils.go
Normal file
35
core/providers/elevenlabs/utils.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package elevenlabs
|
||||
|
||||
// bifrostToElevenlabsSpeechFormat maps Bifrost speech format names to the
// ElevenLabs output format identifiers. The empty string (no format given)
// maps to the same value as "mp3"; "wav" and "pcm" share one identifier.
var bifrostToElevenlabsSpeechFormat = map[string]string{
	"":     "mp3_44100_128",
	"mp3":  "mp3_44100_128",
	"opus": "opus_48000_128",
	"wav":  "pcm_44100",
	"pcm":  "pcm_44100",
}

// elevenlabsSpeechFormatToBifrost maps ElevenLabs output format identifiers
// back to Bifrost speech format names. "pcm_44100" is reported as "wav".
var elevenlabsSpeechFormatToBifrost = map[string]string{
	"mp3_44100_128":  "mp3",
	"opus_48000_128": "opus",
	"pcm_44100":      "wav",
}
|
||||
|
||||
// ConvertBifrostSpeechFormatToElevenlabs converts Bifrost speech format to Elevenlabs format
|
||||
func ConvertBifrostSpeechFormatToElevenlabs(format string) string {
|
||||
if elevenlabsFormat, ok := bifrostToElevenlabsSpeechFormat[format]; ok {
|
||||
return elevenlabsFormat
|
||||
}
|
||||
return format
|
||||
}
|
||||
|
||||
// ConvertElevenlabsSpeechFormatToBifrost converts Elevenlabs speech format to Bifrost format
|
||||
func ConvertElevenlabsSpeechFormatToBifrost(format string) string {
|
||||
if bifrostFormat, ok := elevenlabsSpeechFormatToBifrost[format]; ok {
|
||||
return bifrostFormat
|
||||
}
|
||||
return format
|
||||
}
|
||||
Reference in New Issue
Block a user