first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1,933 @@
package elevenlabs
import (
"bytes"
"context"
"errors"
"io"
"mime/multipart"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
"github.com/bytedance/sonic"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
schemas "github.com/maximhq/bifrost/core/schemas"
"github.com/valyala/fasthttp"
)
// ElevenlabsProvider holds the HTTP clients and configuration used to talk to the Elevenlabs API.
// Instances are created with NewElevenlabsProvider.
type ElevenlabsProvider struct {
logger schemas.Logger // Logger for provider operations
client *fasthttp.Client // HTTP client for unary API requests (ReadTimeout bounds overall response)
streamingClient *fasthttp.Client // HTTP client for streaming API requests (no ReadTimeout; idle governed by NewIdleTimeoutReader)
networkConfig schemas.NetworkConfig // Network configuration including extra headers
sendBackRawRequest bool // Whether to include raw request in BifrostResponse
sendBackRawResponse bool // Whether to include raw response in BifrostResponse
customProviderConfig *schemas.CustomProviderConfig // Custom provider config
}
// NewElevenlabsProvider builds an ElevenlabsProvider from the supplied configuration.
// It applies the config defaults, creates the unary fasthttp client, layers the proxy,
// dialer and TLS settings on top of it, and derives the streaming client from the result.
// The base URL defaults to "https://api.elevenlabs.io" and has trailing slashes removed;
// the normalized value is written back into config.NetworkConfig.
func NewElevenlabsProvider(config *schemas.ProviderConfig, logger schemas.Logger) *ElevenlabsProvider {
	config.CheckAndSetDefaults()

	timeout := time.Duration(config.NetworkConfig.DefaultRequestTimeoutInSeconds) * time.Second
	maxConnLifetime := time.Duration(schemas.DefaultMaxConnDurationInSeconds) * time.Second

	httpClient := &fasthttp.Client{
		ReadTimeout:         timeout,
		WriteTimeout:        timeout,
		MaxConnsPerHost:     config.NetworkConfig.MaxConnsPerHost,
		MaxIdleConnDuration: 30 * time.Second,
		MaxConnWaitTimeout:  timeout,
		MaxConnDuration:     maxConnLifetime,
		ConnPoolStrategy:    fasthttp.FIFO,
	}

	// Apply proxy, dialer and TLS configuration, in that order.
	httpClient = providerUtils.ConfigureProxy(httpClient, config.ProxyConfig, logger)
	httpClient = providerUtils.ConfigureDialer(httpClient)
	httpClient = providerUtils.ConfigureTLS(httpClient, config.NetworkConfig, logger)

	// The streaming client is derived from the fully configured unary client.
	streamClient := providerUtils.BuildStreamingClient(httpClient)

	// Fall back to the public Elevenlabs endpoint and strip trailing slashes.
	baseURL := config.NetworkConfig.BaseURL
	if baseURL == "" {
		baseURL = "https://api.elevenlabs.io"
	}
	config.NetworkConfig.BaseURL = strings.TrimRight(baseURL, "/")

	elevenlabs := &ElevenlabsProvider{
		logger:               logger,
		client:               httpClient,
		streamingClient:      streamClient,
		networkConfig:        config.NetworkConfig,
		customProviderConfig: config.CustomProviderConfig,
		sendBackRawRequest:   config.SendBackRawRequest,
		sendBackRawResponse:  config.SendBackRawResponse,
	}
	return elevenlabs
}
// GetProviderKey reports the provider identifier, resolved from schemas.Elevenlabs
// and the custom provider configuration (if any).
func (provider *ElevenlabsProvider) GetProviderKey() schemas.ModelProvider {
	providerName := providerUtils.GetProviderName(schemas.Elevenlabs, provider.customProviderConfig)
	return providerName
}
// listModelsByKey issues a GET /v1/models request authenticated with a single key.
// On success it returns the converted list-models response with latency, provider
// response headers and (when enabled) the raw request/response attached; otherwise
// it returns the request or parsing error.
func (provider *ElevenlabsProvider) listModelsByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) {
	httpReq := fasthttp.AcquireRequest()
	httpResp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(httpReq)
	defer fasthttp.ReleaseResponse(httpResp)

	// Extra headers from the network config go first so the explicit headers below win.
	providerUtils.SetExtraHeaders(ctx, httpReq, provider.networkConfig.ExtraHeaders, nil)

	httpReq.Header.SetMethod(http.MethodGet)
	httpReq.Header.SetContentType("application/json")
	httpReq.SetRequestURI(provider.networkConfig.BaseURL + providerUtils.GetPathFromContext(ctx, "/v1/models"))
	if apiKey := key.Value.GetValue(); apiKey != "" {
		httpReq.Header.Set("xi-api-key", apiKey)
	}

	latency, requestErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, httpReq, httpResp)
	defer wait()
	if requestErr != nil {
		return nil, requestErr
	}

	// Publish the provider response headers on the context before any error return below.
	ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(httpResp))

	if status := httpResp.StatusCode(); status != fasthttp.StatusOK {
		return nil, parseElevenlabsError(httpResp)
	}

	wantRawRequest := providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest)
	wantRawResponse := providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)

	var models ElevenlabsListModelsResponse
	rawRequest, rawResponse, parseErr := providerUtils.HandleProviderResponse(httpResp.Body(), &models, nil, wantRawRequest, wantRawResponse)
	if parseErr != nil {
		return nil, parseErr
	}

	result := models.ToBifrostListModelsResponse(provider.GetProviderKey(), key.Models, key.BlacklistedModels, key.Aliases, request.Unfiltered)
	result.ExtraFields.Latency = latency.Milliseconds()
	result.ExtraFields.ProviderResponseHeaders = providerUtils.ExtractProviderResponseHeaders(httpResp)
	if wantRawRequest {
		result.ExtraFields.RawRequest = rawRequest
	}
	if wantRawResponse {
		result.ExtraFields.RawResponse = rawResponse
	}
	return result, nil
}
// ListModels lists the models available from Elevenlabs for the given keys.
// After checking that the operation is allowed, it delegates to
// providerUtils.HandleMultipleListModelsRequests with listModelsByKey as the per-key worker.
func (provider *ElevenlabsProvider) ListModels(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) {
	notAllowed := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.ListModelsRequest)
	if notAllowed != nil {
		return nil, notAllowed
	}

	return providerUtils.HandleMultipleListModelsRequests(ctx, keys, request, provider.listModelsByKey)
}
// TextCompletion always returns an unsupported-operation error: Elevenlabs has no text completion API here.
func (provider *ElevenlabsProvider) TextCompletion(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostTextCompletionRequest) (*schemas.BifrostTextCompletionResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.TextCompletionRequest, provider.GetProviderKey())
	return nil, unsupported
}
// TextCompletionStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) TextCompletionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostTextCompletionRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.TextCompletionStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ChatCompletion always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ChatCompletion(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ChatCompletionRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ChatCompletionStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ChatCompletionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostChatRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ChatCompletionStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// Responses always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) Responses(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostResponsesRequest) (*schemas.BifrostResponsesResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ResponsesRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ResponsesStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ResponsesStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostResponsesRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ResponsesStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// Embedding always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) Embedding(ctx *schemas.BifrostContext, key schemas.Key, input *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.EmbeddingRequest, provider.GetProviderKey())
	return nil, unsupported
}
// Speech performs a unary text-to-speech request against
// /v1/text-to-speech/{voice} (or .../with-timestamps when
// request.Params.WithTimestamps is true).
//
// A voice (request.Params.VoiceConfig.Voice) is required; without it an
// operation error is returned before any network call. For plain requests the
// decoded response body is returned as Audio. For with-timestamps requests the
// JSON response is parsed and AudioBase64 plus the alignment data are returned.
// Every failure after the request body has been built is passed through
// EnrichError so the raw request can be attached when enabled.
func (provider *ElevenlabsProvider) Speech(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostSpeechRequest) (*schemas.BifrostSpeechResponse, *schemas.BifrostError) {
	if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.SpeechRequest); err != nil {
		return nil, err
	}

	// Create request
	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(req)
	defer fasthttp.ReleaseResponse(resp)

	// Set any extra headers from network config
	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)

	// The voice is part of the URL path, so the request cannot be built without it.
	if request.Params == nil || request.Params.VoiceConfig == nil || request.Params.VoiceConfig.Voice == nil {
		return nil, providerUtils.NewBifrostOperationError("voice parameter is required", nil)
	}

	// Determine if timestamps are requested; that selects both the endpoint and the response format.
	withTimestampsRequest := request.Params.WithTimestamps != nil && *request.Params.WithTimestamps
	endpoint := "/v1/text-to-speech/" + *request.Params.VoiceConfig.Voice
	if withTimestampsRequest {
		endpoint += "/with-timestamps"
	}

	requestURL := provider.buildBaseSpeechRequestURL(ctx, endpoint, schemas.SpeechRequest, request)
	req.SetRequestURI(requestURL)
	req.Header.SetMethod(http.MethodPost)
	req.Header.SetContentType("application/json")
	if key.Value.GetValue() != "" {
		req.Header.Set("xi-api-key", key.Value.GetValue())
	}

	jsonData, bifrostErr := providerUtils.CheckContextAndGetRequestBody(
		ctx,
		request,
		func() (providerUtils.RequestBodyWithExtraParams, error) {
			return ToElevenlabsSpeechRequest(request), nil
		})
	if bifrostErr != nil {
		return nil, bifrostErr
	}
	if !providerUtils.ApplyLargePayloadRequestBody(ctx, req) {
		req.SetBody(jsonData)
	}

	// Make request
	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
	defer wait()
	if bifrostErr != nil {
		return nil, providerUtils.EnrichError(ctx, bifrostErr, jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Extract and set provider response headers so they're available on error paths
	ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))

	// Handle error response
	if resp.StatusCode() != fasthttp.StatusOK {
		return nil, providerUtils.EnrichError(ctx, parseElevenlabsError(resp), jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Get the response body
	body, err := providerUtils.CheckAndDecodeBody(resp)
	if err != nil {
		return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err), jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Fields shared by both response shapes.
	bifrostResponse := &schemas.BifrostSpeechResponse{
		ExtraFields: schemas.BifrostResponseExtraFields{
			Latency:                 latency.Milliseconds(),
			ProviderResponseHeaders: providerUtils.ExtractProviderResponseHeaders(resp),
		},
	}
	if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
		providerUtils.ParseAndSetRawRequest(&bifrostResponse.ExtraFields, jsonData)
	}

	if !withTimestampsRequest {
		// Plain endpoint: the body is the audio itself.
		bifrostResponse.Audio = body
		return bifrostResponse, nil
	}

	// With-timestamps endpoint: the body is JSON carrying base64 audio and alignment data.
	var timestampResponse ElevenlabsSpeechWithTimestampsResponse
	if err := sonic.Unmarshal(body, &timestampResponse); err != nil {
		// Enriched like every other post-request failure in this method.
		return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError("failed to parse with-timestamps response", err), jsonData, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	bifrostResponse.AudioBase64 = &timestampResponse.AudioBase64
	if timestampResponse.Alignment != nil {
		bifrostResponse.Alignment = &schemas.SpeechAlignment{
			CharStartTimesMs: timestampResponse.Alignment.CharStartTimesMs,
			CharEndTimesMs:   timestampResponse.Alignment.CharEndTimesMs,
			Characters:       timestampResponse.Alignment.Characters,
		}
	}
	if timestampResponse.NormalizedAlignment != nil {
		bifrostResponse.NormalizedAlignment = &schemas.SpeechAlignment{
			CharStartTimesMs: timestampResponse.NormalizedAlignment.CharStartTimesMs,
			CharEndTimesMs:   timestampResponse.NormalizedAlignment.CharEndTimesMs,
			Characters:       timestampResponse.NormalizedAlignment.Characters,
		}
	}
	return bifrostResponse, nil
}
// Rerank always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) Rerank(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostRerankRequest) (*schemas.BifrostRerankResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.RerankRequest, provider.GetProviderKey())
	return nil, unsupported
}
// OCR always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) OCR(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostOCRRequest) (*schemas.BifrostOCRResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.OCRRequest, provider.GetProviderKey())
	return nil, unsupported
}
// SpeechStream performs a streaming text-to-speech request against
// /v1/text-to-speech/{voice}/stream and returns a channel of stream chunks.
//
// A voice (request.Params.VoiceConfig.Voice) is required. Request-level
// failures (cancelled, timed out, transport error, non-200 status) are returned
// synchronously and passed through EnrichError. On success a goroutine reads
// the binary audio body in 4KB slices, emits one delta chunk per non-empty
// read, and finishes with a "done" chunk after a clean end of stream. The
// goroutine closes the channel when it exits and releases the streaming
// response.
func (provider *ElevenlabsProvider) SpeechStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostSpeechRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	if err := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.SpeechStreamRequest); err != nil {
		return nil, err
	}

	jsonBody, bifrostErr := providerUtils.CheckContextAndGetRequestBody(
		ctx,
		request,
		func() (providerUtils.RequestBodyWithExtraParams, error) {
			return ToElevenlabsSpeechRequest(request), nil
		})
	if bifrostErr != nil {
		return nil, bifrostErr
	}

	// Validate the voice BEFORE acquiring the pooled request/response, so this
	// early return does not leave an acquired response unreleased.
	if request.Params == nil || request.Params.VoiceConfig == nil || request.Params.VoiceConfig.Voice == nil {
		return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError("voice parameter is required", nil), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Create HTTP request for streaming
	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	resp.StreamBody = true
	defer fasthttp.ReleaseRequest(req)

	// Set any extra headers from network config
	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)

	req.SetRequestURI(provider.buildBaseSpeechRequestURL(ctx, "/v1/text-to-speech/"+*request.Params.VoiceConfig.Voice+"/stream", schemas.SpeechStreamRequest, request))
	req.Header.SetMethod(http.MethodPost)
	req.Header.SetContentType("application/json")
	if key.Value.GetValue() != "" {
		req.Header.Set("xi-api-key", key.Value.GetValue())
	}
	if !providerUtils.ApplyLargePayloadRequestBody(ctx, req) {
		req.SetBody(jsonBody)
	}

	// Make request
	startTime := time.Now()
	err := provider.streamingClient.Do(req, resp)
	if err != nil {
		defer providerUtils.ReleaseStreamingResponse(resp)
		if errors.Is(err, context.Canceled) {
			return nil, providerUtils.EnrichError(ctx, &schemas.BifrostError{
				IsBifrostError: false,
				Error: &schemas.ErrorField{
					Type:    schemas.Ptr(schemas.RequestCancelled),
					Message: schemas.ErrRequestCancelled,
					Error:   err,
				},
			}, jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
		}
		if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
			return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostTimeoutError(schemas.ErrProviderRequestTimedOut, err), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
		}
		return nil, providerUtils.EnrichError(ctx, providerUtils.NewBifrostOperationError(schemas.ErrProviderDoRequest, err), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Extract provider response headers before status check so error responses also forward them
	ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(resp))

	// Check for HTTP errors
	if resp.StatusCode() != fasthttp.StatusOK {
		defer providerUtils.ReleaseStreamingResponse(resp)
		return nil, providerUtils.EnrichError(ctx, parseElevenlabsError(resp), jsonBody, nil, provider.sendBackRawRequest, provider.sendBackRawResponse)
	}

	// Create response channel
	responseChan := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize)
	providerUtils.SetStreamIdleTimeoutIfEmpty(ctx, provider.networkConfig.StreamIdleTimeoutInSeconds)

	go func() {
		// Runs last: report cancellation/timeout (if any) and then close the channel.
		defer func() {
			if ctx.Err() == context.Canceled {
				providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.logger, postHookSpanFinalizer)
			} else if ctx.Err() == context.DeadlineExceeded {
				providerUtils.HandleStreamTimeout(ctx, postHookRunner, responseChan, provider.logger, postHookSpanFinalizer)
			}
			close(responseChan)
		}()
		defer providerUtils.ReleaseStreamingResponse(resp)

		// Decompress gzip-encoded streams transparently (no-op for non-gzip)
		reader, releaseGzip := providerUtils.DecompressStreamBody(resp)
		defer releaseGzip()

		// Wrap reader with idle timeout to detect stalled streams.
		reader, stopIdleTimeout := providerUtils.NewIdleTimeoutReader(reader, resp.BodyStream(), providerUtils.GetStreamIdleTimeout(ctx))
		defer stopIdleTimeout()

		// Setup cancellation handler to close the raw network stream on ctx cancellation,
		// which immediately unblocks any in-progress read (including reads blocked inside a gzip decompression layer).
		stopCancellation := providerUtils.SetupStreamCancellation(ctx, resp.BodyStream(), provider.logger)
		defer stopCancellation()
		defer providerUtils.EnsureStreamFinalizerCalled(ctx, postHookSpanFinalizer)

		// Read binary audio chunks from the stream using a 4KB buffer.
		buffer := make([]byte, 4096)
		bodyStream := reader
		chunkIndex := -1
		lastChunkTime := time.Now()

		for {
			// If context was cancelled/timed out, let defer handle it
			if ctx.Err() != nil {
				return
			}

			n, readErr := bodyStream.Read(buffer)

			// A read that failed because the context was cancelled/timed out is
			// reported by the deferred handler above.
			if readErr != nil && ctx.Err() != nil {
				return
			}

			// Per the io.Reader contract, a Read may return n > 0 together with an
			// error (including io.EOF). Deliver those bytes before acting on the
			// error so the final audio chunk is never dropped.
			if n > 0 {
				chunkIndex++
				// Copy out of the reusable buffer: the chunk outlives this iteration.
				audioChunk := make([]byte, n)
				copy(audioChunk, buffer[:n])
				response := &schemas.BifrostSpeechStreamResponse{
					Type:  schemas.SpeechStreamResponseTypeDelta,
					Audio: audioChunk,
					ExtraFields: schemas.BifrostResponseExtraFields{
						ChunkIndex: chunkIndex,
						Latency:    time.Since(lastChunkTime).Milliseconds(),
					},
				}
				lastChunkTime = time.Now()
				if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
					response.ExtraFields.RawResponse = audioChunk
				}
				providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, response, nil, nil), responseChan, postHookSpanFinalizer)
			}

			if readErr != nil {
				if errors.Is(readErr, io.EOF) {
					// Clean end of stream: fall through to the final "done" chunk.
					break
				}
				ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
				provider.logger.Warn("Error reading stream: %v", readErr)
				providerUtils.ProcessAndSendError(ctx, postHookRunner, readErr, responseChan, provider.logger, postHookSpanFinalizer)
				return
			}
		}

		// Send final response after natural loop termination (similar to Gemini pattern)
		finalResponse := &schemas.BifrostSpeechStreamResponse{
			Type:  schemas.SpeechStreamResponseTypeDone,
			Audio: []byte{},
			ExtraFields: schemas.BifrostResponseExtraFields{
				ChunkIndex: chunkIndex + 1,
				Latency:    time.Since(startTime).Milliseconds(),
			},
		}
		// Set raw request if enabled
		if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) {
			providerUtils.ParseAndSetRawRequest(&finalResponse.ExtraFields, jsonBody)
		}
		ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
		providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, finalResponse, nil, nil), responseChan, postHookSpanFinalizer)
	}()

	return responseChan, nil
}
// Transcription sends a speech-to-text request to /v1/speech-to-text (or the
// path returned by providerUtils.GetRequestPath) as a multipart form.
// Exactly one of an audio file or a cloud_storage_url must be supplied.
// On success the parsed chunks are converted into a Bifrost transcription
// response carrying latency, provider response headers and, when enabled, the
// raw response.
func (provider *ElevenlabsProvider) Transcription(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostTranscriptionRequest) (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError) {
	if notAllowed := providerUtils.CheckOperationAllowed(schemas.Elevenlabs, provider.customProviderConfig, schemas.TranscriptionRequest); notAllowed != nil {
		return nil, notAllowed
	}

	payload := ToElevenlabsTranscriptionRequest(request)
	if payload == nil {
		return nil, providerUtils.NewBifrostOperationError("transcription request is not provided", nil)
	}

	// The audio source must be exactly one of: inline file bytes or a cloud storage URL.
	hasFile := len(payload.File) > 0
	hasURL := payload.CloudStorageURL != nil && strings.TrimSpace(*payload.CloudStorageURL) != ""
	switch {
	case hasFile && hasURL:
		return nil, providerUtils.NewBifrostOperationError("provide either a file or cloud_storage_url, not both", nil)
	case !hasFile && !hasURL:
		return nil, providerUtils.NewBifrostOperationError("either a transcription file or cloud_storage_url must be provided", nil)
	}

	// Assemble the multipart form in memory.
	form := &bytes.Buffer{}
	formWriter := multipart.NewWriter(form)
	if writeErr := writeTranscriptionMultipart(formWriter, payload); writeErr != nil {
		return nil, writeErr
	}
	formContentType := formWriter.FormDataContentType()
	if closeErr := formWriter.Close(); closeErr != nil {
		return nil, providerUtils.NewBifrostOperationError("failed to finalize multipart transcription request", closeErr)
	}

	httpReq := fasthttp.AcquireRequest()
	httpResp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(httpReq)
	defer fasthttp.ReleaseResponse(httpResp)

	providerUtils.SetExtraHeaders(ctx, httpReq, provider.networkConfig.ExtraHeaders, nil)

	// GetRequestPath may hand back a complete URL; only prepend the base URL when it does not.
	target, isCompleteURL := providerUtils.GetRequestPath(ctx, "/v1/speech-to-text", provider.customProviderConfig, schemas.TranscriptionRequest)
	if !isCompleteURL {
		target = provider.networkConfig.BaseURL + target
	}
	httpReq.SetRequestURI(target)
	httpReq.Header.SetMethod(http.MethodPost)
	httpReq.Header.SetContentType(formContentType)
	if apiKey := key.Value.GetValue(); apiKey != "" {
		httpReq.Header.Set("xi-api-key", apiKey)
	}
	httpReq.SetBody(form.Bytes())

	latency, requestErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, httpReq, httpResp)
	defer wait()
	if requestErr != nil {
		return nil, requestErr
	}

	// Publish the provider response headers on the context before any error return below.
	ctx.SetValue(schemas.BifrostContextKeyProviderResponseHeaders, providerUtils.ExtractProviderResponseHeaders(httpResp))

	if httpResp.StatusCode() != fasthttp.StatusOK {
		return nil, parseElevenlabsError(httpResp)
	}

	responseBody, decodeErr := providerUtils.CheckAndDecodeBody(httpResp)
	if decodeErr != nil {
		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decodeErr)
	}

	// A body that is empty or whitespace-only counts as an empty provider response.
	if len(bytes.TrimSpace(responseBody)) == 0 {
		return nil, &schemas.BifrostError{
			IsBifrostError: true,
			Error: &schemas.ErrorField{
				Message: schemas.ErrProviderResponseEmpty,
			},
		}
	}

	chunks, parseErr := parseTranscriptionResponse(responseBody)
	if parseErr != nil {
		return nil, providerUtils.NewBifrostOperationError(parseErr.Error(), nil)
	}
	if len(chunks) == 0 {
		return nil, providerUtils.NewBifrostOperationError("no chunks found in transcription response", nil)
	}

	result := ToBifrostTranscriptionResponse(chunks)
	result.ExtraFields = schemas.BifrostResponseExtraFields{
		Latency:                 latency.Milliseconds(),
		ProviderResponseHeaders: providerUtils.ExtractProviderResponseHeaders(httpResp),
	}

	if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) {
		// Prefer the decoded JSON value; fall back to the body as a string if it is not valid JSON.
		var decoded interface{}
		if jsonErr := sonic.Unmarshal(responseBody, &decoded); jsonErr != nil {
			decoded = string(responseBody)
		}
		result.ExtraFields.RawResponse = decoded
	}

	return result, nil
}
// writeTranscriptionMultipart serializes reqBody into the multipart writer.
// model_id is always written; the file part and every optional field are
// written only when set (string fields additionally only when not blank, and
// num_speakers only when positive). It returns an operation error describing
// the first field that could not be written or marshalled, or nil on success.
// The caller remains responsible for closing the writer.
func writeTranscriptionMultipart(writer *multipart.Writer, reqBody *ElevenlabsTranscriptionRequest) *schemas.BifrostError {
	// put writes one plain form field and wraps any failure in an operation error.
	put := func(name, value string) *schemas.BifrostError {
		if err := writer.WriteField(name, value); err != nil {
			return providerUtils.NewBifrostOperationError("failed to write "+name+" field", err)
		}
		return nil
	}
	// notBlank reports whether an optional string is set and not whitespace-only.
	notBlank := func(s *string) bool {
		return s != nil && strings.TrimSpace(*s) != ""
	}

	if bifrostErr := put("model_id", reqBody.ModelID); bifrostErr != nil {
		return bifrostErr
	}

	// Inline audio, if present, goes in as the "file" part.
	if len(reqBody.File) > 0 {
		name := reqBody.Filename
		if name == "" {
			name = providerUtils.AudioFilenameFromBytes(reqBody.File)
		}
		part, err := writer.CreateFormFile("file", name)
		if err != nil {
			return providerUtils.NewBifrostOperationError("failed to create file field", err)
		}
		if _, err = part.Write(reqBody.File); err != nil {
			return providerUtils.NewBifrostOperationError("failed to write file data", err)
		}
	}

	if notBlank(reqBody.CloudStorageURL) {
		if bifrostErr := put("cloud_storage_url", *reqBody.CloudStorageURL); bifrostErr != nil {
			return bifrostErr
		}
	}

	if notBlank(reqBody.LanguageCode) {
		if bifrostErr := put("language_code", *reqBody.LanguageCode); bifrostErr != nil {
			return bifrostErr
		}
	}

	if flag := reqBody.TagAudioEvents; flag != nil {
		if bifrostErr := put("tag_audio_events", strconv.FormatBool(*flag)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if count := reqBody.NumSpeakers; count != nil && *count > 0 {
		if bifrostErr := put("num_speakers", strconv.Itoa(*count)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if granularity := reqBody.TimestampsGranularity; granularity != nil && *granularity != "" {
		if bifrostErr := put("timestamps_granularity", string(*granularity)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if flag := reqBody.Diarize; flag != nil {
		if bifrostErr := put("diarize", strconv.FormatBool(*flag)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if threshold := reqBody.DiarizationThreshold; threshold != nil {
		if bifrostErr := put("diarization_threshold", strconv.FormatFloat(*threshold, 'f', -1, 64)); bifrostErr != nil {
			return bifrostErr
		}
	}

	// additional_formats is sent as a single JSON-encoded field.
	if len(reqBody.AdditionalFormats) > 0 {
		encoded, err := providerUtils.MarshalSorted(reqBody.AdditionalFormats)
		if err != nil {
			return providerUtils.NewBifrostOperationError("failed to marshal additional_formats", err)
		}
		if bifrostErr := put("additional_formats", string(encoded)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if format := reqBody.FileFormat; format != nil && *format != "" {
		if bifrostErr := put("file_format", string(*format)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if flag := reqBody.Webhook; flag != nil {
		if bifrostErr := put("webhook", strconv.FormatBool(*flag)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if notBlank(reqBody.WebhookID) {
		if bifrostErr := put("webhook_id", *reqBody.WebhookID); bifrostErr != nil {
			return bifrostErr
		}
	}

	if temperature := reqBody.Temperature; temperature != nil {
		if bifrostErr := put("temperature", strconv.FormatFloat(*temperature, 'f', -1, 64)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if seed := reqBody.Seed; seed != nil {
		if bifrostErr := put("seed", strconv.Itoa(*seed)); bifrostErr != nil {
			return bifrostErr
		}
	}

	if flag := reqBody.UseMultiChannel; flag != nil {
		if bifrostErr := put("use_multi_channel", strconv.FormatBool(*flag)); bifrostErr != nil {
			return bifrostErr
		}
	}

	// webhook_metadata: a string is sent verbatim (skipped when blank); any other
	// value is JSON-encoded first.
	if reqBody.WebhookMetadata != nil {
		if text, isString := reqBody.WebhookMetadata.(string); isString {
			if strings.TrimSpace(text) != "" {
				if bifrostErr := put("webhook_metadata", text); bifrostErr != nil {
					return bifrostErr
				}
			}
		} else {
			encoded, err := providerUtils.MarshalSorted(reqBody.WebhookMetadata)
			if err != nil {
				return providerUtils.NewBifrostOperationError("failed to marshal webhook_metadata", err)
			}
			if bifrostErr := put("webhook_metadata", string(encoded)); bifrostErr != nil {
				return bifrostErr
			}
		}
	}

	return nil
}
// TranscriptionStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) TranscriptionStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostTranscriptionRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.TranscriptionStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ImageGeneration always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ImageGeneration(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageGenerationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ImageGenerationRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ImageGenerationStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ImageGenerationStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostImageGenerationRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ImageGenerationStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ImageEdit always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ImageEdit(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageEditRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ImageEditRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ImageEditStream always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ImageEditStream(ctx *schemas.BifrostContext, postHookRunner schemas.PostHookRunner, postHookSpanFinalizer func(context.Context), key schemas.Key, request *schemas.BifrostImageEditRequest) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ImageEditStreamRequest, provider.GetProviderKey())
	return nil, unsupported
}
// ImageVariation always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) ImageVariation(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostImageVariationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.ImageVariationRequest, provider.GetProviderKey())
	return nil, unsupported
}
// VideoGeneration always returns an unsupported-operation error for the Elevenlabs provider.
func (provider *ElevenlabsProvider) VideoGeneration(_ *schemas.BifrostContext, _ schemas.Key, _ *schemas.BifrostVideoGenerationRequest) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
	unsupported := providerUtils.NewUnsupportedOperationError(schemas.VideoGenerationRequest, provider.GetProviderKey())
	return nil, unsupported
}
// VideoRetrieve is not implemented for ElevenLabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) VideoRetrieve(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostVideoRetrieveRequest,
) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoRetrieveRequest, provider.GetProviderKey())
}
// VideoDownload is not implemented for ElevenLabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) VideoDownload(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostVideoDownloadRequest,
) (*schemas.BifrostVideoDownloadResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoDownloadRequest, provider.GetProviderKey())
}
// VideoDelete is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) VideoDelete(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostVideoDeleteRequest,
) (*schemas.BifrostVideoDeleteResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoDeleteRequest, provider.GetProviderKey())
}
// VideoList is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) VideoList(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostVideoListRequest,
) (*schemas.BifrostVideoListResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoListRequest, provider.GetProviderKey())
}
// VideoRemix is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) VideoRemix(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostVideoRemixRequest,
) (*schemas.BifrostVideoGenerationResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.VideoRemixRequest, provider.GetProviderKey())
}
// buildBaseSpeechRequestURL constructs the full speech request URL from the
// provider's configuration and appends the query parameters derived from the
// speech request.
//
// The request path is resolved through providerUtils.GetRequestPath. When that
// reports a complete URL it is used as-is; otherwise the path is joined onto
// networkConfig.BaseURL (falling back to plain string concatenation if the
// base URL cannot be parsed). If the resulting URL cannot be parsed it is
// returned unchanged, without any query parameters.
//
// Query parameters, each set only when request.Params provides a value:
//   - enable_logging
//   - output_format (skipped when the converted format is empty)
//   - optimize_streaming_latency
//
// A nil request is tolerated and yields the URL without these parameters.
func (provider *ElevenlabsProvider) buildBaseSpeechRequestURL(ctx *schemas.BifrostContext, defaultPath string, requestType schemas.RequestType, request *schemas.BifrostSpeechRequest) string {
	requestPath, isCompleteURL := providerUtils.GetRequestPath(ctx, defaultPath, provider.customProviderConfig, requestType)

	finalURL := requestPath
	if !isCompleteURL {
		baseURL := provider.networkConfig.BaseURL
		if base, parseErr := url.Parse(baseURL); parseErr != nil {
			// Unparseable base URL: best effort is simple concatenation.
			finalURL = baseURL + requestPath
		} else {
			base.Path = path.Join(base.Path, requestPath)
			finalURL = base.String()
		}
	}

	// Parse the final URL to add query parameters.
	u, parseErr := url.Parse(finalURL)
	if parseErr != nil {
		return finalURL
	}

	q := u.Query()
	// Guard against a nil request so URL construction can never panic.
	if request != nil && request.Params != nil {
		params := request.Params
		if params.EnableLogging != nil {
			q.Set("enable_logging", strconv.FormatBool(*params.EnableLogging))
		}
		if convertedFormat := ConvertBifrostSpeechFormatToElevenlabs(params.ResponseFormat); convertedFormat != "" {
			q.Set("output_format", convertedFormat)
		}
		if params.OptimizeStreamingLatency != nil {
			q.Set("optimize_streaming_latency", strconv.FormatBool(*params.OptimizeStreamingLatency))
		}
	}

	u.RawQuery = q.Encode()
	return u.String()
}
// BatchCreate is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchCreate(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostBatchCreateRequest,
) (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchCreateRequest, provider.GetProviderKey())
}
// BatchList is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchList(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostBatchListRequest,
) (*schemas.BifrostBatchListResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchListRequest, provider.GetProviderKey())
}
// BatchRetrieve is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchRetrieve(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostBatchRetrieveRequest,
) (*schemas.BifrostBatchRetrieveResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchRetrieveRequest, provider.GetProviderKey())
}
// BatchCancel is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchCancel(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostBatchCancelRequest,
) (*schemas.BifrostBatchCancelResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchCancelRequest, provider.GetProviderKey())
}
// BatchDelete is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchDelete(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostBatchDeleteRequest,
) (*schemas.BifrostBatchDeleteResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchDeleteRequest, provider.GetProviderKey())
}
// BatchResults is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) BatchResults(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostBatchResultsRequest,
) (*schemas.BifrostBatchResultsResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.BatchResultsRequest, provider.GetProviderKey())
}
// FileUpload is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) FileUpload(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostFileUploadRequest,
) (*schemas.BifrostFileUploadResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.FileUploadRequest, provider.GetProviderKey())
}
// FileList is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) FileList(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostFileListRequest,
) (*schemas.BifrostFileListResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.FileListRequest, provider.GetProviderKey())
}
// FileRetrieve is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) FileRetrieve(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostFileRetrieveRequest,
) (*schemas.BifrostFileRetrieveResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.FileRetrieveRequest, provider.GetProviderKey())
}
// FileDelete is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) FileDelete(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostFileDeleteRequest,
) (*schemas.BifrostFileDeleteResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.FileDeleteRequest, provider.GetProviderKey())
}
// FileContent is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) FileContent(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostFileContentRequest,
) (*schemas.BifrostFileContentResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.FileContentRequest, provider.GetProviderKey())
}
// CountTokens is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) CountTokens(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostResponsesRequest,
) (*schemas.BifrostCountTokensResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.CountTokensRequest, provider.GetProviderKey())
}
// ContainerCreate is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerCreate(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostContainerCreateRequest,
) (*schemas.BifrostContainerCreateResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerCreateRequest, provider.GetProviderKey())
}
// ContainerList is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerList(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerListRequest,
) (*schemas.BifrostContainerListResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerListRequest, provider.GetProviderKey())
}
// ContainerRetrieve is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerRetrieve(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerRetrieveRequest,
) (*schemas.BifrostContainerRetrieveResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerRetrieveRequest, provider.GetProviderKey())
}
// ContainerDelete is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerDelete(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerDeleteRequest,
) (*schemas.BifrostContainerDeleteResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerDeleteRequest, provider.GetProviderKey())
}
// ContainerFileCreate is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerFileCreate(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostContainerFileCreateRequest,
) (*schemas.BifrostContainerFileCreateResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileCreateRequest, provider.GetProviderKey())
}
// ContainerFileList is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerFileList(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerFileListRequest,
) (*schemas.BifrostContainerFileListResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileListRequest, provider.GetProviderKey())
}
// ContainerFileRetrieve is not implemented for Elevenlabs; it always returns
// an unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerFileRetrieve(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerFileRetrieveRequest,
) (*schemas.BifrostContainerFileRetrieveResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileRetrieveRequest, provider.GetProviderKey())
}
// ContainerFileContent is not implemented for Elevenlabs; it always returns
// an unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerFileContent(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerFileContentRequest,
) (*schemas.BifrostContainerFileContentResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileContentRequest, provider.GetProviderKey())
}
// ContainerFileDelete is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) ContainerFileDelete(
	_ *schemas.BifrostContext,
	_ []schemas.Key,
	_ *schemas.BifrostContainerFileDeleteRequest,
) (*schemas.BifrostContainerFileDeleteResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.ContainerFileDeleteRequest, provider.GetProviderKey())
}
// Passthrough is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil response.
func (provider *ElevenlabsProvider) Passthrough(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ *schemas.BifrostPassthroughRequest,
) (*schemas.BifrostPassthroughResponse, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.PassthroughRequest, provider.GetProviderKey())
}
// PassthroughStream is not implemented for Elevenlabs; it always returns an
// unsupported-operation error and a nil channel.
func (provider *ElevenlabsProvider) PassthroughStream(
	_ *schemas.BifrostContext,
	_ schemas.PostHookRunner,
	_ func(context.Context),
	_ schemas.Key,
	_ *schemas.BifrostPassthroughRequest,
) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) {
	return nil, providerUtils.NewUnsupportedOperationError(schemas.PassthroughStreamRequest, provider.GetProviderKey())
}

View File

@@ -0,0 +1,62 @@
package elevenlabs_test
import (
"os"
"strings"
"testing"
"github.com/maximhq/bifrost/core/internal/llmtests"
"github.com/maximhq/bifrost/core/schemas"
)
// TestElevenlabs runs the shared comprehensive provider test suite against
// ElevenLabs. It is skipped when ELEVENLABS_API_KEY is not set. Only speech
// synthesis (unary and streaming) and transcription scenarios are enabled.
func TestElevenlabs(t *testing.T) {
	t.Parallel()

	apiKey := strings.TrimSpace(os.Getenv("ELEVENLABS_API_KEY"))
	if apiKey == "" {
		t.Skip("Skipping Elevenlabs tests because ELEVENLABS_API_KEY is not set")
	}

	client, ctx, cancel, setupErr := llmtests.SetupTest()
	if setupErr != nil {
		t.Fatalf("Error initializing test setup: %v", setupErr)
	}
	defer cancel()
	defer client.Shutdown()

	agentID := strings.TrimSpace(os.Getenv("ELEVENLABS_AGENT_ID"))

	// NOTE(review): the realtime scenario is hard-disabled even when
	// ELEVENLABS_AGENT_ID is provided — confirm whether this is intentional
	// or should depend on agentID being non-empty.
	const realtimeEnabled = false

	scenarios := llmtests.TestScenarios{
		// Audio scenarios exercised for this provider.
		SpeechSynthesis:       true,
		SpeechSynthesisStream: true,
		Transcription:         true,
		TranscriptionStream:   false,
		Realtime:              realtimeEnabled,

		// Text, chat, tool, vision and other scenarios are switched off.
		TextCompletion:        false,
		TextCompletionStream:  false,
		SimpleChat:            false,
		CompletionStream:      false,
		MultiTurnConversation: false,
		ToolCalls:             false,
		MultipleToolCalls:     false,
		End2EndToolCalling:    false,
		AutomaticFunctionCall: false,
		ImageURL:              false,
		ImageBase64:           false,
		MultipleImages:        false,
		CompleteEnd2End:       false,
		Embedding:             false,
		Reasoning:             false,
		ListModels:            false,
	}

	cfg := llmtests.ComprehensiveTestConfig{
		Provider:             schemas.Elevenlabs,
		SpeechSynthesisModel: "eleven_turbo_v2_5",
		TranscriptionModel:   "scribe_v1",
		RealtimeModel:        agentID,
		Scenarios:            scenarios,
	}

	t.Run("ElevenlabsTests", func(t *testing.T) {
		llmtests.RunAllComprehensiveTests(t, client, ctx, cfg)
	})
}

View File

@@ -0,0 +1,90 @@
package elevenlabs
import (
"strings"
"github.com/valyala/fasthttp"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
schemas "github.com/maximhq/bifrost/core/schemas"
)
// parseElevenlabsError converts an ElevenLabs error response into a BifrostError.
//
// The response is first decoded via providerUtils.HandleProviderAPIError. If
// the decoded body has no detail section, that error is returned unchanged.
// Otherwise the detail is interpreted in one of two shapes:
//
//   - Validation errors (array form): every entry's message (Message, falling
//     back to Msg) is joined with "; ", the dotted locations are appended as
//     "[loc1, loc2]", and the entry types are joined with ", " as the error
//     type. A fresh BifrostError carrying the response status code is returned.
//   - Single object: Detail.Message and Detail.Status overwrite the message
//     and type of the error produced by HandleProviderAPIError.
//
// When the validation entries yield neither messages nor locations, handling
// falls through to the single-object form.
func parseElevenlabsError(resp *fasthttp.Response) *schemas.BifrostError {
	var errorResp ElevenlabsError
	bifrostErr := providerUtils.HandleProviderAPIError(resp, &errorResp)

	if errorResp.Detail == nil {
		return bifrostErr
	}
	detail := errorResp.Detail

	// Handle validation errors (array format).
	if len(detail.ValidationErrors) > 0 {
		var messages []string
		var locations []string
		var errorTypes []string

		for _, validationErr := range detail.ValidationErrors {
			// Get message from either Message or Msg field.
			msg := validationErr.Message
			if msg == "" {
				msg = validationErr.Msg
			}
			if msg != "" {
				messages = append(messages, msg)
			}
			if len(validationErr.Loc) > 0 {
				locations = append(locations, strings.Join(validationErr.Loc, "."))
			}
			if validationErr.Type != "" {
				errorTypes = append(errorTypes, validationErr.Type)
			}
		}

		message := strings.Join(messages, "; ")
		if len(locations) > 0 {
			locationSuffix := "[" + strings.Join(locations, ", ") + "]"
			if message == "" {
				// Avoid a stray leading space when only locations are known.
				message = locationSuffix
			} else {
				message += " " + locationSuffix
			}
		}

		if message != "" {
			return &schemas.BifrostError{
				IsBifrostError: false,
				StatusCode:     schemas.Ptr(resp.StatusCode()),
				Error: &schemas.ErrorField{
					Type:    schemas.Ptr(strings.Join(errorTypes, ", ")),
					Message: message,
				},
			}
		}
	}

	// Handle non-validation errors (single object format).
	message := ""
	if detail.Message != nil {
		message = *detail.Message
	}
	errorType := ""
	if detail.Status != nil {
		errorType = *detail.Status
	}

	if message != "" {
		if bifrostErr.Error == nil {
			bifrostErr.Error = &schemas.ErrorField{}
		}
		bifrostErr.Error.Type = schemas.Ptr(errorType)
		bifrostErr.Error.Message = message
	}

	return bifrostErr
}

View File

@@ -0,0 +1,51 @@
package elevenlabs
import (
"strings"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)
// ToBifrostListModelsResponse converts the ElevenLabs model list into the
// Bifrost list-models response.
//
// Each model ID is run through a providerUtils.ListModelsPipeline configured
// with the allow list, block list, aliases and the unfiltered flag. Every
// result the pipeline yields becomes an entry whose ID is
// "<providerKey>/<resolved id>"; the alias is attached when the pipeline
// reports one. Finally the pipeline's BackfillModels output is appended,
// given the set of lower-cased resolved IDs already included.
//
// A nil receiver yields nil; when the pipeline signals an early exit an empty
// (non-nil) response is returned.
func (response *ElevenlabsListModelsResponse) ToBifrostListModelsResponse(providerKey schemas.ModelProvider, allowedModels schemas.WhiteList, blacklistedModels schemas.BlackList, aliases map[string]string, unfiltered bool) *schemas.BifrostListModelsResponse {
	if response == nil {
		return nil
	}

	models := *response
	out := &schemas.BifrostListModelsResponse{
		Data: make([]schemas.Model, 0, len(models)),
	}

	filter := &providerUtils.ListModelsPipeline{
		AllowedModels:     allowedModels,
		BlacklistedModels: blacklistedModels,
		Aliases:           aliases,
		Unfiltered:        unfiltered,
		ProviderKey:       providerKey,
		MatchFns:          providerUtils.DefaultMatchFns(),
	}
	if filter.ShouldEarlyExit() {
		return out
	}

	seen := make(map[string]bool)
	idPrefix := string(providerKey) + "/"
	for _, m := range models {
		for _, match := range filter.FilterModel(m.ModelID) {
			item := schemas.Model{
				ID:   idPrefix + match.ResolvedID,
				Name: schemas.Ptr(m.Name),
			}
			if match.AliasValue != "" {
				item.Alias = schemas.Ptr(match.AliasValue)
			}
			out.Data = append(out.Data, item)
			seen[strings.ToLower(match.ResolvedID)] = true
		}
	}

	out.Data = append(out.Data, filter.BackfillModels(seen)...)
	return out
}

View File

@@ -0,0 +1,257 @@
package elevenlabs
import (
	"encoding/json"
	"fmt"
	"net/url"
	"strings"

	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
	"github.com/maximhq/bifrost/core/schemas"
)
// SupportsRealtimeAPI reports whether this provider offers a realtime API.
// It always returns true since ElevenLabs supports Conversational AI via WebSocket.
func (provider *ElevenlabsProvider) SupportsRealtimeAPI() bool {
return true
}
// RealtimeWebSocketURL returns the WebSocket URL for the ElevenLabs
// Conversational AI endpoint.
//
// The URL is derived from networkConfig.BaseURL by swapping the scheme
// (https -> wss, http -> ws) and appending the conversation path. The model
// parameter is sent as the agent_id query parameter and is query-escaped so
// that characters such as '&', '#' or spaces cannot alter the query string.
// The key parameter is not used here.
//
// Format: wss://api.elevenlabs.io/v1/convai/conversation?agent_id=<model>
func (provider *ElevenlabsProvider) RealtimeWebSocketURL(key schemas.Key, model string) string {
	base := provider.networkConfig.BaseURL
	base = strings.Replace(base, "https://", "wss://", 1)
	base = strings.Replace(base, "http://", "ws://", 1)
	return base + "/v1/convai/conversation?agent_id=" + url.QueryEscape(model)
}
// RealtimeHeaders builds the header set for the ElevenLabs Conversational AI
// WebSocket handshake: the API key under "xi-api-key" plus every configured
// extra header. An extra header whose name matches "xi-api-key"
// (case-insensitively) is skipped so it cannot override the key.
func (provider *ElevenlabsProvider) RealtimeHeaders(key schemas.Key) map[string]string {
	const apiKeyHeader = "xi-api-key"

	result := map[string]string{apiKeyHeader: key.Value.GetValue()}
	for name, value := range provider.networkConfig.ExtraHeaders {
		if !strings.EqualFold(name, apiKeyHeader) {
			result[name] = value
		}
	}
	return result
}
// SupportsRealtimeWebRTC reports whether realtime WebRTC is available.
// It always returns false — ElevenLabs WebRTC SDP exchange is not yet implemented.
func (provider *ElevenlabsProvider) SupportsRealtimeWebRTC() bool {
return false
}
// ExchangeRealtimeWebRTCSDP is a placeholder: the WebRTC SDP exchange is not
// implemented for ElevenLabs. It ignores all arguments and always returns an
// empty answer together with a 400 "invalid_request_error" BifrostError.
func (provider *ElevenlabsProvider) ExchangeRealtimeWebRTCSDP(
	_ *schemas.BifrostContext,
	_ schemas.Key,
	_ string,
	_ string,
	_ json.RawMessage,
) (string, *schemas.BifrostError) {
	notImplemented := &schemas.ErrorField{
		Type:    schemas.Ptr("invalid_request_error"),
		Message: "WebRTC SDP exchange is not yet implemented for ElevenLabs",
	}
	return "", &schemas.BifrostError{
		IsBifrostError: true,
		StatusCode:     schemas.Ptr(400),
		Error:          notImplemented,
	}
}
// ShouldStartRealtimeTurn always returns false, regardless of the event passed in.
// NOTE(review): presumably this means no event marks the start of a turn for
// ElevenLabs — confirm against the caller.
func (provider *ElevenlabsProvider) ShouldStartRealtimeTurn(event *schemas.BifrostRealtimeEvent) bool {
return false
}
// RealtimeTurnFinalEvent returns schemas.RTEventResponseDone, the event type
// that ToBifrostRealtimeEvent assigns to ElevenLabs "agent_response" events.
func (provider *ElevenlabsProvider) RealtimeTurnFinalEvent() schemas.RealtimeEventType {
return schemas.RTEventResponseDone
}
// RealtimeWebRTCDataChannelLabel always returns the empty string.
// SupportsRealtimeWebRTC reports false for this provider.
func (provider *ElevenlabsProvider) RealtimeWebRTCDataChannelLabel() string {
return ""
}
// RealtimeWebSocketSubprotocol always returns the empty string.
// NOTE(review): presumably an empty value means no WebSocket subprotocol is
// requested — confirm against the caller.
func (provider *ElevenlabsProvider) RealtimeWebSocketSubprotocol() string {
return ""
}
// ShouldForwardRealtimeEvent always returns true, regardless of the event
// passed in; no event is filtered out by this provider.
func (provider *ElevenlabsProvider) ShouldForwardRealtimeEvent(event *schemas.BifrostRealtimeEvent) bool {
return true
}
// ShouldAccumulateRealtimeOutput returns true only when eventType is
// schemas.RTEventResponseDone, and false for every other event type.
func (provider *ElevenlabsProvider) ShouldAccumulateRealtimeOutput(eventType schemas.RealtimeEventType) bool {
return eventType == schemas.RTEventResponseDone
}
// ElevenLabs Conversational AI WebSocket event types.
// Each value is the string carried in an event's "type" field.
const (
// Event types matched by ToBifrostRealtimeEvent when decoding provider messages.
elConversationInitMetadata = "conversation_initiation_metadata"
elPing = "ping"
elAudio = "audio"
elUserTranscript = "user_transcript"
elAgentResponse = "agent_response"
elAgentResponseCorrection = "agent_response_correction"
elInterruption = "interruption"
elClientToolCall = "client_tool_call"
// Event type emitted by ToProviderRealtimeEvent for appended input audio.
elUserAudioChunk = "user_audio_chunk"
// Not referenced in the conversion functions of this file; presumably
// client-to-server event types — TODO confirm.
elPong = "pong"
elClientToolResult = "client_tool_result"
elContextualUpdate = "contextual_update"
)
// elevenlabsEvent represents a raw ElevenLabs Conversational AI WebSocket event.
// Type selects the event kind; the payload fields are kept as raw JSON so that
// only the one matching Type needs to be decoded.
type elevenlabsEvent struct {
// Type is the event discriminator (see the el* constants).
Type string `json:"type"`
// Server events
ConversationInitMetadata json.RawMessage `json:"conversation_initiation_metadata_event,omitempty"`
Audio json.RawMessage `json:"audio_event,omitempty"`
UserTranscript json.RawMessage `json:"user_transcription_event,omitempty"`
AgentResponse json.RawMessage `json:"agent_response_event,omitempty"`
AgentResponseCorrection json.RawMessage `json:"agent_response_correction_event,omitempty"`
ClientToolCall json.RawMessage `json:"client_tool_call,omitempty"`
PingEvent json.RawMessage `json:"ping_event,omitempty"`
// Client events
UserAudioChunk json.RawMessage `json:"user_audio_chunk,omitempty"`
}
// elevenlabsAudioEvent is the audio event structure from ElevenLabs.
// It is decoded from the "audio_event" payload of an elevenlabsEvent.
type elevenlabsAudioEvent struct {
// Audio holds the "audio_base_64" value (presumably base64-encoded audio, per the field name).
Audio string `json:"audio_base_64,omitempty"`
// Alignment is kept as raw JSON and is not interpreted here.
Alignment json.RawMessage `json:"alignment,omitempty"`
}
// elevenlabsTranscriptEvent is the user/agent transcript event from ElevenLabs.
// The same struct is used to decode both the user transcription payload
// (UserTranscript) and the agent response payload (AgentResponse).
type elevenlabsTranscriptEvent struct {
UserTranscript string `json:"user_transcript,omitempty"`
AgentResponse string `json:"agent_response,omitempty"`
AgentResponseID string `json:"agent_response_id,omitempty"`
}
// elevenlabsCorrectionEvent is the agent response correction event from ElevenLabs.
// It carries both the original and the corrected agent response text.
type elevenlabsCorrectionEvent struct {
OriginalAgentResponse string `json:"original_agent_response,omitempty"`
CorrectedAgentResponse string `json:"corrected_agent_response,omitempty"`
}
// ToBifrostRealtimeEvent translates a raw ElevenLabs Conversational AI
// WebSocket message into the unified Bifrost realtime event.
//
// The original bytes are always preserved in RawData. The event type is mapped
// as follows:
//   - conversation_initiation_metadata -> RTEventSessionCreated (empty Session)
//   - ping                             -> "ping"
//   - audio                            -> RTEventResponseAudioDelta (Delta.Audio)
//   - user_transcript                  -> RTEventInputAudioTransCompleted (Delta.Transcript)
//   - agent_response                   -> RTEventResponseDone (Delta.Text)
//   - agent_response_correction        -> RTEventResponseTextDelta (Delta.Text, corrected text)
//   - interruption                     -> RTEventResponseCancel
//   - client_tool_call                 -> "client_tool_call" (Item as a function_call)
//   - anything else                    -> the provider's type string unchanged
//
// Payload decoding is best-effort: if a nested payload is absent or fails to
// decode, the event is still returned with its type set but without the
// Delta/Item. An error is returned only when the outer envelope is not valid
// JSON.
func (provider *ElevenlabsProvider) ToBifrostRealtimeEvent(providerEvent json.RawMessage) (*schemas.BifrostRealtimeEvent, error) {
	var envelope elevenlabsEvent
	if err := json.Unmarshal(providerEvent, &envelope); err != nil {
		return nil, fmt.Errorf("failed to unmarshal ElevenLabs realtime event: %w", err)
	}

	result := &schemas.BifrostRealtimeEvent{RawData: providerEvent}

	switch envelope.Type {
	case elConversationInitMetadata:
		result.Type = schemas.RTEventSessionCreated
		result.Session = &schemas.RealtimeSession{}

	case elPing:
		result.Type = schemas.RealtimeEventType("ping")

	case elAudio:
		result.Type = schemas.RTEventResponseAudioDelta
		if envelope.Audio == nil {
			break
		}
		var payload elevenlabsAudioEvent
		if json.Unmarshal(envelope.Audio, &payload) == nil {
			result.Delta = &schemas.RealtimeDelta{Audio: payload.Audio}
		}

	case elUserTranscript:
		result.Type = schemas.RTEventInputAudioTransCompleted
		if envelope.UserTranscript == nil {
			break
		}
		var payload elevenlabsTranscriptEvent
		if json.Unmarshal(envelope.UserTranscript, &payload) == nil {
			result.Delta = &schemas.RealtimeDelta{Transcript: payload.UserTranscript}
		}

	case elAgentResponse:
		result.Type = schemas.RTEventResponseDone
		if envelope.AgentResponse == nil {
			break
		}
		var payload elevenlabsTranscriptEvent
		if json.Unmarshal(envelope.AgentResponse, &payload) == nil {
			result.Delta = &schemas.RealtimeDelta{Text: payload.AgentResponse}
		}

	case elAgentResponseCorrection:
		result.Type = schemas.RTEventResponseTextDelta
		if envelope.AgentResponseCorrection == nil {
			break
		}
		var payload elevenlabsCorrectionEvent
		if json.Unmarshal(envelope.AgentResponseCorrection, &payload) == nil {
			result.Delta = &schemas.RealtimeDelta{Text: payload.CorrectedAgentResponse}
		}

	case elInterruption:
		result.Type = schemas.RTEventResponseCancel

	case elClientToolCall:
		result.Type = schemas.RealtimeEventType("client_tool_call")
		if envelope.ClientToolCall == nil {
			break
		}
		var call struct {
			ToolName   string          `json:"tool_name"`
			Parameters json.RawMessage `json:"parameters"`
			ToolCallID string          `json:"tool_call_id"`
		}
		if json.Unmarshal(envelope.ClientToolCall, &call) != nil {
			break
		}

		// Default to the raw parameter bytes; replace them with the sorted
		// re-encoding only when both decode and re-encode succeed.
		arguments := string(call.Parameters)
		if len(call.Parameters) > 0 {
			var decoded interface{}
			if json.Unmarshal(call.Parameters, &decoded) == nil {
				if canonical, err := providerUtils.MarshalSorted(decoded); err == nil {
					arguments = string(canonical)
				}
			}
		}
		result.Item = &schemas.RealtimeItem{
			Type:      "function_call",
			Name:      call.ToolName,
			CallID:    call.ToolCallID,
			Arguments: arguments,
		}

	default:
		result.Type = schemas.RealtimeEventType(envelope.Type)
	}

	return result, nil
}
// ToProviderRealtimeEvent converts a unified Bifrost Realtime event to
// ElevenLabs' native JSON.
//
// Mapping:
//   - RTEventInputAudioAppend -> {"type":"user_audio_chunk","user_audio_chunk":<Delta.Audio>}
//   - "pong"                  -> {"type":"pong"}
//   - anything else           -> {"type":<event type>} (only the type is forwarded)
//
// An error is returned when bifrostEvent is nil, when an audio-append event
// has no Delta, or when marshalling fails.
func (provider *ElevenlabsProvider) ToProviderRealtimeEvent(bifrostEvent *schemas.BifrostRealtimeEvent) (json.RawMessage, error) {
	// Reject a nil event explicitly instead of panicking on the field access below.
	if bifrostEvent == nil {
		return nil, fmt.Errorf("bifrost realtime event must not be nil")
	}

	switch bifrostEvent.Type {
	case schemas.RTEventInputAudioAppend:
		if bifrostEvent.Delta == nil {
			return nil, fmt.Errorf("delta must be set for input_audio_buffer.append events")
		}
		out := map[string]interface{}{
			"type":             elUserAudioChunk,
			"user_audio_chunk": bifrostEvent.Delta.Audio,
		}
		return schemas.MarshalSorted(out)

	case schemas.RealtimeEventType("pong"):
		return schemas.MarshalSorted(map[string]interface{}{
			"type": "pong",
		})

	default:
		out := map[string]interface{}{
			"type": string(bifrostEvent.Type),
		}
		return schemas.MarshalSorted(out)
	}
}

View File

@@ -0,0 +1,102 @@
package elevenlabs
import (
"github.com/maximhq/bifrost/core/schemas"
)
// ToElevenlabsSpeechRequest converts a Bifrost speech request into the
// ElevenLabs text-to-speech request body.
//
// It returns nil when bifrostReq or its Input is nil. Model and input text are
// always copied. When Params is present:
//   - Speed and the extra params "stability", "use_speaker_boost",
//     "similarity_boost" and "style" populate VoiceSettings (set only if at
//     least one of them is provided);
//   - the extra params "seed", "previous_text", "next_text",
//     "previous_request_ids", "next_request_ids", "apply_text_normalization",
//     "apply_language_text_normalization" and "use_pvc_as_ivc" are lifted into
//     their typed fields;
//   - LanguageCode and PronunciationDictionaryLocators are copied over.
//
// Recognised keys are removed from the request's ExtraParams so they are not
// sent twice. This is done on a copy of the map: the caller's
// Params.ExtraParams is never mutated, so the same Bifrost request can be
// converted again (for example on a retry) with identical results.
func ToElevenlabsSpeechRequest(bifrostReq *schemas.BifrostSpeechRequest) *ElevenlabsSpeechRequest {
	if bifrostReq == nil || bifrostReq.Input == nil {
		return nil
	}

	elevenlabsReq := &ElevenlabsSpeechRequest{
		ModelID: bifrostReq.Model,
		Text:    bifrostReq.Input.Input,
	}

	params := bifrostReq.Params
	if params == nil {
		return elevenlabsReq
	}

	voiceSettings := ElevenlabsVoiceSettings{}
	hasVoiceSettings := false

	if params.Speed != nil {
		voiceSettings.Speed = *params.Speed
		hasVoiceSettings = true
	}

	if params.ExtraParams != nil {
		// Copy the map before deleting from it so the caller's request stays intact.
		extra := make(map[string]interface{}, len(params.ExtraParams))
		for k, v := range params.ExtraParams {
			extra[k] = v
		}
		elevenlabsReq.ExtraParams = extra

		if stability, ok := schemas.SafeExtractFloat64Pointer(extra["stability"]); ok {
			delete(extra, "stability")
			voiceSettings.Stability = *stability
			hasVoiceSettings = true
		}
		if useSpeakerBoost, ok := schemas.SafeExtractBoolPointer(extra["use_speaker_boost"]); ok {
			delete(extra, "use_speaker_boost")
			voiceSettings.UseSpeakerBoost = *useSpeakerBoost
			hasVoiceSettings = true
		}
		if similarityBoost, ok := schemas.SafeExtractFloat64Pointer(extra["similarity_boost"]); ok {
			delete(extra, "similarity_boost")
			voiceSettings.SimilarityBoost = *similarityBoost
			hasVoiceSettings = true
		}
		if style, ok := schemas.SafeExtractFloat64Pointer(extra["style"]); ok {
			delete(extra, "style")
			voiceSettings.Style = *style
			hasVoiceSettings = true
		}
		if seed, ok := schemas.SafeExtractIntPointer(extra["seed"]); ok {
			delete(extra, "seed")
			elevenlabsReq.Seed = seed
		}
		if previousText, ok := schemas.SafeExtractStringPointer(extra["previous_text"]); ok {
			delete(extra, "previous_text")
			elevenlabsReq.PreviousText = previousText
		}
		if nextText, ok := schemas.SafeExtractStringPointer(extra["next_text"]); ok {
			delete(extra, "next_text")
			elevenlabsReq.NextText = nextText
		}
		if previousRequestIDs, ok := schemas.SafeExtractStringSlice(extra["previous_request_ids"]); ok {
			delete(extra, "previous_request_ids")
			elevenlabsReq.PreviousRequestIDs = previousRequestIDs
		}
		if nextRequestIDs, ok := schemas.SafeExtractStringSlice(extra["next_request_ids"]); ok {
			delete(extra, "next_request_ids")
			elevenlabsReq.NextRequestIDs = nextRequestIDs
		}
		if applyTextNormalization, ok := schemas.SafeExtractStringPointer(extra["apply_text_normalization"]); ok {
			delete(extra, "apply_text_normalization")
			elevenlabsReq.ApplyTextNormalization = applyTextNormalization
		}
		if applyLanguageTextNormalization, ok := schemas.SafeExtractBoolPointer(extra["apply_language_text_normalization"]); ok {
			delete(extra, "apply_language_text_normalization")
			elevenlabsReq.ApplyLanguageTextNormalization = applyLanguageTextNormalization
		}
		if usePVCAsIVC, ok := schemas.SafeExtractBoolPointer(extra["use_pvc_as_ivc"]); ok {
			delete(extra, "use_pvc_as_ivc")
			elevenlabsReq.UsePVCAsIVC = usePVCAsIVC
		}
	}

	if hasVoiceSettings {
		elevenlabsReq.VoiceSettings = &voiceSettings
	}

	if params.LanguageCode != nil {
		elevenlabsReq.LanguageCode = params.LanguageCode
	}

	if len(params.PronunciationDictionaryLocators) > 0 {
		elevenlabsReq.PronunciationDictionaryLocators = make([]ElevenlabsPronunciationDictionaryLocator, len(params.PronunciationDictionaryLocators))
		for i, locator := range params.PronunciationDictionaryLocators {
			elevenlabsReq.PronunciationDictionaryLocators[i] = ElevenlabsPronunciationDictionaryLocator{
				PronunciationDictionaryID: locator.PronunciationDictionaryID,
				VersionID:                 locator.VersionID,
			}
		}
	}

	return elevenlabsReq
}

View File

@@ -0,0 +1,269 @@
package elevenlabs
import (
"errors"
"strings"
"github.com/bytedance/sonic"
"github.com/maximhq/bifrost/core/schemas"
)
// ToElevenlabsTranscriptionRequest converts a Bifrost transcription request
// into the ElevenLabs speech-to-text request.
//
// It returns nil for a nil request. The model is always copied; the file and
// filename are copied only when Input carries file bytes. When Params is
// present:
//   - Language becomes LanguageCode;
//   - the extra params "tag_audio_events", "num_speakers",
//     "timestamps_granularity", "diarize", "diarization_threshold",
//     "file_format", "cloud_storage_url", "webhook", "webhook_id",
//     "temperature", "seed" and "use_multi_channel" are lifted into their
//     typed fields, and the remaining extra params are attached to the request;
//   - AdditionalFormats entries with an empty format are dropped;
//   - WebhookMetadata is copied, except that an empty map[string]interface{}
//     is treated as absent.
//
// Recognised keys are removed from a copy of the extra params; the caller's
// Params.ExtraParams is never mutated, so the same Bifrost request can be
// converted again (for example on a retry) with identical results.
func ToElevenlabsTranscriptionRequest(bifrostReq *schemas.BifrostTranscriptionRequest) *ElevenlabsTranscriptionRequest {
	if bifrostReq == nil {
		return nil
	}

	req := &ElevenlabsTranscriptionRequest{
		ModelID: bifrostReq.Model,
	}

	if bifrostReq.Input != nil && len(bifrostReq.Input.File) > 0 {
		req.File = bifrostReq.Input.File
		req.Filename = bifrostReq.Input.Filename
	}

	if bifrostReq.Params == nil {
		return req
	}
	params := bifrostReq.Params

	if params.Language != nil {
		req.LanguageCode = params.Language
	}

	if params.ExtraParams != nil {
		// Copy the map before deleting from it so the caller's request stays intact.
		extra := make(map[string]interface{}, len(params.ExtraParams))
		for k, v := range params.ExtraParams {
			extra[k] = v
		}

		if tagAudioEvents, ok := schemas.SafeExtractBoolPointer(extra["tag_audio_events"]); ok {
			delete(extra, "tag_audio_events")
			req.TagAudioEvents = tagAudioEvents
		}
		if numSpeakers, ok := schemas.SafeExtractIntPointer(extra["num_speakers"]); ok {
			delete(extra, "num_speakers")
			req.NumSpeakers = numSpeakers
		}
		if timestampsGranularity, ok := schemas.SafeExtractStringPointer(extra["timestamps_granularity"]); ok {
			granularity := ElevenlabsTimestampsGranularity(*timestampsGranularity)
			delete(extra, "timestamps_granularity")
			req.TimestampsGranularity = &granularity
		}
		if diarize, ok := schemas.SafeExtractBoolPointer(extra["diarize"]); ok {
			delete(extra, "diarize")
			req.Diarize = diarize
		}
		if diarizationThreshold, ok := schemas.SafeExtractFloat64Pointer(extra["diarization_threshold"]); ok {
			delete(extra, "diarization_threshold")
			req.DiarizationThreshold = diarizationThreshold
		}
		if rawFileFormat, ok := schemas.SafeExtractStringPointer(extra["file_format"]); ok {
			fileFormat := ElevenlabsFileFormat(*rawFileFormat)
			delete(extra, "file_format")
			req.FileFormat = &fileFormat
		}
		if cloudStorageURL, ok := schemas.SafeExtractStringPointer(extra["cloud_storage_url"]); ok {
			delete(extra, "cloud_storage_url")
			req.CloudStorageURL = cloudStorageURL
		}
		if webhook, ok := schemas.SafeExtractBoolPointer(extra["webhook"]); ok {
			delete(extra, "webhook")
			req.Webhook = webhook
		}
		if webhookID, ok := schemas.SafeExtractStringPointer(extra["webhook_id"]); ok {
			delete(extra, "webhook_id")
			req.WebhookID = webhookID
		}
		if temperature, ok := schemas.SafeExtractFloat64Pointer(extra["temperature"]); ok {
			delete(extra, "temperature")
			req.Temperature = temperature
		}
		if seed, ok := schemas.SafeExtractIntPointer(extra["seed"]); ok {
			delete(extra, "seed")
			req.Seed = seed
		}
		if useMultiChannel, ok := schemas.SafeExtractBoolPointer(extra["use_multi_channel"]); ok {
			delete(extra, "use_multi_channel")
			req.UseMultiChannel = useMultiChannel
		}

		req.ExtraParams = extra
	}

	if len(params.AdditionalFormats) > 0 {
		additionalFormats := make([]ElevenlabsAdditionalFormat, 0, len(params.AdditionalFormats))
		for _, format := range params.AdditionalFormats {
			if converted, ok := convertAdditionalFormat(format); ok {
				additionalFormats = append(additionalFormats, converted)
			}
		}
		if len(additionalFormats) > 0 {
			req.AdditionalFormats = additionalFormats
		}
	}

	if params.WebhookMetadata != nil {
		if metadataMap, ok := params.WebhookMetadata.(map[string]interface{}); ok {
			// An empty metadata map is treated as "not provided".
			if len(metadataMap) > 0 {
				req.WebhookMetadata = metadataMap
			}
		} else {
			req.WebhookMetadata = params.WebhookMetadata
		}
	}

	return req
}
// ToBifrostTranscriptionResponse merges one or more Elevenlabs transcript
// chunks into a single Bifrost transcription response.
//
// It returns nil when chunks is empty. Otherwise:
//   - Text is every chunk's text joined with "\n", in input order.
//   - Words and LogProbs are the concatenation of each chunk's converted words
//     (always non-nil, possibly empty).
//   - Language is the first non-empty language code encountered, if any.
//   - Duration is the largest per-chunk duration reported by convertWords, if any.
func ToBifrostTranscriptionResponse(chunks []ElevenlabsSpeechToTextChunkResponse) *schemas.BifrostTranscriptionResponse {
	if len(chunks) == 0 {
		return nil
	}

	var (
		texts        = make([]string, len(chunks))
		mergedWords  = make([]schemas.TranscriptionWord, 0)
		mergedProbs  = make([]schemas.TranscriptionLogProb, 0)
		detectedLang *string
		longest      *float64
	)

	for i := range chunks {
		current := &chunks[i]
		texts[i] = current.Text

		chunkWords, chunkProbs, chunkDuration := convertWords(current.Words)
		mergedWords = append(mergedWords, chunkWords...)
		mergedProbs = append(mergedProbs, chunkProbs...)

		// Only the first chunk that reports a language wins.
		if detectedLang == nil && current.LanguageCode != "" {
			code := current.LanguageCode
			detectedLang = &code
		}

		// Track the longest chunk; the value is copied so the result does not
		// alias anything returned by convertWords.
		if chunkDuration != nil && (longest == nil || *chunkDuration > *longest) {
			seconds := *chunkDuration
			longest = &seconds
		}
	}

	result := &schemas.BifrostTranscriptionResponse{
		Text:     strings.Join(texts, "\n"),
		Words:    mergedWords,
		LogProbs: mergedProbs,
	}
	if detectedLang != nil {
		result.Language = detectedLang
	}
	if longest != nil {
		result.Duration = longest
	}
	return result
}
// convertAdditionalFormat translates a Bifrost additional-format request into
// its Elevenlabs counterpart.
//
// The second return value is false (and the first is the zero value) when the
// input has an empty Format. Optional pointer fields are carried over as-is,
// so unset (nil) options stay unset on the result.
func convertAdditionalFormat(format schemas.TranscriptionAdditionalFormat) (ElevenlabsAdditionalFormat, bool) {
	if format.Format == "" {
		return ElevenlabsAdditionalFormat{}, false
	}
	return ElevenlabsAdditionalFormat{
		Format:                      ElevenlabsExportOptions(format.Format),
		IncludeSpeakers:             format.IncludeSpeakers,
		IncludeTimestamps:           format.IncludeTimestamps,
		SegmentOnSilenceLongerThanS: format.SegmentOnSilenceLongerThanS,
		MaxSegmentDurationS:         format.MaxSegmentDurationS,
		MaxSegmentChars:             format.MaxSegmentChars,
		MaxCharactersPerLine:        format.MaxCharactersPerLine,
	}, true
}
// convertWords maps Elevenlabs word entries to Bifrost transcription words and
// log-probability entries.
//
// Entries whose Type is "spacing" and whose text is blank after trimming are
// dropped. Missing Start/End timestamps leave the corresponding field at its
// zero value. The third return value points at the largest End timestamp seen
// among the kept entries, or is nil when none of them carried an End.
// An empty input yields (nil, nil, nil).
func convertWords(words []ElevenlabsSpeechToTextWord) ([]schemas.TranscriptionWord, []schemas.TranscriptionLogProb, *float64) {
	if len(words) == 0 {
		return nil, nil, nil
	}

	var (
		out       = make([]schemas.TranscriptionWord, 0, len(words))
		probs     = make([]schemas.TranscriptionLogProb, 0, len(words))
		latestEnd *float64
	)

	for i := range words {
		w := &words[i]

		// Pure whitespace separators carry no transcript content.
		if w.Type == "spacing" && strings.TrimSpace(w.Text) == "" {
			continue
		}

		entry := schemas.TranscriptionWord{Word: w.Text}
		if w.Start != nil {
			entry.Start = *w.Start
		}
		if w.End != nil {
			entry.End = *w.End
			if latestEnd == nil || *w.End > *latestEnd {
				end := *w.End
				latestEnd = &end
			}
		}

		out = append(out, entry)
		probs = append(probs, schemas.TranscriptionLogProb{
			Token:   w.Text,
			LogProb: w.LogProb,
		})
	}

	return out, probs, latestEnd
}
// parseTranscriptionResponse decodes an Elevenlabs speech-to-text response body.
//
// The body is tried against three shapes, in order:
//  1. a multichannel response with a non-empty "transcripts" list, returned as-is;
//  2. a single chunk that has a language code, text, or at least one word,
//     returned as a one-element slice;
//  3. a webhook-style response with a non-blank "message", which is reported
//     as an error carrying that message.
//
// Anything else results in a generic "unexpected format" error.
func parseTranscriptionResponse(body []byte) ([]ElevenlabsSpeechToTextChunkResponse, error) {
	var multi ElevenlabsMultichannelSpeechToTextResponse
	if sonic.Unmarshal(body, &multi) == nil && len(multi.Transcripts) > 0 {
		return multi.Transcripts, nil
	}

	var chunk ElevenlabsSpeechToTextChunkResponse
	if sonic.Unmarshal(body, &chunk) == nil {
		// A decoded-but-empty chunk means the body was some other JSON object.
		hasContent := chunk.LanguageCode != "" || chunk.Text != "" || len(chunk.Words) > 0
		if hasContent {
			return []ElevenlabsSpeechToTextChunkResponse{chunk}, nil
		}
	}

	var ack ElevenlabsSpeechToTextWebhookResponse
	if sonic.Unmarshal(body, &ack) == nil {
		if strings.TrimSpace(ack.Message) != "" {
			return nil, errors.New(ack.Message)
		}
	}

	return nil, errors.New("unexpected Elevenlabs transcription response format")
}

View File

@@ -0,0 +1,289 @@
package elevenlabs
import (
"strings"
"github.com/bytedance/sonic"
)
// SPEECH TYPES

// ElevenlabsSpeechRequest is the JSON body of an Elevenlabs speech request.
//
// Fields tagged `omitempty` are left out of the payload when unset. The slice
// fields PronunciationDictionaryLocators, PreviousRequestIDs and NextRequestIDs
// carry no `omitempty`, so they are always emitted.
// NOTE(review): a nil slice presumably encodes as JSON null here — confirm the
// API accepts that.
// ExtraParams is excluded from the JSON encoding (`json:"-"`) and is exposed
// through GetExtraParams instead.
type ElevenlabsSpeechRequest struct {
Text string `json:"text"`
ModelID string `json:"model_id"` // defaults to "eleven_multilingual_v2"
LanguageCode *string `json:"language_code,omitempty"`
VoiceSettings *ElevenlabsVoiceSettings `json:"voice_settings,omitempty"`
PronunciationDictionaryLocators []ElevenlabsPronunciationDictionaryLocator `json:"pronunciation_dictionary_locators"`
Seed *int `json:"seed,omitempty"`
PreviousText *string `json:"previous_text,omitempty"`
NextText *string `json:"next_text,omitempty"`
PreviousRequestIDs []string `json:"previous_request_ids"`
NextRequestIDs []string `json:"next_request_ids"`
ApplyTextNormalization *string `json:"apply_text_normalization,omitempty"`
ApplyLanguageTextNormalization *bool `json:"apply_language_text_normalization,omitempty"`
UsePVCAsIVC *bool `json:"use_pvc_as_ivc,omitempty"` // deprecated
ExtraParams map[string]interface{} `json:"-"`
}
// GetExtraParams implements the providerUtils.RequestBodyWithExtraParams interface.
// It returns the request's ExtraParams map directly (no copy); the result is
// nil when no extra parameters were set.
func (r *ElevenlabsSpeechRequest) GetExtraParams() map[string]interface{} {
return r.ExtraParams
}
// ElevenlabsSpeechWithTimestampsResponse represents the response from the with-timestamps endpoint.
// AudioBase64 holds the audio payload as a base64 string (per its name and JSON key);
// Alignment and NormalizedAlignment are optional and nil when absent from the response.
type ElevenlabsSpeechWithTimestampsResponse struct {
AudioBase64 string `json:"audio_base64"`
Alignment *ElevenlabsAlignment `json:"alignment,omitempty"`
NormalizedAlignment *ElevenlabsAlignment `json:"normalized_alignment,omitempty"`
}
// ElevenlabsAlignment represents character-level timing information.
// NOTE(review): the three slices are presumably index-aligned (entry i of each
// describes the same character) and the times are presumably milliseconds, as
// the "_ms" suffix suggests — confirm against the API reference.
type ElevenlabsAlignment struct {
CharStartTimesMs []float64 `json:"char_start_times_ms"`
CharEndTimesMs []float64 `json:"char_end_times_ms"`
Characters []string `json:"characters"`
}
// ElevenlabsVoiceSettings holds the voice tuning options sent under
// "voice_settings" in a speech request.
//
// None of the fields use `omitempty` or pointers, so every field is always
// encoded: a Go zero value (0 / false) is sent explicitly rather than falling
// back to the API-side default noted on each field.
type ElevenlabsVoiceSettings struct {
Stability float64 `json:"stability"` // 0-1, default 0.5
UseSpeakerBoost bool `json:"use_speaker_boost"` // default true
SimilarityBoost float64 `json:"similarity_boost"` // 0-1, default 0.75
Style float64 `json:"style"` // default 0
Speed float64 `json:"speed"` // default 1
}
// ElevenlabsPronunciationDictionaryLocator identifies a pronunciation
// dictionary by ID, optionally pinned to a version. VersionID is omitted from
// the JSON encoding when nil.
type ElevenlabsPronunciationDictionaryLocator struct {
PronunciationDictionaryID string `json:"pronunciation_dictionary_id"`
VersionID *string `json:"version_id,omitempty"`
}
// TRANSCRIPTION TYPES

// ElevenlabsTranscriptionRequest describes an Elevenlabs speech-to-text request.
//
// File, Filename and ExtraParams are tagged `json:"-"` and therefore never
// appear in a JSON encoding of this struct; ExtraParams is exposed through
// GetExtraParams.
// NOTE(review): File/Filename are presumably sent as a multipart file part by
// the caller — confirm at the call site.
// All optional settings are pointers (or a slice / interface) with `omitempty`,
// so unset options are omitted.
type ElevenlabsTranscriptionRequest struct {
ModelID string `json:"model_id"`
File []byte `json:"-"`
Filename string `json:"-"` // Original filename, used to preserve file format extension
LanguageCode *string `json:"language_code,omitempty"`
TagAudioEvents *bool `json:"tag_audio_events,omitempty"`
NumSpeakers *int `json:"num_speakers,omitempty"`
TimestampsGranularity *ElevenlabsTimestampsGranularity `json:"timestamps_granularity,omitempty"`
Diarize *bool `json:"diarize,omitempty"`
DiarizationThreshold *float64 `json:"diarization_threshold,omitempty"`
AdditionalFormats []ElevenlabsAdditionalFormat `json:"additional_formats,omitempty"`
FileFormat *ElevenlabsFileFormat `json:"file_format,omitempty"`
CloudStorageURL *string `json:"cloud_storage_url,omitempty"`
Webhook *bool `json:"webhook,omitempty"`
WebhookID *string `json:"webhook_id,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
Seed *int `json:"seed,omitempty"`
UseMultiChannel *bool `json:"use_multi_channel,omitempty"`
WebhookMetadata interface{} `json:"webhook_metadata,omitempty"`
ExtraParams map[string]interface{} `json:"-"`
}
// GetExtraParams implements the RequestBodyWithExtraParams interface.
// It returns the request's ExtraParams map directly (no copy); the result is
// nil when no extra parameters were set.
func (req *ElevenlabsTranscriptionRequest) GetExtraParams() map[string]interface{} {
return req.ExtraParams
}
// ElevenlabsTimestampsGranularity is the string value used for the
// "timestamps_granularity" option of a transcription request.
type ElevenlabsTimestampsGranularity string
// Timestamp granularity values declared by this package.
const (
ElevenlabsTimestampsGranularityNone ElevenlabsTimestampsGranularity = "none"
ElevenlabsTimestampsGranularityWord ElevenlabsTimestampsGranularity = "word"
ElevenlabsTimestampsGranularityCharacter ElevenlabsTimestampsGranularity = "character"
)
// ElevenlabsFileFormat is the string value used for the "file_format" option
// of a transcription request.
type ElevenlabsFileFormat string
// File format values declared by this package.
const (
ElevenlabsFileFormatPcmS16le16 ElevenlabsFileFormat = "pcm_s16le_16"
ElevenlabsFileFormatOther ElevenlabsFileFormat = "other"
)
// ElevenlabsAdditionalFormat describes one extra export format requested
// alongside a transcription ("additional_formats" in the request).
// Format is always encoded; every other option is a pointer with `omitempty`
// and is omitted when nil.
type ElevenlabsAdditionalFormat struct {
Format ElevenlabsExportOptions `json:"format"`
IncludeSpeakers *bool `json:"include_speakers,omitempty"`
IncludeTimestamps *bool `json:"include_timestamps,omitempty"`
SegmentOnSilenceLongerThanS *float64 `json:"segment_on_silence_longer_than_s,omitempty"`
MaxSegmentDurationS *float64 `json:"max_segment_duration_s,omitempty"`
MaxSegmentChars *int `json:"max_segment_chars,omitempty"`
MaxCharactersPerLine *int `json:"max_characters_per_line,omitempty"`
}
// ElevenlabsExportOptions is the string value used for the "format" field of
// an ElevenlabsAdditionalFormat.
type ElevenlabsExportOptions string
// Export format values declared by this package.
const (
ElevenlabsExportOptionsSegmentedJson ElevenlabsExportOptions = "segmented_json"
ElevenlabsExportOptionsDocx ElevenlabsExportOptions = "docx"
ElevenlabsExportOptionsPdf ElevenlabsExportOptions = "pdf"
ElevenlabsExportOptionsTxt ElevenlabsExportOptions = "txt"
ElevenlabsExportOptionsHtml ElevenlabsExportOptions = "html"
ElevenlabsExportOptionsSrt ElevenlabsExportOptions = "srt"
)
// ElevenlabsSpeechToTextChunkResponse is one transcript as returned by the
// speech-to-text API. It is used both as the whole body of a single-transcript
// response and as an element of
// ElevenlabsMultichannelSpeechToTextResponse.Transcripts.
type ElevenlabsSpeechToTextChunkResponse struct {
LanguageCode string `json:"language_code"`
LanguageProbability *float64 `json:"language_probability,omitempty"`
Text string `json:"text"`
Words []ElevenlabsSpeechToTextWord `json:"words"`
ChannelIndex *int `json:"channel_index,omitempty"`
AdditionalFormats []*ElevenlabsAdditionalFormatResponse `json:"additional_formats,omitempty"`
TranscriptionID *string `json:"transcription_id,omitempty"`
}
// ElevenlabsSpeechToTextWord is one entry of a transcript's "words" list.
// Type distinguishes entry kinds; convertWords drops entries whose Type is
// "spacing" and whose text is blank. Start and End are optional timestamps.
// NOTE(review): timestamps are presumably seconds from the start of the audio — confirm.
type ElevenlabsSpeechToTextWord struct {
Text string `json:"text"`
Start *float64 `json:"start,omitempty"`
End *float64 `json:"end,omitempty"`
Type string `json:"type"`
SpeakerID *string `json:"speaker_id,omitempty"`
LogProb float64 `json:"logprob"`
Characters []ElevenlabsSpeechToTextCharacter `json:"characters,omitempty"`
}
// ElevenlabsSpeechToTextCharacter is one entry of a word's optional
// "characters" list, with optional start/end timestamps.
type ElevenlabsSpeechToTextCharacter struct {
Text string `json:"text"`
Start *float64 `json:"start,omitempty"`
End *float64 `json:"end,omitempty"`
}
// ElevenlabsAdditionalFormatResponse is one exported document returned in a
// transcript's "additional_formats" list. IsBase64Encoded reports whether
// Content is base64-encoded, per the field's name and JSON key.
type ElevenlabsAdditionalFormatResponse struct {
RequestedFormat string `json:"requested_format"`
FileExtension string `json:"file_extension"`
ContentType string `json:"content_type"`
IsBase64Encoded bool `json:"is_base64_encoded"`
Content string `json:"content"`
}
// ElevenlabsMultichannelSpeechToTextResponse is the response shape that wraps
// several transcripts under "transcripts". parseTranscriptionResponse treats a
// body as this shape only when Transcripts decodes non-empty.
type ElevenlabsMultichannelSpeechToTextResponse struct {
Transcripts []ElevenlabsSpeechToTextChunkResponse `json:"transcripts"`
TranscriptionID *string `json:"transcription_id,omitempty"`
}
// ElevenlabsSpeechToTextWebhookResponse is the response shape that carries a
// "message" and "request_id" instead of a transcript.
// parseTranscriptionResponse surfaces a non-blank Message as an error.
// NOTE(review): presumably returned when the request asked for webhook
// delivery — confirm against the API reference.
type ElevenlabsSpeechToTextWebhookResponse struct {
Message string `json:"message"`
RequestID string `json:"request_id"`
TranscriptionID *string `json:"transcription_id,omitempty"`
}
// ERROR TYPES

// ElevenlabsError is the top-level error body, whose payload sits under
// "detail". Detail is nil when the key is absent; its decoding is handled by
// ElevenlabsErrorDetail.UnmarshalJSON.
type ElevenlabsError struct {
Detail *ElevenlabsErrorDetail `json:"detail,omitempty"`
}
// ElevenlabsErrorDetail handles both single object (non-validation errors) and
// array of objects (validation errors) formats from ElevenLabs API.
// Decoding goes through the custom UnmarshalJSON method, which fills
// ValidationErrors itself (the field is tagged `json:"-"`).
type ElevenlabsErrorDetail struct {
// Non-validation error fields (when detail is a single object)
Status *string `json:"status,omitempty"`
Message *string `json:"message,omitempty"`
// Validation error fields (when detail is an array)
ValidationErrors []ElevenlabsValidationError `json:"-"`
}
// ElevenlabsValidationError represents a single validation error entry.
// The human-readable text may arrive under either "msg" or "message", so both
// are decoded; consumers should check Message first and fall back to Msg, as
// ElevenlabsErrorDetail.UnmarshalJSON does.
type ElevenlabsValidationError struct {
Loc []string `json:"loc"`
Msg string `json:"msg"`
Message string `json:"message"` // Some APIs use "message" instead of "msg"
Type string `json:"type"`
}
// UnmarshalJSON implements custom JSON unmarshaling for the "detail" value of
// an ElevenLabs error body. Three shapes are accepted, selected by the first
// non-whitespace byte of data:
//
//   - '[': an array of validation errors. ValidationErrors receives the whole
//     list and Message is taken from the first entry ("message" preferred over
//     "msg").
//   - '"': a plain string (e.g. {"detail": "Not Found"}). The string becomes
//     Message when non-empty. Previously this shape fell through to the object
//     branch and failed to decode, losing the message.
//   - anything else: a single object. Status and Message ("message" preferred
//     over "msg") are copied; if the object also carries "loc" or "type" it is
//     additionally recorded as a one-element ValidationErrors list.
//
// It returns the underlying decode error when data does not match the
// selected shape.
func (d *ElevenlabsErrorDetail) UnmarshalJSON(data []byte) error {
	// Dispatch on the first non-whitespace character.
	trimmed := strings.TrimSpace(string(data))
	if len(trimmed) > 0 {
		switch trimmed[0] {
		case '[':
			var validationErrors []ElevenlabsValidationError
			if err := sonic.Unmarshal(data, &validationErrors); err != nil {
				return err
			}
			d.ValidationErrors = validationErrors
			// Extract message from first validation error if available.
			if len(validationErrors) > 0 {
				if validationErrors[0].Message != "" {
					d.Message = &validationErrors[0].Message
				} else if validationErrors[0].Msg != "" {
					d.Message = &validationErrors[0].Msg
				}
			}
			return nil
		case '"':
			var text string
			if err := sonic.Unmarshal(data, &text); err != nil {
				return err
			}
			if text != "" {
				d.Message = &text
			}
			return nil
		}
	}

	// Otherwise decode as a single object (non-validation error).
	var obj struct {
		Type    *string  `json:"type,omitempty"`
		Loc     []string `json:"loc,omitempty"`
		Message *string  `json:"message,omitempty"`
		Status  *string  `json:"status,omitempty"`
		Msg     *string  `json:"msg,omitempty"` // Some APIs use "msg" instead of "message"
	}
	if err := sonic.Unmarshal(data, &obj); err != nil {
		return err
	}

	// Populate non-validation error fields.
	d.Status = obj.Status
	if obj.Message != nil {
		d.Message = obj.Message
	} else if obj.Msg != nil {
		d.Message = obj.Msg
	}

	// An object with validation-like fields (loc, type) is also exposed as a
	// single validation error so callers can treat both shapes uniformly.
	if len(obj.Loc) > 0 || obj.Type != nil {
		validationErr := ElevenlabsValidationError{Loc: obj.Loc}
		if obj.Type != nil {
			validationErr.Type = *obj.Type
		}
		if obj.Message != nil {
			validationErr.Message = *obj.Message
		} else if obj.Msg != nil {
			validationErr.Msg = *obj.Msg
			validationErr.Message = *obj.Msg
		}
		d.ValidationErrors = []ElevenlabsValidationError{validationErr}
	}
	return nil
}
// MODEL TYPES

// ElevenlabsModel describes one model entry; ElevenlabsListModelsResponse is a
// slice of these. Note that MaxTextLengthPerRequest is decoded from the JSON
// key "maximum_text_length_per_request" (not "max_...").
type ElevenlabsModel struct {
ModelID string `json:"model_id"`
Name string `json:"name"`
Description string `json:"description"`
ServesProVoices bool `json:"serves_pro_voices"`
TokenCostFactor float64 `json:"token_cost_factor"`
CanBeFinetuned bool `json:"can_be_finetuned"`
CanDoTextToSpeech bool `json:"can_do_text_to_speech"`
CanDoVoiceConversion bool `json:"can_do_voice_conversion"`
CanUseStyle bool `json:"can_use_style"`
CanUseSpeakerBoost bool `json:"can_use_speaker_boost"`
Languages []ElevenlabsLanguage `json:"languages"`
RequiresAlphaAccess bool `json:"requires_alpha_access"`
MaxCharactersRequestFreeUser int `json:"max_characters_request_free_user"`
MaxCharactersRequestSubscribedUser int `json:"max_characters_request_subscribed_user"`
MaxTextLengthPerRequest int `json:"maximum_text_length_per_request"`
ModelRates ElevenlabsModelRate `json:"model_rates"`
ConcurrencyGroup string `json:"concurrency_group"`
}
// ElevenlabsLanguage is one entry of a model's "languages" list: a language
// identifier and its display name.
type ElevenlabsLanguage struct {
LanguageID string `json:"language_id"`
Name string `json:"name"`
}
// ElevenlabsModelRate holds the pricing information reported under a model's
// "model_rates" key.
type ElevenlabsModelRate struct {
CharacterCostMultiplier float64 `json:"character_cost_multiplier"`
}
// ElevenlabsListModelsResponse is the list-models response body: a bare JSON
// array of ElevenlabsModel entries (no wrapping object).
type ElevenlabsListModelsResponse []ElevenlabsModel

View File

@@ -0,0 +1,35 @@
package elevenlabs
var (
// bifrostToElevenlabsSpeechFormat maps Bifrost speech format names to
// Elevenlabs output format identifiers. The empty string (no format given)
// maps to the mp3 identifier, and both "wav" and "pcm" map to "pcm_44100".
bifrostToElevenlabsSpeechFormat = map[string]string{
"": "mp3_44100_128",
"mp3": "mp3_44100_128",
"opus": "opus_48000_128",
"wav": "pcm_44100",
"pcm": "pcm_44100",
}
// elevenlabsSpeechFormatToBifrost maps Elevenlabs output format identifiers
// back to Bifrost speech format names. "pcm_44100" maps to "wav", so a
// Bifrost "pcm" value does not round-trip through both maps.
elevenlabsSpeechFormatToBifrost = map[string]string{
"mp3_44100_128": "mp3",
"opus_48000_128": "opus",
"pcm_44100": "wav",
}
)
// ConvertBifrostSpeechFormatToElevenlabs translates a Bifrost speech format
// name into the matching Elevenlabs output format identifier. Names without an
// entry in the lookup table are returned unchanged, so callers may pass a
// native Elevenlabs identifier straight through.
func ConvertBifrostSpeechFormatToElevenlabs(format string) string {
	mapped, known := bifrostToElevenlabsSpeechFormat[format]
	if !known {
		return format
	}
	return mapped
}
// ConvertElevenlabsSpeechFormatToBifrost translates an Elevenlabs output
// format identifier into the matching Bifrost speech format name. Identifiers
// without an entry in the lookup table are returned unchanged.
func ConvertElevenlabsSpeechFormatToBifrost(format string) string {
	mapped, known := elevenlabsSpeechFormatToBifrost[format]
	if !known {
		return format
	}
	return mapped
}