package bifrost

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"net"
	"net/url"
	"slices"
	"strings"
	"time"

	"github.com/maximhq/bifrost/core/mcp"
	"github.com/maximhq/bifrost/core/schemas"
)

// retryableStatusCodes is the set of status codes that are treated as retryable.
// It is used as a set: a lookup for any code that is not listed yields false
// (the zero value of the map's element type).
var retryableStatusCodes = map[int]bool{
	500: true, // Internal Server Error
	502: true, // Bad Gateway
	503: true, // Service Unavailable
	504: true, // Gateway Timeout
	429: true, // Too Many Requests
}

// rateLimitPatterns lists error message patterns that indicate rate limiting.
// Every entry is written in lowercase and matching is meant to be case-insensitive.
// NOTE(review): the matching code is not visible in this part of the file; it
// presumably lowercases the message before comparing — confirm before adding
// entries that contain uppercase characters.
var rateLimitPatterns = []string{
	"rate limit",
	"rate_limit",
	"ratelimit",
	"too many requests",
	"quota exceeded",
	"quota_exceeded",
	"request limit",
	"throttled",
	"throttling",
	"rate exceeded",
	"limit exceeded",
	"requests per",
	"rpm exceeded",
	"tpm exceeded",
	"tokens per minute",
	"requests per minute",
	"requests per second",
	"api rate limit",
	"usage limit",
	"concurrent requests limit",
	"burst_rate",
	"rate increased",
}

// dynamicallyConfigurableProviders is the list of providers that can be dynamically configured.
// Excluding providers that require extra configuration (e.g. Ollama, SGL, vLLM).
var dynamicallyConfigurableProviders = []schemas.ModelProvider{
	schemas.Anthropic,
	schemas.Azure,
	schemas.Bedrock,
	schemas.Cerebras,
	schemas.Cohere,
	schemas.Elevenlabs,
	schemas.Gemini,
	schemas.Groq,
	schemas.HuggingFace,
	schemas.Mistral,
	schemas.Nebius,
	schemas.OpenAI,
	schemas.OpenRouter,
	schemas.Parasail,
	schemas.Perplexity,
	schemas.Vertex,
	schemas.XAI,
}

// isModelRequired reports whether reqType is one of the request types that
// requires a model. Any request type not listed in the switch yields false.
func isModelRequired(reqType schemas.RequestType) bool {
	switch reqType {
	case schemas.TextCompletionRequest,
		schemas.TextCompletionStreamRequest,
		schemas.ChatCompletionRequest,
		schemas.ChatCompletionStreamRequest,
		schemas.ResponsesRequest,
		schemas.ResponsesStreamRequest,
		schemas.SpeechRequest,
		schemas.SpeechStreamRequest,
		schemas.TranscriptionRequest,
		schemas.TranscriptionStreamRequest,
		schemas.EmbeddingRequest,
		schemas.ImageGenerationRequest,
		schemas.ImageGenerationStreamRequest,
		schemas.VideoGenerationRequest:
		return true
	default:
		return false
	}
}

// Ptr returns a pointer to a freshly allocated copy of v.
func Ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}

// providerRequiresKey reports whether an API key is required for authentication.
//
// A key is not required only when customConfig is non-nil, is marked IsKeyLess,
// and its BaseProviderType is something other than Bedrock (keyless custom
// providers are not allowed for Bedrock). In every other case, including a nil
// customConfig, a key is required.
func providerRequiresKey(customConfig *schemas.CustomProviderConfig) bool {
	// The nil check comes first so the field reads below are never reached
	// for a nil config.
	return customConfig == nil ||
		!customConfig.IsKeyLess ||
		customConfig.BaseProviderType == schemas.Bedrock
}

// CanProviderKeyValueBeEmpty returns true if the given provider allows the API key to be empty.
// Some providers like Vertex and Bedrock have their credentials in additional key configs.
// Ollama and SGL are keyless (API Key is optional) but use per-key server URLs.
func CanProviderKeyValueBeEmpty(providerKey schemas.ModelProvider) bool { return providerKey == schemas.Vertex || providerKey == schemas.Bedrock || providerKey == schemas.VLLM || providerKey == schemas.Azure || providerKey == schemas.Ollama || providerKey == schemas.SGL } func isKeySkippingAllowed(providerKey schemas.ModelProvider) bool { return providerKey != schemas.Azure && providerKey != schemas.Bedrock && providerKey != schemas.Vertex } // calculateBackoff implements exponential backoff with jitter for retry attempts. func calculateBackoff(attempt int, config *schemas.ProviderConfig) time.Duration { // Calculate an exponential backoff: initial * 2^attempt backoff := min(config.NetworkConfig.RetryBackoffInitial*time.Duration(1<