package schemas import ( "encoding/json" "fmt" "math/rand" "net/url" "regexp" "strconv" "strings" "sync" "time" ) // Ptr creates a pointer to any value. // This is a helper function for creating pointers to values. func Ptr[T any](v T) *T { return &v } // GetRandomString generates a random alphanumeric string of the given length. func GetRandomString(length int) string { if length <= 0 { return "" } randomSource := rand.New(rand.NewSource(time.Now().UnixNano())) letters := []rune("abcdef0123456789") b := make([]rune, length) for i := range b { b[i] = letters[randomSource.Intn(len(letters))] } return string(b) } // knownProvidersMu protects concurrent access to knownProviders. var knownProvidersMu sync.RWMutex // knownProviders is a set of all known provider strings for O(1) lookup. // Built once from StandardProviders at package init time, and dynamically // updated when custom providers are added or removed. // Used by ParseModelString to distinguish real provider prefixes (e.g. "openai/gpt-4o") // from model namespace prefixes (e.g. "meta-llama/Llama-3.1-8B"). var knownProviders = func() map[string]bool { m := make(map[string]bool, len(StandardProviders)) for _, p := range StandardProviders { m[string(p)] = true } return m }() // RegisterKnownProvider adds a provider to the known providers set. // This allows ParseModelString to correctly parse model strings with // custom provider prefixes (e.g., "my-custom-provider/gpt-4"). func RegisterKnownProvider(provider ModelProvider) { knownProvidersMu.Lock() defer knownProvidersMu.Unlock() knownProviders[string(provider)] = true } // UnregisterKnownProvider removes a custom provider from the known providers set. // Standard providers cannot be unregistered. 
func UnregisterKnownProvider(provider ModelProvider) { for _, p := range StandardProviders { if p == provider { return // Don't unregister standard providers } } knownProvidersMu.Lock() defer knownProvidersMu.Unlock() delete(knownProviders, string(provider)) } // IsKnownProvider checks if a provider string is known. func IsKnownProvider(provider string) bool { knownProvidersMu.RLock() defer knownProvidersMu.RUnlock() return knownProviders[provider] } // ParseModelString extracts provider and model from a model string. // For model strings like "anthropic/claude", it returns ("anthropic", "claude"). // For model strings like "claude", it returns ("", "claude"). // Only splits on "/" when the prefix is a known Bifrost provider, so model // namespaces like "meta-llama/Llama-3.1-8B" are preserved as-is. func ParseModelString(model string, defaultProvider ModelProvider) (ModelProvider, string) { // Check if model contains a provider prefix (only split on first "/" to preserve model names with "/") if strings.Contains(model, "/") { parts := strings.SplitN(model, "/", 2) if len(parts) == 2 && IsKnownProvider(parts[0]) { return ModelProvider(parts[0]), parts[1] } } // No known provider prefix found, return default provider and the original model return defaultProvider, model } // IsAllDigitsASCII checks if a string contains only ASCII digits (0-9). 
func IsAllDigitsASCII(s string) bool { if s == "" { return false } for i := 0; i < len(s); i++ { c := s[i] if c < '0' || c > '9' { return false } } return true } // ParseFallbacks parses a slice of strings into a slice of Fallback structs func ParseFallbacks(fallbacks []string) []Fallback { if len(fallbacks) == 0 { return nil } parsedFallbacks := make([]Fallback, 0, len(fallbacks)) for _, fallback := range fallbacks { if fallback == "" { continue } fallbackProvider, fallbackModel := ParseModelString(fallback, "") if fallbackProvider != "" && fallbackModel != "" { parsedFallbacks = append(parsedFallbacks, Fallback{Provider: fallbackProvider, Model: fallbackModel}) } } return parsedFallbacks } //* IMAGE UTILS *// // dataURIRegex is a precompiled regex for matching data URI format patterns. // It matches patterns like: data:image/png;base64,iVBORw0KGgo... var dataURIRegex = regexp.MustCompile(`^data:([^;]+)(;base64)?,(.+)$`) // base64Regex is a precompiled regex for matching base64 strings. // It matches strings containing only valid base64 characters with optional padding. var base64Regex = regexp.MustCompile(`^[A-Za-z0-9+/]*={0,2}$`) // fileExtensionToMediaType maps common image file extensions to their corresponding media types. // This map is used to infer media types from file extensions in URLs. var fileExtensionToMediaType = map[string]string{ ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png", ".gif": "image/gif", ".webp": "image/webp", ".svg": "image/svg+xml", ".bmp": "image/bmp", } // ImageContentType represents the type of image content type ImageContentType string const ( ImageContentTypeBase64 ImageContentType = "base64" ImageContentTypeURL ImageContentType = "url" ) // URLTypeInfo contains extracted information about a URL type URLTypeInfo struct { Type ImageContentType MediaType *string DataURLWithoutPrefix *string // URL without the prefix (eg data:image/png;base64,iVBORw0KGgo...) 
} // SanitizeImageURL sanitizes and validates an image URL. // It handles both data URLs and regular HTTP/HTTPS URLs. // It also detects raw base64 image data and adds proper data URL headers. func SanitizeImageURL(rawURL string) (string, error) { if rawURL == "" { return rawURL, fmt.Errorf("URL cannot be empty") } // Trim whitespace rawURL = strings.TrimSpace(rawURL) // Check if it's already a proper data URL if strings.HasPrefix(rawURL, "data:") { // Validate data URL format if !dataURIRegex.MatchString(rawURL) { return rawURL, fmt.Errorf("invalid data URL format") } return rawURL, nil } // Check if it looks like raw base64 image data if isLikelyBase64(rawURL) { // Detect the image type from the base64 data mediaType := detectImageTypeFromBase64(rawURL) // Remove any whitespace/newlines from base64 data cleanBase64 := strings.ReplaceAll(strings.ReplaceAll(rawURL, "\n", ""), " ", "") // Create proper data URL return fmt.Sprintf("data:%s;base64,%s", mediaType, cleanBase64), nil } // Parse as regular URL parsedURL, err := url.Parse(rawURL) if err != nil { return rawURL, fmt.Errorf("invalid URL format: %w", err) } // Validate scheme if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { return rawURL, fmt.Errorf("URL must use http or https scheme") } // Validate host if parsedURL.Host == "" { return rawURL, fmt.Errorf("URL must have a valid host") } return parsedURL.String(), nil } // ExtractURLTypeInfo extracts type and media type information from a sanitized URL. // For data URLs, it parses the media type and encoding. // For regular URLs, it attempts to infer the media type from the file extension. 
func ExtractURLTypeInfo(sanitizedURL string) URLTypeInfo {
	// Anything starting with "data:" is treated as a data URL; everything else
	// is treated as a regular URL.
	if strings.HasPrefix(sanitizedURL, "data:") {
		return extractDataURLInfo(sanitizedURL)
	}
	return extractRegularURLInfo(sanitizedURL)
}

// extractDataURLInfo extracts information from a data URL.
//
// If dataURL does not match dataURIRegex, only Type is set (to
// ImageContentTypeBase64) and both pointer fields stay nil. Otherwise
// MediaType points at the captured media type and:
//   - for a ";base64" data URL, Type is ImageContentTypeBase64 and
//     DataURLWithoutPrefix points at the payload after "data:...;base64,";
//   - for any other data URL, Type is ImageContentTypeURL and
//     DataURLWithoutPrefix points at the complete, unmodified data URL.
func extractDataURLInfo(dataURL string) URLTypeInfo {
	// Parse data URL of the form data:MEDIA-TYPE[;base64],DATA
	// (submatches: 0 = whole match, 1 = media type, 2 = ";base64" or "", 3 = data).
	matches := dataURIRegex.FindStringSubmatch(dataURL)
	if len(matches) != 4 {
		return URLTypeInfo{Type: ImageContentTypeBase64}
	}
	mediaType := matches[1]
	isBase64 := matches[2] == ";base64"
	dataURLWithoutPrefix := dataURL
	if isBase64 {
		// Skip exactly the "data:" + media type + ";base64," header.
		dataURLWithoutPrefix = dataURL[len("data:")+len(mediaType)+len(";base64,"):]
	}
	info := URLTypeInfo{
		MediaType:            &mediaType,
		DataURLWithoutPrefix: &dataURLWithoutPrefix,
	}
	if isBase64 {
		info.Type = ImageContentTypeBase64
	} else {
		info.Type = ImageContentTypeURL // Non-base64 data URL
	}
	return info
}

// extractRegularURLInfo extracts information from a regular HTTP/HTTPS URL.
//
// Type is always ImageContentTypeURL. MediaType is set only when the
// lower-cased URL path ends in one of the extensions listed in
// fileExtensionToMediaType; it stays nil if the URL cannot be parsed or the
// extension is not recognized.
func extractRegularURLInfo(regularURL string) URLTypeInfo {
	info := URLTypeInfo{
		Type: ImageContentTypeURL,
	}
	// Try to infer media type from file extension
	parsedURL, err := url.Parse(regularURL)
	if err != nil {
		return info
	}
	// Only the path is inspected, so query strings and fragments are ignored.
	path := strings.ToLower(parsedURL.Path)
	// Check for known file extensions using the map
	for ext, mediaType := range fileExtensionToMediaType {
		if strings.HasSuffix(path, ext) {
			// The loop exits immediately after taking the address, so the
			// pointer keeps referring to the matched media type.
			info.MediaType = &mediaType
			break
		}
	}
	// For URLs without recognizable extensions, MediaType remains nil
	return info
}

// detectImageTypeFromBase64 detects the image type from base64 data by examining the header bytes.
// Newlines and spaces are removed from the input before its leading
// characters are compared against known base64-encoded file signatures.
func detectImageTypeFromBase64(base64Data string) string {
	// Remove any whitespace or newlines
	cleanData := strings.ReplaceAll(strings.ReplaceAll(base64Data, "\n", ""), " ", "")
	// Check common image format signatures in base64
	switch {
	case strings.HasPrefix(cleanData, "/9j/") || strings.HasPrefix(cleanData, "/9k/"):
		// JPEG images typically start with /9j/ or /9k/ in base64 (FFD8 in hex)
		return "image/jpeg"
	case strings.HasPrefix(cleanData, "iVBORw0KGgo"):
		// PNG images start with iVBORw0KGgo in base64 (89504E470D0A1A0A in hex)
		return "image/png"
	case strings.HasPrefix(cleanData, "R0lGOD"):
		// GIF images start with R0lGOD in base64 (474946 in hex)
		return "image/gif"
	case strings.HasPrefix(cleanData, "Qk"):
		// BMP images start with Qk in base64 (424D in hex)
		return "image/bmp"
	case strings.HasPrefix(cleanData, "UklGR") && len(cleanData) >= 16 && cleanData[12:16] == "V0VC":
		// WebP images start with RIFF header (UklGR in base64) and have WEBP signature at offset 8-11 (V0VC in base64)
		// NOTE(review): with "RIFF" at bytes 0-3 and "WEBP" at bytes 8-11, base64
		// characters 12-15 encode bytes 9-11 ("EBP"), which is "RUJQ"; "V0VC" is
		// the encoding of "WEB". As written this case does not appear able to
		// match a real WebP header - confirm and correct the compared literal.
		return "image/webp"
	case strings.HasPrefix(cleanData, "PHN2Zy") || strings.HasPrefix(cleanData, "PD94bW"):
		// SVG images often start with an "svg" opening tag or an XML declaration
		// ("?xml"); the two prefixes above are the base64 encodings of those.
		// NOTE(review): the reviewed text is truncated at this point. The body of
		// this case, the end of this function and any declarations that followed
		// it are not shown here and have deliberately not been reconstructed.

// SplitModelAndVersion splits a model id into a base name and a recognized
// trailing version suffix. When no suffix is recognized the id is returned
// unchanged as base with an empty version; an empty id yields ("", "").
//
// NOTE(review): the opening of the original doc comment is missing from the
// reviewed text. The examples below are the ones that survived; they depend on
// openAIDateRe, anthropicDateRe and taggedVersionRe, which are declared
// outside the text shown here.
//
//	"claude-sonnet-4"              -> ("claude-sonnet-4", "")
//	"claude-sonnet-4-20250514"     -> ("claude-sonnet-4", "20250514")
//	"gpt-4.1-2024-09-12"           -> ("gpt-4.1", "2024-09-12")
//	"gpt-4.1-mini-2024-09-12"      -> ("gpt-4.1-mini", "2024-09-12")
//	"some-model-v2"                -> ("some-model", "v2")
//	"text-embedding-3-large-beta"  -> ("text-embedding-3-large", "beta")
//	"claude-sonnet-4.5"            -> ("claude-sonnet-4.5", "")
func SplitModelAndVersion(id string) (base, version string) {
	if id == "" {
		return "", ""
	}
	parts := strings.Split(id, "-")
	n := len(parts)
	// Defensive only: strings.Split with a non-empty separator always returns
	// at least one element.
	if n == 0 {
		return "", ""
	}
	// 1. Try OpenAI-style date: last 3 parts, e.g. "2024-09-12".
	// This is checked first so a three-part date is kept together as one version.
	if n >= 3 {
		last3 := strings.Join(parts[n-3:], "-")
		if openAIDateRe.MatchString(last3) {
			base := strings.Join(parts[:n-3], "-")
			return base, last3
		}
	}
	// 2. Try Anthropic-style date (20250514) or tagged versions (v1, beta, etc.) in last part.
	if n >= 2 {
		last := parts[n-1]
		if anthropicDateRe.MatchString(last) || taggedVersionRe.MatchString(last) {
			base := strings.Join(parts[:n-1], "-")
			return base, last
		}
	}
	// 3. No recognized version suffix.
	return id, ""
}

// BaseModelName returns the model id with any recognized version suffix stripped.
//
// This is your "model name without version".
func BaseModelName(id string) string { base, _ := SplitModelAndVersion(id) return base } // SameBaseModel reports whether two model ids refer to the same base model, // ignoring any recognized version suffixes. // // This works even if both sides are versioned, or both unversioned. func SameBaseModel(a, b string) bool { // Fast path: exact match. if a == b { return true } // Compare normalized base names. return BaseModelName(a) == BaseModelName(b) }