// bifrost/framework/modelcatalog/utils.go

package modelcatalog

import (
	"context"
	"slices"
	"strings"
	"time"

	"github.com/bytedance/sonic"
	"github.com/maximhq/bifrost/core/schemas"
	configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
)

// retryBackoffMin is the delay WithRetries waits before its first retry;
// the delay doubles on every subsequent retry.
const retryBackoffMin = time.Second

// WithRetries runs op until it succeeds or maxRetries retries are exhausted
// (1 initial attempt + maxRetries retries). A negative maxRetries is treated
// as zero, so op is attempted exactly once.
//
// After each failure it waits with exponential backoff starting at
// retryBackoffMin (1 second) and doubling on every retry, capped at maxBackoff
// when maxBackoff > 0. If maxBackoff is zero or negative there is no explicit
// cap; the delay then saturates at the largest representable time.Duration
// instead of overflowing.
//
// ctx is checked before every attempt and while waiting between attempts. Once
// ctx is done, WithRetries returns the zero value of T together with ctx.Err().
// If every attempt fails, it returns the zero value of T and the error of the
// last attempt.
func WithRetries[T any](ctx context.Context, maxRetries int, maxBackoff time.Duration, op func() (T, error)) (T, error) {
	// maxDuration is the largest representable time.Duration. The backoff
	// saturates here rather than wrapping around to a negative or zero delay,
	// which would turn the retry loop into a busy loop.
	const maxDuration time.Duration = 1<<63 - 1

	var zero T
	if maxRetries < 0 {
		maxRetries = 0
	}

	var lastErr error
	backoff := retryBackoffMin // uncapped delay to apply before the next retry
	for attempt := 0; attempt <= maxRetries; attempt++ {
		// Do not start another attempt once the caller has given up.
		if err := ctx.Err(); err != nil {
			return zero, err
		}

		if attempt > 0 {
			wait := backoff
			if maxBackoff > 0 && wait > maxBackoff {
				wait = maxBackoff
			}

			// An explicit timer (rather than time.After) can be stopped as
			// soon as ctx is cancelled instead of lingering until it fires.
			timer := time.NewTimer(wait)
			select {
			case <-ctx.Done():
				timer.Stop()
				return zero, ctx.Err()
			case <-timer.C:
			}

			// Double the delay for the next retry without overflowing int64.
			if backoff > maxDuration/2 {
				backoff = maxDuration
			} else {
				backoff *= 2
			}
		}

		v, err := op()
		if err == nil {
			return v, nil
		}
		lastErr = err
	}
	return zero, lastErr
}

// makeKey builds the pricingData map key for a (model, provider, mode) triple
// by joining the three parts with "|".
func makeKey(model, provider, mode string) string {
	const sep = "|"
	return model + sep + provider + sep + mode
}

// normalizeProvider maps provider spellings found in pricing data onto the
// canonical schemas provider names. Matching is by substring (plus the exact
// alias "google-vertex"); the first matching rule wins, and a name that matches
// no rule is returned unchanged.
func normalizeProvider(p string) string {
	switch {
	case p == "google-vertex", strings.Contains(p, "vertex_ai"):
		return string(schemas.Vertex)
	case strings.Contains(p, "bedrock"):
		return string(schemas.Bedrock)
	case strings.Contains(p, "cohere"):
		return string(schemas.Cohere)
	case strings.Contains(p, "runwayml"):
		return string(schemas.Runway)
	case strings.Contains(p, "fireworks_ai"):
		return string(schemas.Fireworks)
	default:
		return p
	}
}

// normalizeRequestType collapses a request type into the mode string for its
// request family. Streaming and non-streaming variants share a mode, and any
// request type not listed below yields "unknown".
func normalizeRequestType(reqType schemas.RequestType) string {
	switch reqType {
	case schemas.TextCompletionRequest, schemas.TextCompletionStreamRequest:
		return "completion"
	case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest:
		return "chat"
	case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.RealtimeRequest:
		// Realtime requests are grouped with the responses mode.
		return "responses"
	case schemas.EmbeddingRequest:
		return "embedding"
	case schemas.RerankRequest:
		return "rerank"
	case schemas.SpeechRequest, schemas.SpeechStreamRequest:
		return "audio_speech"
	case schemas.TranscriptionRequest, schemas.TranscriptionStreamRequest:
		return "audio_transcription"
	case schemas.ImageGenerationRequest, schemas.ImageGenerationStreamRequest, schemas.ImageVariationRequest:
		// Image variations are grouped with image generation.
		return "image_generation"
	case schemas.ImageEditRequest, schemas.ImageEditStreamRequest:
		return "image_edit"
	case schemas.VideoGenerationRequest, schemas.VideoRemixRequest:
		// Video remixes are grouped with video generation.
		return "video_generation"
	case schemas.OCRRequest:
		return "ocr"
	default:
		return "unknown"
	}
}

// normalizeStreamRequestType maps a streaming request type to its
// non-streaming base request type. Every other request type, including
// RealtimeRequest, is returned as-is.
func normalizeStreamRequestType(rt schemas.RequestType) schemas.RequestType {
	base := rt // anything without a streaming counterpart maps to itself
	switch rt {
	case schemas.TextCompletionStreamRequest:
		base = schemas.TextCompletionRequest
	case schemas.ChatCompletionStreamRequest:
		base = schemas.ChatCompletionRequest
	case schemas.ResponsesStreamRequest:
		base = schemas.ResponsesRequest
	case schemas.SpeechStreamRequest:
		base = schemas.SpeechRequest
	case schemas.TranscriptionStreamRequest:
		base = schemas.TranscriptionRequest
	case schemas.ImageGenerationStreamRequest:
		base = schemas.ImageGenerationRequest
	case schemas.ImageEditStreamRequest:
		base = schemas.ImageEditRequest
	}
	return base
}

// extractModelName strips the leading "provider/" segment from a model key.
// Everything after the first "/" is returned, so further slashes inside the
// model name are preserved; a key without any "/" is returned unchanged.
func extractModelName(modelKey string) string {
	_, name, hasProvider := strings.Cut(modelKey, "/")
	if !hasProvider {
		return modelKey
	}
	return name
}

// convertPricingDataToTableModelPricing maps a pricing entry onto a
// TableModelPricing row. The row's Model is modelKey with any leading
// "provider/" segment removed (extractModelName) and its Provider is
// entry.Provider passed through normalizeProvider; every other field set here
// is copied from entry unchanged. Fields of TableModelPricing that are not
// assigned below keep their zero values.
func convertPricingDataToTableModelPricing(modelKey string, entry PricingEntry) configstoreTables.TableModelPricing {
	var row configstoreTables.TableModelPricing

	// Identity and limits
	row.Provider = normalizeProvider(entry.Provider)
	row.Model = extractModelName(modelKey)
	row.BaseModel = entry.BaseModel
	row.Mode = entry.Mode
	row.ContextLength = entry.ContextLength
	row.MaxInputTokens = entry.MaxInputTokens
	row.MaxOutputTokens = entry.MaxOutputTokens
	row.Architecture = entry.Architecture

	// Costs - Text
	row.InputCostPerToken = entry.InputCostPerToken
	row.OutputCostPerToken = entry.OutputCostPerToken
	row.InputCostPerTokenBatches = entry.InputCostPerTokenBatches
	row.OutputCostPerTokenBatches = entry.OutputCostPerTokenBatches
	row.InputCostPerTokenPriority = entry.InputCostPerTokenPriority
	row.OutputCostPerTokenPriority = entry.OutputCostPerTokenPriority
	row.InputCostPerTokenFlex = entry.InputCostPerTokenFlex
	row.OutputCostPerTokenFlex = entry.OutputCostPerTokenFlex
	row.InputCostPerTokenAbove200kTokens = entry.InputCostPerTokenAbove200kTokens
	row.InputCostPerTokenAbove200kTokensPriority = entry.InputCostPerTokenAbove200kTokensPriority
	row.OutputCostPerTokenAbove200kTokens = entry.OutputCostPerTokenAbove200kTokens
	row.OutputCostPerTokenAbove200kTokensPriority = entry.OutputCostPerTokenAbove200kTokensPriority

	// Costs - 272k Tier
	row.InputCostPerTokenAbove272kTokens = entry.InputCostPerTokenAbove272kTokens
	row.InputCostPerTokenAbove272kTokensPriority = entry.InputCostPerTokenAbove272kTokensPriority
	row.OutputCostPerTokenAbove272kTokens = entry.OutputCostPerTokenAbove272kTokens
	row.OutputCostPerTokenAbove272kTokensPriority = entry.OutputCostPerTokenAbove272kTokensPriority

	// Costs - Character
	row.InputCostPerCharacter = entry.InputCostPerCharacter

	// Costs - 128k Tier
	row.InputCostPerTokenAbove128kTokens = entry.InputCostPerTokenAbove128kTokens
	row.InputCostPerImageAbove128kTokens = entry.InputCostPerImageAbove128kTokens
	row.InputCostPerVideoPerSecondAbove128kTokens = entry.InputCostPerVideoPerSecondAbove128kTokens
	row.InputCostPerAudioPerSecondAbove128kTokens = entry.InputCostPerAudioPerSecondAbove128kTokens
	row.OutputCostPerTokenAbove128kTokens = entry.OutputCostPerTokenAbove128kTokens

	// Costs - Cache
	row.CacheCreationInputTokenCost = entry.CacheCreationInputTokenCost
	row.CacheReadInputTokenCost = entry.CacheReadInputTokenCost
	row.CacheCreationInputTokenCostAbove200kTokens = entry.CacheCreationInputTokenCostAbove200kTokens
	row.CacheReadInputTokenCostAbove200kTokens = entry.CacheReadInputTokenCostAbove200kTokens
	row.CacheReadInputTokenCostAbove200kTokensPriority = entry.CacheReadInputTokenCostAbove200kTokensPriority
	row.CacheCreationInputTokenCostAbove1hr = entry.CacheCreationInputTokenCostAbove1hr
	row.CacheCreationInputTokenCostAbove1hrAbove200kTokens = entry.CacheCreationInputTokenCostAbove1hrAbove200kTokens
	row.CacheCreationInputAudioTokenCost = entry.CacheCreationInputAudioTokenCost
	row.CacheReadInputTokenCostPriority = entry.CacheReadInputTokenCostPriority
	row.CacheReadInputTokenCostFlex = entry.CacheReadInputTokenCostFlex
	row.CacheReadInputImageTokenCost = entry.CacheReadInputImageTokenCost
	row.CacheReadInputTokenCostAbove272kTokens = entry.CacheReadInputTokenCostAbove272kTokens
	row.CacheReadInputTokenCostAbove272kTokensPriority = entry.CacheReadInputTokenCostAbove272kTokensPriority

	// Costs - Image
	row.InputCostPerImage = entry.InputCostPerImage
	row.InputCostPerPixel = entry.InputCostPerPixel
	row.OutputCostPerImage = entry.OutputCostPerImage
	row.OutputCostPerPixel = entry.OutputCostPerPixel
	row.OutputCostPerImagePremiumImage = entry.OutputCostPerImagePremiumImage
	row.OutputCostPerImageAbove512x512Pixels = entry.OutputCostPerImageAbove512x512Pixels
	row.OutputCostPerImageAbove512x512PixelsPremium = entry.OutputCostPerImageAbove512x512PixelsPremium
	row.OutputCostPerImageAbove1024x1024Pixels = entry.OutputCostPerImageAbove1024x1024Pixels
	row.OutputCostPerImageAbove1024x1024PixelsPremium = entry.OutputCostPerImageAbove1024x1024PixelsPremium
	row.OutputCostPerImageAbove2048x2048Pixels = entry.OutputCostPerImageAbove2048x2048Pixels
	row.OutputCostPerImageAbove4096x4096Pixels = entry.OutputCostPerImageAbove4096x4096Pixels
	row.OutputCostPerImageLowQuality = entry.OutputCostPerImageLowQuality
	row.OutputCostPerImageMediumQuality = entry.OutputCostPerImageMediumQuality
	row.OutputCostPerImageHighQuality = entry.OutputCostPerImageHighQuality
	row.OutputCostPerImageAutoQuality = entry.OutputCostPerImageAutoQuality

	// Costs - Image Token
	row.InputCostPerImageToken = entry.InputCostPerImageToken
	row.OutputCostPerImageToken = entry.OutputCostPerImageToken

	// Costs - Audio/Video
	row.InputCostPerAudioToken = entry.InputCostPerAudioToken
	row.InputCostPerAudioPerSecond = entry.InputCostPerAudioPerSecond
	row.InputCostPerSecond = entry.InputCostPerSecond
	row.InputCostPerVideoPerSecond = entry.InputCostPerVideoPerSecond
	row.OutputCostPerAudioToken = entry.OutputCostPerAudioToken
	row.OutputCostPerVideoPerSecond = entry.OutputCostPerVideoPerSecond
	row.OutputCostPerSecond = entry.OutputCostPerSecond

	// Costs - Other
	row.SearchContextCostPerQuery = entry.SearchContextCostPerQuery
	row.CodeInterpreterCostPerSession = entry.CodeInterpreterCostPerSession

	// Costs - OCR
	row.OCRCostPerPage = entry.OCRCostPerPage
	row.AnnotationCostPerPage = entry.AnnotationCostPerPage

	return row
}

// convertTableModelPricingToPricingData maps a TableModelPricing row back onto
// a freshly allocated PricingEntry. Every cost field is copied unchanged into
// the entry's PricingOptions, and the model metadata (base model, provider,
// mode, limits, architecture) is copied onto the entry itself. The row's Model
// field is not carried over. pricing must be non-nil.
func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModelPricing) *PricingEntry {
	var opts PricingOptions

	// Costs - Text
	opts.InputCostPerToken = pricing.InputCostPerToken
	opts.OutputCostPerToken = pricing.OutputCostPerToken
	opts.InputCostPerTokenBatches = pricing.InputCostPerTokenBatches
	opts.OutputCostPerTokenBatches = pricing.OutputCostPerTokenBatches
	opts.InputCostPerTokenPriority = pricing.InputCostPerTokenPriority
	opts.OutputCostPerTokenPriority = pricing.OutputCostPerTokenPriority
	opts.InputCostPerTokenFlex = pricing.InputCostPerTokenFlex
	opts.OutputCostPerTokenFlex = pricing.OutputCostPerTokenFlex
	opts.InputCostPerTokenAbove200kTokens = pricing.InputCostPerTokenAbove200kTokens
	opts.InputCostPerTokenAbove200kTokensPriority = pricing.InputCostPerTokenAbove200kTokensPriority
	opts.OutputCostPerTokenAbove200kTokens = pricing.OutputCostPerTokenAbove200kTokens
	opts.OutputCostPerTokenAbove200kTokensPriority = pricing.OutputCostPerTokenAbove200kTokensPriority

	// Costs - 272k Tier
	opts.InputCostPerTokenAbove272kTokens = pricing.InputCostPerTokenAbove272kTokens
	opts.InputCostPerTokenAbove272kTokensPriority = pricing.InputCostPerTokenAbove272kTokensPriority
	opts.OutputCostPerTokenAbove272kTokens = pricing.OutputCostPerTokenAbove272kTokens
	opts.OutputCostPerTokenAbove272kTokensPriority = pricing.OutputCostPerTokenAbove272kTokensPriority

	// Costs - Character
	opts.InputCostPerCharacter = pricing.InputCostPerCharacter

	// Costs - 128k Tier
	opts.InputCostPerTokenAbove128kTokens = pricing.InputCostPerTokenAbove128kTokens
	opts.InputCostPerImageAbove128kTokens = pricing.InputCostPerImageAbove128kTokens
	opts.InputCostPerVideoPerSecondAbove128kTokens = pricing.InputCostPerVideoPerSecondAbove128kTokens
	opts.InputCostPerAudioPerSecondAbove128kTokens = pricing.InputCostPerAudioPerSecondAbove128kTokens
	opts.OutputCostPerTokenAbove128kTokens = pricing.OutputCostPerTokenAbove128kTokens

	// Costs - Cache
	opts.CacheCreationInputTokenCost = pricing.CacheCreationInputTokenCost
	opts.CacheReadInputTokenCost = pricing.CacheReadInputTokenCost
	opts.CacheCreationInputTokenCostAbove200kTokens = pricing.CacheCreationInputTokenCostAbove200kTokens
	opts.CacheReadInputTokenCostAbove200kTokens = pricing.CacheReadInputTokenCostAbove200kTokens
	opts.CacheReadInputTokenCostAbove200kTokensPriority = pricing.CacheReadInputTokenCostAbove200kTokensPriority
	opts.CacheCreationInputTokenCostAbove1hr = pricing.CacheCreationInputTokenCostAbove1hr
	opts.CacheCreationInputTokenCostAbove1hrAbove200kTokens = pricing.CacheCreationInputTokenCostAbove1hrAbove200kTokens
	opts.CacheCreationInputAudioTokenCost = pricing.CacheCreationInputAudioTokenCost
	opts.CacheReadInputTokenCostPriority = pricing.CacheReadInputTokenCostPriority
	opts.CacheReadInputTokenCostFlex = pricing.CacheReadInputTokenCostFlex
	opts.CacheReadInputImageTokenCost = pricing.CacheReadInputImageTokenCost
	opts.CacheReadInputTokenCostAbove272kTokens = pricing.CacheReadInputTokenCostAbove272kTokens
	opts.CacheReadInputTokenCostAbove272kTokensPriority = pricing.CacheReadInputTokenCostAbove272kTokensPriority

	// Costs - Image
	opts.InputCostPerImage = pricing.InputCostPerImage
	opts.InputCostPerPixel = pricing.InputCostPerPixel
	opts.OutputCostPerImage = pricing.OutputCostPerImage
	opts.OutputCostPerPixel = pricing.OutputCostPerPixel
	opts.OutputCostPerImagePremiumImage = pricing.OutputCostPerImagePremiumImage
	opts.OutputCostPerImageAbove512x512Pixels = pricing.OutputCostPerImageAbove512x512Pixels
	opts.OutputCostPerImageAbove512x512PixelsPremium = pricing.OutputCostPerImageAbove512x512PixelsPremium
	opts.OutputCostPerImageAbove1024x1024Pixels = pricing.OutputCostPerImageAbove1024x1024Pixels
	opts.OutputCostPerImageAbove1024x1024PixelsPremium = pricing.OutputCostPerImageAbove1024x1024PixelsPremium
	opts.OutputCostPerImageAbove2048x2048Pixels = pricing.OutputCostPerImageAbove2048x2048Pixels
	opts.OutputCostPerImageAbove4096x4096Pixels = pricing.OutputCostPerImageAbove4096x4096Pixels
	opts.OutputCostPerImageLowQuality = pricing.OutputCostPerImageLowQuality
	opts.OutputCostPerImageMediumQuality = pricing.OutputCostPerImageMediumQuality
	opts.OutputCostPerImageHighQuality = pricing.OutputCostPerImageHighQuality
	opts.OutputCostPerImageAutoQuality = pricing.OutputCostPerImageAutoQuality

	// Costs - Image Token
	opts.InputCostPerImageToken = pricing.InputCostPerImageToken
	opts.OutputCostPerImageToken = pricing.OutputCostPerImageToken

	// Costs - Audio/Video
	opts.InputCostPerAudioToken = pricing.InputCostPerAudioToken
	opts.InputCostPerAudioPerSecond = pricing.InputCostPerAudioPerSecond
	opts.InputCostPerSecond = pricing.InputCostPerSecond
	opts.InputCostPerVideoPerSecond = pricing.InputCostPerVideoPerSecond
	opts.OutputCostPerAudioToken = pricing.OutputCostPerAudioToken
	opts.OutputCostPerVideoPerSecond = pricing.OutputCostPerVideoPerSecond
	opts.OutputCostPerSecond = pricing.OutputCostPerSecond

	// Costs - Other
	opts.SearchContextCostPerQuery = pricing.SearchContextCostPerQuery
	opts.CodeInterpreterCostPerSession = pricing.CodeInterpreterCostPerSession

	// Costs - OCR
	opts.OCRCostPerPage = pricing.OCRCostPerPage
	opts.AnnotationCostPerPage = pricing.AnnotationCostPerPage

	return &PricingEntry{
		BaseModel:       pricing.BaseModel,
		Provider:        pricing.Provider,
		Mode:            pricing.Mode,
		ContextLength:   pricing.ContextLength,
		MaxInputTokens:  pricing.MaxInputTokens,
		MaxOutputTokens: pricing.MaxOutputTokens,
		Architecture:    pricing.Architecture,
		PricingOptions:  opts,
	}
}

// convertTablePricingOverrideToPricingOverride turns a stored
// TablePricingOverride into a PricingOverride. The row's PricingPatchJSON is
// decoded with sonic into PricingOptions and attached as Options; the remaining
// fields are copied over, with ScopeKind and MatchType converted to their
// modelcatalog types. If the JSON cannot be decoded, the zero PricingOverride
// and the decode error are returned.
func convertTablePricingOverrideToPricingOverride(override *configstoreTables.TablePricingOverride) (PricingOverride, error) {
	var patch PricingOptions
	err := sonic.Unmarshal([]byte(override.PricingPatchJSON), &patch)
	if err != nil {
		return PricingOverride{}, err
	}

	var out PricingOverride
	out.ID = override.ID
	out.Name = override.Name
	out.ScopeKind = ScopeKind(override.ScopeKind)
	out.VirtualKeyID = override.VirtualKeyID
	out.ProviderID = override.ProviderID
	out.ProviderKeyID = override.ProviderKeyID
	out.MatchType = MatchType(override.MatchType)
	out.Pattern = override.Pattern
	out.RequestTypes = override.RequestTypes
	out.Options = patch
	return out, nil
}

// normalizeEndpointToOutputType converts a supported_endpoints URL path to a
// normalized output type by substring match. It returns the empty string for
// endpoints that match none of the known fragments.
func normalizeEndpointToOutputType(endpoint string) string {
	// Order matters: "/chat/completions" also contains "/completions", so the
	// more specific fragment must be tried first.
	rules := [...]struct {
		fragment   string
		outputType string
	}{
		{"/chat/completions", "chat_completion"},
		{"/responses", "responses"},
		{"/completions", "text_completion"},
	}
	for _, rule := range rules {
		if strings.Contains(endpoint, rule.fragment) {
			return rule.outputType
		}
	}
	return ""
}

// normalizeModeToOutputType translates a mode ("chat", "completion",
// "responses") into its normalized output type. The match is exact; any other
// mode yields the empty string.
func normalizeModeToOutputType(mode string) string {
	var outputType string // stays empty for unrecognized modes
	switch mode {
	case "chat":
		outputType = "chat_completion"
	case "completion":
		outputType = "text_completion"
	case "responses":
		outputType = "responses"
	}
	return outputType
}

// modelParametersParseResult is the parsed result type used by
// buildSupportedOutputsIndex. extractSupportedParams reads ModelParameters and
// the Supports* flags from it.
type modelParametersParseResult struct {
	// Mode is decoded from the "mode" key; nil when the key is absent.
	Mode *string `json:"mode,omitempty"`
	// SupportedEndpoints is decoded from the "supported_endpoints" key.
	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
	// ModelParameters is decoded from "model_parameters"; only each entry's
	// "id" is retained.
	ModelParameters []struct {
		ID string `json:"id"`
	} `json:"model_parameters,omitempty"`
	// Optional capability flags. A nil pointer means the key was absent and is
	// treated the same as false by extractSupportedParams.
	SupportsFunctionCalling         *bool `json:"supports_function_calling,omitempty"`
	SupportsParallelFunctionCalling *bool `json:"supports_parallel_function_calling,omitempty"`
	SupportsToolChoice              *bool `json:"supports_tool_choice,omitempty"`
	SupportsReasoning               *bool `json:"supports_reasoning,omitempty"`
	SupportsServiceTier             *bool `json:"supports_service_tier,omitempty"`
	SupportsPromptCaching           *bool `json:"supports_prompt_caching,omitempty"`
}

// extractSupportedParams derives the list of supported OpenAI-compatible
// request parameter names from parsed. Names come first from
// model_parameters[].id (with a few IDs renamed or skipped) and then from the
// supports_* flags that are set to true. Each name appears once, in order of
// first occurrence. The result is nil when nothing is supported. parsed must
// be non-nil.
func extractSupportedParams(parsed *modelParametersParseResult) []string {
	var params []string
	include := func(name string) {
		if slices.Contains(params, name) {
			return // already recorded; keep first-occurrence order
		}
		params = append(params, name)
	}

	// Pass 1: model_parameters[].id, translated to request parameter names.
	for i := range parsed.ModelParameters {
		id := parsed.ModelParameters[i].ID
		switch id {
		case "promptTools", "image_detail", "stream":
			// Not top-level request parameters.
			continue
		case "reasoning_effort", "reasoning_summary":
			include("reasoning")
		case "web_search":
			include("web_search_options")
		default:
			include(id)
		}
	}

	// Pass 2: supports_* flags, in a fixed order that determines output order.
	flagParams := []struct {
		enabled *bool
		names   []string
	}{
		{parsed.SupportsFunctionCalling, []string{"tools"}},
		{parsed.SupportsParallelFunctionCalling, []string{"parallel_tool_calls"}},
		{parsed.SupportsToolChoice, []string{"tool_choice"}},
		{parsed.SupportsReasoning, []string{"reasoning"}},
		{parsed.SupportsServiceTier, []string{"service_tier"}},
		{parsed.SupportsPromptCaching, []string{"prompt_cache_key", "prompt_cache_retention"}},
	}
	for _, fp := range flagParams {
		if fp.enabled == nil || !*fp.enabled {
			continue
		}
		for _, name := range fp.names {
			include(name)
		}
	}

	return params
}