1206 lines
52 KiB
Go
1206 lines
52 KiB
Go
package modelcatalog
|
||
|
||
import (
|
||
"strconv"
|
||
"strings"
|
||
|
||
"github.com/bytedance/sonic"
|
||
"github.com/maximhq/bifrost/core/schemas"
|
||
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
|
||
)
|
||
|
||
// Default sync interval and config key
|
||
const (
|
||
TokenTierAbove272K = 272000
|
||
TokenTierAbove200K = 200000
|
||
TokenTierAbove128K = 128000
|
||
)
|
||
|
||
// PricingEntry represents a single model's pricing information.
|
||
// Field names and JSON tags match the datasheet schema exactly.
|
||
type PricingEntry struct {
|
||
BaseModel string `json:"base_model,omitempty"`
|
||
Provider string `json:"provider"`
|
||
Mode string `json:"mode"`
|
||
|
||
ContextLength *int `json:"context_length,omitempty"`
|
||
MaxInputTokens *int `json:"max_input_tokens,omitempty"`
|
||
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||
Architecture *schemas.Architecture `json:"architecture,omitempty"`
|
||
|
||
PricingOptions
|
||
}
|
||
|
||
// UnmarshalJSON implements json.Unmarshaler for PricingEntry.
|
||
// It handles the special case where search_context_cost_per_query may arrive as either
|
||
// a plain float64 or a tiered object {"search_context_size_low":…,
|
||
// "search_context_size_medium":…, "search_context_size_high":…}.
|
||
func (p *PricingEntry) UnmarshalJSON(data []byte) error {
|
||
// Type alias breaks the UnmarshalJSON recursion while keeping all other fields.
|
||
type PricingEntryAlias PricingEntry
|
||
var raw struct {
|
||
PricingEntryAlias
|
||
SearchContextCostPerQuery *struct {
|
||
Low *float64 `json:"search_context_size_low"`
|
||
Medium *float64 `json:"search_context_size_medium"`
|
||
High *float64 `json:"search_context_size_high"`
|
||
} `json:"search_context_cost_per_query,omitempty"`
|
||
}
|
||
if err := sonic.Unmarshal(data, &raw); err != nil {
|
||
return err
|
||
}
|
||
*p = PricingEntry(raw.PricingEntryAlias)
|
||
|
||
// search_context_cost_per_query arrives as a tiered object – all three values are
|
||
// equal for non-Perplexity providers; we prefer medium, then low, then high.
|
||
// Perplexity always returns a pre-computed total_cost so the per-query rate is
|
||
// never consumed for that provider.
|
||
if q := raw.SearchContextCostPerQuery; q != nil {
|
||
switch {
|
||
case q.Medium != nil:
|
||
p.SearchContextCostPerQuery = q.Medium
|
||
case q.Low != nil:
|
||
p.SearchContextCostPerQuery = q.Low
|
||
case q.High != nil:
|
||
p.SearchContextCostPerQuery = q.High
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
type PricingOptions struct {
|
||
// Costs - Text
|
||
InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"`
|
||
OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"`
|
||
InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"`
|
||
OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"`
|
||
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority,omitempty"`
|
||
OutputCostPerTokenPriority *float64 `json:"output_cost_per_token_priority,omitempty"`
|
||
InputCostPerTokenFlex *float64 `json:"input_cost_per_token_flex,omitempty"`
|
||
OutputCostPerTokenFlex *float64 `json:"output_cost_per_token_flex,omitempty"`
|
||
InputCostPerCharacter *float64 `json:"input_cost_per_character,omitempty"`
|
||
// Costs - 128k Tier
|
||
InputCostPerTokenAbove128kTokens *float64 `json:"input_cost_per_token_above_128k_tokens,omitempty"`
|
||
InputCostPerImageAbove128kTokens *float64 `json:"input_cost_per_image_above_128k_tokens,omitempty"`
|
||
InputCostPerVideoPerSecondAbove128kTokens *float64 `json:"input_cost_per_video_per_second_above_128k_tokens,omitempty"`
|
||
InputCostPerAudioPerSecondAbove128kTokens *float64 `json:"input_cost_per_audio_per_second_above_128k_tokens,omitempty"`
|
||
OutputCostPerTokenAbove128kTokens *float64 `json:"output_cost_per_token_above_128k_tokens,omitempty"`
|
||
// Costs - 200k Tier
|
||
InputCostPerTokenAbove200kTokens *float64 `json:"input_cost_per_token_above_200k_tokens,omitempty"`
|
||
InputCostPerTokenAbove200kTokensPriority *float64 `json:"input_cost_per_token_above_200k_tokens_priority,omitempty"`
|
||
OutputCostPerTokenAbove200kTokens *float64 `json:"output_cost_per_token_above_200k_tokens,omitempty"`
|
||
OutputCostPerTokenAbove200kTokensPriority *float64 `json:"output_cost_per_token_above_200k_tokens_priority,omitempty"`
|
||
// Costs - 272k Tier
|
||
InputCostPerTokenAbove272kTokens *float64 `json:"input_cost_per_token_above_272k_tokens,omitempty"`
|
||
InputCostPerTokenAbove272kTokensPriority *float64 `json:"input_cost_per_token_above_272k_tokens_priority,omitempty"`
|
||
OutputCostPerTokenAbove272kTokens *float64 `json:"output_cost_per_token_above_272k_tokens,omitempty"`
|
||
OutputCostPerTokenAbove272kTokensPriority *float64 `json:"output_cost_per_token_above_272k_tokens_priority,omitempty"`
|
||
|
||
// Costs - Cache
|
||
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost,omitempty"`
|
||
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost,omitempty"`
|
||
CacheCreationInputTokenCostAbove200kTokens *float64 `json:"cache_creation_input_token_cost_above_200k_tokens,omitempty"`
|
||
CacheReadInputTokenCostAbove200kTokens *float64 `json:"cache_read_input_token_cost_above_200k_tokens,omitempty"`
|
||
CacheReadInputTokenCostAbove200kTokensPriority *float64 `json:"cache_read_input_token_cost_above_200k_tokens_priority,omitempty"`
|
||
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr,omitempty"`
|
||
CacheCreationInputTokenCostAbove1hrAbove200kTokens *float64 `json:"cache_creation_input_token_cost_above_1hr_above_200k_tokens,omitempty"`
|
||
CacheCreationInputAudioTokenCost *float64 `json:"cache_creation_input_audio_token_cost,omitempty"`
|
||
CacheReadInputTokenCostPriority *float64 `json:"cache_read_input_token_cost_priority,omitempty"`
|
||
CacheReadInputTokenCostFlex *float64 `json:"cache_read_input_token_cost_flex,omitempty"`
|
||
CacheReadInputImageTokenCost *float64 `json:"cache_read_input_image_token_cost,omitempty"`
|
||
CacheReadInputTokenCostAbove272kTokens *float64 `json:"cache_read_input_token_cost_above_272k_tokens,omitempty"`
|
||
CacheReadInputTokenCostAbove272kTokensPriority *float64 `json:"cache_read_input_token_cost_above_272k_tokens_priority,omitempty"`
|
||
|
||
// Costs - Image
|
||
InputCostPerImage *float64 `json:"input_cost_per_image,omitempty"`
|
||
InputCostPerPixel *float64 `json:"input_cost_per_pixel,omitempty"`
|
||
OutputCostPerImage *float64 `json:"output_cost_per_image,omitempty"`
|
||
OutputCostPerPixel *float64 `json:"output_cost_per_pixel,omitempty"`
|
||
OutputCostPerImagePremiumImage *float64 `json:"output_cost_per_image_premium_image,omitempty"`
|
||
OutputCostPerImageAbove512x512Pixels *float64 `json:"output_cost_per_image_above_512_and_512_pixels,omitempty"`
|
||
OutputCostPerImageAbove512x512PixelsPremium *float64 `json:"output_cost_per_image_above_512_and_512_pixels_and_premium_image,omitempty"`
|
||
OutputCostPerImageAbove1024x1024Pixels *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels,omitempty"`
|
||
OutputCostPerImageAbove1024x1024PixelsPremium *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels_and_premium_image,omitempty"`
|
||
OutputCostPerImageAbove2048x2048Pixels *float64 `json:"output_cost_per_image_above_2048_and_2048_pixels,omitempty"`
|
||
OutputCostPerImageAbove4096x4096Pixels *float64 `json:"output_cost_per_image_above_4096_and_4096_pixels,omitempty"`
|
||
OutputCostPerImageLowQuality *float64 `json:"output_cost_per_image_low_quality,omitempty"`
|
||
OutputCostPerImageMediumQuality *float64 `json:"output_cost_per_image_medium_quality,omitempty"`
|
||
OutputCostPerImageHighQuality *float64 `json:"output_cost_per_image_high_quality,omitempty"`
|
||
OutputCostPerImageAutoQuality *float64 `json:"output_cost_per_image_auto_quality,omitempty"`
|
||
InputCostPerImageToken *float64 `json:"input_cost_per_image_token,omitempty"`
|
||
OutputCostPerImageToken *float64 `json:"output_cost_per_image_token,omitempty"`
|
||
|
||
// Costs - Audio/Video
|
||
InputCostPerAudioToken *float64 `json:"input_cost_per_audio_token,omitempty"`
|
||
InputCostPerAudioPerSecond *float64 `json:"input_cost_per_audio_per_second,omitempty"`
|
||
InputCostPerSecond *float64 `json:"input_cost_per_second,omitempty"`
|
||
InputCostPerVideoPerSecond *float64 `json:"input_cost_per_video_per_second,omitempty"`
|
||
OutputCostPerAudioToken *float64 `json:"output_cost_per_audio_token,omitempty"`
|
||
OutputCostPerVideoPerSecond *float64 `json:"output_cost_per_video_per_second,omitempty"`
|
||
OutputCostPerSecond *float64 `json:"output_cost_per_second,omitempty"`
|
||
|
||
// Costs - Other
|
||
//
|
||
// SearchContextCostPerQuery is stored as a single float64, but the pricing datasheet
|
||
// represents it as a tiered object with three keys: search_context_size_low,
|
||
// search_context_size_medium, and search_context_size_high. For every provider except
|
||
// Perplexity the three tier values are identical, so we collapse the object to its
|
||
// medium tier value (falling back to low then high). Perplexity always returns a
|
||
// pre-computed total_cost in its usage response, so the per-query rate is never
|
||
// consumed for that provider; the collapsed value is therefore correct in all cases.
|
||
// See UnmarshalJSON below for the custom decoding logic.
|
||
SearchContextCostPerQuery *float64 `json:"search_context_cost_per_query,omitempty"`
|
||
CodeInterpreterCostPerSession *float64 `json:"code_interpreter_cost_per_session,omitempty"`
|
||
|
||
// Costs - OCR
|
||
OCRCostPerPage *float64 `json:"ocr_cost_per_page,omitempty"`
|
||
AnnotationCostPerPage *float64 `json:"annotation_cost_per_page,omitempty"`
|
||
}
|
||
|
||
// serviceTier captures the OpenAI service_tier value from a response.
|
||
// Add new tier flags here as OpenAI introduces them.
|
||
type serviceTier struct {
|
||
isPriority bool // true when service_tier == "priority"
|
||
isFlex bool // true when service_tier == "flex"
|
||
}
|
||
|
||
// costInput holds the extracted usage data from a BifrostResponse,
|
||
// normalized for the pricing engine.
|
||
type costInput struct {
|
||
usage *schemas.BifrostLLMUsage
|
||
audioTextInputChars int
|
||
audioSeconds *int
|
||
audioTokenDetails *schemas.TranscriptionUsageInputTokenDetails
|
||
imageUsage *schemas.ImageUsage
|
||
imageSize string // e.g. "1024x1024", used for per-pixel pricing
|
||
imageQuality string // "low", "medium", "high", "auto" (gpt-image-1.5); empty = use base rate
|
||
videoSeconds *int
|
||
ocrProcessedPages *int
|
||
ocrIsAnnotated *bool
|
||
tier serviceTier
|
||
}
|
||
|
||
// GetPricingEntryForModel returns the pricing data
|
||
func (mc *ModelCatalog) GetPricingEntryForModel(model string, provider schemas.ModelProvider) *PricingEntry {
|
||
mc.mu.RLock()
|
||
defer mc.mu.RUnlock()
|
||
// Check all modes
|
||
for _, mode := range []schemas.RequestType{
|
||
schemas.TextCompletionRequest,
|
||
schemas.ChatCompletionRequest,
|
||
schemas.ResponsesRequest,
|
||
schemas.EmbeddingRequest,
|
||
schemas.RerankRequest,
|
||
schemas.SpeechRequest,
|
||
schemas.TranscriptionRequest,
|
||
schemas.ImageGenerationRequest,
|
||
schemas.ImageEditRequest,
|
||
schemas.ImageVariationRequest,
|
||
schemas.VideoGenerationRequest,
|
||
schemas.OCRRequest,
|
||
} {
|
||
key := makeKey(model, string(provider), normalizeRequestType(mode))
|
||
pricing, ok := mc.pricingData[key]
|
||
if ok {
|
||
return convertTableModelPricingToPricingData(&pricing)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// CalculateCost calculates the cost of a Bifrost response.
|
||
// It handles all request types, cache debug billing, and tiered pricing.
|
||
// If scopes is nil, an empty PricingLookupScopes is used; global and provider-scoped
|
||
// overrides may still apply since the provider is derived from the response.
|
||
func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse, scopes *PricingLookupScopes) float64 {
|
||
if result == nil {
|
||
return 0
|
||
}
|
||
|
||
var s PricingLookupScopes
|
||
if scopes != nil {
|
||
s = *scopes
|
||
}
|
||
|
||
// Handle semantic cache billing
|
||
cacheDebug := result.GetExtraFields().CacheDebug
|
||
if cacheDebug != nil {
|
||
return mc.calculateCostWithCache(result, cacheDebug, s)
|
||
}
|
||
|
||
return mc.calculateBaseCost(result, s)
|
||
}
|
||
|
||
// calculateCostWithCache handles cost calculation when semantic cache debug info is present.
|
||
func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 {
|
||
if cacheDebug.CacheHit {
|
||
// Direct cache hit — no LLM call, no cost
|
||
if cacheDebug.HitType != nil && *cacheDebug.HitType == "direct" {
|
||
return 0
|
||
}
|
||
// Semantic cache hit — only the embedding lookup cost
|
||
if cacheDebug.ProviderUsed != nil && cacheDebug.ModelUsed != nil && cacheDebug.InputTokens != nil {
|
||
return mc.computeCacheEmbeddingCost(cacheDebug, scopes)
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// Cache miss — full LLM cost + embedding lookup cost
|
||
baseCost := mc.calculateBaseCost(result, scopes)
|
||
embeddingCost := mc.computeCacheEmbeddingCost(cacheDebug, scopes)
|
||
return baseCost + embeddingCost
|
||
}
|
||
|
||
// computeCacheEmbeddingCost calculates the embedding cost for a semantic cache lookup.
|
||
func (mc *ModelCatalog) computeCacheEmbeddingCost(cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 {
|
||
if cacheDebug == nil || cacheDebug.ProviderUsed == nil || cacheDebug.ModelUsed == nil || cacheDebug.InputTokens == nil {
|
||
return 0
|
||
}
|
||
if scopes.Provider == "" {
|
||
scopes.Provider = *cacheDebug.ProviderUsed
|
||
}
|
||
pricing := mc.resolvePricing(*cacheDebug.ProviderUsed, *cacheDebug.ModelUsed, "", schemas.EmbeddingRequest, scopes)
|
||
if pricing == nil {
|
||
return 0
|
||
}
|
||
return float64(*cacheDebug.InputTokens) * tieredInputRate(pricing, *cacheDebug.InputTokens, serviceTier{})
|
||
}
|
||
|
||
// calculateBaseCost extracts usage from the response and routes to the appropriate compute function.
|
||
func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse, scopes PricingLookupScopes) float64 {
|
||
extraFields := result.GetExtraFields()
|
||
if extraFields == nil {
|
||
return 0
|
||
}
|
||
|
||
provider := string(extraFields.Provider)
|
||
originalModelRequested := extraFields.OriginalModelRequested
|
||
resolvedModelUsed := extraFields.ResolvedModelUsed
|
||
requestType := extraFields.RequestType
|
||
|
||
// Extract usage data from the response
|
||
input := extractCostInput(result)
|
||
|
||
// If provider already computed cost, use it
|
||
if input.usage != nil && input.usage.Cost != nil && input.usage.Cost.TotalCost > 0 {
|
||
return input.usage.Cost.TotalCost
|
||
}
|
||
|
||
// If no usage data at all, nothing to price
|
||
if input.usage == nil && input.audioSeconds == nil && input.audioTokenDetails == nil && input.imageUsage == nil && input.videoSeconds == nil && input.audioTextInputChars == 0 && input.ocrProcessedPages == nil {
|
||
return 0
|
||
}
|
||
|
||
// Normalize stream request types to their base type for pricing lookup
|
||
requestType = normalizeStreamRequestType(requestType)
|
||
|
||
// Resolve pricing entry with deployment fallback
|
||
pricing := mc.resolvePricing(provider, originalModelRequested, resolvedModelUsed, requestType, scopes)
|
||
if pricing == nil {
|
||
return 0
|
||
}
|
||
|
||
// Route to the appropriate compute function
|
||
switch requestType {
|
||
case schemas.ChatCompletionRequest, schemas.TextCompletionRequest, schemas.ResponsesRequest:
|
||
return computeTextCost(pricing, input.usage, input.tier)
|
||
case schemas.EmbeddingRequest:
|
||
return computeEmbeddingCost(pricing, input.usage, input.tier)
|
||
case schemas.RerankRequest:
|
||
return computeRerankCost(pricing, input.usage, input.tier)
|
||
case schemas.SpeechRequest:
|
||
return computeSpeechCost(pricing, input.usage, input.audioSeconds, input.audioTextInputChars, input.tier)
|
||
case schemas.TranscriptionRequest:
|
||
return computeTranscriptionCost(pricing, input.usage, input.audioSeconds, input.audioTokenDetails, input.tier)
|
||
case schemas.ImageGenerationRequest, schemas.ImageEditRequest, schemas.ImageVariationRequest:
|
||
return computeImageCost(pricing, input.imageUsage, input.imageSize, input.imageQuality, input.tier)
|
||
case schemas.VideoGenerationRequest, schemas.VideoRemixRequest:
|
||
return computeVideoCost(pricing, input.usage, input.videoSeconds, input.tier)
|
||
case schemas.OCRRequest:
|
||
return computeOCRCost(pricing, input.ocrProcessedPages, input.ocrIsAnnotated)
|
||
default:
|
||
return 0
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Usage extraction
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func extractCostInput(result *schemas.BifrostResponse) costInput {
|
||
var input costInput
|
||
|
||
switch {
|
||
case result.TextCompletionResponse != nil && result.TextCompletionResponse.Usage != nil:
|
||
input.usage = result.TextCompletionResponse.Usage
|
||
|
||
case result.ChatResponse != nil && result.ChatResponse.Usage != nil:
|
||
input.usage = result.ChatResponse.Usage
|
||
input.tier = tierFromString(result.ChatResponse.ServiceTier)
|
||
|
||
case result.ResponsesResponse != nil && result.ResponsesResponse.Usage != nil:
|
||
input.usage = responsesUsageToBifrostUsage(result.ResponsesResponse.Usage)
|
||
input.tier = tierFromString(result.ResponsesResponse.ServiceTier)
|
||
|
||
case result.ResponsesStreamResponse != nil && result.ResponsesStreamResponse.Response != nil && result.ResponsesStreamResponse.Response.Usage != nil:
|
||
input.usage = responsesUsageToBifrostUsage(result.ResponsesStreamResponse.Response.Usage)
|
||
input.tier = tierFromString(result.ResponsesStreamResponse.Response.ServiceTier)
|
||
|
||
case result.EmbeddingResponse != nil && result.EmbeddingResponse.Usage != nil:
|
||
input.usage = result.EmbeddingResponse.Usage
|
||
|
||
case result.RerankResponse != nil && result.RerankResponse.Usage != nil:
|
||
input.usage = result.RerankResponse.Usage
|
||
|
||
case result.SpeechResponse != nil && result.SpeechResponse.Usage != nil:
|
||
input.usage = speechUsageToBifrostUsage(result.SpeechResponse.Usage)
|
||
input.audioTextInputChars = result.SpeechResponse.Usage.InputChars
|
||
|
||
case result.SpeechStreamResponse != nil && result.SpeechStreamResponse.Usage != nil:
|
||
input.usage = speechUsageToBifrostUsage(result.SpeechStreamResponse.Usage)
|
||
input.audioTextInputChars = result.SpeechStreamResponse.Usage.InputChars
|
||
|
||
case result.TranscriptionResponse != nil && result.TranscriptionResponse.Usage != nil:
|
||
input.usage, input.audioSeconds, input.audioTokenDetails = extractTranscriptionUsage(result.TranscriptionResponse.Usage)
|
||
|
||
case result.TranscriptionStreamResponse != nil && result.TranscriptionStreamResponse.Usage != nil:
|
||
input.usage, input.audioSeconds, input.audioTokenDetails = extractTranscriptionUsage(result.TranscriptionStreamResponse.Usage)
|
||
|
||
case result.ImageGenerationResponse != nil:
|
||
if result.ImageGenerationResponse.Usage != nil {
|
||
input.imageUsage = result.ImageGenerationResponse.Usage
|
||
} else {
|
||
// No usage data but response exists — default to empty so per-image pricing can apply
|
||
input.imageUsage = &schemas.ImageUsage{}
|
||
}
|
||
populateOutputImageCount(input.imageUsage, len(result.ImageGenerationResponse.Data))
|
||
if result.ImageGenerationResponse.ImageGenerationResponseParameters != nil {
|
||
input.imageSize = result.ImageGenerationResponse.ImageGenerationResponseParameters.Size
|
||
input.imageQuality = result.ImageGenerationResponse.ImageGenerationResponseParameters.Quality
|
||
}
|
||
|
||
case result.ImageGenerationStreamResponse != nil:
|
||
if result.ImageGenerationStreamResponse.Usage != nil {
|
||
input.imageUsage = result.ImageGenerationStreamResponse.Usage
|
||
} else {
|
||
input.imageUsage = &schemas.ImageUsage{}
|
||
}
|
||
input.imageSize = result.ImageGenerationStreamResponse.Size
|
||
input.imageQuality = result.ImageGenerationStreamResponse.Quality
|
||
|
||
case result.VideoGenerationResponse != nil && result.VideoGenerationResponse.Seconds != nil:
|
||
seconds, err := strconv.Atoi(*result.VideoGenerationResponse.Seconds)
|
||
if err == nil {
|
||
input.videoSeconds = &seconds
|
||
}
|
||
|
||
case result.OCRResponse != nil:
|
||
pages := len(result.OCRResponse.Pages)
|
||
if result.OCRResponse.UsageInfo != nil && result.OCRResponse.UsageInfo.PagesProcessed > 0 {
|
||
pages = result.OCRResponse.UsageInfo.PagesProcessed
|
||
}
|
||
input.ocrProcessedPages = &pages
|
||
isAnnotated := result.OCRResponse.DocumentAnnotation != nil && *result.OCRResponse.DocumentAnnotation != ""
|
||
input.ocrIsAnnotated = &isAnnotated
|
||
}
|
||
|
||
return input
|
||
}
|
||
|
||
func responsesUsageToBifrostUsage(u *schemas.ResponsesResponseUsage) *schemas.BifrostLLMUsage {
|
||
usage := &schemas.BifrostLLMUsage{
|
||
PromptTokens: u.InputTokens,
|
||
CompletionTokens: u.OutputTokens,
|
||
TotalTokens: u.TotalTokens,
|
||
Cost: u.Cost,
|
||
}
|
||
// Map token details for cache and search query pricing
|
||
if u.InputTokensDetails != nil {
|
||
usage.PromptTokensDetails = &schemas.ChatPromptTokensDetails{
|
||
TextTokens: u.InputTokensDetails.TextTokens,
|
||
AudioTokens: u.InputTokensDetails.AudioTokens,
|
||
ImageTokens: u.InputTokensDetails.ImageTokens,
|
||
CachedReadTokens: u.InputTokensDetails.CachedReadTokens,
|
||
CachedWriteTokens: u.InputTokensDetails.CachedWriteTokens,
|
||
}
|
||
}
|
||
if u.OutputTokensDetails != nil {
|
||
usage.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{
|
||
ReasoningTokens: u.OutputTokensDetails.ReasoningTokens,
|
||
}
|
||
if u.OutputTokensDetails.NumSearchQueries != nil {
|
||
usage.CompletionTokensDetails.NumSearchQueries = u.OutputTokensDetails.NumSearchQueries
|
||
}
|
||
}
|
||
return usage
|
||
}
|
||
|
||
func speechUsageToBifrostUsage(u *schemas.SpeechUsage) *schemas.BifrostLLMUsage {
|
||
return &schemas.BifrostLLMUsage{
|
||
PromptTokens: u.InputTokens,
|
||
CompletionTokens: u.OutputTokens,
|
||
TotalTokens: u.TotalTokens,
|
||
}
|
||
}
|
||
|
||
func extractTranscriptionUsage(u *schemas.TranscriptionUsage) (*schemas.BifrostLLMUsage, *int, *schemas.TranscriptionUsageInputTokenDetails) {
|
||
usage := &schemas.BifrostLLMUsage{}
|
||
if u.InputTokens != nil {
|
||
usage.PromptTokens = *u.InputTokens
|
||
}
|
||
if u.OutputTokens != nil {
|
||
usage.CompletionTokens = *u.OutputTokens
|
||
}
|
||
if u.TotalTokens != nil {
|
||
usage.TotalTokens = *u.TotalTokens
|
||
} else {
|
||
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||
}
|
||
|
||
var audioTokenDetails *schemas.TranscriptionUsageInputTokenDetails
|
||
if u.InputTokenDetails != nil {
|
||
audioTokenDetails = &schemas.TranscriptionUsageInputTokenDetails{
|
||
AudioTokens: u.InputTokenDetails.AudioTokens,
|
||
TextTokens: u.InputTokenDetails.TextTokens,
|
||
}
|
||
}
|
||
|
||
return usage, u.Seconds, audioTokenDetails
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Per-request-type cost computation
|
||
// ---------------------------------------------------------------------------
|
||
|
||
// computeTextCost handles chat, text completion, and responses requests.
|
||
func computeTextCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, tier serviceTier) float64 {
|
||
if usage == nil {
|
||
return 0
|
||
}
|
||
|
||
totalTokens := usage.TotalTokens
|
||
promptTokens := usage.PromptTokens
|
||
completionTokens := usage.CompletionTokens
|
||
|
||
// Extract cached token counts
|
||
cachedReadTokens := 0
|
||
cachedWriteTokens := 0
|
||
if usage.PromptTokensDetails != nil {
|
||
cachedReadTokens = usage.PromptTokensDetails.CachedReadTokens
|
||
cachedWriteTokens = usage.PromptTokensDetails.CachedWriteTokens
|
||
}
|
||
|
||
inputRate := tieredInputRate(pricing, totalTokens, tier)
|
||
outputRate := tieredOutputRate(pricing, totalTokens, tier)
|
||
cacheReadInputRate := tieredCacheReadInputTokenRate(pricing, totalTokens, tier)
|
||
cacheCreationInputRate := tieredCacheCreationInputTokenRate(pricing, totalTokens, tier)
|
||
|
||
// Clamp cached token counts to avoid negative billing on malformed provider payloads
|
||
if cachedReadTokens > promptTokens {
|
||
cachedReadTokens = promptTokens
|
||
}
|
||
if cachedWriteTokens > promptTokens-cachedReadTokens {
|
||
cachedWriteTokens = promptTokens - cachedReadTokens
|
||
}
|
||
|
||
// Input cost: non-cached tokens at regular rate
|
||
nonCachedPrompt := promptTokens - cachedReadTokens - cachedWriteTokens
|
||
inputCost := float64(nonCachedPrompt) * inputRate
|
||
|
||
// Add cached prompt tokens at cache read rate
|
||
if cachedReadTokens > 0 {
|
||
inputCost += float64(cachedReadTokens) * cacheReadInputRate
|
||
}
|
||
|
||
// Add cached write tokens at cache creation rate
|
||
if cachedWriteTokens > 0 {
|
||
inputCost += float64(cachedWriteTokens) * cacheCreationInputRate
|
||
}
|
||
|
||
outputCost := float64(completionTokens) * outputRate
|
||
|
||
// Search query cost
|
||
searchCost := 0.0
|
||
if pricing.SearchContextCostPerQuery != nil && usage.CompletionTokensDetails != nil && usage.CompletionTokensDetails.NumSearchQueries != nil {
|
||
searchCost = float64(*usage.CompletionTokensDetails.NumSearchQueries) * *pricing.SearchContextCostPerQuery
|
||
}
|
||
|
||
return inputCost + outputCost + searchCost
|
||
}
|
||
|
||
// computeEmbeddingCost handles embedding requests (input-only).
|
||
func computeEmbeddingCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, tier serviceTier) float64 {
|
||
if usage == nil {
|
||
return 0
|
||
}
|
||
return float64(usage.PromptTokens) * tieredInputRate(pricing, usage.TotalTokens, tier)
|
||
}
|
||
|
||
// computeRerankCost handles rerank requests.
|
||
func computeRerankCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, tier serviceTier) float64 {
|
||
if usage == nil {
|
||
return 0
|
||
}
|
||
inputCost := float64(usage.PromptTokens) * tieredInputRate(pricing, usage.TotalTokens, tier)
|
||
outputCost := float64(usage.CompletionTokens) * tieredOutputRate(pricing, usage.TotalTokens, tier)
|
||
|
||
searchCost := 0.0
|
||
if pricing.SearchContextCostPerQuery != nil && usage.CompletionTokensDetails != nil && usage.CompletionTokensDetails.NumSearchQueries != nil {
|
||
searchCost = float64(*usage.CompletionTokensDetails.NumSearchQueries) * *pricing.SearchContextCostPerQuery
|
||
}
|
||
|
||
return inputCost + outputCost + searchCost
|
||
}
|
||
|
||
// computeSpeechCost handles speech (TTS) requests.
|
||
// Input is text (PromptTokens), output is audio (CompletionTokens).
|
||
//
|
||
// Per-character pricing (InputCostPerCharacter) is used as first-class support for TTS/audio
|
||
// models — providers such as OpenAI TTS, ElevenLabs, and AWS Polly bill per character of
|
||
// input text rather than per token. PromptTokens from usage is treated as the character count
|
||
// since TTS providers report their billable unit in that field.
|
||
// Output falls back to per-second duration when no audio token rate is configured.
|
||
func computeSpeechCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, audioSeconds *int, audioTextInputChars int, tier serviceTier) float64 {
|
||
totalTokens := safeTotalTokens(usage)
|
||
|
||
// Input: per-character rate takes precedence for TTS/audio models
|
||
inputCost := 0.0
|
||
if audioTextInputChars > 0 {
|
||
if pricing.InputCostPerCharacter != nil {
|
||
inputCost = float64(audioTextInputChars) * *pricing.InputCostPerCharacter
|
||
} else {
|
||
inputCost = float64(audioTextInputChars) * tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
} else if usage != nil && usage.PromptTokens > 0 {
|
||
inputCost = float64(usage.PromptTokens) * tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Output: audio tokens first, then per-second fallback
|
||
outputCost := computeAudioOutputCost(pricing, usage, audioSeconds, totalTokens, tier)
|
||
|
||
return inputCost + outputCost
|
||
}
|
||
|
||
// computeTranscriptionCost handles transcription (STT) requests.
|
||
// Input is audio, output is text (CompletionTokens).
|
||
// Input and output are calculated independently — tokens first, then per-second fallback.
|
||
func computeTranscriptionCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, audioSeconds *int, audioTokenDetails *schemas.TranscriptionUsageInputTokenDetails, tier serviceTier) float64 {
|
||
totalTokens := safeTotalTokens(usage)
|
||
|
||
// Input: audio tokens/details first, then per-second fallback
|
||
inputCost := computeAudioInputCost(pricing, usage, audioSeconds, audioTokenDetails, totalTokens, tier)
|
||
|
||
// Output: text tokens
|
||
outputCost := 0.0
|
||
if usage != nil && usage.CompletionTokens > 0 {
|
||
outputCost = float64(usage.CompletionTokens) * tieredOutputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
return inputCost + outputCost
|
||
}
|
||
|
||
// computeAudioInputCost calculates input cost for audio: audio token details first,
|
||
// then generic input tokens, then per-second duration fallback.
|
||
func computeAudioInputCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, audioSeconds *int, audioTokenDetails *schemas.TranscriptionUsageInputTokenDetails, totalTokens int, tier serviceTier) float64 {
|
||
// Audio token detail pricing (audio + text token breakdown)
|
||
if audioTokenDetails != nil && (audioTokenDetails.AudioTokens > 0 || audioTokenDetails.TextTokens > 0) {
|
||
return float64(audioTokenDetails.AudioTokens)*tieredAudioTokenInputRate(pricing, totalTokens, tier) +
|
||
float64(audioTokenDetails.TextTokens)*tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Generic input tokens
|
||
if usage != nil && usage.PromptTokens > 0 {
|
||
return float64(usage.PromptTokens) * tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Per-second duration fallback
|
||
if audioSeconds != nil && *audioSeconds > 0 {
|
||
if rate := tieredAudioInputPerSecondRate(pricing, totalTokens); rate > 0 {
|
||
return float64(*audioSeconds) * rate
|
||
}
|
||
}
|
||
|
||
return 0
|
||
}
|
||
|
||
// computeAudioOutputCost calculates output cost for audio: audio tokens first,
|
||
// then generic output tokens, then per-second duration fallback.
|
||
func computeAudioOutputCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, audioSeconds *int, totalTokens int, tier serviceTier) float64 {
|
||
// Audio-specific output tokens
|
||
if usage != nil && usage.CompletionTokens > 0 {
|
||
return float64(usage.CompletionTokens) * tieredAudioTokenOutputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Per-second duration fallback
|
||
if audioSeconds != nil && *audioSeconds > 0 {
|
||
if pricing.OutputCostPerSecond != nil {
|
||
return float64(*audioSeconds) * *pricing.OutputCostPerSecond
|
||
}
|
||
}
|
||
|
||
return 0
|
||
}
|
||
|
||
// computeImageCost handles image generation requests.
|
||
// Input and output are calculated independently — each tries token-based pricing first,
|
||
// then per-pixel pricing, falling back to per-image count pricing.
|
||
// imageQuality must be one of "low", "medium", "high", "auto" to use quality-specific rates; other values use base rates.
|
||
func computeImageCost(pricing *configstoreTables.TableModelPricing, imageUsage *schemas.ImageUsage, imageSize string, imageQuality string, tier serviceTier) float64 {
|
||
if imageUsage == nil {
|
||
return 0
|
||
}
|
||
|
||
totalTokens := imageUsage.TotalTokens
|
||
pixels := parseImagePixels(imageSize)
|
||
inputCost := computeImageInputCost(pricing, imageUsage, totalTokens, pixels, tier)
|
||
outputCost := computeImageOutputCost(pricing, imageUsage, totalTokens, pixels, imageQuality, tier)
|
||
|
||
return inputCost + outputCost
|
||
}
|
||
|
||
// computeImageInputCost calculates input cost: tokens first, then per-pixel, then per-image count fallback.
|
||
func computeImageInputCost(pricing *configstoreTables.TableModelPricing, imageUsage *schemas.ImageUsage, totalTokens int, pixels int, tier serviceTier) float64 {
|
||
// Try token-based pricing first
|
||
var inputTextTokens, inputImageTokens int
|
||
if imageUsage.InputTokensDetails != nil {
|
||
inputImageTokens = imageUsage.InputTokensDetails.ImageTokens
|
||
inputTextTokens = imageUsage.InputTokensDetails.TextTokens
|
||
} else {
|
||
inputTextTokens = imageUsage.InputTokens
|
||
}
|
||
|
||
if inputTextTokens > 0 || inputImageTokens > 0 {
|
||
return float64(inputTextTokens)*tieredInputRate(pricing, totalTokens, tier) +
|
||
float64(inputImageTokens)*tieredImageInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Per-pixel pricing fallback
|
||
if pricing.InputCostPerPixel != nil && pixels > 0 && imageUsage.NumInputImages > 0 {
|
||
return float64(pixels*imageUsage.NumInputImages) * *pricing.InputCostPerPixel
|
||
}
|
||
|
||
// Fall back to per-image count pricing
|
||
if pricing.InputCostPerImage != nil && imageUsage.NumInputImages > 0 {
|
||
return float64(imageUsage.NumInputImages) * *pricing.InputCostPerImage
|
||
}
|
||
|
||
return 0
|
||
}
|
||
|
||
// computeImageOutputCost calculates output cost: tokens first, then per-pixel, then per-image count fallback.
|
||
// imageQuality: "low", "medium", "high", "auto" use quality-specific rates when available; other values use base/size-tier rates.
|
||
func computeImageOutputCost(pricing *configstoreTables.TableModelPricing, imageUsage *schemas.ImageUsage, totalTokens int, pixels int, imageQuality string, tier serviceTier) float64 {
|
||
// Try token-based pricing first
|
||
var outputTextTokens, outputImageTokens int
|
||
if imageUsage.OutputTokensDetails != nil {
|
||
outputImageTokens = imageUsage.OutputTokensDetails.ImageTokens
|
||
outputTextTokens = imageUsage.OutputTokensDetails.TextTokens
|
||
} else {
|
||
outputImageTokens = imageUsage.OutputTokens
|
||
}
|
||
|
||
if outputTextTokens > 0 || outputImageTokens > 0 {
|
||
return float64(outputTextTokens)*tieredOutputRate(pricing, totalTokens, tier) +
|
||
float64(outputImageTokens)*tieredImageOutputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Per-pixel pricing fallback
|
||
if pricing.OutputCostPerPixel != nil && pixels > 0 {
|
||
numOutputImages := 1
|
||
if imageUsage.OutputTokensDetails != nil && imageUsage.OutputTokensDetails.NImages > 0 {
|
||
numOutputImages = imageUsage.OutputTokensDetails.NImages
|
||
}
|
||
return float64(pixels*numOutputImages) * *pricing.OutputCostPerPixel
|
||
}
|
||
|
||
// Fall back to per-image count pricing with size-tier selection
|
||
// TODO: handle premium image flag when it becomes available in imageUsage
|
||
numOutputImages := 1
|
||
if imageUsage.OutputTokensDetails != nil && imageUsage.OutputTokensDetails.NImages > 0 {
|
||
numOutputImages = imageUsage.OutputTokensDetails.NImages
|
||
}
|
||
var perImageRate *float64
|
||
q := imageQuality
|
||
if q == "" {
|
||
q = "auto"
|
||
}
|
||
switch q {
|
||
case "low":
|
||
if pricing.OutputCostPerImageLowQuality != nil {
|
||
perImageRate = pricing.OutputCostPerImageLowQuality
|
||
}
|
||
case "medium":
|
||
if pricing.OutputCostPerImageMediumQuality != nil {
|
||
perImageRate = pricing.OutputCostPerImageMediumQuality
|
||
}
|
||
case "high":
|
||
if pricing.OutputCostPerImageHighQuality != nil {
|
||
perImageRate = pricing.OutputCostPerImageHighQuality
|
||
}
|
||
case "auto":
|
||
if pricing.OutputCostPerImageAutoQuality != nil {
|
||
perImageRate = pricing.OutputCostPerImageAutoQuality
|
||
}
|
||
}
|
||
if perImageRate == nil {
|
||
const pixels512x512 = 512 * 512
|
||
const pixels1024x1024 = 1024 * 1024
|
||
const pixels2048x2048 = 2048 * 2048
|
||
const pixels4096x4096 = 4096 * 4096
|
||
switch {
|
||
case pixels >= pixels4096x4096 && pricing.OutputCostPerImageAbove4096x4096Pixels != nil:
|
||
perImageRate = pricing.OutputCostPerImageAbove4096x4096Pixels
|
||
case pixels >= pixels2048x2048 && pricing.OutputCostPerImageAbove2048x2048Pixels != nil:
|
||
perImageRate = pricing.OutputCostPerImageAbove2048x2048Pixels
|
||
case pixels >= pixels1024x1024 && pricing.OutputCostPerImageAbove1024x1024Pixels != nil:
|
||
perImageRate = pricing.OutputCostPerImageAbove1024x1024Pixels
|
||
case pixels >= pixels512x512 && pricing.OutputCostPerImageAbove512x512Pixels != nil:
|
||
perImageRate = pricing.OutputCostPerImageAbove512x512Pixels
|
||
default:
|
||
perImageRate = pricing.OutputCostPerImage
|
||
}
|
||
}
|
||
if perImageRate != nil {
|
||
return float64(numOutputImages) * *perImageRate
|
||
}
|
||
|
||
return 0
|
||
}
|
||
|
||
// computeVideoCost handles video generation requests.
|
||
// Input and output are calculated independently — tokens first, then per-second fallback.
|
||
func computeVideoCost(pricing *configstoreTables.TableModelPricing, usage *schemas.BifrostLLMUsage, videoSeconds *int, tier serviceTier) float64 {
|
||
totalTokens := safeTotalTokens(usage)
|
||
|
||
// Input: text prompt tokens first, then per-second fallback
|
||
inputCost := 0.0
|
||
if usage != nil && usage.PromptTokens > 0 {
|
||
inputCost = float64(usage.PromptTokens) * tieredInputRate(pricing, totalTokens, tier)
|
||
} else if videoSeconds != nil && *videoSeconds > 0 {
|
||
if rate := tieredVideoInputPerSecondRate(pricing, totalTokens); rate > 0 {
|
||
inputCost = float64(*videoSeconds) * rate
|
||
}
|
||
}
|
||
|
||
// Output: completion tokens first, then per-second fallback
|
||
outputCost := 0.0
|
||
if usage != nil && usage.CompletionTokens > 0 {
|
||
outputCost = float64(usage.CompletionTokens) * tieredOutputRate(pricing, totalTokens, tier)
|
||
} else if videoSeconds != nil && *videoSeconds > 0 {
|
||
if pricing.OutputCostPerVideoPerSecond != nil {
|
||
outputCost = float64(*videoSeconds) * *pricing.OutputCostPerVideoPerSecond
|
||
} else if pricing.OutputCostPerSecond != nil {
|
||
outputCost = float64(*videoSeconds) * *pricing.OutputCostPerSecond
|
||
}
|
||
}
|
||
|
||
return inputCost + outputCost
|
||
}
|
||
|
||
// computeOCRCost handles OCR requests, billing per page processed.
|
||
// ocr_cost_per_page covers base processing; annotation_cost_per_page is added when set.
|
||
func computeOCRCost(pricing *configstoreTables.TableModelPricing, ocrProcessedPages *int, ocrIsAnnotated *bool) float64 {
|
||
if ocrProcessedPages == nil {
|
||
return 0
|
||
}
|
||
pages := float64(*ocrProcessedPages)
|
||
cost := 0.0
|
||
if pricing.OCRCostPerPage != nil {
|
||
cost += pages * *pricing.OCRCostPerPage
|
||
}
|
||
if ocrIsAnnotated != nil && *ocrIsAnnotated && pricing.AnnotationCostPerPage != nil {
|
||
cost += pages * *pricing.AnnotationCostPerPage
|
||
}
|
||
return cost
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Helpers
|
||
// ---------------------------------------------------------------------------
|
||
|
||
// tierFromString constructs a serviceTier from an OpenAI service_tier response value.
|
||
func tierFromString(s *string) serviceTier {
|
||
if s == nil {
|
||
return serviceTier{}
|
||
}
|
||
switch *s {
|
||
case "priority":
|
||
return serviceTier{isPriority: true}
|
||
case "flex":
|
||
return serviceTier{isFlex: true}
|
||
default:
|
||
return serviceTier{}
|
||
}
|
||
}
|
||
|
||
// tieredInputRate returns the effective per-token input rate based on total token count.
|
||
// Flex applies a flat rate. Priority-specific tier rates are preferred where available.
|
||
func tieredInputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if tier.isFlex && pricing.InputCostPerTokenFlex != nil {
|
||
return *pricing.InputCostPerTokenFlex
|
||
}
|
||
if totalTokens > TokenTierAbove272K {
|
||
if tier.isPriority && pricing.InputCostPerTokenAbove272kTokensPriority != nil {
|
||
return *pricing.InputCostPerTokenAbove272kTokensPriority
|
||
}
|
||
if pricing.InputCostPerTokenAbove272kTokens != nil {
|
||
return *pricing.InputCostPerTokenAbove272kTokens
|
||
}
|
||
}
|
||
if totalTokens > TokenTierAbove200K {
|
||
if tier.isPriority && pricing.InputCostPerTokenAbove200kTokensPriority != nil {
|
||
return *pricing.InputCostPerTokenAbove200kTokensPriority
|
||
}
|
||
if pricing.InputCostPerTokenAbove200kTokens != nil {
|
||
return *pricing.InputCostPerTokenAbove200kTokens
|
||
}
|
||
}
|
||
if totalTokens > TokenTierAbove128K && pricing.InputCostPerTokenAbove128kTokens != nil {
|
||
return *pricing.InputCostPerTokenAbove128kTokens
|
||
}
|
||
if tier.isPriority && pricing.InputCostPerTokenPriority != nil {
|
||
return *pricing.InputCostPerTokenPriority
|
||
}
|
||
if pricing.InputCostPerToken != nil {
|
||
return *pricing.InputCostPerToken
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// tieredOutputRate returns the effective per-token output rate based on total token count.
|
||
// Flex applies a flat rate. Priority-specific tier rates are preferred where available.
|
||
func tieredOutputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if tier.isFlex && pricing.OutputCostPerTokenFlex != nil {
|
||
return *pricing.OutputCostPerTokenFlex
|
||
}
|
||
if totalTokens > TokenTierAbove272K {
|
||
if tier.isPriority && pricing.OutputCostPerTokenAbove272kTokensPriority != nil {
|
||
return *pricing.OutputCostPerTokenAbove272kTokensPriority
|
||
}
|
||
if pricing.OutputCostPerTokenAbove272kTokens != nil {
|
||
return *pricing.OutputCostPerTokenAbove272kTokens
|
||
}
|
||
}
|
||
if totalTokens > TokenTierAbove200K {
|
||
if tier.isPriority && pricing.OutputCostPerTokenAbove200kTokensPriority != nil {
|
||
return *pricing.OutputCostPerTokenAbove200kTokensPriority
|
||
}
|
||
if pricing.OutputCostPerTokenAbove200kTokens != nil {
|
||
return *pricing.OutputCostPerTokenAbove200kTokens
|
||
}
|
||
}
|
||
if totalTokens > TokenTierAbove128K && pricing.OutputCostPerTokenAbove128kTokens != nil {
|
||
return *pricing.OutputCostPerTokenAbove128kTokens
|
||
}
|
||
|
||
if tier.isPriority && pricing.OutputCostPerTokenPriority != nil {
|
||
return *pricing.OutputCostPerTokenPriority
|
||
}
|
||
|
||
if pricing.OutputCostPerToken != nil {
|
||
return *pricing.OutputCostPerToken
|
||
}
|
||
|
||
return 0
|
||
}
|
||
|
||
// tieredImageInputRate returns the effective rate for image tokens on the input side.
|
||
// Falls back to the general tieredInputRate when no image-specific rate is configured.
|
||
func tieredImageInputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if totalTokens > TokenTierAbove128K && pricing.InputCostPerImageAbove128kTokens != nil {
|
||
return *pricing.InputCostPerImageAbove128kTokens
|
||
}
|
||
if pricing.InputCostPerImageToken != nil {
|
||
return *pricing.InputCostPerImageToken
|
||
}
|
||
return tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// tieredImageOutputRate returns the effective rate for image tokens on the output side.
|
||
// Falls back to the general tieredOutputRate when no image-specific rate is configured.
|
||
func tieredImageOutputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if pricing.OutputCostPerImageToken != nil {
|
||
return *pricing.OutputCostPerImageToken
|
||
}
|
||
return tieredOutputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// tieredAudioInputPerSecondRate returns the effective per-second rate for audio input.
|
||
func tieredAudioInputPerSecondRate(pricing *configstoreTables.TableModelPricing, totalTokens int) float64 {
|
||
if totalTokens > TokenTierAbove128K && pricing.InputCostPerAudioPerSecondAbove128kTokens != nil {
|
||
return *pricing.InputCostPerAudioPerSecondAbove128kTokens
|
||
}
|
||
if pricing.InputCostPerAudioPerSecond != nil {
|
||
return *pricing.InputCostPerAudioPerSecond
|
||
}
|
||
if pricing.InputCostPerSecond != nil {
|
||
return *pricing.InputCostPerSecond
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// tieredVideoInputPerSecondRate returns the effective per-second rate for video input.
|
||
func tieredVideoInputPerSecondRate(pricing *configstoreTables.TableModelPricing, totalTokens int) float64 {
|
||
if totalTokens > TokenTierAbove128K && pricing.InputCostPerVideoPerSecondAbove128kTokens != nil {
|
||
return *pricing.InputCostPerVideoPerSecondAbove128kTokens
|
||
}
|
||
if pricing.InputCostPerVideoPerSecond != nil {
|
||
return *pricing.InputCostPerVideoPerSecond
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// tieredAudioTokenInputRate returns the effective per-token rate for audio input tokens.
|
||
// Falls back to the general tieredInputRate when no audio-specific rate is configured.
|
||
func tieredAudioTokenInputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if pricing.InputCostPerAudioToken != nil {
|
||
return *pricing.InputCostPerAudioToken
|
||
}
|
||
return tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// tieredAudioTokenOutputRate returns the effective per-token rate for audio output tokens.
|
||
// Falls back to the general tieredOutputRate when no audio-specific rate is configured.
|
||
func tieredAudioTokenOutputRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if pricing.OutputCostPerAudioToken != nil {
|
||
return *pricing.OutputCostPerAudioToken
|
||
}
|
||
return tieredOutputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
func tieredCacheReadInputTokenRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if tier.isFlex && pricing.CacheReadInputTokenCostFlex != nil {
|
||
return *pricing.CacheReadInputTokenCostFlex
|
||
}
|
||
if totalTokens > TokenTierAbove272K {
|
||
if tier.isPriority && pricing.CacheReadInputTokenCostAbove272kTokensPriority != nil {
|
||
return *pricing.CacheReadInputTokenCostAbove272kTokensPriority
|
||
}
|
||
if pricing.CacheReadInputTokenCostAbove272kTokens != nil {
|
||
return *pricing.CacheReadInputTokenCostAbove272kTokens
|
||
}
|
||
}
|
||
if totalTokens > TokenTierAbove200K {
|
||
if tier.isPriority && pricing.CacheReadInputTokenCostAbove200kTokensPriority != nil {
|
||
return *pricing.CacheReadInputTokenCostAbove200kTokensPriority
|
||
}
|
||
if pricing.CacheReadInputTokenCostAbove200kTokens != nil {
|
||
return *pricing.CacheReadInputTokenCostAbove200kTokens
|
||
}
|
||
}
|
||
if tier.isPriority && pricing.CacheReadInputTokenCostPriority != nil {
|
||
return *pricing.CacheReadInputTokenCostPriority
|
||
}
|
||
if pricing.CacheReadInputTokenCost != nil {
|
||
return *pricing.CacheReadInputTokenCost
|
||
}
|
||
return tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
// Note: flex tier is not checked here because cache creation is not a concept in
|
||
// OpenAI's pricing model (the only provider that uses flex tier). Only cache read
|
||
// has a flex-specific rate.
|
||
func tieredCacheCreationInputTokenRate(pricing *configstoreTables.TableModelPricing, totalTokens int, tier serviceTier) float64 {
|
||
if totalTokens > TokenTierAbove200K && pricing.CacheCreationInputTokenCostAbove200kTokens != nil {
|
||
return *pricing.CacheCreationInputTokenCostAbove200kTokens
|
||
}
|
||
if pricing.CacheCreationInputTokenCost != nil {
|
||
return *pricing.CacheCreationInputTokenCost
|
||
}
|
||
return tieredInputRate(pricing, totalTokens, tier)
|
||
}
|
||
|
||
func safeTotalTokens(usage *schemas.BifrostLLMUsage) int {
|
||
if usage == nil {
|
||
return 0
|
||
}
|
||
return usage.TotalTokens
|
||
}
|
||
|
||
// parseImagePixels parses a size string like "1024x1024" into total pixel count.
|
||
// Returns 0 if the size string is empty or malformed.
|
||
func parseImagePixels(size string) int {
|
||
if size == "" {
|
||
return 0
|
||
}
|
||
parts := strings.SplitN(size, "x", 2)
|
||
if len(parts) != 2 {
|
||
return 0
|
||
}
|
||
w, err := strconv.Atoi(parts[0])
|
||
if err != nil || w <= 0 {
|
||
return 0
|
||
}
|
||
h, err := strconv.Atoi(parts[1])
|
||
if err != nil || h <= 0 {
|
||
return 0
|
||
}
|
||
return w * h
|
||
}
|
||
|
||
// populateOutputImageCount sets the output image count on ImageUsage from len(Data)
|
||
// when OutputTokensDetails.NImages is not already populated.
|
||
func populateOutputImageCount(imageUsage *schemas.ImageUsage, dataLen int) {
|
||
if imageUsage == nil || dataLen == 0 {
|
||
return
|
||
}
|
||
if imageUsage.OutputTokensDetails == nil {
|
||
imageUsage.OutputTokensDetails = &schemas.ImageTokenDetails{}
|
||
}
|
||
if imageUsage.OutputTokensDetails.NImages == 0 {
|
||
imageUsage.OutputTokensDetails.NImages = dataLen
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Pricing resolution
|
||
// ---------------------------------------------------------------------------
|
||
|
||
// resolvePricing resolves the pricing entry for a model, trying deployment as fallback.
|
||
func (mc *ModelCatalog) resolvePricing(provider, originalModelRequested, resolvedModelUsed string, requestType schemas.RequestType, scopes PricingLookupScopes) *configstoreTables.TableModelPricing {
|
||
if resolvedModelUsed == "" {
|
||
resolvedModelUsed = originalModelRequested
|
||
}
|
||
mc.logger.Debug("looking up pricing for resolved model %s and provider %s of request type %s", resolvedModelUsed, provider, normalizeRequestType(requestType))
|
||
|
||
if scopes.Provider == "" {
|
||
scopes.Provider = provider
|
||
}
|
||
|
||
base, exists := mc.getBasePricing(resolvedModelUsed, provider, requestType)
|
||
if exists && base != nil {
|
||
result, _ := mc.applyPricingOverrides(resolvedModelUsed, requestType, *base, scopes)
|
||
return &result
|
||
}
|
||
|
||
mc.logger.Debug("pricing not found for resolved model %s, trying alias %s", resolvedModelUsed, originalModelRequested)
|
||
base, exists = mc.getBasePricing(originalModelRequested, provider, requestType)
|
||
if exists && base != nil {
|
||
// Apply overrides using the resolved model name, not the alias
|
||
result, _ := mc.applyPricingOverrides(resolvedModelUsed, requestType, *base, scopes)
|
||
return &result
|
||
}
|
||
|
||
// No base catalog entry found; still try overrides in case the user defined
|
||
// override-only pricing for a model not in the built-in catalog.
|
||
mc.logger.Debug("pricing not found for resolved model %s and provider %s, trying override-only pricing", resolvedModelUsed, provider)
|
||
result, applied := mc.applyPricingOverrides(resolvedModelUsed, requestType, configstoreTables.TableModelPricing{}, scopes)
|
||
if applied {
|
||
return &result
|
||
}
|
||
mc.logger.Debug("no pricing found for resolved model %s and provider %s, skipping cost calculation", resolvedModelUsed, provider)
|
||
return nil
|
||
}
|
||
|
||
// getBasePricing looks up catalog pricing for the given model, provider, and request type.
|
||
// It applies a provider-specific fallback chain when an exact match is not found:
|
||
//
|
||
// - Gemini: retries under the "vertex" provider, then falls back to chat mode for Responses requests.
|
||
// - Vertex: strips the "provider/model" prefix and retries, then falls back to chat mode for Responses requests.
|
||
// - Bedrock: prepends the "anthropic." namespace for Claude models, then falls back to chat mode for Responses requests.
|
||
// - All providers: for Responses/ResponsesStream requests, retries the lookup in chat mode.
|
||
// - All providers: for ImageEdit/ImageVariation requests, retries the lookup in image-generation mode.
|
||
//
|
||
// The method acquires a read lock for the duration of the lookup.
|
||
//
|
||
// Input: model — exact model name to look up.
|
||
//
|
||
// provider — provider identifier (e.g. "openai", "anthropic").
|
||
// requestType — the request type used to derive the pricing mode.
|
||
//
|
||
// Output: TableModelPricing — the matched pricing row (zero value when not found).
|
||
//
|
||
// bool — true when a pricing entry was found, false otherwise.
|
||
func (mc *ModelCatalog) getBasePricing(model, provider string, requestType schemas.RequestType) (*configstoreTables.TableModelPricing, bool) {
|
||
mc.mu.RLock()
|
||
defer mc.mu.RUnlock()
|
||
|
||
mode := normalizeRequestType(requestType)
|
||
|
||
pricing, ok := mc.pricingData[makeKey(model, provider, mode)]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
|
||
// Lookup in vertex if gemini not found
|
||
if provider == string(schemas.Gemini) {
|
||
mc.logger.Debug("primary lookup failed, trying vertex provider for the same model")
|
||
pricing, ok = mc.pricingData[makeKey(model, "vertex", mode)]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
|
||
// Lookup in chat if responses not found
|
||
if requestType == schemas.ResponsesRequest || requestType == schemas.ResponsesStreamRequest || requestType == schemas.RealtimeRequest {
|
||
mc.logger.Debug("secondary lookup failed, trying vertex provider for the same model in chat completion")
|
||
pricing, ok = mc.pricingData[makeKey(model, "vertex", normalizeRequestType(schemas.ChatCompletionRequest))]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
}
|
||
}
|
||
|
||
if provider == string(schemas.Vertex) {
|
||
// Vertex models can be of the form "provider/model", so try to lookup the model without the provider prefix and keep the original provider
|
||
if strings.Contains(model, "/") {
|
||
modelWithoutProvider := strings.SplitN(model, "/", 2)[1]
|
||
mc.logger.Debug("primary lookup failed, trying vertex provider for the same model with provider/model format %s", modelWithoutProvider)
|
||
pricing, ok = mc.pricingData[makeKey(modelWithoutProvider, "vertex", mode)]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
|
||
// Lookup in chat if responses not found
|
||
if requestType == schemas.ResponsesRequest || requestType == schemas.ResponsesStreamRequest || requestType == schemas.RealtimeRequest {
|
||
mc.logger.Debug("secondary lookup failed, trying vertex provider for the same model in chat completion")
|
||
pricing, ok = mc.pricingData[makeKey(modelWithoutProvider, "vertex", normalizeRequestType(schemas.ChatCompletionRequest))]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if provider == string(schemas.Bedrock) {
|
||
// If model is claude without "anthropic." prefix, try with "anthropic." prefix
|
||
if !strings.Contains(model, "anthropic.") && schemas.IsAnthropicModel(model) {
|
||
mc.logger.Debug("primary lookup failed, trying with anthropic. prefix for the same model")
|
||
pricing, ok = mc.pricingData[makeKey("anthropic."+model, provider, mode)]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
|
||
// Lookup in chat if responses not found
|
||
if requestType == schemas.ResponsesRequest || requestType == schemas.ResponsesStreamRequest || requestType == schemas.RealtimeRequest {
|
||
mc.logger.Debug("secondary lookup failed, trying chat provider for the same model in chat completion")
|
||
pricing, ok = mc.pricingData[makeKey("anthropic."+model, provider, normalizeRequestType(schemas.ChatCompletionRequest))]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Lookup in chat if responses not found
|
||
if requestType == schemas.ResponsesRequest || requestType == schemas.ResponsesStreamRequest || requestType == schemas.RealtimeRequest {
|
||
mc.logger.Debug("primary lookup failed, trying chat provider for the same model in chat completion")
|
||
pricing, ok = mc.pricingData[makeKey(model, provider, normalizeRequestType(schemas.ChatCompletionRequest))]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
}
|
||
|
||
// Lookup in image generation if image edit not found
|
||
if requestType == schemas.ImageEditRequest ||
|
||
requestType == schemas.ImageEditStreamRequest ||
|
||
requestType == schemas.ImageVariationRequest {
|
||
mc.logger.Debug("primary lookup failed, trying image generation provider for the same model")
|
||
pricing, ok = mc.pricingData[makeKey(model, provider, normalizeRequestType(schemas.ImageGenerationRequest))]
|
||
if ok {
|
||
return &pricing, true
|
||
}
|
||
}
|
||
|
||
return nil, false
|
||
}
|