package modelcatalog

import (
	"testing"

	bifrost "github.com/maximhq/bifrost/core"
	"github.com/maximhq/bifrost/core/schemas"
	configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------

// chatPricing builds a chat-mode pricing row for the fixed "test-model" /
// "test-provider" pair, using the supplied per-token input and output rates.
func chatPricing(input, output float64) configstoreTables.TableModelPricing {
	pricing := configstoreTables.TableModelPricing{
		Model:    "test-model",
		Provider: "test-provider",
		Mode:     "chat",
	}
	pricing.InputCostPerToken = bifrost.Ptr(input)
	pricing.OutputCostPerToken = bifrost.Ptr(output)
	return pricing
}

// testCatalogWithPricing returns a test catalog with a no-op logger whose
// pricing table has been seeded with every entry in entries (a nil map seeds
// nothing).
func testCatalogWithPricing(entries map[string]configstoreTables.TableModelPricing) *ModelCatalog {
	catalog := newTestCatalog(nil, nil)
	catalog.logger = noOpLogger{}
	for key := range entries {
		catalog.pricingData[key] = entries[key]
	}
	return catalog
}

// makeChatResponse wraps usage in a chat-completion BifrostResponse tagged
// with the given provider and originally requested model.
func makeChatResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	extra := schemas.BifrostResponseExtraFields{
		RequestType:            schemas.ChatCompletionRequest,
		Provider:               provider,
		OriginalModelRequested: model,
	}
	chat := &schemas.BifrostChatResponse{Usage: usage, ExtraFields: extra}
	return &schemas.BifrostResponse{ChatResponse: chat}
}

// makeEmbeddingResponse builds a minimal BifrostResponse for an embedding request.
func makeEmbeddingResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	extra := schemas.BifrostResponseExtraFields{
		RequestType:            schemas.EmbeddingRequest,
		Provider:               provider,
		OriginalModelRequested: model,
	}
	embedding := &schemas.BifrostEmbeddingResponse{Usage: usage, ExtraFields: extra}
	return &schemas.BifrostResponse{EmbeddingResponse: embedding}
}

// makeRerankResponse wraps usage in a rerank BifrostResponse tagged with the
// given provider and originally requested model.
func makeRerankResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	extra := schemas.BifrostResponseExtraFields{
		RequestType:            schemas.RerankRequest,
		Provider:               provider,
		OriginalModelRequested: model,
	}
	rerank := &schemas.BifrostRerankResponse{Usage: usage, ExtraFields: extra}
	return &schemas.BifrostResponse{RerankResponse: rerank}
}

// makeImageResponse wraps image usage in an image-generation BifrostResponse
// tagged with the given provider and originally requested model.
func makeImageResponse(provider schemas.ModelProvider, model string, usage *schemas.ImageUsage) *schemas.BifrostResponse {
	extra := schemas.BifrostResponseExtraFields{
		RequestType:            schemas.ImageGenerationRequest,
		Provider:               provider,
		OriginalModelRequested: model,
	}
	image := &schemas.BifrostImageGenerationResponse{Usage: usage, ExtraFields: extra}
	return &schemas.BifrostResponse{ImageGenerationResponse: image}
}

// derefF returns the float64 that f points to, or 0 when f is nil.
func derefF(f *float64) float64 {
	var value float64
	if f != nil {
		value = *f
	}
	return value
}

// =========================================================================
// 1.
// computeTextCost — unit tests (pure function, no catalog)
// =========================================================================

// TestComputeTextCost_BasicInputOutput checks that prompt and completion
// tokens are each billed at their base per-token rate.
func TestComputeTextCost_BasicInputOutput(t *testing.T) {
	// GPT-4o: $5/M input, $15/M output
	p := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// 1000 * 0.000005 + 500 * 0.000015 = 0.005 + 0.0075 = 0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// TestComputeTextCost_NilUsage checks that a nil usage yields a cost of 0.
func TestComputeTextCost_NilUsage(t *testing.T) {
	p := chatPricing(0.000005, 0.000015)
	assert.Equal(t, 0.0, computeTextCost(&p, nil, serviceTier{}))
}

// TestComputeTextCost_ZeroTokens checks that a zero-valued usage yields a
// cost of 0.
func TestComputeTextCost_ZeroTokens(t *testing.T) {
	p := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{}
	assert.Equal(t, 0.0, computeTextCost(&p, usage, serviceTier{}))
}

// TestComputeTextCost_WithCachedPromptTokens checks that cached read and
// cached write tokens are billed at their own rates and subtracted from the
// prompt tokens billed at the base input rate.
func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) {
	// Claude 3.5 Sonnet (Bedrock): input=$3/M, output=$15/M, cache_read=$0.3/M, cache_creation=$3.75/M
	p := chatPricing(0.000003, 0.000015)
	p.CacheReadInputTokenCost = bifrost.Ptr(0.0000003)
	p.CacheCreationInputTokenCost = bifrost.Ptr(0.00000375)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     2000,
		CompletionTokens: 500,
		TotalTokens:      2500,
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens:  1500, // 1500 read from cache
			CachedWriteTokens: 200,  // 200 cache creation tokens
		},
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Both cached read and write tokens are input-side deductions from promptTokens.
	// Input: (2000-1500-200)*0.000003 + 1500*0.0000003 + 200*0.00000375 = 0.0009 + 0.00045 + 0.00075 = 0.0021
	// Output: 500*0.000015 = 0.0075
	// Total: 0.0021 + 0.0075 = 0.0096
	assert.InDelta(t, 0.0096, cost, 1e-12)
}

// TestComputeTextCost_Tiered200k checks that the above-200k rates apply to
// both input and output when TotalTokens exceeds 200k.
func TestComputeTextCost_Tiered200k(t *testing.T) {
	// Claude 3.5 Sonnet Bedrock 200k tier: input=$6/M, output=$30/M
	p := chatPricing(0.000003, 0.000015)
	p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
	p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     180000,
		CompletionTokens: 30000,
		TotalTokens:      210000, // Above 200k threshold
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Uses tiered rate since total > 200k
	// 180000 * 0.000006 + 30000 * 0.00003 = 1.08 + 0.90 = 1.98
	assert.InDelta(t, 1.98, cost, 1e-9)
}

// TestComputeTextCost_Below200kUsesBaseRate checks that the base rates are
// used when tiered rates exist but TotalTokens is below 200k.
func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) {
	p := chatPricing(0.000003, 0.000015)
	p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
	p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500, // Below 200k
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Uses base rate since total < 200k
	// 1000 * 0.000003 + 500 * 0.000015 = 0.003 + 0.0075 = 0.0105
	assert.InDelta(t, 0.0105, cost, 1e-12)
}

// TestComputeTextCost_Tiered272k checks that the above-272k rates win over
// the above-200k rates when TotalTokens exceeds 272k.
func TestComputeTextCost_Tiered272k(t *testing.T) {
	p := chatPricing(0.000003, 0.000015)
	p.InputCostPerTokenAbove200kTokens = new(0.000006)
	p.OutputCostPerTokenAbove200kTokens = new(0.00003)
	p.InputCostPerTokenAbove272kTokens = new(0.000009)
	p.OutputCostPerTokenAbove272kTokens = new(0.000045)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     250000,
		CompletionTokens: 30000,
		TotalTokens:      280000, // Above 272k threshold
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Uses 272k tiered rate since total > 272k
	// 250000 * 0.000009 + 30000 * 0.000045 = 2.25 + 1.35 = 3.60
	assert.InDelta(t, 3.60, cost, 1e-9)
}

// TestComputeTextCost_Between200kAnd272kUses200kRate checks that the
// above-200k rates are used when TotalTokens is above 200k but not above 272k.
func TestComputeTextCost_Between200kAnd272kUses200kRate(t *testing.T) {
	p := chatPricing(0.000003, 0.000015)
	p.InputCostPerTokenAbove200kTokens = new(0.000006)
	p.OutputCostPerTokenAbove200kTokens = new(0.00003)
	p.InputCostPerTokenAbove272kTokens = new(0.000009)
	p.OutputCostPerTokenAbove272kTokens = new(0.000045)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     200000,
		CompletionTokens: 30000,
		TotalTokens:      230000, // Between 200k and 272k
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Uses 200k tiered rate since total > 200k but <= 272k
	// 200000 * 0.000006 + 30000 * 0.00003 = 1.20 + 0.90 = 2.10
	assert.InDelta(t, 2.10, cost, 1e-9)
}

// TestComputeTextCost_272kTierWithCacheRead checks that, above 272k total
// tokens, cached read tokens use the above-272k cache-read rate while the
// remaining prompt tokens use the above-272k input rate.
func TestComputeTextCost_272kTierWithCacheRead(t *testing.T) {
	p := chatPricing(0.000003, 0.000015)
	p.InputCostPerTokenAbove272kTokens = new(0.000009)
	p.OutputCostPerTokenAbove272kTokens = new(0.000045)
	p.CacheReadInputTokenCost = new(0.0000003)
	p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     250000,
		CompletionTokens: 30000,
		TotalTokens:      280000, // Above 272k
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens: 50000,
		},
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Non-cached input: (250000-50000) * 0.000009 = 200000 * 0.000009 = 1.80
	// Cached read: 50000 * 0.0000009 = 0.045
	// Output: 30000 * 0.000045 = 1.35
	// Total: 1.80 + 0.045 + 1.35 = 3.195
	assert.InDelta(t, 3.195, cost, 1e-9)
}

// TestComputeTextCost_SearchQueryCost checks that each reported search query
// adds SearchContextCostPerQuery on top of the token cost.
func TestComputeTextCost_SearchQueryCost(t *testing.T) {
	p := chatPricing(0.000003, 0.000015)
	p.SearchContextCostPerQuery = bifrost.Ptr(0.01) // $0.01 per search query
	numQueries := 3
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
		CompletionTokensDetails: &schemas.ChatCompletionTokensDetails{
			NumSearchQueries: &numQueries,
		},
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// 1000*0.000003 + 500*0.000015 + 3*0.01 = 0.003 + 0.0075 + 0.03 = 0.0405
	assert.InDelta(t, 0.0405, cost, 1e-12)
}

// TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate checks that cached
// read tokens are billed at the base input rate when no cache rate is set, so
// the total equals the uncached cost.
func TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate(t *testing.T) {
	// If cache rate fields are nil, tieredCacheReadInputTokenRate falls back to base InputCostPerToken
	p := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens: 400,
		},
	}
	cost := computeTextCost(&p, usage, serviceTier{})
	// Non-cached prompt: (1000-400)*0.000005 = 600*0.000005 = 0.003
	// Cached prompt: 400 tokens at base input rate (no cache rate set) = 400*0.000005 = 0.002
	// Output: 500*0.000015 = 0.0075
	// Total: 0.003 + 0.002 + 0.0075 = 0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// =========================================================================
// 2. computeEmbeddingCost — unit tests
// =========================================================================

// TestComputeEmbeddingCost_Basic checks that embedding cost is the prompt
// token count times the input per-token rate.
func TestComputeEmbeddingCost_Basic(t *testing.T) {
	// Titan Embed Text v1: $0.1/M input
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.0000001),
		OutputCostPerToken: bifrost.Ptr(0.0),
	}
	usage := &schemas.BifrostLLMUsage{
		PromptTokens: 5000,
		TotalTokens:  5000,
	}
	cost := computeEmbeddingCost(&p, usage, serviceTier{})
	// 5000 * 0.0000001 = 0.0005
	assert.InDelta(t, 0.0005, cost, 1e-12)
}

// TestComputeEmbeddingCost_NilUsage checks that a nil usage yields a cost of 0.
func TestComputeEmbeddingCost_NilUsage(t *testing.T) {
	p := configstoreTables.TableModelPricing{InputCostPerToken: new(0.0000001)}
	assert.Equal(t, 0.0, computeEmbeddingCost(&p, nil, serviceTier{}))
}

// =========================================================================
// 3.
computeRerankCost — unit tests // ========================================================================= func TestComputeRerankCost_Basic(t *testing.T) { p := configstoreTables.TableModelPricing{ InputCostPerToken: bifrost.Ptr(0.000001), OutputCostPerToken: bifrost.Ptr(0.000002), } usage := &schemas.BifrostLLMUsage{ PromptTokens: 2000, CompletionTokens: 100, TotalTokens: 2100, } cost := computeRerankCost(&p, usage, serviceTier{}) // 2000*0.000001 + 100*0.000002 = 0.002 + 0.0002 = 0.0022 assert.InDelta(t, 0.0022, cost, 1e-12) } func TestComputeRerankCost_WithSearchCost(t *testing.T) { p := configstoreTables.TableModelPricing{ InputCostPerToken: bifrost.Ptr(0.0), OutputCostPerToken: bifrost.Ptr(0.0), SearchContextCostPerQuery: bifrost.Ptr(0.001), } numQueries := 5 usage := &schemas.BifrostLLMUsage{ CompletionTokensDetails: &schemas.ChatCompletionTokensDetails{ NumSearchQueries: &numQueries, }, } cost := computeRerankCost(&p, usage, serviceTier{}) assert.InDelta(t, 0.005, cost, 1e-12) } func TestComputeRerankCost_NilUsage(t *testing.T) { p := configstoreTables.TableModelPricing{InputCostPerToken: new(0.001)} assert.Equal(t, 0.0, computeRerankCost(&p, nil, serviceTier{})) } // ========================================================================= // 4. 
// computeSpeechCost — unit tests
// =========================================================================

// TestComputeSpeechCost_TokensPreferredOverDuration checks that when
// completion tokens are reported, output is billed per token even though a
// duration and a per-second output rate are also available.
func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) {
	// TTS: input=text tokens, output=audio tokens (preferred over per-second)
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:   bifrost.Ptr(0.0000025),
		OutputCostPerToken:  bifrost.Ptr(0.00001),
		OutputCostPerSecond: bifrost.Ptr(0.00025),
	}
	seconds := 60
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     100,
		CompletionTokens: 200,
		TotalTokens:      300,
	}
	cost := computeSpeechCost(&p, usage, &seconds, 0, serviceTier{})
	// Input: 100 text tokens * $0.0000025 = $0.00025
	// Output: 200 audio tokens present → uses token rate $0.00001, NOT per-second
	// 200 * $0.00001 = $0.002
	// Total: $0.00225
	assert.InDelta(t, 0.00225, cost, 1e-12)
}

// TestComputeSpeechCost_OutputFallsBackToPerSecond checks that output is
// billed per second of audio when no completion tokens are reported.
func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) {
	// TTS: no output tokens → falls back to per-second output pricing
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:   bifrost.Ptr(0.000001),
		OutputCostPerToken:  bifrost.Ptr(0.000002),
		OutputCostPerSecond: bifrost.Ptr(0.0001),
	}
	seconds := 120
	usage := &schemas.BifrostLLMUsage{PromptTokens: 500}
	cost := computeSpeechCost(&p, usage, &seconds, 0, serviceTier{})
	// Input: 500 * $0.000001 = $0.0005
	// Output: no CompletionTokens → falls back to 120 * $0.0001 = $0.012
	// Total: $0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// TestComputeSpeechCost_OutputAudioTokenRate checks that completion tokens
// are billed at OutputCostPerAudioToken rather than OutputCostPerToken when
// the audio-token rate is set.
func TestComputeSpeechCost_OutputAudioTokenRate(t *testing.T) {
	// TTS: output uses OutputCostPerAudioToken when available
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:       bifrost.Ptr(0.000001),
		OutputCostPerToken:      bifrost.Ptr(0.000002),
		OutputCostPerAudioToken: bifrost.Ptr(0.00005),
	}
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     200,
		CompletionTokens: 100,
		TotalTokens:      300,
	}
	cost := computeSpeechCost(&p, usage, nil, 0, serviceTier{})
	// Input: 200 * $0.000001 = $0.0002
	// Output: 100 * $0.00005 = $0.005 (OutputCostPerAudioToken preferred)
	// Total: $0.0052
	assert.InDelta(t, 0.0052, cost, 1e-12)
}

// TestComputeSpeechCost_TokenFallback checks that, with no duration and only
// base token rates, cost is prompt plus completion tokens at those rates.
func TestComputeSpeechCost_TokenFallback(t *testing.T) {
	p := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}
	cost := computeSpeechCost(&p, usage, nil, 0, serviceTier{})
	// No audio seconds → token fallback
	// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// TestComputeSpeechCost_NilUsageNilSeconds checks that a call with neither
// usage nor duration yields a cost of 0.
func TestComputeSpeechCost_NilUsageNilSeconds(t *testing.T) {
	p := chatPricing(0.000005, 0.000015)
	assert.Equal(t, 0.0, computeSpeechCost(&p, nil, nil, 0, serviceTier{}))
}

// =========================================================================
// 5. computeTranscriptionCost — unit tests
// =========================================================================

// TestComputeTranscriptionCost_DurationBased checks that, with no usage,
// cost is the audio duration times InputCostPerSecond.
func TestComputeTranscriptionCost_DurationBased(t *testing.T) {
	// assemblyai/nano: input_cost_per_second=0.00010278
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.0),
		OutputCostPerToken: bifrost.Ptr(0.0),
		InputCostPerSecond: bifrost.Ptr(0.00010278),
	}
	seconds := 300 // 5 minutes
	cost := computeTranscriptionCost(&p, nil, &seconds, nil, serviceTier{})
	// 300 * 0.00010278 = 0.030834
	assert.InDelta(t, 0.030834, cost, 1e-9)
}

// TestComputeTranscriptionCost_AudioTokenDetails checks that audio input
// tokens use InputCostPerAudioToken, text input tokens use the base input
// rate, and completion tokens use the output rate.
func TestComputeTranscriptionCost_AudioTokenDetails(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:      bifrost.Ptr(0.000005),
		OutputCostPerToken:     bifrost.Ptr(0.000015),
		InputCostPerAudioToken: bifrost.Ptr(0.00001),
	}
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     2000,
		CompletionTokens: 500,
		TotalTokens:      2500,
	}
	audioDetails := &schemas.TranscriptionUsageInputTokenDetails{
		AudioTokens: 1500,
		TextTokens:  500,
	}
	cost := computeTranscriptionCost(&p, usage, nil, audioDetails, serviceTier{})
	// Audio: 1500*0.00001 = 0.015
	// Text: 500*0.000005 = 0.0025
	// Output: 500*0.000015 = 0.0075
	// Total: 0.025
	assert.InDelta(t, 0.025, cost, 1e-12)
}

// TestComputeTranscriptionCost_TokenFallback checks that, with no duration
// and no audio token details, cost is prompt plus completion tokens at the
// base rates.
func TestComputeTranscriptionCost_TokenFallback(t *testing.T) {
	p := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 200,
		TotalTokens:      1200,
	}
	cost := computeTranscriptionCost(&p, usage, nil, nil, serviceTier{})
	// 1000*0.000005 + 200*0.000015 = 0.005 + 0.003 = 0.008
	assert.InDelta(t, 0.008, cost, 1e-12)
}

// TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration checks that
// audio token details drive the input cost even when a duration and a
// per-second audio rate are also supplied.
func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T) {
	// STT: audio token details present → uses tokens, not per-second
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:          bifrost.Ptr(0.000005),
		OutputCostPerToken:         bifrost.Ptr(0.0),
		InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
		InputCostPerAudioToken:     bifrost.Ptr(0.00001),
	}
	seconds := 60
	audioDetails := &schemas.TranscriptionUsageInputTokenDetails{
		AudioTokens: 5000,
		TextTokens:  1000,
	}
	cost := computeTranscriptionCost(&p, nil, &seconds, audioDetails, serviceTier{})
	// Input: audio token details present → tokens preferred over per-second
	// 5000 audio * $0.00001 = $0.05
	// 1000 text * $0.000005 = $0.005
	// Output: nil usage → $0
	// Total: $0.055
	assert.InDelta(t, 0.055, cost, 1e-12)
}

// TestComputeTranscriptionCost_DurationFallbackWhenNoTokens checks that input
// is billed per second of audio when there are no audio token details and no
// prompt tokens, while completion tokens are still billed per token.
func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) {
	// STT: no audio token details, no prompt tokens → falls back to per-second
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:          bifrost.Ptr(0.000005),
		OutputCostPerToken:         bifrost.Ptr(0.000015),
		InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
	}
	seconds := 60
	usage := &schemas.BifrostLLMUsage{
		CompletionTokens: 200,
		TotalTokens:      200,
	}
	cost := computeTranscriptionCost(&p, usage, &seconds, nil, serviceTier{})
	// Input: no audio details, PromptTokens=0 → falls back to 60 * $0.0001 = $0.006
	// Output: 200 * $0.000015 = $0.003
	// Total: $0.009
	assert.InDelta(t, 0.009, cost, 1e-12)
}

// =========================================================================
// 6.
// computeImageCost — unit tests
// =========================================================================

// TestComputeImageCost_PerImage checks that cost is the number of output
// images times OutputCostPerImage when token rates are zero.
func TestComputeImageCost_PerImage(t *testing.T) {
	// dall-e-3 (aiml): output_cost_per_image=$0.052
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.0),
		OutputCostPerToken: bifrost.Ptr(0.0),
		OutputCostPerImage: bifrost.Ptr(0.052),
	}
	usage := &schemas.ImageUsage{
		OutputTokensDetails: &schemas.ImageTokenDetails{
			NImages: 2,
		},
	}
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	// 2 * 0.052 = 0.104
	assert.InDelta(t, 0.104, cost, 1e-12)
}

// TestComputeImageCost_PerImageDefaultsToOne checks that an empty usage is
// billed as a single output image.
func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		OutputCostPerImage: bifrost.Ptr(0.052),
	}
	usage := &schemas.ImageUsage{} // No token details → defaults to 1 image
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.052, cost, 1e-12)
}

// TestComputeImageCost_TokenBased checks that input and output token counts
// are billed at the per-token rates.
func TestComputeImageCost_TokenBased(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
	}
	usage := &schemas.ImageUsage{
		InputTokens:  1000,
		OutputTokens: 500,
		TotalTokens:  1500,
	}
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// TestComputeImageCost_TokenBasedWithDetails checks that the text/image token
// breakdown on each side is billed at that side's per-token rate.
func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
	}
	usage := &schemas.ImageUsage{
		InputTokens:  2000,
		OutputTokens: 1000,
		TotalTokens:  3000,
		InputTokensDetails: &schemas.ImageTokenDetails{
			TextTokens:  500,
			ImageTokens: 1500,
		},
		OutputTokensDetails: &schemas.ImageTokenDetails{
			TextTokens:  200,
			ImageTokens: 800,
		},
	}
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	// Input: (500+1500)*0.000005 = 2000*0.000005 = 0.01
	// Output: (200+800)*0.000015 = 1000*0.000015 = 0.015
	// Total: 0.025
	assert.InDelta(t, 0.025, cost, 1e-12)
}

// TestComputeImageCost_NilUsage checks that a nil usage yields a cost of 0.
func TestComputeImageCost_NilUsage(t *testing.T) {
	p := configstoreTables.TableModelPricing{OutputCostPerImage: new(0.05)}
	assert.Equal(t, 0.0, computeImageCost(&p, nil, "", "", serviceTier{}))
}

// TestComputeImageCost_InputAndOutputPerImage checks that input images and
// output images are each billed at their own per-image rate.
func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerImage:  bifrost.Ptr(0.01),
		OutputCostPerImage: bifrost.Ptr(0.05),
	}
	usage := &schemas.ImageUsage{
		NumInputImages:      3,
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
	}
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	// 3 input * $0.01 + 2 output * $0.05 = $0.03 + $0.10 = $0.13
	assert.InDelta(t, 0.13, cost, 1e-12)
}

// TestComputeImageCost_PerPixelOutput checks that output is billed as pixel
// count (from the size string) times image count times OutputCostPerPixel.
func TestComputeImageCost_PerPixelOutput(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		OutputCostPerPixel: bifrost.Ptr(0.000000019), // ~$0.02 for 1024x1024
	}
	usage := &schemas.ImageUsage{
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
	}
	cost := computeImageCost(&p, usage, "1024x1024", "", serviceTier{})
	// 1024*1024 * 1 * 0.000000019 = 1048576 * 0.000000019 ≈ 0.01992
	assert.InDelta(t, 1048576*0.000000019, cost, 1e-12)
}

// TestComputeImageCost_PerPixelInputAndOutput checks that per-pixel pricing is
// applied to both input and output images using the same size string.
func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerPixel:  bifrost.Ptr(0.00000001),
		OutputCostPerPixel: bifrost.Ptr(0.00000002),
	}
	usage := &schemas.ImageUsage{
		NumInputImages:      2,
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3},
	}
	cost := computeImageCost(&p, usage, "512x512", "", serviceTier{})
	pixels := 512 * 512 // 262144
	// Input: 262144 * 2 * 0.00000001 = 0.00524288
	// Output: 262144 * 3 * 0.00000002 = 0.01572864
	expected := float64(pixels*2)*0.00000001 + float64(pixels*3)*0.00000002
	assert.InDelta(t, expected, cost, 1e-12)
}

// TestComputeImageCost_TokensPreferredOverPixels checks that token pricing is
// used when token counts are present, even if per-pixel rates and a size are
// also available.
func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
		InputCostPerPixel:  bifrost.Ptr(0.00000001),
		OutputCostPerPixel: bifrost.Ptr(0.00000002),
	}
	usage := &schemas.ImageUsage{
		InputTokens:  1000,
		OutputTokens: 500,
		TotalTokens:  1500,
	}
	cost := computeImageCost(&p, usage, "1024x1024", "", serviceTier{})
	// Tokens should win: 1000*0.000005 + 500*0.000015 = 0.0125
	assert.InDelta(t, 0.0125, cost, 1e-12)
}

// TestComputeImageCost_PixelsPreferredOverPerImage checks that per-pixel
// pricing is used instead of per-image pricing when a size is given.
func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		OutputCostPerPixel: bifrost.Ptr(0.00000002),
		OutputCostPerImage: bifrost.Ptr(999.0), // should not be used
	}
	usage := &schemas.ImageUsage{
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
	}
	cost := computeImageCost(&p, usage, "256x256", "", serviceTier{})
	// Per-pixel should win: 65536 * 1 * 0.00000002 = 0.00131072
	assert.InDelta(t, 65536*0.00000002, cost, 1e-12)
}

// TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize checks that
// per-image pricing is used when a per-pixel rate exists but no size is given.
func TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		OutputCostPerPixel: bifrost.Ptr(0.00000002),
		OutputCostPerImage: bifrost.Ptr(0.05),
	}
	usage := &schemas.ImageUsage{
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
	}
	cost := computeImageCost(&p, usage, "", "", serviceTier{})
	// No size → pixels=0, falls through to per-image: 2 * $0.05 = $0.10
	assert.InDelta(t, 0.10, cost, 1e-12)
}

// TestComputeImageCost_QualityBasedRates checks which per-image rate is
// selected for each quality string, including an unrecognized value ("hd")
// and the empty string.
func TestComputeImageCost_QualityBasedRates(t *testing.T) {
	usage := &schemas.ImageUsage{
		OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
	}
	// Quality-specific rates take precedence over base/size-tier
	p := configstoreTables.TableModelPricing{
		OutputCostPerImage:              bifrost.Ptr(0.01),
		OutputCostPerImageLowQuality:    bifrost.Ptr(0.02),
		OutputCostPerImageMediumQuality: bifrost.Ptr(0.03),
		OutputCostPerImageHighQuality:   bifrost.Ptr(0.04),
		OutputCostPerImageAutoQuality:   bifrost.Ptr(0.05),
	}
	assert.InDelta(t, 0.02, computeImageCost(&p, usage, "", "low", serviceTier{}), 1e-12)
	assert.InDelta(t, 0.03, computeImageCost(&p, usage, "", "medium", serviceTier{}), 1e-12)
	assert.InDelta(t, 0.04, computeImageCost(&p, usage, "", "high", serviceTier{}), 1e-12)
	assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "auto", serviceTier{}), 1e-12)
	// "hd" does not match any quality case so perImageRate stays nil → size/base fallback.
	assert.InDelta(t, 0.01, computeImageCost(&p, usage, "", "hd", serviceTier{}), 1e-12)
	// Empty quality is treated as auto
	assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "", serviceTier{}), 1e-12)
}

// TestParseImagePixels checks that "WxH" strings yield width*height and that
// empty, malformed, zero, or negative dimensions yield 0.
func TestParseImagePixels(t *testing.T) {
	assert.Equal(t, 1048576, parseImagePixels("1024x1024"))
	assert.Equal(t, 262144, parseImagePixels("512x512"))
	assert.Equal(t, 1835008, parseImagePixels("1792x1024"))
	assert.Equal(t, 0, parseImagePixels(""))
	assert.Equal(t, 0, parseImagePixels("invalid"))
	assert.Equal(t, 0, parseImagePixels("1024"))
	assert.Equal(t, 0, parseImagePixels("0x1024"))
	assert.Equal(t, 0, parseImagePixels("-1x1024"))
}

// =========================================================================
// 7. computeVideoCost — unit tests
// =========================================================================

// TestComputeVideoCost_DurationBased checks that output is billed per second
// of video via OutputCostPerVideoPerSecond and prompt tokens per input token.
func TestComputeVideoCost_DurationBased(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:           bifrost.Ptr(0.000001),
		OutputCostPerToken:          bifrost.Ptr(0.0),
		OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
	}
	seconds := 30
	usage := &schemas.BifrostLLMUsage{PromptTokens: 500, TotalTokens: 500}
	cost := computeVideoCost(&p, usage, &seconds, serviceTier{})
	// Output: 30 * 0.001 = 0.03
	// Input: 500 * 0.000001 = 0.0005
	// Total: 0.0305
	assert.InDelta(t, 0.0305, cost, 1e-12)
}

// TestComputeVideoCost_OutputCostPerSecondFallback checks that
// OutputCostPerSecond is used when OutputCostPerVideoPerSecond is not set.
func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:   bifrost.Ptr(0.0),
		OutputCostPerToken:  bifrost.Ptr(0.0),
		OutputCostPerSecond: bifrost.Ptr(0.002),
	}
	seconds := 10
	cost := computeVideoCost(&p, nil, &seconds, serviceTier{})
	assert.InDelta(t, 0.02, cost, 1e-12)
}

// TestComputeVideoCost_NilSeconds checks that only prompt tokens are billed
// when no duration is supplied.
func TestComputeVideoCost_NilSeconds(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:           bifrost.Ptr(0.000001),
		OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
	}
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000}
	cost := computeVideoCost(&p, usage, nil, serviceTier{})
	// Only input tokens: 1000 * 0.000001 = 0.001
	assert.InDelta(t, 0.001, cost, 1e-12)
}

// =========================================================================
// 8. tieredInputRate / tieredOutputRate
// =========================================================================

// TestTieredInputRate_BelowThreshold checks that the base input rate is
// returned for 100000 total tokens.
func TestTieredInputRate_BelowThreshold(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:                bifrost.Ptr(0.000003),
		InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
	}
	assert.Equal(t, 0.000003, tieredInputRate(&p, 100000, serviceTier{}))
}

// TestTieredInputRate_AboveThreshold checks that the above-200k input rate is
// returned for 210000 total tokens.
func TestTieredInputRate_AboveThreshold(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken:                bifrost.Ptr(0.000003),
		InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
	}
	assert.Equal(t, 0.000006, tieredInputRate(&p, 210000, serviceTier{}))
}

// TestTieredInputRate_AboveThresholdNoTieredRate checks that the base input
// rate is returned above the threshold when no tiered rate is configured.
func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		InputCostPerToken: bifrost.Ptr(0.000003),
	}
	// Falls back to base rate when tiered field is nil
	assert.Equal(t, 0.000003, tieredInputRate(&p, 300000, serviceTier{}))
}

// TestTieredOutputRate_AboveThreshold checks that the above-200k output rate
// is returned for 250000 total tokens.
func TestTieredOutputRate_AboveThreshold(t *testing.T) {
	p := configstoreTables.TableModelPricing{
		OutputCostPerToken:                bifrost.Ptr(0.000015),
		OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
	}
	assert.Equal(t, 0.00003, tieredOutputRate(&p, 250000, serviceTier{}))
}

// =========================================================================
// 9.
// extractCostInput — usage extraction
// =========================================================================

// TestExtractCostInput_ChatResponse checks that chat usage is surfaced with
// its prompt and completion token counts.
func TestExtractCostInput_ChatResponse(t *testing.T) {
	usage := &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150}
	resp := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{Usage: usage},
	}
	input := extractCostInput(resp)
	require.NotNil(t, input.usage)
	assert.Equal(t, 100, input.usage.PromptTokens)
	assert.Equal(t, 50, input.usage.CompletionTokens)
}

// TestExtractCostInput_EmbeddingResponse checks that embedding usage is
// surfaced with its prompt token count.
func TestExtractCostInput_EmbeddingResponse(t *testing.T) {
	usage := &schemas.BifrostLLMUsage{PromptTokens: 200, TotalTokens: 200}
	resp := &schemas.BifrostResponse{
		EmbeddingResponse: &schemas.BifrostEmbeddingResponse{Usage: usage},
	}
	input := extractCostInput(resp)
	require.NotNil(t, input.usage)
	assert.Equal(t, 200, input.usage.PromptTokens)
}

// TestExtractCostInput_ImageResponse checks that image usage lands in
// imageUsage and leaves the LLM usage field nil.
func TestExtractCostInput_ImageResponse(t *testing.T) {
	imgUsage := &schemas.ImageUsage{InputTokens: 100, OutputTokens: 200, TotalTokens: 300}
	resp := &schemas.BifrostResponse{
		ImageGenerationResponse: &schemas.BifrostImageGenerationResponse{Usage: imgUsage},
	}
	input := extractCostInput(resp)
	assert.Nil(t, input.usage)
	require.NotNil(t, input.imageUsage)
	assert.Equal(t, 300, input.imageUsage.TotalTokens)
}

// TestExtractCostInput_TranscriptionWithSeconds checks that transcription
// usage yields both the audio duration and the input token count (as
// PromptTokens).
func TestExtractCostInput_TranscriptionWithSeconds(t *testing.T) {
	sec := 60
	resp := &schemas.BifrostResponse{
		TranscriptionResponse: &schemas.BifrostTranscriptionResponse{
			Usage: &schemas.TranscriptionUsage{
				Seconds:      &sec,
				InputTokens:  bifrost.Ptr(1000),
				OutputTokens: bifrost.Ptr(200),
				TotalTokens:  bifrost.Ptr(1200),
			},
		},
	}
	input := extractCostInput(resp)
	require.NotNil(t, input.usage)
	require.NotNil(t, input.audioSeconds)
	assert.Equal(t, 60, *input.audioSeconds)
	assert.Equal(t, 1000, input.usage.PromptTokens)
}

// TestExtractCostInput_SpeechResponse checks that speech input/output/total
// tokens map to PromptTokens/CompletionTokens/TotalTokens.
func TestExtractCostInput_SpeechResponse(t *testing.T) {
	resp := &schemas.BifrostResponse{
		SpeechResponse: &schemas.BifrostSpeechResponse{
			Usage: &schemas.SpeechUsage{
				InputTokens:  100,
				OutputTokens: 500,
				TotalTokens:  600,
			},
		},
	}
	input := extractCostInput(resp)
	require.NotNil(t, input.usage)
	assert.Equal(t, 100, input.usage.PromptTokens)
	assert.Equal(t, 500, input.usage.CompletionTokens)
	assert.Equal(t, 600, input.usage.TotalTokens)
}

// TestExtractCostInput_VideoResponse checks that a numeric Seconds string is
// parsed into videoSeconds.
func TestExtractCostInput_VideoResponse(t *testing.T) {
	sec := "15"
	resp := &schemas.BifrostResponse{
		VideoGenerationResponse: &schemas.BifrostVideoGenerationResponse{
			Seconds: &sec,
		},
	}
	input := extractCostInput(resp)
	require.NotNil(t, input.videoSeconds)
	assert.Equal(t, 15, *input.videoSeconds)
}

// TestExtractCostInput_VideoResponseInvalidSeconds checks that a non-numeric
// Seconds string leaves videoSeconds nil.
func TestExtractCostInput_VideoResponseInvalidSeconds(t *testing.T) {
	sec := "not-a-number"
	resp := &schemas.BifrostResponse{
		VideoGenerationResponse: &schemas.BifrostVideoGenerationResponse{
			Seconds: &sec,
		},
	}
	input := extractCostInput(resp)
	assert.Nil(t, input.videoSeconds)
}

// =========================================================================
// 10. Semantic cache billing (calculateCostWithCache)
// =========================================================================

// TestCalculateCost_SemanticCacheDirectHit checks that a response flagged as
// a "direct" cache hit costs 0 even though usage and pricing are present.
func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): {
			Model:              "gpt-4o",
			Provider:           "openai",
			Mode:               "chat",
			InputCostPerToken:  bifrost.Ptr(0.000005),
			OutputCostPerToken: bifrost.Ptr(0.000015),
		},
	})
	hitType := "direct"
	resp := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
			ExtraFields: schemas.BifrostResponseExtraFields{
				RequestType:            schemas.ChatCompletionRequest,
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o",
				CacheDebug: &schemas.BifrostCacheDebug{
					CacheHit: true,
					HitType:  &hitType,
				},
			},
		},
	}
	cost := mc.CalculateCost(resp, nil)
	assert.Equal(t, 0.0, cost)
}

// TestCalculateCost_SemanticCacheSemanticHit checks that a "semantic" cache
// hit is billed only for the embedding lookup, not for the chat usage.
func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
	embProvider := "openai"
	embModel := "text-embedding-3-small"
	embTokens := 500
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): {
			Model:              "gpt-4o",
			Provider:           "openai",
			Mode:               "chat",
			InputCostPerToken:  bifrost.Ptr(0.000005),
			OutputCostPerToken: bifrost.Ptr(0.000015),
		},
		makeKey("text-embedding-3-small", "openai", "embedding"): {
			Model:             "text-embedding-3-small",
			Provider:          "openai",
			Mode:              "embedding",
			InputCostPerToken: bifrost.Ptr(0.00000002),
		},
	})
	hitType := "semantic"
	resp := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
			ExtraFields: schemas.BifrostResponseExtraFields{
				RequestType:            schemas.ChatCompletionRequest,
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o",
				CacheDebug: &schemas.BifrostCacheDebug{
					CacheHit:     true,
					HitType:      &hitType,
					ProviderUsed: &embProvider,
					ModelUsed:    &embModel,
					InputTokens:  &embTokens,
				},
			},
		},
	}
	cost := mc.CalculateCost(resp, nil)
	// Only embedding cost: 500 * 0.00000002 = 0.00001
	assert.InDelta(t, 0.00001, cost, 1e-12)
}

// TestCalculateCost_SemanticCacheMiss checks that a cache miss is billed for
// the chat usage plus the embedding lookup recorded in CacheDebug.
func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
	embProvider := "openai"
	embModel := "text-embedding-3-small"
	embTokens := 500
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): {
			Model:              "gpt-4o",
			Provider:           "openai",
			Mode:               "chat",
			InputCostPerToken:  bifrost.Ptr(0.000005),
			OutputCostPerToken: bifrost.Ptr(0.000015),
		},
		makeKey("text-embedding-3-small", "openai", "embedding"): {
			Model:             "text-embedding-3-small",
			Provider:          "openai",
			Mode:              "embedding",
			InputCostPerToken: bifrost.Ptr(0.00000002),
		},
	})
	resp := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500},
			ExtraFields: schemas.BifrostResponseExtraFields{
				RequestType:            schemas.ChatCompletionRequest,
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o",
				CacheDebug: &schemas.BifrostCacheDebug{
					CacheHit:     false,
					ProviderUsed: &embProvider,
					ModelUsed:    &embModel,
					InputTokens:  &embTokens,
				},
			},
		},
	}
	cost := mc.CalculateCost(resp, nil)
	// Base cost: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	// Embedding cost: 500 * 0.00000002 = 0.00001
	// Total: 0.01251
	assert.InDelta(t, 0.01251, cost, 1e-12)
}

// TestCalculateCost_SemanticCacheHitNoEmbeddingInfo checks that a cache hit
// with no embedding provider/model/token info costs 0.
func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
	mc := testCatalogWithPricing(nil)
	resp := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ExtraFields: schemas.BifrostResponseExtraFields{
				CacheDebug: &schemas.BifrostCacheDebug{
					CacheHit: true,
					// No ProviderUsed, ModelUsed, InputTokens
				},
			},
		},
	}
	cost := mc.CalculateCost(resp, nil)
	assert.Equal(t, 0.0, cost)
}

// =========================================================================
// 11. CalculateCost integration — end-to-end
// =========================================================================

// TestCalculateCost_NilResponse checks that a nil response costs 0.
func TestCalculateCost_NilResponse(t *testing.T) {
	mc := testCatalogWithPricing(nil)
	assert.Equal(t, 0.0, mc.CalculateCost(nil, nil))
}

// TestCalculateCost_ProviderComputedCostPassthrough checks that a total cost
// already present on the usage is returned as-is instead of being recomputed
// from catalog pricing.
func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})
	resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
		Cost: &schemas.BifrostCost{
			TotalCost: 0.99, // Provider already calculated
		},
	})
	cost := mc.CalculateCost(resp, nil)
	assert.Equal(t, 0.99, cost)
}

// TestCalculateCost_NoUsageData checks that a response without usage costs 0
// even when pricing exists for the model.
func TestCalculateCost_NoUsageData(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})
	resp := makeChatResponse(schemas.OpenAI, "gpt-4o", nil)
	cost := mc.CalculateCost(resp, nil)
	assert.Equal(t, 0.0, cost)
}

// TestCalculateCost_ChatCompletion_GPT4o checks the end-to-end cost of a chat
// completion with no cached tokens against the catalog's gpt-4o pricing.
func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) {
	// GPT-4o: $5/M input, $15/M output, cache_read=$0.5/M
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): {
			Model:                   "gpt-4o",
			Provider:                "openai",
			Mode:                    "chat",
			InputCostPerToken:       bifrost.Ptr(0.000005),
			OutputCostPerToken:      bifrost.Ptr(0.000015),
			CacheReadInputTokenCost: bifrost.Ptr(0.0000005),
		},
	})
	resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
		PromptTokens:     10000,
		CompletionTokens: 2000,
		TotalTokens:      12000,
	})
	cost := mc.CalculateCost(resp, nil)
	// 10000*0.000005 + 2000*0.000015 = 0.05 + 0.03 = 0.08
	assert.InDelta(t, 0.08, cost, 1e-12)
}

func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) {
	// Claude 3.5 Sonnet (Bedrock): $3/M input, $15/M output, cache_read=$0.3/M, cache_creation=$3.75/M
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): {
			Model:                             "anthropic.claude-3-5-sonnet-20241022-v2:0",
			Provider:                          "bedrock",
			Mode:                              "chat",
			InputCostPerToken:                 bifrost.Ptr(0.000003),
			OutputCostPerToken:                bifrost.Ptr(0.000015),
			CacheReadInputTokenCost:           bifrost.Ptr(0.0000003),
			CacheCreationInputTokenCost:       bifrost.Ptr(0.00000375),
			InputCostPerTokenAbove200kTokens:  bifrost.Ptr(0.000006),
			OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
		},
	})
	resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20241022-v2:0", &schemas.BifrostLLMUsage{
		PromptTokens:     5000,
		CompletionTokens: 1000,
		TotalTokens:      6000,
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens:  3000, // 3000 cache read tokens
			CachedWriteTokens: 500,  // 500 cache creation tokens
		},
	})
	cost := mc.CalculateCost(resp, nil)
	// Both cached read and write tokens are input-side deductions from promptTokens.
// Input: (5000-3000-500)*0.000003 + 3000*0.0000003 + 500*0.00000375 = 0.0045 + 0.0009 + 0.001875 = 0.007275 // Output: 1000*0.000015 = 0.015 // Total: 0.007275 + 0.015 = 0.022275 assert.InDelta(t, 0.022275, cost, 1e-12) } func TestCalculateCost_Embedding(t *testing.T) { // Titan Embed Text v1: $0.1/M input mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("amazon.titan-embed-text-v1", "bedrock", "embedding"): { Model: "amazon.titan-embed-text-v1", Provider: "bedrock", Mode: "embedding", InputCostPerToken: bifrost.Ptr(0.0000001), OutputCostPerToken: bifrost.Ptr(0.0), }, }) resp := makeEmbeddingResponse(schemas.Bedrock, "amazon.titan-embed-text-v1", &schemas.BifrostLLMUsage{ PromptTokens: 10000, TotalTokens: 10000, }) cost := mc.CalculateCost(resp, nil) // 10000 * 0.0000001 = 0.001 assert.InDelta(t, 0.001, cost, 1e-12) } func TestCalculateCost_Rerank(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("amazon.rerank-v1:0", "bedrock", "rerank"): { Model: "amazon.rerank-v1:0", Provider: "bedrock", Mode: "rerank", InputCostPerToken: bifrost.Ptr(0.0), OutputCostPerToken: bifrost.Ptr(0.0), }, }) resp := makeRerankResponse(schemas.Bedrock, "amazon.rerank-v1:0", &schemas.BifrostLLMUsage{ PromptTokens: 500, TotalTokens: 500, }) cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } func TestCalculateCost_ImageGeneration(t *testing.T) { // dall-e-3 via aiml: output_cost_per_image=$0.052 mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("dall-e-3", "aiml", "image_generation"): { Model: "dall-e-3", Provider: "aiml", Mode: "image_generation", OutputCostPerImage: bifrost.Ptr(0.052), }, }) resp := makeImageResponse("aiml", "dall-e-3", &schemas.ImageUsage{ OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3}, }) cost := mc.CalculateCost(resp, nil) // 3 * 0.052 = 0.156 assert.InDelta(t, 0.156, cost, 1e-12) } func 
TestCalculateCost_StreamRequestTypeNormalized(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) // Stream request type should be normalized to base type resp := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionStreamRequest, Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o", }, }, } cost := mc.CalculateCost(resp, nil) assert.InDelta(t, 0.0125, cost, 1e-12) } func TestCalculateCost_NoPricingData(t *testing.T) { mc := testCatalogWithPricing(nil) resp := makeChatResponse(schemas.OpenAI, "unknown-model", &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }) cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } // ========================================================================= // 12. 
Pricing resolution — getPricing fallback logic // ========================================================================= func TestGetPricing_DirectLookup(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_GeminiFallsBackToVertex(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gemini-2.0-flash", "vertex", "chat"): { Model: "gemini-2.0-flash", Provider: "vertex", Mode: "chat", InputCostPerToken: bifrost.Ptr(0.0000001), OutputCostPerToken: bifrost.Ptr(0.0000004), }, }) p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"}) assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func TestGetPricing_VertexStripsProviderPrefix(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004), }) p := mc.resolvePricing("vertex", "google/gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "vertex"}) assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func TestGetPricing_BedrockAddsAnthropicPrefix(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): chatPricing(0.000003, 0.000015), }) p := mc.resolvePricing("bedrock", "claude-3-5-sonnet-20241022-v2:0", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "bedrock"}) assert.Equal(t, 0.000003, derefF(p.InputCostPerToken)) } func TestGetPricing_ResponsesFallsBackToChat(t *testing.T) { mc := 
testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"}) assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_ResponsesStreamFallsBackToChat(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesStreamRequest, PricingLookupScopes{Provider: "openai"}) assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_RealtimeFallsBackToChat(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) p := mc.resolvePricing("openai", "gpt-4o", "", schemas.RealtimeRequest, PricingLookupScopes{Provider: "openai"}) assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004), }) // gemini provider + responses request → try vertex + responses → try vertex + chat p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "gemini"}) assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func TestGetPricing_NotFound(t *testing.T) { mc := testCatalogWithPricing(nil) p := mc.resolvePricing("openai", "nonexistent", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) assert.Nil(t, p) } // ========================================================================= // 13. 
resolvePricing — deployment fallback // ========================================================================= func TestResolvePricing_DeploymentFallback(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("my-deployment", "openai", "chat"): chatPricing(0.000005, 0.000015), }) // Model not found directly, but deployment matches p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{}) require.NotNil(t, p) assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestResolvePricing_ResolvedModelHasPriority(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), makeKey("my-deployment", "openai", "chat"): chatPricing(0.000001, 0.000002), }) // Resolved model ("my-deployment") is looked up first and has priority // over the originally requested model ("gpt-4o"). p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{}) require.NotNil(t, p) assert.Equal(t, 0.000001, derefF(p.InputCostPerToken)) } func TestResolvePricing_NothingFound(t *testing.T) { mc := testCatalogWithPricing(nil) p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest, PricingLookupScopes{}) assert.Nil(t, p) } // ========================================================================= // 14. 
normalizeStreamRequestType // ========================================================================= func TestNormalizeStreamRequestType(t *testing.T) { tests := []struct { input schemas.RequestType expected schemas.RequestType }{ {schemas.ChatCompletionStreamRequest, schemas.ChatCompletionRequest}, {schemas.TextCompletionStreamRequest, schemas.TextCompletionRequest}, {schemas.ResponsesStreamRequest, schemas.ResponsesRequest}, {schemas.SpeechStreamRequest, schemas.SpeechRequest}, {schemas.TranscriptionStreamRequest, schemas.TranscriptionRequest}, {schemas.ImageGenerationStreamRequest, schemas.ImageGenerationRequest}, {schemas.ImageEditStreamRequest, schemas.ImageEditRequest}, {schemas.RealtimeRequest, schemas.RealtimeRequest}, // realtime is its own base type {schemas.ChatCompletionRequest, schemas.ChatCompletionRequest}, // non-stream unchanged {schemas.EmbeddingRequest, schemas.EmbeddingRequest}, // non-stream unchanged } for _, tt := range tests { assert.Equal(t, tt.expected, normalizeStreamRequestType(tt.input), "for input %s", tt.input) } } // ========================================================================= // 15. 
responsesUsageToBifrostUsage // ========================================================================= func TestResponsesUsageToBifrostUsage_Basic(t *testing.T) { u := &schemas.ResponsesResponseUsage{ InputTokens: 100, OutputTokens: 50, TotalTokens: 150, } result := responsesUsageToBifrostUsage(u) assert.Equal(t, 100, result.PromptTokens) assert.Equal(t, 50, result.CompletionTokens) assert.Equal(t, 150, result.TotalTokens) assert.Nil(t, result.PromptTokensDetails) assert.Nil(t, result.CompletionTokensDetails) } func TestResponsesUsageToBifrostUsage_WithTokenDetails(t *testing.T) { numQueries := 2 u := &schemas.ResponsesResponseUsage{ InputTokens: 1000, OutputTokens: 500, TotalTokens: 1500, InputTokensDetails: &schemas.ResponsesResponseInputTokens{ CachedReadTokens: 300, CachedWriteTokens: 50, TextTokens: 600, AudioTokens: 50, ImageTokens: 50, }, OutputTokensDetails: &schemas.ResponsesResponseOutputTokens{ ReasoningTokens: 100, NumSearchQueries: &numQueries, }, } result := responsesUsageToBifrostUsage(u) require.NotNil(t, result.PromptTokensDetails) assert.Equal(t, 300, result.PromptTokensDetails.CachedReadTokens) assert.Equal(t, 50, result.PromptTokensDetails.CachedWriteTokens) assert.Equal(t, 600, result.PromptTokensDetails.TextTokens) assert.Equal(t, 50, result.PromptTokensDetails.AudioTokens) assert.Equal(t, 50, result.PromptTokensDetails.ImageTokens) require.NotNil(t, result.CompletionTokensDetails) assert.Equal(t, 100, result.CompletionTokensDetails.ReasoningTokens) require.NotNil(t, result.CompletionTokensDetails.NumSearchQueries) assert.Equal(t, 2, *result.CompletionTokensDetails.NumSearchQueries) } // ========================================================================= // 16. 
Edge cases // ========================================================================= func TestCalculateCost_200kTier_EndToEnd(t *testing.T) { // Claude 3.5 Sonnet Bedrock with 200k tier pricing mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock", "chat"): { Model: "anthropic.claude-3-5-sonnet-20240620-v1:0", Provider: "bedrock", Mode: "chat", InputCostPerToken: bifrost.Ptr(0.000003), OutputCostPerToken: bifrost.Ptr(0.000015), InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006), OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003), CacheReadInputTokenCost: bifrost.Ptr(0.0000003), CacheCreationInputTokenCost: bifrost.Ptr(0.00000375), CacheReadInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000006), CacheCreationInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000075), }, }) resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20240620-v1:0", &schemas.BifrostLLMUsage{ PromptTokens: 190000, CompletionTokens: 20000, TotalTokens: 210000, // Above 200k }) cost := mc.CalculateCost(resp, nil) // Tiered rate: input=0.000006, output=0.00003 // 190000*0.000006 + 20000*0.00003 = 1.14 + 0.6 = 1.74 assert.InDelta(t, 1.74, cost, 1e-9) } func TestCalculateCost_272kTier_EndToEnd(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("claude-3-7-sonnet", "anthropic", "chat"): { Model: "claude-3-7-sonnet", Provider: "anthropic", Mode: "chat", InputCostPerToken: new(0.000003), OutputCostPerToken: new(0.000015), InputCostPerTokenAbove200kTokens: new(0.000006), OutputCostPerTokenAbove200kTokens: new(0.00003), InputCostPerTokenAbove272kTokens: new(0.000009), OutputCostPerTokenAbove272kTokens: new(0.000045), CacheReadInputTokenCost: new(0.0000003), CacheReadInputTokenCostAbove200kTokens: new(0.0000006), CacheReadInputTokenCostAbove272kTokens: new(0.0000009), }, }) resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", 
&schemas.BifrostLLMUsage{ PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000, // Above 272k }) cost := mc.CalculateCost(resp, nil) // Tiered rate: input=0.000009, output=0.000045 // 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60 assert.InDelta(t, 3.60, cost, 1e-9) } func TestCalculateCost_272kTier_CacheReadFallbackChain(t *testing.T) { // Verifies the 272k cache read rate takes precedence over 200k and base rates mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("claude-3-7-sonnet", "anthropic", "chat"): { Model: "claude-3-7-sonnet", Provider: "anthropic", Mode: "chat", InputCostPerToken: new(0.000003), OutputCostPerToken: new(0.000015), InputCostPerTokenAbove272kTokens: new(0.000009), OutputCostPerTokenAbove272kTokens: new(0.000045), CacheReadInputTokenCost: new(0.0000003), CacheReadInputTokenCostAbove200kTokens: new(0.0000006), CacheReadInputTokenCostAbove272kTokens: new(0.0000009), }, }) resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", &schemas.BifrostLLMUsage{ PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000, PromptTokensDetails: &schemas.ChatPromptTokensDetails{ CachedReadTokens: 50000, }, }) cost := mc.CalculateCost(resp, nil) // Non-cached input: (250000-50000) * 0.000009 = 200000 * 0.000009 = 1.80 // Cached read (272k rate): 50000 * 0.0000009 = 0.045 // Output: 30000 * 0.000045 = 1.35 // Total: 1.80 + 0.045 + 1.35 = 3.195 assert.InDelta(t, 3.195, cost, 1e-9) } // ========================================================================= // Priority tier tests // ========================================================================= func TestComputeTextCost_PriorityUsesInputOutputPriorityRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenPriority = new(0.000006) p.OutputCostPerTokenPriority = new(0.00003) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, } cost := computeTextCost(&p, usage, 
serviceTier{isPriority: true}) // Uses priority rates: 1000*0.000006 + 500*0.00003 = 0.006 + 0.015 = 0.021 assert.InDelta(t, 0.021, cost, 1e-12) } func TestComputeTextCost_NonPriorityIgnoresPriorityRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenPriority = new(0.000006) p.OutputCostPerTokenPriority = new(0.00003) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, } cost := computeTextCost(&p, usage, serviceTier{}) // Uses base rates, ignores priority fields: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105 assert.InDelta(t, 0.0105, cost, 1e-12) } func TestComputeTextCost_Priority272kTier(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenPriority = new(0.000006) p.OutputCostPerTokenPriority = new(0.00003) p.InputCostPerTokenAbove272kTokens = new(0.000009) p.InputCostPerTokenAbove272kTokensPriority = new(0.000012) p.OutputCostPerTokenAbove272kTokens = new(0.000045) p.OutputCostPerTokenAbove272kTokensPriority = new(0.00006) usage := &schemas.BifrostLLMUsage{ PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000, } cost := computeTextCost(&p, usage, serviceTier{isPriority: true}) // Uses 272k priority rates: 250000*0.000012 + 30000*0.00006 = 3.00 + 1.80 = 4.80 assert.InDelta(t, 4.80, cost, 1e-9) } func TestComputeTextCost_Priority272kTierFallsBackToNonPriority272k(t *testing.T) { // Priority flag set but no priority-specific 272k rate — fall back to non-priority 272k p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenAbove272kTokens = new(0.000009) p.OutputCostPerTokenAbove272kTokens = new(0.000045) usage := &schemas.BifrostLLMUsage{ PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000, } cost := computeTextCost(&p, usage, serviceTier{isPriority: true}) // Falls back to non-priority 272k rate: 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60 assert.InDelta(t, 3.60, cost, 1e-9) } func TestComputeTextCost_PriorityCacheReadRate(t 
*testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenPriority = new(0.000006) p.OutputCostPerTokenPriority = new(0.00003) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostPriority = new(0.0000006) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, PromptTokensDetails: &schemas.ChatPromptTokensDetails{ CachedReadTokens: 400, }, } cost := computeTextCost(&p, usage, serviceTier{isPriority: true}) // Non-cached input: (1000-400)*0.000006 = 600*0.000006 = 0.0036 // Cached read (priority rate): 400*0.0000006 = 0.00024 // Output: 500*0.00003 = 0.015 // Total: 0.0036 + 0.00024 + 0.015 = 0.01884 assert.InDelta(t, 0.01884, cost, 1e-12) } func TestCalculateCost_PriorityTier_EndToEnd(t *testing.T) { tierStr := "priority" mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", InputCostPerToken: new(0.000005), OutputCostPerToken: new(0.000015), InputCostPerTokenPriority: new(0.000010), OutputCostPerTokenPriority: new(0.000030), }, }) resp := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ServiceTier: &tierStr, Usage: &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionRequest, Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o", ResolvedModelUsed: "gpt-4o", }, }, } cost := mc.CalculateCost(resp, nil) // Priority rates: 1000*0.000010 + 500*0.000030 = 0.010 + 0.015 = 0.025 assert.InDelta(t, 0.025, cost, 1e-12) } func TestCalculateCost_NonPriorityServiceTier_UsesBaseRate(t *testing.T) { tierStr := "auto" mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", InputCostPerToken: new(0.000005), OutputCostPerToken: new(0.000015), 
InputCostPerTokenPriority: new(0.000010), OutputCostPerTokenPriority: new(0.000030), }, }) resp := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ServiceTier: &tierStr, Usage: &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionRequest, Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o", ResolvedModelUsed: "gpt-4o", }, }, } cost := mc.CalculateCost(resp, nil) // Base rates (not priority): 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125 assert.InDelta(t, 0.0125, cost, 1e-12) } func TestTieredCacheReadRate_FallbackOrder(t *testing.T) { // 272k rate takes precedence over 200k, 200k over base, base over input rate t.Run("uses_272k_when_above_272k", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) assert.Equal(t, 0.0000009, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{})) }) t.Run("uses_200k_when_between_200k_and_272k", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) assert.Equal(t, 0.0000006, tieredCacheReadInputTokenRate(&p, 230000, serviceTier{})) }) t.Run("uses_base_cache_rate_when_below_200k", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) assert.Equal(t, 0.0000003, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{})) }) t.Run("falls_back_to_input_rate_when_no_cache_rate_set", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) // No cache rates set at all assert.Equal(t, 0.000003, 
tieredCacheReadInputTokenRate(&p, 280000, serviceTier{})) }) t.Run("priority_uses_272k_priority_rate", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostPriority = new(0.0000006) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) p.CacheReadInputTokenCostAbove272kTokensPriority = new(0.0000012) assert.Equal(t, 0.0000012, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isPriority: true})) }) t.Run("priority_falls_back_to_272k_non_priority_when_priority_rate_missing", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) assert.Equal(t, 0.0000009, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isPriority: true})) }) t.Run("priority_uses_priority_base_cache_rate_below_tiers", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostPriority = new(0.0000006) assert.Equal(t, 0.0000006, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isPriority: true})) }) t.Run("flex_uses_flex_cache_rate", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostFlex = new(0.0000005) assert.Equal(t, 0.0000005, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isFlex: true})) }) t.Run("flex_uses_flex_cache_rate_regardless_of_token_count", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostFlex = new(0.0000005) p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009) // Even above 272k, flex flat rate takes precedence assert.Equal(t, 0.0000005, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isFlex: true})) }) t.Run("flex_falls_back_to_base_cache_rate_when_no_flex_cache_rate", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCost = new(0.0000003) // No flex cache rate — falls back 
to base cache rate assert.Equal(t, 0.0000003, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isFlex: true})) }) t.Run("flex_wins_over_272k_priority_and_priority_base_when_all_present", func(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.CacheReadInputTokenCostAbove272kTokens = new(5e-7) p.CacheReadInputTokenCostFlex = new(1.3e-7) p.CacheReadInputTokenCostPriority = new(5e-7) p.CacheReadInputTokenCostAbove272kTokensPriority = new(0.000001) // token count exceeds 272k — but flex flat rate should still win assert.Equal(t, 1.3e-7, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isFlex: true})) }) } // ========================================================================= // tierFromString tests // ========================================================================= func TestTierFromString_Priority(t *testing.T) { s := "priority" tier := tierFromString(&s) assert.True(t, tier.isPriority) assert.False(t, tier.isFlex) } func TestTierFromString_Flex(t *testing.T) { s := "flex" tier := tierFromString(&s) assert.False(t, tier.isPriority) assert.True(t, tier.isFlex) } func TestTierFromString_Default(t *testing.T) { for _, s := range []string{"auto", "default", "", "unknown"} { tier := tierFromString(&s) assert.False(t, tier.isPriority, "expected no priority for %q", s) assert.False(t, tier.isFlex, "expected no flex for %q", s) } } func TestTierFromString_Nil(t *testing.T) { tier := tierFromString(nil) assert.False(t, tier.isPriority) assert.False(t, tier.isFlex) } // ========================================================================= // Flex tier tests // ========================================================================= func TestComputeTextCost_FlexUsesFlexRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenFlex = new(0.0000015) p.OutputCostPerTokenFlex = new(0.0000075) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, } cost := computeTextCost(&p, usage, 
serviceTier{isFlex: true}) // Flex rates: 1000*0.0000015 + 500*0.0000075 = 0.0015 + 0.00375 = 0.00525 assert.InDelta(t, 0.00525, cost, 1e-12) } func TestComputeTextCost_NonFlexIgnoresFlexRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenFlex = new(0.0000015) p.OutputCostPerTokenFlex = new(0.0000075) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, } cost := computeTextCost(&p, usage, serviceTier{}) // Base rates, flex fields ignored: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105 assert.InDelta(t, 0.0105, cost, 1e-12) } func TestComputeTextCost_FlexIgnoresTokenTiers(t *testing.T) { // Flex is a flat rate — token-count tiers (272k, 200k, 128k) do not apply. p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenFlex = new(0.0000015) p.OutputCostPerTokenFlex = new(0.0000075) p.InputCostPerTokenAbove272kTokens = new(0.000009) p.OutputCostPerTokenAbove272kTokens = new(0.000045) usage := &schemas.BifrostLLMUsage{ PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000, } cost := computeTextCost(&p, usage, serviceTier{isFlex: true}) // Flex flat rate overrides 272k tier: 250000*0.0000015 + 30000*0.0000075 = 0.375 + 0.225 = 0.60 assert.InDelta(t, 0.60, cost, 1e-9) } func TestComputeTextCost_FlexCacheReadRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) p.InputCostPerTokenFlex = new(0.0000015) p.OutputCostPerTokenFlex = new(0.0000075) p.CacheReadInputTokenCost = new(0.0000003) p.CacheReadInputTokenCostFlex = new(0.0000006) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, PromptTokensDetails: &schemas.ChatPromptTokensDetails{ CachedReadTokens: 400, }, } cost := computeTextCost(&p, usage, serviceTier{isFlex: true}) // Non-cached input: (1000-400)*0.0000015 = 600*0.0000015 = 0.0009 // Cached read (flex rate): 400*0.0000006 = 0.00024 // Output: 500*0.0000075 = 0.00375 // Total: 0.0009 + 0.00024 + 0.00375 = 0.00489 
assert.InDelta(t, 0.00489, cost, 1e-12) } func TestComputeTextCost_FlexFallsBackToBaseWhenNoFlexRate(t *testing.T) { // isFlex set but no flex fields configured — falls back to base rates. p := chatPricing(0.000003, 0.000015) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, } cost := computeTextCost(&p, usage, serviceTier{isFlex: true}) // Base rates used as fallback: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105 assert.InDelta(t, 0.0105, cost, 1e-12) } func TestCalculateCost_FlexTier_EndToEnd(t *testing.T) { tierStr := "flex" mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", InputCostPerToken: new(0.000005), OutputCostPerToken: new(0.000015), InputCostPerTokenFlex: new(0.0000025), OutputCostPerTokenFlex: new(0.0000075), }, }) resp := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ServiceTier: &tierStr, Usage: &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionRequest, Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o", ResolvedModelUsed: "gpt-4o", }, }, } cost := mc.CalculateCost(resp, nil) // Flex rates: 1000*0.0000025 + 500*0.0000075 = 0.0025 + 0.00375 = 0.00625 assert.InDelta(t, 0.00625, cost, 1e-12) } func TestCalculateCost_FlexTier_FallsBackToBaseWhenNoFlexRate(t *testing.T) { tierStr := "flex" mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) resp := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ServiceTier: &tierStr, Usage: &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionRequest, 
Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o", ResolvedModelUsed: "gpt-4o", }, }, } cost := mc.CalculateCost(resp, nil) // No flex rates configured — falls back to base: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125 assert.InDelta(t, 0.0125, cost, 1e-12) } func TestCalculateCost_ProviderCostZeroTotalStillCalculates(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) // Provider cost present but TotalCost is 0 → our calculation runs resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, Cost: &schemas.BifrostCost{ TotalCost: 0, }, }) cost := mc.CalculateCost(resp, nil) assert.InDelta(t, 0.0125, cost, 1e-12) } func TestCalculateCost_AllCachedTokens(t *testing.T) { // All prompt tokens are from cache p := chatPricing(0.000005, 0.000015) p.CacheReadInputTokenCost = bifrost.Ptr(0.0000005) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 0, TotalTokens: 1000, PromptTokensDetails: &schemas.ChatPromptTokensDetails{ CachedReadTokens: 1000, // All cached }, } cost := computeTextCost(&p, usage, serviceTier{}) // Non-cached: 0, cached: 1000*0.0000005 = 0.0005 assert.InDelta(t, 0.0005, cost, 1e-12) } // ========================================================================= // Nil usage fallbacks — per-unit pricing when no token data is reported // ========================================================================= func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) { // Image response exists but Usage is nil — should default to 1 image with per-image pricing pricing := configstoreTables.TableModelPricing{ Model: "dall-e-3", Provider: "openai", Mode: "image_generation", InputCostPerToken: bifrost.Ptr(0.0), OutputCostPerImage: bifrost.Ptr(0.04), } mc := 
testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("dall-e-3", "openai", "image_generation"): pricing, }) resp := makeImageResponse("openai", "dall-e-3", nil) cost := mc.CalculateCost(resp, nil) // 1 image * $0.04 = $0.04 assert.InDelta(t, 0.04, cost, 1e-12) } func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testing.T) { // Both input and output per-image pricing, but no NumInputImages set pricing := configstoreTables.TableModelPricing{ Model: "test-image-model", Provider: "test", Mode: "image_generation", InputCostPerImage: bifrost.Ptr(0.01), OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("test-image-model", "test", "image_generation"): pricing, }) resp := makeImageResponse("test", "test-image-model", nil) cost := mc.CalculateCost(resp, nil) // NumInputImages is 0 (not populated from request), so only output pricing applies // 1 output image * $0.04 = $0.04 assert.InDelta(t, 0.04, cost, 1e-12) } func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) { // Input + output per-image pricing with NumInputImages populated from request pricing := configstoreTables.TableModelPricing{ Model: "gpt-image-1", Provider: "openai", Mode: "image_generation", InputCostPerImage: bifrost.Ptr(0.01), OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-image-1", "openai", "image_generation"): pricing, }) resp := makeImageResponse("openai", "gpt-image-1", &schemas.ImageUsage{ NumInputImages: 2, }) cost := mc.CalculateCost(resp, nil) // 2 input images * $0.01 + 1 output image * $0.04 = $0.06 assert.InDelta(t, 0.06, cost, 1e-12) } func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) { // Output image count derived from len(Data) via populateOutputImageCount pricing := configstoreTables.TableModelPricing{ Model: "dall-e-3", Provider: "openai", Mode: 
"image_generation", OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("dall-e-3", "openai", "image_generation"): pricing, }) resp := &schemas.BifrostResponse{ ImageGenerationResponse: &schemas.BifrostImageGenerationResponse{ Data: []schemas.ImageData{ {URL: "https://example.com/img1.png", Index: 0}, {URL: "https://example.com/img2.png", Index: 1}, {URL: "https://example.com/img3.png", Index: 2}, }, ExtraFields: schemas.BifrostResponseExtraFields{ RequestType: schemas.ImageGenerationRequest, Provider: "openai", OriginalModelRequested: "dall-e-3", }, }, } cost := mc.CalculateCost(resp, nil) // 3 output images * $0.04 = $0.12 assert.InDelta(t, 0.12, cost, 1e-12) } func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T) { // No per-image pricing and no tokens — should return 0 pricing := configstoreTables.TableModelPricing{ Model: "token-only-model", Provider: "test", Mode: "image_generation", InputCostPerToken: bifrost.Ptr(0.000001), OutputCostPerToken: bifrost.Ptr(0.000002), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("token-only-model", "test", "image_generation"): pricing, }) resp := makeImageResponse("test", "token-only-model", nil) cost := mc.CalculateCost(resp, nil) // No per-image pricing and all tokens are zero → 0 assert.InDelta(t, 0.0, cost, 1e-12) } func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T) { // Usage exists but all fields are zero — same as nil usage, should use per-image pricing pricing := configstoreTables.TableModelPricing{ Model: "dall-e-3", Provider: "openai", Mode: "image_generation", OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("dall-e-3", "openai", "image_generation"): pricing, }) resp := makeImageResponse("openai", "dall-e-3", &schemas.ImageUsage{}) cost := mc.CalculateCost(resp, nil) 
assert.InDelta(t, 0.04, cost, 1e-12) } func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) { // Input has tokens (text prompt), output has no tokens but per-image pricing p := configstoreTables.TableModelPricing{ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), OutputCostPerImage: bifrost.Ptr(0.04), } usage := &schemas.ImageUsage{ InputTokens: 500, OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2}, } cost := computeImageCost(&p, usage, "", "", serviceTier{}) // Input: 500 tokens * $0.000005 = $0.0025 // Output: no output tokens → falls back to 2 images * $0.04 = $0.08 assert.InDelta(t, 0.0825, cost, 1e-12) } func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) { // Input has no tokens but per-image count, output has tokens p := configstoreTables.TableModelPricing{ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), InputCostPerImage: bifrost.Ptr(0.01), } usage := &schemas.ImageUsage{ NumInputImages: 3, OutputTokens: 1000, } cost := computeImageCost(&p, usage, "", "", serviceTier{}) // Input: no input tokens → falls back to 3 images * $0.01 = $0.03 // Output: 1000 tokens * $0.000015 = $0.015 assert.InDelta(t, 0.045, cost, 1e-12) } func TestComputeImageCost_BothHaveTokens_IgnoresPerImage(t *testing.T) { // Both sides have tokens — per-image pricing is ignored p := configstoreTables.TableModelPricing{ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), InputCostPerImage: bifrost.Ptr(0.01), OutputCostPerImage: bifrost.Ptr(0.04), } usage := &schemas.ImageUsage{ InputTokens: 200, OutputTokens: 800, TotalTokens: 1000, NumInputImages: 3, } cost := computeImageCost(&p, usage, "", "", serviceTier{}) // Input: 200 * $0.000005 = $0.001 (tokens present, per-image ignored) // Output: 800 * $0.000015 = $0.012 (tokens present, per-image ignored) assert.InDelta(t, 0.013, cost, 1e-12) }