Files
bifrost/framework/modelcatalog/pricing_test.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

2165 lines
77 KiB
Go

package modelcatalog
import (
"testing"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
// chatPricing builds a chat-mode TableModelPricing for the fixed
// "test-model"/"test-provider" pair, using the supplied per-token input and
// output rates.
func chatPricing(input, output float64) configstoreTables.TableModelPricing {
	var p configstoreTables.TableModelPricing
	p.Model = "test-model"
	p.Provider = "test-provider"
	p.Mode = "chat"
	p.InputCostPerToken = bifrost.Ptr(input)
	p.OutputCostPerToken = bifrost.Ptr(output)
	return p
}
// testCatalogWithPricing returns a test catalog that uses a no-op logger and
// whose pricing table has been seeded with every entry of the given map.
func testCatalogWithPricing(entries map[string]configstoreTables.TableModelPricing) *ModelCatalog {
	catalog := newTestCatalog(nil, nil)
	catalog.logger = noOpLogger{}
	for key, pricing := range entries {
		catalog.pricingData[key] = pricing
	}
	return catalog
}
// makeChatResponse wraps the given usage in a BifrostResponse whose chat
// response is tagged as a chat completion request for provider and model.
func makeChatResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	chat := &schemas.BifrostChatResponse{Usage: usage}
	chat.ExtraFields.RequestType = schemas.ChatCompletionRequest
	chat.ExtraFields.Provider = provider
	chat.ExtraFields.OriginalModelRequested = model
	return &schemas.BifrostResponse{ChatResponse: chat}
}
// makeEmbeddingResponse wraps the given usage in a BifrostResponse whose
// embedding response is tagged as an embedding request for provider and model.
func makeEmbeddingResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	embedding := &schemas.BifrostEmbeddingResponse{Usage: usage}
	embedding.ExtraFields.RequestType = schemas.EmbeddingRequest
	embedding.ExtraFields.Provider = provider
	embedding.ExtraFields.OriginalModelRequested = model
	return &schemas.BifrostResponse{EmbeddingResponse: embedding}
}
// makeRerankResponse wraps the given usage in a BifrostResponse whose rerank
// response is tagged as a rerank request for provider and model.
func makeRerankResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
	rerank := &schemas.BifrostRerankResponse{Usage: usage}
	rerank.ExtraFields.RequestType = schemas.RerankRequest
	rerank.ExtraFields.Provider = provider
	rerank.ExtraFields.OriginalModelRequested = model
	return &schemas.BifrostResponse{RerankResponse: rerank}
}
// makeImageResponse wraps the given image usage in a BifrostResponse whose
// image generation response is tagged as an image generation request for
// provider and model.
func makeImageResponse(provider schemas.ModelProvider, model string, usage *schemas.ImageUsage) *schemas.BifrostResponse {
	image := &schemas.BifrostImageGenerationResponse{Usage: usage}
	image.ExtraFields.RequestType = schemas.ImageGenerationRequest
	image.ExtraFields.Provider = provider
	image.ExtraFields.OriginalModelRequested = model
	return &schemas.BifrostResponse{ImageGenerationResponse: image}
}
// derefF returns the float64 that f points to, or 0 when f is nil.
func derefF(f *float64) float64 {
	if f != nil {
		return *f
	}
	return 0
}
// =========================================================================
// 1. computeTextCost — unit tests (pure function, no catalog)
// =========================================================================
// TestComputeTextCost_BasicInputOutput checks the plain prompt + completion
// token charge with no cache, tier, or search pricing configured.
func TestComputeTextCost_BasicInputOutput(t *testing.T) {
	// Rates modelled on GPT-4o: $5 per million input, $15 per million output.
	pricing := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}

	// Expected: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// TestComputeTextCost_NilUsage checks that a nil usage yields a zero cost.
func TestComputeTextCost_NilUsage(t *testing.T) {
	pricing := chatPricing(0.000005, 0.000015)
	got := computeTextCost(&pricing, nil, serviceTier{})
	assert.Equal(t, 0.0, got)
}
// TestComputeTextCost_ZeroTokens checks that a usage with all-zero token
// counts yields a zero cost.
func TestComputeTextCost_ZeroTokens(t *testing.T) {
	pricing := chatPricing(0.000005, 0.000015)
	emptyUsage := new(schemas.BifrostLLMUsage)
	got := computeTextCost(&pricing, emptyUsage, serviceTier{})
	assert.Equal(t, 0.0, got)
}
// TestComputeTextCost_WithCachedPromptTokens checks that cached-read and
// cached-write prompt tokens are billed at their own rates and deducted from
// the tokens billed at the regular input rate.
func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) {
	// Rates modelled on Claude 3.5 Sonnet (Bedrock):
	// input $3/M, output $15/M, cache read $0.3/M, cache creation $3.75/M.
	pricing := chatPricing(0.000003, 0.000015)
	pricing.CacheReadInputTokenCost = bifrost.Ptr(0.0000003)
	pricing.CacheCreationInputTokenCost = bifrost.Ptr(0.00000375)

	// Of the 2000 prompt tokens, 1500 are read from cache and 200 are cache
	// creation tokens.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 2000, CompletionTokens: 500, TotalTokens: 2500}
	usage.PromptTokensDetails = &schemas.ChatPromptTokensDetails{CachedReadTokens: 1500, CachedWriteTokens: 200}

	got := computeTextCost(&pricing, usage, serviceTier{})

	// Input side: (2000-1500-200)*0.000003 + 1500*0.0000003 + 200*0.00000375
	//           = 0.0009 + 0.00045 + 0.00075 = 0.0021
	// Output side: 500*0.000015 = 0.0075
	// Sum: 0.0021 + 0.0075 = 0.0096
	assert.InDelta(t, 0.0096, got, 1e-12)
}
// TestComputeTextCost_Tiered200k checks that the above-200k rates are used
// when the total token count exceeds 200k.
func TestComputeTextCost_Tiered200k(t *testing.T) {
	// 200k tier modelled on Claude 3.5 Sonnet (Bedrock): input $6/M, output $30/M.
	pricing := chatPricing(0.000003, 0.000015)
	pricing.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
	pricing.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)

	// 210000 total tokens is above the 200k threshold.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 180000, CompletionTokens: 30000, TotalTokens: 210000}

	// Tiered rates: 180000*0.000006 + 30000*0.00003 = 1.08 + 0.90 = 1.98
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 1.98, got, 1e-9)
}
// TestComputeTextCost_Below200kUsesBaseRate checks that configured 200k-tier
// rates are ignored while the total token count stays below 200k.
func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) {
	pricing := chatPricing(0.000003, 0.000015)
	pricing.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
	pricing.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)

	// 1500 total tokens is well below the 200k threshold.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}

	// Base rates: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.0105, got, 1e-12)
}
// TestComputeTextCost_Tiered272k checks that the above-272k rates are used,
// in preference to the 200k-tier rates, when the total exceeds 272k tokens.
func TestComputeTextCost_Tiered272k(t *testing.T) {
	pricing := chatPricing(0.000003, 0.000015)
	// 200k tier.
	pricing.InputCostPerTokenAbove200kTokens = new(0.000006)
	pricing.OutputCostPerTokenAbove200kTokens = new(0.00003)
	// 272k tier.
	pricing.InputCostPerTokenAbove272kTokens = new(0.000009)
	pricing.OutputCostPerTokenAbove272kTokens = new(0.000045)

	// 280000 total tokens is above the 272k threshold.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000}

	// 272k rates: 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 3.60, got, 1e-9)
}
// TestComputeTextCost_Between200kAnd272kUses200kRate checks that a total
// between the two thresholds is billed at the 200k-tier rates even though
// 272k-tier rates are configured.
func TestComputeTextCost_Between200kAnd272kUses200kRate(t *testing.T) {
	pricing := chatPricing(0.000003, 0.000015)
	// 200k tier.
	pricing.InputCostPerTokenAbove200kTokens = new(0.000006)
	pricing.OutputCostPerTokenAbove200kTokens = new(0.00003)
	// 272k tier.
	pricing.InputCostPerTokenAbove272kTokens = new(0.000009)
	pricing.OutputCostPerTokenAbove272kTokens = new(0.000045)

	// 230000 total tokens: above 200k but not above 272k.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 200000, CompletionTokens: 30000, TotalTokens: 230000}

	// 200k rates: 200000*0.000006 + 30000*0.00003 = 1.20 + 0.90 = 2.10
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 2.10, got, 1e-9)
}
// TestComputeTextCost_272kTierWithCacheRead checks that above 272k tokens the
// cached-read tokens are billed at the 272k-tier cache-read rate and the
// remaining prompt and completion tokens at the 272k-tier rates.
func TestComputeTextCost_272kTierWithCacheRead(t *testing.T) {
	pricing := chatPricing(0.000003, 0.000015)
	pricing.InputCostPerTokenAbove272kTokens = new(0.000009)
	pricing.OutputCostPerTokenAbove272kTokens = new(0.000045)
	pricing.CacheReadInputTokenCost = new(0.0000003)
	pricing.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

	// 280000 total tokens is above 272k; 50000 prompt tokens come from cache.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 250000, CompletionTokens: 30000, TotalTokens: 280000}
	usage.PromptTokensDetails = &schemas.ChatPromptTokensDetails{CachedReadTokens: 50000}

	got := computeTextCost(&pricing, usage, serviceTier{})

	// Non-cached input: (250000-50000)*0.000009 = 200000*0.000009 = 1.80
	// Cached read:      50000*0.0000009 = 0.045
	// Output:           30000*0.000045 = 1.35
	// Sum:              1.80 + 0.045 + 1.35 = 3.195
	assert.InDelta(t, 3.195, got, 1e-9)
}
// TestComputeTextCost_SearchQueryCost checks that a per-query search charge
// is added on top of the token charges.
func TestComputeTextCost_SearchQueryCost(t *testing.T) {
	pricing := chatPricing(0.000003, 0.000015)
	// $0.01 per search query.
	pricing.SearchContextCostPerQuery = bifrost.Ptr(0.01)

	// The completion reports three search queries.
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}
	usage.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{NumSearchQueries: bifrost.Ptr(3)}

	// 1000*0.000003 + 500*0.000015 + 3*0.01 = 0.003 + 0.0075 + 0.03 = 0.0405
	got := computeTextCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.0405, got, 1e-12)
}
// TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate checks that cached
// prompt tokens are billed at the base input rate when the pricing entry has
// no cache rate fields set.
func TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate(t *testing.T) {
	// Only the base rates are configured; every cache rate field stays nil.
	pricing := chatPricing(0.000005, 0.000015)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}
	usage.PromptTokensDetails = &schemas.ChatPromptTokensDetails{CachedReadTokens: 400}

	got := computeTextCost(&pricing, usage, serviceTier{})

	// Non-cached prompt: (1000-400)*0.000005 = 600*0.000005 = 0.003
	// Cached prompt:     400*0.000005 (base input rate) = 0.002
	// Output:            500*0.000015 = 0.0075
	// Sum:               0.003 + 0.002 + 0.0075 = 0.0125
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// =========================================================================
// 2. computeEmbeddingCost — unit tests
// =========================================================================
// TestComputeEmbeddingCost_Basic checks the embedding charge for prompt
// tokens at a per-token input rate.
func TestComputeEmbeddingCost_Basic(t *testing.T) {
	// Rate modelled on Titan Embed Text v1: $0.1 per million input tokens.
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0000001)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 5000, TotalTokens: 5000}

	// 5000 * 0.0000001 = 0.0005
	got := computeEmbeddingCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.0005, got, 1e-12)
}
// TestComputeEmbeddingCost_NilUsage checks that a nil usage yields a zero
// embedding cost.
func TestComputeEmbeddingCost_NilUsage(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = new(0.0000001)

	got := computeEmbeddingCost(&pricing, nil, serviceTier{})
	assert.Equal(t, 0.0, got)
}
// =========================================================================
// 3. computeRerankCost — unit tests
// =========================================================================
// TestComputeRerankCost_Basic checks the rerank charge for prompt and
// completion tokens at their per-token rates.
func TestComputeRerankCost_Basic(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000001)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000002)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 2000, CompletionTokens: 100, TotalTokens: 2100}

	// 2000*0.000001 + 100*0.000002 = 0.002 + 0.0002 = 0.0022
	got := computeRerankCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.0022, got, 1e-12)
}
// TestComputeRerankCost_WithSearchCost checks that the rerank cost is the
// per-query search charge when the token rates are zero.
func TestComputeRerankCost_WithSearchCost(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.SearchContextCostPerQuery = bifrost.Ptr(0.001)

	// Five search queries and no token counts.
	usage := new(schemas.BifrostLLMUsage)
	usage.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{NumSearchQueries: bifrost.Ptr(5)}

	// 5 * 0.001 = 0.005
	got := computeRerankCost(&pricing, usage, serviceTier{})
	assert.InDelta(t, 0.005, got, 1e-12)
}
// TestComputeRerankCost_NilUsage checks that a nil usage yields a zero rerank
// cost.
func TestComputeRerankCost_NilUsage(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = new(0.001)

	got := computeRerankCost(&pricing, nil, serviceTier{})
	assert.Equal(t, 0.0, got)
}
// =========================================================================
// 4. computeSpeechCost — unit tests
// =========================================================================
// TestComputeSpeechCost_TokensPreferredOverDuration checks that, for TTS,
// reported output tokens are billed at the token rate even when a per-second
// output rate and an audio duration are both available.
func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) {
	// Input is text tokens, output is audio tokens; a per-second output rate
	// is also configured but must not be used here.
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0000025)
	pricing.OutputCostPerToken = bifrost.Ptr(0.00001)
	pricing.OutputCostPerSecond = bifrost.Ptr(0.00025)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 200, TotalTokens: 300}
	audioSeconds := bifrost.Ptr(60)

	got := computeSpeechCost(&pricing, usage, audioSeconds, 0, serviceTier{})

	// Input:  100 text tokens * $0.0000025 = $0.00025
	// Output: 200 audio tokens * $0.00001 = $0.002 (token rate, not per-second)
	// Sum:    $0.00225
	assert.InDelta(t, 0.00225, got, 1e-12)
}
// TestComputeSpeechCost_OutputFallsBackToPerSecond checks that, for TTS with
// no output tokens reported, the output side is billed per second of audio.
func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000001)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000002)
	pricing.OutputCostPerSecond = bifrost.Ptr(0.0001)

	// Only prompt tokens are reported; CompletionTokens stays zero.
	usage := &schemas.BifrostLLMUsage{
		PromptTokens: 500,
	}
	audioSeconds := bifrost.Ptr(120)

	got := computeSpeechCost(&pricing, usage, audioSeconds, 0, serviceTier{})

	// Input:  500 * $0.000001 = $0.0005
	// Output: no completion tokens, so 120 * $0.0001 = $0.012
	// Sum:    $0.0125
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// TestComputeSpeechCost_OutputAudioTokenRate checks that, for TTS, output
// tokens are billed at OutputCostPerAudioToken when that rate is set.
func TestComputeSpeechCost_OutputAudioTokenRate(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000001)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000002)
	pricing.OutputCostPerAudioToken = bifrost.Ptr(0.00005)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 200, CompletionTokens: 100, TotalTokens: 300}

	got := computeSpeechCost(&pricing, usage, nil, 0, serviceTier{})

	// Input:  200 * $0.000001 = $0.0002
	// Output: 100 * $0.00005 = $0.005 (audio-token rate preferred)
	// Sum:    $0.0052
	assert.InDelta(t, 0.0052, got, 1e-12)
}
// TestComputeSpeechCost_TokenFallback checks that speech cost is computed
// from token counts when no audio duration is supplied.
func TestComputeSpeechCost_TokenFallback(t *testing.T) {
	pricing := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500}

	// No audio seconds are passed, so token pricing applies:
	// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	got := computeSpeechCost(&pricing, usage, nil, 0, serviceTier{})
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// TestComputeSpeechCost_NilUsageNilSeconds checks that the speech cost is
// zero when neither usage nor audio seconds are supplied.
func TestComputeSpeechCost_NilUsageNilSeconds(t *testing.T) {
	pricing := chatPricing(0.000005, 0.000015)
	got := computeSpeechCost(&pricing, nil, nil, 0, serviceTier{})
	assert.Equal(t, 0.0, got)
}
// =========================================================================
// 5. computeTranscriptionCost — unit tests
// =========================================================================
// TestComputeTranscriptionCost_DurationBased checks per-second input billing
// when only an audio duration is supplied.
func TestComputeTranscriptionCost_DurationBased(t *testing.T) {
	// Rate modelled on assemblyai/nano: input_cost_per_second=0.00010278.
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.InputCostPerSecond = bifrost.Ptr(0.00010278)

	// Five minutes of audio.
	audioSeconds := bifrost.Ptr(300)

	// 300 * 0.00010278 = 0.030834
	got := computeTranscriptionCost(&pricing, nil, audioSeconds, nil, serviceTier{})
	assert.InDelta(t, 0.030834, got, 1e-9)
}
// TestComputeTranscriptionCost_AudioTokenDetails checks that audio and text
// input tokens are billed at their separate rates when input token details
// are supplied.
func TestComputeTranscriptionCost_AudioTokenDetails(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)
	pricing.InputCostPerAudioToken = bifrost.Ptr(0.00001)

	usage := &schemas.BifrostLLMUsage{PromptTokens: 2000, CompletionTokens: 500, TotalTokens: 2500}
	inputDetails := &schemas.TranscriptionUsageInputTokenDetails{AudioTokens: 1500, TextTokens: 500}

	got := computeTranscriptionCost(&pricing, usage, nil, inputDetails, serviceTier{})

	// Audio:  1500*0.00001 = 0.015
	// Text:   500*0.000005 = 0.0025
	// Output: 500*0.000015 = 0.0075
	// Sum:    0.025
	assert.InDelta(t, 0.025, got, 1e-12)
}
// TestComputeTranscriptionCost_TokenFallback checks plain token billing when
// neither an audio duration nor input token details are supplied.
func TestComputeTranscriptionCost_TokenFallback(t *testing.T) {
	pricing := chatPricing(0.000005, 0.000015)
	usage := &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 200, TotalTokens: 1200}

	// 1000*0.000005 + 200*0.000015 = 0.005 + 0.003 = 0.008
	got := computeTranscriptionCost(&pricing, usage, nil, nil, serviceTier{})
	assert.InDelta(t, 0.008, got, 1e-12)
}
// TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration checks that,
// for STT, supplied audio token details are billed by token even when a
// per-second audio rate and a duration are both available.
func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.InputCostPerAudioPerSecond = bifrost.Ptr(0.0001)
	pricing.InputCostPerAudioToken = bifrost.Ptr(0.00001)

	audioSeconds := bifrost.Ptr(60)
	inputDetails := &schemas.TranscriptionUsageInputTokenDetails{AudioTokens: 5000, TextTokens: 1000}

	got := computeTranscriptionCost(&pricing, nil, audioSeconds, inputDetails, serviceTier{})

	// Input is billed by token because audio token details are present:
	//   5000 audio * $0.00001 = $0.05
	//   1000 text * $0.000005 = $0.005
	// Output: usage is nil, so $0
	// Sum: $0.055
	assert.InDelta(t, 0.055, got, 1e-12)
}
// TestComputeTranscriptionCost_DurationFallbackWhenNoTokens checks that, for
// STT with no audio token details and no prompt tokens, the input side is
// billed per second while output tokens are still billed by token.
func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)
	pricing.InputCostPerAudioPerSecond = bifrost.Ptr(0.0001)

	// Only completion tokens are reported; PromptTokens stays zero.
	usage := &schemas.BifrostLLMUsage{CompletionTokens: 200, TotalTokens: 200}
	audioSeconds := bifrost.Ptr(60)

	got := computeTranscriptionCost(&pricing, usage, audioSeconds, nil, serviceTier{})

	// Input:  no audio details and PromptTokens=0, so 60 * $0.0001 = $0.006
	// Output: 200 * $0.000015 = $0.003
	// Sum:    $0.009
	assert.InDelta(t, 0.009, got, 1e-12)
}
// =========================================================================
// 6. computeImageCost — unit tests
// =========================================================================
// TestComputeImageCost_PerImage checks per-image output billing multiplied
// by the number of generated images.
func TestComputeImageCost_PerImage(t *testing.T) {
	// Rate modelled on dall-e-3 (aiml): output_cost_per_image=$0.052.
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerImage = bifrost.Ptr(0.052)

	usage := new(schemas.ImageUsage)
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 2}

	// 2 * 0.052 = 0.104
	got := computeImageCost(&pricing, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.104, got, 1e-12)
}
// TestComputeImageCost_PerImageDefaultsToOne checks that a usage without
// token details is billed as a single image.
func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerImage = bifrost.Ptr(0.052)

	// No token details at all, so the image count defaults to 1.
	usage := new(schemas.ImageUsage)

	got := computeImageCost(&pricing, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.052, got, 1e-12)
}
// TestComputeImageCost_TokenBased checks token-based image billing from the
// top-level input and output token counts.
func TestComputeImageCost_TokenBased(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)

	usage := &schemas.ImageUsage{InputTokens: 1000, OutputTokens: 500, TotalTokens: 1500}

	// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
	got := computeImageCost(&pricing, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// TestComputeImageCost_TokenBasedWithDetails checks token-based image billing
// when text/image token breakdowns are supplied for both input and output.
func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)

	usage := &schemas.ImageUsage{InputTokens: 2000, OutputTokens: 1000, TotalTokens: 3000}
	usage.InputTokensDetails = &schemas.ImageTokenDetails{TextTokens: 500, ImageTokens: 1500}
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{TextTokens: 200, ImageTokens: 800}

	got := computeImageCost(&pricing, usage, "", "", serviceTier{})

	// Input:  (500+1500)*0.000005 = 2000*0.000005 = 0.01
	// Output: (200+800)*0.000015 = 1000*0.000015 = 0.015
	// Sum:    0.025
	assert.InDelta(t, 0.025, got, 1e-12)
}
// TestComputeImageCost_NilUsage checks that a nil image usage yields a zero
// cost.
func TestComputeImageCost_NilUsage(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerImage = new(0.05)

	got := computeImageCost(&pricing, nil, "", "", serviceTier{})
	assert.Equal(t, 0.0, got)
}
// TestComputeImageCost_InputAndOutputPerImage checks that per-image rates are
// applied to both the input image count and the generated image count.
func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerImage = bifrost.Ptr(0.01)
	pricing.OutputCostPerImage = bifrost.Ptr(0.05)

	usage := &schemas.ImageUsage{NumInputImages: 3}
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 2}

	// 3 input * $0.01 + 2 output * $0.05 = $0.03 + $0.10 = $0.13
	got := computeImageCost(&pricing, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.13, got, 1e-12)
}
// TestComputeImageCost_PerPixelOutput checks per-pixel output billing for a
// single generated image of a known size.
func TestComputeImageCost_PerPixelOutput(t *testing.T) {
	// Roughly $0.02 for one 1024x1024 image.
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerPixel = bifrost.Ptr(0.000000019)

	usage := new(schemas.ImageUsage)
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 1}

	got := computeImageCost(&pricing, usage, "1024x1024", "", serviceTier{})

	// 1024*1024 pixels * 1 image * 0.000000019 = 1048576 * 0.000000019 ≈ 0.01992
	const want = 1048576 * 0.000000019
	assert.InDelta(t, want, got, 1e-12)
}
// TestComputeImageCost_PerPixelInputAndOutput checks per-pixel billing on
// both sides, scaled by the input and generated image counts.
func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerPixel = bifrost.Ptr(0.00000001)
	pricing.OutputCostPerPixel = bifrost.Ptr(0.00000002)

	// Two input images and three generated images, all 512x512.
	usage := &schemas.ImageUsage{NumInputImages: 2}
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 3}

	got := computeImageCost(&pricing, usage, "512x512", "", serviceTier{})

	// 512*512 = 262144 pixels per image.
	// Input:  262144 * 2 * 0.00000001 = 0.00524288
	// Output: 262144 * 3 * 0.00000002 = 0.01572864
	px := 512 * 512
	want := float64(px*2)*0.00000001 + float64(px*3)*0.00000002
	assert.InDelta(t, want, got, 1e-12)
}
// TestComputeImageCost_TokensPreferredOverPixels checks that token billing is
// used when token counts are present, even though per-pixel rates and an
// image size are also available.
func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000005)
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)
	pricing.InputCostPerPixel = bifrost.Ptr(0.00000001)
	pricing.OutputCostPerPixel = bifrost.Ptr(0.00000002)

	usage := &schemas.ImageUsage{InputTokens: 1000, OutputTokens: 500, TotalTokens: 1500}

	// Token pricing should win: 1000*0.000005 + 500*0.000015 = 0.0125
	got := computeImageCost(&pricing, usage, "1024x1024", "", serviceTier{})
	assert.InDelta(t, 0.0125, got, 1e-12)
}
// TestComputeImageCost_PixelsPreferredOverPerImage checks that the per-pixel
// rate is used instead of the per-image rate when an image size is known.
func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerPixel = bifrost.Ptr(0.00000002)
	// Deliberately huge so that any use of the per-image rate is obvious.
	pricing.OutputCostPerImage = bifrost.Ptr(999.0)

	usage := new(schemas.ImageUsage)
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 1}

	got := computeImageCost(&pricing, usage, "256x256", "", serviceTier{})

	// Per-pixel should win: 65536 pixels * 1 image * 0.00000002 = 0.00131072
	const want = 65536 * 0.00000002
	assert.InDelta(t, want, got, 1e-12)
}
// TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize checks that the
// per-image rate is used when a per-pixel rate is configured but no image
// size is supplied.
func TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerPixel = bifrost.Ptr(0.00000002)
	pricing.OutputCostPerImage = bifrost.Ptr(0.05)

	usage := new(schemas.ImageUsage)
	usage.OutputTokensDetails = &schemas.ImageTokenDetails{NImages: 2}

	// With an empty size the pixel count is 0, so billing falls through to
	// the per-image rate: 2 * $0.05 = $0.10
	got := computeImageCost(&pricing, usage, "", "", serviceTier{})
	assert.InDelta(t, 0.10, got, 1e-12)
}
func TestComputeImageCost_QualityBasedRates(t *testing.T) {
usage := &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
}
// Quality-specific rates take precedence over base/size-tier
p := configstoreTables.TableModelPricing{
OutputCostPerImage: bifrost.Ptr(0.01),
OutputCostPerImageLowQuality: bifrost.Ptr(0.02),
OutputCostPerImageMediumQuality: bifrost.Ptr(0.03),
OutputCostPerImageHighQuality: bifrost.Ptr(0.04),
OutputCostPerImageAutoQuality: bifrost.Ptr(0.05),
}
assert.InDelta(t, 0.02, computeImageCost(&p, usage, "", "low", serviceTier{}), 1e-12)
assert.InDelta(t, 0.03, computeImageCost(&p, usage, "", "medium", serviceTier{}), 1e-12)
assert.InDelta(t, 0.04, computeImageCost(&p, usage, "", "high", serviceTier{}), 1e-12)
assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "auto", serviceTier{}), 1e-12)
// "hd" does not match any quality case so perImageRate stays nil → size/base fallback.
assert.InDelta(t, 0.01, computeImageCost(&p, usage, "", "hd", serviceTier{}), 1e-12)
// Empty quality is treated as auto
assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "", serviceTier{}), 1e-12)
}
// TestParseImagePixels checks the pixel count returned for well-formed
// "WxH" sizes and that malformed or non-positive sizes return 0.
func TestParseImagePixels(t *testing.T) {
	is := assert.New(t)

	// Well-formed sizes: width * height.
	is.Equal(1048576, parseImagePixels("1024x1024"))
	is.Equal(262144, parseImagePixels("512x512"))
	is.Equal(1835008, parseImagePixels("1792x1024"))

	// Empty, non-numeric, or single-dimension input.
	is.Equal(0, parseImagePixels(""))
	is.Equal(0, parseImagePixels("invalid"))
	is.Equal(0, parseImagePixels("1024"))

	// Zero or negative dimension.
	is.Equal(0, parseImagePixels("0x1024"))
	is.Equal(0, parseImagePixels("-1x1024"))
}
// =========================================================================
// 7. computeVideoCost — unit tests
// =========================================================================
// TestComputeVideoCost_DurationBased checks that video output is billed per
// second and input tokens are billed at the per-token input rate.
func TestComputeVideoCost_DurationBased(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000001)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerVideoPerSecond = bifrost.Ptr(0.001)

	usage := &schemas.BifrostLLMUsage{
		PromptTokens: 500,
		TotalTokens:  500,
	}
	videoSeconds := bifrost.Ptr(30)

	got := computeVideoCost(&pricing, usage, videoSeconds, serviceTier{})

	// Output: 30 * 0.001 = 0.03
	// Input:  500 * 0.000001 = 0.0005
	// Sum:    0.0305
	assert.InDelta(t, 0.0305, got, 1e-12)
}
// TestComputeVideoCost_OutputCostPerSecondFallback checks that the generic
// OutputCostPerSecond rate is used for video when no video-specific
// per-second rate is set.
func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerToken = bifrost.Ptr(0.0)
	pricing.OutputCostPerSecond = bifrost.Ptr(0.002)

	videoSeconds := bifrost.Ptr(10)

	// 10 * 0.002 = 0.02
	got := computeVideoCost(&pricing, nil, videoSeconds, serviceTier{})
	assert.InDelta(t, 0.02, got, 1e-12)
}
// TestComputeVideoCost_NilSeconds checks that only input tokens are billed
// when no video duration is supplied.
func TestComputeVideoCost_NilSeconds(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000001)
	pricing.OutputCostPerVideoPerSecond = bifrost.Ptr(0.001)

	usage := &schemas.BifrostLLMUsage{
		PromptTokens: 1000,
	}

	// Input tokens only: 1000 * 0.000001 = 0.001
	got := computeVideoCost(&pricing, usage, nil, serviceTier{})
	assert.InDelta(t, 0.001, got, 1e-12)
}
// =========================================================================
// 8. tieredInputRate / tieredOutputRate
// =========================================================================
// TestTieredInputRate_BelowThreshold checks that the base input rate is
// returned for a token count of 100000 even though a 200k-tier rate is set.
func TestTieredInputRate_BelowThreshold(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000003)
	pricing.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)

	got := tieredInputRate(&pricing, 100000, serviceTier{})
	assert.Equal(t, 0.000003, got)
}
// TestTieredInputRate_AboveThreshold checks that the 200k-tier input rate is
// returned for a token count of 210000.
func TestTieredInputRate_AboveThreshold(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000003)
	pricing.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)

	got := tieredInputRate(&pricing, 210000, serviceTier{})
	assert.Equal(t, 0.000006, got)
}
// TestTieredInputRate_AboveThresholdNoTieredRate checks that the base input
// rate is returned for a token count of 300000 when no tiered rate is set.
func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.InputCostPerToken = bifrost.Ptr(0.000003)

	// The tiered field is nil, so the base rate is the fallback.
	got := tieredInputRate(&pricing, 300000, serviceTier{})
	assert.Equal(t, 0.000003, got)
}
// TestTieredOutputRate_AboveThreshold checks that the 200k-tier output rate
// is returned for a token count of 250000.
func TestTieredOutputRate_AboveThreshold(t *testing.T) {
	var pricing configstoreTables.TableModelPricing
	pricing.OutputCostPerToken = bifrost.Ptr(0.000015)
	pricing.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)

	got := tieredOutputRate(&pricing, 250000, serviceTier{})
	assert.Equal(t, 0.00003, got)
}
// =========================================================================
// 9. extractCostInput — usage extraction
// =========================================================================
// TestExtractCostInput_ChatResponse checks that the usage of a chat response
// is surfaced with its prompt and completion token counts.
func TestExtractCostInput_ChatResponse(t *testing.T) {
	chat := &schemas.BifrostChatResponse{
		Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
	}

	got := extractCostInput(&schemas.BifrostResponse{ChatResponse: chat})

	require.NotNil(t, got.usage)
	assert.Equal(t, 100, got.usage.PromptTokens)
	assert.Equal(t, 50, got.usage.CompletionTokens)
}
// TestExtractCostInput_EmbeddingResponse checks that the usage of an
// embedding response is surfaced with its prompt token count.
func TestExtractCostInput_EmbeddingResponse(t *testing.T) {
	embedding := &schemas.BifrostEmbeddingResponse{
		Usage: &schemas.BifrostLLMUsage{PromptTokens: 200, TotalTokens: 200},
	}

	got := extractCostInput(&schemas.BifrostResponse{EmbeddingResponse: embedding})

	require.NotNil(t, got.usage)
	assert.Equal(t, 200, got.usage.PromptTokens)
}
// TestExtractCostInput_ImageResponse checks that an image generation response
// populates imageUsage and leaves the LLM usage nil.
func TestExtractCostInput_ImageResponse(t *testing.T) {
	image := &schemas.BifrostImageGenerationResponse{
		Usage: &schemas.ImageUsage{InputTokens: 100, OutputTokens: 200, TotalTokens: 300},
	}

	got := extractCostInput(&schemas.BifrostResponse{ImageGenerationResponse: image})

	assert.Nil(t, got.usage)
	require.NotNil(t, got.imageUsage)
	assert.Equal(t, 300, got.imageUsage.TotalTokens)
}
// TestExtractCostInput_TranscriptionWithSeconds checks that a transcription
// response yields both the audio duration and a usage whose prompt tokens
// come from the transcription input tokens.
func TestExtractCostInput_TranscriptionWithSeconds(t *testing.T) {
	transcriptionUsage := &schemas.TranscriptionUsage{
		Seconds:      bifrost.Ptr(60),
		InputTokens:  bifrost.Ptr(1000),
		OutputTokens: bifrost.Ptr(200),
		TotalTokens:  bifrost.Ptr(1200),
	}
	transcription := &schemas.BifrostTranscriptionResponse{Usage: transcriptionUsage}

	got := extractCostInput(&schemas.BifrostResponse{TranscriptionResponse: transcription})

	require.NotNil(t, got.usage)
	require.NotNil(t, got.audioSeconds)
	assert.Equal(t, 60, *got.audioSeconds)
	assert.Equal(t, 1000, got.usage.PromptTokens)
}
// TestExtractCostInput_SpeechResponse checks that speech usage is mapped onto
// the prompt, completion, and total token counts of the extracted usage.
func TestExtractCostInput_SpeechResponse(t *testing.T) {
	speech := &schemas.BifrostSpeechResponse{
		Usage: &schemas.SpeechUsage{InputTokens: 100, OutputTokens: 500, TotalTokens: 600},
	}

	got := extractCostInput(&schemas.BifrostResponse{SpeechResponse: speech})

	require.NotNil(t, got.usage)
	// Input tokens map to PromptTokens, output tokens to CompletionTokens.
	assert.Equal(t, 100, got.usage.PromptTokens)
	assert.Equal(t, 500, got.usage.CompletionTokens)
	assert.Equal(t, 600, got.usage.TotalTokens)
}
// TestExtractCostInput_VideoResponse checks that the string seconds value of
// a video generation response is surfaced as an integer duration.
func TestExtractCostInput_VideoResponse(t *testing.T) {
	duration := "15"
	video := &schemas.BifrostVideoGenerationResponse{Seconds: &duration}

	got := extractCostInput(&schemas.BifrostResponse{VideoGenerationResponse: video})

	require.NotNil(t, got.videoSeconds)
	assert.Equal(t, 15, *got.videoSeconds)
}
// TestExtractCostInput_VideoResponseInvalidSeconds checks that a non-numeric
// seconds string leaves videoSeconds nil.
func TestExtractCostInput_VideoResponseInvalidSeconds(t *testing.T) {
	duration := "not-a-number"
	video := &schemas.BifrostVideoGenerationResponse{Seconds: &duration}

	got := extractCostInput(&schemas.BifrostResponse{VideoGenerationResponse: video})

	assert.Nil(t, got.videoSeconds)
}
// =========================================================================
// 10. Semantic cache billing (calculateCostWithCache)
// =========================================================================
func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
})
hitType := "direct"
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
CacheDebug: &schemas.BifrostCacheDebug{
CacheHit: true,
HitType: &hitType,
},
},
},
}
cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
embProvider := "openai"
embModel := "text-embedding-3-small"
embTokens := 500
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
makeKey("text-embedding-3-small", "openai", "embedding"): {
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
InputCostPerToken: bifrost.Ptr(0.00000002),
},
})
hitType := "semantic"
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
CacheDebug: &schemas.BifrostCacheDebug{
CacheHit: true,
HitType: &hitType,
ProviderUsed: &embProvider,
ModelUsed: &embModel,
InputTokens: &embTokens,
},
},
},
}
cost := mc.CalculateCost(resp, nil)
// Only embedding cost: 500 * 0.00000002 = 0.00001
assert.InDelta(t, 0.00001, cost, 1e-12)
}
func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
embProvider := "openai"
embModel := "text-embedding-3-small"
embTokens := 500
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
makeKey("text-embedding-3-small", "openai", "embedding"): {
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
InputCostPerToken: bifrost.Ptr(0.00000002),
},
})
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
CacheDebug: &schemas.BifrostCacheDebug{
CacheHit: false,
ProviderUsed: &embProvider,
ModelUsed: &embModel,
InputTokens: &embTokens,
},
},
},
}
cost := mc.CalculateCost(resp, nil)
// Base cost: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
// Embedding cost: 500 * 0.00000002 = 0.00001
// Total: 0.01251
assert.InDelta(t, 0.01251, cost, 1e-12)
}
// TestCalculateCost_SemanticCacheHitNoEmbeddingInfo verifies that a cache hit
// carrying no embedding provider, model, or token count yields a zero cost
// when the catalog has no pricing loaded.
func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
	catalog := testCatalogWithPricing(nil)

	// Only CacheHit is set: ProviderUsed, ModelUsed and InputTokens stay nil.
	chat := &schemas.BifrostChatResponse{}
	chat.ExtraFields.CacheDebug = &schemas.BifrostCacheDebug{CacheHit: true}
	response := &schemas.BifrostResponse{ChatResponse: chat}

	got := catalog.CalculateCost(response, nil)
	assert.Equal(t, 0.0, got)
}
// =========================================================================
// 11. CalculateCost integration — end-to-end
// =========================================================================
// TestCalculateCost_NilResponse verifies that CalculateCost returns zero when
// it is handed a nil response.
func TestCalculateCost_NilResponse(t *testing.T) {
	catalog := testCatalogWithPricing(nil)

	got := catalog.CalculateCost(nil, nil)

	assert.Equal(t, 0.0, got)
}
func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
Cost: &schemas.BifrostCost{
TotalCost: 0.99, // Provider already calculated
},
})
cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.99, cost)
}
func TestCalculateCost_NoUsageData(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", nil)
cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) {
// GPT-4o: $5/M input, $15/M output, cache_read=$0.5/M
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000005),
OutputCostPerToken: bifrost.Ptr(0.000015),
CacheReadInputTokenCost: bifrost.Ptr(0.0000005),
},
})
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
PromptTokens: 10000,
CompletionTokens: 2000,
TotalTokens: 12000,
})
cost := mc.CalculateCost(resp, nil)
// 10000*0.000005 + 2000*0.000015 = 0.05 + 0.03 = 0.08
assert.InDelta(t, 0.08, cost, 1e-12)
}
func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) {
// Claude 3.5 Sonnet (Bedrock): $3/M input, $15/M output, cache_read=$0.3/M, cache_creation=$3.75/M
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): {
Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", Provider: "bedrock", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000003),
OutputCostPerToken: bifrost.Ptr(0.000015),
CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
},
})
resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20241022-v2:0", &schemas.BifrostLLMUsage{
PromptTokens: 5000,
CompletionTokens: 1000,
TotalTokens: 6000,
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
CachedReadTokens: 3000, // 3000 cache read tokens
CachedWriteTokens: 500, // 500 cache creation tokens
},
})
cost := mc.CalculateCost(resp, nil)
// Both cached read and write tokens are input-side deductions from promptTokens.
// Input: (5000-3000-500)*0.000003 + 3000*0.0000003 + 500*0.00000375 = 0.0045 + 0.0009 + 0.001875 = 0.007275
// Output: 1000*0.000015 = 0.015
// Total: 0.007275 + 0.015 = 0.022275
assert.InDelta(t, 0.022275, cost, 1e-12)
}
func TestCalculateCost_Embedding(t *testing.T) {
// Titan Embed Text v1: $0.1/M input
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("amazon.titan-embed-text-v1", "bedrock", "embedding"): {
Model: "amazon.titan-embed-text-v1", Provider: "bedrock", Mode: "embedding",
InputCostPerToken: bifrost.Ptr(0.0000001),
OutputCostPerToken: bifrost.Ptr(0.0),
},
})
resp := makeEmbeddingResponse(schemas.Bedrock, "amazon.titan-embed-text-v1", &schemas.BifrostLLMUsage{
PromptTokens: 10000,
TotalTokens: 10000,
})
cost := mc.CalculateCost(resp, nil)
// 10000 * 0.0000001 = 0.001
assert.InDelta(t, 0.001, cost, 1e-12)
}
func TestCalculateCost_Rerank(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("amazon.rerank-v1:0", "bedrock", "rerank"): {
Model: "amazon.rerank-v1:0", Provider: "bedrock", Mode: "rerank",
InputCostPerToken: bifrost.Ptr(0.0),
OutputCostPerToken: bifrost.Ptr(0.0),
},
})
resp := makeRerankResponse(schemas.Bedrock, "amazon.rerank-v1:0", &schemas.BifrostLLMUsage{
PromptTokens: 500,
TotalTokens: 500,
})
cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
func TestCalculateCost_ImageGeneration(t *testing.T) {
// dall-e-3 via aiml: output_cost_per_image=$0.052
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("dall-e-3", "aiml", "image_generation"): {
Model: "dall-e-3", Provider: "aiml", Mode: "image_generation",
OutputCostPerImage: bifrost.Ptr(0.052),
},
})
resp := makeImageResponse("aiml", "dall-e-3", &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3},
})
cost := mc.CalculateCost(resp, nil)
// 3 * 0.052 = 0.156
assert.InDelta(t, 0.156, cost, 1e-12)
}
func TestCalculateCost_StreamRequestTypeNormalized(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
// Stream request type should be normalized to base type
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionStreamRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
},
},
}
cost := mc.CalculateCost(resp, nil)
assert.InDelta(t, 0.0125, cost, 1e-12)
}
// TestCalculateCost_NoPricingData verifies that a response for a model with no
// pricing entry in the catalog costs zero.
func TestCalculateCost_NoPricingData(t *testing.T) {
	catalog := testCatalogWithPricing(nil)

	usage := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}
	response := makeChatResponse(schemas.OpenAI, "unknown-model", usage)

	got := catalog.CalculateCost(response, nil)
	assert.Equal(t, 0.0, got)
}
// =========================================================================
// 12. Pricing resolution — getPricing fallback logic
// =========================================================================
// TestGetPricing_DirectLookup verifies that resolvePricing finds an entry
// stored under the exact model/provider/mode key.
func TestGetPricing_DirectLookup(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})

	p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
// TestGetPricing_GeminiFallsBackToVertex verifies that a lookup for the
// "gemini" provider resolves to pricing stored under the "vertex" provider
// when no gemini entry exists.
func TestGetPricing_GeminiFallsBackToVertex(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gemini-2.0-flash", "vertex", "chat"): {
			Model:              "gemini-2.0-flash",
			Provider:           "vertex",
			Mode:               "chat",
			InputCostPerToken:  bifrost.Ptr(0.0000001),
			OutputCostPerToken: bifrost.Ptr(0.0000004),
		},
	})

	p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
// TestGetPricing_VertexStripsProviderPrefix verifies that a vertex lookup for
// "google/gemini-2.0-flash" resolves to the entry stored under the bare model
// name "gemini-2.0-flash".
func TestGetPricing_VertexStripsProviderPrefix(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
	})

	p := mc.resolvePricing("vertex", "google/gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "vertex"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
// TestGetPricing_BedrockAddsAnthropicPrefix verifies that a bedrock lookup for
// a bare Claude model name resolves to the entry stored with the "anthropic."
// prefix.
func TestGetPricing_BedrockAddsAnthropicPrefix(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): chatPricing(0.000003, 0.000015),
	})

	p := mc.resolvePricing("bedrock", "claude-3-5-sonnet-20241022-v2:0", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "bedrock"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.000003, derefF(p.InputCostPerToken))
}
// TestGetPricing_ResponsesFallsBackToChat verifies that a Responses request
// resolves to the "chat" pricing entry when that is the only entry stored.
func TestGetPricing_ResponsesFallsBackToChat(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})

	p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
// TestGetPricing_ResponsesStreamFallsBackToChat verifies that a streaming
// Responses request resolves to the "chat" pricing entry when that is the
// only entry stored.
func TestGetPricing_ResponsesStreamFallsBackToChat(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})

	p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesStreamRequest, PricingLookupScopes{Provider: "openai"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
// TestGetPricing_RealtimeFallsBackToChat verifies that a Realtime request
// resolves to the "chat" pricing entry when that is the only entry stored.
func TestGetPricing_RealtimeFallsBackToChat(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
	})

	p := mc.resolvePricing("openai", "gpt-4o", "", schemas.RealtimeRequest, PricingLookupScopes{Provider: "openai"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
// TestGetPricing_GeminiResponsesFallsBackToVertexChat verifies that the
// provider fallback (gemini → vertex) and the request-type fallback
// (responses → chat) combine: only a vertex/chat entry is stored, yet a
// gemini Responses lookup resolves to it.
func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) {
	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
		makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
	})

	// gemini provider + responses request → try vertex + responses → try vertex + chat
	p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "gemini"})

	// Guard the dereference below: a failed lookup yields a nil pointer and
	// would otherwise panic instead of reporting a clean test failure.
	if !assert.NotNil(t, p) {
		return
	}
	assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
// TestGetPricing_NotFound verifies that resolvePricing returns nil when the
// catalog holds no entry for the requested model.
func TestGetPricing_NotFound(t *testing.T) {
	catalog := testCatalogWithPricing(nil)

	resolved := catalog.resolvePricing("openai", "nonexistent", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})

	assert.Nil(t, resolved)
}
// =========================================================================
// 13. resolvePricing — deployment fallback
// =========================================================================
func TestResolvePricing_DeploymentFallback(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("my-deployment", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
// Model not found directly, but deployment matches
p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
require.NotNil(t, p)
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestResolvePricing_ResolvedModelHasPriority(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
makeKey("my-deployment", "openai", "chat"): chatPricing(0.000001, 0.000002),
})
// Resolved model ("my-deployment") is looked up first and has priority
// over the originally requested model ("gpt-4o").
p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
require.NotNil(t, p)
assert.Equal(t, 0.000001, derefF(p.InputCostPerToken))
}
// TestResolvePricing_NothingFound verifies that resolvePricing returns nil when
// neither the model nor a deployment matches and no lookup scopes are given.
func TestResolvePricing_NothingFound(t *testing.T) {
	catalog := testCatalogWithPricing(nil)

	resolved := catalog.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest, PricingLookupScopes{})

	assert.Nil(t, resolved)
}
// =========================================================================
// 14. normalizeStreamRequestType
// =========================================================================
// TestNormalizeStreamRequestType verifies that each streaming request type is
// mapped to its non-streaming counterpart and that the other listed types are
// returned unchanged.
func TestNormalizeStreamRequestType(t *testing.T) {
	cases := []struct {
		in   schemas.RequestType
		want schemas.RequestType
	}{
		// Streaming variants collapse to their base type.
		{schemas.ChatCompletionStreamRequest, schemas.ChatCompletionRequest},
		{schemas.TextCompletionStreamRequest, schemas.TextCompletionRequest},
		{schemas.ResponsesStreamRequest, schemas.ResponsesRequest},
		{schemas.SpeechStreamRequest, schemas.SpeechRequest},
		{schemas.TranscriptionStreamRequest, schemas.TranscriptionRequest},
		{schemas.ImageGenerationStreamRequest, schemas.ImageGenerationRequest},
		{schemas.ImageEditStreamRequest, schemas.ImageEditRequest},
		// Realtime is its own base type.
		{schemas.RealtimeRequest, schemas.RealtimeRequest},
		// Non-stream types pass through untouched.
		{schemas.ChatCompletionRequest, schemas.ChatCompletionRequest},
		{schemas.EmbeddingRequest, schemas.EmbeddingRequest},
	}

	for i := range cases {
		tc := cases[i]
		got := normalizeStreamRequestType(tc.in)
		assert.Equal(t, tc.want, got, "for input %s", tc.in)
	}
}
// =========================================================================
// 15. responsesUsageToBifrostUsage
// =========================================================================
// TestResponsesUsageToBifrostUsage_Basic verifies that the input, output and
// total token counts are copied to the prompt, completion and total fields,
// and that both detail structs stay nil when the source has none.
func TestResponsesUsageToBifrostUsage_Basic(t *testing.T) {
	source := &schemas.ResponsesResponseUsage{
		InputTokens:  100,
		OutputTokens: 50,
		TotalTokens:  150,
	}

	converted := responsesUsageToBifrostUsage(source)

	// Token counts map one-to-one.
	assert.Equal(t, 100, converted.PromptTokens)
	assert.Equal(t, 50, converted.CompletionTokens)
	assert.Equal(t, 150, converted.TotalTokens)

	// No details were supplied, so none are produced.
	assert.Nil(t, converted.PromptTokensDetails)
	assert.Nil(t, converted.CompletionTokensDetails)
}
// TestResponsesUsageToBifrostUsage_WithTokenDetails verifies that input and
// output token details on a Responses usage record are carried over to the
// prompt and completion token details of the converted usage.
func TestResponsesUsageToBifrostUsage_WithTokenDetails(t *testing.T) {
	searchQueries := 2
	inputDetails := &schemas.ResponsesResponseInputTokens{
		CachedReadTokens:  300,
		CachedWriteTokens: 50,
		TextTokens:        600,
		AudioTokens:       50,
		ImageTokens:       50,
	}
	outputDetails := &schemas.ResponsesResponseOutputTokens{
		ReasoningTokens:  100,
		NumSearchQueries: &searchQueries,
	}
	source := &schemas.ResponsesResponseUsage{
		InputTokens:         1000,
		OutputTokens:        500,
		TotalTokens:         1500,
		InputTokensDetails:  inputDetails,
		OutputTokensDetails: outputDetails,
	}

	converted := responsesUsageToBifrostUsage(source)

	// Prompt-side details.
	require.NotNil(t, converted.PromptTokensDetails)
	assert.Equal(t, 300, converted.PromptTokensDetails.CachedReadTokens)
	assert.Equal(t, 50, converted.PromptTokensDetails.CachedWriteTokens)
	assert.Equal(t, 600, converted.PromptTokensDetails.TextTokens)
	assert.Equal(t, 50, converted.PromptTokensDetails.AudioTokens)
	assert.Equal(t, 50, converted.PromptTokensDetails.ImageTokens)

	// Completion-side details.
	require.NotNil(t, converted.CompletionTokensDetails)
	assert.Equal(t, 100, converted.CompletionTokensDetails.ReasoningTokens)
	require.NotNil(t, converted.CompletionTokensDetails.NumSearchQueries)
	assert.Equal(t, 2, *converted.CompletionTokensDetails.NumSearchQueries)
}
// =========================================================================
// 16. Edge cases
// =========================================================================
func TestCalculateCost_200kTier_EndToEnd(t *testing.T) {
// Claude 3.5 Sonnet Bedrock with 200k tier pricing
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock", "chat"): {
Model: "anthropic.claude-3-5-sonnet-20240620-v1:0", Provider: "bedrock", Mode: "chat",
InputCostPerToken: bifrost.Ptr(0.000003),
OutputCostPerToken: bifrost.Ptr(0.000015),
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
CacheReadInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000006),
CacheCreationInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000075),
},
})
resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20240620-v1:0", &schemas.BifrostLLMUsage{
PromptTokens: 190000,
CompletionTokens: 20000,
TotalTokens: 210000, // Above 200k
})
cost := mc.CalculateCost(resp, nil)
// Tiered rate: input=0.000006, output=0.00003
// 190000*0.000006 + 20000*0.00003 = 1.14 + 0.6 = 1.74
assert.InDelta(t, 1.74, cost, 1e-9)
}
func TestCalculateCost_272kTier_EndToEnd(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("claude-3-7-sonnet", "anthropic", "chat"): {
Model: "claude-3-7-sonnet",
Provider: "anthropic",
Mode: "chat",
InputCostPerToken: new(0.000003),
OutputCostPerToken: new(0.000015),
InputCostPerTokenAbove200kTokens: new(0.000006),
OutputCostPerTokenAbove200kTokens: new(0.00003),
InputCostPerTokenAbove272kTokens: new(0.000009),
OutputCostPerTokenAbove272kTokens: new(0.000045),
CacheReadInputTokenCost: new(0.0000003),
CacheReadInputTokenCostAbove200kTokens: new(0.0000006),
CacheReadInputTokenCostAbove272kTokens: new(0.0000009),
},
})
resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", &schemas.BifrostLLMUsage{
PromptTokens: 250000,
CompletionTokens: 30000,
TotalTokens: 280000, // Above 272k
})
cost := mc.CalculateCost(resp, nil)
// Tiered rate: input=0.000009, output=0.000045
// 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60
assert.InDelta(t, 3.60, cost, 1e-9)
}
func TestCalculateCost_272kTier_CacheReadFallbackChain(t *testing.T) {
// Verifies the 272k cache read rate takes precedence over 200k and base rates
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("claude-3-7-sonnet", "anthropic", "chat"): {
Model: "claude-3-7-sonnet",
Provider: "anthropic",
Mode: "chat",
InputCostPerToken: new(0.000003),
OutputCostPerToken: new(0.000015),
InputCostPerTokenAbove272kTokens: new(0.000009),
OutputCostPerTokenAbove272kTokens: new(0.000045),
CacheReadInputTokenCost: new(0.0000003),
CacheReadInputTokenCostAbove200kTokens: new(0.0000006),
CacheReadInputTokenCostAbove272kTokens: new(0.0000009),
},
})
resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", &schemas.BifrostLLMUsage{
PromptTokens: 250000,
CompletionTokens: 30000,
TotalTokens: 280000,
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
CachedReadTokens: 50000,
},
})
cost := mc.CalculateCost(resp, nil)
// Non-cached input: (250000-50000) * 0.000009 = 200000 * 0.000009 = 1.80
// Cached read (272k rate): 50000 * 0.0000009 = 0.045
// Output: 30000 * 0.000045 = 1.35
// Total: 1.80 + 0.045 + 1.35 = 3.195
assert.InDelta(t, 3.195, cost, 1e-9)
}
// =========================================================================
// Priority tier tests
// =========================================================================
// TestComputeTextCost_PriorityUsesInputOutputPriorityRate verifies that a
// priority service tier is billed at the priority input and output rates.
func TestComputeTextCost_PriorityUsesInputOutputPriorityRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenPriority = new(0.000006)
	rates.OutputCostPerTokenPriority = new(0.00003)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}

	got := computeTextCost(&rates, tokens, serviceTier{isPriority: true})

	// Priority rates: 1000*0.000006 + 500*0.00003 = 0.006 + 0.015 = 0.021
	assert.InDelta(t, 0.021, got, 1e-12)
}
// TestComputeTextCost_NonPriorityIgnoresPriorityRate verifies that the default
// service tier is billed at the base rates even when priority rates are
// configured.
func TestComputeTextCost_NonPriorityIgnoresPriorityRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenPriority = new(0.000006)
	rates.OutputCostPerTokenPriority = new(0.00003)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}

	got := computeTextCost(&rates, tokens, serviceTier{})

	// Base rates: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
	assert.InDelta(t, 0.0105, got, 1e-12)
}
// TestComputeTextCost_Priority272kTier verifies that a priority request above
// 272k total tokens is billed at the priority-specific above-272k rates.
func TestComputeTextCost_Priority272kTier(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	// Flat priority rates.
	rates.InputCostPerTokenPriority = new(0.000006)
	rates.OutputCostPerTokenPriority = new(0.00003)
	// Above-272k rates, standard and priority.
	rates.InputCostPerTokenAbove272kTokens = new(0.000009)
	rates.InputCostPerTokenAbove272kTokensPriority = new(0.000012)
	rates.OutputCostPerTokenAbove272kTokens = new(0.000045)
	rates.OutputCostPerTokenAbove272kTokensPriority = new(0.00006)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     250000,
		CompletionTokens: 30000,
		TotalTokens:      280000,
	}

	got := computeTextCost(&rates, tokens, serviceTier{isPriority: true})

	// 272k priority rates: 250000*0.000012 + 30000*0.00006 = 3.00 + 1.80 = 4.80
	assert.InDelta(t, 4.80, got, 1e-9)
}
// TestComputeTextCost_Priority272kTierFallsBackToNonPriority272k verifies that
// a priority request above 272k tokens uses the standard above-272k rates when
// no priority-specific above-272k rates are configured.
func TestComputeTextCost_Priority272kTierFallsBackToNonPriority272k(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	// Only the non-priority 272k rates are set.
	rates.InputCostPerTokenAbove272kTokens = new(0.000009)
	rates.OutputCostPerTokenAbove272kTokens = new(0.000045)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     250000,
		CompletionTokens: 30000,
		TotalTokens:      280000,
	}

	got := computeTextCost(&rates, tokens, serviceTier{isPriority: true})

	// Standard 272k rates: 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60
	assert.InDelta(t, 3.60, got, 1e-9)
}
// TestComputeTextCost_PriorityCacheReadRate verifies that, for a priority
// request, cached read tokens are billed at the priority cache read rate and
// the remaining prompt tokens at the priority input rate.
func TestComputeTextCost_PriorityCacheReadRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenPriority = new(0.000006)
	rates.OutputCostPerTokenPriority = new(0.00003)
	rates.CacheReadInputTokenCost = new(0.0000003)
	rates.CacheReadInputTokenCostPriority = new(0.0000006)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens: 400,
		},
	}

	got := computeTextCost(&rates, tokens, serviceTier{isPriority: true})

	// Uncached input:         (1000-400)*0.000006 = 0.0036
	// Cached read (priority): 400*0.0000006       = 0.00024
	// Output:                 500*0.00003         = 0.015
	// Total:                  0.0036 + 0.00024 + 0.015 = 0.01884
	assert.InDelta(t, 0.01884, got, 1e-12)
}
func TestCalculateCost_PriorityTier_EndToEnd(t *testing.T) {
tierStr := "priority"
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
InputCostPerToken: new(0.000005),
OutputCostPerToken: new(0.000015),
InputCostPerTokenPriority: new(0.000010),
OutputCostPerTokenPriority: new(0.000030),
},
})
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
ServiceTier: &tierStr,
Usage: &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
ResolvedModelUsed: "gpt-4o",
},
},
}
cost := mc.CalculateCost(resp, nil)
// Priority rates: 1000*0.000010 + 500*0.000030 = 0.010 + 0.015 = 0.025
assert.InDelta(t, 0.025, cost, 1e-12)
}
func TestCalculateCost_NonPriorityServiceTier_UsesBaseRate(t *testing.T) {
tierStr := "auto"
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
InputCostPerToken: new(0.000005),
OutputCostPerToken: new(0.000015),
InputCostPerTokenPriority: new(0.000010),
OutputCostPerTokenPriority: new(0.000030),
},
})
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
ServiceTier: &tierStr,
Usage: &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
ResolvedModelUsed: "gpt-4o",
},
},
}
cost := mc.CalculateCost(resp, nil)
// Base rates (not priority): 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
assert.InDelta(t, 0.0125, cost, 1e-12)
}
// TestTieredCacheReadRate_FallbackOrder exercises tieredCacheReadInputTokenRate
// across token-count tiers and service tiers. The subtests cover:
//   - standard tier: the 272k rate wins over 200k, 200k over the base cache
//     rate, and the plain input rate is used when no cache rate is set;
//   - priority tier: priority-specific rates win, falling back to the
//     non-priority rate of the same token tier;
//   - flex tier: the flex cache rate is used regardless of token count, with
//     the base cache rate as fallback.
func TestTieredCacheReadRate_FallbackOrder(t *testing.T) {
	t.Run("uses_272k_when_above_272k", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{})
		assert.Equal(t, 0.0000009, got)
	})

	t.Run("uses_200k_when_between_200k_and_272k", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

		got := tieredCacheReadInputTokenRate(&rates, 230000, serviceTier{})
		assert.Equal(t, 0.0000006, got)
	})

	t.Run("uses_base_cache_rate_when_below_200k", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

		got := tieredCacheReadInputTokenRate(&rates, 1500, serviceTier{})
		assert.Equal(t, 0.0000003, got)
	})

	t.Run("falls_back_to_input_rate_when_no_cache_rate_set", func(t *testing.T) {
		// No cache read rate of any kind is configured.
		rates := chatPricing(0.000003, 0.000015)

		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{})
		assert.Equal(t, 0.000003, got)
	})

	t.Run("priority_uses_272k_priority_rate", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostPriority = new(0.0000006)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
		rates.CacheReadInputTokenCostAbove272kTokensPriority = new(0.0000012)

		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{isPriority: true})
		assert.Equal(t, 0.0000012, got)
	})

	t.Run("priority_falls_back_to_272k_non_priority_when_priority_rate_missing", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{isPriority: true})
		assert.Equal(t, 0.0000009, got)
	})

	t.Run("priority_uses_priority_base_cache_rate_below_tiers", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostPriority = new(0.0000006)

		got := tieredCacheReadInputTokenRate(&rates, 1500, serviceTier{isPriority: true})
		assert.Equal(t, 0.0000006, got)
	})

	t.Run("flex_uses_flex_cache_rate", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostFlex = new(0.0000005)

		got := tieredCacheReadInputTokenRate(&rates, 1500, serviceTier{isFlex: true})
		assert.Equal(t, 0.0000005, got)
	})

	t.Run("flex_uses_flex_cache_rate_regardless_of_token_count", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)
		rates.CacheReadInputTokenCostFlex = new(0.0000005)
		rates.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)

		// The flat flex rate is expected even above 272k tokens.
		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{isFlex: true})
		assert.Equal(t, 0.0000005, got)
	})

	t.Run("flex_falls_back_to_base_cache_rate_when_no_flex_cache_rate", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCost = new(0.0000003)

		// Without a flex cache rate the base cache rate is expected.
		got := tieredCacheReadInputTokenRate(&rates, 1500, serviceTier{isFlex: true})
		assert.Equal(t, 0.0000003, got)
	})

	t.Run("flex_wins_over_272k_priority_and_priority_base_when_all_present", func(t *testing.T) {
		rates := chatPricing(0.000003, 0.000015)
		rates.CacheReadInputTokenCostAbove272kTokens = new(5e-7)
		rates.CacheReadInputTokenCostFlex = new(1.3e-7)
		rates.CacheReadInputTokenCostPriority = new(5e-7)
		rates.CacheReadInputTokenCostAbove272kTokensPriority = new(0.000001)

		// The token count is above 272k, yet the flat flex rate is expected.
		got := tieredCacheReadInputTokenRate(&rates, 280000, serviceTier{isFlex: true})
		assert.Equal(t, 1.3e-7, got)
	})
}
// =========================================================================
// tierFromString tests
// =========================================================================
// TestTierFromString_Priority verifies that "priority" sets only the priority
// flag on the resulting tier.
func TestTierFromString_Priority(t *testing.T) {
	raw := "priority"

	parsed := tierFromString(&raw)

	assert.True(t, parsed.isPriority)
	assert.False(t, parsed.isFlex)
}
// TestTierFromString_Flex verifies that "flex" sets only the flex flag on the
// resulting tier.
func TestTierFromString_Flex(t *testing.T) {
	raw := "flex"

	parsed := tierFromString(&raw)

	assert.False(t, parsed.isPriority)
	assert.True(t, parsed.isFlex)
}
// TestTierFromString_Default verifies that "auto", "default", the empty string
// and an unrecognized value all produce a tier with neither flag set.
func TestTierFromString_Default(t *testing.T) {
	inputs := []string{"auto", "default", "", "unknown"}

	for i := range inputs {
		raw := inputs[i]
		parsed := tierFromString(&raw)
		assert.False(t, parsed.isPriority, "expected no priority for %q", raw)
		assert.False(t, parsed.isFlex, "expected no flex for %q", raw)
	}
}
// TestTierFromString_Nil verifies that a nil tier string produces a tier with
// neither flag set.
func TestTierFromString_Nil(t *testing.T) {
	parsed := tierFromString(nil)

	assert.False(t, parsed.isPriority)
	assert.False(t, parsed.isFlex)
}
// =========================================================================
// Flex tier tests
// =========================================================================
// TestComputeTextCost_FlexUsesFlexRate verifies that a flex service tier is
// billed at the flex input and output rates.
func TestComputeTextCost_FlexUsesFlexRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenFlex = new(0.0000015)
	rates.OutputCostPerTokenFlex = new(0.0000075)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}

	got := computeTextCost(&rates, tokens, serviceTier{isFlex: true})

	// Flex rates: 1000*0.0000015 + 500*0.0000075 = 0.0015 + 0.00375 = 0.00525
	assert.InDelta(t, 0.00525, got, 1e-12)
}
// TestComputeTextCost_NonFlexIgnoresFlexRate verifies that the default service
// tier is billed at the base rates even when flex rates are configured.
func TestComputeTextCost_NonFlexIgnoresFlexRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenFlex = new(0.0000015)
	rates.OutputCostPerTokenFlex = new(0.0000075)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}

	got := computeTextCost(&rates, tokens, serviceTier{})

	// Base rates: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
	assert.InDelta(t, 0.0105, got, 1e-12)
}
// TestComputeTextCost_FlexIgnoresTokenTiers verifies that a flex request above
// 272k total tokens is still billed at the flat flex rates rather than the
// above-272k rates.
func TestComputeTextCost_FlexIgnoresTokenTiers(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	// Flat flex rates.
	rates.InputCostPerTokenFlex = new(0.0000015)
	rates.OutputCostPerTokenFlex = new(0.0000075)
	// Token-count tier rates that must not be applied to flex requests.
	rates.InputCostPerTokenAbove272kTokens = new(0.000009)
	rates.OutputCostPerTokenAbove272kTokens = new(0.000045)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     250000,
		CompletionTokens: 30000,
		TotalTokens:      280000,
	}

	got := computeTextCost(&rates, tokens, serviceTier{isFlex: true})

	// Flex flat rate: 250000*0.0000015 + 30000*0.0000075 = 0.375 + 0.225 = 0.60
	assert.InDelta(t, 0.60, got, 1e-9)
}
// TestComputeTextCost_FlexCacheReadRate verifies that, for a flex request,
// cached read tokens are billed at the flex cache read rate and the remaining
// prompt tokens at the flex input rate.
func TestComputeTextCost_FlexCacheReadRate(t *testing.T) {
	rates := chatPricing(0.000003, 0.000015)
	rates.InputCostPerTokenFlex = new(0.0000015)
	rates.OutputCostPerTokenFlex = new(0.0000075)
	rates.CacheReadInputTokenCost = new(0.0000003)
	rates.CacheReadInputTokenCostFlex = new(0.0000006)

	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
		PromptTokensDetails: &schemas.ChatPromptTokensDetails{
			CachedReadTokens: 400,
		},
	}

	got := computeTextCost(&rates, tokens, serviceTier{isFlex: true})

	// Uncached input:     (1000-400)*0.0000015 = 0.0009
	// Cached read (flex): 400*0.0000006        = 0.00024
	// Output:             500*0.0000075        = 0.00375
	// Total:              0.0009 + 0.00024 + 0.00375 = 0.00489
	assert.InDelta(t, 0.00489, got, 1e-12)
}
// TestComputeTextCost_FlexFallsBackToBaseWhenNoFlexRate asserts that a
// flex-tier request is billed at the base rates when the pricing entry has no
// flex fields configured.
func TestComputeTextCost_FlexFallsBackToBaseWhenNoFlexRate(t *testing.T) {
	// Base rates as fallback: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
	const wantCost = 0.0105

	// Only base rates are set; every flex field stays nil.
	pricing := chatPricing(0.000003, 0.000015)

	tokens := schemas.BifrostLLMUsage{
		PromptTokens:     1000,
		CompletionTokens: 500,
		TotalTokens:      1500,
	}

	gotCost := computeTextCost(&pricing, &tokens, serviceTier{isFlex: true})
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
func TestCalculateCost_FlexTier_EndToEnd(t *testing.T) {
tierStr := "flex"
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
InputCostPerToken: new(0.000005),
OutputCostPerToken: new(0.000015),
InputCostPerTokenFlex: new(0.0000025),
OutputCostPerTokenFlex: new(0.0000075),
},
})
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
ServiceTier: &tierStr,
Usage: &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
ResolvedModelUsed: "gpt-4o",
},
},
}
cost := mc.CalculateCost(resp, nil)
// Flex rates: 1000*0.0000025 + 500*0.0000075 = 0.0025 + 0.00375 = 0.00625
assert.InDelta(t, 0.00625, cost, 1e-12)
}
func TestCalculateCost_FlexTier_FallsBackToBaseWhenNoFlexRate(t *testing.T) {
tierStr := "flex"
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
resp := &schemas.BifrostResponse{
ChatResponse: &schemas.BifrostChatResponse{
ServiceTier: &tierStr,
Usage: &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
},
ExtraFields: schemas.BifrostResponseExtraFields{
RequestType: schemas.ChatCompletionRequest,
Provider: schemas.OpenAI,
OriginalModelRequested: "gpt-4o",
ResolvedModelUsed: "gpt-4o",
},
},
}
cost := mc.CalculateCost(resp, nil)
// No flex rates configured — falls back to base: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
assert.InDelta(t, 0.0125, cost, 1e-12)
}
func TestCalculateCost_ProviderCostZeroTotalStillCalculates(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
// Provider cost present but TotalCost is 0 → our calculation runs
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
PromptTokens: 1000,
CompletionTokens: 500,
TotalTokens: 1500,
Cost: &schemas.BifrostCost{
TotalCost: 0,
},
})
cost := mc.CalculateCost(resp, nil)
assert.InDelta(t, 0.0125, cost, 1e-12)
}
// TestCalculateCost_AllCachedTokens asserts the text cost when every prompt
// token is reported as a cached read and there are no completion tokens: only
// the cache-read rate should contribute.
func TestCalculateCost_AllCachedTokens(t *testing.T) {
	// Non-cached: 0, cached: 1000*0.0000005 = 0.0005
	const wantCost = 0.0005

	pricing := chatPricing(0.000005, 0.000015)
	pricing.CacheReadInputTokenCost = bifrost.Ptr(0.0000005)

	// CachedReadTokens equals PromptTokens, i.e. the whole prompt came from cache.
	allCached := &schemas.ChatPromptTokensDetails{CachedReadTokens: 1000}
	tokens := &schemas.BifrostLLMUsage{
		PromptTokens:        1000,
		CompletionTokens:    0,
		TotalTokens:         1000,
		PromptTokensDetails: allCached,
	}

	gotCost := computeTextCost(&pricing, tokens, serviceTier{})
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// =========================================================================
// Nil usage fallbacks — per-unit pricing when no token data is reported
// =========================================================================
// TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing asserts that an
// image response built with nil usage is priced as a single output image at
// the per-image rate.
func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) {
	const (
		model    = "dall-e-3"
		provider = "openai"
		mode     = "image_generation"

		// 1 image * $0.04 = $0.04
		wantCost = 0.04
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			InputCostPerToken:  bifrost.Ptr(0.0),
			OutputCostPerImage: bifrost.Ptr(0.04),
		},
	}
	catalog := testCatalogWithPricing(entries)

	// The response exists but carries no usage at all.
	gotCost := catalog.CalculateCost(makeImageResponse(provider, model, nil), nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage asserts
// that, with both input and output per-image rates configured but nil usage
// (so no input image count), only the output per-image charge is expected.
func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testing.T) {
	const (
		model    = "test-image-model"
		provider = "test"
		mode     = "image_generation"

		// NumInputImages is 0 (not populated from request), so only output
		// pricing applies: 1 output image * $0.04 = $0.04
		wantCost = 0.04
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			InputCostPerImage:  bifrost.Ptr(0.01),
			OutputCostPerImage: bifrost.Ptr(0.04),
		},
	}
	catalog := testCatalogWithPricing(entries)

	gotCost := catalog.CalculateCost(makeImageResponse(provider, model, nil), nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestCalculateCost_ImageGeneration_WithInputImages asserts that when usage
// reports NumInputImages, the input per-image rate is charged for each input
// image in addition to the output per-image charge.
func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) {
	const (
		model    = "gpt-image-1"
		provider = "openai"
		mode     = "image_generation"

		// 2 input images * $0.01 + 1 output image * $0.04 = $0.06
		wantCost = 0.06
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			InputCostPerImage:  bifrost.Ptr(0.01),
			OutputCostPerImage: bifrost.Ptr(0.04),
		},
	}
	catalog := testCatalogWithPricing(entries)

	// Only the input image count is reported; no token counts are set.
	imgUsage := &schemas.ImageUsage{NumInputImages: 2}

	gotCost := catalog.CalculateCost(makeImageResponse(provider, model, imgUsage), nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestCalculateCost_ImageGeneration_OutputCountFromData asserts that an image
// response with three Data entries and no usage is priced as three output
// images (count expected to come from len(Data) via populateOutputImageCount).
func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) {
	const (
		model    = "dall-e-3"
		provider = "openai"
		mode     = "image_generation"

		// 3 output images * $0.04 = $0.12
		wantCost = 0.12
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			OutputCostPerImage: bifrost.Ptr(0.04),
		},
	}
	catalog := testCatalogWithPricing(entries)

	// Three generated images; Usage is deliberately left unset.
	generated := []schemas.ImageData{
		{URL: "https://example.com/img1.png", Index: 0},
		{URL: "https://example.com/img2.png", Index: 1},
		{URL: "https://example.com/img3.png", Index: 2},
	}
	imageResp := &schemas.BifrostImageGenerationResponse{
		Data: generated,
		ExtraFields: schemas.BifrostResponseExtraFields{
			RequestType:            schemas.ImageGenerationRequest,
			Provider:               provider,
			OriginalModelRequested: model,
		},
	}

	gotCost := catalog.CalculateCost(&schemas.BifrostResponse{ImageGenerationResponse: imageResp}, nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing asserts that an
// image response with nil usage costs 0 when the pricing entry has only
// per-token rates and no per-image rates.
func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T) {
	const (
		model    = "token-only-model"
		provider = "test"
		mode     = "image_generation"

		// No per-image pricing and all tokens are zero → 0
		wantCost = 0.0
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			InputCostPerToken:  bifrost.Ptr(0.000001),
			OutputCostPerToken: bifrost.Ptr(0.000002),
		},
	}
	catalog := testCatalogWithPricing(entries)

	gotCost := catalog.CalculateCost(makeImageResponse(provider, model, nil), nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing asserts that a
// non-nil but zero-valued ImageUsage is priced like nil usage: one output
// image at the per-image rate.
func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T) {
	const (
		model    = "dall-e-3"
		provider = "openai"
		mode     = "image_generation"

		// 1 output image * $0.04 = $0.04
		wantCost = 0.04
	)

	entries := map[string]configstoreTables.TableModelPricing{
		makeKey(model, provider, mode): {
			Model:              model,
			Provider:           provider,
			Mode:               mode,
			OutputCostPerImage: bifrost.Ptr(0.04),
		},
	}
	catalog := testCatalogWithPricing(entries)

	// Usage is present, but every field is left at its zero value.
	zeroUsage := &schemas.ImageUsage{}

	gotCost := catalog.CalculateCost(makeImageResponse(provider, model, zeroUsage), nil)
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestComputeImageCost_MixedInputTokensOutputPerImage asserts the mixed case
// where the input side reports tokens and the output side reports only an
// image count: input is charged per token, output per image.
func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) {
	// Input: 500 tokens * $0.000005 = $0.0025
	// Output: no output tokens → falls back to 2 images * $0.04 = $0.08
	const wantCost = 0.0825

	pricing := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
		OutputCostPerImage: bifrost.Ptr(0.04),
	}

	// Text-prompt tokens on the input side; only an image count on the output side.
	outputDetails := &schemas.ImageTokenDetails{NImages: 2}
	imgUsage := &schemas.ImageUsage{
		InputTokens:         500,
		OutputTokensDetails: outputDetails,
	}

	gotCost := computeImageCost(&pricing, imgUsage, "", "", serviceTier{})
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestComputeImageCost_MixedInputPerImageOutputTokens asserts the mixed case
// where the input side reports only an image count and the output side
// reports tokens: input is charged per image, output per token.
func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) {
	// Input: no input tokens → falls back to 3 images * $0.01 = $0.03
	// Output: 1000 tokens * $0.000015 = $0.015
	const wantCost = 0.045

	pricing := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
		InputCostPerImage:  bifrost.Ptr(0.01),
	}

	// An input image count without input tokens; output tokens are present.
	imgUsage := schemas.ImageUsage{
		NumInputImages: 3,
		OutputTokens:   1000,
	}

	gotCost := computeImageCost(&pricing, &imgUsage, "", "", serviceTier{})
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}
// TestComputeImageCost_BothHaveTokens_IgnoresPerImage asserts that when both
// input and output token counts are reported, the cost is token-based only and
// the configured per-image rates do not contribute.
func TestComputeImageCost_BothHaveTokens_IgnoresPerImage(t *testing.T) {
	// Input: 200 * $0.000005 = $0.001 (tokens present, per-image ignored)
	// Output: 800 * $0.000015 = $0.012 (tokens present, per-image ignored)
	const wantCost = 0.013

	pricing := configstoreTables.TableModelPricing{
		InputCostPerToken:  bifrost.Ptr(0.000005),
		OutputCostPerToken: bifrost.Ptr(0.000015),
		InputCostPerImage:  bifrost.Ptr(0.01),
		OutputCostPerImage: bifrost.Ptr(0.04),
	}

	// NumInputImages is set on purpose: it must not add a per-image charge
	// because token counts are available on both sides.
	imgUsage := schemas.ImageUsage{
		InputTokens:    200,
		OutputTokens:   800,
		TotalTokens:    1000,
		NumInputImages: 3,
	}

	gotCost := computeImageCost(&pricing, &imgUsage, "", "", serviceTier{})
	assert.InDelta(t, wantCost, gotCost, 1e-12)
}