2165 lines
77 KiB
Go
2165 lines
77 KiB
Go
package modelcatalog
|
|
|
|
import (
|
|
"testing"
|
|
|
|
bifrost "github.com/maximhq/bifrost/core"
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// chatPricing returns a TableModelPricing with the given per-token rates.
|
|
func chatPricing(input, output float64) configstoreTables.TableModelPricing {
|
|
return configstoreTables.TableModelPricing{
|
|
Model: "test-model",
|
|
Provider: "test-provider",
|
|
Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(input),
|
|
OutputCostPerToken: bifrost.Ptr(output),
|
|
}
|
|
}
|
|
|
|
// testCatalogWithPricing creates a catalog pre-loaded with the given pricing entries.
|
|
func testCatalogWithPricing(entries map[string]configstoreTables.TableModelPricing) *ModelCatalog {
|
|
mc := newTestCatalog(nil, nil)
|
|
mc.logger = noOpLogger{}
|
|
for k, v := range entries {
|
|
mc.pricingData[k] = v
|
|
}
|
|
return mc
|
|
}
|
|
|
|
// makeChatResponse builds a minimal BifrostResponse for a chat completion.
|
|
func makeChatResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
|
|
return &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
Usage: usage,
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: provider,
|
|
OriginalModelRequested: model,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// makeEmbeddingResponse builds a minimal BifrostResponse for an embedding request.
|
|
func makeEmbeddingResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
|
|
return &schemas.BifrostResponse{
|
|
EmbeddingResponse: &schemas.BifrostEmbeddingResponse{
|
|
Usage: usage,
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.EmbeddingRequest,
|
|
Provider: provider,
|
|
OriginalModelRequested: model,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// makeRerankResponse builds a minimal BifrostResponse for a rerank request.
|
|
func makeRerankResponse(provider schemas.ModelProvider, model string, usage *schemas.BifrostLLMUsage) *schemas.BifrostResponse {
|
|
return &schemas.BifrostResponse{
|
|
RerankResponse: &schemas.BifrostRerankResponse{
|
|
Usage: usage,
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.RerankRequest,
|
|
Provider: provider,
|
|
OriginalModelRequested: model,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// makeImageResponse builds a minimal BifrostResponse for an image generation request.
|
|
func makeImageResponse(provider schemas.ModelProvider, model string, usage *schemas.ImageUsage) *schemas.BifrostResponse {
|
|
return &schemas.BifrostResponse{
|
|
ImageGenerationResponse: &schemas.BifrostImageGenerationResponse{
|
|
Usage: usage,
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ImageGenerationRequest,
|
|
Provider: provider,
|
|
OriginalModelRequested: model,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// derefF returns the value f points to, or 0 when f is nil.
func derefF(f *float64) float64 {
	if f != nil {
		return *f
	}
	return 0
}
|
|
|
|
// =========================================================================
|
|
// 1. computeTextCost — unit tests (pure function, no catalog)
|
|
// =========================================================================
|
|
|
|
func TestComputeTextCost_BasicInputOutput(t *testing.T) {
|
|
// GPT-4o: $5/M input, $15/M output
|
|
p := chatPricing(0.000005, 0.000015)
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
// 1000 * 0.000005 + 500 * 0.000015 = 0.005 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_NilUsage(t *testing.T) {
|
|
p := chatPricing(0.000005, 0.000015)
|
|
assert.Equal(t, 0.0, computeTextCost(&p, nil, serviceTier{}))
|
|
}
|
|
|
|
func TestComputeTextCost_ZeroTokens(t *testing.T) {
|
|
p := chatPricing(0.000005, 0.000015)
|
|
usage := &schemas.BifrostLLMUsage{}
|
|
assert.Equal(t, 0.0, computeTextCost(&p, usage, serviceTier{}))
|
|
}
|
|
|
|
func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) {
|
|
// Claude 3.5 Sonnet (Bedrock): input=$3/M, output=$15/M, cache_read=$0.3/M, cache_creation=$3.75/M
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = bifrost.Ptr(0.0000003)
|
|
p.CacheCreationInputTokenCost = bifrost.Ptr(0.00000375)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 2000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 2500,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 1500, // 1500 read from cache
|
|
CachedWriteTokens: 200, // 200 cache creation tokens
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Both cached read and write tokens are input-side deductions from promptTokens.
|
|
// Input: (2000-1500-200)*0.000003 + 1500*0.0000003 + 200*0.00000375 = 0.0009 + 0.00045 + 0.00075 = 0.0021
|
|
// Output: 500*0.000015 = 0.0075
|
|
// Total: 0.0021 + 0.0075 = 0.0096
|
|
assert.InDelta(t, 0.0096, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_Tiered200k(t *testing.T) {
|
|
// Claude 3.5 Sonnet Bedrock 200k tier: input=$6/M, output=$30/M
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
|
|
p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 180000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 210000, // Above 200k threshold
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Uses tiered rate since total > 200k
|
|
// 180000 * 0.000006 + 30000 * 0.00003 = 1.08 + 0.90 = 1.98
|
|
assert.InDelta(t, 1.98, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
|
|
p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500, // Below 200k
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Uses base rate since total < 200k
|
|
// 1000 * 0.000003 + 500 * 0.000015 = 0.003 + 0.0075 = 0.0105
|
|
assert.InDelta(t, 0.0105, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_Tiered272k(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove200kTokens = new(0.000006)
|
|
p.OutputCostPerTokenAbove200kTokens = new(0.00003)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000, // Above 272k threshold
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Uses 272k tiered rate since total > 272k
|
|
// 250000 * 0.000009 + 30000 * 0.000045 = 2.25 + 1.35 = 3.60
|
|
assert.InDelta(t, 3.60, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_Between200kAnd272kUses200kRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove200kTokens = new(0.000006)
|
|
p.OutputCostPerTokenAbove200kTokens = new(0.00003)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 200000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 230000, // Between 200k and 272k
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Uses 200k tiered rate since total > 200k but <= 272k
|
|
// 200000 * 0.000006 + 30000 * 0.00003 = 1.20 + 0.90 = 2.10
|
|
assert.InDelta(t, 2.10, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_272kTierWithCacheRead(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000, // Above 272k
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 50000,
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Non-cached input: (250000-50000) * 0.000009 = 200000 * 0.000009 = 1.80
|
|
// Cached read: 50000 * 0.0000009 = 0.045
|
|
// Output: 30000 * 0.000045 = 1.35
|
|
// Total: 1.80 + 0.045 + 1.35 = 3.195
|
|
assert.InDelta(t, 3.195, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_SearchQueryCost(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.SearchContextCostPerQuery = bifrost.Ptr(0.01) // $0.01 per search query
|
|
|
|
numQueries := 3
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
CompletionTokensDetails: &schemas.ChatCompletionTokensDetails{
|
|
NumSearchQueries: &numQueries,
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// 1000*0.000003 + 500*0.000015 + 3*0.01 = 0.003 + 0.0075 + 0.03 = 0.0405
|
|
assert.InDelta(t, 0.0405, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate(t *testing.T) {
|
|
// If cache rate fields are nil, tieredCacheReadInputTokenRate falls back to base InputCostPerToken
|
|
p := chatPricing(0.000005, 0.000015)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 400,
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Non-cached prompt: (1000-400)*0.000005 = 600*0.000005 = 0.003
|
|
// Cached prompt: 400 tokens at base input rate (no cache rate set) = 400*0.000005 = 0.002
|
|
// Output: 500*0.000015 = 0.0075
|
|
// Total: 0.003 + 0.002 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 2. computeEmbeddingCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeEmbeddingCost_Basic(t *testing.T) {
|
|
// Titan Embed Text v1: $0.1/M input
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
}
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 5000,
|
|
TotalTokens: 5000,
|
|
}
|
|
cost := computeEmbeddingCost(&p, usage, serviceTier{})
|
|
// 5000 * 0.0000001 = 0.0005
|
|
assert.InDelta(t, 0.0005, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeEmbeddingCost_NilUsage(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{InputCostPerToken: new(0.0000001)}
|
|
assert.Equal(t, 0.0, computeEmbeddingCost(&p, nil, serviceTier{}))
|
|
}
|
|
|
|
// =========================================================================
|
|
// 3. computeRerankCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeRerankCost_Basic(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.000002),
|
|
}
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 2000,
|
|
CompletionTokens: 100,
|
|
TotalTokens: 2100,
|
|
}
|
|
cost := computeRerankCost(&p, usage, serviceTier{})
|
|
// 2000*0.000001 + 100*0.000002 = 0.002 + 0.0002 = 0.0022
|
|
assert.InDelta(t, 0.0022, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeRerankCost_WithSearchCost(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
SearchContextCostPerQuery: bifrost.Ptr(0.001),
|
|
}
|
|
numQueries := 5
|
|
usage := &schemas.BifrostLLMUsage{
|
|
CompletionTokensDetails: &schemas.ChatCompletionTokensDetails{
|
|
NumSearchQueries: &numQueries,
|
|
},
|
|
}
|
|
cost := computeRerankCost(&p, usage, serviceTier{})
|
|
assert.InDelta(t, 0.005, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeRerankCost_NilUsage(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{InputCostPerToken: new(0.001)}
|
|
assert.Equal(t, 0.0, computeRerankCost(&p, nil, serviceTier{}))
|
|
}
|
|
|
|
// =========================================================================
|
|
// 4. computeSpeechCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) {
|
|
// TTS: input=text tokens, output=audio tokens (preferred over per-second)
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0000025),
|
|
OutputCostPerToken: bifrost.Ptr(0.00001),
|
|
OutputCostPerSecond: bifrost.Ptr(0.00025),
|
|
}
|
|
seconds := 60
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 100,
|
|
CompletionTokens: 200,
|
|
TotalTokens: 300,
|
|
}
|
|
cost := computeSpeechCost(&p, usage, &seconds, 0, serviceTier{})
|
|
// Input: 100 text tokens * $0.0000025 = $0.00025
|
|
// Output: 200 audio tokens present → uses token rate $0.00001, NOT per-second
|
|
// 200 * $0.00001 = $0.002
|
|
// Total: $0.00225
|
|
assert.InDelta(t, 0.00225, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) {
|
|
// TTS: no output tokens → falls back to per-second output pricing
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.000002),
|
|
OutputCostPerSecond: bifrost.Ptr(0.0001),
|
|
}
|
|
seconds := 120
|
|
usage := &schemas.BifrostLLMUsage{PromptTokens: 500}
|
|
cost := computeSpeechCost(&p, usage, &seconds, 0, serviceTier{})
|
|
// Input: 500 * $0.000001 = $0.0005
|
|
// Output: no CompletionTokens → falls back to 120 * $0.0001 = $0.012
|
|
// Total: $0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeSpeechCost_OutputAudioTokenRate(t *testing.T) {
|
|
// TTS: output uses OutputCostPerAudioToken when available
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.000002),
|
|
OutputCostPerAudioToken: bifrost.Ptr(0.00005),
|
|
}
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 200,
|
|
CompletionTokens: 100,
|
|
TotalTokens: 300,
|
|
}
|
|
cost := computeSpeechCost(&p, usage, nil, 0, serviceTier{})
|
|
// Input: 200 * $0.000001 = $0.0002
|
|
// Output: 100 * $0.00005 = $0.005 (OutputCostPerAudioToken preferred)
|
|
// Total: $0.0052
|
|
assert.InDelta(t, 0.0052, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeSpeechCost_TokenFallback(t *testing.T) {
|
|
p := chatPricing(0.000005, 0.000015)
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
cost := computeSpeechCost(&p, usage, nil, 0, serviceTier{}) // No audio seconds → token fallback
|
|
// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeSpeechCost_NilUsageNilSeconds(t *testing.T) {
|
|
p := chatPricing(0.000005, 0.000015)
|
|
assert.Equal(t, 0.0, computeSpeechCost(&p, nil, nil, 0, serviceTier{}))
|
|
}
|
|
|
|
// =========================================================================
|
|
// 5. computeTranscriptionCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeTranscriptionCost_DurationBased(t *testing.T) {
|
|
// assemblyai/nano: input_cost_per_second=0.00010278
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
InputCostPerSecond: bifrost.Ptr(0.00010278),
|
|
}
|
|
seconds := 300 // 5 minutes
|
|
cost := computeTranscriptionCost(&p, nil, &seconds, nil, serviceTier{})
|
|
// 300 * 0.00010278 = 0.030834
|
|
assert.InDelta(t, 0.030834, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTranscriptionCost_AudioTokenDetails(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerAudioToken: bifrost.Ptr(0.00001),
|
|
}
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 2000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 2500,
|
|
}
|
|
audioDetails := &schemas.TranscriptionUsageInputTokenDetails{
|
|
AudioTokens: 1500,
|
|
TextTokens: 500,
|
|
}
|
|
cost := computeTranscriptionCost(&p, usage, nil, audioDetails, serviceTier{})
|
|
// Audio: 1500*0.00001 = 0.015
|
|
// Text: 500*0.000005 = 0.0025
|
|
// Output: 500*0.000015 = 0.0075
|
|
// Total: 0.025
|
|
assert.InDelta(t, 0.025, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTranscriptionCost_TokenFallback(t *testing.T) {
|
|
p := chatPricing(0.000005, 0.000015)
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 200,
|
|
TotalTokens: 1200,
|
|
}
|
|
cost := computeTranscriptionCost(&p, usage, nil, nil, serviceTier{})
|
|
// 1000*0.000005 + 200*0.000015 = 0.005 + 0.003 = 0.008
|
|
assert.InDelta(t, 0.008, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T) {
|
|
// STT: audio token details present → uses tokens, not per-second
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
|
|
InputCostPerAudioToken: bifrost.Ptr(0.00001),
|
|
}
|
|
seconds := 60
|
|
audioDetails := &schemas.TranscriptionUsageInputTokenDetails{
|
|
AudioTokens: 5000,
|
|
TextTokens: 1000,
|
|
}
|
|
cost := computeTranscriptionCost(&p, nil, &seconds, audioDetails, serviceTier{})
|
|
// Input: audio token details present → tokens preferred over per-second
|
|
// 5000 audio * $0.00001 = $0.05
|
|
// 1000 text * $0.000005 = $0.005
|
|
// Output: nil usage → $0
|
|
// Total: $0.055
|
|
assert.InDelta(t, 0.055, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) {
|
|
// STT: no audio token details, no prompt tokens → falls back to per-second
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
|
|
}
|
|
seconds := 60
|
|
usage := &schemas.BifrostLLMUsage{
|
|
CompletionTokens: 200,
|
|
TotalTokens: 200,
|
|
}
|
|
cost := computeTranscriptionCost(&p, usage, &seconds, nil, serviceTier{})
|
|
// Input: no audio details, PromptTokens=0 → falls back to 60 * $0.0001 = $0.006
|
|
// Output: 200 * $0.000015 = $0.003
|
|
// Total: $0.009
|
|
assert.InDelta(t, 0.009, cost, 1e-12)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 6. computeImageCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeImageCost_PerImage(t *testing.T) {
|
|
// dall-e-3 (aiml): output_cost_per_image=$0.052
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerImage: bifrost.Ptr(0.052),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{
|
|
NImages: 2,
|
|
},
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// 2 * 0.052 = 0.104
|
|
assert.InDelta(t, 0.104, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerImage: bifrost.Ptr(0.052),
|
|
}
|
|
usage := &schemas.ImageUsage{} // No token details → defaults to 1 image
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
assert.InDelta(t, 0.052, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_TokenBased(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
InputTokens: 1000,
|
|
OutputTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
InputTokens: 2000,
|
|
OutputTokens: 1000,
|
|
TotalTokens: 3000,
|
|
InputTokensDetails: &schemas.ImageTokenDetails{
|
|
TextTokens: 500,
|
|
ImageTokens: 1500,
|
|
},
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{
|
|
TextTokens: 200,
|
|
ImageTokens: 800,
|
|
},
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// Input: (500+1500)*0.000005 = 2000*0.000005 = 0.01
|
|
// Output: (200+800)*0.000015 = 1000*0.000015 = 0.015
|
|
// Total: 0.025
|
|
assert.InDelta(t, 0.025, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_NilUsage(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{OutputCostPerImage: new(0.05)}
|
|
assert.Equal(t, 0.0, computeImageCost(&p, nil, "", "", serviceTier{}))
|
|
}
|
|
|
|
func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerImage: bifrost.Ptr(0.01),
|
|
OutputCostPerImage: bifrost.Ptr(0.05),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
NumInputImages: 3,
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// 3 input * $0.01 + 2 output * $0.05 = $0.03 + $0.10 = $0.13
|
|
assert.InDelta(t, 0.13, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_PerPixelOutput(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerPixel: bifrost.Ptr(0.000000019), // ~$0.02 for 1024x1024
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
|
|
}
|
|
cost := computeImageCost(&p, usage, "1024x1024", "", serviceTier{})
|
|
// 1024*1024 * 1 * 0.000000019 = 1048576 * 0.000000019 ≈ 0.01992
|
|
assert.InDelta(t, 1048576*0.000000019, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerPixel: bifrost.Ptr(0.00000001),
|
|
OutputCostPerPixel: bifrost.Ptr(0.00000002),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
NumInputImages: 2,
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3},
|
|
}
|
|
cost := computeImageCost(&p, usage, "512x512", "", serviceTier{})
|
|
pixels := 512 * 512 // 262144
|
|
// Input: 262144 * 2 * 0.00000001 = 0.00524288
|
|
// Output: 262144 * 3 * 0.00000002 = 0.01572864
|
|
expected := float64(pixels*2)*0.00000001 + float64(pixels*3)*0.00000002
|
|
assert.InDelta(t, expected, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerPixel: bifrost.Ptr(0.00000001),
|
|
OutputCostPerPixel: bifrost.Ptr(0.00000002),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
InputTokens: 1000,
|
|
OutputTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
cost := computeImageCost(&p, usage, "1024x1024", "", serviceTier{})
|
|
// Tokens should win: 1000*0.000005 + 500*0.000015 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerPixel: bifrost.Ptr(0.00000002),
|
|
OutputCostPerImage: bifrost.Ptr(999.0), // should not be used
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
|
|
}
|
|
cost := computeImageCost(&p, usage, "256x256", "", serviceTier{})
|
|
// Per-pixel should win: 65536 * 1 * 0.00000002 = 0.00131072
|
|
assert.InDelta(t, 65536*0.00000002, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerPixel: bifrost.Ptr(0.00000002),
|
|
OutputCostPerImage: bifrost.Ptr(0.05),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// No size → pixels=0, falls through to per-image: 2 * $0.05 = $0.10
|
|
assert.InDelta(t, 0.10, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_QualityBasedRates(t *testing.T) {
|
|
usage := &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
|
|
}
|
|
// Quality-specific rates take precedence over base/size-tier
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerImage: bifrost.Ptr(0.01),
|
|
OutputCostPerImageLowQuality: bifrost.Ptr(0.02),
|
|
OutputCostPerImageMediumQuality: bifrost.Ptr(0.03),
|
|
OutputCostPerImageHighQuality: bifrost.Ptr(0.04),
|
|
OutputCostPerImageAutoQuality: bifrost.Ptr(0.05),
|
|
}
|
|
assert.InDelta(t, 0.02, computeImageCost(&p, usage, "", "low", serviceTier{}), 1e-12)
|
|
assert.InDelta(t, 0.03, computeImageCost(&p, usage, "", "medium", serviceTier{}), 1e-12)
|
|
assert.InDelta(t, 0.04, computeImageCost(&p, usage, "", "high", serviceTier{}), 1e-12)
|
|
assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "auto", serviceTier{}), 1e-12)
|
|
// "hd" does not match any quality case so perImageRate stays nil → size/base fallback.
|
|
assert.InDelta(t, 0.01, computeImageCost(&p, usage, "", "hd", serviceTier{}), 1e-12)
|
|
// Empty quality is treated as auto
|
|
assert.InDelta(t, 0.05, computeImageCost(&p, usage, "", "", serviceTier{}), 1e-12)
|
|
}
|
|
|
|
func TestParseImagePixels(t *testing.T) {
|
|
assert.Equal(t, 1048576, parseImagePixels("1024x1024"))
|
|
assert.Equal(t, 262144, parseImagePixels("512x512"))
|
|
assert.Equal(t, 1835008, parseImagePixels("1792x1024"))
|
|
assert.Equal(t, 0, parseImagePixels(""))
|
|
assert.Equal(t, 0, parseImagePixels("invalid"))
|
|
assert.Equal(t, 0, parseImagePixels("1024"))
|
|
assert.Equal(t, 0, parseImagePixels("0x1024"))
|
|
assert.Equal(t, 0, parseImagePixels("-1x1024"))
|
|
}
|
|
|
|
// =========================================================================
|
|
// 7. computeVideoCost — unit tests
|
|
// =========================================================================
|
|
|
|
func TestComputeVideoCost_DurationBased(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
|
|
}
|
|
seconds := 30
|
|
usage := &schemas.BifrostLLMUsage{PromptTokens: 500, TotalTokens: 500}
|
|
cost := computeVideoCost(&p, usage, &seconds, serviceTier{})
|
|
// Output: 30 * 0.001 = 0.03
|
|
// Input: 500 * 0.000001 = 0.0005
|
|
// Total: 0.0305
|
|
assert.InDelta(t, 0.0305, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerSecond: bifrost.Ptr(0.002),
|
|
}
|
|
seconds := 10
|
|
cost := computeVideoCost(&p, nil, &seconds, serviceTier{})
|
|
assert.InDelta(t, 0.02, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeVideoCost_NilSeconds(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
|
|
}
|
|
usage := &schemas.BifrostLLMUsage{PromptTokens: 1000}
|
|
cost := computeVideoCost(&p, usage, nil, serviceTier{})
|
|
// Only input tokens: 1000 * 0.000001 = 0.001
|
|
assert.InDelta(t, 0.001, cost, 1e-12)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 8. tieredInputRate / tieredOutputRate
|
|
// =========================================================================
|
|
|
|
func TestTieredInputRate_BelowThreshold(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000003),
|
|
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
|
|
}
|
|
assert.Equal(t, 0.000003, tieredInputRate(&p, 100000, serviceTier{}))
|
|
}
|
|
|
|
func TestTieredInputRate_AboveThreshold(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000003),
|
|
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
|
|
}
|
|
assert.Equal(t, 0.000006, tieredInputRate(&p, 210000, serviceTier{}))
|
|
}
|
|
|
|
func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000003),
|
|
}
|
|
// Falls back to base rate when tiered field is nil
|
|
assert.Equal(t, 0.000003, tieredInputRate(&p, 300000, serviceTier{}))
|
|
}
|
|
|
|
func TestTieredOutputRate_AboveThreshold(t *testing.T) {
|
|
p := configstoreTables.TableModelPricing{
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
|
|
}
|
|
assert.Equal(t, 0.00003, tieredOutputRate(&p, 250000, serviceTier{}))
|
|
}
|
|
|
|
// =========================================================================
|
|
// 9. extractCostInput — usage extraction
|
|
// =========================================================================
|
|
|
|
func TestExtractCostInput_ChatResponse(t *testing.T) {
|
|
usage := &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150}
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{Usage: usage},
|
|
}
|
|
input := extractCostInput(resp)
|
|
require.NotNil(t, input.usage)
|
|
assert.Equal(t, 100, input.usage.PromptTokens)
|
|
assert.Equal(t, 50, input.usage.CompletionTokens)
|
|
}
|
|
|
|
func TestExtractCostInput_EmbeddingResponse(t *testing.T) {
|
|
usage := &schemas.BifrostLLMUsage{PromptTokens: 200, TotalTokens: 200}
|
|
resp := &schemas.BifrostResponse{
|
|
EmbeddingResponse: &schemas.BifrostEmbeddingResponse{Usage: usage},
|
|
}
|
|
input := extractCostInput(resp)
|
|
require.NotNil(t, input.usage)
|
|
assert.Equal(t, 200, input.usage.PromptTokens)
|
|
}
|
|
|
|
func TestExtractCostInput_ImageResponse(t *testing.T) {
|
|
imgUsage := &schemas.ImageUsage{InputTokens: 100, OutputTokens: 200, TotalTokens: 300}
|
|
resp := &schemas.BifrostResponse{
|
|
ImageGenerationResponse: &schemas.BifrostImageGenerationResponse{Usage: imgUsage},
|
|
}
|
|
input := extractCostInput(resp)
|
|
assert.Nil(t, input.usage)
|
|
require.NotNil(t, input.imageUsage)
|
|
assert.Equal(t, 300, input.imageUsage.TotalTokens)
|
|
}
|
|
|
|
func TestExtractCostInput_TranscriptionWithSeconds(t *testing.T) {
|
|
sec := 60
|
|
resp := &schemas.BifrostResponse{
|
|
TranscriptionResponse: &schemas.BifrostTranscriptionResponse{
|
|
Usage: &schemas.TranscriptionUsage{
|
|
Seconds: &sec,
|
|
InputTokens: bifrost.Ptr(1000),
|
|
OutputTokens: bifrost.Ptr(200),
|
|
TotalTokens: bifrost.Ptr(1200),
|
|
},
|
|
},
|
|
}
|
|
input := extractCostInput(resp)
|
|
require.NotNil(t, input.usage)
|
|
require.NotNil(t, input.audioSeconds)
|
|
assert.Equal(t, 60, *input.audioSeconds)
|
|
assert.Equal(t, 1000, input.usage.PromptTokens)
|
|
}
|
|
|
|
func TestExtractCostInput_SpeechResponse(t *testing.T) {
|
|
resp := &schemas.BifrostResponse{
|
|
SpeechResponse: &schemas.BifrostSpeechResponse{
|
|
Usage: &schemas.SpeechUsage{
|
|
InputTokens: 100,
|
|
OutputTokens: 500,
|
|
TotalTokens: 600,
|
|
},
|
|
},
|
|
}
|
|
input := extractCostInput(resp)
|
|
require.NotNil(t, input.usage)
|
|
assert.Equal(t, 100, input.usage.PromptTokens)
|
|
assert.Equal(t, 500, input.usage.CompletionTokens)
|
|
assert.Equal(t, 600, input.usage.TotalTokens)
|
|
}
|
|
|
|
func TestExtractCostInput_VideoResponse(t *testing.T) {
|
|
sec := "15"
|
|
resp := &schemas.BifrostResponse{
|
|
VideoGenerationResponse: &schemas.BifrostVideoGenerationResponse{
|
|
Seconds: &sec,
|
|
},
|
|
}
|
|
input := extractCostInput(resp)
|
|
require.NotNil(t, input.videoSeconds)
|
|
assert.Equal(t, 15, *input.videoSeconds)
|
|
}
|
|
|
|
func TestExtractCostInput_VideoResponseInvalidSeconds(t *testing.T) {
|
|
sec := "not-a-number"
|
|
resp := &schemas.BifrostResponse{
|
|
VideoGenerationResponse: &schemas.BifrostVideoGenerationResponse{
|
|
Seconds: &sec,
|
|
},
|
|
}
|
|
input := extractCostInput(resp)
|
|
assert.Nil(t, input.videoSeconds)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 10. Semantic cache billing (calculateCostWithCache)
|
|
// =========================================================================
|
|
|
|
func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o", Provider: "openai", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
},
|
|
})
|
|
|
|
hitType := "direct"
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
CacheDebug: &schemas.BifrostCacheDebug{
|
|
CacheHit: true,
|
|
HitType: &hitType,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.0, cost)
|
|
}
|
|
|
|
func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
|
|
embProvider := "openai"
|
|
embModel := "text-embedding-3-small"
|
|
embTokens := 500
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o", Provider: "openai", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
},
|
|
makeKey("text-embedding-3-small", "openai", "embedding"): {
|
|
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
|
|
InputCostPerToken: bifrost.Ptr(0.00000002),
|
|
},
|
|
})
|
|
|
|
hitType := "semantic"
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
Usage: &schemas.BifrostLLMUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
CacheDebug: &schemas.BifrostCacheDebug{
|
|
CacheHit: true,
|
|
HitType: &hitType,
|
|
ProviderUsed: &embProvider,
|
|
ModelUsed: &embModel,
|
|
InputTokens: &embTokens,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Only embedding cost: 500 * 0.00000002 = 0.00001
|
|
assert.InDelta(t, 0.00001, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
|
|
embProvider := "openai"
|
|
embModel := "text-embedding-3-small"
|
|
embTokens := 500
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o", Provider: "openai", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
},
|
|
makeKey("text-embedding-3-small", "openai", "embedding"): {
|
|
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
|
|
InputCostPerToken: bifrost.Ptr(0.00000002),
|
|
},
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
CacheDebug: &schemas.BifrostCacheDebug{
|
|
CacheHit: false,
|
|
ProviderUsed: &embProvider,
|
|
ModelUsed: &embModel,
|
|
InputTokens: &embTokens,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Base cost: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
|
|
// Embedding cost: 500 * 0.00000002 = 0.00001
|
|
// Total: 0.01251
|
|
assert.InDelta(t, 0.01251, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
|
|
mc := testCatalogWithPricing(nil)
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
CacheDebug: &schemas.BifrostCacheDebug{
|
|
CacheHit: true,
|
|
// No ProviderUsed, ModelUsed, InputTokens
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.0, cost)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 11. CalculateCost integration — end-to-end
|
|
// =========================================================================
|
|
|
|
func TestCalculateCost_NilResponse(t *testing.T) {
|
|
mc := testCatalogWithPricing(nil)
|
|
assert.Equal(t, 0.0, mc.CalculateCost(nil, nil))
|
|
}
|
|
|
|
func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
Cost: &schemas.BifrostCost{
|
|
TotalCost: 0.99, // Provider already calculated
|
|
},
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.99, cost)
|
|
}
|
|
|
|
func TestCalculateCost_NoUsageData(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", nil)
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.0, cost)
|
|
}
|
|
|
|
func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) {
|
|
// GPT-4o: $5/M input, $15/M output, cache_read=$0.5/M
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o", Provider: "openai", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
CacheReadInputTokenCost: bifrost.Ptr(0.0000005),
|
|
},
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 10000,
|
|
CompletionTokens: 2000,
|
|
TotalTokens: 12000,
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 10000*0.000005 + 2000*0.000015 = 0.05 + 0.03 = 0.08
|
|
assert.InDelta(t, 0.08, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) {
|
|
// Claude 3.5 Sonnet (Bedrock): $3/M input, $15/M output, cache_read=$0.3/M, cache_creation=$3.75/M
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): {
|
|
Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", Provider: "bedrock", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000003),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
|
|
CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
|
|
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
|
|
OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
|
|
},
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20241022-v2:0", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 5000,
|
|
CompletionTokens: 1000,
|
|
TotalTokens: 6000,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 3000, // 3000 cache read tokens
|
|
CachedWriteTokens: 500, // 500 cache creation tokens
|
|
},
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Both cached read and write tokens are input-side deductions from promptTokens.
|
|
// Input: (5000-3000-500)*0.000003 + 3000*0.0000003 + 500*0.00000375 = 0.0045 + 0.0009 + 0.001875 = 0.007275
|
|
// Output: 1000*0.000015 = 0.015
|
|
// Total: 0.007275 + 0.015 = 0.022275
|
|
assert.InDelta(t, 0.022275, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_Embedding(t *testing.T) {
|
|
// Titan Embed Text v1: $0.1/M input
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("amazon.titan-embed-text-v1", "bedrock", "embedding"): {
|
|
Model: "amazon.titan-embed-text-v1", Provider: "bedrock", Mode: "embedding",
|
|
InputCostPerToken: bifrost.Ptr(0.0000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
},
|
|
})
|
|
|
|
resp := makeEmbeddingResponse(schemas.Bedrock, "amazon.titan-embed-text-v1", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 10000,
|
|
TotalTokens: 10000,
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 10000 * 0.0000001 = 0.001
|
|
assert.InDelta(t, 0.001, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_Rerank(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("amazon.rerank-v1:0", "bedrock", "rerank"): {
|
|
Model: "amazon.rerank-v1:0", Provider: "bedrock", Mode: "rerank",
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerToken: bifrost.Ptr(0.0),
|
|
},
|
|
})
|
|
|
|
resp := makeRerankResponse(schemas.Bedrock, "amazon.rerank-v1:0", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 500,
|
|
TotalTokens: 500,
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.0, cost)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration(t *testing.T) {
|
|
// dall-e-3 via aiml: output_cost_per_image=$0.052
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("dall-e-3", "aiml", "image_generation"): {
|
|
Model: "dall-e-3", Provider: "aiml", Mode: "image_generation",
|
|
OutputCostPerImage: bifrost.Ptr(0.052),
|
|
},
|
|
})
|
|
|
|
resp := makeImageResponse("aiml", "dall-e-3", &schemas.ImageUsage{
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3},
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 3 * 0.052 = 0.156
|
|
assert.InDelta(t, 0.156, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_StreamRequestTypeNormalized(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
// Stream request type should be normalized to base type
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
Usage: &schemas.BifrostLLMUsage{PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionStreamRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_NoPricingData(t *testing.T) {
|
|
mc := testCatalogWithPricing(nil)
|
|
resp := makeChatResponse(schemas.OpenAI, "unknown-model", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500,
|
|
})
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.Equal(t, 0.0, cost)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 12. Pricing resolution — getPricing fallback logic (exercised through resolvePricing)
|
|
// =========================================================================
|
|
|
|
func TestGetPricing_DirectLookup(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
|
|
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_GeminiFallsBackToVertex(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gemini-2.0-flash", "vertex", "chat"): {
|
|
Model: "gemini-2.0-flash", Provider: "vertex", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.0000001), OutputCostPerToken: bifrost.Ptr(0.0000004),
|
|
},
|
|
})
|
|
p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"})
|
|
assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_VertexStripsProviderPrefix(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
|
|
})
|
|
p := mc.resolvePricing("vertex", "google/gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "vertex"})
|
|
assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_BedrockAddsAnthropicPrefix(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): chatPricing(0.000003, 0.000015),
|
|
})
|
|
p := mc.resolvePricing("bedrock", "claude-3-5-sonnet-20241022-v2:0", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "bedrock"})
|
|
assert.Equal(t, 0.000003, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_ResponsesFallsBackToChat(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"})
|
|
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_ResponsesStreamFallsBackToChat(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesStreamRequest, PricingLookupScopes{Provider: "openai"})
|
|
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_RealtimeFallsBackToChat(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
p := mc.resolvePricing("openai", "gpt-4o", "", schemas.RealtimeRequest, PricingLookupScopes{Provider: "openai"})
|
|
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
|
|
})
|
|
// gemini provider + responses request → try vertex + responses → try vertex + chat
|
|
p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "gemini"})
|
|
assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestGetPricing_NotFound(t *testing.T) {
|
|
mc := testCatalogWithPricing(nil)
|
|
p := mc.resolvePricing("openai", "nonexistent", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
|
|
assert.Nil(t, p)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 13. resolvePricing — deployment fallback
|
|
// =========================================================================
|
|
|
|
func TestResolvePricing_DeploymentFallback(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("my-deployment", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
// Model not found directly, but deployment matches
|
|
p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
|
|
require.NotNil(t, p)
|
|
assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestResolvePricing_ResolvedModelHasPriority(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
makeKey("my-deployment", "openai", "chat"): chatPricing(0.000001, 0.000002),
|
|
})
|
|
|
|
// Resolved model ("my-deployment") is looked up first and has priority
|
|
// over the originally requested model ("gpt-4o").
|
|
p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
|
|
require.NotNil(t, p)
|
|
assert.Equal(t, 0.000001, derefF(p.InputCostPerToken))
|
|
}
|
|
|
|
func TestResolvePricing_NothingFound(t *testing.T) {
|
|
mc := testCatalogWithPricing(nil)
|
|
p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest, PricingLookupScopes{})
|
|
assert.Nil(t, p)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 14. normalizeStreamRequestType
|
|
// =========================================================================
|
|
|
|
func TestNormalizeStreamRequestType(t *testing.T) {
|
|
tests := []struct {
|
|
input schemas.RequestType
|
|
expected schemas.RequestType
|
|
}{
|
|
{schemas.ChatCompletionStreamRequest, schemas.ChatCompletionRequest},
|
|
{schemas.TextCompletionStreamRequest, schemas.TextCompletionRequest},
|
|
{schemas.ResponsesStreamRequest, schemas.ResponsesRequest},
|
|
{schemas.SpeechStreamRequest, schemas.SpeechRequest},
|
|
{schemas.TranscriptionStreamRequest, schemas.TranscriptionRequest},
|
|
{schemas.ImageGenerationStreamRequest, schemas.ImageGenerationRequest},
|
|
{schemas.ImageEditStreamRequest, schemas.ImageEditRequest},
|
|
{schemas.RealtimeRequest, schemas.RealtimeRequest}, // realtime is its own base type
|
|
{schemas.ChatCompletionRequest, schemas.ChatCompletionRequest}, // non-stream unchanged
|
|
{schemas.EmbeddingRequest, schemas.EmbeddingRequest}, // non-stream unchanged
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
assert.Equal(t, tt.expected, normalizeStreamRequestType(tt.input), "for input %s", tt.input)
|
|
}
|
|
}
|
|
|
|
// =========================================================================
|
|
// 15. responsesUsageToBifrostUsage
|
|
// =========================================================================
|
|
|
|
func TestResponsesUsageToBifrostUsage_Basic(t *testing.T) {
|
|
u := &schemas.ResponsesResponseUsage{
|
|
InputTokens: 100,
|
|
OutputTokens: 50,
|
|
TotalTokens: 150,
|
|
}
|
|
result := responsesUsageToBifrostUsage(u)
|
|
assert.Equal(t, 100, result.PromptTokens)
|
|
assert.Equal(t, 50, result.CompletionTokens)
|
|
assert.Equal(t, 150, result.TotalTokens)
|
|
assert.Nil(t, result.PromptTokensDetails)
|
|
assert.Nil(t, result.CompletionTokensDetails)
|
|
}
|
|
|
|
func TestResponsesUsageToBifrostUsage_WithTokenDetails(t *testing.T) {
|
|
numQueries := 2
|
|
u := &schemas.ResponsesResponseUsage{
|
|
InputTokens: 1000,
|
|
OutputTokens: 500,
|
|
TotalTokens: 1500,
|
|
InputTokensDetails: &schemas.ResponsesResponseInputTokens{
|
|
CachedReadTokens: 300,
|
|
CachedWriteTokens: 50,
|
|
TextTokens: 600,
|
|
AudioTokens: 50,
|
|
ImageTokens: 50,
|
|
},
|
|
OutputTokensDetails: &schemas.ResponsesResponseOutputTokens{
|
|
ReasoningTokens: 100,
|
|
NumSearchQueries: &numQueries,
|
|
},
|
|
}
|
|
result := responsesUsageToBifrostUsage(u)
|
|
|
|
require.NotNil(t, result.PromptTokensDetails)
|
|
assert.Equal(t, 300, result.PromptTokensDetails.CachedReadTokens)
|
|
assert.Equal(t, 50, result.PromptTokensDetails.CachedWriteTokens)
|
|
assert.Equal(t, 600, result.PromptTokensDetails.TextTokens)
|
|
assert.Equal(t, 50, result.PromptTokensDetails.AudioTokens)
|
|
assert.Equal(t, 50, result.PromptTokensDetails.ImageTokens)
|
|
|
|
require.NotNil(t, result.CompletionTokensDetails)
|
|
assert.Equal(t, 100, result.CompletionTokensDetails.ReasoningTokens)
|
|
require.NotNil(t, result.CompletionTokensDetails.NumSearchQueries)
|
|
assert.Equal(t, 2, *result.CompletionTokensDetails.NumSearchQueries)
|
|
}
|
|
|
|
// =========================================================================
|
|
// 16. Edge cases
|
|
// =========================================================================
|
|
|
|
func TestCalculateCost_200kTier_EndToEnd(t *testing.T) {
|
|
// Claude 3.5 Sonnet Bedrock with 200k tier pricing
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock", "chat"): {
|
|
Model: "anthropic.claude-3-5-sonnet-20240620-v1:0", Provider: "bedrock", Mode: "chat",
|
|
InputCostPerToken: bifrost.Ptr(0.000003),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
|
|
OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
|
|
CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
|
|
CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
|
|
CacheReadInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000006),
|
|
CacheCreationInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000075),
|
|
},
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.Bedrock, "anthropic.claude-3-5-sonnet-20240620-v1:0", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 190000,
|
|
CompletionTokens: 20000,
|
|
TotalTokens: 210000, // Above 200k
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Tiered rate: input=0.000006, output=0.00003
|
|
// 190000*0.000006 + 20000*0.00003 = 1.14 + 0.6 = 1.74
|
|
assert.InDelta(t, 1.74, cost, 1e-9)
|
|
}
|
|
|
|
func TestCalculateCost_272kTier_EndToEnd(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("claude-3-7-sonnet", "anthropic", "chat"): {
|
|
Model: "claude-3-7-sonnet",
|
|
Provider: "anthropic",
|
|
Mode: "chat",
|
|
InputCostPerToken: new(0.000003),
|
|
OutputCostPerToken: new(0.000015),
|
|
InputCostPerTokenAbove200kTokens: new(0.000006),
|
|
OutputCostPerTokenAbove200kTokens: new(0.00003),
|
|
InputCostPerTokenAbove272kTokens: new(0.000009),
|
|
OutputCostPerTokenAbove272kTokens: new(0.000045),
|
|
CacheReadInputTokenCost: new(0.0000003),
|
|
CacheReadInputTokenCostAbove200kTokens: new(0.0000006),
|
|
CacheReadInputTokenCostAbove272kTokens: new(0.0000009),
|
|
},
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000, // Above 272k
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Tiered rate: input=0.000009, output=0.000045
|
|
// 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60
|
|
assert.InDelta(t, 3.60, cost, 1e-9)
|
|
}
|
|
|
|
func TestCalculateCost_272kTier_CacheReadFallbackChain(t *testing.T) {
|
|
// Verifies the 272k cache read rate takes precedence over 200k and base rates
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("claude-3-7-sonnet", "anthropic", "chat"): {
|
|
Model: "claude-3-7-sonnet",
|
|
Provider: "anthropic",
|
|
Mode: "chat",
|
|
InputCostPerToken: new(0.000003),
|
|
OutputCostPerToken: new(0.000015),
|
|
InputCostPerTokenAbove272kTokens: new(0.000009),
|
|
OutputCostPerTokenAbove272kTokens: new(0.000045),
|
|
CacheReadInputTokenCost: new(0.0000003),
|
|
CacheReadInputTokenCostAbove200kTokens: new(0.0000006),
|
|
CacheReadInputTokenCostAbove272kTokens: new(0.0000009),
|
|
},
|
|
})
|
|
|
|
resp := makeChatResponse(schemas.Anthropic, "claude-3-7-sonnet", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 50000,
|
|
},
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Non-cached input: (250000-50000) * 0.000009 = 200000 * 0.000009 = 1.80
|
|
// Cached read (272k rate): 50000 * 0.0000009 = 0.045
|
|
// Output: 30000 * 0.000045 = 1.35
|
|
// Total: 1.80 + 0.045 + 1.35 = 3.195
|
|
assert.InDelta(t, 3.195, cost, 1e-9)
|
|
}
|
|
|
|
// =========================================================================
|
|
// Priority tier tests
|
|
// =========================================================================
|
|
|
|
func TestComputeTextCost_PriorityUsesInputOutputPriorityRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenPriority = new(0.000006)
|
|
p.OutputCostPerTokenPriority = new(0.00003)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isPriority: true})
|
|
|
|
// Uses priority rates: 1000*0.000006 + 500*0.00003 = 0.006 + 0.015 = 0.021
|
|
assert.InDelta(t, 0.021, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_NonPriorityIgnoresPriorityRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenPriority = new(0.000006)
|
|
p.OutputCostPerTokenPriority = new(0.00003)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Uses base rates, ignores priority fields: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
|
|
assert.InDelta(t, 0.0105, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_Priority272kTier(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenPriority = new(0.000006)
|
|
p.OutputCostPerTokenPriority = new(0.00003)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.InputCostPerTokenAbove272kTokensPriority = new(0.000012)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
p.OutputCostPerTokenAbove272kTokensPriority = new(0.00006)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isPriority: true})
|
|
|
|
// Uses 272k priority rates: 250000*0.000012 + 30000*0.00006 = 3.00 + 1.80 = 4.80
|
|
assert.InDelta(t, 4.80, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_Priority272kTierFallsBackToNonPriority272k(t *testing.T) {
|
|
// Priority flag set but no priority-specific 272k rate — fall back to non-priority 272k
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isPriority: true})
|
|
|
|
// Falls back to non-priority 272k rate: 250000*0.000009 + 30000*0.000045 = 2.25 + 1.35 = 3.60
|
|
assert.InDelta(t, 3.60, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_PriorityCacheReadRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenPriority = new(0.000006)
|
|
p.OutputCostPerTokenPriority = new(0.00003)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostPriority = new(0.0000006)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 400,
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isPriority: true})
|
|
|
|
// Non-cached input: (1000-400)*0.000006 = 600*0.000006 = 0.0036
|
|
// Cached read (priority rate): 400*0.0000006 = 0.00024
|
|
// Output: 500*0.00003 = 0.015
|
|
// Total: 0.0036 + 0.00024 + 0.015 = 0.01884
|
|
assert.InDelta(t, 0.01884, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_PriorityTier_EndToEnd(t *testing.T) {
|
|
tierStr := "priority"
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o",
|
|
Provider: "openai",
|
|
Mode: "chat",
|
|
InputCostPerToken: new(0.000005),
|
|
OutputCostPerToken: new(0.000015),
|
|
InputCostPerTokenPriority: new(0.000010),
|
|
OutputCostPerTokenPriority: new(0.000030),
|
|
},
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
ServiceTier: &tierStr,
|
|
Usage: &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
ResolvedModelUsed: "gpt-4o",
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Priority rates: 1000*0.000010 + 500*0.000030 = 0.010 + 0.015 = 0.025
|
|
assert.InDelta(t, 0.025, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_NonPriorityServiceTier_UsesBaseRate(t *testing.T) {
|
|
tierStr := "auto"
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o",
|
|
Provider: "openai",
|
|
Mode: "chat",
|
|
InputCostPerToken: new(0.000005),
|
|
OutputCostPerToken: new(0.000015),
|
|
InputCostPerTokenPriority: new(0.000010),
|
|
OutputCostPerTokenPriority: new(0.000030),
|
|
},
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
ServiceTier: &tierStr,
|
|
Usage: &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
ResolvedModelUsed: "gpt-4o",
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Base rates (not priority): 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestTieredCacheReadRate_FallbackOrder(t *testing.T) {
|
|
// 272k rate takes precedence over 200k, 200k over base, base over input rate
|
|
t.Run("uses_272k_when_above_272k", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
assert.Equal(t, 0.0000009, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{}))
|
|
})
|
|
t.Run("uses_200k_when_between_200k_and_272k", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
assert.Equal(t, 0.0000006, tieredCacheReadInputTokenRate(&p, 230000, serviceTier{}))
|
|
})
|
|
t.Run("uses_base_cache_rate_when_below_200k", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostAbove200kTokens = new(0.0000006)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
assert.Equal(t, 0.0000003, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{}))
|
|
})
|
|
t.Run("falls_back_to_input_rate_when_no_cache_rate_set", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
// No cache rates set at all
|
|
assert.Equal(t, 0.000003, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{}))
|
|
})
|
|
t.Run("priority_uses_272k_priority_rate", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostPriority = new(0.0000006)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
p.CacheReadInputTokenCostAbove272kTokensPriority = new(0.0000012)
|
|
assert.Equal(t, 0.0000012, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isPriority: true}))
|
|
})
|
|
t.Run("priority_falls_back_to_272k_non_priority_when_priority_rate_missing", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
assert.Equal(t, 0.0000009, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isPriority: true}))
|
|
})
|
|
t.Run("priority_uses_priority_base_cache_rate_below_tiers", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostPriority = new(0.0000006)
|
|
assert.Equal(t, 0.0000006, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isPriority: true}))
|
|
})
|
|
t.Run("flex_uses_flex_cache_rate", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostFlex = new(0.0000005)
|
|
assert.Equal(t, 0.0000005, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isFlex: true}))
|
|
})
|
|
t.Run("flex_uses_flex_cache_rate_regardless_of_token_count", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostFlex = new(0.0000005)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(0.0000009)
|
|
// Even above 272k, flex flat rate takes precedence
|
|
assert.Equal(t, 0.0000005, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isFlex: true}))
|
|
})
|
|
t.Run("flex_falls_back_to_base_cache_rate_when_no_flex_cache_rate", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
// No flex cache rate — falls back to base cache rate
|
|
assert.Equal(t, 0.0000003, tieredCacheReadInputTokenRate(&p, 1500, serviceTier{isFlex: true}))
|
|
})
|
|
t.Run("flex_wins_over_272k_priority_and_priority_base_when_all_present", func(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.CacheReadInputTokenCostAbove272kTokens = new(5e-7)
|
|
p.CacheReadInputTokenCostFlex = new(1.3e-7)
|
|
p.CacheReadInputTokenCostPriority = new(5e-7)
|
|
p.CacheReadInputTokenCostAbove272kTokensPriority = new(0.000001)
|
|
// token count exceeds 272k — but flex flat rate should still win
|
|
assert.Equal(t, 1.3e-7, tieredCacheReadInputTokenRate(&p, 280000, serviceTier{isFlex: true}))
|
|
})
|
|
}
|
|
|
|
// =========================================================================
|
|
// tierFromString tests
|
|
// =========================================================================
|
|
|
|
func TestTierFromString_Priority(t *testing.T) {
|
|
s := "priority"
|
|
tier := tierFromString(&s)
|
|
assert.True(t, tier.isPriority)
|
|
assert.False(t, tier.isFlex)
|
|
}
|
|
|
|
func TestTierFromString_Flex(t *testing.T) {
|
|
s := "flex"
|
|
tier := tierFromString(&s)
|
|
assert.False(t, tier.isPriority)
|
|
assert.True(t, tier.isFlex)
|
|
}
|
|
|
|
func TestTierFromString_Default(t *testing.T) {
|
|
for _, s := range []string{"auto", "default", "", "unknown"} {
|
|
tier := tierFromString(&s)
|
|
assert.False(t, tier.isPriority, "expected no priority for %q", s)
|
|
assert.False(t, tier.isFlex, "expected no flex for %q", s)
|
|
}
|
|
}
|
|
|
|
func TestTierFromString_Nil(t *testing.T) {
|
|
tier := tierFromString(nil)
|
|
assert.False(t, tier.isPriority)
|
|
assert.False(t, tier.isFlex)
|
|
}
|
|
|
|
// =========================================================================
|
|
// Flex tier tests
|
|
// =========================================================================
|
|
|
|
func TestComputeTextCost_FlexUsesFlexRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenFlex = new(0.0000015)
|
|
p.OutputCostPerTokenFlex = new(0.0000075)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isFlex: true})
|
|
|
|
// Flex rates: 1000*0.0000015 + 500*0.0000075 = 0.0015 + 0.00375 = 0.00525
|
|
assert.InDelta(t, 0.00525, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_NonFlexIgnoresFlexRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenFlex = new(0.0000015)
|
|
p.OutputCostPerTokenFlex = new(0.0000075)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
|
|
// Base rates, flex fields ignored: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
|
|
assert.InDelta(t, 0.0105, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_FlexIgnoresTokenTiers(t *testing.T) {
|
|
// Flex is a flat rate — token-count tiers (272k, 200k, 128k) do not apply.
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenFlex = new(0.0000015)
|
|
p.OutputCostPerTokenFlex = new(0.0000075)
|
|
p.InputCostPerTokenAbove272kTokens = new(0.000009)
|
|
p.OutputCostPerTokenAbove272kTokens = new(0.000045)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 250000,
|
|
CompletionTokens: 30000,
|
|
TotalTokens: 280000,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isFlex: true})
|
|
|
|
// Flex flat rate overrides 272k tier: 250000*0.0000015 + 30000*0.0000075 = 0.375 + 0.225 = 0.60
|
|
assert.InDelta(t, 0.60, cost, 1e-9)
|
|
}
|
|
|
|
func TestComputeTextCost_FlexCacheReadRate(t *testing.T) {
|
|
p := chatPricing(0.000003, 0.000015)
|
|
p.InputCostPerTokenFlex = new(0.0000015)
|
|
p.OutputCostPerTokenFlex = new(0.0000075)
|
|
p.CacheReadInputTokenCost = new(0.0000003)
|
|
p.CacheReadInputTokenCostFlex = new(0.0000006)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 400,
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isFlex: true})
|
|
|
|
// Non-cached input: (1000-400)*0.0000015 = 600*0.0000015 = 0.0009
|
|
// Cached read (flex rate): 400*0.0000006 = 0.00024
|
|
// Output: 500*0.0000075 = 0.00375
|
|
// Total: 0.0009 + 0.00024 + 0.00375 = 0.00489
|
|
assert.InDelta(t, 0.00489, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeTextCost_FlexFallsBackToBaseWhenNoFlexRate(t *testing.T) {
|
|
// isFlex set but no flex fields configured — falls back to base rates.
|
|
p := chatPricing(0.000003, 0.000015)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{isFlex: true})
|
|
|
|
// Base rates used as fallback: 1000*0.000003 + 500*0.000015 = 0.003 + 0.0075 = 0.0105
|
|
assert.InDelta(t, 0.0105, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_FlexTier_EndToEnd(t *testing.T) {
|
|
tierStr := "flex"
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): {
|
|
Model: "gpt-4o",
|
|
Provider: "openai",
|
|
Mode: "chat",
|
|
InputCostPerToken: new(0.000005),
|
|
OutputCostPerToken: new(0.000015),
|
|
InputCostPerTokenFlex: new(0.0000025),
|
|
OutputCostPerTokenFlex: new(0.0000075),
|
|
},
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
ServiceTier: &tierStr,
|
|
Usage: &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
ResolvedModelUsed: "gpt-4o",
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// Flex rates: 1000*0.0000025 + 500*0.0000075 = 0.0025 + 0.00375 = 0.00625
|
|
assert.InDelta(t, 0.00625, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_FlexTier_FallsBackToBaseWhenNoFlexRate(t *testing.T) {
|
|
tierStr := "flex"
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ChatResponse: &schemas.BifrostChatResponse{
|
|
ServiceTier: &tierStr,
|
|
Usage: &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ChatCompletionRequest,
|
|
Provider: schemas.OpenAI,
|
|
OriginalModelRequested: "gpt-4o",
|
|
ResolvedModelUsed: "gpt-4o",
|
|
},
|
|
},
|
|
}
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// No flex rates configured — falls back to base: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ProviderCostZeroTotalStillCalculates(t *testing.T) {
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
|
|
})
|
|
|
|
// Provider cost present but TotalCost is 0 → our calculation runs
|
|
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 500,
|
|
TotalTokens: 1500,
|
|
Cost: &schemas.BifrostCost{
|
|
TotalCost: 0,
|
|
},
|
|
})
|
|
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.InDelta(t, 0.0125, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_AllCachedTokens(t *testing.T) {
|
|
// All prompt tokens are from cache
|
|
p := chatPricing(0.000005, 0.000015)
|
|
p.CacheReadInputTokenCost = bifrost.Ptr(0.0000005)
|
|
|
|
usage := &schemas.BifrostLLMUsage{
|
|
PromptTokens: 1000,
|
|
CompletionTokens: 0,
|
|
TotalTokens: 1000,
|
|
PromptTokensDetails: &schemas.ChatPromptTokensDetails{
|
|
CachedReadTokens: 1000, // All cached
|
|
},
|
|
}
|
|
|
|
cost := computeTextCost(&p, usage, serviceTier{})
|
|
// Non-cached: 0, cached: 1000*0.0000005 = 0.0005
|
|
assert.InDelta(t, 0.0005, cost, 1e-12)
|
|
}
|
|
|
|
// =========================================================================
|
|
// Nil usage fallbacks — per-unit pricing when no token data is reported
|
|
// =========================================================================
|
|
|
|
func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) {
|
|
// Image response exists but Usage is nil — should default to 1 image with per-image pricing
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "dall-e-3",
|
|
Provider: "openai",
|
|
Mode: "image_generation",
|
|
InputCostPerToken: bifrost.Ptr(0.0),
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("dall-e-3", "openai", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := makeImageResponse("openai", "dall-e-3", nil)
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 1 image * $0.04 = $0.04
|
|
assert.InDelta(t, 0.04, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testing.T) {
|
|
// Both input and output per-image pricing, but no NumInputImages set
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "test-image-model",
|
|
Provider: "test",
|
|
Mode: "image_generation",
|
|
InputCostPerImage: bifrost.Ptr(0.01),
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("test-image-model", "test", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := makeImageResponse("test", "test-image-model", nil)
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// NumInputImages is 0 (not populated from request), so only output pricing applies
|
|
// 1 output image * $0.04 = $0.04
|
|
assert.InDelta(t, 0.04, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) {
|
|
// Input + output per-image pricing with NumInputImages populated from request
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "gpt-image-1",
|
|
Provider: "openai",
|
|
Mode: "image_generation",
|
|
InputCostPerImage: bifrost.Ptr(0.01),
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("gpt-image-1", "openai", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := makeImageResponse("openai", "gpt-image-1", &schemas.ImageUsage{
|
|
NumInputImages: 2,
|
|
})
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 2 input images * $0.01 + 1 output image * $0.04 = $0.06
|
|
assert.InDelta(t, 0.06, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) {
|
|
// Output image count derived from len(Data) via populateOutputImageCount
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "dall-e-3",
|
|
Provider: "openai",
|
|
Mode: "image_generation",
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("dall-e-3", "openai", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := &schemas.BifrostResponse{
|
|
ImageGenerationResponse: &schemas.BifrostImageGenerationResponse{
|
|
Data: []schemas.ImageData{
|
|
{URL: "https://example.com/img1.png", Index: 0},
|
|
{URL: "https://example.com/img2.png", Index: 1},
|
|
{URL: "https://example.com/img3.png", Index: 2},
|
|
},
|
|
ExtraFields: schemas.BifrostResponseExtraFields{
|
|
RequestType: schemas.ImageGenerationRequest,
|
|
Provider: "openai",
|
|
OriginalModelRequested: "dall-e-3",
|
|
},
|
|
},
|
|
}
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// 3 output images * $0.04 = $0.12
|
|
assert.InDelta(t, 0.12, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T) {
|
|
// No per-image pricing and no tokens — should return 0
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "token-only-model",
|
|
Provider: "test",
|
|
Mode: "image_generation",
|
|
InputCostPerToken: bifrost.Ptr(0.000001),
|
|
OutputCostPerToken: bifrost.Ptr(0.000002),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("token-only-model", "test", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := makeImageResponse("test", "token-only-model", nil)
|
|
cost := mc.CalculateCost(resp, nil)
|
|
// No per-image pricing and all tokens are zero → 0
|
|
assert.InDelta(t, 0.0, cost, 1e-12)
|
|
}
|
|
|
|
func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T) {
|
|
// Usage exists but all fields are zero — same as nil usage, should use per-image pricing
|
|
pricing := configstoreTables.TableModelPricing{
|
|
Model: "dall-e-3",
|
|
Provider: "openai",
|
|
Mode: "image_generation",
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
|
|
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
|
|
makeKey("dall-e-3", "openai", "image_generation"): pricing,
|
|
})
|
|
|
|
resp := makeImageResponse("openai", "dall-e-3", &schemas.ImageUsage{})
|
|
cost := mc.CalculateCost(resp, nil)
|
|
assert.InDelta(t, 0.04, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) {
|
|
// Input has tokens (text prompt), output has no tokens but per-image pricing
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
InputTokens: 500,
|
|
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// Input: 500 tokens * $0.000005 = $0.0025
|
|
// Output: no output tokens → falls back to 2 images * $0.04 = $0.08
|
|
assert.InDelta(t, 0.0825, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) {
|
|
// Input has no tokens but per-image count, output has tokens
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerImage: bifrost.Ptr(0.01),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
NumInputImages: 3,
|
|
OutputTokens: 1000,
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// Input: no input tokens → falls back to 3 images * $0.01 = $0.03
|
|
// Output: 1000 tokens * $0.000015 = $0.015
|
|
assert.InDelta(t, 0.045, cost, 1e-12)
|
|
}
|
|
|
|
func TestComputeImageCost_BothHaveTokens_IgnoresPerImage(t *testing.T) {
|
|
// Both sides have tokens — per-image pricing is ignored
|
|
p := configstoreTables.TableModelPricing{
|
|
InputCostPerToken: bifrost.Ptr(0.000005),
|
|
OutputCostPerToken: bifrost.Ptr(0.000015),
|
|
InputCostPerImage: bifrost.Ptr(0.01),
|
|
OutputCostPerImage: bifrost.Ptr(0.04),
|
|
}
|
|
usage := &schemas.ImageUsage{
|
|
InputTokens: 200,
|
|
OutputTokens: 800,
|
|
TotalTokens: 1000,
|
|
NumInputImages: 3,
|
|
}
|
|
cost := computeImageCost(&p, usage, "", "", serviceTier{})
|
|
// Input: 200 * $0.000005 = $0.001 (tokens present, per-image ignored)
|
|
// Output: 800 * $0.000015 = $0.012 (tokens present, per-image ignored)
|
|
assert.InDelta(t, 0.013, cost, 1e-12)
|
|
}
|