Files
bifrost/tests/governance/ratelimit_test.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

1006 lines
30 KiB
Go

package governance
import (
"testing"
"time"
)
// TestVirtualKeyTokenRateLimit creates a virtual key (VK) limited to 500
// tokens per hour and sends up to 10 chat completion requests with it.
//
// The test returns successfully as soon as a request is rejected with an error
// that CheckErrorMessage matches against "token" or "rate". If no request is
// rejected, the test still passes when at least one request succeeded and is
// skipped when none did.
func TestVirtualKeyTokenRateLimit(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with a very restrictive token rate limit.
	vkName := "test-vk-token-limit-" + generateRandomID()
	tokenLimit := int64(500) // Only 500 tokens per hour
	tokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:      &tokenLimit,
				TokenResetDuration: &tokenResetDuration,
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with token limit: %d tokens per %s", vkName, tokenLimit, tokenResetDuration)

	// Make requests until we hit the token limit.
	successCount := 0
	for i := 0; i < 10; i++ {
		resp := MakeRequest(t, APIRequest{
			Method: "POST",
			Path:   "/v1/chat/completions",
			Body: ChatCompletionRequest{
				Model: "openai/gpt-4o",
				Messages: []ChatMessage{
					{
						Role: "user",
						// '0'+i yields a single digit because i stays below 10.
						Content: "Short test request " + string(rune('0'+i)) + " for token limit.",
					},
				},
			},
			VKHeader: &vkValue,
		})

		switch {
		case resp.StatusCode >= 400:
			if CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
				t.Logf("Request %d correctly rejected due to token rate limit", i+1)
				return // Test passed - hit the token limit
			}
			t.Logf("Request %d failed with unexpected error: %v", i+1, resp.Body)
		case resp.StatusCode == 200:
			successCount++
			t.Logf("Request %d succeeded (tokens within limit)", i+1)
		}
	}

	// Reaching this point means no request was rejected by the rate limiter.
	if successCount == 0 {
		t.Skip("Could not make requests to test token limit")
	}
	t.Logf("Made %d successful requests without reaching the token limit", successCount)
}
// TestVirtualKeyRequestRateLimit creates a virtual key (VK) limited to 3
// requests per minute and sends up to 5 chat completion requests with it.
//
// The test returns successfully as soon as a request is rejected with an error
// that CheckErrorMessage matches against "request" or "rate". If no request is
// rejected, the test still passes when at least one request succeeded and is
// skipped when none did.
func TestVirtualKeyRequestRateLimit(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with a very restrictive request rate limit.
	vkName := "test-vk-request-limit-" + generateRandomID()
	requestLimit := int64(3) // Only 3 requests per minute
	requestResetDuration := "1m"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				RequestMaxLimit:      &requestLimit,
				RequestResetDuration: &requestResetDuration,
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with request limit: %d requests per %s", vkName, requestLimit, requestResetDuration)

	// Make requests until we hit the request limit.
	successCount := 0
	for i := 0; i < 5; i++ {
		resp := MakeRequest(t, APIRequest{
			Method: "POST",
			Path:   "/v1/chat/completions",
			Body: ChatCompletionRequest{
				Model: "openai/gpt-4o",
				Messages: []ChatMessage{
					{
						Role: "user",
						// '0'+i yields a single digit because i stays below 10.
						Content: "Request number " + string(rune('0'+i)) + ".",
					},
				},
			},
			VKHeader: &vkValue,
		})

		switch {
		case resp.StatusCode >= 400:
			if CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
				t.Logf("Request %d correctly rejected due to request rate limit", i+1)
				return // Test passed
			}
			t.Logf("Request %d failed with different error", i+1)
		case resp.StatusCode == 200:
			successCount++
			t.Logf("Request %d succeeded (count: %d/%d)", i+1, successCount, requestLimit)
		}
	}

	// Reaching this point means no request was rejected by the rate limiter.
	if successCount == 0 {
		t.Skip("Could not make requests to test request limit")
	}
	t.Logf("Made %d successful requests without reaching the request limit", successCount)
}
// TestProviderConfigTokenRateLimit creates a virtual key (VK) whose single
// "openai" provider config is limited to 300 tokens per hour and sends up to
// 10 chat completion requests for an openai model with it.
//
// The test returns successfully as soon as a request is rejected with an error
// that CheckErrorMessage matches against "token" or "rate". If no request is
// rejected, the test still passes when at least one request succeeded and is
// skipped when none did.
func TestProviderConfigTokenRateLimit(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with a provider config that has a token rate limit.
	vkName := "test-vk-provider-token-limit-" + generateRandomID()
	providerTokenLimit := int64(300) // Limited tokens per provider
	tokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			ProviderConfigs: []ProviderConfigRequest{
				{
					Provider:      "openai",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &providerTokenLimit,
						TokenResetDuration: &tokenResetDuration,
					},
				},
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with provider token limit: %d tokens per %s", vkName, providerTokenLimit, tokenResetDuration)

	// Make requests to openai until we hit the provider token limit.
	successCount := 0
	for i := 0; i < 10; i++ {
		resp := MakeRequest(t, APIRequest{
			Method: "POST",
			Path:   "/v1/chat/completions",
			Body: ChatCompletionRequest{
				Model: "openai/gpt-4o",
				Messages: []ChatMessage{
					{
						Role: "user",
						// '0'+i yields a single digit because i stays below 10.
						Content: "Provider token limit test " + string(rune('0'+i)) + ".",
					},
				},
			},
			VKHeader: &vkValue,
		})

		switch {
		case resp.StatusCode >= 400:
			if CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
				t.Logf("Request %d correctly rejected due to provider token limit", i+1)
				return // Test passed
			}
			t.Logf("Request %d failed with different error", i+1)
		case resp.StatusCode == 200:
			successCount++
			t.Logf("Request %d succeeded", i+1)
		}
	}

	if successCount == 0 {
		t.Skip("Could not make requests to test provider token limit")
	}
	t.Logf("Made %d successful requests with provider token limit ✓", successCount)
}
// TestProviderConfigRequestRateLimit creates a virtual key (VK) whose single
// "openai" provider config is limited to 2 requests per minute and sends up to
// 5 chat completion requests for an openai model with it.
//
// The test returns successfully as soon as a request is rejected with an error
// that CheckErrorMessage matches against "request" or "rate". If no request is
// rejected, the test still passes when at least one request succeeded and is
// skipped when none did.
func TestProviderConfigRequestRateLimit(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with a provider config that has a request rate limit.
	vkName := "test-vk-provider-request-limit-" + generateRandomID()
	providerRequestLimit := int64(2) // Only 2 requests per minute for this provider
	requestResetDuration := "1m"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			ProviderConfigs: []ProviderConfigRequest{
				{
					Provider:      "openai",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						RequestMaxLimit:      &providerRequestLimit,
						RequestResetDuration: &requestResetDuration,
					},
				},
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with provider request limit: %d requests per %s", vkName, providerRequestLimit, requestResetDuration)

	// Make requests to openai until we hit the provider request limit.
	successCount := 0
	for i := 0; i < 5; i++ {
		resp := MakeRequest(t, APIRequest{
			Method: "POST",
			Path:   "/v1/chat/completions",
			Body: ChatCompletionRequest{
				Model: "openai/gpt-4o",
				Messages: []ChatMessage{
					{
						Role: "user",
						// '0'+i yields a single digit because i stays below 10.
						Content: "Provider request limit test " + string(rune('0'+i)) + ".",
					},
				},
			},
			VKHeader: &vkValue,
		})

		switch {
		case resp.StatusCode >= 400:
			if CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
				t.Logf("Request %d correctly rejected due to provider request limit", i+1)
				return // Test passed
			}
			t.Logf("Request %d failed with different error", i+1)
		case resp.StatusCode == 200:
			successCount++
			t.Logf("Request %d succeeded (count: %d/%d)", i+1, successCount, providerRequestLimit)
		}
	}

	if successCount == 0 {
		t.Skip("Could not make requests to test provider request limit")
	}
	t.Logf("Made %d successful requests with provider request limit ✓", successCount)
}
// TestMultipleProvidersSeparateRateLimits creates a virtual key (VK) with two
// provider configs ("openai" and "anthropic"), each carrying its own token
// rate limit, and verifies that the in-memory governance data lists exactly
// two provider configs for that VK.
//
// NOTE(review): the test only checks that both configs are present; it does
// not send traffic to prove the limits are enforced independently.
func TestMultipleProvidersSeparateRateLimits(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with multiple providers, each with their own rate limits.
	vkName := "test-vk-multi-provider-limits-" + generateRandomID()
	openaiLimit := int64(100)
	anthropicLimit := int64(50)
	tokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			ProviderConfigs: []ProviderConfigRequest{
				{
					Provider:      "openai",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &openaiLimit,
						TokenResetDuration: &tokenResetDuration,
					},
				},
				{
					Provider:      "anthropic",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &anthropicLimit,
						TokenResetDuration: &tokenResetDuration,
					},
				},
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with separate rate limits per provider", vkName)

	// Verify both provider configs are present in the in-memory store.
	getDataResp := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp.StatusCode != 200 {
		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
	}
	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response")
	}
	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map", vkName)
	}

	// A missing or mistyped provider_configs field leaves the slice nil, which
	// the length check below reports as a failure.
	providerConfigs, _ := vkData["provider_configs"].([]interface{})
	if len(providerConfigs) != 2 {
		t.Fatalf("Expected 2 provider configs, got %d", len(providerConfigs))
	}
	t.Logf("VK has %d provider configs with separate rate limits ✓", len(providerConfigs))
}
// TestProviderAndVKRateLimitTogether creates a virtual key (VK) that carries
// both a VK-level token rate limit and a provider-level token rate limit on
// its "openai" provider config, then verifies through the in-memory
// governance data that the VK has a rate limit ID and at least one provider
// config.
//
// NOTE(review): the test checks configuration only; it does not send traffic
// to prove both limits are enforced.
func TestProviderAndVKRateLimitTogether(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with both VK-level and provider-level rate limits.
	vkName := "test-vk-both-limits-" + generateRandomID()
	vkTokenLimit := int64(1000)
	vkTokenResetDuration := "1h"
	providerTokenLimit := int64(300)
	providerTokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:      &vkTokenLimit,
				TokenResetDuration: &vkTokenResetDuration,
			},
			ProviderConfigs: []ProviderConfigRequest{
				{
					Provider:      "openai",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &providerTokenLimit,
						TokenResetDuration: &providerTokenResetDuration,
					},
				},
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with VK limit (%d tokens) and provider limit (%d tokens)", vkName, vkTokenLimit, providerTokenLimit)

	// Verify the VK has both limits configured.
	getDataResp := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp.StatusCode != 200 {
		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
	}
	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response")
	}
	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map", vkName)
	}

	// Check VK has rate limit; a missing field leaves the ID empty.
	vkRateLimitID, _ := vkData["rate_limit_id"].(string)
	if vkRateLimitID == "" {
		t.Fatalf("VK rate limit ID not found")
	}

	// Check provider config exists; a missing field leaves the slice nil.
	providerConfigs, _ := vkData["provider_configs"].([]interface{})
	if len(providerConfigs) == 0 {
		t.Fatalf("No provider configs found")
	}
	t.Logf("VK has both VK-level rate limit and provider-level rate limit configured ✓")
}
// TestRateLimitInMemorySync verifies that updating a virtual key's (VK's)
// rate limit is reflected in the in-memory store.
//
// It creates a VK with a 1000-token limit, records its rate limit ID, updates
// the limit to 5000 tokens through a PUT request, and then checks that:
//   - the VK still references the same rate limit ID,
//   - the rate limit's token_max_limit equals the new value, and
//   - the rate limit's token_reset_duration is unchanged.
func TestRateLimitInMemorySync(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with a token rate limit.
	vkName := "test-vk-rate-limit-sync-" + generateRandomID()
	initialTokenLimit := int64(1000)
	tokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:      &initialTokenLimit,
				TokenResetDuration: &tokenResetDuration,
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with rate limit: %d tokens", vkName, initialTokenLimit)

	// Get initial rate limit from in-memory store.
	getDataResp := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp.StatusCode != 200 {
		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
	}
	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response")
	}
	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map", vkName)
	}
	rateLimitID, _ := vkData["rate_limit_id"].(string)
	if rateLimitID == "" {
		t.Fatalf("Rate limit ID not found in VK")
	}

	// Update the rate limit.
	newTokenLimit := int64(5000)
	updateResp := MakeRequest(t, APIRequest{
		Method: "PUT",
		Path:   "/api/governance/virtual-keys/" + vkID,
		Body: UpdateVirtualKeyRequest{
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:      &newTokenLimit,
				TokenResetDuration: &tokenResetDuration,
			},
		},
	})
	if updateResp.StatusCode != 200 {
		t.Fatalf("Failed to update VK rate limit: status %d", updateResp.StatusCode)
	}
	t.Logf("Updated rate limit from %d to %d tokens", initialTokenLimit, newTokenLimit)

	// Verify rate limit is updated in in-memory store. The short pause gives
	// the server time to apply the update before it is read back.
	time.Sleep(500 * time.Millisecond)
	getDataResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp2.StatusCode != 200 {
		t.Fatalf("Failed to get governance data after update: status %d", getDataResp2.StatusCode)
	}
	virtualKeysMap2, ok := getDataResp2.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response after update")
	}
	vkData2, ok := virtualKeysMap2[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map after update", vkName)
	}

	// Verify VK still has rate limit configured.
	rateLimitID2, _ := vkData2["rate_limit_id"].(string)
	if rateLimitID2 == "" {
		t.Fatalf("Rate limit ID removed after update")
	}
	// Verify it's the same rate limit (ID should match).
	if rateLimitID2 != rateLimitID {
		t.Fatalf("Rate limit ID changed after update: was %s, now %s", rateLimitID, rateLimitID2)
	}

	// Verify rate limit content - check the actual values in the main RateLimits map.
	getRateLimitsResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/rate-limits?from_memory=true",
	})
	if getRateLimitsResp2.StatusCode != 200 {
		t.Fatalf("Failed to get rate limits: status %d", getRateLimitsResp2.StatusCode)
	}
	rateLimitsMap2, ok := getRateLimitsResp2.Body["rate_limits"].(map[string]interface{})
	if !ok {
		t.Fatalf("rate_limits map not found in rate limits response")
	}
	rateLimit2, ok := rateLimitsMap2[rateLimitID2].(map[string]interface{})
	if !ok {
		t.Fatalf("Rate limit not found in RateLimits map")
	}

	// Check TokenMaxLimit was updated. JSON numbers decode as float64.
	tokenMaxLimit, ok := rateLimit2["token_max_limit"].(float64)
	if !ok {
		t.Fatalf("Token max limit not found in rate limit")
	}
	if int64(tokenMaxLimit) != newTokenLimit {
		t.Fatalf("Token max limit not updated: expected %d but got %d", newTokenLimit, int64(tokenMaxLimit))
	}
	t.Logf("Token max limit correctly updated to %d ✓", int64(tokenMaxLimit))

	// Check TokenResetDuration persists.
	resetDuration, ok := rateLimit2["token_reset_duration"].(string)
	if !ok {
		t.Fatalf("Token reset duration not found in rate limit")
	}
	if resetDuration != tokenResetDuration {
		t.Fatalf("Token reset duration changed: expected %s but got %s", tokenResetDuration, resetDuration)
	}
	t.Logf("Token reset duration persisted: %s ✓", resetDuration)

	// The usage counter is informational only; its absence is not a failure.
	if tokenCurrentUsage, ok := rateLimit2["token_current_usage"].(float64); ok {
		t.Logf("Token current usage in memory: %d", int64(tokenCurrentUsage))
	}
	t.Logf("Rate limit in-memory sync verified ✓")
	t.Logf("VK rate limit ID persisted: %s", rateLimitID2)
}
// TestRateLimitTokenAndRequestTogether creates a virtual key (VK) that has
// both a token limit (5000 per hour) and a request limit (100 per hour) and
// sends up to 3 chat completion requests with it, stopping at the first
// response with status >= 400.
//
// The test passes when at least one request succeeded and is skipped when
// none did.
//
// NOTE(review): it does not inspect usage counters, so it only shows that
// requests are accepted while both limits are configured.
func TestRateLimitTokenAndRequestTogether(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with both token and request limits.
	vkName := "test-vk-token-and-request-" + generateRandomID()
	tokenLimit := int64(5000)
	tokenResetDuration := "1h"
	requestLimit := int64(100)
	requestResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:        &tokenLimit,
				TokenResetDuration:   &tokenResetDuration,
				RequestMaxLimit:      &requestLimit,
				RequestResetDuration: &requestResetDuration,
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with token limit (%d) and request limit (%d)", vkName, tokenLimit, requestLimit)

	// Make a few requests while both limits are configured.
	successCount := 0
requests:
	for i := 0; i < 3; i++ {
		resp := MakeRequest(t, APIRequest{
			Method: "POST",
			Path:   "/v1/chat/completions",
			Body: ChatCompletionRequest{
				Model: "openai/gpt-4o",
				Messages: []ChatMessage{
					{
						Role: "user",
						// '0'+i yields a single digit because i stays below 10.
						Content: "Test request for token and request limits " + string(rune('0'+i)) + ".",
					},
				},
			},
			VKHeader: &vkValue,
		})

		switch {
		case resp.StatusCode == 200:
			successCount++
			t.Logf("Request %d succeeded", i+1)
		case resp.StatusCode >= 400:
			t.Logf("Request %d failed with status %d", i+1, resp.StatusCode)
			// Stop sending requests after the first failure.
			break requests
		}
	}

	if successCount == 0 {
		t.Skip("Could not make requests to test combined limits")
	}
	t.Logf("Made %d successful requests with both token and request limits ✓", successCount)
}
// TestRateLimitUsageTrackedInMemory checks that usage against a virtual key's
// (VK's) rate limit is visible in the in-memory store.
//
// It creates a VK with token and request limits, records the rate limit's
// current usage counters, sends one chat completion request, waits for the
// usage update, and reads the counters again. Missing counters or changed max
// limits fail the test. A counter that did not increase is only logged as a
// warning.
func TestRateLimitUsageTrackedInMemory(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with both token and request rate limits.
	vkName := "test-vk-usage-tracking-" + generateRandomID()
	tokenLimit := int64(100000)
	tokenResetDuration := "1h"
	requestLimit := int64(100)
	requestResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			RateLimit: &CreateRateLimitRequest{
				TokenMaxLimit:        &tokenLimit,
				TokenResetDuration:   &tokenResetDuration,
				RequestMaxLimit:      &requestLimit,
				RequestResetDuration: &requestResetDuration,
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with rate limits for usage tracking", vkName)

	// Get initial state - rate limit usage should be 0.
	getDataResp1 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp1.StatusCode != 200 {
		t.Fatalf("Failed to get governance data: status %d", getDataResp1.StatusCode)
	}
	virtualKeysMap1, ok := getDataResp1.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response")
	}
	vkData1, ok := virtualKeysMap1[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map", vkName)
	}
	// An absent ID stays empty and is caught by the rate limit lookup below.
	rateLimitID1, _ := vkData1["rate_limit_id"].(string)
	initialTokenUsage := 0.0
	initialRequestUsage := 0.0

	// Check initial rate limit usage (should be 0) from main RateLimits map.
	getRateLimitsResp1 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/rate-limits?from_memory=true",
	})
	if getRateLimitsResp1.StatusCode != 200 {
		t.Fatalf("Failed to get rate limits: status %d", getRateLimitsResp1.StatusCode)
	}
	rateLimitsMap1, ok := getRateLimitsResp1.Body["rate_limits"].(map[string]interface{})
	if !ok {
		t.Fatalf("rate_limits map not found in rate limits response")
	}
	rateLimit1, ok := rateLimitsMap1[rateLimitID1].(map[string]interface{})
	if !ok {
		t.Fatalf("Rate limit not found in RateLimits map")
	}
	// Missing counters leave the baseline at 0.
	if tokenUsage, ok := rateLimit1["token_current_usage"].(float64); ok {
		initialTokenUsage = tokenUsage
		t.Logf("Initial token usage: %d", int64(initialTokenUsage))
	}
	if requestUsage, ok := rateLimit1["request_current_usage"].(float64); ok {
		initialRequestUsage = requestUsage
		t.Logf("Initial request usage: %d", int64(initialRequestUsage))
	}

	// Make a request to use some tokens and increment request count.
	resp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/v1/chat/completions",
		Body: ChatCompletionRequest{
			Model: "openai/gpt-4o",
			Messages: []ChatMessage{
				{
					Role:    "user",
					Content: "Test request for usage tracking.",
				},
			},
		},
		VKHeader: &vkValue,
	})
	if resp.StatusCode != 200 {
		t.Skip("Could not make request to test usage tracking")
	}

	// Wait for async PostHook goroutine to complete usage update.
	time.Sleep(2 * time.Second)

	// Get updated state - rate limit usage should have increased.
	getDataResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp2.StatusCode != 200 {
		t.Fatalf("Failed to get governance data after request: status %d", getDataResp2.StatusCode)
	}
	virtualKeysMap2, ok := getDataResp2.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response after request")
	}
	vkData2, ok := virtualKeysMap2[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map after request", vkName)
	}
	rateLimitID2, _ := vkData2["rate_limit_id"].(string)

	// Get rate limit from main RateLimits map.
	getRateLimitsResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/rate-limits?from_memory=true",
	})
	if getRateLimitsResp2.StatusCode != 200 {
		t.Fatalf("Failed to get rate limits after request: status %d", getRateLimitsResp2.StatusCode)
	}
	rateLimitsMap2, ok := getRateLimitsResp2.Body["rate_limits"].(map[string]interface{})
	if !ok {
		t.Fatalf("rate_limits map not found in rate limits response after request")
	}
	rateLimit2, ok := rateLimitsMap2[rateLimitID2].(map[string]interface{})
	if !ok {
		t.Fatalf("Rate limit not found in RateLimits map after request")
	}

	// Check that token usage increased. A non-increase is only a warning.
	tokenUsage2, ok := rateLimit2["token_current_usage"].(float64)
	if !ok {
		t.Fatalf("Token current usage not found in rate limit")
	}
	if tokenUsage2 <= initialTokenUsage {
		t.Logf("Warning: Token usage did not increase (before: %d, after: %d)", int64(initialTokenUsage), int64(tokenUsage2))
	} else {
		t.Logf("Token usage increased from %d to %d ✓", int64(initialTokenUsage), int64(tokenUsage2))
	}

	// Check that request usage increased. A non-increase is only a warning.
	requestUsage2, ok := rateLimit2["request_current_usage"].(float64)
	if !ok {
		t.Fatalf("Request current usage not found in rate limit")
	}
	if requestUsage2 <= initialRequestUsage {
		t.Logf("Warning: Request usage did not increase (before: %d, after: %d)", int64(initialRequestUsage), int64(requestUsage2))
	} else {
		t.Logf("Request usage increased from %d to %d ✓", int64(initialRequestUsage), int64(requestUsage2))
	}

	// Verify rate limit still has the configured max limits. A missing max
	// limit field is tolerated; only a differing value fails the test.
	tokenMaxLimit, ok := rateLimit2["token_max_limit"].(float64)
	if ok && int64(tokenMaxLimit) != tokenLimit {
		t.Fatalf("Token max limit changed: expected %d but got %d", tokenLimit, int64(tokenMaxLimit))
	}
	requestMaxLimit, ok := rateLimit2["request_max_limit"].(float64)
	if ok && int64(requestMaxLimit) != requestLimit {
		t.Fatalf("Request max limit changed: expected %d but got %d", requestLimit, int64(requestMaxLimit))
	}
	t.Logf("VK-level rate limit usage properly tracked in in-memory store ✓")
	t.Logf("Token usage: %d/%d, Request usage: %d/%d",
		int64(tokenUsage2), tokenLimit, int64(requestUsage2), requestLimit)
}
// TestProviderLevelRateLimitUsageTracking creates a virtual key (VK) with
// separate token rate limits for the "openai" and "anthropic" provider
// configs, sends one chat completion request for an openai model, and then
// reads each provider's rate limit from the in-memory store.
//
// The test fails when the VK data or provider configs cannot be read, or when
// the VK does not list exactly two provider configs before the request.
// Per-provider usage and max-limit values are logged; a max-limit mismatch is
// only logged as a warning.
func TestProviderLevelRateLimitUsageTracking(t *testing.T) {
	t.Parallel()
	testData := NewGlobalTestData()
	defer testData.Cleanup(t)

	// Create a VK with multiple providers, each with their own rate limits.
	vkName := "test-vk-provider-usage-" + generateRandomID()
	openaiTokenLimit := int64(50000)
	anthropicTokenLimit := int64(30000)
	tokenResetDuration := "1h"
	createVKResp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/api/governance/virtual-keys",
		Body: CreateVirtualKeyRequest{
			Name: vkName,
			ProviderConfigs: []ProviderConfigRequest{
				{
					Provider:      "openai",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &openaiTokenLimit,
						TokenResetDuration: &tokenResetDuration,
					},
				},
				{
					Provider:      "anthropic",
					Weight:        float64Ptr(1.0),
					AllowedModels: []string{"*"},
					KeyIDs:        []string{"*"},
					RateLimit: &CreateRateLimitRequest{
						TokenMaxLimit:      &anthropicTokenLimit,
						TokenResetDuration: &tokenResetDuration,
					},
				},
			},
		},
	})
	if createVKResp.StatusCode != 200 {
		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
	}
	vkID := ExtractIDFromResponse(t, createVKResp)
	testData.AddVirtualKey(vkID)

	// Checked assertions: a panic inside a parallel test would abort the whole
	// test binary, so a malformed response is reported as a plain failure.
	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
	if !ok {
		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
	}
	vkValue, ok := vk["value"].(string)
	if !ok {
		t.Fatalf("Create VK response is missing a string \"value\" field")
	}
	t.Logf("Created VK %s with per-provider rate limits", vkName)

	// Get initial state and confirm both provider configs are present.
	getDataResp1 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp1.StatusCode != 200 {
		t.Fatalf("Failed to get governance data: status %d", getDataResp1.StatusCode)
	}
	virtualKeysMap1, ok := getDataResp1.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response")
	}
	vkData1, ok := virtualKeysMap1[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map", vkName)
	}
	providerConfigs1, ok := vkData1["provider_configs"].([]interface{})
	if !ok {
		t.Fatalf("Provider configs not found in VK data")
	}
	if len(providerConfigs1) != 2 {
		t.Fatalf("Expected 2 provider configs, got %d", len(providerConfigs1))
	}
	t.Logf("VK has %d provider configs with separate rate limits", len(providerConfigs1))

	// Make a request with openai model to use openai provider's rate limit.
	resp := MakeRequest(t, APIRequest{
		Method: "POST",
		Path:   "/v1/chat/completions",
		Body: ChatCompletionRequest{
			Model: "openai/gpt-4o",
			Messages: []ChatMessage{
				{
					Role:    "user",
					Content: "Test request for provider rate limit tracking.",
				},
			},
		},
		VKHeader: &vkValue,
	})
	if resp.StatusCode != 200 {
		t.Skip("Could not make request to test provider rate limit tracking")
	}

	// Wait for async PostHook goroutine to complete usage update.
	time.Sleep(2 * time.Second)

	// Get updated state - openai provider rate limit usage should have increased.
	getDataResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/virtual-keys?from_memory=true",
	})
	if getDataResp2.StatusCode != 200 {
		t.Fatalf("Failed to get governance data after request: status %d", getDataResp2.StatusCode)
	}
	virtualKeysMap2, ok := getDataResp2.Body["virtual_keys"].(map[string]interface{})
	if !ok {
		t.Fatalf("virtual_keys map not found in governance data response after request")
	}
	vkData2, ok := virtualKeysMap2[vkValue].(map[string]interface{})
	if !ok {
		t.Fatalf("VK %s not found in in-memory virtual keys map after request", vkName)
	}
	providerConfigs2, ok := vkData2["provider_configs"].([]interface{})
	if !ok {
		t.Fatalf("Provider configs not found in VK data after request")
	}

	// Per-provider values collected from the RateLimits map. They stay at zero
	// for a provider whose rate limit cannot be resolved.
	var openaiUsage, anthropicUsage float64
	var openaiMaxLimit, anthropicMaxLimit float64

	// Get rate limits from main RateLimits map.
	getRateLimitsResp2 := MakeRequest(t, APIRequest{
		Method: "GET",
		Path:   "/api/governance/rate-limits?from_memory=true",
	})
	if getRateLimitsResp2.StatusCode != 200 {
		t.Fatalf("Failed to get rate limits after request: status %d", getRateLimitsResp2.StatusCode)
	}
	rateLimitsMap2, ok := getRateLimitsResp2.Body["rate_limits"].(map[string]interface{})
	if !ok {
		t.Fatalf("rate_limits map not found in rate limits response after request")
	}

	for i, providerConfig := range providerConfigs2 {
		// Entries that do not have the expected shape are skipped.
		config, ok := providerConfig.(map[string]interface{})
		if !ok {
			continue
		}
		provider, ok := config["provider"].(string)
		if !ok {
			continue
		}
		rateLimitID, ok := config["rate_limit_id"].(string)
		if !ok {
			t.Logf("Provider %s: No rate limit ID found", provider)
			continue
		}
		rateLimit, ok := rateLimitsMap2[rateLimitID].(map[string]interface{})
		if !ok {
			t.Logf("Provider %s: No rate limit found in RateLimits map", provider)
			continue
		}

		// Missing numeric fields are read as 0.
		tokenUsage, _ := rateLimit["token_current_usage"].(float64)
		tokenMaxLimit, _ := rateLimit["token_max_limit"].(float64)
		switch provider {
		case "openai":
			openaiUsage = tokenUsage
			openaiMaxLimit = tokenMaxLimit
			t.Logf("Provider %d (openai): Token usage: %d/%d", i, int64(tokenUsage), int64(tokenMaxLimit))
		case "anthropic":
			anthropicUsage = tokenUsage
			anthropicMaxLimit = tokenMaxLimit
			t.Logf("Provider %d (anthropic): Token usage: %d/%d", i, int64(tokenUsage), int64(tokenMaxLimit))
		}
	}

	// Compare each provider's max limit with the configured value. Mismatches
	// are reported as warnings and do not fail the test.
	if openaiMaxLimit != float64(openaiTokenLimit) {
		t.Logf("Warning: OpenAI max limit changed: expected %d but got %d", openaiTokenLimit, int64(openaiMaxLimit))
	}
	if anthropicMaxLimit != float64(anthropicTokenLimit) {
		t.Logf("Warning: Anthropic max limit changed: expected %d but got %d", anthropicTokenLimit, int64(anthropicMaxLimit))
	}
	t.Logf("Provider-level rate limits properly tracked separately in in-memory store ✓")
	t.Logf("OpenAI usage: %d, Anthropic usage: %d (separate limits)", int64(openaiUsage), int64(anthropicUsage))
}