Files
bifrost/tests/governance/ratelimitenforcement_test.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

632 lines
19 KiB
Go

package governance
import (
"testing"
"time"
)
// TestVirtualKeyTokenRateLimitEnforcement verifies VK token rate limits actually reject requests
// Rate limit enforcement is POST-HOC: the request that exceeds the limit is ALLOWED,
// but subsequent requests are BLOCKED.
func TestVirtualKeyTokenRateLimitEnforcement(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create a VK with a VERY restrictive token rate limit
vkName := "test-vk-strict-token-limit-" + generateRandomID()
tokenLimit := int64(100) // Only 100 tokens max
tokenResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
RateLimit: &CreateRateLimitRequest{
TokenMaxLimit: &tokenLimit,
TokenResetDuration: &tokenResetDuration,
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK with strict token limit: %d tokens per %s", tokenLimit, tokenResetDuration)
// Verify rate limit is in in-memory store
getDataResp := MakeRequest(t, APIRequest{
Method: "GET",
Path: "/api/governance/virtual-keys?from_memory=true",
})
if getDataResp.StatusCode != 200 {
t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
}
virtualKeysMap := getDataResp.Body["virtual_keys"].(map[string]interface{})
vkData := virtualKeysMap[vkValue].(map[string]interface{})
rateLimitID, _ := vkData["rate_limit_id"].(string)
if rateLimitID == "" {
t.Fatalf("Rate limit not configured on VK")
}
t.Logf("Rate limit ID %s configured on VK ✓", rateLimitID)
// Make requests until token limit is exceeded
// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
consumedTokens := int64(0)
requestNum := 1
shouldStop := false
for requestNum <= 20 {
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Hello how are you?",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode >= 400 {
// Request rejected - check if it's due to rate limit
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
t.Logf("Request %d correctly rejected: token limit exceeded at %d/%d", requestNum, consumedTokens, tokenLimit)
// Verify rejection happened after exceeding the limit
if consumedTokens < tokenLimit {
t.Fatalf("Request rejected before token limit was exceeded: consumed %d < limit %d", consumedTokens, tokenLimit)
}
t.Logf("Token rate limit enforcement verified ✓")
t.Logf("Request blocked after token limit exceeded")
return // Test passed
} else {
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
}
}
// Request succeeded - extract token usage
var tokensUsed int64
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
if total, ok := usage["total_tokens"].(float64); ok {
tokensUsed = int64(total)
}
}
consumedTokens += tokensUsed
t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, tokenLimit)
requestNum++
if shouldStop {
break
}
if consumedTokens >= tokenLimit {
shouldStop = true
}
}
t.Fatalf("Made %d requests but never hit token rate limit (consumed %d / %d) - rate limit not being enforced",
requestNum-1, consumedTokens, tokenLimit)
}
// TestVirtualKeyRequestRateLimitEnforcement verifies VK request rate limits actually reject requests
func TestVirtualKeyRequestRateLimitEnforcement(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create a VK with a very restrictive request rate limit
vkName := "test-vk-strict-request-limit-" + generateRandomID()
requestLimit := int64(1) // Only 1 request allowed
requestResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
RateLimit: &CreateRateLimitRequest{
RequestMaxLimit: &requestLimit,
RequestResetDuration: &requestResetDuration,
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK with request limit: %d request per %s", requestLimit, requestResetDuration)
// Make requests until request limit is exceeded
requestCount := int64(0)
requestNum := 1
for requestNum <= 10 {
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Request to test request rate limit.",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode >= 400 {
// Request rejected - check if it's due to rate limit
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
t.Logf("Request %d correctly rejected: request limit exceeded at %d/%d", requestNum, requestCount, requestLimit)
// Verify rejection happened after exceeding the limit
if requestCount < requestLimit {
t.Fatalf("Request rejected before request limit was exceeded: count %d < limit %d", requestCount, requestLimit)
}
t.Logf("Request rate limit enforcement verified ✓")
t.Logf("Request blocked after request limit exceeded")
return // Test passed
} else {
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
}
}
// Request succeeded - increment count
requestCount++
t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, requestLimit)
requestNum++
}
t.Fatalf("Made %d requests but never hit request rate limit (count %d / %d) - rate limit not being enforced",
requestNum-1, requestCount, requestLimit)
}
// TestProviderConfigTokenRateLimitEnforcement verifies provider-level token limits reject requests
func TestProviderConfigTokenRateLimitEnforcement(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create a VK with provider-level token rate limit
vkName := "test-vk-provider-strict-token-" + generateRandomID()
providerTokenLimit := int64(100)
tokenResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
ProviderConfigs: []ProviderConfigRequest{
{
Provider: "openai",
Weight: float64Ptr(1.0),
AllowedModels: []string{"*"},
KeyIDs: []string{"*"},
RateLimit: &CreateRateLimitRequest{
TokenMaxLimit: &providerTokenLimit,
TokenResetDuration: &tokenResetDuration,
},
},
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK with provider token limit: %d tokens", providerTokenLimit)
// Verify provider config rate limit is set
getDataResp := MakeRequest(t, APIRequest{
Method: "GET",
Path: "/api/governance/virtual-keys?from_memory=true",
})
if getDataResp.StatusCode != 200 {
t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
}
virtualKeysMap := getDataResp.Body["virtual_keys"].(map[string]interface{})
vkData := virtualKeysMap[vkValue].(map[string]interface{})
providerConfigs, _ := vkData["provider_configs"].([]interface{})
if len(providerConfigs) == 0 {
t.Fatalf("Provider config not found")
}
t.Logf("Provider config rate limit configured ✓")
// Make requests until provider token limit is exceeded
// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
consumedTokens := int64(0)
requestNum := 1
shouldStop := false
for requestNum <= 20 {
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Request to openai to test provider token limit.",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode >= 400 {
// Request rejected - check if it's due to rate limit
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
t.Logf("Request %d correctly rejected: provider token limit exceeded at %d/%d", requestNum, consumedTokens, providerTokenLimit)
// Verify rejection happened after exceeding the limit
if consumedTokens < providerTokenLimit {
t.Fatalf("Request rejected before provider token limit was exceeded: consumed %d < limit %d", consumedTokens, providerTokenLimit)
}
t.Logf("Provider token rate limit enforcement verified ✓")
t.Logf("Request blocked after provider token limit exceeded")
return // Test passed
} else {
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
}
}
// Request succeeded - extract token usage
var tokensUsed int64
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
if total, ok := usage["total_tokens"].(float64); ok {
tokensUsed = int64(total)
}
}
consumedTokens += tokensUsed
t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, providerTokenLimit)
requestNum++
if shouldStop {
break
}
if consumedTokens >= providerTokenLimit {
shouldStop = true
}
}
t.Fatalf("Made %d requests but never hit provider token rate limit (consumed %d / %d) - rate limit not being enforced",
requestNum-1, consumedTokens, providerTokenLimit)
}
// TestProviderConfigRequestRateLimitEnforcement verifies provider-level request limits
func TestProviderConfigRequestRateLimitEnforcement(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create a VK with provider-level request rate limit
vkName := "test-vk-provider-strict-request-" + generateRandomID()
providerRequestLimit := int64(1) // Only 1 request allowed
requestResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
ProviderConfigs: []ProviderConfigRequest{
{
Provider: "openai",
Weight: float64Ptr(1.0),
AllowedModels: []string{"*"},
KeyIDs: []string{"*"},
RateLimit: &CreateRateLimitRequest{
RequestMaxLimit: &providerRequestLimit,
RequestResetDuration: &requestResetDuration,
},
},
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK with provider request limit: %d request", providerRequestLimit)
// Make requests until provider request limit is exceeded
requestCount := int64(0)
requestNum := 1
for requestNum <= 10 {
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Request to test provider request rate limit.",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode >= 400 {
// Request rejected - check if it's due to rate limit
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
t.Logf("Request %d correctly rejected: provider request limit exceeded at %d/%d", requestNum, requestCount, providerRequestLimit)
// Verify rejection happened after exceeding the limit
if requestCount < providerRequestLimit {
t.Fatalf("Request rejected before provider request limit was exceeded: count %d < limit %d", requestCount, providerRequestLimit)
}
t.Logf("Provider request rate limit enforcement verified ✓")
t.Logf("Request blocked after provider request limit exceeded")
return // Test passed
} else {
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
}
}
// Request succeeded - increment count
requestCount++
t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, providerRequestLimit)
requestNum++
}
t.Fatalf("Made %d requests but never hit provider request rate limit (count %d / %d) - rate limit not being enforced",
requestNum-1, requestCount, providerRequestLimit)
}
// TestProviderAndVKRateLimitBothEnforced verifies both provider and VK limits are enforced
func TestProviderAndVKRateLimitBothEnforced(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create VK with both VK and provider request limits
vkName := "test-vk-both-enforced-" + generateRandomID()
vkRequestLimit := int64(5)
providerRequestLimit := int64(2) // More restrictive
requestResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
RateLimit: &CreateRateLimitRequest{
RequestMaxLimit: &vkRequestLimit,
RequestResetDuration: &requestResetDuration,
},
ProviderConfigs: []ProviderConfigRequest{
{
Provider: "openai",
Weight: float64Ptr(1.0),
AllowedModels: []string{"*"},
KeyIDs: []string{"*"},
RateLimit: &CreateRateLimitRequest{
RequestMaxLimit: &providerRequestLimit,
RequestResetDuration: &requestResetDuration,
},
},
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK with VK limit (%d) and provider limit (%d requests)", vkRequestLimit, providerRequestLimit)
// Make requests - provider limit (2) is more restrictive than VK limit (5)
// So we should hit provider limit first
successCount := 0
for i := 0; i < 5; i++ {
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Request " + string(rune('0'+i)) + " to test both limits.",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode == 200 {
successCount++
t.Logf("Request %d succeeded (count: %d)", i+1, successCount)
} else if resp.StatusCode >= 400 {
t.Logf("Request %d rejected with status %d", i+1, resp.StatusCode)
if successCount < int(providerRequestLimit) {
t.Fatalf("Request rejected before provider limit (%d): %v", providerRequestLimit, resp.Body)
}
// Expected - hit provider limit first
return
}
}
if successCount > 0 {
if successCount >= 5 {
t.Fatalf("Made all %d requests without hitting rate limit (provider limit was %d) - rate limit not enforced",
successCount, providerRequestLimit)
}
t.Logf("Both VK and provider rate limits are configured and enforced ✓")
} else {
t.Skip("Could not test - all requests failed")
}
}
// TestRateLimitInMemoryUsageTracking verifies usage counters are tracked in in-memory store
func TestRateLimitInMemoryUsageTracking(t *testing.T) {
t.Parallel()
testData := NewGlobalTestData()
defer testData.Cleanup(t)
// Create VK with rate limit
vkName := "test-vk-usage-tracking-" + generateRandomID()
tokenLimit := int64(10000)
tokenResetDuration := "1h"
createVKResp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/api/governance/virtual-keys",
Body: CreateVirtualKeyRequest{
Name: vkName,
RateLimit: &CreateRateLimitRequest{
TokenMaxLimit: &tokenLimit,
TokenResetDuration: &tokenResetDuration,
},
},
})
if createVKResp.StatusCode != 200 {
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
}
vkID := ExtractIDFromResponse(t, createVKResp)
testData.AddVirtualKey(vkID)
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
vkValue := vk["value"].(string)
t.Logf("Created VK for usage tracking test")
// Make a request
resp := MakeRequest(t, APIRequest{
Method: "POST",
Path: "/v1/chat/completions",
Body: ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []ChatMessage{
{
Role: "user",
Content: "Test for usage tracking.",
},
},
},
VKHeader: &vkValue,
})
if resp.StatusCode != 200 {
t.Skip("Could not execute request for usage tracking test")
}
// Get usage from response
var tokensUsed int
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
if total, ok := usage["total_tokens"].(float64); ok {
tokensUsed = int(total)
}
}
if tokensUsed == 0 {
t.Skip("Could not extract token usage from response")
}
t.Logf("Request used %d tokens", tokensUsed)
// Wait for async update to propagate to in-memory store
var rateLimitID string
usageUpdated := WaitForCondition(t, func() bool {
getDataResp := MakeRequest(t, APIRequest{
Method: "GET",
Path: "/api/governance/virtual-keys?from_memory=true",
})
if getDataResp.StatusCode != 200 {
return false
}
virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
if !ok || virtualKeysMap == nil {
return false
}
vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
if !ok {
return false
}
rateLimitID, _ = vkData["rate_limit_id"].(string)
return rateLimitID != ""
}, 3*time.Second, "rate limit usage tracked in in-memory store")
if !usageUpdated {
t.Fatalf("Rate limit usage not tracked in in-memory store after request (timeout after 3s)")
}
if rateLimitID != "" {
t.Logf("Rate limit %s is configured and tracking usage ✓", rateLimitID)
} else {
t.Logf("Rate limit is configured ✓")
}
}