first commit
This commit is contained in:
631
tests/governance/ratelimitenforcement_test.go
Normal file
631
tests/governance/ratelimitenforcement_test.go
Normal file
@@ -0,0 +1,631 @@
|
||||
package governance
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestVirtualKeyTokenRateLimitEnforcement verifies VK token rate limits actually reject requests
|
||||
// Rate limit enforcement is POST-HOC: the request that exceeds the limit is ALLOWED,
|
||||
// but subsequent requests are BLOCKED.
|
||||
func TestVirtualKeyTokenRateLimitEnforcement(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create a VK with a VERY restrictive token rate limit
|
||||
vkName := "test-vk-strict-token-limit-" + generateRandomID()
|
||||
tokenLimit := int64(100) // Only 100 tokens max
|
||||
tokenResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
TokenMaxLimit: &tokenLimit,
|
||||
TokenResetDuration: &tokenResetDuration,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK with strict token limit: %d tokens per %s", tokenLimit, tokenResetDuration)
|
||||
|
||||
// Verify rate limit is in in-memory store
|
||||
getDataResp := MakeRequest(t, APIRequest{
|
||||
Method: "GET",
|
||||
Path: "/api/governance/virtual-keys?from_memory=true",
|
||||
})
|
||||
|
||||
if getDataResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
|
||||
}
|
||||
|
||||
virtualKeysMap := getDataResp.Body["virtual_keys"].(map[string]interface{})
|
||||
vkData := virtualKeysMap[vkValue].(map[string]interface{})
|
||||
rateLimitID, _ := vkData["rate_limit_id"].(string)
|
||||
|
||||
if rateLimitID == "" {
|
||||
t.Fatalf("Rate limit not configured on VK")
|
||||
}
|
||||
|
||||
t.Logf("Rate limit ID %s configured on VK ✓", rateLimitID)
|
||||
|
||||
// Make requests until token limit is exceeded
|
||||
// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
|
||||
consumedTokens := int64(0)
|
||||
requestNum := 1
|
||||
shouldStop := false
|
||||
|
||||
for requestNum <= 20 {
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Hello how are you?",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
// Request rejected - check if it's due to rate limit
|
||||
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
|
||||
t.Logf("Request %d correctly rejected: token limit exceeded at %d/%d", requestNum, consumedTokens, tokenLimit)
|
||||
|
||||
// Verify rejection happened after exceeding the limit
|
||||
if consumedTokens < tokenLimit {
|
||||
t.Fatalf("Request rejected before token limit was exceeded: consumed %d < limit %d", consumedTokens, tokenLimit)
|
||||
}
|
||||
|
||||
t.Logf("Token rate limit enforcement verified ✓")
|
||||
t.Logf("Request blocked after token limit exceeded")
|
||||
return // Test passed
|
||||
} else {
|
||||
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
|
||||
}
|
||||
}
|
||||
|
||||
// Request succeeded - extract token usage
|
||||
var tokensUsed int64
|
||||
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
|
||||
if total, ok := usage["total_tokens"].(float64); ok {
|
||||
tokensUsed = int64(total)
|
||||
}
|
||||
}
|
||||
|
||||
consumedTokens += tokensUsed
|
||||
t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, tokenLimit)
|
||||
|
||||
requestNum++
|
||||
|
||||
if shouldStop {
|
||||
break
|
||||
}
|
||||
|
||||
if consumedTokens >= tokenLimit {
|
||||
shouldStop = true
|
||||
}
|
||||
}
|
||||
|
||||
t.Fatalf("Made %d requests but never hit token rate limit (consumed %d / %d) - rate limit not being enforced",
|
||||
requestNum-1, consumedTokens, tokenLimit)
|
||||
}
|
||||
|
||||
// TestVirtualKeyRequestRateLimitEnforcement verifies VK request rate limits actually reject requests
|
||||
func TestVirtualKeyRequestRateLimitEnforcement(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create a VK with a very restrictive request rate limit
|
||||
vkName := "test-vk-strict-request-limit-" + generateRandomID()
|
||||
requestLimit := int64(1) // Only 1 request allowed
|
||||
requestResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
RequestMaxLimit: &requestLimit,
|
||||
RequestResetDuration: &requestResetDuration,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK with request limit: %d request per %s", requestLimit, requestResetDuration)
|
||||
|
||||
// Make requests until request limit is exceeded
|
||||
requestCount := int64(0)
|
||||
requestNum := 1
|
||||
|
||||
for requestNum <= 10 {
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Request to test request rate limit.",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
// Request rejected - check if it's due to rate limit
|
||||
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
|
||||
t.Logf("Request %d correctly rejected: request limit exceeded at %d/%d", requestNum, requestCount, requestLimit)
|
||||
|
||||
// Verify rejection happened after exceeding the limit
|
||||
if requestCount < requestLimit {
|
||||
t.Fatalf("Request rejected before request limit was exceeded: count %d < limit %d", requestCount, requestLimit)
|
||||
}
|
||||
|
||||
t.Logf("Request rate limit enforcement verified ✓")
|
||||
t.Logf("Request blocked after request limit exceeded")
|
||||
return // Test passed
|
||||
} else {
|
||||
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
|
||||
}
|
||||
}
|
||||
|
||||
// Request succeeded - increment count
|
||||
requestCount++
|
||||
t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, requestLimit)
|
||||
|
||||
requestNum++
|
||||
}
|
||||
|
||||
t.Fatalf("Made %d requests but never hit request rate limit (count %d / %d) - rate limit not being enforced",
|
||||
requestNum-1, requestCount, requestLimit)
|
||||
}
|
||||
|
||||
// TestProviderConfigTokenRateLimitEnforcement verifies provider-level token limits reject requests
|
||||
func TestProviderConfigTokenRateLimitEnforcement(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create a VK with provider-level token rate limit
|
||||
vkName := "test-vk-provider-strict-token-" + generateRandomID()
|
||||
providerTokenLimit := int64(100)
|
||||
tokenResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
ProviderConfigs: []ProviderConfigRequest{
|
||||
{
|
||||
Provider: "openai",
|
||||
Weight: float64Ptr(1.0),
|
||||
AllowedModels: []string{"*"},
|
||||
KeyIDs: []string{"*"},
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
TokenMaxLimit: &providerTokenLimit,
|
||||
TokenResetDuration: &tokenResetDuration,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK with provider token limit: %d tokens", providerTokenLimit)
|
||||
|
||||
// Verify provider config rate limit is set
|
||||
getDataResp := MakeRequest(t, APIRequest{
|
||||
Method: "GET",
|
||||
Path: "/api/governance/virtual-keys?from_memory=true",
|
||||
})
|
||||
|
||||
if getDataResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
|
||||
}
|
||||
|
||||
virtualKeysMap := getDataResp.Body["virtual_keys"].(map[string]interface{})
|
||||
vkData := virtualKeysMap[vkValue].(map[string]interface{})
|
||||
providerConfigs, _ := vkData["provider_configs"].([]interface{})
|
||||
|
||||
if len(providerConfigs) == 0 {
|
||||
t.Fatalf("Provider config not found")
|
||||
}
|
||||
|
||||
t.Logf("Provider config rate limit configured ✓")
|
||||
|
||||
// Make requests until provider token limit is exceeded
|
||||
// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
|
||||
consumedTokens := int64(0)
|
||||
requestNum := 1
|
||||
shouldStop := false
|
||||
|
||||
for requestNum <= 20 {
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Request to openai to test provider token limit.",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
// Request rejected - check if it's due to rate limit
|
||||
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "token") || CheckErrorMessage(t, resp, "rate") {
|
||||
t.Logf("Request %d correctly rejected: provider token limit exceeded at %d/%d", requestNum, consumedTokens, providerTokenLimit)
|
||||
|
||||
// Verify rejection happened after exceeding the limit
|
||||
if consumedTokens < providerTokenLimit {
|
||||
t.Fatalf("Request rejected before provider token limit was exceeded: consumed %d < limit %d", consumedTokens, providerTokenLimit)
|
||||
}
|
||||
|
||||
t.Logf("Provider token rate limit enforcement verified ✓")
|
||||
t.Logf("Request blocked after provider token limit exceeded")
|
||||
return // Test passed
|
||||
} else {
|
||||
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
|
||||
}
|
||||
}
|
||||
|
||||
// Request succeeded - extract token usage
|
||||
var tokensUsed int64
|
||||
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
|
||||
if total, ok := usage["total_tokens"].(float64); ok {
|
||||
tokensUsed = int64(total)
|
||||
}
|
||||
}
|
||||
|
||||
consumedTokens += tokensUsed
|
||||
t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, providerTokenLimit)
|
||||
|
||||
requestNum++
|
||||
|
||||
if shouldStop {
|
||||
break
|
||||
}
|
||||
|
||||
if consumedTokens >= providerTokenLimit {
|
||||
shouldStop = true
|
||||
}
|
||||
}
|
||||
|
||||
t.Fatalf("Made %d requests but never hit provider token rate limit (consumed %d / %d) - rate limit not being enforced",
|
||||
requestNum-1, consumedTokens, providerTokenLimit)
|
||||
}
|
||||
|
||||
// TestProviderConfigRequestRateLimitEnforcement verifies provider-level request limits
|
||||
func TestProviderConfigRequestRateLimitEnforcement(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create a VK with provider-level request rate limit
|
||||
vkName := "test-vk-provider-strict-request-" + generateRandomID()
|
||||
providerRequestLimit := int64(1) // Only 1 request allowed
|
||||
requestResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
ProviderConfigs: []ProviderConfigRequest{
|
||||
{
|
||||
Provider: "openai",
|
||||
Weight: float64Ptr(1.0),
|
||||
AllowedModels: []string{"*"},
|
||||
KeyIDs: []string{"*"},
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
RequestMaxLimit: &providerRequestLimit,
|
||||
RequestResetDuration: &requestResetDuration,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK with provider request limit: %d request", providerRequestLimit)
|
||||
|
||||
// Make requests until provider request limit is exceeded
|
||||
requestCount := int64(0)
|
||||
requestNum := 1
|
||||
|
||||
for requestNum <= 10 {
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Request to test provider request rate limit.",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
// Request rejected - check if it's due to rate limit
|
||||
if resp.StatusCode == 429 || CheckErrorMessage(t, resp, "request") || CheckErrorMessage(t, resp, "rate") {
|
||||
t.Logf("Request %d correctly rejected: provider request limit exceeded at %d/%d", requestNum, requestCount, providerRequestLimit)
|
||||
|
||||
// Verify rejection happened after exceeding the limit
|
||||
if requestCount < providerRequestLimit {
|
||||
t.Fatalf("Request rejected before provider request limit was exceeded: count %d < limit %d", requestCount, providerRequestLimit)
|
||||
}
|
||||
|
||||
t.Logf("Provider request rate limit enforcement verified ✓")
|
||||
t.Logf("Request blocked after provider request limit exceeded")
|
||||
return // Test passed
|
||||
} else {
|
||||
t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
|
||||
}
|
||||
}
|
||||
|
||||
// Request succeeded - increment count
|
||||
requestCount++
|
||||
t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, providerRequestLimit)
|
||||
|
||||
requestNum++
|
||||
}
|
||||
|
||||
t.Fatalf("Made %d requests but never hit provider request rate limit (count %d / %d) - rate limit not being enforced",
|
||||
requestNum-1, requestCount, providerRequestLimit)
|
||||
}
|
||||
|
||||
// TestProviderAndVKRateLimitBothEnforced verifies both provider and VK limits are enforced
|
||||
func TestProviderAndVKRateLimitBothEnforced(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create VK with both VK and provider request limits
|
||||
vkName := "test-vk-both-enforced-" + generateRandomID()
|
||||
vkRequestLimit := int64(5)
|
||||
providerRequestLimit := int64(2) // More restrictive
|
||||
requestResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
RequestMaxLimit: &vkRequestLimit,
|
||||
RequestResetDuration: &requestResetDuration,
|
||||
},
|
||||
ProviderConfigs: []ProviderConfigRequest{
|
||||
{
|
||||
Provider: "openai",
|
||||
Weight: float64Ptr(1.0),
|
||||
AllowedModels: []string{"*"},
|
||||
KeyIDs: []string{"*"},
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
RequestMaxLimit: &providerRequestLimit,
|
||||
RequestResetDuration: &requestResetDuration,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK with VK limit (%d) and provider limit (%d requests)", vkRequestLimit, providerRequestLimit)
|
||||
|
||||
// Make requests - provider limit (2) is more restrictive than VK limit (5)
|
||||
// So we should hit provider limit first
|
||||
successCount := 0
|
||||
for i := 0; i < 5; i++ {
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Request " + string(rune('0'+i)) + " to test both limits.",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode == 200 {
|
||||
successCount++
|
||||
t.Logf("Request %d succeeded (count: %d)", i+1, successCount)
|
||||
} else if resp.StatusCode >= 400 {
|
||||
t.Logf("Request %d rejected with status %d", i+1, resp.StatusCode)
|
||||
if successCount < int(providerRequestLimit) {
|
||||
t.Fatalf("Request rejected before provider limit (%d): %v", providerRequestLimit, resp.Body)
|
||||
}
|
||||
// Expected - hit provider limit first
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if successCount > 0 {
|
||||
if successCount >= 5 {
|
||||
t.Fatalf("Made all %d requests without hitting rate limit (provider limit was %d) - rate limit not enforced",
|
||||
successCount, providerRequestLimit)
|
||||
}
|
||||
t.Logf("Both VK and provider rate limits are configured and enforced ✓")
|
||||
} else {
|
||||
t.Skip("Could not test - all requests failed")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRateLimitInMemoryUsageTracking verifies usage counters are tracked in in-memory store
|
||||
func TestRateLimitInMemoryUsageTracking(t *testing.T) {
|
||||
t.Parallel()
|
||||
testData := NewGlobalTestData()
|
||||
defer testData.Cleanup(t)
|
||||
|
||||
// Create VK with rate limit
|
||||
vkName := "test-vk-usage-tracking-" + generateRandomID()
|
||||
tokenLimit := int64(10000)
|
||||
tokenResetDuration := "1h"
|
||||
|
||||
createVKResp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/api/governance/virtual-keys",
|
||||
Body: CreateVirtualKeyRequest{
|
||||
Name: vkName,
|
||||
RateLimit: &CreateRateLimitRequest{
|
||||
TokenMaxLimit: &tokenLimit,
|
||||
TokenResetDuration: &tokenResetDuration,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if createVKResp.StatusCode != 200 {
|
||||
t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
|
||||
}
|
||||
|
||||
vkID := ExtractIDFromResponse(t, createVKResp)
|
||||
testData.AddVirtualKey(vkID)
|
||||
|
||||
vk := createVKResp.Body["virtual_key"].(map[string]interface{})
|
||||
vkValue := vk["value"].(string)
|
||||
|
||||
t.Logf("Created VK for usage tracking test")
|
||||
|
||||
// Make a request
|
||||
resp := MakeRequest(t, APIRequest{
|
||||
Method: "POST",
|
||||
Path: "/v1/chat/completions",
|
||||
Body: ChatCompletionRequest{
|
||||
Model: "openai/gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Test for usage tracking.",
|
||||
},
|
||||
},
|
||||
},
|
||||
VKHeader: &vkValue,
|
||||
})
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
t.Skip("Could not execute request for usage tracking test")
|
||||
}
|
||||
|
||||
// Get usage from response
|
||||
var tokensUsed int
|
||||
if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
|
||||
if total, ok := usage["total_tokens"].(float64); ok {
|
||||
tokensUsed = int(total)
|
||||
}
|
||||
}
|
||||
|
||||
if tokensUsed == 0 {
|
||||
t.Skip("Could not extract token usage from response")
|
||||
}
|
||||
|
||||
t.Logf("Request used %d tokens", tokensUsed)
|
||||
|
||||
// Wait for async update to propagate to in-memory store
|
||||
var rateLimitID string
|
||||
usageUpdated := WaitForCondition(t, func() bool {
|
||||
getDataResp := MakeRequest(t, APIRequest{
|
||||
Method: "GET",
|
||||
Path: "/api/governance/virtual-keys?from_memory=true",
|
||||
})
|
||||
|
||||
if getDataResp.StatusCode != 200 {
|
||||
return false
|
||||
}
|
||||
|
||||
virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
|
||||
if !ok || virtualKeysMap == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
rateLimitID, _ = vkData["rate_limit_id"].(string)
|
||||
return rateLimitID != ""
|
||||
}, 3*time.Second, "rate limit usage tracked in in-memory store")
|
||||
|
||||
if !usageUpdated {
|
||||
t.Fatalf("Rate limit usage not tracked in in-memory store after request (timeout after 3s)")
|
||||
}
|
||||
|
||||
if rateLimitID != "" {
|
||||
t.Logf("Rate limit %s is configured and tracking usage ✓", rateLimitID)
|
||||
} else {
|
||||
t.Logf("Rate limit is configured ✓")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user