first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/tests/governance/advancedscenarios_test.go
+++ b/tests/governance/advancedscenarios_test.go
--- a/tests/governance/config.json
+++ b/tests/governance/config.json
@@ -0,0 +1,67 @@
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "providers": {
+    "openai": {
+      "keys": [
+        {
+          "name": "OpenAI Test Key",
+          "value": "env.OPENAI_API_KEY",
+          "weight": 1,
+          "models": ["*"],
+          "use_for_batch_api": true
+        }
+      ],
+      "network_config": {
+        "default_request_timeout_in_seconds": 300
+      }
+    },
+    "anthropic": {
+      "keys": [
+        {
+          "name": "Anthropic Test Key",
+          "value": "env.ANTHROPIC_API_KEY",
+          "weight": 1,
+          "models": ["*"],
+          "use_for_batch_api": true
+        }
+      ],
+      "network_config": {
+        "default_request_timeout_in_seconds": 300
+      }
+    },
+    "openrouter": {
+      "keys": [
+        {
+          "name": "OpenRouter Test Key",
+          "value": "env.OPENROUTER_API_KEY",
+          "weight": 1,
+          "models": ["*"]
+        }
+      ],
+      "network_config": {
+        "default_request_timeout_in_seconds": 300
+      }
+    }
+  },
+  "config_store": {
+    "enabled": true,
+    "type": "sqlite",
+    "config": {
+      "path": "./data/governance-test.db"
+    }
+  },
+  "logs_store": {
+    "enabled": false
+  },
+  "client": {
+    "drop_excess_requests": false,
+    "initial_pool_size": 300,
+    "allowed_origins": [
+      "*"
+    ],
+    "enable_logging": true,
+    "enforce_auth_on_inference": true,
+    "allow_direct_keys": false,
+    "max_request_body_size_mb": 100
+  }
+}
--- a/tests/governance/configupdatesync_test.go
+++ b/tests/governance/configupdatesync_test.go
--- a/tests/governance/customer_virtual_keys_response_test.go
+++ b/tests/governance/customer_virtual_keys_response_test.go
@@ -0,0 +1,567 @@
+package governance
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+)
+
+func TestCustomerResponsesIncludeAssignedVirtualKeys(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	customerName := "test-customer-vk-response-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	customerData, ok := createCustomerResp.Body["customer"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Expected 'customer' in response body, got: %v", createCustomerResp.Body)
+	}
+	assertJSONArrayField(t, customerData, "teams", 0)
+	assertJSONArrayField(t, customerData, "virtual_keys", 0)
+
+	vkName := "test-vk-customer-response-" + generateRandomID()
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:       vkName,
+			CustomerID: &customerID,
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	customerDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID,
+	}
+	customerListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers",
+	}
+	customerMemoryDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID + "?from_memory=true",
+	}
+	customerMemoryListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	}
+
+	if _, ok := WaitForAPICondition(t, customerDetailReq, func(resp *APIResponse) bool {
+		return customerHasVirtualKey(resp, customerID, vkID)
+	}, 5*time.Second, "db customer detail shows assigned virtual key"); !ok {
+		t.Fatalf("Customer detail never showed assigned virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerListReq, func(resp *APIResponse) bool {
+		return customerHasVirtualKey(resp, customerID, vkID)
+	}, 5*time.Second, "db customer list shows assigned virtual key"); !ok {
+		t.Fatalf("Customer list never showed assigned virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryDetailReq, func(resp *APIResponse) bool {
+		return customerHasVirtualKey(resp, customerID, vkID)
+	}, 5*time.Second, "in-memory customer detail shows assigned virtual key"); !ok {
+		t.Fatalf("In-memory customer detail never showed assigned virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryListReq, func(resp *APIResponse) bool {
+		return customerHasVirtualKey(resp, customerID, vkID)
+	}, 5*time.Second, "in-memory customer list shows assigned virtual key"); !ok {
+		t.Fatalf("In-memory customer list never showed assigned virtual key")
+	}
+}
+
+// TestVirtualKeyResponsesEmbedConsistentCustomerRelations creates a customer
+// with one virtual key and polls the virtual-key detail endpoint (plain and
+// with from_memory=true) until the embedded customer passes
+// embeddedCustomerHasExpectedRelations and the response passes
+// responseJSONIsMarshalable.
+func TestVirtualKeyResponsesEmbedConsistentCustomerRelations(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body:   CreateCustomerRequest{Name: "test-vk-embedded-customer-" + generateRandomID()},
+	})
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:       "test-vk-embedded-customer-" + generateRandomID(),
+			CustomerID: &customerID,
+		},
+	})
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	vkPath := "/api/governance/virtual-keys/" + vkID
+	embedsCustomer := func(resp *APIResponse) bool {
+		return embeddedCustomerHasExpectedRelations(resp, customerID, vkID) && responseJSONIsMarshalable(resp)
+	}
+	polls := []struct {
+		path, waitingFor, failure string
+	}{
+		{vkPath, "db virtual key embeds normalized customer", "Virtual key detail never returned embedded customer with normalized relations"},
+		{vkPath + "?from_memory=true", "in-memory virtual key embeds normalized customer", "In-memory virtual key detail never returned embedded customer with normalized relations"},
+	}
+	for _, poll := range polls {
+		req := APIRequest{Method: "GET", Path: poll.path}
+		if _, ok := WaitForAPICondition(t, req, embedsCustomer, 5*time.Second, poll.waitingFor); !ok {
+			t.Fatal(poll.failure)
+		}
+	}
+}
+
+func TestCustomerResponsesIncludeMultipleAssignedVirtualKeys(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	customerName := "test-customer-multi-vk-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	vk1ID := createCustomerVirtualKeyForTest(t, testData, customerID, "test-customer-multi-vk-1-")
+	vk2ID := createCustomerVirtualKeyForTest(t, testData, customerID, "test-customer-multi-vk-2-")
+
+	customerDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID,
+	}
+	customerListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers",
+	}
+	customerMemoryDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID + "?from_memory=true",
+	}
+	customerMemoryListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	}
+
+	expectedVKs := []string{vk1ID, vk2ID}
+
+	if _, ok := WaitForAPICondition(t, customerDetailReq, func(resp *APIResponse) bool {
+		return customerHasExactVirtualKeys(resp, customerID, expectedVKs)
+	}, 5*time.Second, "db customer detail shows both assigned virtual keys"); !ok {
+		t.Fatalf("Customer detail never showed both assigned virtual keys")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerListReq, func(resp *APIResponse) bool {
+		return customerHasExactVirtualKeys(resp, customerID, expectedVKs)
+	}, 5*time.Second, "db customer list shows both assigned virtual keys"); !ok {
+		t.Fatalf("Customer list never showed both assigned virtual keys")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryDetailReq, func(resp *APIResponse) bool {
+		return customerHasExactVirtualKeys(resp, customerID, expectedVKs)
+	}, 5*time.Second, "in-memory customer detail shows both assigned virtual keys"); !ok {
+		t.Fatalf("In-memory customer detail never showed both assigned virtual keys")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryListReq, func(resp *APIResponse) bool {
+		return customerHasExactVirtualKeys(resp, customerID, expectedVKs)
+	}, 5*time.Second, "in-memory customer list shows both assigned virtual keys"); !ok {
+		t.Fatalf("In-memory customer list never showed both assigned virtual keys")
+	}
+}
+
+func TestCustomerResponsesExcludeTeamScopedVirtualKeys(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	customerName := "test-customer-team-vk-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	teamName := "test-team-team-vk-" + generateRandomID()
+	createTeamResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/teams",
+		Body: CreateTeamRequest{
+			Name:       teamName,
+			CustomerID: &customerID,
+		},
+	})
+
+	if createTeamResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team: status %d", createTeamResp.StatusCode)
+	}
+
+	teamID := ExtractIDFromResponse(t, createTeamResp)
+	testData.AddTeam(teamID)
+
+	vkName := "test-team-scoped-vk-" + generateRandomID()
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:   vkName,
+			TeamID: &teamID,
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team-scoped VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	customerDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID,
+	}
+	customerListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers",
+	}
+	customerMemoryDetailReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers/" + customerID + "?from_memory=true",
+	}
+	customerMemoryListReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	}
+	getVKReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys/" + vkID,
+	}
+	getVKMemoryReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys/" + vkID + "?from_memory=true",
+	}
+
+	if _, ok := WaitForAPICondition(t, customerDetailReq, func(resp *APIResponse) bool {
+		return customerExcludesVirtualKey(resp, customerID, vkID, 0)
+	}, 5*time.Second, "db customer detail excludes team-scoped virtual key"); !ok {
+		t.Fatalf("Customer detail incorrectly included team-scoped virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerListReq, func(resp *APIResponse) bool {
+		return customerExcludesVirtualKey(resp, customerID, vkID, 0)
+	}, 5*time.Second, "db customer list excludes team-scoped virtual key"); !ok {
+		t.Fatalf("Customer list incorrectly included team-scoped virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryDetailReq, func(resp *APIResponse) bool {
+		return customerExcludesVirtualKey(resp, customerID, vkID, 0)
+	}, 5*time.Second, "in-memory customer detail excludes team-scoped virtual key"); !ok {
+		t.Fatalf("In-memory customer detail incorrectly included team-scoped virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, customerMemoryListReq, func(resp *APIResponse) bool {
+		return customerExcludesVirtualKey(resp, customerID, vkID, 0)
+	}, 5*time.Second, "in-memory customer list excludes team-scoped virtual key"); !ok {
+		t.Fatalf("In-memory customer list incorrectly included team-scoped virtual key")
+	}
+
+	if _, ok := WaitForAPICondition(t, getVKReq, func(resp *APIResponse) bool {
+		return virtualKeyHasExpectedTeam(resp, vkID, teamID) && responseJSONIsMarshalable(resp)
+	}, 5*time.Second, "db virtual key retains team relationship"); !ok {
+		t.Fatalf("DB virtual key detail never returned expected team relationship")
+	}
+
+	if _, ok := WaitForAPICondition(t, getVKMemoryReq, func(resp *APIResponse) bool {
+		return virtualKeyHasExpectedTeam(resp, vkID, teamID) && responseJSONIsMarshalable(resp)
+	}, 5*time.Second, "in-memory virtual key retains team relationship"); !ok {
+		t.Fatalf("In-memory virtual key detail never returned expected team relationship")
+	}
+}
+
+// createCustomerVirtualKeyForTest creates a virtual key attached to customerID
+// whose name is namePrefix followed by a random ID, registers the new key with
+// testData via AddVirtualKey, and returns the key's ID. The test is failed
+// immediately if the create request does not return status 200.
+func createCustomerVirtualKeyForTest(t *testing.T, testData *GlobalTestData, customerID, namePrefix string) string {
+	t.Helper()
+
+	payload := CreateVirtualKeyRequest{
+		Name:       namePrefix + generateRandomID(),
+		CustomerID: &customerID,
+	}
+	resp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body:   payload,
+	})
+	if status := resp.StatusCode; status != 200 {
+		t.Fatalf("Failed to create VK: status %d", status)
+	}
+
+	id := ExtractIDFromResponse(t, resp)
+	testData.AddVirtualKey(id)
+	return id
+}
+
+func extractCustomerFromResponse(body map[string]interface{}, customerID string) map[string]interface{} {
+	if customer, ok := body["customer"].(map[string]interface{}); ok {
+		id, _ := customer["id"].(string)
+		if id == customerID {
+			return customer
+		}
+	}
+
+	if customers, ok := body["customers"].([]interface{}); ok {
+		for _, item := range customers {
+			customer, ok := item.(map[string]interface{})
+			if !ok {
+				continue
+			}
+			id, _ := customer["id"].(string)
+			if id == customerID {
+				return customer
+			}
+		}
+	}
+
+	if customers, ok := body["customers"].(map[string]interface{}); ok {
+		if customer, ok := customers[customerID].(map[string]interface{}); ok {
+			return customer
+		}
+	}
+
+	return nil
+}
+
+// customerHasVirtualKey reports whether resp has status 200, contains the
+// customer identified by customerID, lists vkID in that customer's
+// "virtual_keys" array, and carries "teams" as a JSON array.
+func customerHasVirtualKey(resp *APIResponse, customerID, vkID string) bool {
+	if resp.StatusCode != 200 {
+		return false
+	}
+
+	customer := extractCustomerFromResponse(resp.Body, customerID)
+	return customer != nil &&
+		arrayFieldContainsID(customer, "virtual_keys", vkID) &&
+		fieldIsJSONArray(customer, "teams")
+}
+
+// customerHasExactVirtualKeys reports whether resp has status 200 and the
+// customer identified by customerID has "teams" as a JSON array and a
+// "virtual_keys" array whose length equals len(expectedVKIDs) and which
+// contains every ID in expectedVKIDs.
+func customerHasExactVirtualKeys(resp *APIResponse, customerID string, expectedVKIDs []string) bool {
+	if resp.StatusCode != 200 {
+		return false
+	}
+
+	customer := extractCustomerFromResponse(resp.Body, customerID)
+	if customer == nil {
+		return false
+	}
+	if !fieldIsJSONArray(customer, "teams") {
+		return false
+	}
+
+	actual, isArray := customer["virtual_keys"].([]interface{})
+	switch {
+	case !isArray:
+		return false
+	case len(actual) != len(expectedVKIDs):
+		return false
+	}
+
+	for i := range expectedVKIDs {
+		if !arrayFieldContainsID(customer, "virtual_keys", expectedVKIDs[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// customerExcludesVirtualKey reports whether resp has status 200 and the
+// customer identified by customerID has "teams" as a JSON array and a
+// "virtual_keys" array of exactly expectedLen entries, none of which has
+// the ID vkID.
+func customerExcludesVirtualKey(resp *APIResponse, customerID, vkID string, expectedLen int) bool {
+	if resp.StatusCode != 200 {
+		return false
+	}
+
+	customer := extractCustomerFromResponse(resp.Body, customerID)
+	if customer == nil {
+		return false
+	}
+	if !fieldIsJSONArray(customer, "teams") {
+		return false
+	}
+
+	keys, isArray := customer["virtual_keys"].([]interface{})
+	if !isArray {
+		return false
+	}
+	if len(keys) != expectedLen {
+		return false
+	}
+
+	found := arrayFieldContainsID(customer, "virtual_keys", vkID)
+	return !found
+}
+
+// embeddedCustomerHasExpectedRelations reports whether resp has status 200 and
+// its "virtual_key" object embeds a "customer" object that:
+//   - has "id" equal to customerID,
+//   - carries "teams" as a JSON array,
+//   - lists vkID in its "virtual_keys" array, and
+//   - has only object entries in "virtual_keys", none of which carries a
+//     non-null nested "customer".
+func embeddedCustomerHasExpectedRelations(resp *APIResponse, customerID, vkID string) bool {
+	if resp.StatusCode != 200 {
+		return false
+	}
+
+	vk, isObject := resp.Body["virtual_key"].(map[string]interface{})
+	if !isObject {
+		return false
+	}
+	embedded, isObject := vk["customer"].(map[string]interface{})
+	if !isObject {
+		return false
+	}
+
+	if embeddedID, _ := embedded["id"].(string); embeddedID != customerID {
+		return false
+	}
+	if !fieldIsJSONArray(embedded, "teams") || !arrayFieldContainsID(embedded, "virtual_keys", vkID) {
+		return false
+	}
+
+	keys, isArray := embedded["virtual_keys"].([]interface{})
+	if !isArray {
+		return false
+	}
+	for _, raw := range keys {
+		entry, isEntryObject := raw.(map[string]interface{})
+		// A missing key and an explicit null both read back as nil here.
+		if !isEntryObject || entry["customer"] != nil {
+			return false
+		}
+	}
+
+	return true
+}
+
+// virtualKeyHasExpectedTeam reports whether resp has status 200 and its
+// "virtual_key" object has "id" equal to vkID, embeds a "team" object whose
+// "id" equals teamID, and has no "customer" value (absent or null).
+func virtualKeyHasExpectedTeam(resp *APIResponse, vkID, teamID string) bool {
+	if resp.StatusCode != 200 {
+		return false
+	}
+
+	vk, isObject := resp.Body["virtual_key"].(map[string]interface{})
+	if !isObject {
+		return false
+	}
+	if gotVKID, _ := vk["id"].(string); gotVKID != vkID {
+		return false
+	}
+
+	team, isObject := vk["team"].(map[string]interface{})
+	if !isObject {
+		return false
+	}
+	if gotTeamID, _ := team["id"].(string); gotTeamID != teamID {
+		return false
+	}
+
+	// A missing key and an explicit null both read back as nil here.
+	return vk["customer"] == nil
+}
+
+// arrayFieldContainsID reports whether parent[field] is a JSON array holding
+// at least one object whose "id", read as a string, equals id. Elements that
+// are not objects are skipped; an object whose "id" is missing or not a string
+// is compared as the empty string.
+func arrayFieldContainsID(parent map[string]interface{}, field, id string) bool {
+	items, isArray := parent[field].([]interface{})
+	if !isArray {
+		return false
+	}
+	for i := range items {
+		if obj, isObject := items[i].(map[string]interface{}); isObject {
+			if candidate, _ := obj["id"].(string); candidate == id {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+func fieldIsJSONArray(parent map[string]interface{}, field string) bool {
+	_, ok := parent[field].([]interface{})
+	return ok
+}
+
+func assertJSONArrayField(t *testing.T, parent map[string]interface{}, field string, expectedLen int) {
+	t.Helper()
+
+	values, ok := parent[field].([]interface{})
+	if !ok {
+		t.Fatalf("Expected %q to be an array, got %T", field, parent[field])
+	}
+	if len(values) != expectedLen {
+		t.Fatalf("Expected %q length %d, got %d", field, expectedLen, len(values))
+	}
+}
+
+// responseJSONIsMarshalable reports whether resp.RawBody is valid JSON and
+// resp.Body can be encoded with json.Marshal without error.
+func responseJSONIsMarshalable(resp *APIResponse) bool {
+	if json.Valid(resp.RawBody) {
+		if _, err := json.Marshal(resp.Body); err == nil {
+			return true
+		}
+	}
+	return false
+}
--- a/tests/governance/customerbudget_test.go
+++ b/tests/governance/customerbudget_test.go
@@ -0,0 +1,335 @@
+package governance
+
+import (
+	"strconv"
+	"testing"
+)
+
+// customerBudgetTestPrompt returns the long chat prompt sent by the customer
+// budget tests. The text is deliberately verbose so each request consumes more
+// tokens; requestNum is embedded so every request carries a distinct prompt.
+func customerBudgetTestPrompt(requestNum int) string {
+	return "Please provide a comprehensive and detailed response to the following question. " +
+		"I need extensive information covering all aspects of the topic. " +
+		"Provide multiple paragraphs with detailed explanations. " +
+		"Request number " + strconv.Itoa(requestNum) + ". " +
+		"Here is a detailed prompt that will consume significant tokens: " +
+		"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+		"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+		"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+		"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+		"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+		"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+		"Mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+		"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+		"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+		"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+		"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+		"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+		"Mollit anim id est laborum."
+}
+
+// customerBudgetVirtualKeyValue extracts virtual_key.value from the decoded
+// body of a create-virtual-key response. A missing or mistyped field fails the
+// calling test through t.Fatalf; an unchecked type assertion would panic
+// instead.
+func customerBudgetVirtualKeyValue(t *testing.T, body map[string]interface{}) string {
+	t.Helper()
+
+	vk, ok := body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Expected 'virtual_key' object in response body, got: %v", body)
+	}
+	value, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Expected string 'value' in virtual key, got %T", vk["value"])
+	}
+	return value
+}
+
+// exhaustCustomerBudget sends up to 50 chat-completion requests, rotating
+// through vkValues, until a request is rejected with an error mentioning
+// "budget" or "customer".
+//
+// It returns normally (test passes) when such a rejection arrives after at
+// least one successful request. It fails the test when the very first request
+// is rejected, when a request fails for any other reason, or when no rejection
+// is seen. Spend is tracked locally from the usage block of each successful
+// response; once that local total reaches customerBudget, exactly one further
+// request is attempted before giving up.
+func exhaustCustomerBudget(t *testing.T, vkValues []string, customerBudget float64) {
+	t.Helper()
+
+	const (
+		model       = "openai/gpt-4o"
+		maxRequests = 50
+	)
+	if len(vkValues) == 0 {
+		t.Fatalf("No virtual keys supplied for customer budget test")
+	}
+
+	var consumedBudget, lastSuccessfulCost float64
+	budgetReached := false
+	requestsMade := 0
+
+	for requestsMade < maxRequests {
+		requestNum := requestsMade + 1
+		// Alternate between the VKs to exercise the shared customer budget.
+		vkSlot := requestsMade % len(vkValues)
+		vkValue := vkValues[vkSlot]
+
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: model,
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: customerBudgetTestPrompt(requestNum),
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Any failure that is not a budget rejection is unexpected.
+			if !CheckErrorMessage(t, resp, "budget") && !CheckErrorMessage(t, resp, "customer") {
+				t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: customer budget exceeded", requestNum)
+			t.Logf("Consumed budget: $%.6f (limit: $%.2f)", consumedBudget, customerBudget)
+			t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+
+			// Verify that we made at least one successful request before hitting budget
+			if requestNum == 1 {
+				t.Fatalf("First request should have succeeded but was rejected due to budget")
+			}
+			return // Test passed
+		}
+
+		// Request succeeded - extract actual token usage from response
+		if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+			prompt, hasPrompt := usage["prompt_tokens"].(float64)
+			completion, hasCompletion := usage["completion_tokens"].(float64)
+			if hasPrompt && hasCompletion {
+				actualInputTokens := int(prompt)
+				actualOutputTokens := int(completion)
+				// NOTE(review): the second result of CalculateCost is discarded, as in
+				// the original code; confirm whether it signals a condition worth failing on.
+				actualCost, _ := CalculateCost(model, actualInputTokens, actualOutputTokens)
+
+				consumedBudget += actualCost
+				lastSuccessfulCost = actualCost
+
+				t.Logf("Request %d (VK%d) succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f/$%.2f",
+					requestNum, vkSlot+1, actualInputTokens, actualOutputTokens, actualCost, consumedBudget, customerBudget)
+			}
+		}
+
+		requestsMade++
+
+		// The request after the locally tracked total reached the limit also
+		// succeeded, so stop and report the failure below.
+		if budgetReached {
+			break
+		}
+		if consumedBudget >= customerBudget {
+			budgetReached = true
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit customer budget limit (consumed $%.6f / $%.2f) - budget not being enforced",
+		requestsMade, consumedBudget, customerBudget)
+}
+
+// TestCustomerBudgetExceededWithMultipleVKs tests that customer level budgets are enforced across multiple VKs
+// by making requests until budget is consumed
+func TestCustomerBudgetExceededWithMultipleVKs(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a customer with a fixed budget
+	customerBudget := 0.01
+	customerName := "test-customer-budget-exceeded-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+			Budget: &BudgetRequest{
+				MaxLimit:      customerBudget,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	// Create 2 VKs under the customer (directly, without team)
+	vkValues := make([]string, 0, 2)
+	for i := 1; i <= 2; i++ {
+		createVKResp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/api/governance/virtual-keys",
+			Body: CreateVirtualKeyRequest{
+				Name:       "test-vk-" + generateRandomID(),
+				CustomerID: &customerID,
+				Budget: &BudgetRequest{
+					MaxLimit:      1.0, // High VK budget so customer is the limiting factor
+					ResetDuration: "1h",
+				},
+			},
+		})
+
+		if createVKResp.StatusCode != 200 {
+			t.Fatalf("Failed to create VK %d: status %d", i, createVKResp.StatusCode)
+		}
+
+		vkID := ExtractIDFromResponse(t, createVKResp)
+		testData.AddVirtualKey(vkID)
+
+		vkValues = append(vkValues, customerBudgetVirtualKeyValue(t, createVKResp.Body))
+	}
+
+	t.Logf("Created customer %s with budget $%.2f and 2 VKs", customerName, customerBudget)
+
+	// Keep making requests alternating between VKs until the customer budget is exceeded
+	exhaustCustomerBudget(t, vkValues, customerBudget)
+}
+
+// TestCustomerBudgetExceededWithMultipleTeams tests that customer level budgets are enforced across multiple teams
+// by making requests until budget is consumed
+func TestCustomerBudgetExceededWithMultipleTeams(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a customer with a fixed budget
+	customerBudget := 0.01
+	customerName := "test-customer-multi-team-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+			Budget: &BudgetRequest{
+				MaxLimit:      customerBudget,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	// Create 2 teams under the customer, each with its own VK
+	vkValues := make([]string, 0, 2)
+	for i := 1; i <= 2; i++ {
+		createTeamResp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/api/governance/teams",
+			Body: CreateTeamRequest{
+				Name:       "test-team-" + generateRandomID(),
+				CustomerID: &customerID,
+				Budgets: []BudgetRequest{{
+					MaxLimit:      1.0, // High team budget so customer is the limiting factor
+					ResetDuration: "1h",
+				}},
+			},
+		})
+
+		if createTeamResp.StatusCode != 200 {
+			t.Fatalf("Failed to create team %d: status %d", i, createTeamResp.StatusCode)
+		}
+
+		teamID := ExtractIDFromResponse(t, createTeamResp)
+		testData.AddTeam(teamID)
+
+		// Create a VK under each team
+		createVKResp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/api/governance/virtual-keys",
+			Body: CreateVirtualKeyRequest{
+				Name:   "test-vk-" + generateRandomID(),
+				TeamID: &teamID,
+				Budget: &BudgetRequest{
+					MaxLimit:      1.0, // High VK budget so customer is the limiting factor
+					ResetDuration: "1h",
+				},
+			},
+		})
+
+		if createVKResp.StatusCode != 200 {
+			t.Fatalf("Failed to create VK %d: status %d", i, createVKResp.StatusCode)
+		}
+
+		vkID := ExtractIDFromResponse(t, createVKResp)
+		testData.AddVirtualKey(vkID)
+
+		vkValues = append(vkValues, customerBudgetVirtualKeyValue(t, createVKResp.Body))
+	}
+
+	t.Logf("Created customer %s with budget $%.2f and 2 teams with VKs", customerName, customerBudget)
+
+	// Keep making requests alternating between VKs in different teams until the customer budget is exceeded
+	exhaustCustomerBudget(t, vkValues, customerBudget)
+}
--- a/tests/governance/e2e_test.go
+++ b/tests/governance/e2e_test.go
--- a/tests/governance/edgecases_test.go
+++ b/tests/governance/edgecases_test.go
@@ -0,0 +1,190 @@
+package governance
+
+import (
+	"strconv"
+	"testing"
+	"time"
+)
+
+// TestCrissCrossComplexBudgetHierarchy tests complex scenarios involving provider, VK, team, and customer level budgets
+// Tests that the most restrictive budget at each level is enforced
+func TestCrissCrossComplexBudgetHierarchy(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a customer with a moderate budget
+	customerBudget := 0.15
+	customerName := "test-customer-criss-cross-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+			Budget: &BudgetRequest{
+				MaxLimit:      customerBudget,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	// Create a team under customer with a tighter budget
+	teamBudget := 0.12
+	createTeamResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/teams",
+		Body: CreateTeamRequest{
+			Name:       "test-team-criss-cross-" + generateRandomID(),
+			CustomerID: &customerID,
+			Budgets: []BudgetRequest{{
+				MaxLimit:      teamBudget,
+				ResetDuration: "1h",
+			}},
+		},
+	})
+
+	if createTeamResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team: status %d", createTeamResp.StatusCode)
+	}
+
+	teamID := ExtractIDFromResponse(t, createTeamResp)
+	testData.AddTeam(teamID)
+
+	// Create a VK with even tighter budget and provider-specific budgets
+	vkBudget := 0.01
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:   "test-vk-criss-cross-" + generateRandomID(),
+			TeamID: &teamID,
+			Budget: &BudgetRequest{
+				MaxLimit:      vkBudget,
+				ResetDuration: "1h",
+			},
+			ProviderConfigs: []ProviderConfigRequest{
+				{
+					Provider:      "openai",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					Budget: &BudgetRequest{
+						MaxLimit:      0.08, // Even tighter provider budget
+						ResetDuration: "1h",
+					},
+				},
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	vk := createVKResp.Body["virtual_key"].(map[string]interface{})
+	vkValue := vk["value"].(string)
+
+	t.Logf("Created hierarchy: Customer ($%.2f) -> Team ($%.2f) -> VK ($%.2f) with Provider Budget ($0.08)",
+		customerBudget, teamBudget, vkBudget)
+
+	// Wait for VK and provider config budgets to be synced to in-memory store
+	time.Sleep(1000 * time.Millisecond)
+
+	// Test: Provider budget should be the limiting factor (most restrictive)
+	consumedBudget := 0.0
+	requestNum := 1
+	var lastSuccessfulCost float64
+	var shouldStop = false
+
+	for requestNum <= 50 {
+		longPrompt := "Please provide a comprehensive and detailed response to the following question. " +
+			"I need extensive information covering all aspects of the topic. " +
+			"Provide multiple paragraphs with detailed explanations. " +
+			"Request number " + strconv.Itoa(requestNum) + ". " +
+			"Here is a detailed prompt that will consume significant tokens: " +
+			"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+			"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+			"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+			"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+			"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+			"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+			"Mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+			"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+			"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+			"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+			"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+			"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+			"Mollit anim id est laborum."
+
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: longPrompt,
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request failed - check if it's due to budget
+			if CheckErrorMessage(t, resp, "budget") || CheckErrorMessage(t, resp, "provider") {
+				t.Logf("Request %d correctly rejected: budget exceeded in criss-cross hierarchy", requestNum)
+				t.Logf("Consumed budget: $%.6f (provider budget limit: $0.08)", consumedBudget)
+				t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+
+				if requestNum == 1 {
+					t.Fatalf("First request should have succeeded but was rejected due to budget")
+				}
+				return // Test passed
+			} else {
+				t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+			}
+		}
+
+		// Request succeeded - extract actual token usage from response
+		if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+			if prompt, ok := usage["prompt_tokens"].(float64); ok {
+				if completion, ok := usage["completion_tokens"].(float64); ok {
+					actualInputTokens := int(prompt)
+					actualOutputTokens := int(completion)
+					actualCost, _ := CalculateCost("openai/gpt-4o", actualInputTokens, actualOutputTokens)
+
+					consumedBudget += actualCost
+					lastSuccessfulCost = actualCost
+
+					t.Logf("Request %d succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f",
+						requestNum, actualInputTokens, actualOutputTokens, actualCost, consumedBudget)
+				}
+			}
+		}
+
+		requestNum++
+
+		if shouldStop {
+			break
+		}
+
+		if consumedBudget >= 0.08 { // Provider budget
+			shouldStop = true
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit provider budget limit - budget not being enforced",
+		requestNum-1)
+}
--- a/tests/governance/go.mod
+++ b/tests/governance/go.mod
@@ -0,0 +1,3 @@
+module github.com/maximhq/bifrost/tests/governance
+
+go 1.26.2
--- a/tests/governance/go.sum
+++ b/tests/governance/go.sum
--- a/tests/governance/inmemorysync_test.go
+++ b/tests/governance/inmemorysync_test.go
@@ -0,0 +1,608 @@
+package governance
+
+import (
+	"testing"
+	"time"
+)
+
+// TestInMemorySyncVirtualKeyUpdate checks that changing a virtual key's budget through the
+// governance API is reflected by the in-memory store, which is read through the
+// from_memory=true variants of the list endpoints.
+//
+// The test fails, rather than silently passing, when the budget cannot be located in the
+// in-memory data, because in that case nothing has actually been verified.
+func TestInMemorySyncVirtualKeyUpdate(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with initial budget
+	vkName := "test-vk-sync-" + generateRandomID()
+	initialBudget := 10.0
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			Budget: &BudgetRequest{
+				MaxLimit:      initialBudget,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a panic here would abort every test running in parallel.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response is missing the virtual_key object")
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response is missing virtual_key.value")
+	}
+
+	t.Logf("Created VK %s with initial budget $%.2f", vkName, initialBudget)
+
+	// Verify in-memory store has the VK
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory virtual keys response is missing the virtual_keys map")
+	}
+
+	// Check that VK exists in in-memory store (the map is keyed by the VK value)
+	vkData, exists := virtualKeysMap[vkValue]
+	if !exists {
+		t.Fatalf("VK %s not found in in-memory store after creation", vkValue)
+	}
+
+	vkDataMap, ok := vkData.(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK %s has an unexpected shape in in-memory store after creation", vkValue)
+	}
+	vkID2, _ := vkDataMap["id"].(string)
+	if vkID2 != vkID {
+		t.Fatalf("VK ID mismatch in in-memory store: expected %s, got %s", vkID, vkID2)
+	}
+
+	t.Logf("VK found in in-memory store after creation ✓")
+
+	// Update VK budget to 20.0
+	newBudget := 20.0
+	updateResp := MakeRequest(t, APIRequest{
+		Method: "PUT",
+		Path:   "/api/governance/virtual-keys/" + vkID,
+		Body: UpdateVirtualKeyRequest{
+			Budget: &UpdateBudgetRequest{
+				MaxLimit: &newBudget,
+			},
+		},
+	})
+
+	if updateResp.StatusCode != 200 {
+		t.Fatalf("Failed to update VK: status %d, body: %v", updateResp.StatusCode, updateResp.Body)
+	}
+
+	t.Logf("Updated VK budget from $%.2f to $%.2f", initialBudget, newBudget)
+
+	// Verify in-memory store is updated
+	time.Sleep(500 * time.Millisecond) // Small delay for async updates
+
+	getVKResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getVKResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data after update: status %d", getVKResp2.StatusCode)
+	}
+
+	virtualKeysMap2, ok := getVKResp2.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory virtual keys response is missing the virtual_keys map after update")
+	}
+
+	getBudgetsResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/budgets?from_memory=true",
+	})
+
+	if getBudgetsResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get budgets after update: status %d", getBudgetsResp2.StatusCode)
+	}
+
+	budgetsMap2, ok := getBudgetsResp2.Body["budgets"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory budgets response is missing the budgets map")
+	}
+
+	// Check that VK still exists
+	vkData2, exists := virtualKeysMap2[vkValue]
+	if !exists {
+		t.Fatalf("VK %s not found in in-memory store after update", vkValue)
+	}
+
+	vkDataMap2, ok := vkData2.(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK %s has an unexpected shape in in-memory store after update", vkValue)
+	}
+
+	// Without a budget ID the updated limit cannot be checked at all, so treat it as a failure
+	budgetID, _ := vkDataMap2["budget_id"].(string)
+	if budgetID == "" {
+		t.Fatalf("VK %s has no budget_id in in-memory store after update", vkValue)
+	}
+
+	// Check that budget in in-memory store is updated
+	budgetData, budgetExists := budgetsMap2[budgetID]
+	if !budgetExists {
+		t.Fatalf("Budget %s not found in in-memory store", budgetID)
+	}
+
+	budgetDataMap, ok := budgetData.(map[string]interface{})
+	if !ok {
+		t.Fatalf("Budget %s has an unexpected shape in in-memory store", budgetID)
+	}
+	maxLimit, _ := budgetDataMap["max_limit"].(float64)
+	if maxLimit != newBudget {
+		t.Fatalf("Budget max_limit not updated in in-memory store: expected %.2f, got %.2f", newBudget, maxLimit)
+	}
+
+	t.Logf("VK budget updated in in-memory store ✓")
+}
+
+// TestInMemorySyncTeamUpdate checks that replacing a team's budget through the governance
+// API is reflected by the in-memory store, which is read through the from_memory=true
+// variants of the list endpoints.
+//
+// The test fails, rather than silently passing, when no budget ID can be read from the
+// team's in-memory entry, because in that case nothing has actually been verified.
+func TestInMemorySyncTeamUpdate(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a team with initial budget
+	teamName := "test-team-sync-" + generateRandomID()
+	initialBudget := 50.0
+	createTeamResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/teams",
+		Body: CreateTeamRequest{
+			Name: teamName,
+			Budgets: []BudgetRequest{{
+				MaxLimit:      initialBudget,
+				ResetDuration: "1h",
+			}},
+		},
+	})
+
+	if createTeamResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team: status %d", createTeamResp.StatusCode)
+	}
+
+	teamID := ExtractIDFromResponse(t, createTeamResp)
+	testData.AddTeam(teamID)
+
+	t.Logf("Created team %s with initial budget $%.2f", teamName, initialBudget)
+
+	// Verify in-memory store has the team
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/teams?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	// Checked assertion: a panic here would abort every test running in parallel.
+	teamsMap, ok := getDataResp.Body["teams"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory teams response is missing the teams map")
+	}
+
+	if _, exists := teamsMap[teamID]; !exists {
+		t.Fatalf("Team %s not found in in-memory store after creation", teamID)
+	}
+
+	t.Logf("Team found in in-memory store after creation ✓")
+
+	// Update team budget to 100.0
+	newTeamBudget := 100.0
+	updateResp := MakeRequest(t, APIRequest{
+		Method: "PUT",
+		Path:   "/api/governance/teams/" + teamID,
+		Body: UpdateTeamRequest{
+			Budgets: &[]BudgetRequest{{
+				MaxLimit:      newTeamBudget,
+				ResetDuration: "1h",
+			}},
+		},
+	})
+
+	if updateResp.StatusCode != 200 {
+		t.Fatalf("Failed to update team: status %d", updateResp.StatusCode)
+	}
+
+	t.Logf("Updated team budget from $%.2f to $%.2f", initialBudget, newTeamBudget)
+
+	// Verify in-memory store is updated
+	time.Sleep(500 * time.Millisecond)
+
+	getTeamsResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/teams?from_memory=true",
+	})
+
+	if getTeamsResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data after update: status %d", getTeamsResp2.StatusCode)
+	}
+
+	teamsMap2, ok := getTeamsResp2.Body["teams"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory teams response is missing the teams map after update")
+	}
+
+	getBudgetsResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/budgets?from_memory=true",
+	})
+
+	if getBudgetsResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get budgets after update: status %d", getBudgetsResp2.StatusCode)
+	}
+
+	budgetsMap2, ok := getBudgetsResp2.Body["budgets"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory budgets response is missing the budgets map")
+	}
+
+	teamData2, exists := teamsMap2[teamID]
+	if !exists {
+		t.Fatalf("Team %s not found in in-memory store after update", teamID)
+	}
+
+	teamDataMap, ok := teamData2.(map[string]interface{})
+	if !ok {
+		t.Fatalf("Team %s has an unexpected shape in in-memory store after update", teamID)
+	}
+
+	// Teams now expose a `budgets` array instead of a single `budget_id` — read the first.
+	var budgetID string
+	if budgetsList, ok := teamDataMap["budgets"].([]interface{}); ok && len(budgetsList) > 0 {
+		if b, ok := budgetsList[0].(map[string]interface{}); ok {
+			budgetID, _ = b["id"].(string)
+		}
+	}
+
+	// Without a budget ID the updated limit cannot be checked at all, so treat it as a failure
+	if budgetID == "" {
+		t.Fatalf("Team %s exposes no budget ID in in-memory store after update", teamID)
+	}
+
+	budgetData, budgetExists := budgetsMap2[budgetID]
+	if !budgetExists {
+		t.Fatalf("Budget %s not found in in-memory store", budgetID)
+	}
+
+	budgetDataMap, ok := budgetData.(map[string]interface{})
+	if !ok {
+		t.Fatalf("Budget %s has an unexpected shape in in-memory store", budgetID)
+	}
+	maxLimit, _ := budgetDataMap["max_limit"].(float64)
+	if maxLimit != newTeamBudget {
+		t.Fatalf("Team budget max_limit not updated in in-memory store: expected %.2f, got %.2f", newTeamBudget, maxLimit)
+	}
+
+	t.Logf("Team budget updated in in-memory store ✓")
+}
+
+// TestInMemorySyncCustomerUpdate checks that changing a customer's budget through the
+// governance API is reflected by the in-memory store, which is read through the
+// from_memory=true variants of the list endpoints.
+//
+// The test fails, rather than silently passing, when the customer's in-memory entry carries
+// no budget_id, because in that case nothing has actually been verified.
+func TestInMemorySyncCustomerUpdate(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a customer with initial budget
+	customerName := "test-customer-sync-" + generateRandomID()
+	initialBudget := 100.0
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+			Budget: &BudgetRequest{
+				MaxLimit:      initialBudget,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	t.Logf("Created customer %s with initial budget $%.2f", customerName, initialBudget)
+
+	// Verify in-memory store has the customer
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	// Checked assertion: a panic here would abort every test running in parallel.
+	customersMap, ok := getDataResp.Body["customers"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory customers response is missing the customers map")
+	}
+
+	if _, exists := customersMap[customerID]; !exists {
+		t.Fatalf("Customer %s not found in in-memory store after creation", customerID)
+	}
+
+	t.Logf("Customer found in in-memory store after creation ✓")
+
+	// Update customer budget to 250.0
+	newCustomerBudget := 250.0
+	updateResp := MakeRequest(t, APIRequest{
+		Method: "PUT",
+		Path:   "/api/governance/customers/" + customerID,
+		Body: UpdateCustomerRequest{
+			Budget: &UpdateBudgetRequest{
+				MaxLimit: &newCustomerBudget,
+			},
+		},
+	})
+
+	if updateResp.StatusCode != 200 {
+		t.Fatalf("Failed to update customer: status %d", updateResp.StatusCode)
+	}
+
+	t.Logf("Updated customer budget from $%.2f to $%.2f", initialBudget, newCustomerBudget)
+
+	// Verify in-memory store is updated
+	time.Sleep(500 * time.Millisecond)
+
+	getCustomersResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	})
+
+	if getCustomersResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data after update: status %d", getCustomersResp2.StatusCode)
+	}
+
+	customersMap2, ok := getCustomersResp2.Body["customers"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory customers response is missing the customers map after update")
+	}
+
+	getBudgetsResp2 := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/budgets?from_memory=true",
+	})
+
+	if getBudgetsResp2.StatusCode != 200 {
+		t.Fatalf("Failed to get budgets after update: status %d", getBudgetsResp2.StatusCode)
+	}
+
+	budgetsMap2, ok := getBudgetsResp2.Body["budgets"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory budgets response is missing the budgets map")
+	}
+
+	customerData2, exists := customersMap2[customerID]
+	if !exists {
+		t.Fatalf("Customer %s not found in in-memory store after update", customerID)
+	}
+
+	customerDataMap, ok := customerData2.(map[string]interface{})
+	if !ok {
+		t.Fatalf("Customer %s has an unexpected shape in in-memory store after update", customerID)
+	}
+
+	// Without a budget ID the updated limit cannot be checked at all, so treat it as a failure
+	budgetID, _ := customerDataMap["budget_id"].(string)
+	if budgetID == "" {
+		t.Fatalf("Customer %s has no budget_id in in-memory store after update", customerID)
+	}
+
+	budgetData, budgetExists := budgetsMap2[budgetID]
+	if !budgetExists {
+		t.Fatalf("Budget %s not found in in-memory store", budgetID)
+	}
+
+	budgetDataMap, ok := budgetData.(map[string]interface{})
+	if !ok {
+		t.Fatalf("Budget %s has an unexpected shape in in-memory store", budgetID)
+	}
+	maxLimit, _ := budgetDataMap["max_limit"].(float64)
+	if maxLimit != newCustomerBudget {
+		t.Fatalf("Customer budget max_limit not updated in in-memory store: expected %.2f, got %.2f", newCustomerBudget, maxLimit)
+	}
+
+	t.Logf("Customer budget updated in in-memory store ✓")
+}
+
+// TestInMemorySyncVirtualKeyDelete creates a virtual key, polls until the in-memory store
+// lists it, deletes it through the API, and then polls until the in-memory store no longer
+// lists it. Each poll gives up after five seconds.
+func TestInMemorySyncVirtualKeyDelete(t *testing.T) {
+	t.Parallel()
+	fixtures := NewGlobalTestData()
+	defer fixtures.Cleanup(t)
+
+	// Set up the key that will be deleted
+	created := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:   "test-vk-delete-" + generateRandomID(),
+			Budget: &BudgetRequest{MaxLimit: 10.0, ResetDuration: "1h"},
+		},
+	})
+	if created.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", created.StatusCode)
+	}
+
+	keyID := ExtractIDFromResponse(t, created)
+	fixtures.AddVirtualKey(keyID)
+
+	// The in-memory listing is indexed by the VK's value field, not by its ID
+	keyValue := created.Body["virtual_key"].(map[string]interface{})["value"].(string)
+
+	// listedInMemory reports whether the key currently appears in the in-memory listing.
+	// answered is false when the listing could not be read, which lets both polls below
+	// treat an unreadable listing as "not yet" regardless of which outcome they wait for.
+	listedInMemory := func() (listed, answered bool) {
+		listing := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/virtual-keys?from_memory=true",
+		})
+		if listing.StatusCode != 200 {
+			return false, false
+		}
+
+		keys, isMap := listing.Body["virtual_keys"].(map[string]interface{})
+		if !isMap {
+			return false, false
+		}
+
+		_, listed = keys[keyValue]
+		return listed, true
+	}
+
+	// Phase 1: the new key has to show up in memory
+	appeared := WaitForCondition(t, func() bool {
+		listed, answered := listedInMemory()
+		return answered && listed
+	}, 5*time.Second, "VK exists in in-memory store after creation")
+	if !appeared {
+		t.Fatalf("VK not found in in-memory store after creation (timeout after 5s)")
+	}
+	t.Logf("VK found in in-memory store after creation ✓")
+
+	// Phase 2: remove the key through the API
+	removal := MakeRequest(t, APIRequest{
+		Method: "DELETE",
+		Path:   "/api/governance/virtual-keys/" + keyID,
+	})
+	if removal.StatusCode != 200 {
+		t.Fatalf("Failed to delete VK: status %d", removal.StatusCode)
+	}
+	t.Logf("Deleted VK from database")
+
+	// Phase 3: the key has to disappear from memory (polled rather than slept on)
+	vanished := WaitForCondition(t, func() bool {
+		listed, answered := listedInMemory()
+		return answered && !listed
+	}, 5*time.Second, "VK removed from in-memory store after deletion")
+	if !vanished {
+		t.Fatalf("VK %s still exists in in-memory store after deletion (timeout after 5s)", keyValue)
+	}
+	t.Logf("VK removed from in-memory store ✓")
+}
+
+// TestDataEndpointConsistency creates one virtual key, one team and one customer, waits until
+// the in-memory store (from_memory=true listings) contains all three, and then compares the
+// in-memory virtual key count with the count reported by the regular virtual-keys endpoint.
+//
+// Presence is checked per created resource rather than by "count > 0", because other tests
+// running in parallel populate the same store and would satisfy a bare count check.
+func TestDataEndpointConsistency(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create multiple resources. Every create call is status-checked so that a failed
+	// setup is reported as such instead of surfacing later as a lookup failure.
+	vkName := "test-vk-consistency-" + generateRandomID()
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			Budget: &BudgetRequest{
+				MaxLimit:      15.0,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// The in-memory virtual key listing is indexed by the VK's value field
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response is missing the virtual_key object")
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response is missing virtual_key.value")
+	}
+
+	teamName := "test-team-consistency-" + generateRandomID()
+	createTeamResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/teams",
+		Body: CreateTeamRequest{
+			Name: teamName,
+			Budgets: []BudgetRequest{{
+				MaxLimit:      30.0,
+				ResetDuration: "1h",
+			}},
+		},
+	})
+
+	if createTeamResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team: status %d", createTeamResp.StatusCode)
+	}
+
+	teamID := ExtractIDFromResponse(t, createTeamResp)
+	testData.AddTeam(teamID)
+
+	customerName := "test-customer-consistency-" + generateRandomID()
+	createCustomerResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/customers",
+		Body: CreateCustomerRequest{
+			Name: customerName,
+			Budget: &BudgetRequest{
+				MaxLimit:      60.0,
+				ResetDuration: "1h",
+			},
+		},
+	})
+
+	if createCustomerResp.StatusCode != 200 {
+		t.Fatalf("Failed to create customer: status %d", createCustomerResp.StatusCode)
+	}
+
+	customerID := ExtractIDFromResponse(t, createCustomerResp)
+	testData.AddCustomer(customerID)
+
+	// inMemoryHas reports whether the from_memory listing at pathSegment contains key in
+	// the collection stored under bodyKey. Any failed request or unexpected response shape
+	// counts as "not present" so that the poll below simply retries.
+	inMemoryHas := func(pathSegment, bodyKey, key string) bool {
+		resp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/" + pathSegment + "?from_memory=true",
+		})
+		if resp.StatusCode != 200 {
+			return false
+		}
+		collection, _ := resp.Body[bodyKey].(map[string]interface{})
+		_, found := collection[key] // indexing a nil map is safe
+		return found
+	}
+
+	// Wait for all resources to be available in in-memory store
+	allResourcesReady := WaitForCondition(t, func() bool {
+		return inMemoryHas("virtual-keys", "virtual_keys", vkValue) &&
+			inMemoryHas("teams", "teams", teamID) &&
+			inMemoryHas("customers", "customers", customerID)
+	}, 3*time.Second, "all resources available in in-memory store")
+
+	if !allResourcesReady {
+		t.Fatalf("Resources not available in in-memory store (timeout after 3s)")
+	}
+
+	// Get data from separate endpoints
+	getVKResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to get virtual keys: status %d", getVKResp.StatusCode)
+	}
+
+	getTeamsResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/teams?from_memory=true",
+	})
+
+	if getTeamsResp.StatusCode != 200 {
+		t.Fatalf("Failed to get teams: status %d", getTeamsResp.StatusCode)
+	}
+
+	getCustomersResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/customers?from_memory=true",
+	})
+
+	if getCustomersResp.StatusCode != 200 {
+		t.Fatalf("Failed to get customers: status %d", getCustomersResp.StatusCode)
+	}
+
+	// Checked assertions: a panic here would abort every test running in parallel.
+	virtualKeysMap, ok := getVKResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory virtual keys response is missing the virtual_keys map")
+	}
+	teamsMap, ok := getTeamsResp.Body["teams"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory teams response is missing the teams map")
+	}
+	customersMap, ok := getCustomersResp.Body["customers"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("In-memory customers response is missing the customers map")
+	}
+
+	// Verify all created resources are in the in-memory data
+	if _, found := virtualKeysMap[vkValue]; !found {
+		t.Fatalf("Created VK %s not found in in-memory data", vkID)
+	}
+	if _, found := teamsMap[teamID]; !found {
+		t.Fatalf("Created team %s not found in in-memory data", teamID)
+	}
+	if _, found := customersMap[customerID]; !found {
+		t.Fatalf("Created customer %s not found in in-memory data", customerID)
+	}
+
+	vkCount := len(virtualKeysMap)
+	teamCount := len(teamsMap)
+	customerCount := len(customersMap)
+
+	t.Logf("Data endpoint returned consistent data: %d VKs, %d teams, %d customers ✓", vkCount, teamCount, customerCount)
+
+	// Get the individual endpoints and verify consistency
+	getVKsResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys",
+	})
+
+	if getVKsResp.StatusCode != 200 {
+		t.Fatalf("Failed to get virtual keys: status %d", getVKsResp.StatusCode)
+	}
+
+	// A mismatch is only logged: parallel tests create and delete keys between the two reads
+	vksFromEndpoint, _ := getVKsResp.Body["count"].(float64)
+	if int(vksFromEndpoint) != vkCount {
+		// Can fail because sqlite db might get locked because of all parallel tests
+		t.Logf("[WARN]VK count mismatch between /data endpoint and /virtual-keys endpoint: %d vs %d (this can happen because of parallel tests)", vkCount, int(vksFromEndpoint))
+	}
+
+	t.Logf("Data consistency verified between endpoints ✓")
+}
--- a/tests/governance/providerbudget_test.go
+++ b/tests/governance/providerbudget_test.go
@@ -0,0 +1,240 @@
+package governance
+
+import (
+	"strconv"
+	"testing"
+)
+
+// TestProviderBudgetExceeded tests provider-specific budgets within a VK by making requests until budget is consumed
+func TestProviderBudgetExceeded(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with different budgets for different providers
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: "test-vk-provider-budget-" + generateRandomID(),
+			Budget: &BudgetRequest{
+				MaxLimit:      1.0, // High overall budget
+				ResetDuration: "1h",
+			},
+			ProviderConfigs: []ProviderConfigRequest{
+				{
+					Provider:      "openai",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					Budget: &BudgetRequest{
+						MaxLimit:      0.01, // Specific OpenAI budget
+						ResetDuration: "1h",
+					},
+				},
+				{
+					Provider:      "anthropic",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					Budget: &BudgetRequest{
+						MaxLimit:      0.01, // Specific Anthropic budget
+						ResetDuration: "1h",
+					},
+				},
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	vk := createVKResp.Body["virtual_key"].(map[string]interface{})
+	vkValue := vk["value"].(string)
+
+	t.Logf("Created VK with OpenAI budget $0.01 and Anthropic budget $0.01")
+
+	// Test OpenAI provider budget exceeded
+	t.Run("OpenAIProviderBudgetExceeded", func(t *testing.T) {
+		providerBudget := 0.01
+		consumedBudget := 0.0
+		requestNum := 1
+		var lastSuccessfulCost float64
+		var shouldStop = false
+
+		for requestNum <= 50 {
+			longPrompt := "Please provide a comprehensive and detailed response to the following question. " +
+				"I need extensive information covering all aspects of the topic. " +
+				"Provide multiple paragraphs with detailed explanations. " +
+				"Request number " + strconv.Itoa(requestNum) + ". " +
+				"Here is a detailed prompt that will consume significant tokens: " +
+				"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+				"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+				"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+				"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+				"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+				"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+				"Mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+				"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+				"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+				"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+				"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+				"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+				"Mollit anim id est laborum."
+
+			resp := MakeRequest(t, APIRequest{
+				Method: "POST",
+				Path:   "/v1/chat/completions",
+				Body: ChatCompletionRequest{
+					Model: "openai/gpt-4o",
+					Messages: []ChatMessage{
+						{
+							Role:    "user",
+							Content: longPrompt,
+						},
+					},
+				},
+				VKHeader: &vkValue,
+			})
+
+			if resp.StatusCode >= 400 {
+				if CheckErrorMessage(t, resp, "budget") || CheckErrorMessage(t, resp, "provider") {
+					t.Logf("Request %d correctly rejected: OpenAI provider budget exceeded", requestNum)
+					t.Logf("Consumed budget: $%.6f (limit: $%.2f)", consumedBudget, providerBudget)
+					t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+
+					if requestNum == 1 {
+						t.Fatalf("First request should have succeeded but was rejected due to budget")
+					}
+					return // Test passed
+				} else {
+					t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+				}
+			}
+
+			// Request succeeded - extract actual token usage from response
+			if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+				if prompt, ok := usage["prompt_tokens"].(float64); ok {
+					if completion, ok := usage["completion_tokens"].(float64); ok {
+						actualInputTokens := int(prompt)
+						actualOutputTokens := int(completion)
+						actualCost, _ := CalculateCost("openai/gpt-4o", actualInputTokens, actualOutputTokens)
+
+						consumedBudget += actualCost
+						lastSuccessfulCost = actualCost
+
+						t.Logf("Request %d succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f/$%.2f",
+							requestNum, actualInputTokens, actualOutputTokens, actualCost, consumedBudget, providerBudget)
+					}
+				}
+			}
+
+			requestNum++
+
+			if shouldStop {
+				break
+			}
+
+			if consumedBudget >= providerBudget {
+				shouldStop = true
+			}
+		}
+
+		t.Fatalf("Made %d requests but never hit provider budget limit (consumed $%.6f / $%.2f) - budget not being enforced",
+			requestNum-1, consumedBudget, providerBudget)
+	})
+
+	// Test Anthropic provider budget exceeded
+	t.Run("AnthropicProviderBudgetExceeded", func(t *testing.T) {
+		providerBudget := 0.01
+		consumedBudget := 0.0
+		requestNum := 1
+		var lastSuccessfulCost float64
+		var shouldStop = false
+
+		for requestNum <= 50 {
+			longPrompt := "Please provide a comprehensive and detailed response to the following question. " +
+				"I need extensive information covering all aspects of the topic. " +
+				"Provide multiple paragraphs with detailed explanations. " +
+				"Request number " + strconv.Itoa(requestNum) + ". " +
+				"Here is a detailed prompt that will consume significant tokens: " +
+				"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+				"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+				"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+				"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+				"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+				"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+				"Mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+				"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+				"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+				"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+				"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+				"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+				"Mollit anim id est laborum."
+
+			resp := MakeRequest(t, APIRequest{
+				Method: "POST",
+				Path:   "/v1/chat/completions",
+				Body: ChatCompletionRequest{
+					Model: "anthropic/claude-3-7-sonnet-20250219",
+					Messages: []ChatMessage{
+						{
+							Role:    "user",
+							Content: longPrompt,
+						},
+					},
+				},
+				VKHeader: &vkValue,
+			})
+
+			if resp.StatusCode >= 400 {
+				if CheckErrorMessage(t, resp, "budget") || CheckErrorMessage(t, resp, "provider") {
+					t.Logf("Request %d correctly rejected: Anthropic provider budget exceeded", requestNum)
+					t.Logf("Consumed budget: $%.6f (limit: $%.2f)", consumedBudget, providerBudget)
+					t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+
+					if requestNum == 1 {
+						t.Fatalf("First request should have succeeded but was rejected due to budget")
+					}
+					return // Test passed
+				} else {
+					t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+				}
+			}
+
+			// Request succeeded - extract actual token usage from response
+			if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+				if prompt, ok := usage["prompt_tokens"].(float64); ok {
+					if completion, ok := usage["completion_tokens"].(float64); ok {
+						actualInputTokens := int(prompt)
+						actualOutputTokens := int(completion)
+						actualCost, _ := CalculateCost("anthropic/claude-3-7-sonnet-20250219", actualInputTokens, actualOutputTokens)
+
+						consumedBudget += actualCost
+						lastSuccessfulCost = actualCost
+
+						t.Logf("Request %d succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f/$%.2f",
+							requestNum, actualInputTokens, actualOutputTokens, actualCost, consumedBudget, providerBudget)
+					}
+				}
+			}
+
+			requestNum++
+
+			if shouldStop {
+				break
+			}
+
+			if consumedBudget >= providerBudget {
+				shouldStop = true
+			}
+		}
+
+		t.Fatalf("Made %d requests but never hit provider budget limit (consumed $%.6f / $%.2f) - budget not being enforced",
+			requestNum-1, consumedBudget, providerBudget)
+	})
+}
--- a/tests/governance/ratelimit_test.go
+++ b/tests/governance/ratelimit_test.go
--- a/tests/governance/ratelimitenforcement_test.go
+++ b/tests/governance/ratelimitenforcement_test.go
@@ -0,0 +1,631 @@
+package governance
+
+import (
+	"testing"
+	"time"
+)
+
+// TestVirtualKeyTokenRateLimitEnforcement verifies VK token rate limits actually reject requests.
+//
+// Rate limit enforcement is POST-HOC: the request that exceeds the limit is ALLOWED,
+// but subsequent requests are BLOCKED. The test sends chat completions, sums the
+// total_tokens reported by each successful response, and passes once a request is
+// rejected after that running total has reached the limit. It fails when a rejection
+// arrives before the limit was reached, when an error unrelated to rate limiting is
+// returned, or when no rejection is ever observed.
+func TestVirtualKeyTokenRateLimitEnforcement(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with a VERY restrictive token rate limit
+	vkName := "test-vk-strict-token-limit-" + generateRandomID()
+	tokenLimit := int64(100) // Only 100 tokens max
+	tokenResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:      &tokenLimit,
+				TokenResetDuration: &tokenResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK with strict token limit: %d tokens per %s", tokenLimit, tokenResetDuration)
+
+	// Verify rate limit is in in-memory store
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Governance data has no virtual_keys object: %v", getDataResp.Body)
+	}
+	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Created VK not found in in-memory store")
+	}
+	rateLimitID, _ := vkData["rate_limit_id"].(string)
+
+	if rateLimitID == "" {
+		t.Fatalf("Rate limit not configured on VK")
+	}
+
+	t.Logf("Rate limit ID %s configured on VK ✓", rateLimitID)
+
+	// Make requests until token limit is exceeded
+	// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
+	consumedTokens := int64(0)
+	requestNum := 1
+	shouldStop := false
+
+	for requestNum <= 20 {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: "Hello how are you?",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request rejected - anything that is not a rate limit error is a test failure
+			if resp.StatusCode != 429 && !CheckErrorMessage(t, resp, "token") && !CheckErrorMessage(t, resp, "rate") {
+				t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: token limit exceeded at %d/%d", requestNum, consumedTokens, tokenLimit)
+
+			// Verify rejection happened after exceeding the limit
+			if consumedTokens < tokenLimit {
+				t.Fatalf("Request rejected before token limit was exceeded: consumed %d < limit %d", consumedTokens, tokenLimit)
+			}
+
+			t.Logf("Token rate limit enforcement verified ✓")
+			t.Logf("Request blocked after token limit exceeded")
+			return // Test passed
+		}
+
+		// Request succeeded - extract token usage (counts as 0 when usage is absent)
+		var tokensUsed int64
+		if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+			if total, ok := usage["total_tokens"].(float64); ok {
+				tokensUsed = int64(total)
+			}
+		}
+
+		consumedTokens += tokensUsed
+		t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, tokenLimit)
+
+		requestNum++
+
+		// shouldStop was set on the previous iteration, so this request was the one
+		// that should have been blocked; it succeeded, so stop and report failure.
+		if shouldStop {
+			break
+		}
+
+		if consumedTokens >= tokenLimit {
+			shouldStop = true
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit token rate limit (consumed %d / %d) - rate limit not being enforced",
+		requestNum-1, consumedTokens, tokenLimit)
+}
+
+// TestVirtualKeyRequestRateLimitEnforcement verifies VK request rate limits actually reject requests.
+//
+// A VK is created with a limit of one request per hour. The test then sends up to ten
+// chat completions and passes once a request is rejected after at least one has
+// succeeded. It fails when the rejection comes first, when an error unrelated to rate
+// limiting is returned, or when all ten requests succeed.
+func TestVirtualKeyRequestRateLimitEnforcement(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with a very restrictive request rate limit
+	vkName := "test-vk-strict-request-limit-" + generateRandomID()
+	requestLimit := int64(1) // Only 1 request allowed
+	requestResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				RequestMaxLimit:      &requestLimit,
+				RequestResetDuration: &requestResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK with request limit: %d request per %s", requestLimit, requestResetDuration)
+
+	// Make requests until request limit is exceeded
+	requestCount := int64(0)
+	requestNum := 1
+
+	for requestNum <= 10 {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: "Request to test request rate limit.",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request rejected - anything that is not a rate limit error is a test failure
+			if resp.StatusCode != 429 && !CheckErrorMessage(t, resp, "request") && !CheckErrorMessage(t, resp, "rate") {
+				t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: request limit exceeded at %d/%d", requestNum, requestCount, requestLimit)
+
+			// Verify rejection happened after exceeding the limit
+			if requestCount < requestLimit {
+				t.Fatalf("Request rejected before request limit was exceeded: count %d < limit %d", requestCount, requestLimit)
+			}
+
+			t.Logf("Request rate limit enforcement verified ✓")
+			t.Logf("Request blocked after request limit exceeded")
+			return // Test passed
+		}
+
+		// Request succeeded - increment count
+		requestCount++
+		t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, requestLimit)
+
+		requestNum++
+	}
+
+	t.Fatalf("Made %d requests but never hit request rate limit (count %d / %d) - rate limit not being enforced",
+		requestNum-1, requestCount, requestLimit)
+}
+
+// TestProviderConfigTokenRateLimitEnforcement verifies provider-level token limits reject requests.
+//
+// The VK carries no limit of its own; a 100-token limit is attached to its "openai"
+// provider config. Enforcement is POST-HOC (the request that exceeds the limit is
+// allowed, the next one is blocked), so the test sums total_tokens from successful
+// responses and expects a rejection only after that total has reached the limit.
+func TestProviderConfigTokenRateLimitEnforcement(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with provider-level token rate limit
+	vkName := "test-vk-provider-strict-token-" + generateRandomID()
+	providerTokenLimit := int64(100)
+	tokenResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			ProviderConfigs: []ProviderConfigRequest{
+				{
+					Provider:      "openai",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					RateLimit: &CreateRateLimitRequest{
+						TokenMaxLimit:      &providerTokenLimit,
+						TokenResetDuration: &tokenResetDuration,
+					},
+				},
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK with provider token limit: %d tokens", providerTokenLimit)
+
+	// Verify the provider config reached the in-memory store
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Governance data has no virtual_keys object: %v", getDataResp.Body)
+	}
+	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Created VK not found in in-memory store")
+	}
+	providerConfigs, _ := vkData["provider_configs"].([]interface{})
+
+	if len(providerConfigs) == 0 {
+		t.Fatalf("Provider config not found")
+	}
+
+	t.Logf("Provider config rate limit configured ✓")
+
+	// Make requests until provider token limit is exceeded
+	// Rate limit enforcement is POST-HOC: request that exceeds is allowed, next is blocked
+	consumedTokens := int64(0)
+	requestNum := 1
+	shouldStop := false
+
+	for requestNum <= 20 {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: "Request to openai to test provider token limit.",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request rejected - anything that is not a rate limit error is a test failure
+			if resp.StatusCode != 429 && !CheckErrorMessage(t, resp, "token") && !CheckErrorMessage(t, resp, "rate") {
+				t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: provider token limit exceeded at %d/%d", requestNum, consumedTokens, providerTokenLimit)
+
+			// Verify rejection happened after exceeding the limit
+			if consumedTokens < providerTokenLimit {
+				t.Fatalf("Request rejected before provider token limit was exceeded: consumed %d < limit %d", consumedTokens, providerTokenLimit)
+			}
+
+			t.Logf("Provider token rate limit enforcement verified ✓")
+			t.Logf("Request blocked after provider token limit exceeded")
+			return // Test passed
+		}
+
+		// Request succeeded - extract token usage (counts as 0 when usage is absent)
+		var tokensUsed int64
+		if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+			if total, ok := usage["total_tokens"].(float64); ok {
+				tokensUsed = int64(total)
+			}
+		}
+
+		consumedTokens += tokensUsed
+		t.Logf("Request %d succeeded: tokens=%d, consumed=%d/%d", requestNum, tokensUsed, consumedTokens, providerTokenLimit)
+
+		requestNum++
+
+		// shouldStop was set on the previous iteration, so this request was the one
+		// that should have been blocked; it succeeded, so stop and report failure.
+		if shouldStop {
+			break
+		}
+
+		if consumedTokens >= providerTokenLimit {
+			shouldStop = true
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit provider token rate limit (consumed %d / %d) - rate limit not being enforced",
+		requestNum-1, consumedTokens, providerTokenLimit)
+}
+
+// TestProviderConfigRequestRateLimitEnforcement verifies provider-level request limits.
+//
+// The VK's "openai" provider config is limited to one request per hour. The test sends
+// up to ten chat completions and passes once a request is rejected after at least one
+// has succeeded. It fails when the rejection comes first, when an error unrelated to
+// rate limiting is returned, or when all ten requests succeed.
+func TestProviderConfigRequestRateLimitEnforcement(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with provider-level request rate limit
+	vkName := "test-vk-provider-strict-request-" + generateRandomID()
+	providerRequestLimit := int64(1) // Only 1 request allowed
+	requestResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			ProviderConfigs: []ProviderConfigRequest{
+				{
+					Provider:      "openai",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					RateLimit: &CreateRateLimitRequest{
+						RequestMaxLimit:      &providerRequestLimit,
+						RequestResetDuration: &requestResetDuration,
+					},
+				},
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK with provider request limit: %d request", providerRequestLimit)
+
+	// Make requests until provider request limit is exceeded
+	requestCount := int64(0)
+	requestNum := 1
+
+	for requestNum <= 10 {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: "Request to test provider request rate limit.",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request rejected - anything that is not a rate limit error is a test failure
+			if resp.StatusCode != 429 && !CheckErrorMessage(t, resp, "request") && !CheckErrorMessage(t, resp, "rate") {
+				t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: provider request limit exceeded at %d/%d", requestNum, requestCount, providerRequestLimit)
+
+			// Verify rejection happened after exceeding the limit
+			if requestCount < providerRequestLimit {
+				t.Fatalf("Request rejected before provider request limit was exceeded: count %d < limit %d", requestCount, providerRequestLimit)
+			}
+
+			t.Logf("Provider request rate limit enforcement verified ✓")
+			t.Logf("Request blocked after provider request limit exceeded")
+			return // Test passed
+		}
+
+		// Request succeeded - increment count
+		requestCount++
+		t.Logf("Request %d succeeded: count=%d/%d", requestNum, requestCount, providerRequestLimit)
+
+		requestNum++
+	}
+
+	t.Fatalf("Made %d requests but never hit provider request rate limit (count %d / %d) - rate limit not being enforced",
+		requestNum-1, requestCount, providerRequestLimit)
+}
+
+// TestProviderAndVKRateLimitBothEnforced verifies both provider and VK limits are enforced.
+//
+// The VK allows 5 requests per hour while its "openai" provider config allows only 2,
+// so the provider limit must trigger first. The test passes when a rate limit rejection
+// arrives after at least providerRequestLimit successful requests and before the VK
+// limit's worth of requests has been sent. It fails when a rejection arrives earlier,
+// when a request fails for a reason other than rate limiting, or when no request is
+// rejected at all.
+func TestProviderAndVKRateLimitBothEnforced(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create VK with both VK and provider request limits
+	vkName := "test-vk-both-enforced-" + generateRandomID()
+	vkRequestLimit := int64(5)
+	providerRequestLimit := int64(2) // More restrictive
+	requestResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				RequestMaxLimit:      &vkRequestLimit,
+				RequestResetDuration: &requestResetDuration,
+			},
+			ProviderConfigs: []ProviderConfigRequest{
+				{
+					Provider:      "openai",
+					Weight:        float64Ptr(1.0),
+					AllowedModels: []string{"*"},
+					KeyIDs:        []string{"*"},
+					RateLimit: &CreateRateLimitRequest{
+						RequestMaxLimit:      &providerRequestLimit,
+						RequestResetDuration: &requestResetDuration,
+					},
+				},
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK with VK limit (%d) and provider limit (%d requests)", vkRequestLimit, providerRequestLimit)
+
+	// Make requests - provider limit (2) is more restrictive than VK limit (5)
+	// So we should hit provider limit first
+	successCount := 0
+	maxRequests := int(vkRequestLimit)
+	for i := 0; i < maxRequests; i++ {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role: "user",
+						// Single-digit conversion; valid because maxRequests stays below 10.
+						Content: "Request " + string(rune('0'+i)) + " to test both limits.",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			t.Logf("Request %d rejected with status %d", i+1, resp.StatusCode)
+
+			// Only a rate limit rejection counts; any other error must not pass the test.
+			if resp.StatusCode != 429 && !CheckErrorMessage(t, resp, "request") && !CheckErrorMessage(t, resp, "rate") {
+				t.Fatalf("Request %d failed with unexpected error (not rate limit): %v", i+1, resp.Body)
+			}
+			if successCount < int(providerRequestLimit) {
+				t.Fatalf("Request rejected before provider limit (%d): %v", providerRequestLimit, resp.Body)
+			}
+
+			// Expected - hit provider limit first
+			t.Logf("Both VK and provider rate limits are configured and enforced ✓")
+			return
+		}
+
+		if resp.StatusCode == 200 {
+			successCount++
+			t.Logf("Request %d succeeded (count: %d)", i+1, successCount)
+		}
+	}
+
+	// No request was rejected, so the provider limit was never applied.
+	t.Fatalf("Made %d requests (%d succeeded) without hitting rate limit (provider limit was %d) - rate limit not enforced",
+		maxRequests, successCount, providerRequestLimit)
+}
+
+// TestRateLimitInMemoryUsageTracking checks that a VK's rate limit is visible in the
+// in-memory store after a request has been served.
+//
+// The test creates a VK with a 10000-token limit, sends one chat completion, and then
+// polls the in-memory governance data (for up to 3 seconds) until the VK entry exposes
+// a non-empty rate_limit_id. It is skipped when the completion does not return 200 or
+// reports no total_tokens.
+//
+// NOTE(review): the poll only checks that rate_limit_id is present; it never compares
+// a stored usage counter against tokensUsed, so actual usage accounting is not
+// asserted here - confirm whether that is intended.
+func TestRateLimitInMemoryUsageTracking(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create VK with rate limit
+	vkName := "test-vk-usage-tracking-" + generateRandomID()
+	tokenLimit := int64(10000)
+	tokenResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:      &tokenLimit,
+				TokenResetDuration: &tokenResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Checked assertions: a malformed response should fail the test, not panic it.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok || vkValue == "" {
+		t.Fatalf("Create VK response has no virtual_key.value string")
+	}
+
+	t.Logf("Created VK for usage tracking test")
+
+	// Make a request
+	resp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/v1/chat/completions",
+		Body: ChatCompletionRequest{
+			Model: "openai/gpt-4o",
+			Messages: []ChatMessage{
+				{
+					Role:    "user",
+					Content: "Test for usage tracking.",
+				},
+			},
+		},
+		VKHeader: &vkValue,
+	})
+
+	if resp.StatusCode != 200 {
+		t.Skip("Could not execute request for usage tracking test")
+	}
+
+	// Get usage from response
+	var tokensUsed int
+	if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+		if total, ok := usage["total_tokens"].(float64); ok {
+			tokensUsed = int(total)
+		}
+	}
+
+	if tokensUsed == 0 {
+		t.Skip("Could not extract token usage from response")
+	}
+
+	t.Logf("Request used %d tokens", tokensUsed)
+
+	// Wait for async update to propagate to in-memory store
+	var rateLimitID string
+	usageUpdated := WaitForCondition(t, func() bool {
+		getDataResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/virtual-keys?from_memory=true",
+		})
+
+		if getDataResp.StatusCode != 200 {
+			return false
+		}
+
+		virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+		if !ok || virtualKeysMap == nil {
+			return false
+		}
+
+		vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+		if !ok {
+			return false
+		}
+
+		// Captured by the enclosing test so the ID can be logged after the wait.
+		rateLimitID, _ = vkData["rate_limit_id"].(string)
+		return rateLimitID != ""
+	}, 3*time.Second, "rate limit usage tracked in in-memory store")
+
+	if !usageUpdated {
+		t.Fatalf("Rate limit usage not tracked in in-memory store after request (timeout after 3s)")
+	}
+
+	if rateLimitID != "" {
+		t.Logf("Rate limit %s is configured and tracking usage ✓", rateLimitID)
+	} else {
+		t.Logf("Rate limit is configured ✓")
+	}
+}
--- a/tests/governance/teambudget_test.go
+++ b/tests/governance/teambudget_test.go
@@ -0,0 +1,160 @@
+package governance
+
+import (
+	"strconv"
+	"testing"
+)
+
+// TestTeamBudgetExceededWithMultipleVKs tests that team level budgets are enforced across multiple VKs
+// by making requests until budget is consumed.
+//
+// A team with a $0.01 budget owns two VKs whose own budgets ($1.00) are high enough
+// that the team budget is the limiting factor. Requests alternate between the two VKs
+// and the cost of each successful response is computed locally from its reported token
+// usage. The test passes when a request is rejected with a budget/team error after at
+// least one request has succeeded. It fails when the first request is rejected, when an
+// unrelated error is returned, or when no rejection is seen.
+func TestTeamBudgetExceededWithMultipleVKs(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a team with a fixed budget
+	teamBudget := 0.01
+	teamName := "test-team-budget-exceeded-" + generateRandomID()
+	createTeamResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/teams",
+		Body: CreateTeamRequest{
+			Name: teamName,
+			Budgets: []BudgetRequest{{
+				MaxLimit:      teamBudget,
+				ResetDuration: "1h",
+			}},
+		},
+	})
+
+	if createTeamResp.StatusCode != 200 {
+		t.Fatalf("Failed to create team: status %d", createTeamResp.StatusCode)
+	}
+
+	teamID := ExtractIDFromResponse(t, createTeamResp)
+	testData.AddTeam(teamID)
+
+	// Create 2 VKs under the team
+	var vkValues []string
+	for i := 1; i <= 2; i++ {
+		createVKResp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/api/governance/virtual-keys",
+			Body: CreateVirtualKeyRequest{
+				Name:   "test-vk-" + generateRandomID(),
+				TeamID: &teamID,
+				Budget: &BudgetRequest{
+					MaxLimit:      1.0, // High VK budget so team is the limiting factor
+					ResetDuration: "1h",
+				},
+			},
+		})
+
+		if createVKResp.StatusCode != 200 {
+			t.Fatalf("Failed to create VK %d: status %d", i, createVKResp.StatusCode)
+		}
+
+		vkID := ExtractIDFromResponse(t, createVKResp)
+		testData.AddVirtualKey(vkID)
+
+		// Checked assertions: a malformed response should fail the test, not panic it.
+		vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+		if !ok {
+			t.Fatalf("Create VK %d response has no virtual_key object: %v", i, createVKResp.Body)
+		}
+		vkValue, ok := vk["value"].(string)
+		if !ok || vkValue == "" {
+			t.Fatalf("Create VK %d response has no virtual_key.value string", i)
+		}
+		vkValues = append(vkValues, vkValue)
+	}
+
+	t.Logf("Created team %s with budget $%.2f and 2 VKs", teamName, teamBudget)
+
+	// A long prompt consumes more tokens and therefore budget faster. Only the request
+	// number varies between requests, so the fixed text is built once, outside the loop.
+	const promptPrefix = "Please provide a comprehensive and detailed response to the following question. " +
+		"I need extensive information covering all aspects of the topic. " +
+		"Provide multiple paragraphs with detailed explanations. " +
+		"Request number "
+	const promptSuffix = ". " +
+		"Here is a detailed prompt that will consume significant tokens: " +
+		"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+		"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+		"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+		"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+		"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+		"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+		"Mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+		"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+		"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+		"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+		"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+		"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+		"Mollit anim id est laborum."
+
+	// Keep making requests alternating between VKs, tracking actual token usage until team budget is exceeded
+	consumedBudget := 0.0
+	requestNum := 1
+	var lastSuccessfulCost float64
+	shouldStop := false
+	vkIndex := 0
+
+	for requestNum <= 50 {
+		// Alternate between VKs to test shared team budget
+		vkSlot := vkIndex % len(vkValues)
+		vkValue := vkValues[vkSlot]
+
+		longPrompt := promptPrefix + strconv.Itoa(requestNum) + promptSuffix
+
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role:    "user",
+						Content: longPrompt,
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode >= 400 {
+			// Request failed - anything that is not a budget error is a test failure
+			if !CheckErrorMessage(t, resp, "budget") && !CheckErrorMessage(t, resp, "team") {
+				t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+			}
+
+			t.Logf("Request %d correctly rejected: team budget exceeded", requestNum)
+			t.Logf("Consumed budget: $%.6f (limit: $%.2f)", consumedBudget, teamBudget)
+			t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+
+			// Verify that we made at least one successful request before hitting budget
+			if requestNum == 1 {
+				t.Fatalf("First request should have succeeded but was rejected due to budget")
+			}
+			return // Test passed
+		}
+
+		// Request succeeded - extract actual token usage from response
+		if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+			if prompt, ok := usage["prompt_tokens"].(float64); ok {
+				if completion, ok := usage["completion_tokens"].(float64); ok {
+					actualInputTokens := int(prompt)
+					actualOutputTokens := int(completion)
+					actualCost, err := CalculateCost("openai/gpt-4o", actualInputTokens, actualOutputTokens)
+					if err != nil {
+						t.Fatalf("Failed to calculate cost for request %d: %v", requestNum, err)
+					}
+
+					consumedBudget += actualCost
+					lastSuccessfulCost = actualCost
+
+					t.Logf("Request %d (VK%d) succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f/$%.2f",
+						requestNum, vkSlot+1, actualInputTokens, actualOutputTokens, actualCost, consumedBudget, teamBudget)
+				}
+			}
+		}
+
+		requestNum++
+		vkIndex++
+
+		// shouldStop was set on the previous iteration, so this request was sent after
+		// the locally tracked spend reached the budget; it succeeded, so stop and fail.
+		if shouldStop {
+			break
+		}
+
+		if consumedBudget >= teamBudget {
+			shouldStop = true
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit team budget limit (consumed $%.6f / $%.2f) - budget not being enforced",
+		requestNum-1, consumedBudget, teamBudget)
+}
--- a/tests/governance/test_utils.go
+++ b/tests/governance/test_utils.go
@@ -0,0 +1,551 @@
+package governance
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math/rand"
+	"net/http"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+// ModelCost holds the per-token pricing and the token limits recorded for one test model.
+type ModelCost struct {
+	// Provider is the provider the model is served by, e.g. "openai".
+	Provider string
+	// Per-token prices that CalculateCost multiplies by the input and output token counts.
+	InputCostPerToken, OutputCostPerToken float64
+	// Maximum input and output token counts recorded for the model.
+	MaxInputTokens, MaxOutputTokens int
+}
+
+// TestModels is the pricing table for every model the governance tests send requests to,
+// keyed by the "provider/model" identifier used in request bodies. Prices are per token
+// and are what CalculateCost uses to estimate how much budget a response consumed.
+var TestModels = map[string]ModelCost{
+	"openai/gpt-4o": {
+		Provider:           "openai",
+		InputCostPerToken:  2.5e-6,
+		OutputCostPerToken: 1e-5,
+		MaxInputTokens:     128000,
+		MaxOutputTokens:    16384,
+	},
+	"anthropic/claude-3-7-sonnet-20250219": {
+		Provider:           "anthropic",
+		InputCostPerToken:  3e-6,
+		OutputCostPerToken: 1.5e-5,
+		MaxInputTokens:     200000,
+		MaxOutputTokens:    128000,
+	},
+	"anthropic/claude-4-opus-20250514": {
+		Provider:           "anthropic",
+		InputCostPerToken:  1.5e-5,
+		OutputCostPerToken: 7.5e-5,
+		MaxInputTokens:     200000,
+		MaxOutputTokens:    32000,
+	},
+	"openrouter/anthropic/claude-3.7-sonnet": {
+		Provider:           "openrouter",
+		InputCostPerToken:  3e-6,
+		OutputCostPerToken: 1.5e-5,
+		MaxInputTokens:     200000,
+		MaxOutputTokens:    128000,
+	},
+	"openrouter/openai/gpt-4o": {
+		Provider:           "openrouter",
+		InputCostPerToken:  2.5e-6,
+		OutputCostPerToken: 1e-5,
+		MaxInputTokens:     128000,
+		MaxOutputTokens:    4096,
+	},
+}
+
+// CalculateCost returns the cost of a request to model that used inputTokens prompt
+// tokens and outputTokens completion tokens, priced from the TestModels table.
+// It returns an error when model has no entry in TestModels.
+func CalculateCost(model string, inputTokens, outputTokens int) (float64, error) {
+	pricing, known := TestModels[model]
+	if known {
+		promptCost := float64(inputTokens) * pricing.InputCostPerToken
+		completionCost := float64(outputTokens) * pricing.OutputCostPerToken
+		return promptCost + completionCost, nil
+	}
+
+	return 0, fmt.Errorf("unknown model: %s", model)
+}
+
+// APIRequest represents a request to the Bifrost API.
+// It is the input to MakeRequest and MakeRequestWithCustomHeaders.
+type APIRequest struct {
+	Method   string      // HTTP method, e.g. "GET" or "POST"
+	Path     string      // path (and optional query) appended to http://localhost:8080
+	Body     interface{} // JSON-marshaled into the request body when non-nil
+	VKHeader *string     // when non-nil, MakeRequest sends it as the x-bf-vk header
+}
+
+// APIResponse represents a response from the Bifrost API.
+type APIResponse struct {
+	StatusCode int                    // HTTP status code of the response
+	Body       map[string]interface{} // decoded JSON object; {"raw": <body text>} if decoding failed; nil if the body was empty
+	RawBody    []byte                 // response body exactly as received
+}
+
+// MakeRequest makes an HTTP request to the Bifrost API at http://localhost:8080.
+//
+// req.Body, when non-nil, is sent as JSON with a Content-Type of application/json.
+// When req.VKHeader is non-nil its value is sent in the x-bf-vk header. The response
+// is returned with its status code, raw bytes and decoded JSON body. Any failure to
+// build, send or read the request aborts the calling test via t.Fatalf.
+//
+// The transport logic lives in MakeRequestWithCustomHeaders; this function only maps
+// the virtual key onto the header it is sent in.
+func MakeRequest(t *testing.T, req APIRequest) *APIResponse {
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	var headers map[string]string
+	if req.VKHeader != nil {
+		headers = map[string]string{"x-bf-vk": *req.VKHeader}
+	}
+
+	// Ranging over a nil map is a no-op, so no extra header is set without a VK.
+	return MakeRequestWithCustomHeaders(t, req, headers)
+}
+
+// MakeRequestWithCustomHeaders makes an HTTP request with custom headers.
+// Use this when you need to test specific header formats (e.g., Authorization, x-api-key).
+//
+// The request goes to http://localhost:8080 + req.Path. req.Body, when non-nil, is
+// JSON-marshaled into the request body. Content-Type is set to application/json first
+// and customHeaders are applied afterwards, so they can override it. req.VKHeader is
+// not read here; pass the virtual key through customHeaders if it is needed.
+//
+// The returned APIResponse holds the status code, the raw body, and the body decoded
+// as a JSON object. If the body is not a JSON object, Body is {"raw": <body text>};
+// if the body is empty, Body is nil. Any failure to build, send or read the request
+// aborts the calling test via t.Fatalf.
+func MakeRequestWithCustomHeaders(t *testing.T, req APIRequest, customHeaders map[string]string) *APIResponse {
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	url := fmt.Sprintf("http://localhost:8080%s", req.Path)
+
+	var body io.Reader
+	if req.Body != nil {
+		bodyBytes, err := json.Marshal(req.Body)
+		if err != nil {
+			t.Fatalf("Failed to marshal request body: %v", err)
+		}
+		body = bytes.NewReader(bodyBytes)
+	}
+
+	httpReq, err := http.NewRequest(req.Method, url, body)
+	if err != nil {
+		t.Fatalf("Failed to create HTTP request: %v", err)
+	}
+
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	// Add custom headers
+	for key, value := range customHeaders {
+		httpReq.Header.Set(key, value)
+	}
+
+	// http.DefaultClient is the zero-value client, so this matches a fresh
+	// &http.Client{} without allocating one per request.
+	resp, err := http.DefaultClient.Do(httpReq)
+	if err != nil {
+		t.Fatalf("Failed to execute HTTP request: %v", err)
+	}
+	defer resp.Body.Close()
+
+	rawBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("Failed to read response body: %v", err)
+	}
+
+	var responseBody map[string]interface{}
+	if len(rawBody) > 0 {
+		if err := json.Unmarshal(rawBody, &responseBody); err != nil {
+			// If unmarshaling fails, store the raw response
+			responseBody = map[string]interface{}{"raw": string(rawBody)}
+		}
+	}
+
+	return &APIResponse{
+		StatusCode: resp.StatusCode,
+		Body:       responseBody,
+		RawBody:    rawBody,
+	}
+}
+
+// randomIDMu guards randomIDRand, because a *rand.Rand is not safe for concurrent use
+// and generateRandomID is called from tests that run in parallel.
+var randomIDMu sync.Mutex
+
+// randomIDRand is the generator behind generateRandomID. It is seeded once, at package
+// initialization, instead of on every call: reseeding from the clock per call gives
+// identical IDs to callers that read the same clock value, and rand.Seed also resets
+// the shared global source.
+var randomIDRand = rand.New(rand.NewSource(time.Now().UnixNano()))
+
+// generateRandomID generates a random ID for test resources.
+// The result is 8 characters long and drawn from lowercase ASCII letters and digits.
+// It is safe to call from multiple goroutines.
+func generateRandomID() string {
+	const letters = "abcdefghijklmnopqrstuvwxyz0123456789"
+
+	randomIDMu.Lock()
+	defer randomIDMu.Unlock()
+
+	b := make([]byte, 8)
+	for i := range b {
+		b[i] = letters[randomIDRand.Intn(len(letters))]
+	}
+	return string(b)
+}
+
+// CreateVirtualKeyRequest represents a request to create a virtual key.
+// The tests POST it as JSON to /api/governance/virtual-keys. Only Name is always
+// serialized; every other field is omitted from the JSON when left at its zero value.
+// TeamID and CustomerID attach the key to an existing team or customer, Budget and
+// RateLimit set limits on the key itself, and ProviderConfigs carries per-provider
+// settings, each of which may have its own budget and rate limit.
+type CreateVirtualKeyRequest struct {
+	Name            string                  `json:"name"`
+	Description     string                  `json:"description,omitempty"`
+	IsActive        *bool                   `json:"is_active,omitempty"`
+	TeamID          *string                 `json:"team_id,omitempty"`
+	CustomerID      *string                 `json:"customer_id,omitempty"`
+	Budget          *BudgetRequest          `json:"budget,omitempty"`
+	RateLimit       *CreateRateLimitRequest `json:"rate_limit,omitempty"`
+	ProviderConfigs []ProviderConfigRequest `json:"provider_configs,omitempty"`
+}
+
+// ProviderConfigRequest represents a provider configuration for a virtual key.
+// Provider names the provider (the tests use "openai"). AllowedModels and KeyIDs are
+// lists of model names and key IDs; the tests pass "*" in both. Budget and RateLimit
+// are limits scoped to this provider config rather than to the whole virtual key.
+// All fields except Provider are omitted from the JSON when unset.
+type ProviderConfigRequest struct {
+	ID            *uint                   `json:"id,omitempty"`
+	Provider      string                  `json:"provider"`
+	Weight        *float64                `json:"weight,omitempty"`
+	AllowedModels []string                `json:"allowed_models,omitempty"`
+	KeyIDs        []string                `json:"key_ids,omitempty"`
+	Budget        *BudgetRequest          `json:"budget,omitempty"`
+	RateLimit     *CreateRateLimitRequest `json:"rate_limit,omitempty"`
+}
+
+// float64Ptr copies v into a freshly allocated float64 and returns its address,
+// which makes it easy to fill optional *float64 request fields from a literal.
+func float64Ptr(v float64) *float64 {
+	p := new(float64)
+	*p = v
+	return p
+}
+
+// BudgetRequest represents a budget request.
+// Both fields are always serialized (neither tag has omitempty). The tests in
+// this package log MaxLimit as a dollar amount and pass ResetDuration as a
+// duration string such as "30s" or "1h".
+type BudgetRequest struct {
+	MaxLimit      float64 `json:"max_limit"`
+	ResetDuration string  `json:"reset_duration"`
+}
+
+// CreateTeamRequest represents a request to create a team.
+// Name is always serialized; CustomerID is omitted when nil and Budgets is
+// omitted when the slice is empty. Unlike the virtual key and customer create
+// requests, a team takes a list of budgets rather than a single one.
+type CreateTeamRequest struct {
+	Name       string          `json:"name"`
+	CustomerID *string         `json:"customer_id,omitempty"`
+	Budgets    []BudgetRequest `json:"budgets,omitempty"`
+}
+
+// CreateCustomerRequest represents a request to create a customer.
+// Name is always serialized; Budget is left out of the payload when nil.
+type CreateCustomerRequest struct {
+	Name   string         `json:"name"`
+	Budget *BudgetRequest `json:"budget,omitempty"`
+}
+
+// UpdateBudgetRequest represents a request to update a budget.
+// Both fields are pointers tagged omitempty, so a nil field is omitted from the
+// payload and a caller can send only the fields it sets.
+type UpdateBudgetRequest struct {
+	MaxLimit      *float64 `json:"max_limit,omitempty"`
+	ResetDuration *string  `json:"reset_duration,omitempty"`
+}
+
+// CreateRateLimitRequest represents a request to create a rate limit.
+// It pairs a token limit and a request limit, each with its own reset duration
+// string (the tests use values such as "30s" and "1h"). All four fields are
+// pointers tagged omitempty, so a nil field is omitted from the payload. The
+// same type is reused for the rate_limit field of the update virtual key
+// request.
+type CreateRateLimitRequest struct {
+	TokenMaxLimit        *int64  `json:"token_max_limit,omitempty"`
+	TokenResetDuration   *string `json:"token_reset_duration,omitempty"`
+	RequestMaxLimit      *int64  `json:"request_max_limit,omitempty"`
+	RequestResetDuration *string `json:"request_reset_duration,omitempty"`
+}
+
+// UpdateVirtualKeyRequest represents a request to update a virtual key.
+// Every field is tagged omitempty, so nil pointers and an empty ProviderConfigs
+// slice are omitted from the payload. Budget uses UpdateBudgetRequest, whose
+// fields are individually optional, while RateLimit reuses the create shape.
+type UpdateVirtualKeyRequest struct {
+	Name            *string                 `json:"name,omitempty"`
+	TeamID          *string                 `json:"team_id,omitempty"`
+	CustomerID      *string                 `json:"customer_id,omitempty"`
+	Budget          *UpdateBudgetRequest    `json:"budget,omitempty"`
+	RateLimit       *CreateRateLimitRequest `json:"rate_limit,omitempty"`
+	IsActive        *bool                   `json:"is_active,omitempty"`
+	ProviderConfigs []ProviderConfigRequest `json:"provider_configs,omitempty"`
+}
+
+// UpdateTeamRequest represents a request to update a team.
+// Name is omitted from the payload when nil. Budgets is a pointer to a slice
+// because omitempty only drops a nil pointer: a non-nil pointer to an empty
+// slice is still serialized as an empty JSON array.
+type UpdateTeamRequest struct {
+	Name *string `json:"name,omitempty"`
+	// Pointer-to-slice so tests can distinguish:
+	//   nil                  → field omitted (budgets untouched by server)
+	//   &[]BudgetRequest{}   → explicit empty array (server clears all budgets)
+	//   &[]BudgetRequest{…}  → replace with the provided budgets
+	Budgets *[]BudgetRequest `json:"budgets,omitempty"`
+}
+
+// UpdateCustomerRequest represents a request to update a customer.
+// Both fields are pointers tagged omitempty, so a nil field is omitted from the
+// payload.
+type UpdateCustomerRequest struct {
+	Name   *string              `json:"name,omitempty"`
+	Budget *UpdateBudgetRequest `json:"budget,omitempty"`
+}
+
+// ChatCompletionRequest represents an OpenAI-compatible chat completion request.
+// The tests post it to /v1/chat/completions with a provider-prefixed model name
+// such as "openai/gpt-4o". Model and Messages are always serialized; the
+// sampling parameters are pointers tagged omitempty and are omitted when nil.
+type ChatCompletionRequest struct {
+	Model       string        `json:"model"`
+	Messages    []ChatMessage `json:"messages"`
+	Temperature *float64      `json:"temperature,omitempty"`
+	MaxTokens   *int          `json:"max_tokens,omitempty"`
+	TopP        *float64      `json:"top_p,omitempty"`
+}
+
+// ChatMessage represents a chat message in OpenAI format.
+// Both fields are always serialized; the tests in this package only send
+// messages with the "user" role and plain-text content.
+type ChatMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+// ExtractIDFromResponse returns the ID of the resource created by a successful
+// create call.
+//
+// It fails the test immediately when resp carries a status of 400 or above, or
+// when none of the top-level "virtual_key", "team" or "customer" objects in the
+// decoded body holds a string "id". The keys are probed in that order and the
+// first match wins.
+func ExtractIDFromResponse(t *testing.T, resp *APIResponse) string {
+	// Attribute failures to the calling test line rather than to this helper.
+	t.Helper()
+
+	if resp.StatusCode >= 400 {
+		t.Fatalf("Request failed with status %d: %v", resp.StatusCode, resp.Body)
+	}
+
+	for _, key := range []string{"virtual_key", "team", "customer"} {
+		// A missing key yields a nil interface, which fails the assertion
+		// just like a value of the wrong type does.
+		nested, ok := resp.Body[key].(map[string]interface{})
+		if !ok {
+			continue
+		}
+		if id, ok := nested["id"].(string); ok {
+			return id
+		}
+	}
+
+	t.Fatalf("Could not extract ID from response: %v", resp.Body)
+	return ""
+}
+
+// CheckErrorMessage reports whether an error response mentions expectedText.
+// Matching is case-insensitive.
+//
+// The test fails immediately when resp has a status below 400. Otherwise the
+// text is searched for, in order, in the top-level "message" string, in the
+// "error" field, and finally in the raw response body. The function returns
+// true on the first match and false when nothing matches.
+func CheckErrorMessage(t *testing.T, resp *APIResponse, expectedText string) bool {
+	// Attribute failures to the calling test line rather than to this helper.
+	t.Helper()
+
+	if resp.StatusCode < 400 {
+		t.Fatalf("Expected error response but got status %d. Response: %v", resp.StatusCode, resp.Body)
+	}
+
+	if msg, ok := resp.Body["message"].(string); ok && contains(msg, expectedText) {
+		return true
+	}
+
+	switch errField := resp.Body["error"].(type) {
+	case string:
+		if contains(errField, expectedText) {
+			return true
+		}
+	case map[string]interface{}:
+		// If "error" is an object with a "message" string, match the decoded
+		// text: the raw body would hold JSON-escaped characters that a plain
+		// substring search can miss.
+		if msg, ok := errField["message"].(string); ok && contains(msg, expectedText) {
+			return true
+		}
+	}
+
+	// Raw body as the last resort, e.g. for non-JSON error pages.
+	return contains(string(resp.RawBody), expectedText)
+}
+
+// contains reports whether needle occurs in haystack, ignoring case. Both
+// strings are lowercased before the substring search.
+func contains(haystack, needle string) bool {
+	loweredHaystack := strings.ToLower(haystack)
+	loweredNeedle := strings.ToLower(needle)
+	return strings.Contains(loweredHaystack, loweredNeedle)
+}
+
+// GlobalTestData records the IDs of the resources a test creates so that they
+// can be deleted once the test finishes.
+type GlobalTestData struct {
+	VirtualKeys []string
+	Teams       []string
+	Customers   []string
+}
+
+// NewGlobalTestData returns a tracker whose three ID lists are empty but
+// non-nil.
+func NewGlobalTestData() *GlobalTestData {
+	tracker := new(GlobalTestData)
+	tracker.VirtualKeys = []string{}
+	tracker.Teams = []string{}
+	tracker.Customers = []string{}
+	return tracker
+}
+
+// AddVirtualKey registers a virtual key ID for later cleanup.
+func (g *GlobalTestData) AddVirtualKey(id string) {
+	g.VirtualKeys = append(g.VirtualKeys, id)
+}
+
+// AddTeam registers a team ID for later cleanup.
+func (g *GlobalTestData) AddTeam(id string) {
+	g.Teams = append(g.Teams, id)
+}
+
+// AddCustomer registers a customer ID for later cleanup.
+func (g *GlobalTestData) AddCustomer(id string) {
+	g.Customers = append(g.Customers, id)
+}
+
+// deleteWithRetry issues a DELETE for path, trying up to 5 times.
+//
+// It returns true as soon as the server answers 200 or 204, and also on 404,
+// because a resource that is already gone needs no cleanup. Any other status is
+// retried after a growing pause. It returns false, after logging a warning,
+// once the final attempt has failed. resourceType and resourceID are used only
+// in log messages. No VK header is sent with the request.
+func deleteWithRetry(t *testing.T, path string, resourceType string, resourceID string) bool {
+	const maxRetries = 5
+
+	for attempt := 1; ; attempt++ {
+		resp := MakeRequest(t, APIRequest{
+			Method: "DELETE",
+			Path:   path,
+			// VKHeader is left unset on purpose for DELETE requests.
+		})
+
+		switch resp.StatusCode {
+		case 200, 204:
+			// Deleted; only mention it when a retry was needed.
+			if attempt > 1 {
+				t.Logf("Successfully deleted %s %s after %d attempts", resourceType, resourceID, attempt)
+			}
+			return true
+		case 404:
+			// Nothing left to delete, which is fine for cleanup.
+			t.Logf("%s %s not found (already deleted or never existed)", resourceType, resourceID)
+			return true
+		}
+
+		if attempt == maxRetries {
+			t.Logf("Warning: Failed to delete %s %s after %d attempts: status %d", resourceType, resourceID, maxRetries, resp.StatusCode)
+			return false
+		}
+
+		t.Logf("Attempt %d/%d: Failed to delete %s %s: status %d, retrying...", attempt, maxRetries, resourceType, resourceID, resp.StatusCode)
+		// Linear backoff between attempts: 100ms, 200ms, 300ms, 400ms.
+		time.Sleep(time.Duration(100*attempt) * time.Millisecond)
+	}
+}
+
+// Cleanup deletes every resource recorded in g: virtual keys first, then teams,
+// then customers.
+//
+// Each deletion goes through deleteWithRetry, so it is retried on unexpected
+// statuses and sent without a VK header. Failures do not fail the test. The
+// summary line reports how many deletions actually succeeded out of how many
+// were tracked; a resource the server reports as not found counts as a
+// success, because deleteWithRetry treats it as one.
+// NOTE(review): the order presumably matters because virtual keys can
+// reference a team or customer - confirm before reordering.
+func (g *GlobalTestData) Cleanup(t *testing.T) {
+	// removeAll deletes each ID under basePath and returns the success count.
+	removeAll := func(basePath, resourceType string, ids []string) int {
+		deleted := 0
+		for _, id := range ids {
+			if deleteWithRetry(t, basePath+"/"+id, resourceType, id) {
+				deleted++
+			}
+		}
+		return deleted
+	}
+
+	deletedVKs := removeAll("/api/governance/virtual-keys", "virtual key", g.VirtualKeys)
+	deletedTeams := removeAll("/api/governance/teams", "team", g.Teams)
+	deletedCustomers := removeAll("/api/governance/customers", "customer", g.Customers)
+
+	t.Logf("Cleanup completed: deleted %d/%d VKs, %d/%d teams, %d/%d customers",
+		deletedVKs, len(g.VirtualKeys),
+		deletedTeams, len(g.Teams),
+		deletedCustomers, len(g.Customers))
+}
+
+// WaitForCondition polls checkFunc until it returns true or timeout elapses.
+// It is useful for waiting on asynchronous updates to reach the in-memory
+// store.
+//
+// checkFunc is always evaluated at least once, even with a zero or negative
+// timeout, and once more after the final pause so that the time spent sleeping
+// is never wasted. Pauses grow by 50ms per attempt, are capped at 500ms, and
+// never extend past the deadline. The function returns true when the condition
+// was met and false, after logging the timeout, otherwise. description is used
+// only in log messages.
+func WaitForCondition(t *testing.T, checkFunc func() bool, timeout time.Duration, description string) bool {
+	deadline := time.Now().Add(timeout)
+	attempt := 0
+
+	for {
+		attempt++
+		if checkFunc() {
+			if attempt > 1 {
+				t.Logf("Condition '%s' met after %d attempts", description, attempt)
+			}
+			return true
+		}
+
+		remaining := time.Until(deadline)
+		if remaining <= 0 {
+			break
+		}
+
+		// Progressive backoff: start with 50ms, max 500ms, clamped to the deadline.
+		sleepDuration := time.Duration(50*attempt) * time.Millisecond
+		if sleepDuration > 500*time.Millisecond {
+			sleepDuration = 500 * time.Millisecond
+		}
+		if sleepDuration > remaining {
+			sleepDuration = remaining
+		}
+		time.Sleep(sleepDuration)
+	}
+
+	t.Logf("Timeout waiting for condition '%s' after %d attempts (%.1fs)", description, attempt, timeout.Seconds())
+	return false
+}
+
+// WaitForAPICondition repeats the API request req until condition accepts the
+// response or timeout elapses. It is useful for verifying asynchronous updates
+// through API responses.
+//
+// The request is always sent at least once, even with a zero or negative
+// timeout, so the returned response is never nil because no request was made.
+// It is sent once more after the final pause. Pauses grow by 100ms per attempt,
+// are capped at 500ms, and never extend past the deadline.
+//
+// It returns the last response received together with true when condition was
+// satisfied, or with false, after logging the timeout, otherwise. description
+// is used only in log messages.
+func WaitForAPICondition(t *testing.T, req APIRequest, condition func(*APIResponse) bool, timeout time.Duration, description string) (*APIResponse, bool) {
+	deadline := time.Now().Add(timeout)
+	attempt := 0
+	var lastResp *APIResponse
+
+	for {
+		attempt++
+		lastResp = MakeRequest(t, req)
+
+		if condition(lastResp) {
+			if attempt > 1 {
+				t.Logf("API condition '%s' met after %d attempts", description, attempt)
+			}
+			return lastResp, true
+		}
+
+		remaining := time.Until(deadline)
+		if remaining <= 0 {
+			break
+		}
+
+		// Progressive backoff: start with 100ms, max 500ms, clamped to the deadline.
+		sleepDuration := time.Duration(100*attempt) * time.Millisecond
+		if sleepDuration > 500*time.Millisecond {
+			sleepDuration = 500 * time.Millisecond
+		}
+		if sleepDuration > remaining {
+			sleepDuration = remaining
+		}
+		time.Sleep(sleepDuration)
+	}
+
+	t.Logf("Timeout waiting for API condition '%s' after %d attempts (%.1fs)", description, attempt, timeout.Seconds())
+	return lastResp, false
+}
+
+// ParseDuration parses a duration string, extending time.ParseDuration with
+// four extra single-letter suffixes: "d" (days), "w" (weeks), "M" (months,
+// approximated as 30 days) and "Y" (years, approximated as 365 days).
+// Copied from framework/configstore/tables/utils.go
+//
+// For those suffixes, the text before the suffix is parsed as a number of hours
+// and then scaled, so fractional values such as "1.5d" are accepted. Any other
+// input is handed to time.ParseDuration unchanged. An empty string, or a
+// suffixed value whose prefix does not parse, yields an error.
+func ParseDuration(duration string) (time.Duration, error) {
+	if len(duration) == 0 {
+		return 0, fmt.Errorf("duration is empty")
+	}
+
+	last := len(duration) - 1
+
+	// asHours parses everything before the unit suffix as an hour count.
+	asHours := func() (time.Duration, bool) {
+		parsed, err := time.ParseDuration(duration[:last] + "h")
+		return parsed, err == nil
+	}
+
+	switch duration[last] {
+	case 'd':
+		hours, ok := asHours()
+		if !ok {
+			return 0, fmt.Errorf("invalid day duration: %s", duration)
+		}
+		return hours * 24, nil
+	case 'w':
+		hours, ok := asHours()
+		if !ok {
+			return 0, fmt.Errorf("invalid week duration: %s", duration)
+		}
+		return hours * 24 * 7, nil
+	case 'M':
+		hours, ok := asHours()
+		if !ok {
+			return 0, fmt.Errorf("invalid month duration: %s", duration)
+		}
+		return hours * 24 * 30, nil // a month is approximated as 30 days
+	case 'Y':
+		hours, ok := asHours()
+		if !ok {
+			return 0, fmt.Errorf("invalid year duration: %s", duration)
+		}
+		return hours * 24 * 365, nil // a year is approximated as 365 days
+	}
+
+	// No custom suffix: defer to the standard library.
+	return time.ParseDuration(duration)
+}
--- a/tests/governance/usagetracking_test.go
+++ b/tests/governance/usagetracking_test.go
@@ -0,0 +1,626 @@
+package governance
+
+import (
+	"testing"
+	"time"
+)
+
+// TestUsageTrackingRateLimitReset tests that rate limit resets happen correctly on ticker.
+//
+// It creates a virtual key whose token limit resets every 30 seconds, spends
+// tokens with one chat completion, waits until that usage shows up in the
+// in-memory rate limit counter, and then requires the counter to return to
+// zero. The test is skipped when the completion request fails, reports no
+// token usage, or the usage is never recorded, because the reset cannot be
+// observed without prior usage.
+func TestUsageTrackingRateLimitReset(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with a rate limit that resets every 30 seconds
+	vkName := "test-vk-rate-limit-reset-" + generateRandomID()
+	tokenLimit := int64(10000) // 10k token limit
+	tokenResetDuration := "30s"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:      &tokenLimit,
+				TokenResetDuration: &tokenResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Fail with context instead of panicking when the response shape is unexpected.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string value for the virtual key")
+	}
+
+	t.Logf("Created VK %s with rate limit: %d tokens reset every %s", vkName, tokenLimit, tokenResetDuration)
+
+	// Resolve the rate limit attached to the VK from the in-memory store
+	getVKResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getVKResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getVKResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("virtual_keys map missing from governance data: %v", getVKResp.Body)
+	}
+	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK not found in in-memory store")
+	}
+	rateLimitID, _ := vkData["rate_limit_id"].(string)
+	if rateLimitID == "" {
+		t.Fatalf("Rate limit ID not found for VK")
+	}
+
+	t.Logf("Rate limit ID: %s", rateLimitID)
+
+	rateLimitsReq := APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/rate-limits?from_memory=true",
+	}
+
+	// readTokenUsage pulls this VK's token counter out of a rate-limits
+	// response; the boolean is false when the response cannot be read.
+	readTokenUsage := func(resp *APIResponse) (float64, bool) {
+		if resp == nil || resp.StatusCode != 200 {
+			return 0, false
+		}
+		rateLimits, ok := resp.Body["rate_limits"].(map[string]interface{})
+		if !ok {
+			return 0, false
+		}
+		rateLimit, ok := rateLimits[rateLimitID].(map[string]interface{})
+		if !ok {
+			return 0, false
+		}
+		usage, ok := rateLimit["token_current_usage"].(float64)
+		return usage, ok
+	}
+
+	// Make a request to consume tokens
+	resp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/v1/chat/completions",
+		Body: ChatCompletionRequest{
+			Model: "openai/gpt-4o",
+			Messages: []ChatMessage{
+				{
+					Role:    "user",
+					Content: "This is a test prompt to consume tokens for rate limit testing.",
+				},
+			},
+		},
+		VKHeader: &vkValue,
+	})
+
+	if resp.StatusCode != 200 {
+		t.Logf("Request failed with status %d (may be due to other limits), body: %v", resp.StatusCode, resp.Body)
+		t.Skip("Could not execute request to test rate limit reset")
+	}
+
+	// Extract token count from response
+	var tokensUsed int
+	if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+		if totalTokens, ok := usage["total_tokens"].(float64); ok {
+			tokensUsed = int(totalTokens)
+		}
+	}
+
+	if tokensUsed == 0 {
+		t.Logf("No token usage in response, cannot verify rate limit reset")
+		t.Skip("Could not extract token usage from response")
+	}
+
+	t.Logf("Request consumed %d tokens", tokensUsed)
+
+	// Usage reaches the in-memory store asynchronously, so poll until the
+	// counter moves. Without recorded usage a later zero proves nothing.
+	_, tracked := WaitForAPICondition(t, rateLimitsReq, func(r *APIResponse) bool {
+		usage, ok := readTokenUsage(r)
+		return ok && usage > 0
+	}, 5*time.Second, "token usage recorded for rate limit")
+	if !tracked {
+		t.Skip("Token usage was never recorded in the in-memory store; cannot verify rate limit reset")
+	}
+
+	// The limit resets after 30s; allow extra time for the reset ticker to run.
+	t.Logf("Waiting up to 45 seconds for rate limit ticker to reset...")
+	lastResp, reset := WaitForAPICondition(t, rateLimitsReq, func(r *APIResponse) bool {
+		usage, ok := readTokenUsage(r)
+		return ok && usage == 0
+	}, 45*time.Second, "token usage reset to zero")
+	if !reset {
+		remaining, _ := readTokenUsage(lastResp)
+		t.Fatalf("Rate limit not reset after 30s timeout: token usage is still %.0f (should be 0)", remaining)
+	}
+
+	t.Logf("Rate limit reset correctly after 30s timeout ✓")
+}
+
+// TestUsageTrackingBudgetReset tests that budget resets happen correctly on ticker.
+//
+// It creates a virtual key with a $1 budget that resets every 30 seconds,
+// spends some of it with one chat completion, waits until the spend shows up
+// in the in-memory budget, and then requires the usage to fall back to roughly
+// zero. The test is skipped when the completion request fails or the spend is
+// never recorded, because the reset cannot be observed without prior usage.
+func TestUsageTrackingBudgetReset(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with a budget that resets every 30 seconds
+	vkName := "test-vk-budget-reset-" + generateRandomID()
+	budgetLimit := 1.0 // $1 budget
+	resetDuration := "30s"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			Budget: &BudgetRequest{
+				MaxLimit:      budgetLimit,
+				ResetDuration: resetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Fail with context instead of panicking when the response shape is unexpected.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string value for the virtual key")
+	}
+
+	t.Logf("Created VK %s with budget: $%.2f reset every %s", vkName, budgetLimit, resetDuration)
+
+	// Resolve the budget attached to the VK from the in-memory store
+	getVKResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getVKResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getVKResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("virtual_keys map missing from governance data: %v", getVKResp.Body)
+	}
+	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK not found in in-memory store")
+	}
+	budgetID, _ := vkData["budget_id"].(string)
+	if budgetID == "" {
+		t.Fatalf("Budget ID not found for VK")
+	}
+
+	// currentUsage reads this VK's budget usage from the in-memory store and
+	// fails the test with context when the response cannot be interpreted.
+	currentUsage := func(stage string) float64 {
+		budgetsResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/budgets?from_memory=true",
+		})
+		if budgetsResp.StatusCode != 200 {
+			t.Fatalf("Failed to get budgets (%s): status %d", stage, budgetsResp.StatusCode)
+		}
+		budgets, ok := budgetsResp.Body["budgets"].(map[string]interface{})
+		if !ok {
+			t.Fatalf("budgets map missing from response (%s): %v", stage, budgetsResp.Body)
+		}
+		budget, ok := budgets[budgetID].(map[string]interface{})
+		if !ok {
+			t.Fatalf("Budget %s not found in in-memory store (%s)", budgetID, stage)
+		}
+		usage, _ := budget["current_usage"].(float64)
+		return usage
+	}
+
+	initialUsage := currentUsage("initial")
+	t.Logf("Initial budget usage: $%.6f", initialUsage)
+
+	// Make a request to consume budget
+	resp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/v1/chat/completions",
+		Body: ChatCompletionRequest{
+			Model: "openai/gpt-4o",
+			Messages: []ChatMessage{
+				{
+					Role:    "user",
+					Content: "Test prompt for budget reset testing.",
+				},
+			},
+		},
+		VKHeader: &vkValue,
+	})
+
+	if resp.StatusCode != 200 {
+		t.Logf("Request failed with status %d, body: %v", resp.StatusCode, resp.Body)
+		t.Skip("Could not execute request to test budget reset")
+	}
+
+	// The budget is updated by an async PostHook goroutine, so poll until the
+	// spend is visible. Without recorded usage a later zero proves nothing.
+	var usageAfterRequest float64
+	recorded := WaitForCondition(t, func() bool {
+		usageAfterRequest = currentUsage("after request")
+		return usageAfterRequest > initialUsage
+	}, 5*time.Second, "budget usage recorded in in-memory store")
+
+	t.Logf("Budget usage after request: $%.6f", usageAfterRequest)
+
+	if !recorded {
+		t.Skip("Budget usage was never recorded in the in-memory store; cannot verify budget reset")
+	}
+
+	// The budget resets after 30s; allow extra time for the reset ticker to run.
+	// Usage must drop below what was recorded, because a single cheap request
+	// can cost less than the near-zero threshold on its own.
+	t.Logf("Waiting up to 45 seconds for budget ticker to reset...")
+	var usageAfterReset float64
+	reset := WaitForCondition(t, func() bool {
+		usageAfterReset = currentUsage("after reset")
+		return usageAfterReset < usageAfterRequest && usageAfterReset <= 0.001
+	}, 45*time.Second, "budget usage reset")
+
+	if !reset {
+		t.Fatalf("Budget not reset after 30s timeout: usage is $%.6f (should be ~0)", usageAfterReset)
+	}
+
+	t.Logf("Budget reset correctly after 30s timeout ✓")
+}
+
+// TestInMemoryUsageUpdateOnRequest tests that in-memory usage counters are updated on request.
+//
+// It creates a virtual key with an hourly token limit, sends one chat
+// completion, and then polls the in-memory store until the token counter of
+// the key's rate limit becomes positive. The test is skipped when the
+// completion request fails or reports no token usage, and fails when the
+// counter has not moved within 3 seconds.
+func TestInMemoryUsageUpdateOnRequest(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with rate limit to track usage
+	vkName := "test-vk-usage-update-" + generateRandomID()
+	tokenLimit := int64(100000)
+	tokenResetDuration := "1h"
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:      &tokenLimit,
+				TokenResetDuration: &tokenResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Fail with context instead of panicking when the response shape is unexpected.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string value for the virtual key")
+	}
+
+	t.Logf("Created VK %s for usage tracking test", vkName)
+
+	// Make a request to consume tokens
+	resp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/v1/chat/completions",
+		Body: ChatCompletionRequest{
+			Model: "openai/gpt-4o",
+			Messages: []ChatMessage{
+				{
+					Role:    "user",
+					Content: "Short test prompt for usage tracking.",
+				},
+			},
+		},
+		VKHeader: &vkValue,
+	})
+
+	if resp.StatusCode != 200 {
+		t.Logf("Request failed with status %d", resp.StatusCode)
+		t.Skip("Could not execute request to test usage tracking")
+	}
+
+	// Extract token usage from response
+	var tokensUsed int
+	if usage, ok := resp.Body["usage"].(map[string]interface{}); ok {
+		if totalTokens, ok := usage["total_tokens"].(float64); ok {
+			tokensUsed = int(totalTokens)
+		}
+	}
+
+	if tokensUsed == 0 {
+		t.Logf("No token usage in response")
+		t.Skip("Could not extract token usage from response")
+	}
+
+	t.Logf("Request consumed %d tokens", tokensUsed)
+
+	// Wait for async update to propagate to in-memory store
+	var tokenUsage int64
+	usageUpdated := WaitForCondition(t, func() bool {
+		getDataResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/virtual-keys?from_memory=true",
+		})
+
+		if getDataResp.StatusCode != 200 {
+			return false
+		}
+
+		virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+		if !ok {
+			return false
+		}
+
+		vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+		if !ok {
+			return false
+		}
+
+		// Rate limit should exist
+		rateLimitID, _ := vkData["rate_limit_id"].(string)
+		if rateLimitID == "" {
+			return false
+		}
+
+		// Fetch the rate limit data to check token usage
+		getRateLimitsResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/rate-limits?from_memory=true",
+		})
+
+		if getRateLimitsResp.StatusCode != 200 {
+			return false
+		}
+
+		rateLimitsMap, ok := getRateLimitsResp.Body["rate_limits"].(map[string]interface{})
+		if !ok {
+			return false
+		}
+
+		rateLimitData, ok := rateLimitsMap[rateLimitID].(map[string]interface{})
+		if !ok {
+			return false
+		}
+
+		// Check that token usage has been updated (should be > 0 after the request)
+		if tokenCurrentUsage, ok := rateLimitData["token_current_usage"].(float64); ok {
+			tokenUsage = int64(tokenCurrentUsage)
+			return tokenUsage > 0
+		}
+
+		return false
+	}, 3*time.Second, "usage updated in in-memory store")
+
+	if !usageUpdated {
+		t.Fatalf("Rate limit usage not updated in in-memory store after request (timeout after 3s)")
+	}
+
+	// Reaching this point means the rate limit was resolved and its token
+	// counter is positive, so there is no "no rate limit" case left to handle.
+	t.Logf("Rate limit tracking is enabled for VK ✓")
+	t.Logf("Token usage in rate limit: %d tokens", tokenUsage)
+
+	t.Logf("In-memory usage tracking verified ✓")
+}
+
+// TestResetTickerBothBudgetAndRateLimit tests that ticker resets both budget and rate limit together.
+//
+// It creates a virtual key whose budget and token limit both reset every 30
+// seconds, sends up to three chat completions, records the budget and token
+// usage held in memory, waits 40 seconds, and reads both again. For each
+// counter that showed usage before the wait, the test fails unless the value
+// afterwards is lower.
+func TestResetTickerBothBudgetAndRateLimit(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with both budget and rate limit that reset every 30 seconds
+	vkName := "test-vk-both-reset-" + generateRandomID()
+	budgetLimit := 2.0
+	budgetResetDuration := "30s"
+	tokenLimit := int64(50000)
+	tokenResetDuration := "30s"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			Budget: &BudgetRequest{
+				MaxLimit:      budgetLimit,
+				ResetDuration: budgetResetDuration,
+			},
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:      &tokenLimit,
+				TokenResetDuration: &tokenResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Fail with context instead of panicking when the response shape is unexpected.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string value for the virtual key")
+	}
+
+	t.Logf("Created VK %s with budget and rate limit both resetting every 30s", vkName)
+
+	// Make requests to consume both budget and tokens
+	for i := 0; i < 3; i++ {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role: "user",
+						// i stays below 10, so adding it to '0' yields its decimal digit.
+						Content: "Test request " + string(rune('0'+i)) + " for reset ticker test.",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode != 200 {
+			t.Logf("Request %d failed with status %d", i+1, resp.StatusCode)
+			break
+		}
+		t.Logf("Request %d succeeded", i+1)
+	}
+
+	// Wait for async PostHook goroutines to complete budget updates
+	t.Logf("Waiting 3 seconds for async updates to complete...")
+	time.Sleep(3 * time.Second)
+
+	// Resolve the budget and rate limit attached to the VK
+	getVKResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getVKResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getVKResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("virtual_keys map missing from governance data: %v", getVKResp.Body)
+	}
+	vkData, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK not found in in-memory store")
+	}
+
+	// The VK was created with both limits, so a missing ID is a failure
+	// rather than something to skip over silently.
+	budgetID, _ := vkData["budget_id"].(string)
+	if budgetID == "" {
+		t.Fatalf("Budget ID not found for VK")
+	}
+	rateLimitID, _ := vkData["rate_limit_id"].(string)
+	if rateLimitID == "" {
+		t.Fatalf("Rate limit ID not found for VK")
+	}
+
+	// usageSnapshot reads the VK's budget usage and token usage from the
+	// in-memory store, failing the test when either response cannot be read.
+	usageSnapshot := func(stage string) (budgetUsage, tokenUsage float64) {
+		budgetsResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/budgets?from_memory=true",
+		})
+		if budgetsResp.StatusCode != 200 {
+			t.Fatalf("Failed to get budgets %s: status %d", stage, budgetsResp.StatusCode)
+		}
+		budgets, ok := budgetsResp.Body["budgets"].(map[string]interface{})
+		if !ok {
+			t.Fatalf("budgets map missing from response %s: %v", stage, budgetsResp.Body)
+		}
+		budget, ok := budgets[budgetID].(map[string]interface{})
+		if !ok {
+			t.Fatalf("Budget %s not found in in-memory store %s", budgetID, stage)
+		}
+		budgetUsage, _ = budget["current_usage"].(float64)
+
+		rateLimitsResp := MakeRequest(t, APIRequest{
+			Method: "GET",
+			Path:   "/api/governance/rate-limits?from_memory=true",
+		})
+		if rateLimitsResp.StatusCode != 200 {
+			t.Fatalf("Failed to get rate limits %s: status %d", stage, rateLimitsResp.StatusCode)
+		}
+		rateLimits, ok := rateLimitsResp.Body["rate_limits"].(map[string]interface{})
+		if !ok {
+			t.Fatalf("rate_limits map missing from response %s: %v", stage, rateLimitsResp.Body)
+		}
+		rateLimit, ok := rateLimits[rateLimitID].(map[string]interface{})
+		if !ok {
+			t.Fatalf("Rate limit %s not found in in-memory store %s", rateLimitID, stage)
+		}
+		tokenUsage, _ = rateLimit["token_current_usage"].(float64)
+
+		return budgetUsage, tokenUsage
+	}
+
+	usageBeforeReset, tokensBeforeReset := usageSnapshot("before reset")
+
+	t.Logf("Budget usage before reset: $%.6f", usageBeforeReset)
+	t.Logf("Token usage before reset: %.0f", tokensBeforeReset)
+
+	// Wait for reset (reset ticker runs every 10s, budget resets at 30s, add buffer for processing)
+	t.Logf("Waiting 40 seconds for reset ticker...")
+	time.Sleep(40 * time.Second)
+
+	usageAfterReset, tokensAfterReset := usageSnapshot("after reset")
+
+	t.Logf("Budget usage after reset: $%.6f", usageAfterReset)
+	t.Logf("Token usage after reset: %.0f", tokensAfterReset)
+
+	if usageBeforeReset > 0 && usageAfterReset >= usageBeforeReset {
+		t.Fatalf("Budget not reset properly: before=$%.6f, after=$%.6f (expected reset to ~0)", usageBeforeReset, usageAfterReset)
+	}
+
+	if tokensBeforeReset > 0 && tokensAfterReset >= tokensBeforeReset {
+		t.Fatalf("Rate limit not reset properly: before=%.0f tokens, after=%.0f tokens (expected reset to 0)", tokensBeforeReset, tokensAfterReset)
+	}
+
+	t.Logf("Both budget and rate limit reset on ticker ✓")
+}
+
+// TestDataPersistenceAcrossRequests tests that budget and rate limit data persists correctly.
+//
+// It creates a virtual key with an hourly budget, token limit and request
+// limit, sends two chat completions through it, and then checks that the key
+// is still present in the in-memory store with both a budget ID and a rate
+// limit ID. The test is skipped when neither completion succeeds.
+func TestDataPersistenceAcrossRequests(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with both budget and rate limit
+	vkName := "test-vk-persistence-" + generateRandomID()
+	budgetLimit := 5.0
+	budgetResetDuration := "1h"
+	tokenLimit := int64(100000)
+	tokenResetDuration := "1h"
+	requestLimit := int64(100)
+	requestResetDuration := "1h"
+
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name: vkName,
+			Budget: &BudgetRequest{
+				MaxLimit:      budgetLimit,
+				ResetDuration: budgetResetDuration,
+			},
+			RateLimit: &CreateRateLimitRequest{
+				TokenMaxLimit:        &tokenLimit,
+				TokenResetDuration:   &tokenResetDuration,
+				RequestMaxLimit:      &requestLimit,
+				RequestResetDuration: &requestResetDuration,
+			},
+		},
+	})
+
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Fail with context instead of panicking when the response shape is unexpected.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object: %v", createVKResp.Body)
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string value for the virtual key")
+	}
+
+	t.Logf("Created VK %s for persistence testing", vkName)
+
+	// Make multiple requests and verify data persists
+	successCount := 0
+	for i := 0; i < 2; i++ {
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model: "openai/gpt-4o",
+				Messages: []ChatMessage{
+					{
+						Role: "user",
+						// i stays below 10, so adding it to '0' yields its decimal digit.
+						Content: "Persistence test request " + string(rune('0'+i)) + ".",
+					},
+				},
+			},
+			VKHeader: &vkValue,
+		})
+
+		if resp.StatusCode == 200 {
+			successCount++
+		} else {
+			t.Logf("Request %d failed with status %d", i+1, resp.StatusCode)
+		}
+	}
+
+	if successCount == 0 {
+		t.Skip("Could not make requests to test persistence")
+	}
+
+	t.Logf("Made %d successful requests", successCount)
+
+	// Verify data persists in in-memory store
+	getDataResp := MakeRequest(t, APIRequest{
+		Method: "GET",
+		Path:   "/api/governance/virtual-keys?from_memory=true",
+	})
+
+	if getDataResp.StatusCode != 200 {
+		t.Fatalf("Failed to get governance data: status %d", getDataResp.StatusCode)
+	}
+
+	virtualKeysMap, ok := getDataResp.Body["virtual_keys"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("virtual_keys map missing from governance data: %v", getDataResp.Body)
+	}
+
+	// A missing entry and an entry of the wrong shape both mean the VK data
+	// cannot be read back.
+	vkDataMap, ok := virtualKeysMap[vkValue].(map[string]interface{})
+	if !ok {
+		t.Fatalf("VK not found in in-memory store after requests")
+	}
+
+	budgetID, _ := vkDataMap["budget_id"].(string)
+	rateLimitID, _ := vkDataMap["rate_limit_id"].(string)
+
+	if budgetID == "" {
+		t.Fatalf("Budget ID not found for VK")
+	}
+	if rateLimitID == "" {
+		t.Fatalf("Rate limit ID not found for VK")
+	}
+
+	t.Logf("VK data persists correctly in in-memory store ✓")
+}
--- a/tests/governance/vkbudget_test.go
+++ b/tests/governance/vkbudget_test.go
@@ -0,0 +1,131 @@
+package governance
+
+import (
+	"strconv"
+	"testing"
+)
+
+// TestVKBudgetExceeded verifies that a VK-level budget is enforced.
+//
+// It creates a virtual key with a $0.01 budget and sends chat completions through
+// it, estimating spend from each response's usage block. It passes when a request
+// after the first is rejected with a budget error. It fails on any other error, on
+// a rejected first request, when the request sent after the estimate reached the
+// limit still succeeds, or when 50 requests complete without a rejection.
+func TestVKBudgetExceeded(t *testing.T) {
+	t.Parallel()
+	testData := NewGlobalTestData()
+	defer testData.Cleanup(t)
+
+	// Create a VK with a fixed budget
+	vkBudget := 0.01
+	vkName := "test-vk-budget-exceeded-" + generateRandomID()
+	createVKResp := MakeRequest(t, APIRequest{
+		Method: "POST",
+		Path:   "/api/governance/virtual-keys",
+		Body: CreateVirtualKeyRequest{
+			Name:   vkName,
+			Budget: &BudgetRequest{MaxLimit: vkBudget, ResetDuration: "1h"},
+		},
+	})
+	if createVKResp.StatusCode != 200 {
+		t.Fatalf("Failed to create VK: status %d", createVKResp.StatusCode)
+	}
+
+	vkID := ExtractIDFromResponse(t, createVKResp)
+	testData.AddVirtualKey(vkID)
+
+	// Use checked assertions: a malformed response should fail this test with a
+	// clear message instead of panicking and aborting the whole test binary.
+	vk, ok := createVKResp.Body["virtual_key"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("Create VK response has no virtual_key object")
+	}
+	vkValue, ok := vk["value"].(string)
+	if !ok {
+		t.Fatalf("Create VK response has no string virtual_key.value")
+	}
+
+	t.Logf("Created VK %s with budget $%.2f", vkName, vkBudget)
+
+	// Loop-invariant prompt pieces, built once. The prompt is deliberately long so
+	// that each request consumes more tokens and the budget is reached faster.
+	const lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+		"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
+		"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. " +
+		"Nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit. " +
+		"In voluptate velit esse cillum dolore eu fugiat nulla pariatur. " +
+		"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt. " +
+		"Mollit anim id est laborum."
+	const promptHead = "Please provide a comprehensive and detailed response to the following question. " +
+		"I need extensive information covering all aspects of the topic. " +
+		"Provide multiple paragraphs with detailed explanations. " +
+		"Request number "
+	const promptTail = ". " +
+		"Here is a detailed prompt that will consume significant tokens: " +
+		lorem + " " + lorem
+
+	// Keep making requests, tracking actual token usage from responses, until budget is exceeded
+	const model = "openai/gpt-4o"
+	const maxRequests = 50
+	consumedBudget := 0.0
+	lastSuccessfulCost := 0.0
+	requestsMade := 0
+
+	for requestNum := 1; requestNum <= maxRequests; requestNum++ {
+		// Once the estimate has reached the limit, this request is the one that must
+		// be rejected; if it still succeeds we stop and report the failure below.
+		budgetReached := consumedBudget >= vkBudget
+
+		prompt := promptHead + strconv.Itoa(requestNum) + promptTail
+		resp := MakeRequest(t, APIRequest{
+			Method: "POST",
+			Path:   "/v1/chat/completions",
+			Body: ChatCompletionRequest{
+				Model:    model,
+				Messages: []ChatMessage{{Role: "user", Content: prompt}},
+			},
+			VKHeader: &vkValue,
+		})
+		requestsMade = requestNum
+
+		if resp.StatusCode >= 400 {
+			// Request failed - the only acceptable cause is the budget
+			if !CheckErrorMessage(t, resp, "budget") {
+				t.Fatalf("Request %d failed with unexpected error (not budget): %v", requestNum, resp.Body)
+			}
+			// Verify that we made at least one successful request before hitting budget
+			if requestNum == 1 {
+				t.Fatalf("First request should have succeeded but was rejected due to budget")
+			}
+			t.Logf("Request %d correctly rejected: budget exceeded", requestNum)
+			t.Logf("Consumed budget: $%.6f (limit: $%.2f)", consumedBudget, vkBudget)
+			t.Logf("Last successful request cost: $%.6f", lastSuccessfulCost)
+			return // Test passed
+		}
+
+		// Request succeeded - extract actual token usage from response. Indexing a
+		// nil map is safe, so a missing usage object simply leaves both flags false.
+		usage, _ := resp.Body["usage"].(map[string]interface{})
+		promptTokens, hasPrompt := usage["prompt_tokens"].(float64)
+		completionTokens, hasCompletion := usage["completion_tokens"].(float64)
+		if hasPrompt && hasCompletion {
+			inputTokens, outputTokens := int(promptTokens), int(completionTokens)
+			// NOTE(review): CalculateCost's second result is discarded, as before; its
+			// meaning is not visible here, so treat the estimate as best-effort.
+			actualCost, _ := CalculateCost(model, inputTokens, outputTokens)
+			consumedBudget += actualCost
+			lastSuccessfulCost = actualCost
+
+			t.Logf("Request %d succeeded: input_tokens=%d, output_tokens=%d, cost=$%.6f, consumed=$%.6f/$%.2f",
+				requestNum, inputTokens, outputTokens, actualCost, consumedBudget, vkBudget)
+		}
+
+		if budgetReached {
+			break
+		}
+	}
+
+	t.Fatalf("Made %d requests but never hit budget limit (consumed $%.6f / $%.2f) - budget not being enforced",
+		requestsMade, consumedBudget, vkBudget)
+}