package semanticcache import ( "context" "os" "strings" "testing" "github.com/google/uuid" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/vectorstore" ) // requiresVectors returns true if the vector store requires vectors for storage. // Some stores (like Qdrant, Pinecone, and Weaviate) require vectors for all entries, // while others (like Redis) can store metadata without vectors. func requiresVectors(storeType vectorstore.VectorStoreType) bool { switch storeType { case vectorstore.VectorStoreTypeQdrant, vectorstore.VectorStoreTypePinecone, vectorstore.VectorStoreTypeWeaviate: return true default: return false } } // skipIfNoAPIKey skips the test if OPENAI_API_KEY is not set and the store requires vectors. func skipIfNoAPIKey(t *testing.T, storeType vectorstore.VectorStoreType) { if requiresVectors(storeType) && os.Getenv("OPENAI_API_KEY") == "" { t.Skipf("Skipping %s test: OPENAI_API_KEY not set (required for embedding generation)", storeType) } } // VectorStoreTestCase defines a test case for a specific vector store type VectorStoreTestCase struct { Name string StoreType vectorstore.VectorStoreType } // getVectorStoreTestCases returns all vector store test cases func getVectorStoreTestCases() []VectorStoreTestCase { return []VectorStoreTestCase{ {"Weaviate", vectorstore.VectorStoreTypeWeaviate}, {"Redis", vectorstore.VectorStoreTypeRedis}, {"Qdrant", vectorstore.VectorStoreTypeQdrant}, {"Pinecone", vectorstore.VectorStoreTypePinecone}, } } // getDefaultTestConfig returns the default test configuration func getDefaultTestConfig() *Config { return &Config{ Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 1536, Threshold: 0.8, CleanUpOnShutdown: true, Keys: []schemas.Key{ { Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"), Models: schemas.WhiteList{"*"}, Weight: 1.0, }, }, } } // TestSemanticCache_AllVectorStores_BasicFlow tests the basic cache flow across all vector stores func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) { for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic") // Test request request := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, ChatRequest: &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4o-mini", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: bifrost.Ptr("Hello from " + tc.Name + " test!"), }, }, }, Params: &schemas.ChatParameters{ Temperature: bifrost.Ptr(0.7), MaxCompletionTokens: bifrost.Ptr(100), }, }, } t.Logf("[%s] Testing first request (cache miss)...", tc.Name) // First request - should be a cache miss modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request) if err != nil { t.Fatalf("[%s] PreHook failed: %v", tc.Name, err) } if shortCircuit != nil { t.Fatalf("[%s] Expected cache miss, but got cache hit", tc.Name) } if modifiedReq == nil { t.Fatalf("[%s] Modified request is nil", tc.Name) } t.Logf("[%s] Cache miss handled correctly", tc.Name) // Simulate a response response := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ID: uuid.New().String(), Choices: []schemas.BifrostResponseChoice{ { Index: 0, ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ Message: &schemas.ChatMessage{ Role: schemas.ChatMessageRoleAssistant, Content: &schemas.ChatMessageContent{ ContentStr: bifrost.Ptr("Hello! Response from " + tc.Name + " test."), }}, }, }, }, ExtraFields: schemas.BifrostResponseExtraFields{ Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o-mini", RequestType: schemas.ChatCompletionRequest, }, }, } // Cache the response t.Logf("[%s] Caching response...", tc.Name) _, _, err = setup.Plugin.PostLLMHook(ctx, response, nil) if err != nil { t.Fatalf("[%s] PostHook failed: %v", tc.Name, err) } // Wait for async caching to complete WaitForCache(setup.Plugin) t.Logf("[%s] Response cached successfully", tc.Name) // Second request - should be a cache hit t.Logf("[%s] Testing second identical request (expecting cache hit)...", tc.Name) ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic") _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request) if err != nil { t.Fatalf("[%s] Second PreHook failed: %v", tc.Name, err) } if shortCircuit2 == nil { t.Fatalf("[%s] Expected cache hit on identical request, but got cache miss", tc.Name) } if shortCircuit2.Response == nil { t.Fatalf("[%s] Cache hit but response is nil", tc.Name) } t.Logf("[%s] Cache hit detected and response returned", tc.Name) t.Logf("[%s] Basic flow test passed!", tc.Name) }) } } // TestSemanticCache_AllVectorStores_DirectHashMatch tests direct hash matching across all vector stores func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) { for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() // Use unique cache key per test run to avoid stale data from previous runs // (Pinecone Local doesn't support deletion by metadata filter) testRunID := uuid.New().String()[:8] cacheKey := "test-" + strings.ToLower(tc.Name) + "-direct-" + testRunID ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) testRequest := CreateBasicChatRequest("Direct hash test for "+tc.Name+" "+testRunID, 0.7, 50) t.Logf("[%s] Making first request to populate cache...", tc.Name) response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest) if err1 != nil { t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1) return } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Second request with direct-only cache type ctx2 := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) t.Logf("[%s] Making second request with CacheTypeDirect...", tc.Name) response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message) } AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") t.Logf("[%s] Direct hash match test passed!", tc.Name) }) } } // TestSemanticCache_AllVectorStores_NamespaceIsolation tests that different cache keys are isolated func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() // Use unique cache keys per test run to avoid stale data from previous runs // (Pinecone Local doesn't support deletion by metadata filter) testRunID := uuid.New().String()[:8] cacheKey1 := "test-" + strings.ToLower(tc.Name) + "-namespace-1-" + testRunID cacheKey2 := "test-" + strings.ToLower(tc.Name) + "-namespace-2-" + testRunID // Cache with first key ctx1 := CreateContextWithCacheKey(cacheKey1) testRequest := CreateBasicChatRequest("Namespace isolation test for "+tc.Name+" "+testRunID, 0.7, 50) t.Logf("[%s] Making request with cache key 1...", tc.Name) response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1) return } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Try with different cache key - should miss ctx2 := CreateContextWithCacheKey(cacheKey2) t.Logf("[%s] Making same request with different cache key (expecting miss)...", tc.Name) response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message) } // Should be a cache miss because different namespace AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Try with original key - should hit ctx3 := CreateContextWithCacheKey(cacheKey1) t.Logf("[%s] Making same request with original cache key (expecting hit)...", tc.Name) response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest) if err3 != nil { t.Fatalf("[%s] Third request failed: %v", tc.Name, err3.Error.Message) } AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct") t.Logf("[%s] Namespace isolation test passed!", tc.Name) }) } } // TestSemanticCache_AllVectorStores_ParameterFiltering tests that different parameters don't share cache func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) { for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params") // First request with temperature=0.7 request1 := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, ChatRequest: &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4o-mini", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: bifrost.Ptr("Parameter test for " + tc.Name), }, }, }, Params: &schemas.ChatParameters{ Temperature: bifrost.Ptr(0.7), MaxCompletionTokens: bifrost.Ptr(100), }, }, } t.Logf("[%s] Testing first request with temperature=0.7...", tc.Name) _, shortCircuit1, err := setup.Plugin.PreLLMHook(ctx, request1) if err != nil { t.Fatalf("[%s] First PreHook failed: %v", tc.Name, err) } if shortCircuit1 != nil { t.Fatalf("[%s] Expected cache miss for first request", tc.Name) } // Cache a response response := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ID: uuid.New().String(), Choices: []schemas.BifrostResponseChoice{ { ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ Message: &schemas.ChatMessage{ Role: schemas.ChatMessageRoleAssistant, Content: &schemas.ChatMessageContent{ ContentStr: bifrost.Ptr("Response for " + tc.Name), }}, }, }, }, ExtraFields: schemas.BifrostResponseExtraFields{ Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o-mini", RequestType: schemas.ChatCompletionRequest, }, }, } _, _, err = setup.Plugin.PostLLMHook(ctx, response, nil) if err != nil { t.Fatalf("[%s] PostHook failed: %v", tc.Name, err) } WaitForCache(setup.Plugin) t.Logf("[%s] First response cached", tc.Name) // Second request with different temperature - should be cache miss t.Logf("[%s] Testing second request with temperature=0.5 (expecting cache miss)...", tc.Name) ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params") request2 := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, ChatRequest: &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, Model: "gpt-4o-mini", Input: []schemas.ChatMessage{ { Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ ContentStr: bifrost.Ptr("Parameter test for " + tc.Name), }, }, }, Params: &schemas.ChatParameters{ Temperature: bifrost.Ptr(0.5), // Different temperature MaxCompletionTokens: bifrost.Ptr(100), }, }, } _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request2) if err != nil { t.Fatalf("[%s] Second PreHook failed: %v", tc.Name, err) } if shortCircuit2 != nil { t.Fatalf("[%s] Expected cache miss due to different temperature, but got cache hit", tc.Name) } t.Logf("[%s] Parameter filtering test passed!", tc.Name) }) } } // TestSemanticCache_AllVectorStores_EmbeddingRequest tests embedding request caching across all vector stores func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) { for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() // Use unique cache key per test run to avoid stale data from previous runs // (Pinecone Local doesn't support deletion by metadata filter) testRunID := uuid.New().String()[:8] cacheKey := "test-" + strings.ToLower(tc.Name) + "-embedding-" + testRunID embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding with " + tc.Name + " " + testRunID}) // Cache first request ctx1 := CreateContextWithCacheKey(cacheKey) t.Logf("[%s] Making first embedding request...", tc.Name) response1, err1 := setup.Client.EmbeddingRequest(ctx1, embeddingRequest) if err1 != nil { t.Skipf("[%s] First embedding request failed (likely no API key): %v", tc.Name, err1) return } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1}) WaitForCache(setup.Plugin) // Second request - should be cache hit ctx2 := CreateContextWithCacheKey(cacheKey) t.Logf("[%s] Making second embedding request (expecting cache hit)...", tc.Name) response2, err2 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) if err2 != nil { t.Fatalf("[%s] Second embedding request failed: %v", tc.Name, err2.Error.Message) } AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct") t.Logf("[%s] Embedding request caching test passed!", tc.Name) }) } }