package semanticcache

import (
	"context"
	"strings"
	"testing"
	"time"

	"github.com/google/uuid"
	bifrost "github.com/maximhq/bifrost/core"
	"github.com/maximhq/bifrost/core/schemas"
)

// TestSemanticCacheBasicFlow tests the complete semantic cache flow: a first
// request misses, its response is stored through PostLLMHook, and an identical
// second request is short-circuited with the same content.
func TestSemanticCacheBasicFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	// Test request
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Hello, world!"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	t.Log("Testing first request (cache miss)...")

	// First request - should be a cache miss
	modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}
	if modifiedReq == nil {
		t.Fatal("Modified request is nil")
	}
	t.Log("✅ Cache miss handled correctly")

	// Simulate a response
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					Index: 0,
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("Hello! How can I help you today?"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	// Capture original response content for comparison
	var originalContent string
	if len(response.ChatResponse.Choices) > 0 && response.ChatResponse.Choices[0].Message.Content.ContentStr != nil {
		originalContent = *response.ChatResponse.Choices[0].Message.Content.ContentStr
	}
	if originalContent == "" {
		t.Fatal("Original response content is empty")
	}
	t.Logf("Original response content: %s", originalContent)

	// Cache the response
	t.Log("Caching response...")
	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	// Wait for async caching to complete
	WaitForCache(setup.Plugin)
	t.Log("✅ Response cached successfully")

	// Second request - should be a cache hit
	t.Log("Testing second identical request (expecting cache hit)...")

	// Reset context for second request
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	modifiedReq2, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}
	if shortCircuit2 == nil {
		t.Fatal("expected cache hit on identical request")
	}
	if shortCircuit2.Response == nil {
		t.Fatal("Cache hit but response is nil")
	}
	if modifiedReq2 == nil {
		t.Fatal("Modified request is nil on cache hit")
	}
	t.Log("✅ Cache hit detected and response returned")

	// Verify the cached response. Every pointer hop is checked so that a
	// malformed cache entry fails the test with a message instead of panicking
	// on a nil dereference.
	cachedChat := shortCircuit2.Response.ChatResponse
	if cachedChat == nil {
		t.Fatal("Cached response has no chat response")
	}
	if len(cachedChat.Choices) == 0 {
		t.Fatal("Cached response has no choices")
	}
	cachedChoice := &cachedChat.Choices[0]
	if cachedChoice.ChatNonStreamResponseChoice == nil || cachedChoice.Message == nil || cachedChoice.Message.Content == nil {
		t.Fatal("Cached response choice has no message content")
	}

	cachedContent := cachedChoice.Message.Content.ContentStr
	if cachedContent == nil || *cachedContent == "" {
		t.Fatal("Cached response content is empty")
	}
	t.Logf("✅ Cached response content: %s", *cachedContent)

	// Compare original and cached content, ignoring surrounding whitespace and newlines
	originalContentTrimmed := strings.TrimSpace(originalContent)
	cachedContentTrimmed := strings.TrimSpace(*cachedContent)
	if originalContentTrimmed != cachedContentTrimmed {
		t.Fatalf("❌ Content mismatch: original='%s', cached='%s'", originalContentTrimmed, cachedContentTrimmed)
	}

	t.Log("✅ Content verification passed - original and cached responses match")
	t.Log("🎉 Basic semantic cache flow test passed!")
}

// TestSemanticCacheStrictFiltering tests that the cache respects parameter
// differences: after caching a response for one request, the same prompt with a
// different temperature or a different model must not be short-circuited.
func TestSemanticCacheStrictFiltering(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	// Base request
	baseRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	t.Log("Testing first request with temperature=0.7...")

	// First request
	_, shortCircuit1, err := setup.Plugin.PreLLMHook(ctx, baseRequest)
	if err != nil {
		t.Fatalf("First PreLLMHook failed: %v", err)
	}
	if shortCircuit1 != nil {
		t.Fatal("Expected cache miss for first request")
	}

	// Cache a response
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("It's sunny today!"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ First response cached")

	// Second request with different temperature - should be cache miss
	t.Log("Testing second request with temperature=0.5 (expecting cache miss)...")

	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	modifiedRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.5), // Different temperature
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, modifiedRequest)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}
	if shortCircuit2 != nil {
		t.Fatal("Expected cache miss due to different temperature, but got cache hit")
	}
	t.Log("✅ Strict filtering working - different parameters result in cache miss")

	// Third request with different model - should be cache miss
	t.Log("Testing third request with different model (expecting cache miss)...")

	ctx3 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx3.SetValue(CacheKey, "test-cache-enabled")

	modifiedRequest2 := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-3.5-turbo", // Different model
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	_, shortCircuit3, err := setup.Plugin.PreLLMHook(ctx3, modifiedRequest2)
	if err != nil {
		t.Fatalf("Third PreLLMHook failed: %v", err)
	}
	if shortCircuit3 != nil {
		t.Fatal("Expected cache miss due to different model, but got cache hit")
	}
	t.Log("✅ Strict filtering working - different model results in cache miss")

	t.Log("🎉 Strict filtering test passed!")
}

// TestSemanticCacheStreamingFlow tests streaming response caching: a streaming
// request misses, three chunks are fed through PostLLMHook (the last one
// carrying a "stop" finish reason), and an identical request is then expected
// to be short-circuited with a stream that yields at least one chat chunk.
//
// If the second request is not short-circuited the test is reported as SKIP
// rather than PASS, so a run that verified nothing is visible in the output.
func TestSemanticCacheStreamingFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionStreamRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Tell me a short story"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature: bifrost.Ptr(0.8),
			},
		},
	}

	t.Log("Testing streaming request (cache miss)...")

	// First request - should be cache miss
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss for streaming request")
	}
	t.Log("✅ Streaming cache miss handled correctly")

	// Simulate streaming response chunks
	t.Log("Caching streaming response chunks...")

	chunks := []string{
		"Once upon a time,",
		" there was a brave",
		" knight who saved the day.",
	}

	for i, chunk := range chunks {
		// Only the final chunk carries a finish reason.
		var finishReason *string
		if i == len(chunks)-1 {
			finishReason = bifrost.Ptr("stop")
		}

		chunkResponse := &schemas.BifrostResponse{
			ChatResponse: &schemas.BifrostChatResponse{
				ID: uuid.New().String(),
				Choices: []schemas.BifrostResponseChoice{
					{
						Index:        i,
						FinishReason: finishReason,
						ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{
							Delta: &schemas.ChatStreamResponseChoiceDelta{
								Content: bifrost.Ptr(chunk),
							},
						},
					},
				},
				ExtraFields: schemas.BifrostResponseExtraFields{
					Provider:               schemas.OpenAI,
					OriginalModelRequested: "gpt-4o-mini",
					RequestType:            schemas.ChatCompletionStreamRequest,
					ChunkIndex:             i,
				},
			},
		}

		_, _, err = setup.Plugin.PostLLMHook(ctx, chunkResponse, nil)
		if err != nil {
			t.Fatalf("PostLLMHook failed for chunk %d: %v", i, err)
		}
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ Streaming response chunks cached")

	// Test cache retrieval for streaming
	t.Log("Testing streaming cache retrieval...")

	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}
	if shortCircuit2 == nil {
		// A miss is tolerated here, but the remaining assertions cannot run, so
		// mark the test skipped instead of letting it pass silently.
		t.Skip("⚠️ Expected streaming cache hit, but got cache miss - this may be expected with the new unified storage")
	}
	if shortCircuit2.Stream == nil {
		t.Fatal("Cache hit but stream is nil")
	}
	t.Log("✅ Streaming cache hit detected")

	// Read from the cached stream, counting only chunks that carry a chat response
	chunkCount := 0
	for chunk := range shortCircuit2.Stream {
		if chunk.BifrostChatResponse == nil {
			continue
		}
		chunkCount++
		t.Logf("Received cached chunk %d", chunkCount)
	}
	if chunkCount == 0 {
		t.Fatal("No chunks received from cached stream")
	}

	t.Logf("✅ Received %d cached chunks", chunkCount)
	t.Log("🎉 Streaming cache test passed!")
}

// TestSemanticCache_NoCacheWhenKeyMissing verifies cache is disabled when cache
// key is missing from context: PreLLMHook must neither error nor short-circuit.
func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
	t.Log("Testing cache behavior when cache key is missing...")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Don't set the cache key - cache should be disabled
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Test message"),
					},
				},
			},
		},
	}

	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected no caching when cache key is not set, but got cache hit")
	}

	t.Log("✅ Cache properly disabled when no cache key is set")
	t.Log("🎉 No cache key test passed!")
}

// TestSemanticCache_CustomTTLHandling verifies the hooks accept a custom TTL
// value supplied through the context.
//
// NOTE(review): this test only checks that PreLLMHook and PostLLMHook succeed
// when CacheTTLKey is set; it does not assert that the entry expires after the
// configured duration.
func TestSemanticCache_CustomTTLHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Configure the request context with a custom TTL key
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")
	ctx.SetValue(CacheTTLKey, 1*time.Minute) // Custom TTL

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("TTL test message"),
					},
				},
			},
		},
	}

	// First request - cache miss
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Simulate response and cache it
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "ttl-test-response",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("TTL test response"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ Custom TTL configuration test passed!")
}

// TestSemanticCache_CustomThresholdHandling verifies the hooks accept a custom
// similarity threshold supplied through the context and that a first request
// made with that threshold is a cache miss.
func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Configure the request context with a custom threshold key
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")
	ctx.SetValue(CacheThresholdKey, 0.95) // Very high threshold

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Threshold test message"),
					},
				},
			},
		},
	}

	// Test that custom threshold is used (this would need semantic search to be fully testable)
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss with high threshold, but got cache hit")
	}

	t.Log("✅ Custom threshold configuration test passed!")
}

// TestSemanticCache_ProviderModelCachingFlags exercises the hooks with the
// provider/model caching flags turned off.
//
// The outcome of the cross-provider lookup is only logged, not asserted, so
// this test fails only if one of the hooks returns an error or the very first
// request is unexpectedly short-circuited.
func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Test with provider/model caching disabled.
	// NOTE(review): Config is mutated after NewTestSetup has returned; confirm
	// the plugin reads these flags per request rather than copying them at init.
	setup.Config.CacheByProvider = bifrost.Ptr(false)
	setup.Config.CacheByModel = bifrost.Ptr(false)

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	request1 := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Provider model flags test"),
					},
				},
			},
		},
	}

	// First request with OpenAI
	_, shortCircuit1, err := setup.Plugin.PreLLMHook(ctx, request1)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit1 != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Cache the response
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "provider-model-test",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("Provider model test response"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)

	// Second request with different provider - should potentially hit cache since provider is not considered
	request2 := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.Anthropic, // Different provider
			Model:    "claude-3-haiku",  // Different model
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Provider model flags test"), // Same content
					},
				},
			},
		},
	}

	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request2)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	// With provider/model caching disabled, we might get cache hits across different providers/models
	// This behavior depends on the exact implementation of hash generation
	t.Logf("Cache behavior with disabled provider/model flags: hit=%v", shortCircuit2 != nil)

	t.Log("✅ Provider/model caching flags test passed!")
}

// TestSemanticCache_ConfigurationEdgeCases verifies edge cases in configuration
// handling: context values of the wrong type for the TTL and threshold keys
// must not make PreLLMHook return an error or produce a cache hit.
func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Test with invalid TTL type in context
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")
	ctx.SetValue(CacheTTLKey, "not-a-duration") // Invalid TTL type

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Edge case test"),
					},
				},
			},
		},
	}

	// Should handle invalid TTL gracefully
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed with invalid TTL: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Unexpected cache hit with invalid TTL")
	}

	// Test with invalid threshold type
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")
	ctx2.SetValue(CacheThresholdKey, "not-a-float") // Invalid threshold type

	// Should handle invalid threshold gracefully
	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed with invalid threshold: %v", err)
	}
	if shortCircuit2 != nil {
		t.Fatal("Unexpected cache hit with invalid threshold")
	}

	t.Log("✅ Configuration edge cases test passed!")
}