first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/plugins/semanticcache/plugin_integration_test.go
+++ b/plugins/semanticcache/plugin_integration_test.go
@@ -0,0 +1,736 @@
+package semanticcache
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/google/uuid"
+	bifrost "github.com/maximhq/bifrost/core"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// TestSemanticCacheBasicFlow tests the complete semantic cache flow
+func TestSemanticCacheBasicFlow(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	ctx.SetValue(CacheKey, "test-cache-enabled")
+
+	// Test request
+	request := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input: []schemas.ChatMessage{
+				{
+					Role: schemas.ChatMessageRoleUser,
+					Content: &schemas.ChatMessageContent{
+						ContentStr: bifrost.Ptr("Hello, world!"),
+					},
+				},
+			},
+			Params: &schemas.ChatParameters{
+				Temperature:         bifrost.Ptr(0.7),
+				MaxCompletionTokens: bifrost.Ptr(100),
+			},
+		},
+	}
+
+	t.Log("Testing first request (cache miss)...")
+
+	// First request - should be a cache miss
+	modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
+	if err != nil {
+		t.Fatalf("PreLLMHook failed: %v", err)
+	}
+
+	if shortCircuit != nil {
+		t.Fatal("Expected cache miss, but got cache hit")
+	}
+
+	if modifiedReq == nil {
+		t.Fatal("Modified request is nil")
+	}
+
+	t.Log("✅ Cache miss handled correctly")
+
+	// Simulate a response
+	response := &schemas.BifrostResponse{
+		ChatResponse: &schemas.BifrostChatResponse{
+			ID: uuid.New().String(),
+			Choices: []schemas.BifrostResponseChoice{
+				{
+					Index: 0,
+					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
+						Message: &schemas.ChatMessage{
+							Role: schemas.ChatMessageRoleAssistant,
+							Content: &schemas.ChatMessageContent{
+								ContentStr: bifrost.Ptr("Hello! How can I help you today?"),
+							}},
+					},
+				},
+			},
+			ExtraFields: schemas.BifrostResponseExtraFields{
+				Provider:               schemas.OpenAI,
+				OriginalModelRequested: "gpt-4o-mini",
+				RequestType:            schemas.ChatCompletionRequest,
+			},
+		},
+	}
+
+	// Capture original response content for comparison
+	var originalContent string
+	if len(response.ChatResponse.Choices) > 0 && response.ChatResponse.Choices[0].Message.Content.ContentStr != nil {
+		originalContent = *response.ChatResponse.Choices[0].Message.Content.ContentStr
+	}
+	if originalContent == "" {
+		t.Fatal("Original response content is empty")
+	}
+	t.Logf("Original response content: %s", originalContent)
+
+	// Cache the response
+	t.Log("Caching response...")
+	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
+	if err != nil {
+		t.Fatalf("PostLLMHook failed: %v", err)
+	}
+
+	// Wait for async caching to complete
+	WaitForCache(setup.Plugin)
+	t.Log("✅ Response cached successfully")
+
+	// Second request - should be a cache hit
+	t.Log("Testing second identical request (expecting cache hit)...")
+
+	// Reset context for second request
+	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	ctx2.SetValue(CacheKey, "test-cache-enabled")
+
+	modifiedReq2, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
+	if err != nil {
+		t.Fatalf("Second PreLLMHook failed: %v", err)
+	}
+
+	if shortCircuit2 == nil {
+		t.Fatal("expected cache hit on identical request")
+		return
+	}
+
+	if shortCircuit2.Response == nil {
+		t.Fatal("Cache hit but response is nil")
+	}
+
+	if modifiedReq2 == nil {
+		t.Fatal("Modified request is nil on cache hit")
+	}
+
+	t.Log("✅ Cache hit detected and response returned")
+
+	// Verify the cached response
+	if len(shortCircuit2.Response.ChatResponse.Choices) == 0 {
+		t.Fatal("Cached response has no choices")
+	}
+
+	cachedContent := shortCircuit2.Response.ChatResponse.Choices[0].Message.Content.ContentStr
+	if cachedContent == nil || *cachedContent == "" {
+		t.Fatal("Cached response content is empty")
+	}
+
+	t.Logf("✅ Cached response content: %s", *cachedContent)
+
+	// Compare original and cached content
+	cachedContentStr := *cachedContent
+	// Trim whitespace and newlines for comparison
+	originalContentTrimmed := strings.TrimSpace(originalContent)
+	cachedContentTrimmed := strings.TrimSpace(cachedContentStr)
+
+	if originalContentTrimmed != cachedContentTrimmed {
+		t.Fatalf("❌ Content mismatch: original='%s', cached='%s'", originalContentTrimmed, cachedContentTrimmed)
+	}
+
+	t.Log("✅ Content verification passed - original and cached responses match")
+	t.Log("🎉 Basic semantic cache flow test passed!")
+}
+
+// TestSemanticCacheStrictFiltering stores a response for one request and then
+// checks that two near-identical requests - one with a different temperature,
+// one with a different model - are both reported as cache misses.
+func TestSemanticCacheStrictFiltering(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// weatherRequest builds a fresh copy of the baseline request
+	// (gpt-4o-mini, temperature 0.7, 100 max completion tokens). Each scenario
+	// below takes its own copy and changes at most one field.
+	weatherRequest := func() *schemas.BifrostRequest {
+		return &schemas.BifrostRequest{
+			RequestType: schemas.ChatCompletionRequest,
+			ChatRequest: &schemas.BifrostChatRequest{
+				Provider: schemas.OpenAI,
+				Model:    "gpt-4o-mini",
+				Input: []schemas.ChatMessage{
+					{
+						Role: schemas.ChatMessageRoleUser,
+						Content: &schemas.ChatMessageContent{
+							ContentStr: bifrost.Ptr("What is the weather like?"),
+						},
+					},
+				},
+				Params: &schemas.ChatParameters{
+					Temperature:         bifrost.Ptr(0.7),
+					MaxCompletionTokens: bifrost.Ptr(100),
+				},
+			},
+		}
+	}
+
+	// --- Scenario 1: baseline request, nothing cached yet ---
+	baselineCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	baselineCtx.SetValue(CacheKey, "test-cache-enabled")
+
+	t.Log("Testing first request with temperature=0.7...")
+
+	_, baselineHit, err := setup.Plugin.PreLLMHook(baselineCtx, weatherRequest())
+	switch {
+	case err != nil:
+		t.Fatalf("First PreLLMHook failed: %v", err)
+	case baselineHit != nil:
+		t.Fatal("Expected cache miss for first request")
+	}
+
+	// Hand the plugin a synthetic answer for the baseline request.
+	sunnyAnswer := &schemas.BifrostResponse{
+		ChatResponse: &schemas.BifrostChatResponse{
+			ID: uuid.New().String(),
+			Choices: []schemas.BifrostResponseChoice{
+				{
+					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
+						Message: &schemas.ChatMessage{
+							Role: schemas.ChatMessageRoleAssistant,
+							Content: &schemas.ChatMessageContent{
+								ContentStr: bifrost.Ptr("It's sunny today!"),
+							},
+						},
+					},
+				},
+			},
+			ExtraFields: schemas.BifrostResponseExtraFields{
+				Provider:               schemas.OpenAI,
+				OriginalModelRequested: "gpt-4o-mini",
+				RequestType:            schemas.ChatCompletionRequest,
+			},
+		},
+	}
+
+	if _, _, err := setup.Plugin.PostLLMHook(baselineCtx, sunnyAnswer, nil); err != nil {
+		t.Fatalf("PostLLMHook failed: %v", err)
+	}
+
+	WaitForCache(setup.Plugin)
+	t.Log("✅ First response cached")
+
+	// --- Scenario 2: same prompt and model, temperature changed to 0.5 ---
+	t.Log("Testing second request with temperature=0.5 (expecting cache miss)...")
+
+	coolerCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	coolerCtx.SetValue(CacheKey, "test-cache-enabled")
+
+	coolerRequest := weatherRequest()
+	coolerRequest.ChatRequest.Params.Temperature = bifrost.Ptr(0.5) // Different temperature
+
+	_, coolerHit, err := setup.Plugin.PreLLMHook(coolerCtx, coolerRequest)
+	switch {
+	case err != nil:
+		t.Fatalf("Second PreLLMHook failed: %v", err)
+	case coolerHit != nil:
+		t.Fatal("Expected cache miss due to different temperature, but got cache hit")
+	}
+
+	t.Log("✅ Strict filtering working - different parameters result in cache miss")
+
+	// --- Scenario 3: same prompt and parameters, model changed ---
+	t.Log("Testing third request with different model (expecting cache miss)...")
+
+	otherModelCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	otherModelCtx.SetValue(CacheKey, "test-cache-enabled")
+
+	otherModelRequest := weatherRequest()
+	otherModelRequest.ChatRequest.Model = "gpt-3.5-turbo" // Different model
+
+	_, otherModelHit, err := setup.Plugin.PreLLMHook(otherModelCtx, otherModelRequest)
+	switch {
+	case err != nil:
+		t.Fatalf("Third PreLLMHook failed: %v", err)
+	case otherModelHit != nil:
+		t.Fatal("Expected cache miss due to different model, but got cache hit")
+	}
+
+	t.Log("✅ Strict filtering working - different model results in cache miss")
+	t.Log("🎉 Strict filtering test passed!")
+}
+
+// TestSemanticCacheStreamingFlow feeds three synthetic stream chunks through
+// PostLLMHook and then looks the same streaming request up again.
+//
+// Outcomes:
+//   - the first lookup must be a miss;
+//   - every chunk must be accepted by PostLLMHook without error;
+//   - if the second lookup short-circuits, it must expose a non-nil stream that
+//     yields at least one chunk carrying a BifrostChatResponse;
+//   - if the second lookup is a miss, the test is reported as SKIPPED rather
+//     than passed, because retrieval was not actually verified.
+func TestSemanticCacheStreamingFlow(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	ctx.SetValue(CacheKey, "test-cache-enabled")
+
+	request := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionStreamRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input: []schemas.ChatMessage{
+				{
+					Role: schemas.ChatMessageRoleUser,
+					Content: &schemas.ChatMessageContent{
+						ContentStr: bifrost.Ptr("Tell me a short story"),
+					},
+				},
+			},
+			Params: &schemas.ChatParameters{
+				Temperature: bifrost.Ptr(0.8),
+			},
+		},
+	}
+
+	t.Log("Testing streaming request (cache miss)...")
+
+	// First request - should be cache miss
+	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
+	if err != nil {
+		t.Fatalf("PreLLMHook failed: %v", err)
+	}
+
+	if shortCircuit != nil {
+		t.Fatal("Expected cache miss for streaming request")
+	}
+
+	t.Log("✅ Streaming cache miss handled correctly")
+
+	// Simulate streaming response chunks
+	t.Log("Caching streaming response chunks...")
+
+	chunks := []string{
+		"Once upon a time,",
+		" there was a brave",
+		" knight who saved the day.",
+	}
+
+	for i, chunk := range chunks {
+		// Only the last chunk carries a finish reason.
+		var finishReason *string
+		if i == len(chunks)-1 {
+			finishReason = bifrost.Ptr("stop")
+		}
+
+		chunkResponse := &schemas.BifrostResponse{
+			ChatResponse: &schemas.BifrostChatResponse{
+				ID: uuid.New().String(),
+				Choices: []schemas.BifrostResponseChoice{
+					{
+						// NOTE(review): Index is set to the chunk position, the same
+						// value as ChunkIndex below - confirm whether the plugin
+						// expects the choice index to stay 0 across one stream.
+						Index:        i,
+						FinishReason: finishReason,
+						ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{
+							Delta: &schemas.ChatStreamResponseChoiceDelta{
+								Content: bifrost.Ptr(chunk),
+							},
+						},
+					},
+				},
+				ExtraFields: schemas.BifrostResponseExtraFields{
+					Provider:               schemas.OpenAI,
+					OriginalModelRequested: "gpt-4o-mini",
+					RequestType:            schemas.ChatCompletionStreamRequest,
+					ChunkIndex:             i,
+				},
+			},
+		}
+
+		_, _, err = setup.Plugin.PostLLMHook(ctx, chunkResponse, nil)
+		if err != nil {
+			t.Fatalf("PostLLMHook failed for chunk %d: %v", i, err)
+		}
+	}
+
+	WaitForCache(setup.Plugin)
+	t.Log("✅ Streaming response chunks cached")
+
+	// Test cache retrieval for streaming
+	t.Log("Testing streaming cache retrieval...")
+
+	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	ctx2.SetValue(CacheKey, "test-cache-enabled")
+
+	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
+	if err != nil {
+		t.Fatalf("Second PreLLMHook failed: %v", err)
+	}
+
+	if shortCircuit2 == nil {
+		// A miss here is tolerated, but nothing below was exercised. Skip instead
+		// of returning so the run does not show up as a plain PASS.
+		t.Skip("⚠️ Expected streaming cache hit, but got cache miss - this may be expected with the new unified storage")
+	}
+
+	if shortCircuit2.Stream == nil {
+		t.Fatal("Cache hit but stream is nil")
+	}
+
+	t.Log("✅ Streaming cache hit detected")
+
+	// Read from the cached stream until it is closed, counting only chunks that
+	// carry a chat response.
+	chunkCount := 0
+	for chunk := range shortCircuit2.Stream {
+		if chunk.BifrostChatResponse == nil {
+			continue
+		}
+		chunkCount++
+		t.Logf("Received cached chunk %d", chunkCount)
+	}
+
+	if chunkCount == 0 {
+		t.Fatal("No chunks received from cached stream")
+	}
+
+	t.Logf("✅ Received %d cached chunks", chunkCount)
+	t.Log("🎉 Streaming cache test passed!")
+}
+
+// TestSemanticCache_NoCacheWhenKeyMissing calls PreLLMHook with a context that
+// never had CacheKey set and expects the hook to succeed without
+// short-circuiting.
+func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
+	t.Log("Testing cache behavior when cache key is missing...")
+
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// Deliberately left without a CacheKey value.
+	keylessCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+
+	userMessage := schemas.ChatMessage{
+		Role: schemas.ChatMessageRoleUser,
+		Content: &schemas.ChatMessageContent{
+			ContentStr: bifrost.Ptr("Test message"),
+		},
+	}
+	probe := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input:    []schemas.ChatMessage{userMessage},
+		},
+	}
+
+	_, hit, err := setup.Plugin.PreLLMHook(keylessCtx, probe)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed: %v", err)
+	case hit != nil:
+		t.Fatal("Expected no caching when cache key is not set, but got cache hit")
+	}
+
+	t.Log("✅ Cache properly disabled when no cache key is set")
+	t.Log("🎉 No cache key test passed!")
+}
+
+// TestSemanticCache_CustomTTLHandling runs the miss-then-store path with a
+// one-minute time.Duration placed in the context under CacheTTLKey.
+//
+// It asserts only that the first lookup is a miss and that both hooks return
+// no error; the stored entry is not read back and expiry is not checked.
+func TestSemanticCache_CustomTTLHandling(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// Context carrying both the cache key and a per-request TTL override.
+	ttlCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	ttlCtx.SetValue(CacheKey, "test-cache-enabled")
+	ttlCtx.SetValue(CacheTTLKey, 1*time.Minute) // Custom TTL
+
+	ttlRequest := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input: []schemas.ChatMessage{
+				{
+					Role: schemas.ChatMessageRoleUser,
+					Content: &schemas.ChatMessageContent{
+						ContentStr: bifrost.Ptr("TTL test message"),
+					},
+				},
+			},
+		},
+	}
+
+	// Nothing has been stored yet, so the lookup has to miss.
+	_, hit, err := setup.Plugin.PreLLMHook(ttlCtx, ttlRequest)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed: %v", err)
+	case hit != nil:
+		t.Fatal("Expected cache miss, but got cache hit")
+	}
+
+	// Synthetic provider answer to hand to the post hook.
+	assistantReply := &schemas.ChatMessage{
+		Role: "assistant",
+		Content: &schemas.ChatMessageContent{
+			ContentStr: bifrost.Ptr("TTL test response"),
+		},
+	}
+	ttlResponse := &schemas.BifrostResponse{
+		ChatResponse: &schemas.BifrostChatResponse{
+			ID: "ttl-test-response",
+			Choices: []schemas.BifrostResponseChoice{
+				{
+					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
+						Message: assistantReply,
+					},
+				},
+			},
+			ExtraFields: schemas.BifrostResponseExtraFields{
+				Provider:               schemas.OpenAI,
+				OriginalModelRequested: "gpt-4o-mini",
+				RequestType:            schemas.ChatCompletionRequest,
+			},
+		},
+	}
+
+	if _, _, err := setup.Plugin.PostLLMHook(ttlCtx, ttlResponse, nil); err != nil {
+		t.Fatalf("PostLLMHook failed: %v", err)
+	}
+
+	WaitForCache(setup.Plugin)
+
+	t.Log("✅ Custom TTL configuration test passed!")
+}
+
+// TestSemanticCache_CustomThresholdHandling places 0.95 in the context under
+// CacheThresholdKey and checks that a lookup for a prompt that was never
+// stored returns a miss without error.
+//
+// As the inline note says, showing that the threshold value itself is applied
+// would need a semantic-search scenario, which this test does not set up.
+func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// Context carrying the cache key plus a per-request threshold override.
+	thresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	thresholdCtx.SetValue(CacheKey, "test-cache-enabled")
+	thresholdCtx.SetValue(CacheThresholdKey, 0.95) // Very high threshold
+
+	userMessage := schemas.ChatMessage{
+		Role: schemas.ChatMessageRoleUser,
+		Content: &schemas.ChatMessageContent{
+			ContentStr: bifrost.Ptr("Threshold test message"),
+		},
+	}
+	probe := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input:    []schemas.ChatMessage{userMessage},
+		},
+	}
+
+	// Test that custom threshold is used (this would need semantic search to be fully testable)
+	_, hit, err := setup.Plugin.PreLLMHook(thresholdCtx, probe)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed: %v", err)
+	case hit != nil:
+		t.Fatal("Expected cache miss with high threshold, but got cache hit")
+	}
+
+	t.Log("✅ Custom threshold configuration test passed!")
+}
+
+// TestSemanticCache_ProviderModelCachingFlags sets CacheByProvider and
+// CacheByModel to false on the test config, stores a response for an
+// OpenAI/gpt-4o-mini request, and then looks up the same prompt addressed to
+// Anthropic/claude-3-haiku.
+//
+// The outcome of that second lookup is only logged: the test fails on hook
+// errors or on a hit for the very first lookup, never on hit-vs-miss for the
+// cross-provider request.
+//
+// NOTE(review): the flags are written to setup.Config after NewTestSetup has
+// returned; whether the already-constructed plugin observes them is not
+// visible from this file - confirm against NewTestSetup.
+func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// Test with provider/model caching disabled
+	setup.Config.CacheByProvider = bifrost.Ptr(false)
+	setup.Config.CacheByModel = bifrost.Ptr(false)
+
+	// --- Store phase: OpenAI / gpt-4o-mini ---
+	openAICtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	openAICtx.SetValue(CacheKey, "test-cache-enabled")
+
+	openAIRequest := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input: []schemas.ChatMessage{
+				{
+					Role: schemas.ChatMessageRoleUser,
+					Content: &schemas.ChatMessageContent{
+						ContentStr: bifrost.Ptr("Provider model flags test"),
+					},
+				},
+			},
+		},
+	}
+
+	_, openAIHit, err := setup.Plugin.PreLLMHook(openAICtx, openAIRequest)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed: %v", err)
+	case openAIHit != nil:
+		t.Fatal("Expected cache miss, but got cache hit")
+	}
+
+	// Synthetic answer handed to the post hook for storage.
+	assistantReply := &schemas.ChatMessage{
+		Role: "assistant",
+		Content: &schemas.ChatMessageContent{
+			ContentStr: bifrost.Ptr("Provider model test response"),
+		},
+	}
+	stored := &schemas.BifrostResponse{
+		ChatResponse: &schemas.BifrostChatResponse{
+			ID: "provider-model-test",
+			Choices: []schemas.BifrostResponseChoice{
+				{
+					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
+						Message: assistantReply,
+					},
+				},
+			},
+			ExtraFields: schemas.BifrostResponseExtraFields{
+				Provider:               schemas.OpenAI,
+				OriginalModelRequested: "gpt-4o-mini",
+				RequestType:            schemas.ChatCompletionRequest,
+			},
+		},
+	}
+
+	if _, _, err := setup.Plugin.PostLLMHook(openAICtx, stored, nil); err != nil {
+		t.Fatalf("PostLLMHook failed: %v", err)
+	}
+
+	WaitForCache(setup.Plugin)
+
+	// --- Lookup phase: same prompt, different provider and model ---
+	anthropicRequest := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.Anthropic, // Different provider
+			Model:    "claude-3-haiku",  // Different model
+			Input: []schemas.ChatMessage{
+				{
+					Role: schemas.ChatMessageRoleUser,
+					Content: &schemas.ChatMessageContent{
+						ContentStr: bifrost.Ptr("Provider model flags test"), // Same content
+					},
+				},
+			},
+		},
+	}
+
+	anthropicCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	anthropicCtx.SetValue(CacheKey, "test-cache-enabled")
+
+	_, anthropicHit, err := setup.Plugin.PreLLMHook(anthropicCtx, anthropicRequest)
+	if err != nil {
+		t.Fatalf("Second PreLLMHook failed: %v", err)
+	}
+
+	// Hit or miss is reported but not asserted; per the original note the result
+	// depends on how the plugin derives its hash.
+	gotHit := anthropicHit != nil
+	t.Logf("Cache behavior with disabled provider/model flags: hit=%v", gotHit)
+
+	t.Log("✅ Provider/model caching flags test passed!")
+}
+
+// TestSemanticCache_ConfigurationEdgeCases passes wrongly-typed override values
+// through the context - a string under CacheTTLKey, then a string under
+// CacheThresholdKey - and expects PreLLMHook to return neither an error nor a
+// cache hit in either case.
+func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) {
+	setup := NewTestSetup(t)
+	defer setup.Cleanup()
+
+	// The same never-cached request is used for both lookups.
+	userMessage := schemas.ChatMessage{
+		Role: schemas.ChatMessageRoleUser,
+		Content: &schemas.ChatMessageContent{
+			ContentStr: bifrost.Ptr("Edge case test"),
+		},
+	}
+	probe := &schemas.BifrostRequest{
+		RequestType: schemas.ChatCompletionRequest,
+		ChatRequest: &schemas.BifrostChatRequest{
+			Provider: schemas.OpenAI,
+			Model:    "gpt-4o-mini",
+			Input:    []schemas.ChatMessage{userMessage},
+		},
+	}
+
+	// Case 1: TTL override holds a string instead of a duration.
+	badTTLCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	badTTLCtx.SetValue(CacheKey, "test-cache-enabled")
+	badTTLCtx.SetValue(CacheTTLKey, "not-a-duration") // Invalid TTL type
+
+	_, ttlHit, err := setup.Plugin.PreLLMHook(badTTLCtx, probe)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed with invalid TTL: %v", err)
+	case ttlHit != nil:
+		t.Fatal("Unexpected cache hit with invalid TTL")
+	}
+
+	// Case 2: threshold override holds a string instead of a float.
+	badThresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
+	badThresholdCtx.SetValue(CacheKey, "test-cache-enabled")
+	badThresholdCtx.SetValue(CacheThresholdKey, "not-a-float") // Invalid threshold type
+
+	_, thresholdHit, err := setup.Plugin.PreLLMHook(badThresholdCtx, probe)
+	switch {
+	case err != nil:
+		t.Fatalf("PreLLMHook failed with invalid threshold: %v", err)
+	case thresholdHit != nil:
+		t.Fatal("Unexpected cache hit with invalid threshold")
+	}
+
+	t.Log("✅ Configuration edge cases test passed!")
+}