// bifrost/plugins/semanticcache/plugin_integration_test.go

package semanticcache

import (
	"context"
	"strings"
	"testing"
	"time"

	"github.com/google/uuid"
	bifrost "github.com/maximhq/bifrost/core"
	"github.com/maximhq/bifrost/core/schemas"
)

// TestSemanticCacheBasicFlow exercises the miss -> store -> hit cycle for a
// non-streaming chat completion request:
//
//  1. The first PreLLMHook call must not short-circuit.
//  2. PostLLMHook is given a simulated provider response to store.
//  3. A second PreLLMHook call with the same request on a fresh context must
//     short-circuit with a response whose content equals the stored one.
func TestSemanticCacheBasicFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	// Test request
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Hello, world!"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	t.Log("Testing first request (cache miss)...")

	// First request - should be a cache miss
	modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}

	if shortCircuit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	if modifiedReq == nil {
		t.Fatal("Modified request is nil")
	}

	t.Log("✅ Cache miss handled correctly")

	// The content we hand to the plugin; kept as a constant so the later
	// comparison does not need to read it back out of the response struct.
	const originalContent = "Hello! How can I help you today?"

	// Simulate a response
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					Index: 0,
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr(originalContent),
							}},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}
	t.Logf("Original response content: %s", originalContent)

	// Cache the response
	t.Log("Caching response...")
	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	// Wait for async caching to complete
	WaitForCache(setup.Plugin)
	t.Log("✅ Response cached successfully")

	// Second request - should be a cache hit
	t.Log("Testing second identical request (expecting cache hit)...")

	// Fresh context for the second request so nothing set on the first
	// context object is carried over into the lookup.
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	modifiedReq2, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	if shortCircuit2 == nil {
		t.Fatal("expected cache hit on identical request")
		// Unreachable after Fatal; presumably kept for static nil-analysis tools.
		return
	}

	if shortCircuit2.Response == nil {
		t.Fatal("Cache hit but response is nil")
	}

	if modifiedReq2 == nil {
		t.Fatal("Modified request is nil on cache hit")
	}

	t.Log("✅ Cache hit detected and response returned")

	// Verify the cached response. Every pointer on the path to the content
	// string is checked so a malformed cache entry fails the test with a
	// message instead of panicking with a nil dereference.
	cachedChat := shortCircuit2.Response.ChatResponse
	if cachedChat == nil {
		t.Fatal("Cached response has no chat response")
	}

	if len(cachedChat.Choices) == 0 {
		t.Fatal("Cached response has no choices")
	}

	firstChoice := &cachedChat.Choices[0]
	if firstChoice.ChatNonStreamResponseChoice == nil ||
		firstChoice.Message == nil ||
		firstChoice.Message.Content == nil {
		t.Fatal("Cached response choice has no message content")
	}

	cachedContent := firstChoice.Message.Content.ContentStr
	if cachedContent == nil || *cachedContent == "" {
		t.Fatal("Cached response content is empty")
	}

	t.Logf("✅ Cached response content: %s", *cachedContent)

	// Compare original and cached content, ignoring surrounding whitespace.
	originalContentTrimmed := strings.TrimSpace(originalContent)
	cachedContentTrimmed := strings.TrimSpace(*cachedContent)

	if originalContentTrimmed != cachedContentTrimmed {
		t.Fatalf("❌ Content mismatch: original='%s', cached='%s'", originalContentTrimmed, cachedContentTrimmed)
	}

	t.Log("✅ Content verification passed - original and cached responses match")
	t.Log("🎉 Basic semantic cache flow test passed!")
}

// TestSemanticCacheStrictFiltering stores a response for one request and then
// checks that requests with the same prompt but a different temperature, or a
// different model, are not short-circuited by the plugin.
func TestSemanticCacheStrictFiltering(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// All three requests carry the same user prompt; each call returns a
	// freshly allocated message slice so the requests share no memory.
	weatherInput := func() []schemas.ChatMessage {
		return []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: bifrost.Ptr("What is the weather like?"),
				},
			},
		}
	}
	// Parameters differ only in temperature across the requests.
	paramsAt := func(temperature float64) *schemas.ChatParameters {
		return &schemas.ChatParameters{
			Temperature:         bifrost.Ptr(temperature),
			MaxCompletionTokens: bifrost.Ptr(100),
		}
	}

	seedCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	seedCtx.SetValue(CacheKey, "test-cache-enabled")

	// Request whose response gets stored.
	seedRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    weatherInput(),
			Params:   paramsAt(0.7),
		},
	}

	t.Log("Testing first request with temperature=0.7...")

	_, seedHit, err := setup.Plugin.PreLLMHook(seedCtx, seedRequest)
	switch {
	case err != nil:
		t.Fatalf("First PreLLMHook failed: %v", err)
	case seedHit != nil:
		t.Fatal("Expected cache miss for first request")
	}

	// Simulated provider answer handed to the plugin for storage.
	assistantReply := &schemas.ChatMessage{
		Role: schemas.ChatMessageRoleAssistant,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("It's sunny today!"),
		},
	}
	seedResponse := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: assistantReply,
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	if _, _, err := setup.Plugin.PostLLMHook(seedCtx, seedResponse, nil); err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ First response cached")

	// Same prompt and model, different temperature: must not be served from cache.
	t.Log("Testing second request with temperature=0.5 (expecting cache miss)...")

	tempCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	tempCtx.SetValue(CacheKey, "test-cache-enabled")

	tempRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    weatherInput(),
			Params:   paramsAt(0.5), // Different temperature
		},
	}

	_, tempHit, err := setup.Plugin.PreLLMHook(tempCtx, tempRequest)
	switch {
	case err != nil:
		t.Fatalf("Second PreLLMHook failed: %v", err)
	case tempHit != nil:
		t.Fatal("Expected cache miss due to different temperature, but got cache hit")
	}

	t.Log("✅ Strict filtering working - different parameters result in cache miss")

	// Same prompt and parameters, different model: must not be served from cache.
	t.Log("Testing third request with different model (expecting cache miss)...")

	modelCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	modelCtx.SetValue(CacheKey, "test-cache-enabled")

	modelRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-3.5-turbo", // Different model
			Input:    weatherInput(),
			Params:   paramsAt(0.7),
		},
	}

	_, modelHit, err := setup.Plugin.PreLLMHook(modelCtx, modelRequest)
	switch {
	case err != nil:
		t.Fatalf("Third PreLLMHook failed: %v", err)
	case modelHit != nil:
		t.Fatal("Expected cache miss due to different model, but got cache hit")
	}

	t.Log("✅ Strict filtering working - different model results in cache miss")
	t.Log("🎉 Strict filtering test passed!")
}

// TestSemanticCacheStreamingFlow feeds a sequence of simulated streaming chunks
// through PostLLMHook and then checks that an identical streaming request is
// short-circuited with a stream that yields at least one chat chunk.
//
// If the second lookup is a cache miss the test is reported as skipped rather
// than passed, so the unverified hit path stays visible in test output.
func TestSemanticCacheStreamingFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionStreamRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Tell me a short story"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature: bifrost.Ptr(0.8),
			},
		},
	}

	t.Log("Testing streaming request (cache miss)...")

	// First request - should be cache miss
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}

	if shortCircuit != nil {
		t.Fatal("Expected cache miss for streaming request")
	}

	t.Log("✅ Streaming cache miss handled correctly")

	// Simulate streaming response chunks
	t.Log("Caching streaming response chunks...")

	chunks := []string{
		"Once upon a time,",
		" there was a brave",
		" knight who saved the day.",
	}

	for i, chunk := range chunks {
		// Only the last chunk carries a finish reason.
		var finishReason *string
		if i == len(chunks)-1 {
			finishReason = bifrost.Ptr("stop")
		}

		// NOTE(review): each chunk gets its own ID and uses the chunk position
		// as the choice Index — confirm the plugin does not rely on either
		// being constant across the chunks of one stream.
		chunkResponse := &schemas.BifrostResponse{
			ChatResponse: &schemas.BifrostChatResponse{
				ID: uuid.New().String(),
				Choices: []schemas.BifrostResponseChoice{
					{
						Index:        i,
						FinishReason: finishReason,
						ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{
							Delta: &schemas.ChatStreamResponseChoiceDelta{
								Content: bifrost.Ptr(chunk),
							},
						},
					},
				},
				ExtraFields: schemas.BifrostResponseExtraFields{
					Provider:               schemas.OpenAI,
					OriginalModelRequested: "gpt-4o-mini",
					RequestType:            schemas.ChatCompletionStreamRequest,
					ChunkIndex:             i,
				},
			},
		}

		_, _, err = setup.Plugin.PostLLMHook(ctx, chunkResponse, nil)
		if err != nil {
			t.Fatalf("PostLLMHook failed for chunk %d: %v", i, err)
		}
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ Streaming response chunks cached")

	// Test cache retrieval for streaming
	t.Log("Testing streaming cache retrieval...")

	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	if shortCircuit2 == nil {
		// A miss is tolerated here, but it means nothing below was verified;
		// Skip records that instead of letting the test count as a pass.
		t.Skip("⚠️ Expected streaming cache hit, but got cache miss - this may be expected with the new unified storage")
	}

	if shortCircuit2.Stream == nil {
		t.Fatal("Cache hit but stream is nil")
	}

	t.Log("✅ Streaming cache hit detected")

	// Drain the cached stream, counting only items that carry a chat response.
	chunkCount := 0
	for chunk := range shortCircuit2.Stream {
		if chunk.BifrostChatResponse == nil {
			continue
		}
		chunkCount++
		t.Logf("Received cached chunk %d", chunkCount)
	}

	if chunkCount == 0 {
		t.Fatal("No chunks received from cached stream")
	}

	t.Logf("✅ Received %d cached chunks", chunkCount)
	t.Log("🎉 Streaming cache test passed!")
}

// TestSemanticCache_NoCacheWhenKeyMissing checks that PreLLMHook neither errors
// nor short-circuits when the context carries no CacheKey value.
func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
	t.Log("Testing cache behavior when cache key is missing...")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Deliberately left without a CacheKey entry.
	keylessCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)

	userMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Test message"),
		},
	}
	chatRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{userMessage},
		},
	}

	_, hit, err := setup.Plugin.PreLLMHook(keylessCtx, chatRequest)
	switch {
	case err != nil:
		t.Fatalf("PreLLMHook failed: %v", err)
	case hit != nil:
		t.Fatal("Expected no caching when cache key is not set, but got cache hit")
	}

	t.Log("✅ Cache properly disabled when no cache key is set")
	t.Log("🎉 No cache key test passed!")
}

// TestSemanticCache_CustomTTLHandling stores a response while the context
// carries a custom one-minute TTL under CacheTTLKey, then checks that an
// immediate identical lookup is served from the cache.
//
// The test does not wait for expiry, so it only shows that a custom TTL does
// not prevent storing and retrieving an entry inside the TTL window.
func TestSemanticCache_CustomTTLHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Context carrying both the cache key and a per-request TTL override.
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")
	ctx.SetValue(CacheTTLKey, 1*time.Minute) // Custom TTL

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("TTL test message"),
					},
				},
			},
		},
	}

	// First request - cache miss
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}

	if shortCircuit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Simulate response and cache it
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "ttl-test-response",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: "assistant",
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("TTL test response"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)

	// The entry was stored moments ago with a one-minute TTL, so an identical
	// request on a fresh context is expected to be answered from the cache.
	lookupCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	lookupCtx.SetValue(CacheKey, "test-cache-enabled")

	_, cached, err := setup.Plugin.PreLLMHook(lookupCtx, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	if cached == nil {
		t.Fatal("Expected cache hit within custom TTL, but got cache miss")
	}

	t.Log("✅ Custom TTL configuration test passed!")
}

// TestSemanticCache_CustomThresholdHandling issues a single lookup with a 0.95
// similarity threshold set under CacheThresholdKey and expects no error and no
// short-circuit. Nothing is stored beforehand, so this only shows that the
// threshold value is accepted; it does not exercise similarity matching.
func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Context with caching enabled and a per-request threshold override.
	thresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	thresholdCtx.SetValue(CacheKey, "test-cache-enabled")
	thresholdCtx.SetValue(CacheThresholdKey, 0.95) // Very high threshold

	prompt := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Threshold test message"),
		},
	}
	chatRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{prompt},
		},
	}

	// A fuller check would need semantic search against a stored entry.
	_, hit, err := setup.Plugin.PreLLMHook(thresholdCtx, chatRequest)
	switch {
	case err != nil:
		t.Fatalf("PreLLMHook failed: %v", err)
	case hit != nil:
		t.Fatal("Expected cache miss with high threshold, but got cache hit")
	}

	t.Log("✅ Custom threshold configuration test passed!")
}

// TestSemanticCache_ProviderModelCachingFlags sets CacheByProvider and
// CacheByModel to false on the test config, stores a response for an OpenAI
// request, and then looks up the same prompt under a different provider and
// model. The outcome of that second lookup is only logged, not asserted.
func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// NOTE(review): the flags are changed after NewTestSetup has returned;
	// whether the plugin observes them depends on how it holds its config — confirm.
	setup.Config.CacheByProvider = bifrost.Ptr(false)
	setup.Config.CacheByModel = bifrost.Ptr(false)

	// Both requests carry the same prompt; a fresh slice is built per request.
	sharedPrompt := func() []schemas.ChatMessage {
		return []schemas.ChatMessage{
			{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: bifrost.Ptr("Provider model flags test"),
				},
			},
		}
	}

	openAICtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	openAICtx.SetValue(CacheKey, "test-cache-enabled")

	openAIRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    sharedPrompt(),
		},
	}

	// Initial lookup with the OpenAI request must miss.
	_, firstHit, err := setup.Plugin.PreLLMHook(openAICtx, openAIRequest)
	switch {
	case err != nil:
		t.Fatalf("PreLLMHook failed: %v", err)
	case firstHit != nil:
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Simulated provider answer handed to the plugin for storage.
	storedReply := &schemas.ChatMessage{
		Role: "assistant",
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Provider model test response"),
		},
	}
	storedResponse := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "provider-model-test",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: storedReply,
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	if _, _, err := setup.Plugin.PostLLMHook(openAICtx, storedResponse, nil); err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)

	// Same prompt, but addressed to another provider and model.
	anthropicRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.Anthropic, // Different provider
			Model:    "claude-3-haiku",  // Different model
			Input:    sharedPrompt(),    // Same content
		},
	}

	anthropicCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	anthropicCtx.SetValue(CacheKey, "test-cache-enabled")

	_, secondHit, err := setup.Plugin.PreLLMHook(anthropicCtx, anthropicRequest)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	// Hit or miss here depends on how the plugin derives its lookup key, so the
	// result is reported rather than asserted.
	gotHit := secondHit != nil
	t.Logf("Cache behavior with disabled provider/model flags: hit=%v", gotHit)

	t.Log("✅ Provider/model caching flags test passed!")
}

// TestSemanticCache_ConfigurationEdgeCases checks that PreLLMHook returns no
// error and no short-circuit when the context holds a string where a TTL or a
// similarity threshold value is expected.
func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// The same request is reused for both malformed-context lookups.
	edgeMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Edge case test"),
		},
	}
	edgeRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{edgeMessage},
		},
	}

	// Case 1: TTL entry holds a string instead of a duration.
	badTTLCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	badTTLCtx.SetValue(CacheKey, "test-cache-enabled")
	badTTLCtx.SetValue(CacheTTLKey, "not-a-duration") // Invalid TTL type

	_, ttlHit, err := setup.Plugin.PreLLMHook(badTTLCtx, edgeRequest)
	switch {
	case err != nil:
		t.Fatalf("PreLLMHook failed with invalid TTL: %v", err)
	case ttlHit != nil:
		t.Fatal("Unexpected cache hit with invalid TTL")
	}

	// Case 2: threshold entry holds a string instead of a float.
	badThresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	badThresholdCtx.SetValue(CacheKey, "test-cache-enabled")
	badThresholdCtx.SetValue(CacheThresholdKey, "not-a-float") // Invalid threshold type

	_, thresholdHit, err := setup.Plugin.PreLLMHook(badThresholdCtx, edgeRequest)
	switch {
	case err != nil:
		t.Fatalf("PreLLMHook failed with invalid threshold: %v", err)
	case thresholdHit != nil:
		t.Fatal("Unexpected cache hit with invalid threshold")
	}

	t.Log("✅ Configuration edge cases test passed!")
}