first commit
This commit is contained in:
428
plugins/semanticcache/plugin_vectorstore_test.go
Normal file
428
plugins/semanticcache/plugin_vectorstore_test.go
Normal file
@@ -0,0 +1,428 @@
|
||||
package semanticcache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/uuid"
|
||||
bifrost "github.com/maximhq/bifrost/core"
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
"github.com/maximhq/bifrost/framework/vectorstore"
|
||||
)
|
||||
|
||||
// requiresVectors returns true if the vector store requires vectors for storage.
|
||||
// Some stores (like Qdrant, Pinecone, and Weaviate) require vectors for all entries,
|
||||
// while others (like Redis) can store metadata without vectors.
|
||||
func requiresVectors(storeType vectorstore.VectorStoreType) bool {
|
||||
switch storeType {
|
||||
case vectorstore.VectorStoreTypeQdrant, vectorstore.VectorStoreTypePinecone, vectorstore.VectorStoreTypeWeaviate:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// skipIfNoAPIKey skips the test if OPENAI_API_KEY is not set and the store requires vectors.
|
||||
func skipIfNoAPIKey(t *testing.T, storeType vectorstore.VectorStoreType) {
|
||||
if requiresVectors(storeType) && os.Getenv("OPENAI_API_KEY") == "" {
|
||||
t.Skipf("Skipping %s test: OPENAI_API_KEY not set (required for embedding generation)", storeType)
|
||||
}
|
||||
}
|
||||
|
||||
// VectorStoreTestCase defines a test case for a specific vector store
|
||||
type VectorStoreTestCase struct {
|
||||
Name string
|
||||
StoreType vectorstore.VectorStoreType
|
||||
}
|
||||
|
||||
// getVectorStoreTestCases returns all vector store test cases
|
||||
func getVectorStoreTestCases() []VectorStoreTestCase {
|
||||
return []VectorStoreTestCase{
|
||||
{"Weaviate", vectorstore.VectorStoreTypeWeaviate},
|
||||
{"Redis", vectorstore.VectorStoreTypeRedis},
|
||||
{"Qdrant", vectorstore.VectorStoreTypeQdrant},
|
||||
{"Pinecone", vectorstore.VectorStoreTypePinecone},
|
||||
}
|
||||
}
|
||||
|
||||
// getDefaultTestConfig returns the default test configuration
|
||||
func getDefaultTestConfig() *Config {
|
||||
return &Config{
|
||||
Provider: schemas.OpenAI,
|
||||
EmbeddingModel: "text-embedding-3-small",
|
||||
Dimension: 1536,
|
||||
Threshold: 0.8,
|
||||
CleanUpOnShutdown: true,
|
||||
Keys: []schemas.Key{
|
||||
{
|
||||
Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"),
|
||||
Models: schemas.WhiteList{"*"},
|
||||
Weight: 1.0,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TestSemanticCache_AllVectorStores_BasicFlow tests the basic cache flow across all vector stores
|
||||
func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) {
|
||||
for _, tc := range getVectorStoreTestCases() {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
skipIfNoAPIKey(t, tc.StoreType)
|
||||
setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
|
||||
ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic")
|
||||
|
||||
// Test request
|
||||
request := &schemas.BifrostRequest{
|
||||
RequestType: schemas.ChatCompletionRequest,
|
||||
ChatRequest: &schemas.BifrostChatRequest{
|
||||
Provider: schemas.OpenAI,
|
||||
Model: "gpt-4o-mini",
|
||||
Input: []schemas.ChatMessage{
|
||||
{
|
||||
Role: schemas.ChatMessageRoleUser,
|
||||
Content: &schemas.ChatMessageContent{
|
||||
ContentStr: bifrost.Ptr("Hello from " + tc.Name + " test!"),
|
||||
},
|
||||
},
|
||||
},
|
||||
Params: &schemas.ChatParameters{
|
||||
Temperature: bifrost.Ptr(0.7),
|
||||
MaxCompletionTokens: bifrost.Ptr(100),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
t.Logf("[%s] Testing first request (cache miss)...", tc.Name)
|
||||
|
||||
// First request - should be a cache miss
|
||||
modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] PreHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
if shortCircuit != nil {
|
||||
t.Fatalf("[%s] Expected cache miss, but got cache hit", tc.Name)
|
||||
}
|
||||
|
||||
if modifiedReq == nil {
|
||||
t.Fatalf("[%s] Modified request is nil", tc.Name)
|
||||
}
|
||||
|
||||
t.Logf("[%s] Cache miss handled correctly", tc.Name)
|
||||
|
||||
// Simulate a response
|
||||
response := &schemas.BifrostResponse{
|
||||
ChatResponse: &schemas.BifrostChatResponse{
|
||||
ID: uuid.New().String(),
|
||||
Choices: []schemas.BifrostResponseChoice{
|
||||
{
|
||||
Index: 0,
|
||||
ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
|
||||
Message: &schemas.ChatMessage{
|
||||
Role: schemas.ChatMessageRoleAssistant,
|
||||
Content: &schemas.ChatMessageContent{
|
||||
ContentStr: bifrost.Ptr("Hello! Response from " + tc.Name + " test."),
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
ExtraFields: schemas.BifrostResponseExtraFields{
|
||||
Provider: schemas.OpenAI,
|
||||
OriginalModelRequested: "gpt-4o-mini",
|
||||
RequestType: schemas.ChatCompletionRequest,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Cache the response
|
||||
t.Logf("[%s] Caching response...", tc.Name)
|
||||
_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] PostHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
// Wait for async caching to complete
|
||||
WaitForCache(setup.Plugin)
|
||||
t.Logf("[%s] Response cached successfully", tc.Name)
|
||||
|
||||
// Second request - should be a cache hit
|
||||
t.Logf("[%s] Testing second identical request (expecting cache hit)...", tc.Name)
|
||||
|
||||
ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
|
||||
ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic")
|
||||
|
||||
_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] Second PreHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
if shortCircuit2 == nil {
|
||||
t.Fatalf("[%s] Expected cache hit on identical request, but got cache miss", tc.Name)
|
||||
}
|
||||
|
||||
if shortCircuit2.Response == nil {
|
||||
t.Fatalf("[%s] Cache hit but response is nil", tc.Name)
|
||||
}
|
||||
|
||||
t.Logf("[%s] Cache hit detected and response returned", tc.Name)
|
||||
t.Logf("[%s] Basic flow test passed!", tc.Name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSemanticCache_AllVectorStores_DirectHashMatch tests direct hash matching across all vector stores
|
||||
func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) {
|
||||
for _, tc := range getVectorStoreTestCases() {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
skipIfNoAPIKey(t, tc.StoreType)
|
||||
setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
|
||||
defer setup.Cleanup()
|
||||
|
||||
// Use unique cache key per test run to avoid stale data from previous runs
|
||||
// (Pinecone Local doesn't support deletion by metadata filter)
|
||||
testRunID := uuid.New().String()[:8]
|
||||
cacheKey := "test-" + strings.ToLower(tc.Name) + "-direct-" + testRunID
|
||||
|
||||
ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
|
||||
|
||||
testRequest := CreateBasicChatRequest("Direct hash test for "+tc.Name+" "+testRunID, 0.7, 50)
|
||||
|
||||
t.Logf("[%s] Making first request to populate cache...", tc.Name)
|
||||
response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
|
||||
if err1 != nil {
|
||||
t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1)
|
||||
return
|
||||
}
|
||||
AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
// Second request with direct-only cache type
|
||||
ctx2 := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
|
||||
|
||||
t.Logf("[%s] Making second request with CacheTypeDirect...", tc.Name)
|
||||
response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message)
|
||||
}
|
||||
|
||||
AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
|
||||
t.Logf("[%s] Direct hash match test passed!", tc.Name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSemanticCache_AllVectorStores_NamespaceIsolation tests that different cache keys are isolated
|
||||
func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) {
|
||||
for _, tc := range getVectorStoreTestCases() {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
skipIfNoAPIKey(t, tc.StoreType)
|
||||
setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
|
||||
defer setup.Cleanup()
|
||||
|
||||
// Use unique cache keys per test run to avoid stale data from previous runs
|
||||
// (Pinecone Local doesn't support deletion by metadata filter)
|
||||
testRunID := uuid.New().String()[:8]
|
||||
cacheKey1 := "test-" + strings.ToLower(tc.Name) + "-namespace-1-" + testRunID
|
||||
cacheKey2 := "test-" + strings.ToLower(tc.Name) + "-namespace-2-" + testRunID
|
||||
|
||||
// Cache with first key
|
||||
ctx1 := CreateContextWithCacheKey(cacheKey1)
|
||||
testRequest := CreateBasicChatRequest("Namespace isolation test for "+tc.Name+" "+testRunID, 0.7, 50)
|
||||
|
||||
t.Logf("[%s] Making request with cache key 1...", tc.Name)
|
||||
response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
|
||||
if err1 != nil {
|
||||
t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1)
|
||||
return
|
||||
}
|
||||
AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
// Try with different cache key - should miss
|
||||
ctx2 := CreateContextWithCacheKey(cacheKey2)
|
||||
|
||||
t.Logf("[%s] Making same request with different cache key (expecting miss)...", tc.Name)
|
||||
response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message)
|
||||
}
|
||||
|
||||
// Should be a cache miss because different namespace
|
||||
AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})
|
||||
|
||||
// Try with original key - should hit
|
||||
ctx3 := CreateContextWithCacheKey(cacheKey1)
|
||||
|
||||
t.Logf("[%s] Making same request with original cache key (expecting hit)...", tc.Name)
|
||||
response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest)
|
||||
if err3 != nil {
|
||||
t.Fatalf("[%s] Third request failed: %v", tc.Name, err3.Error.Message)
|
||||
}
|
||||
|
||||
AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct")
|
||||
t.Logf("[%s] Namespace isolation test passed!", tc.Name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSemanticCache_AllVectorStores_ParameterFiltering tests that different parameters don't share cache
|
||||
func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) {
|
||||
for _, tc := range getVectorStoreTestCases() {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
skipIfNoAPIKey(t, tc.StoreType)
|
||||
setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
|
||||
ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params")
|
||||
|
||||
// First request with temperature=0.7
|
||||
request1 := &schemas.BifrostRequest{
|
||||
RequestType: schemas.ChatCompletionRequest,
|
||||
ChatRequest: &schemas.BifrostChatRequest{
|
||||
Provider: schemas.OpenAI,
|
||||
Model: "gpt-4o-mini",
|
||||
Input: []schemas.ChatMessage{
|
||||
{
|
||||
Role: schemas.ChatMessageRoleUser,
|
||||
Content: &schemas.ChatMessageContent{
|
||||
ContentStr: bifrost.Ptr("Parameter test for " + tc.Name),
|
||||
},
|
||||
},
|
||||
},
|
||||
Params: &schemas.ChatParameters{
|
||||
Temperature: bifrost.Ptr(0.7),
|
||||
MaxCompletionTokens: bifrost.Ptr(100),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
t.Logf("[%s] Testing first request with temperature=0.7...", tc.Name)
|
||||
|
||||
_, shortCircuit1, err := setup.Plugin.PreLLMHook(ctx, request1)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] First PreHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
if shortCircuit1 != nil {
|
||||
t.Fatalf("[%s] Expected cache miss for first request", tc.Name)
|
||||
}
|
||||
|
||||
// Cache a response
|
||||
response := &schemas.BifrostResponse{
|
||||
ChatResponse: &schemas.BifrostChatResponse{
|
||||
ID: uuid.New().String(),
|
||||
Choices: []schemas.BifrostResponseChoice{
|
||||
{
|
||||
ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
|
||||
Message: &schemas.ChatMessage{
|
||||
Role: schemas.ChatMessageRoleAssistant,
|
||||
Content: &schemas.ChatMessageContent{
|
||||
ContentStr: bifrost.Ptr("Response for " + tc.Name),
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
ExtraFields: schemas.BifrostResponseExtraFields{
|
||||
Provider: schemas.OpenAI,
|
||||
OriginalModelRequested: "gpt-4o-mini",
|
||||
RequestType: schemas.ChatCompletionRequest,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] PostHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
t.Logf("[%s] First response cached", tc.Name)
|
||||
|
||||
// Second request with different temperature - should be cache miss
|
||||
t.Logf("[%s] Testing second request with temperature=0.5 (expecting cache miss)...", tc.Name)
|
||||
|
||||
ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
|
||||
ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params")
|
||||
|
||||
request2 := &schemas.BifrostRequest{
|
||||
RequestType: schemas.ChatCompletionRequest,
|
||||
ChatRequest: &schemas.BifrostChatRequest{
|
||||
Provider: schemas.OpenAI,
|
||||
Model: "gpt-4o-mini",
|
||||
Input: []schemas.ChatMessage{
|
||||
{
|
||||
Role: schemas.ChatMessageRoleUser,
|
||||
Content: &schemas.ChatMessageContent{
|
||||
ContentStr: bifrost.Ptr("Parameter test for " + tc.Name),
|
||||
},
|
||||
},
|
||||
},
|
||||
Params: &schemas.ChatParameters{
|
||||
Temperature: bifrost.Ptr(0.5), // Different temperature
|
||||
MaxCompletionTokens: bifrost.Ptr(100),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request2)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] Second PreHook failed: %v", tc.Name, err)
|
||||
}
|
||||
|
||||
if shortCircuit2 != nil {
|
||||
t.Fatalf("[%s] Expected cache miss due to different temperature, but got cache hit", tc.Name)
|
||||
}
|
||||
|
||||
t.Logf("[%s] Parameter filtering test passed!", tc.Name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSemanticCache_AllVectorStores_EmbeddingRequest tests embedding request caching across all vector stores
|
||||
func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) {
|
||||
for _, tc := range getVectorStoreTestCases() {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
skipIfNoAPIKey(t, tc.StoreType)
|
||||
setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
|
||||
defer setup.Cleanup()
|
||||
|
||||
// Use unique cache key per test run to avoid stale data from previous runs
|
||||
// (Pinecone Local doesn't support deletion by metadata filter)
|
||||
testRunID := uuid.New().String()[:8]
|
||||
cacheKey := "test-" + strings.ToLower(tc.Name) + "-embedding-" + testRunID
|
||||
|
||||
embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding with " + tc.Name + " " + testRunID})
|
||||
|
||||
// Cache first request
|
||||
ctx1 := CreateContextWithCacheKey(cacheKey)
|
||||
t.Logf("[%s] Making first embedding request...", tc.Name)
|
||||
response1, err1 := setup.Client.EmbeddingRequest(ctx1, embeddingRequest)
|
||||
if err1 != nil {
|
||||
t.Skipf("[%s] First embedding request failed (likely no API key): %v", tc.Name, err1)
|
||||
return
|
||||
}
|
||||
AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1})
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
// Second request - should be cache hit
|
||||
ctx2 := CreateContextWithCacheKey(cacheKey)
|
||||
t.Logf("[%s] Making second embedding request (expecting cache hit)...", tc.Name)
|
||||
response2, err2 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("[%s] Second embedding request failed: %v", tc.Name, err2.Error.Message)
|
||||
}
|
||||
AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct")
|
||||
|
||||
t.Logf("[%s] Embedding request caching test passed!", tc.Name)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user