452 lines
15 KiB
Go
452 lines
15 KiB
Go
package semanticcache
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/maximhq/bifrost/core/schemas"
|
|
)
|
|
|
|
// TestResponsesAPIBasicFunctionality tests the core caching functionality with Responses API
|
|
func TestResponsesAPIBasicFunctionality(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKey("test-responses-basic")
|
|
|
|
// Create test request
|
|
testRequest := CreateBasicResponsesRequest(
|
|
"What is Bifrost? Answer in one short sentence.",
|
|
0.7,
|
|
500,
|
|
)
|
|
|
|
t.Log("Making first Responses API request (should go to OpenAI and be cached)...")
|
|
|
|
// Make first request (will go to OpenAI and be cached) - with retries
|
|
start1 := time.Now()
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, testRequest)
|
|
duration1 := time.Since(start1)
|
|
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
if response1 == nil || len(response1.Output) == 0 {
|
|
t.Fatal("First Responses response is invalid")
|
|
}
|
|
|
|
t.Logf("First request completed in %v", duration1)
|
|
t.Logf("Response contains %d output messages", len(response1.Output))
|
|
if c := response1.Output[0].Content; c != nil && c.ContentStr != nil {
|
|
t.Logf("Response: %s", *c.ContentStr)
|
|
} else if c != nil && len(c.ContentBlocks) > 0 && c.ContentBlocks[0].Text != nil {
|
|
t.Logf("Response: %s", *c.ContentBlocks[0].Text)
|
|
} else {
|
|
t.Log("Response: <no text>")
|
|
}
|
|
|
|
// Wait for cache to be written
|
|
WaitForCache(setup.Plugin)
|
|
|
|
t.Log("Making second identical Responses API request (should be served from cache)...")
|
|
|
|
// Make second identical request (should be cached)
|
|
start2 := time.Now()
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, testRequest)
|
|
duration2 := time.Since(start2)
|
|
|
|
if err2 != nil {
|
|
t.Fatalf("Second Responses request failed: %v", err2)
|
|
}
|
|
|
|
if response2 == nil || len(response2.Output) == 0 {
|
|
t.Fatal("Second Responses response is invalid")
|
|
}
|
|
if response2.Output[0].Content.ContentStr != nil {
|
|
t.Logf("Response: %s", *response2.Output[0].Content.ContentStr)
|
|
} else {
|
|
t.Logf("Response: %v", *response2.Output[0].Content.ContentBlocks[0].Text)
|
|
}
|
|
|
|
t.Logf("Second request completed in %v", duration2)
|
|
|
|
// Verify cache hit
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))
|
|
|
|
// Performance comparison
|
|
t.Logf("Performance Summary:")
|
|
t.Logf("First request (OpenAI): %v", duration1)
|
|
t.Logf("Second request (Cache): %v", duration2)
|
|
|
|
if duration2 >= duration1 {
|
|
t.Log("⚠️ Cache doesn't seem faster, but this could be due to test environment")
|
|
}
|
|
|
|
// Verify provider information is maintained in cached response
|
|
if response2.ExtraFields.Provider != testRequest.Provider {
|
|
t.Errorf("Provider mismatch in cached response: expected %s, got %s",
|
|
testRequest.Provider, response2.ExtraFields.Provider)
|
|
}
|
|
|
|
t.Log("✅ Basic Responses API semantic caching test completed successfully!")
|
|
}
|
|
|
|
// TestResponsesAPIDifferentParameters tests that different parameters produce different cache entries
|
|
func TestResponsesAPIDifferentParameters(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKey("test-responses-params")
|
|
basePrompt := "Explain quantum computing"
|
|
|
|
tests := []struct {
|
|
name string
|
|
request1 *schemas.BifrostResponsesRequest
|
|
request2 *schemas.BifrostResponsesRequest
|
|
shouldCache bool
|
|
}{
|
|
{
|
|
name: "Identical Requests",
|
|
request1: CreateBasicResponsesRequest(basePrompt, 0.5, 500),
|
|
request2: CreateBasicResponsesRequest(basePrompt, 0.5, 500),
|
|
shouldCache: true,
|
|
},
|
|
{
|
|
name: "Different Temperature",
|
|
request1: CreateBasicResponsesRequest(basePrompt, 0.1, 500),
|
|
request2: CreateBasicResponsesRequest(basePrompt, 0.9, 500),
|
|
shouldCache: false,
|
|
},
|
|
{
|
|
name: "Different MaxOutputTokens",
|
|
request1: CreateBasicResponsesRequest(basePrompt, 0.5, 500),
|
|
request2: CreateBasicResponsesRequest(basePrompt, 0.5, 200),
|
|
shouldCache: false,
|
|
},
|
|
{
|
|
name: "Different Instructions",
|
|
request1: CreateResponsesRequestWithInstructions(basePrompt, "Be concise", 0.5, 500),
|
|
request2: CreateResponsesRequestWithInstructions(basePrompt, "Be detailed", 0.5, 500),
|
|
shouldCache: false,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// Clear cache for this subtest
|
|
clearTestKeysWithStore(t, setup.Store)
|
|
|
|
// Make first request
|
|
_, err1 := setup.Client.ResponsesRequest(ctx, tt.request1)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Make second request
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, tt.request2)
|
|
if err2 != nil {
|
|
if err2.Error != nil {
|
|
t.Fatalf("Second request failed: %v", err2.Error.Message)
|
|
} else {
|
|
t.Fatalf("Second request failed: %v", err2)
|
|
}
|
|
}
|
|
|
|
if tt.shouldCache {
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, "direct")
|
|
t.Log("✓ Parameters match: cache hit as expected")
|
|
} else {
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2})
|
|
t.Log("✓ Parameters differ: no cache hit as expected")
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestResponsesAPISemanticMatching tests semantic similarity matching with Responses API
|
|
func TestResponsesAPISemanticMatching(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKeyAndType("test-responses-semantic", CacheTypeSemantic)
|
|
|
|
// First request
|
|
originalRequest := CreateBasicResponsesRequest("What is machine learning?", 0.5, 500)
|
|
t.Log("Making first Responses request with original text...")
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, originalRequest)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Test semantic match with similar but different text
|
|
semanticRequest := CreateBasicResponsesRequest("Can you explain machine learning concepts?", 0.5, 500)
|
|
t.Log("Making semantically similar Responses request...")
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, semanticRequest)
|
|
if err2 != nil {
|
|
if err2.Error != nil {
|
|
t.Fatalf("Second request failed: %v", err2.Error.Message)
|
|
} else {
|
|
t.Fatalf("Second request failed: %v", err2)
|
|
}
|
|
}
|
|
|
|
// This should be a semantic cache hit
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, "semantic")
|
|
t.Log("✓ Semantic cache hit with similar content")
|
|
}
|
|
|
|
// TestResponsesAPIWithInstructions tests caching with system instructions
|
|
func TestResponsesAPIWithInstructions(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKey("test-responses-instructions")
|
|
|
|
// Create request with instructions
|
|
request1 := CreateResponsesRequestWithInstructions(
|
|
"Explain artificial intelligence",
|
|
"You are a helpful assistant. Be concise and accurate.",
|
|
0.7,
|
|
500,
|
|
)
|
|
|
|
t.Log("Making first Responses request with instructions...")
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, request1)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Make identical request
|
|
request2 := CreateResponsesRequestWithInstructions(
|
|
"Explain artificial intelligence",
|
|
"You are a helpful assistant. Be concise and accurate.",
|
|
0.7,
|
|
500,
|
|
)
|
|
|
|
t.Log("Making second identical Responses request with instructions...")
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, request2)
|
|
if err2 != nil {
|
|
if err2.Error != nil {
|
|
t.Fatalf("Second request failed: %v", err2.Error.Message)
|
|
} else {
|
|
t.Fatalf("Second request failed: %v", err2)
|
|
}
|
|
}
|
|
|
|
// Should be a cache hit
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, "direct")
|
|
t.Log("✓ Responses API with instructions cached correctly")
|
|
}
|
|
|
|
// TestResponsesAPICacheExpiration tests TTL functionality for Responses API requests
|
|
func TestResponsesAPICacheExpiration(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
// Set very short TTL for testing
|
|
shortTTL := 5 * time.Second
|
|
ctx := CreateContextWithCacheKeyAndTTL("test-responses-ttl", shortTTL)
|
|
|
|
responsesRequest := CreateBasicResponsesRequest("TTL test for Responses API", 0.5, 500)
|
|
|
|
t.Log("Making first Responses request with short TTL...")
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
|
|
|
|
WaitForCache(setup.Plugin)
|
|
|
|
t.Log("Making second Responses request before TTL expiration...")
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err2 != nil {
|
|
if err2.Error != nil {
|
|
t.Fatalf("Second request failed: %v", err2.Error.Message)
|
|
} else {
|
|
t.Fatalf("Second request failed: %v", err2)
|
|
}
|
|
}
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, "direct")
|
|
|
|
t.Logf("Waiting for TTL expiration (%v)...", shortTTL)
|
|
time.Sleep(shortTTL + 2*time.Second) // Wait for TTL to expire
|
|
|
|
t.Log("Making third Responses request after TTL expiration...")
|
|
response3, err3 := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err3 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
// Should not be a cache hit since TTL expired
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response3})
|
|
|
|
t.Log("✅ Responses API requests properly handle TTL expiration")
|
|
}
|
|
|
|
// TestResponsesAPIWithoutCacheKey tests that Responses requests without cache key are not cached
|
|
func TestResponsesAPIWithoutCacheKey(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
// Don't set cache key in context
|
|
ctx := CreateContextWithCacheKey("")
|
|
|
|
responsesRequest := CreateBasicResponsesRequest("Test Responses without cache key", 0.5, 500)
|
|
|
|
t.Log("Making Responses request without cache key...")
|
|
|
|
response, err := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
// Should not be cached
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response})
|
|
|
|
t.Log("✅ Responses requests without cache key are properly not cached")
|
|
}
|
|
|
|
// TestResponsesAPINoStoreFlag tests that Responses requests with no-store flag are not cached
|
|
func TestResponsesAPINoStoreFlag(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
responsesRequest := CreateBasicResponsesRequest("Test no-store with Responses API", 0.7, 500)
|
|
ctx := CreateContextWithCacheKeyAndNoStore("test-no-store-responses", true)
|
|
|
|
t.Log("Testing no-store with Responses API...")
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
|
|
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Verify not cached
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, responsesRequest)
|
|
if err2 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}) // Should not be cached
|
|
|
|
t.Log("✅ Responses API no-store flag working correctly")
|
|
}
|
|
|
|
// TestResponsesAPIStreaming tests streaming Responses API requests
|
|
func TestResponsesAPIStreaming(t *testing.T) {
|
|
t.Log("Responses streaming not supported yet")
|
|
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKey("test-responses-streaming")
|
|
prompt := "Explain the basics of quantum computing in simple terms"
|
|
|
|
// Make non-streaming request first
|
|
t.Log("Making non-streaming Responses request...")
|
|
nonStreamRequest := CreateBasicResponsesRequest(prompt, 0.5, 500)
|
|
_, err1 := setup.Client.ResponsesRequest(ctx, nonStreamRequest)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Make streaming request with same prompt and parameters
|
|
t.Log("Making streaming Responses request with same prompt...")
|
|
streamRequest := CreateStreamingResponsesRequest(prompt, 0.5, 500)
|
|
stream, err2 := setup.Client.ResponsesStreamRequest(ctx, streamRequest)
|
|
if err2 != nil {
|
|
t.Fatalf("Streaming Responses request failed: %v", err2)
|
|
}
|
|
|
|
var streamResponses []schemas.BifrostResponsesStreamResponse
|
|
for streamMsg := range stream {
|
|
if streamMsg.BifrostError != nil {
|
|
t.Fatalf("Error in Responses stream: %v", streamMsg.BifrostError)
|
|
}
|
|
if streamMsg.BifrostResponsesStreamResponse != nil {
|
|
streamResponses = append(streamResponses, *streamMsg.BifrostResponsesStreamResponse)
|
|
}
|
|
}
|
|
|
|
if len(streamResponses) == 0 {
|
|
t.Fatal("No streaming responses received")
|
|
}
|
|
|
|
// Check if any of the streaming responses was served from cache
|
|
cacheHitFound := false
|
|
for _, resp := range streamResponses {
|
|
if resp.ExtraFields.CacheDebug != nil && resp.ExtraFields.CacheDebug.CacheHit {
|
|
cacheHitFound = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !cacheHitFound {
|
|
t.Log("⚠️ No cache hit detected in streaming responses - this could be expected behavior")
|
|
} else {
|
|
t.Log("✓ Cache hit detected in streaming Responses API")
|
|
}
|
|
|
|
t.Log("✅ Streaming Responses API test completed")
|
|
}
|
|
|
|
// TestResponsesAPIComplexParameters tests complex parameter handling
|
|
func TestResponsesAPIComplexParameters(t *testing.T) {
|
|
setup := NewTestSetup(t)
|
|
defer setup.Cleanup()
|
|
|
|
ctx := CreateContextWithCacheKey("test-responses-complex-params")
|
|
|
|
// Create request with various complex parameters
|
|
request := CreateBasicResponsesRequest("Test complex parameters", 0.8, 500)
|
|
request.Params.TopP = PtrFloat64(0.9)
|
|
request.Params.Background = &[]bool{true}[0]
|
|
request.Params.ParallelToolCalls = &[]bool{false}[0]
|
|
request.Params.ServiceTier = &[]string{"default"}[0]
|
|
request.Params.Store = &[]bool{true}[0]
|
|
|
|
t.Log("Making first Responses request with complex parameters...")
|
|
response1, err1 := setup.Client.ResponsesRequest(ctx, request)
|
|
if err1 != nil {
|
|
return // Test will be skipped by retry function
|
|
}
|
|
|
|
AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
|
|
WaitForCache(setup.Plugin)
|
|
|
|
// Create identical request
|
|
request2 := CreateBasicResponsesRequest("Test complex parameters", 0.8, 500)
|
|
request2.Params.TopP = PtrFloat64(0.9)
|
|
request2.Params.Background = &[]bool{true}[0]
|
|
request2.Params.ParallelToolCalls = &[]bool{false}[0]
|
|
request2.Params.ServiceTier = &[]string{"default"}[0]
|
|
request2.Params.Store = &[]bool{true}[0]
|
|
|
|
t.Log("Making second identical Responses request with complex parameters...")
|
|
response2, err2 := setup.Client.ResponsesRequest(ctx, request2)
|
|
if err2 != nil {
|
|
if err2.Error != nil {
|
|
t.Fatalf("Second request failed: %v", err2.Error.Message)
|
|
} else {
|
|
t.Fatalf("Second request failed: %v", err2)
|
|
}
|
|
}
|
|
|
|
// Should be a cache hit
|
|
AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, "direct")
|
|
t.Log("✓ Responses API with complex parameters cached correctly")
|
|
}
|