first commit
This commit is contained in:
333
plugins/semanticcache/plugin_streaming_test.go
Normal file
333
plugins/semanticcache/plugin_streaming_test.go
Normal file
@@ -0,0 +1,333 @@
|
||||
package semanticcache
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/maximhq/bifrost/core/schemas"
|
||||
)
|
||||
|
||||
// TestStreamingCacheBasicFunctionality tests streaming response caching
|
||||
func TestStreamingCacheBasicFunctionality(t *testing.T) {
|
||||
setup := NewTestSetup(t)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := CreateContextWithCacheKey("test-stream-value")
|
||||
|
||||
// Create a test streaming request
|
||||
testRequest := CreateStreamingChatRequest(
|
||||
"Count from 1 to 3, each number on a new line.",
|
||||
0.0, // Use 0 temperature for more predictable responses
|
||||
20,
|
||||
)
|
||||
|
||||
t.Log("Making first streaming request (should go to OpenAI and be cached)...")
|
||||
|
||||
// Make first streaming request
|
||||
start1 := time.Now()
|
||||
stream1, err1 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
|
||||
if err1 != nil {
|
||||
return // Test will be skipped by retry function
|
||||
}
|
||||
|
||||
var responses1 []schemas.BifrostChatResponse
|
||||
for streamMsg := range stream1 {
|
||||
if streamMsg.BifrostError != nil {
|
||||
t.Fatalf("Error in first stream: %v", streamMsg.BifrostError)
|
||||
}
|
||||
if streamMsg.BifrostChatResponse != nil {
|
||||
responses1 = append(responses1, *streamMsg.BifrostChatResponse)
|
||||
}
|
||||
}
|
||||
duration1 := time.Since(start1)
|
||||
|
||||
if len(responses1) == 0 {
|
||||
t.Fatal("First streaming request returned no responses")
|
||||
}
|
||||
|
||||
t.Logf("First streaming request completed in %v with %d chunks", duration1, len(responses1))
|
||||
|
||||
// Wait for cache to be written
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
t.Log("Making second identical streaming request (should be served from cache)...")
|
||||
|
||||
// Make second identical streaming request
|
||||
start2 := time.Now()
|
||||
stream2, err2 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("Second streaming request failed: %v", err2)
|
||||
}
|
||||
|
||||
var responses2 []schemas.BifrostChatResponse
|
||||
for streamMsg := range stream2 {
|
||||
if streamMsg.BifrostError != nil {
|
||||
t.Fatalf("Error in second stream: %v", streamMsg.BifrostError)
|
||||
}
|
||||
if streamMsg.BifrostChatResponse != nil {
|
||||
responses2 = append(responses2, *streamMsg.BifrostChatResponse)
|
||||
}
|
||||
}
|
||||
duration2 := time.Since(start2)
|
||||
|
||||
if len(responses2) == 0 {
|
||||
t.Fatal("Second streaming request returned no responses")
|
||||
}
|
||||
|
||||
t.Logf("Second streaming request completed in %v with %d chunks", duration2, len(responses2))
|
||||
|
||||
// Validate that both streams have the same number of chunks
|
||||
if len(responses1) != len(responses2) {
|
||||
t.Errorf("Stream chunk count mismatch: original=%d, cached=%d", len(responses1), len(responses2))
|
||||
}
|
||||
|
||||
// Validate that the second stream was cached
|
||||
cached := false
|
||||
for _, response := range responses2 {
|
||||
if response.ExtraFields.CacheDebug != nil && response.ExtraFields.CacheDebug.CacheHit {
|
||||
cached = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !cached {
|
||||
t.Fatal("Second streaming request was not served from cache")
|
||||
}
|
||||
|
||||
// Validate performance improvement
|
||||
if duration2 >= duration1 {
|
||||
t.Errorf("Cached stream took longer than original: cache=%v, original=%v", duration2, duration1)
|
||||
} else {
|
||||
speedup := float64(duration1) / float64(duration2)
|
||||
t.Logf("Streaming cache speedup: %.2fx faster", speedup)
|
||||
}
|
||||
|
||||
// Validate chunk ordering is maintained
|
||||
for i := range responses2 {
|
||||
if responses2[i].ExtraFields.ChunkIndex != responses1[i].ExtraFields.ChunkIndex {
|
||||
t.Errorf("Chunk index mismatch at position %d: original=%d, cached=%d",
|
||||
i, responses1[i].ExtraFields.ChunkIndex, responses2[i].ExtraFields.ChunkIndex)
|
||||
}
|
||||
}
|
||||
|
||||
t.Log("✅ Streaming cache test completed successfully!")
|
||||
}
|
||||
|
||||
// TestStreamingVsNonStreaming tests that streaming and non-streaming requests are cached separately
|
||||
func TestStreamingVsNonStreaming(t *testing.T) {
|
||||
setup := NewTestSetup(t)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := CreateContextWithCacheKey("stream-vs-non-test")
|
||||
|
||||
prompt := "What is the meaning of life?"
|
||||
|
||||
// Make non-streaming request first
|
||||
t.Log("Making non-streaming request...")
|
||||
nonStreamRequest := CreateBasicChatRequest(prompt, 0.5, 50)
|
||||
nonStreamResponse, err1 := setup.Client.ChatCompletionRequest(ctx, nonStreamRequest)
|
||||
if err1 != nil {
|
||||
return // Test will be skipped by retry function
|
||||
}
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
// Make streaming request with same prompt and parameters
|
||||
t.Log("Making streaming request with same prompt...")
|
||||
streamRequest := CreateStreamingChatRequest(prompt, 0.5, 50)
|
||||
stream, err2 := setup.Client.ChatCompletionStreamRequest(ctx, streamRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("Streaming request failed: %v", err2)
|
||||
}
|
||||
|
||||
var streamResponses []schemas.BifrostChatResponse
|
||||
for streamMsg := range stream {
|
||||
if streamMsg.BifrostError != nil {
|
||||
t.Fatalf("Error in stream: %v", streamMsg.BifrostError)
|
||||
}
|
||||
if streamMsg.BifrostChatResponse != nil {
|
||||
streamResponses = append(streamResponses, *streamMsg.BifrostChatResponse)
|
||||
}
|
||||
}
|
||||
|
||||
if len(streamResponses) == 0 {
|
||||
t.Fatal("Streaming request returned no responses")
|
||||
}
|
||||
|
||||
// Verify that the streaming request was NOT served from the non-streaming cache
|
||||
// (They should be cached separately)
|
||||
streamCached := false
|
||||
for _, response := range streamResponses {
|
||||
if response.ExtraFields.RawResponse != nil {
|
||||
if rawMap, ok := response.ExtraFields.RawResponse.(map[string]interface{}); ok {
|
||||
if cachedFlag, exists := rawMap["bifrost_cached"]; exists {
|
||||
if cachedBool, ok := cachedFlag.(bool); ok && cachedBool {
|
||||
streamCached = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if streamCached {
|
||||
t.Error("Streaming request should not be cached from non-streaming cache")
|
||||
} else {
|
||||
t.Log("✅ Streaming request correctly not cached from non-streaming cache")
|
||||
}
|
||||
|
||||
// Verify non-streaming response was not affected
|
||||
AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: nonStreamResponse})
|
||||
|
||||
t.Log("✅ Streaming vs non-streaming test completed!")
|
||||
}
|
||||
|
||||
// TestStreamingChunkOrdering tests that cached streaming responses maintain proper chunk ordering
|
||||
func TestStreamingChunkOrdering(t *testing.T) {
|
||||
setup := NewTestSetup(t)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := CreateContextWithCacheKey("chunk-order-test")
|
||||
|
||||
// Request that should generate multiple chunks
|
||||
testRequest := CreateStreamingChatRequest(
|
||||
"List the first 5 prime numbers, one per line with explanation.",
|
||||
0.0,
|
||||
100,
|
||||
)
|
||||
|
||||
t.Log("Making first streaming request to establish cache...")
|
||||
stream1, err1 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
|
||||
if err1 != nil {
|
||||
return // Test will be skipped by retry function
|
||||
}
|
||||
|
||||
var originalChunks []schemas.BifrostChatResponse
|
||||
for streamMsg := range stream1 {
|
||||
if streamMsg.BifrostError != nil {
|
||||
t.Fatalf("Error in first stream: %v", streamMsg.BifrostError)
|
||||
}
|
||||
if streamMsg.BifrostChatResponse != nil {
|
||||
originalChunks = append(originalChunks, *streamMsg.BifrostChatResponse)
|
||||
}
|
||||
}
|
||||
|
||||
if len(originalChunks) < 2 {
|
||||
t.Skipf("Need at least 2 chunks to test ordering, got %d", len(originalChunks))
|
||||
}
|
||||
|
||||
t.Logf("Original stream had %d chunks", len(originalChunks))
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
t.Log("Making second streaming request to test cached chunk ordering...")
|
||||
stream2, err2 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
|
||||
if err2 != nil {
|
||||
t.Fatalf("Second streaming request failed: %v", err2)
|
||||
}
|
||||
|
||||
var cachedChunks []schemas.BifrostChatResponse
|
||||
for streamMsg := range stream2 {
|
||||
if streamMsg.BifrostError != nil {
|
||||
t.Fatalf("Error in second stream: %v", streamMsg.BifrostError)
|
||||
}
|
||||
if streamMsg.BifrostChatResponse != nil {
|
||||
cachedChunks = append(cachedChunks, *streamMsg.BifrostChatResponse)
|
||||
}
|
||||
}
|
||||
|
||||
if len(cachedChunks) != len(originalChunks) {
|
||||
t.Errorf("Cached stream chunk count mismatch: original=%d, cached=%d",
|
||||
len(originalChunks), len(cachedChunks))
|
||||
}
|
||||
|
||||
// Verify chunk ordering
|
||||
for i := 0; i < len(cachedChunks) && i < len(originalChunks); i++ {
|
||||
originalIndex := originalChunks[i].ExtraFields.ChunkIndex
|
||||
cachedIndex := cachedChunks[i].ExtraFields.ChunkIndex
|
||||
|
||||
if originalIndex != cachedIndex {
|
||||
t.Errorf("Chunk index mismatch at position %d: original=%d, cached=%d",
|
||||
i, originalIndex, cachedIndex)
|
||||
}
|
||||
|
||||
// Only verify cache hit on the last chunk (where CacheDebug is set)
|
||||
if i == len(cachedChunks)-1 {
|
||||
AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: &cachedChunks[i]}, string(CacheTypeDirect))
|
||||
}
|
||||
}
|
||||
|
||||
// Verify chunks are in sequential order
|
||||
for i := 1; i < len(cachedChunks); i++ {
|
||||
prevIndex := cachedChunks[i-1].ExtraFields.ChunkIndex
|
||||
currIndex := cachedChunks[i].ExtraFields.ChunkIndex
|
||||
|
||||
if currIndex <= prevIndex {
|
||||
t.Errorf("Chunks not in sequential order: chunk %d has index %d, chunk %d has index %d",
|
||||
i-1, prevIndex, i, currIndex)
|
||||
}
|
||||
}
|
||||
|
||||
t.Log("✅ Streaming chunk ordering test completed successfully!")
|
||||
}
|
||||
|
||||
// TestSpeechSynthesisStreaming tests speech synthesis streaming caching
|
||||
func TestSpeechSynthesisStreaming(t *testing.T) {
|
||||
setup := NewTestSetup(t)
|
||||
defer setup.Cleanup()
|
||||
|
||||
ctx := CreateContextWithCacheKey("speech-stream-test")
|
||||
|
||||
// Create speech synthesis request
|
||||
speechRequest := CreateSpeechRequest(
|
||||
"This is a test of speech synthesis streaming cache.",
|
||||
"alloy",
|
||||
)
|
||||
|
||||
t.Log("Making first speech synthesis request...")
|
||||
start1 := time.Now()
|
||||
response1, err1 := setup.Client.SpeechRequest(ctx, speechRequest)
|
||||
duration1 := time.Since(start1)
|
||||
|
||||
if err1 != nil {
|
||||
return // Test will be skipped by retry function
|
||||
}
|
||||
|
||||
if response1 == nil {
|
||||
t.Fatal("First speech response is nil")
|
||||
}
|
||||
|
||||
t.Logf("First speech request completed in %v", duration1)
|
||||
|
||||
WaitForCache(setup.Plugin)
|
||||
|
||||
t.Log("Making second identical speech synthesis request...")
|
||||
start2 := time.Now()
|
||||
response2, err2 := setup.Client.SpeechRequest(ctx, speechRequest)
|
||||
duration2 := time.Since(start2)
|
||||
|
||||
if err2 != nil {
|
||||
t.Fatalf("Second speech request failed: %v", err2)
|
||||
}
|
||||
|
||||
if response2 == nil {
|
||||
t.Fatal("Second speech response is nil")
|
||||
}
|
||||
|
||||
t.Logf("Second speech request completed in %v", duration2)
|
||||
|
||||
// Check if second request was cached
|
||||
AssertCacheHit(t, &schemas.BifrostResponse{SpeechResponse: response2}, string(CacheTypeDirect))
|
||||
|
||||
// Performance comparison
|
||||
t.Logf("Speech Synthesis Performance:")
|
||||
t.Logf("First request: %v", duration1)
|
||||
t.Logf("Second request: %v", duration2)
|
||||
|
||||
if duration2 < duration1 {
|
||||
speedup := float64(duration1) / float64(duration2)
|
||||
t.Logf("Speech cache speedup: %.2fx faster", speedup)
|
||||
}
|
||||
|
||||
t.Log("✅ Speech synthesis streaming test completed successfully!")
|
||||
}
|
||||
Reference in New Issue
Block a user