bifrost/plugins/semanticcache/plugin_responses_test.go

package semanticcache

import (
	"testing"
	"time"

	"github.com/maximhq/bifrost/core/schemas"
)

// TestResponsesAPIBasicFunctionality issues the same Responses API request twice
// under a single cache key and asserts that the second response is reported as a
// direct cache hit that still carries the provider named in the request.
//
// If the first (uncached) request fails, the test is skipped rather than failed.
func TestResponsesAPIBasicFunctionality(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-basic")

	// Create test request
	testRequest := CreateBasicResponsesRequest(
		"What is Bifrost? Answer in one short sentence.",
		0.7,
		500,
	)

	t.Log("Making first Responses API request (should go to OpenAI and be cached)...")

	// Make first request (expected to reach the provider and populate the cache)
	start1 := time.Now()
	response1, err1 := setup.Client.ResponsesRequest(ctx, testRequest)
	duration1 := time.Since(start1)

	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}

	if response1 == nil || len(response1.Output) == 0 {
		t.Fatal("First Responses response is invalid")
	}

	t.Logf("First request completed in %v", duration1)
	t.Logf("Response contains %d output messages", len(response1.Output))
	if c := response1.Output[0].Content; c != nil && c.ContentStr != nil {
		t.Logf("Response: %s", *c.ContentStr)
	} else if c != nil && len(c.ContentBlocks) > 0 && c.ContentBlocks[0].Text != nil {
		t.Logf("Response: %s", *c.ContentBlocks[0].Text)
	} else {
		t.Log("Response: <no text>")
	}

	// Wait for cache to be written
	WaitForCache(setup.Plugin)

	t.Log("Making second identical Responses API request (should be served from cache)...")

	// Make second identical request (should be cached)
	start2 := time.Now()
	response2, err2 := setup.Client.ResponsesRequest(ctx, testRequest)
	duration2 := time.Since(start2)

	if err2 != nil {
		t.Fatalf("Second Responses request failed: %v", err2)
	}

	if response2 == nil || len(response2.Output) == 0 {
		t.Fatal("Second Responses response is invalid")
	}

	// Log the text defensively: Content, ContentBlocks and Text may each be
	// absent, and a diagnostic log line must never panic the test.
	if c := response2.Output[0].Content; c != nil && c.ContentStr != nil {
		t.Logf("Response: %s", *c.ContentStr)
	} else if c != nil && len(c.ContentBlocks) > 0 && c.ContentBlocks[0].Text != nil {
		t.Logf("Response: %s", *c.ContentBlocks[0].Text)
	} else {
		t.Log("Response: <no text>")
	}

	t.Logf("Second request completed in %v", duration2)

	// Verify cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))

	// Performance comparison (informational only; timing is not asserted)
	t.Logf("Performance Summary:")
	t.Logf("First request (OpenAI):  %v", duration1)
	t.Logf("Second request (Cache):  %v", duration2)

	if duration2 >= duration1 {
		t.Log("⚠️  Cache doesn't seem faster, but this could be due to test environment")
	}

	// Verify provider information is maintained in cached response
	if response2.ExtraFields.Provider != testRequest.Provider {
		t.Errorf("Provider mismatch in cached response: expected %s, got %s",
			testRequest.Provider, response2.ExtraFields.Provider)
	}

	t.Log("✅ Basic Responses API semantic caching test completed successfully!")
}

// TestResponsesAPIDifferentParameters runs pairs of Responses API requests that
// share a prompt and cache key, and checks whether the second request of each
// pair is (or is not) reported as a direct cache hit.
//
// Each subtest clears the test keys from the store first. A subtest is skipped
// when its first (cache-priming) request fails.
func TestResponsesAPIDifferentParameters(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-params")
	basePrompt := "Explain quantum computing"

	tests := []struct {
		name        string
		request1    *schemas.BifrostResponsesRequest
		request2    *schemas.BifrostResponsesRequest
		shouldCache bool // whether request2 is expected to be served from cache
	}{
		{
			name:        "Identical Requests",
			request1:    CreateBasicResponsesRequest(basePrompt, 0.5, 500),
			request2:    CreateBasicResponsesRequest(basePrompt, 0.5, 500),
			shouldCache: true,
		},
		{
			name:        "Different Temperature",
			request1:    CreateBasicResponsesRequest(basePrompt, 0.1, 500),
			request2:    CreateBasicResponsesRequest(basePrompt, 0.9, 500),
			shouldCache: false,
		},
		{
			name:        "Different MaxOutputTokens",
			request1:    CreateBasicResponsesRequest(basePrompt, 0.5, 500),
			request2:    CreateBasicResponsesRequest(basePrompt, 0.5, 200),
			shouldCache: false,
		},
		{
			name:        "Different Instructions",
			request1:    CreateResponsesRequestWithInstructions(basePrompt, "Be concise", 0.5, 500),
			request2:    CreateResponsesRequestWithInstructions(basePrompt, "Be detailed", 0.5, 500),
			shouldCache: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Clear cache for this subtest
			clearTestKeysWithStore(t, setup.Store)

			// Make first request
			_, err1 := setup.Client.ResponsesRequest(ctx, tt.request1)
			if err1 != nil {
				// No retry wrapper is involved in this call, so a bare return
				// would report PASS without verifying anything; skip explicitly.
				t.Skipf("Skipping: first request failed: %v", err1)
			}

			WaitForCache(setup.Plugin)

			// Make second request
			response2, err2 := setup.Client.ResponsesRequest(ctx, tt.request2)
			if err2 != nil {
				if err2.Error != nil {
					t.Fatalf("Second request failed: %v", err2.Error.Message)
				}
				t.Fatalf("Second request failed: %v", err2)
			}

			if tt.shouldCache {
				AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))
				t.Log("✓ Parameters match: cache hit as expected")
			} else {
				AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2})
				t.Log("✓ Parameters differ: no cache hit as expected")
			}
		})
	}
}

// TestResponsesAPISemanticMatching primes the cache with one prompt and then
// sends a differently worded prompt under the same cache key, with the context
// created for CacheTypeSemantic. The second response must be reported as a
// semantic cache hit.
//
// The test is skipped when the first (cache-priming) request fails.
func TestResponsesAPISemanticMatching(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKeyAndType("test-responses-semantic", CacheTypeSemantic)

	// First request
	originalRequest := CreateBasicResponsesRequest("What is machine learning?", 0.5, 500)
	t.Log("Making first Responses request with original text...")
	response1, err1 := setup.Client.ResponsesRequest(ctx, originalRequest)
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}

	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
	WaitForCache(setup.Plugin)

	// Test semantic match with similar but different text
	semanticRequest := CreateBasicResponsesRequest("Can you explain machine learning concepts?", 0.5, 500)
	t.Log("Making semantically similar Responses request...")
	response2, err2 := setup.Client.ResponsesRequest(ctx, semanticRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}

	// This should be a semantic cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeSemantic))
	t.Log("✓ Semantic cache hit with similar content")
}

// TestResponsesAPIWithInstructions sends two separately constructed but
// identical Responses API requests that carry instructions, and asserts that
// the first is not a cache hit while the second is a direct cache hit.
//
// The test is skipped when the first (cache-priming) request fails.
func TestResponsesAPIWithInstructions(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-instructions")

	// Create request with instructions
	request1 := CreateResponsesRequestWithInstructions(
		"Explain artificial intelligence",
		"You are a helpful assistant. Be concise and accurate.",
		0.7,
		500,
	)

	t.Log("Making first Responses request with instructions...")
	response1, err1 := setup.Client.ResponsesRequest(ctx, request1)
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}

	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
	WaitForCache(setup.Plugin)

	// Build an identical request from scratch so the hit cannot depend on
	// reusing the same request value.
	request2 := CreateResponsesRequestWithInstructions(
		"Explain artificial intelligence",
		"You are a helpful assistant. Be concise and accurate.",
		0.7,
		500,
	)

	t.Log("Making second identical Responses request with instructions...")
	response2, err2 := setup.Client.ResponsesRequest(ctx, request2)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}

	// Should be a cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))
	t.Log("✓ Responses API with instructions cached correctly")
}

// TestResponsesAPICacheExpiration checks TTL handling for Responses API
// requests: with a 5-second TTL on the context, the second request must be a
// direct cache hit, and a third request made after sleeping past the TTL must
// not be a cache hit.
//
// The test is skipped when the first or the post-expiry request fails.
func TestResponsesAPICacheExpiration(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Set very short TTL for testing
	shortTTL := 5 * time.Second
	ctx := CreateContextWithCacheKeyAndTTL("test-responses-ttl", shortTTL)

	responsesRequest := CreateBasicResponsesRequest("TTL test for Responses API", 0.5, 500)

	t.Log("Making first Responses request with short TTL...")
	response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})

	WaitForCache(setup.Plugin)

	t.Log("Making second Responses request before TTL expiration...")
	response2, err2 := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))

	t.Logf("Waiting for TTL expiration (%v)...", shortTTL)
	time.Sleep(shortTTL + 2*time.Second) // TTL plus a 2s margin

	t.Log("Making third Responses request after TTL expiration...")
	response3, err3 := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err3 != nil {
		// The expiry assertion below could not run; report that as a skip
		// instead of a silent pass.
		t.Skipf("Skipping: third Responses request failed: %v", err3)
	}
	// Should not be a cache hit since TTL expired
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response3})

	t.Log("✅ Responses API requests properly handle TTL expiration")
}

// TestResponsesAPIWithoutCacheKey sends a single Responses API request using a
// context built with an empty cache key and asserts the response is not
// reported as a cache hit.
//
// The test is skipped when the request fails.
func TestResponsesAPIWithoutCacheKey(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// An empty string is passed as the cache key.
	// NOTE(review): presumably this is treated the same as "no cache key set" —
	// confirm against CreateContextWithCacheKey and the plugin's key lookup.
	ctx := CreateContextWithCacheKey("")

	responsesRequest := CreateBasicResponsesRequest("Test Responses without cache key", 0.5, 500)

	t.Log("Making Responses request without cache key...")

	response, err := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: Responses request failed: %v", err)
	}

	// Should not be cached
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response})

	t.Log("✅ Responses requests without cache key are properly not cached")
}

// TestResponsesAPINoStoreFlag sends the same Responses API request twice with a
// context created via CreateContextWithCacheKeyAndNoStore(..., true) and asserts
// that neither response is reported as a cache hit.
//
// The test is skipped when either request fails.
func TestResponsesAPINoStoreFlag(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	responsesRequest := CreateBasicResponsesRequest("Test no-store with Responses API", 0.7, 500)
	ctx := CreateContextWithCacheKeyAndNoStore("test-no-store-responses", true)

	t.Log("Testing no-store with Responses API...")
	response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})

	// Give any (unexpected) cache write a chance to land before re-querying.
	WaitForCache(setup.Plugin)

	// Verify not cached
	response2, err2 := setup.Client.ResponsesRequest(ctx, responsesRequest)
	if err2 != nil {
		// The key assertion below could not run; report that as a skip
		// instead of a silent pass.
		t.Skipf("Skipping: second Responses request failed: %v", err2)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}) // Should not be cached

	t.Log("✅ Responses API no-store flag working correctly")
}

// TestResponsesAPIStreaming primes the cache with a non-streaming Responses API
// request, then issues a streaming request with the same prompt and parameters
// and drains the stream. It fails on a stream error or an empty stream; whether
// any chunk is flagged as a cache hit is only logged, not asserted.
//
// NOTE(review): the test logs "Responses streaming not supported yet" and then
// runs anyway — confirm whether a skip was intended at that point.
//
// The test is skipped when the non-streaming (cache-priming) request fails.
func TestResponsesAPIStreaming(t *testing.T) {
	t.Log("Responses streaming not supported yet")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-streaming")
	prompt := "Explain the basics of quantum computing in simple terms"

	// Make non-streaming request first
	t.Log("Making non-streaming Responses request...")
	nonStreamRequest := CreateBasicResponsesRequest(prompt, 0.5, 500)
	_, err1 := setup.Client.ResponsesRequest(ctx, nonStreamRequest)
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: non-streaming Responses request failed: %v", err1)
	}

	WaitForCache(setup.Plugin)

	// Make streaming request with same prompt and parameters
	t.Log("Making streaming Responses request with same prompt...")
	streamRequest := CreateStreamingResponsesRequest(prompt, 0.5, 500)
	stream, err2 := setup.Client.ResponsesStreamRequest(ctx, streamRequest)
	if err2 != nil {
		t.Fatalf("Streaming Responses request failed: %v", err2)
	}

	// Drain the stream, keeping every chunk that carries a Responses payload.
	var streamResponses []schemas.BifrostResponsesStreamResponse
	for streamMsg := range stream {
		if streamMsg.BifrostError != nil {
			t.Fatalf("Error in Responses stream: %v", streamMsg.BifrostError)
		}
		if streamMsg.BifrostResponsesStreamResponse != nil {
			streamResponses = append(streamResponses, *streamMsg.BifrostResponsesStreamResponse)
		}
	}

	if len(streamResponses) == 0 {
		t.Fatal("No streaming responses received")
	}

	// Check if any of the streaming responses was served from cache
	cacheHitFound := false
	for i := range streamResponses {
		debug := streamResponses[i].ExtraFields.CacheDebug
		if debug != nil && debug.CacheHit {
			cacheHitFound = true
			break
		}
	}

	if cacheHitFound {
		t.Log("✓ Cache hit detected in streaming Responses API")
	} else {
		t.Log("⚠️  No cache hit detected in streaming responses - this could be expected behavior")
	}

	t.Log("✅ Streaming Responses API test completed")
}

// TestResponsesAPIComplexParameters sends two separately constructed but
// identical Responses API requests that set TopP, Background,
// ParallelToolCalls, ServiceTier and Store in addition to the basic parameters,
// and asserts that the first is not a cache hit while the second is a direct
// cache hit.
//
// The test is skipped when the first (cache-priming) request fails.
func TestResponsesAPIComplexParameters(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-complex-params")

	// newRequest builds a fresh request on every call, with freshly allocated
	// pointer values, so the two requests below share no state.
	newRequest := func() *schemas.BifrostResponsesRequest {
		background, parallelToolCalls, store := true, false, true
		serviceTier := "default"

		req := CreateBasicResponsesRequest("Test complex parameters", 0.8, 500)
		req.Params.TopP = PtrFloat64(0.9)
		req.Params.Background = &background
		req.Params.ParallelToolCalls = &parallelToolCalls
		req.Params.ServiceTier = &serviceTier
		req.Params.Store = &store
		return req
	}

	t.Log("Making first Responses request with complex parameters...")
	response1, err1 := setup.Client.ResponsesRequest(ctx, newRequest())
	if err1 != nil {
		// No retry wrapper is involved in this call, so a bare return would
		// report PASS without having verified anything; mark the skip explicitly.
		t.Skipf("Skipping: first Responses request failed: %v", err1)
	}

	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Making second identical Responses request with complex parameters...")
	response2, err2 := setup.Client.ResponsesRequest(ctx, newRequest())
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}

	// Should be a cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))
	t.Log("✓ Responses API with complex parameters cached correctly")
}