first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

View File

@@ -0,0 +1,193 @@
package semanticcache
import (
"encoding/json"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
)
// TestUnmarshalJSON_DefaultCacheKey decodes small JSON payloads into a Config
// and checks the resulting DefaultCacheKey: the supplied value when the
// "default_cache_key" key is present, and the empty string when it is absent.
func TestUnmarshalJSON_DefaultCacheKey(t *testing.T) {
	type scenario struct {
		name    string
		payload string
		want    string
	}

	scenarios := []scenario{
		{name: "set", payload: `{"dimension": 1536, "default_cache_key": "my-cache-key"}`, want: "my-cache-key"},
		{name: "omitted", payload: `{"dimension": 1536}`, want: ""},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			var cfg Config
			err := json.Unmarshal([]byte(sc.payload), &cfg)
			if err != nil {
				// Nothing meaningful can be compared after a decode failure.
				t.Fatalf("Failed to unmarshal: %v", err)
			}
			if got := cfg.DefaultCacheKey; got != sc.want {
				t.Errorf("Expected DefaultCacheKey %q, got %q", sc.want, got)
			}
		})
	}
}
// TestUnmarshalJSON_AllFields decodes one JSON document that sets every key
// listed in the literal below and verifies, field by field, that the decoded
// Config carries the expected value.
//
// Optional boolean settings are stored as *bool. Their failure messages print
// the pointed-to value (or "<nil>"), never the pointer itself, so a failing run
// shows "true"/"false" rather than a memory address.
func TestUnmarshalJSON_AllFields(t *testing.T) {
	input := `{
"provider": "openai",
"embedding_model": "text-embedding-3-small",
"cleanup_on_shutdown": true,
"dimension": 1536,
"ttl": "10m",
"threshold": 0.9,
"vector_store_namespace": "my-ns",
"default_cache_key": "global-key",
"conversation_history_threshold": 5,
"cache_by_model": false,
"cache_by_provider": false,
"exclude_system_prompt": true
}`

	// describe renders a *bool by value; formatting the pointer directly with
	// %v would print its address whenever it is non-nil.
	describe := func(p *bool) string {
		switch {
		case p == nil:
			return "<nil>"
		case *p:
			return "true"
		default:
			return "false"
		}
	}

	var config Config
	if err := json.Unmarshal([]byte(input), &config); err != nil {
		t.Fatalf("Failed to unmarshal: %v", err)
	}

	if config.Provider != "openai" {
		t.Errorf("Provider: expected %q, got %q", "openai", config.Provider)
	}
	if config.EmbeddingModel != "text-embedding-3-small" {
		t.Errorf("EmbeddingModel: expected %q, got %q", "text-embedding-3-small", config.EmbeddingModel)
	}
	if !config.CleanUpOnShutdown {
		t.Error("CleanUpOnShutdown: expected true")
	}
	if config.Dimension != 1536 {
		t.Errorf("Dimension: expected 1536, got %d", config.Dimension)
	}
	if config.TTL != 10*time.Minute {
		t.Errorf("TTL: expected 10m, got %v", config.TTL)
	}
	if config.Threshold != 0.9 {
		t.Errorf("Threshold: expected 0.9, got %f", config.Threshold)
	}
	if config.VectorStoreNamespace != "my-ns" {
		t.Errorf("VectorStoreNamespace: expected %q, got %q", "my-ns", config.VectorStoreNamespace)
	}
	if config.DefaultCacheKey != "global-key" {
		t.Errorf("DefaultCacheKey: expected %q, got %q", "global-key", config.DefaultCacheKey)
	}
	if config.ConversationHistoryThreshold != 5 {
		t.Errorf("ConversationHistoryThreshold: expected 5, got %d", config.ConversationHistoryThreshold)
	}

	// An explicit JSON false must decode to a non-nil pointer to false; a nil
	// pointer here would mean the key was treated as absent.
	if config.CacheByModel == nil || *config.CacheByModel {
		t.Errorf("CacheByModel: expected false, got %s", describe(config.CacheByModel))
	}
	if config.CacheByProvider == nil || *config.CacheByProvider {
		t.Errorf("CacheByProvider: expected false, got %s", describe(config.CacheByProvider))
	}
	if config.ExcludeSystemPrompt == nil || !*config.ExcludeSystemPrompt {
		t.Errorf("ExcludeSystemPrompt: expected true, got %s", describe(config.ExcludeSystemPrompt))
	}
}
// TestUnmarshalJSON_TTLFormats decodes payloads that express "ttl" in
// different ways and compares Config.TTL with the expected duration:
// a duration string ("5m" -> 5 minutes), a bare number (300 -> 300 seconds),
// and a payload without the key (-> zero).
func TestUnmarshalJSON_TTLFormats(t *testing.T) {
	cases := []struct {
		label string
		input string
		want  time.Duration
	}{
		{"duration string", `{"dimension": 1536, "ttl": "5m"}`, 5 * time.Minute},
		{"numeric seconds", `{"dimension": 1536, "ttl": 300}`, 300 * time.Second},
		{"omitted", `{"dimension": 1536}`, 0},
	}

	for _, c := range cases {
		t.Run(c.label, func(t *testing.T) {
			var decoded Config
			if err := json.Unmarshal([]byte(c.input), &decoded); err != nil {
				t.Fatalf("Failed to unmarshal: %v", err)
			}
			if decoded.TTL == c.want {
				return
			}
			t.Errorf("Expected TTL %v, got %v", c.want, decoded.TTL)
		})
	}
}
// TestUnmarshalJSON_BoolPointerFields verifies the three optional boolean
// settings (cache_by_model, cache_by_provider, exclude_system_prompt) after
// decoding: explicit true and explicit false must both yield non-nil pointers
// holding that value, and omitted keys must leave the pointers nil.
func TestUnmarshalJSON_BoolPointerFields(t *testing.T) {
	type expectation struct {
		name                string
		raw                 string
		cacheByModel        *bool
		cacheByProvider     *bool
		excludeSystemPrompt *bool
	}

	table := []expectation{
		{
			name:                "all set to true",
			raw:                 `{"dimension": 1536, "cache_by_model": true, "cache_by_provider": true, "exclude_system_prompt": true}`,
			cacheByModel:        bifrost.Ptr(true),
			cacheByProvider:     bifrost.Ptr(true),
			excludeSystemPrompt: bifrost.Ptr(true),
		},
		{
			name:                "all set to false",
			raw:                 `{"dimension": 1536, "cache_by_model": false, "cache_by_provider": false, "exclude_system_prompt": false}`,
			cacheByModel:        bifrost.Ptr(false),
			cacheByProvider:     bifrost.Ptr(false),
			excludeSystemPrompt: bifrost.Ptr(false),
		},
		{
			// The pointer expectations stay at their nil zero value.
			name: "all omitted",
			raw:  `{"dimension": 1536}`,
		},
	}

	for _, row := range table {
		t.Run(row.name, func(t *testing.T) {
			var cfg Config
			if err := json.Unmarshal([]byte(row.raw), &cfg); err != nil {
				t.Fatalf("Failed to unmarshal: %v", err)
			}

			checks := []struct {
				field     string
				got, want *bool
			}{
				{"CacheByModel", cfg.CacheByModel, row.cacheByModel},
				{"CacheByProvider", cfg.CacheByProvider, row.cacheByProvider},
				{"ExcludeSystemPrompt", cfg.ExcludeSystemPrompt, row.excludeSystemPrompt},
			}
			for _, chk := range checks {
				assertBoolPtr(t, chk.field, chk.got, chk.want)
			}
		})
	}
}
// assertBoolPtr records a test error on t when got and want disagree.
//
// Two nil pointers are equal; a nil and a non-nil pointer are unequal; two
// non-nil pointers are compared by the values they point to. field is used
// only as the prefix of the failure message.
//
// The message always shows pointed-to values ("true"/"false") or "<nil>".
// Formatting a non-nil *bool directly with %v would print its address, which
// says nothing about what was actually decoded.
func assertBoolPtr(t *testing.T, field string, got, want *bool) {
	t.Helper()

	// render turns a *bool into the text shown in failure messages.
	render := func(p *bool) string {
		switch {
		case p == nil:
			return "<nil>"
		case *p:
			return "true"
		default:
			return "false"
		}
	}

	if got == nil && want == nil {
		return
	}
	// From here at most one side is nil; dereference only when both are set.
	if got == nil || want == nil || *got != *want {
		t.Errorf("%s: expected %s, got %s", field, render(want), render(got))
	}
}

View File

@@ -0,0 +1,161 @@
module github.com/maximhq/bifrost/plugins/semanticcache
go 1.26.2
require (
github.com/cespare/xxhash/v2 v2.3.0
github.com/google/uuid v1.6.0
github.com/maximhq/bifrost/core v1.5.4
github.com/maximhq/bifrost/framework v1.3.4
github.com/maximhq/bifrost/plugins/mocker v1.5.3
)
require (
cel.dev/expr v0.25.1 // indirect
cloud.google.com/go v0.123.0 // indirect
cloud.google.com/go/auth v0.18.2 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
cloud.google.com/go/compute/metadata v0.9.0 // indirect
cloud.google.com/go/iam v1.5.3 // indirect
cloud.google.com/go/monitoring v1.24.3 // indirect
cloud.google.com/go/storage v1.61.3 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 // indirect
github.com/andybalholm/brotli v1.2.0 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect
github.com/aws/aws-sdk-go-v2/config v1.32.11 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.19.14 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21 // indirect
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.3 // indirect
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect
github.com/aws/smithy-go v1.24.2 // indirect
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/buger/jsonparser v1.1.2 // indirect
github.com/bytedance/gopkg v0.1.3 // indirect
github.com/bytedance/sonic v1.15.0 // indirect
github.com/bytedance/sonic/loader v0.5.0 // indirect
github.com/cloudwego/base64x v0.1.6 // indirect
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-jose/go-jose/v4 v4.1.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/analysis v0.24.2 // indirect
github.com/go-openapi/errors v0.22.5 // indirect
github.com/go-openapi/jsonpointer v0.22.4 // indirect
github.com/go-openapi/jsonreference v0.21.4 // indirect
github.com/go-openapi/loads v0.23.2 // indirect
github.com/go-openapi/runtime v0.29.2 // indirect
github.com/go-openapi/spec v0.22.2 // indirect
github.com/go-openapi/strfmt v0.25.0 // indirect
github.com/go-openapi/swag v0.25.4 // indirect
github.com/go-openapi/swag/cmdutils v0.25.4 // indirect
github.com/go-openapi/swag/conv v0.25.4 // indirect
github.com/go-openapi/swag/fileutils v0.25.4 // indirect
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
github.com/go-openapi/swag/loading v0.25.4 // indirect
github.com/go-openapi/swag/mangling v0.25.4 // indirect
github.com/go-openapi/swag/netutils v0.25.4 // indirect
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
github.com/go-openapi/validate v0.25.1 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect
github.com/googleapis/gax-go/v2 v2.19.0 // indirect
github.com/invopop/jsonschema v0.13.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/pgx/v5 v5.9.1 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/jaswdr/faker/v2 v2.8.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/klauspost/compress v1.18.2 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.9.1 // indirect
github.com/mark3labs/mcp-go v0.43.2 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-sqlite3 v1.14.32 // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/pinecone-io/go-pinecone/v5 v5.3.0 // indirect
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/qdrant/go-client v1.16.2 // indirect
github.com/redis/go-redis/v9 v9.17.2 // indirect
github.com/rs/zerolog v1.34.0 // indirect
github.com/spf13/cast v1.10.0 // indirect
github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect
github.com/stretchr/testify v1.11.1 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.68.0 // indirect
github.com/weaviate/weaviate v1.36.5 // indirect
github.com/weaviate/weaviate-go-client/v5 v5.7.1 // indirect
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.mongodb.org/mongo-driver v1.17.6 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/detectors/gcp v1.40.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
go.opentelemetry.io/otel v1.43.0 // indirect
go.opentelemetry.io/otel/metric v1.43.0 // indirect
go.opentelemetry.io/otel/sdk v1.43.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
go.opentelemetry.io/otel/trace v1.43.0 // indirect
go.starlark.net v0.0.0-20260102030733-3fee463870c9 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/arch v0.23.0 // indirect
golang.org/x/crypto v0.49.0 // indirect
golang.org/x/net v0.52.0 // indirect
golang.org/x/oauth2 v0.36.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect
golang.org/x/time v0.15.0 // indirect
google.golang.org/api v0.274.0 // indirect
google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
google.golang.org/grpc v1.80.0 // indirect
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gorm.io/driver/postgres v1.6.0 // indirect
gorm.io/driver/sqlite v1.6.0 // indirect
gorm.io/gorm v1.31.1 // indirect
)

View File

@@ -0,0 +1,393 @@
cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4=
cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4=
cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE=
cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU=
cloud.google.com/go/auth v0.18.2 h1:+Nbt5Ev0xEqxlNjd6c+yYUeosQ5TtEUaNcN/3FozlaM=
cloud.google.com/go/auth v0.18.2/go.mod h1:xD+oY7gcahcu7G2SG2DsBerfFxgPAJz17zz2joOFF3M=
cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
cloud.google.com/go/iam v1.5.3 h1:+vMINPiDF2ognBJ97ABAYYwRgsaqxPbQDlMnbHMjolc=
cloud.google.com/go/iam v1.5.3/go.mod h1:MR3v9oLkZCTlaqljW6Eb2d3HGDGK5/bDv93jhfISFvU=
cloud.google.com/go/logging v1.13.2 h1:qqlHCBvieJT9Cdq4QqYx1KPadCQ2noD4FK02eNqHAjA=
cloud.google.com/go/logging v1.13.2/go.mod h1:zaybliM3yun1J8mU2dVQ1/qDzjbOqEijZCn6hSBtKak=
cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7EhfW8=
cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk=
cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE=
cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI=
cloud.google.com/go/storage v1.61.3 h1:VS//ZfBuPGDvakfD9xyPW1RGF1Vy3BWUoVZXgW1KMOg=
cloud.google.com/go/storage v1.61.3/go.mod h1:JtqK8BBB7TWv0HVGHubtUdzYYrakOQIsMLffZ2Z/HWk=
cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U=
cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0=
github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY=
github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI=
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM=
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs=
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 h1:UnDZ/zFfG1JhH/DqxIZYU/1CUAlTUScoXD/LcM2Ykk8=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0/go.mod h1:IA1C1U7jO/ENqm/vhi7V9YYpBsp+IMyqNrEN94N7tVc=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0 h1:7t/qx5Ost0s0wbA/VDrByOooURhp+ikYwv20i9Y07TQ=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 h1:0s6TxfCu2KHkkZPnBfsQ2y5qia0jl3MMrmBhu3nCOYk=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY=
github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 h1:eBMB84YGghSocM7PsjmmPffTa+1FBUeNvGvFou6V/4o=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI=
github.com/aws/aws-sdk-go-v2/config v1.32.11 h1:ftxI5sgz8jZkckuUHXfC/wMUc8u3fG1vQS0plr2F2Zs=
github.com/aws/aws-sdk-go-v2/config v1.32.11/go.mod h1:twF11+6ps9aNRKEDimksp923o44w/Thk9+8YIlzWMmo=
github.com/aws/aws-sdk-go-v2/credentials v1.19.14 h1:n+UcGWAIZHkXzYt87uMFBv/l8THYELoX6gVcUvgl6fI=
github.com/aws/aws-sdk-go-v2/credentials v1.19.14/go.mod h1:cJKuyWB59Mqi0jM3nFYQRmnHVQIcgoxjEMAbLkpr62w=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 h1:NUS3K4BTDArQqNu2ih7yeDLaS3bmHD0YndtA6UP884g=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21/go.mod h1:YWNWJQNjKigKY1RHVJCuupeWDrrHjRqHm0N9rdrWzYI=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 h1:Rgg6wvjjtX8bNHcvi9OnXWwcE0a2vGpbwmtICOsvcf4=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21/go.mod h1:A/kJFst/nm//cyqonihbdpQZwiUhhzpqTsdbhDdRF9c=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 h1:PEgGVtPoB6NTpPrBgqSE5hE/o47Ij9qk/SEZFbUOe9A=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21/go.mod h1:p+hz+PRAYlY3zcpJhPwXlLC4C+kqn70WIHwnzAfs6ps=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 h1:clHU5fm//kWS1C2HgtgWxfQbFbx4b6rx+5jzhgX9HrI=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22 h1:rWyie/PxDRIdhNf4DzRk0lvjVOqFJuNnO8WwaIRVxzQ=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22/go.mod h1:zd/JsJ4P7oGfUhXn1VyLqaRZwPmZwg44Jf2dS84Dm3Y=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhLZe4xzL7a+fU3C2tfUN4nWIqlLesfrjkuPFTY=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13 h1:JRaIgADQS/U6uXDqlPiefP32yXTda7Kqfx+LgspooZM=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13/go.mod h1:CEuVn5WqOMilYl+tbccq8+N2ieCy0gVn3OtRb0vBNNM=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 h1:c31//R3xgIJMSC8S6hEVq+38DcvUlgFY0FM6mSI5oto=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21/go.mod h1:r6+pf23ouCB718FUxaqzZdbpYFyDtehyZcmP5KL9FkA=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21 h1:ZlvrNcHSFFWURB8avufQq9gFsheUgjVD9536obIknfM=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21/go.mod h1:cv3TNhVrssKR0O/xxLJVRfd2oazSnZnkUeTf6ctUwfQ=
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.3 h1:HwxWTbTrIHm5qY+CAEur0s/figc3qwvLWsNkF4RPToo=
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.3/go.mod h1:uoA43SdFwacedBfSgfFSjjCvYe8aYBS7EnU5GZ/YKMM=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 h1:QKZH0S178gCmFEgst8hN0mCX1KxLgHBKKY/CLqwP8lg=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9/go.mod h1:7yuQJoT+OoH8aqIxw9vwF+8KpvLZ8AWmvmUWHsGQZvI=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 h1:lFd1+ZSEYJZYvv9d6kXzhkZu07si3f+GQ1AaYwa2LUM=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15/go.mod h1:WSvS1NLr7JaPunCXqpJnWk1Bjo7IxzZXrZi1QQCkuqM=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 h1:dzztQ1YmfPrxdrOiuZRMF6fuOwWlWpD2StNLTceKpys=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19/go.mod h1:YO8TrYtFdl5w/4vmjL8zaBSsiNp3w0L1FfKVKenZT7w=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBUdErbMnAFFp12Lm/U=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw=
github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng=
github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/buger/jsonparser v1.1.2 h1:frqHqw7otoVbk5M8LlE/L7HTnIq2v9RX6EJ48i9AxJk=
github.com/buger/jsonparser v1.1.2/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA=
github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU=
github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g=
github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4=
github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA=
github.com/fasthttp/websocket v1.5.12 h1:e4RGPpWW2HTbL3zV0Y/t7g0ub294LkiuXXUuTOUInlE=
github.com/fasthttp/websocket v1.5.12/go.mod h1:I+liyL7/4moHojiOgUOIKEWm9EIxHqxZChS+aMFltyg=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA=
github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/analysis v0.24.2 h1:6p7WXEuKy1llDgOH8FooVeO+Uq2za9qoAOq4ZN08B50=
github.com/go-openapi/analysis v0.24.2/go.mod h1:x27OOHKANE0lutg2ml4kzYLoHGMKgRm1Cj2ijVOjJuE=
github.com/go-openapi/errors v0.22.5 h1:Yfv4O/PRYpNF3BNmVkEizcHb3uLVVsrDt3LNdgAKRY4=
github.com/go-openapi/errors v0.22.5/go.mod h1:z9S8ASTUqx7+CP1Q8dD8ewGH/1JWFFLX/2PmAYNQLgk=
github.com/go-openapi/jsonpointer v0.22.4 h1:dZtK82WlNpVLDW2jlA1YCiVJFVqkED1MegOUy9kR5T4=
github.com/go-openapi/jsonpointer v0.22.4/go.mod h1:elX9+UgznpFhgBuaMQ7iu4lvvX1nvNsesQ3oxmYTw80=
github.com/go-openapi/jsonreference v0.21.4 h1:24qaE2y9bx/q3uRK/qN+TDwbok1NhbSmGjjySRCHtC8=
github.com/go-openapi/jsonreference v0.21.4/go.mod h1:rIENPTjDbLpzQmQWCj5kKj3ZlmEh+EFVbz3RTUh30/4=
github.com/go-openapi/loads v0.23.2 h1:rJXAcP7g1+lWyBHC7iTY+WAF0rprtM+pm8Jxv1uQJp4=
github.com/go-openapi/loads v0.23.2/go.mod h1:IEVw1GfRt/P2Pplkelxzj9BYFajiWOtY2nHZNj4UnWY=
github.com/go-openapi/runtime v0.29.2 h1:UmwSGWNmWQqKm1c2MGgXVpC2FTGwPDQeUsBMufc5Yj0=
github.com/go-openapi/runtime v0.29.2/go.mod h1:biq5kJXRJKBJxTDJXAa00DOTa/anflQPhT0/wmjuy+0=
github.com/go-openapi/spec v0.22.2 h1:KEU4Fb+Lp1qg0V4MxrSCPv403ZjBl8Lx1a83gIPU8Qc=
github.com/go-openapi/spec v0.22.2/go.mod h1:iIImLODL2loCh3Vnox8TY2YWYJZjMAKYyLH2Mu8lOZs=
github.com/go-openapi/strfmt v0.25.0 h1:7R0RX7mbKLa9EYCTHRcCuIPcaqlyQiWNPTXwClK0saQ=
github.com/go-openapi/strfmt v0.25.0/go.mod h1:nNXct7OzbwrMY9+5tLX4I21pzcmE6ccMGXl3jFdPfn8=
github.com/go-openapi/swag v0.25.4 h1:OyUPUFYDPDBMkqyxOTkqDYFnrhuhi9NR6QVUvIochMU=
github.com/go-openapi/swag v0.25.4/go.mod h1:zNfJ9WZABGHCFg2RnY0S4IOkAcVTzJ6z2Bi+Q4i6qFQ=
github.com/go-openapi/swag/cmdutils v0.25.4 h1:8rYhB5n6WawR192/BfUu2iVlxqVR9aRgGJP6WaBoW+4=
github.com/go-openapi/swag/cmdutils v0.25.4/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0=
github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4=
github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU=
github.com/go-openapi/swag/fileutils v0.25.4 h1:2oI0XNW5y6UWZTC7vAxC8hmsK/tOkWXHJQH4lKjqw+Y=
github.com/go-openapi/swag/fileutils v0.25.4/go.mod h1:cdOT/PKbwcysVQ9Tpr0q20lQKH7MGhOEb6EwmHOirUk=
github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI=
github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag=
github.com/go-openapi/swag/jsonutils v0.25.4 h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA=
github.com/go-openapi/swag/jsonutils v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM=
github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s=
github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE=
github.com/go-openapi/swag/mangling v0.25.4 h1:2b9kBJk9JvPgxr36V23FxJLdwBrpijI26Bx5JH4Hp48=
github.com/go-openapi/swag/mangling v0.25.4/go.mod h1:6dxwu6QyORHpIIApsdZgb6wBk/DPU15MdyYj/ikn0Hg=
github.com/go-openapi/swag/netutils v0.25.4 h1:Gqe6K71bGRb3ZQLusdI8p/y1KLgV4M/k+/HzVSqT8H0=
github.com/go-openapi/swag/netutils v0.25.4/go.mod h1:m2W8dtdaoX7oj9rEttLyTeEFFEBvnAx9qHd5nJEBzYg=
github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8=
github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0=
github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw=
github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE=
github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw=
github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc=
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4=
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg=
github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls=
github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54=
github.com/go-openapi/validate v0.25.1 h1:sSACUI6Jcnbo5IWqbYHgjibrhhmt3vR6lCzKZnmAgBw=
github.com/go-openapi/validate v0.25.1/go.mod h1:RMVyVFYte0gbSTaZ0N4KmTn6u/kClvAFp+mAVfS/DQc=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc=
github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8=
github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg=
github.com/googleapis/gax-go/v2 v2.19.0 h1:fYQaUOiGwll0cGj7jmHT/0nPlcrZDFPrZRhTsoCr8hE=
github.com/googleapis/gax-go/v2 v2.19.0/go.mod h1:w2ROXVdfGEVFXzmlciUU4EdjHgWvB5h2n6x/8XSTTJA=
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.9.1 h1:uwrxJXBnx76nyISkhr33kQLlUqjv7et7b9FjCen/tdc=
github.com/jackc/pgx/v5 v5.9.1/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jaswdr/faker/v2 v2.8.0 h1:3AxdXW9U7dJmWckh/P0YgRbNlCcVsTyrUNUnLVP9b3Q=
github.com/jaswdr/faker/v2 v2.8.0/go.mod h1:jZq+qzNQr8/P+5fHd9t3txe2GNPnthrTfohtnJ7B+68=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU=
github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k=
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8=
github.com/mailru/easyjson v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/mark3labs/mcp-go v0.43.2 h1:21PUSlWWiSbUPQwXIJ5WKlETixpFpq+WBpbMGDSVy/I=
github.com/mark3labs/mcp-go v0.43.2/go.mod h1:YnJfOL382MIWDx1kMY+2zsRHU/q78dBg9aFb8W6Thdw=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/maximhq/bifrost/core v1.5.4 h1:hf0BhoHVVpY1EQ4FkyRzW4IBYjrolxdZV0ucgWfHhcE=
github.com/maximhq/bifrost/core v1.5.4/go.mod h1:z1/vOalbDAD7v7sYbXQsqR+2qIFP0jKOSIStw6Q4P4U=
github.com/maximhq/bifrost/framework v1.3.4 h1:nZPv1FYry1njexZ0Hb6CZQXybwRFKGMTRyGWz2HGcio=
github.com/maximhq/bifrost/framework v1.3.4/go.mod h1:e0defDjWWFi6c2Zs3AOkMcRbYzjww4sjkyZtARrP4Zk=
github.com/maximhq/bifrost/plugins/mocker v1.5.3 h1:PuQShiJS6jbI1S0XAnwtB9dfiYC+TSbxbjJ1FWOb2aE=
github.com/maximhq/bifrost/plugins/mocker v1.5.3/go.mod h1:Ob9R3faldCd1EnTfuPqkLK4CbjA1nLe4e2/Onf/Kk7E=
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/pinecone-io/go-pinecone/v5 v5.3.0 h1:0YQlEtmXGWK/I8ztkOVM6PuBYgFJZhjSdb0ddU+bHPE=
github.com/pinecone-io/go-pinecone/v5 v5.3.0/go.mod h1:6Fg85fcyvMUQFf9KW7zniN81kelSYvsjF+KPLdc1MGA=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/qdrant/go-client v1.16.2 h1:UUMJJfvXTByhwhH1DwWdbkhZ2cTdvSqVkXSIfBrVWSg=
github.com/qdrant/go-client v1.16.2/go.mod h1:I+EL3h4HRoRTeHtbfOd/4kDXwCukZfkd41j/9wryGkw=
github.com/redis/go-redis/v9 v9.17.2 h1:P2EGsA4qVIM3Pp+aPocCJ7DguDHhqrXNhVcEp4ViluI=
github.com/redis/go-redis/v9 v9.17.2/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
github.com/savsgio/gotils v0.0.0-20250408102913-196191ec6287 h1:qIQ0tWF9vxGtkJa24bR+2i53WBCz1nW/Pc47oVYauC4=
github.com/savsgio/gotils v0.0.0-20250408102913-196191ec6287/go.mod h1:sM7Mt7uEoCeFSCBM+qBrqvEo+/9vdmj19wzp3yzUhmg=
github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo=
github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs=
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4=
github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.68.0 h1:v12Nx16iepr8r9ySOwqI+5RBJ/DqTxhOy1HrHoDFnok=
github.com/valyala/fasthttp v1.68.0/go.mod h1:5EXiRfYQAoiO/khu4oU9VISC/eVY6JqmSpPJoHCKsz4=
github.com/weaviate/weaviate v1.36.5 h1:lCiuEfQ08+5wK0DkTCUBb6ayNep9QpBH6JJhmZaRfzk=
github.com/weaviate/weaviate v1.36.5/go.mod h1:ljzrgEmGKn3CRzDdcxvhmBUUZIcghwIYd1Lmn54f3Z8=
github.com/weaviate/weaviate-go-client/v5 v5.7.1 h1:vEMxh486QqRqWaq58UEe/TiTbGbo9T5x7ZPFd5QENvQ=
github.com/weaviate/weaviate-go-client/v5 v5.7.1/go.mod h1:T/JDErjN074GrnYIa0AgK1TGUGP/6A/8vqXNPlv4c6E=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss=
go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/detectors/gcp v1.40.0 h1:Awaf8gmW99tZTOWqkLCOl6aw1/rxAWVlHsHIZ3fT2sA=
go.opentelemetry.io/contrib/detectors/gcp v1.40.0/go.mod h1:99OY9ZCqyLkzJLTh5XhECpLRSxcZl+ZDKBEO+jMBFR4=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg=
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.40.0 h1:ZrPRak/kS4xI3AVXy8F7pipuDXmDsrO8Lg+yQjBLjw0=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.40.0/go.mod h1:3y6kQCWztq6hyW8Z9YxQDDm0Je9AJoFar2G0yDcmhRk=
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
go.starlark.net v0.0.0-20260102030733-3fee463870c9 h1:nV1OyvU+0CYrp5eKfQ3rD03TpFYYhH08z31NK1HmtTk=
go.starlark.net v0.0.0-20260102030733-3fee463870c9/go.mod h1:YKMCv9b1WrfWmeqdV5MAuEHWsu5iC+fe6kYl2sQjdI8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs=
golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA=
google.golang.org/api v0.274.0/go.mod h1:JbAt7mF+XVmWu6xNP8/+CTiGH30ofmCmk9nM8d8fHew=
google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5 h1:JNfk58HZ8lfmXbYK2vx/UvsqIL59TzByCxPIX4TDmsE=
google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5/go.mod h1:x5julN69+ED4PcFk/XWayw35O0lf/nGa4aNgODCmNmw=
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/driver/postgres v1.6.0 h1:2dxzU8xJ+ivvqTRph34QX+WrRaJlmfyPqXmoGVjMBa4=
gorm.io/driver/postgres v1.6.0/go.mod h1:vUw0mrGgrTK+uPHEhAdV4sfFELrByKVGnaVRkXDhtWo=
gorm.io/driver/sqlite v1.6.0 h1:WHRRrIiulaPiPFmDcod6prc4l2VGVWHz80KspNsxSfQ=
gorm.io/driver/sqlite v1.6.0/go.mod h1:AO9V1qIQddBESngQUKWL9yoH93HIeA1X6V633rBwyT8=
gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg=
gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs=

View File

@@ -0,0 +1,871 @@
// Package semanticcache provides semantic caching integration for Bifrost plugin.
// This plugin caches responses using both direct hash matching (xxhash) and semantic similarity search (embeddings).
// It supports configurable caching behavior via the VectorStore abstraction, with TTL management and streaming response handling.
package semanticcache
import (
"context"
"encoding/json"
"fmt"
"strconv"
"sync"
"time"
"github.com/google/uuid"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework"
"github.com/maximhq/bifrost/framework/vectorstore"
)
// Config contains configuration for the semantic cache plugin.
// The VectorStore abstraction handles the underlying storage implementation and its defaults.
// Only specify values you want to override from the semantic cache defaults.
//
// Init fills in zero-valued TTL, Threshold, VectorStoreNamespace and
// ConversationHistoryThreshold, and nil CacheByModel / CacheByProvider, with the
// package defaults, mutating the Config it is given.
type Config struct {
// Embedding model settings. Init falls back to direct (hash-only) search when
// Provider or Keys is missing, so these are only needed for semantic matching.
Provider schemas.ModelProvider `json:"provider"`
Keys []schemas.Key `json:"keys"`
EmbeddingModel string `json:"embedding_model,omitempty"` // Model to use for generating embeddings (optional)
// Plugin behavior settings
CleanUpOnShutdown bool `json:"cleanup_on_shutdown,omitempty"` // Clean up cache on shutdown (default: false)
TTL time.Duration `json:"ttl,omitempty"` // Time-to-live for cached responses (default: 5min); in JSON either a duration string or a number of seconds
Threshold float64 `json:"threshold,omitempty"` // Cosine similarity threshold for semantic matching (default: 0.8; a value of 0 is replaced by the default)
VectorStoreNamespace string `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional, defaults to DefaultVectorStoreNamespace)
Dimension int `json:"dimension"` // Dimension passed to the vector store namespace; 1 with an empty Provider selects direct-only mode
// Advanced caching behavior
DefaultCacheKey string `json:"default_cache_key,omitempty"` // Default cache key used when no per-request key is provided (optional, caching is disabled when empty and no per-request key is set)
ConversationHistoryThreshold int `json:"conversation_history_threshold,omitempty"` // Skip caching for requests with more than this number of messages in the conversation history (default: 3)
CacheByModel *bool `json:"cache_by_model,omitempty"` // Include model in cache key (default: true)
CacheByProvider *bool `json:"cache_by_provider,omitempty"` // Include provider in cache key (default: true)
ExcludeSystemPrompt *bool `json:"exclude_system_prompt,omitempty"` // Exclude system prompt in cache key (default: false)
}
// UnmarshalJSON implements json.Unmarshaler for the semantic cache Config.
//
// Every field uses its regular JSON mapping except "ttl", which accepts either:
//   - a duration string understood by time.ParseDuration (e.g. "30s", "10m", "1h"), or
//   - a JSON number, interpreted as a count of seconds (fractions allowed).
//
// An omitted or null "ttl" leaves c.TTL unchanged; all other fields are always
// overwritten with the decoded (possibly zero) value. Any other JSON type for
// "ttl" (bool, array, object) results in an error.
func (c *Config) UnmarshalJSON(data []byte) error {
	// Decode into a method-less shadow type so json.Unmarshal does not recurse
	// back into this method. TTL stays untyped so its JSON type can be inspected.
	type TempConfig struct {
		Provider                     string        `json:"provider"`
		Keys                         []schemas.Key `json:"keys"`
		EmbeddingModel               string        `json:"embedding_model,omitempty"`
		CleanUpOnShutdown            bool          `json:"cleanup_on_shutdown,omitempty"`
		Dimension                    int           `json:"dimension"`
		TTL                          interface{}   `json:"ttl,omitempty"`
		Threshold                    float64       `json:"threshold,omitempty"`
		VectorStoreNamespace         string        `json:"vector_store_namespace,omitempty"`
		DefaultCacheKey              string        `json:"default_cache_key,omitempty"`
		ConversationHistoryThreshold int           `json:"conversation_history_threshold,omitempty"`
		CacheByModel                 *bool         `json:"cache_by_model,omitempty"`
		CacheByProvider              *bool         `json:"cache_by_provider,omitempty"`
		ExcludeSystemPrompt          *bool         `json:"exclude_system_prompt,omitempty"`
	}

	var temp TempConfig
	if err := json.Unmarshal(data, &temp); err != nil {
		return fmt.Errorf("failed to unmarshal config: %w", err)
	}

	// Copy the fields that need no special handling.
	c.Provider = schemas.ModelProvider(temp.Provider)
	c.Keys = temp.Keys
	c.EmbeddingModel = temp.EmbeddingModel
	c.CleanUpOnShutdown = temp.CleanUpOnShutdown
	c.Dimension = temp.Dimension
	c.CacheByModel = temp.CacheByModel
	c.CacheByProvider = temp.CacheByProvider
	c.VectorStoreNamespace = temp.VectorStoreNamespace
	c.ConversationHistoryThreshold = temp.ConversationHistoryThreshold
	c.Threshold = temp.Threshold
	c.DefaultCacheKey = temp.DefaultCacheKey
	c.ExcludeSystemPrompt = temp.ExcludeSystemPrompt

	// encoding/json stores JSON numbers in an interface value as float64, so a
	// float64 case covers every numeric TTL; an int case could never match.
	switch ttl := temp.TTL.(type) {
	case nil:
		// "ttl" omitted or null: keep whatever c.TTL already holds.
	case string:
		duration, err := time.ParseDuration(ttl)
		if err != nil {
			return fmt.Errorf("failed to parse TTL duration string '%s': %w", ttl, err)
		}
		c.TTL = duration
	case float64:
		// Numeric TTLs are seconds; scale before converting so fractions survive.
		c.TTL = time.Duration(ttl * float64(time.Second))
	default:
		return fmt.Errorf("unsupported TTL type: %T (value: %v)", ttl, ttl)
	}
	return nil
}
// StreamChunk represents a single chunk from a streaming response, together
// with the time it was received. StreamAccumulator collects these per stream.
type StreamChunk struct {
Timestamp time.Time // When chunk was received
Response *schemas.BifrostResponse // The actual response chunk
FinishReason *string // If this is the final chunk
}
// StreamAccumulator manages accumulation of streaming chunks for caching.
// It carries the chunks of one stream plus the data (embedding, metadata, TTL)
// recorded for the eventual cache entry. Because it embeds a sync.Mutex it must
// be shared by pointer and never copied.
type StreamAccumulator struct {
RequestID string // The request ID
StorageID string // The final cache entry ID
Chunks []*StreamChunk // All chunks for this stream
IsComplete bool // Whether the stream is complete
HasError bool // Whether any chunk in the stream had an error
FinalTimestamp time.Time // When the stream completed
Embedding []float32 // Embedding for the original request
Metadata map[string]interface{} // Metadata for caching
TTL time.Duration // TTL for this cache entry
mu sync.Mutex // Protects chunk operations
}
// Plugin implements the schemas.LLMPlugin interface for semantic caching.
// It caches responses using a two-tier approach: direct hash matching for exact requests
// and semantic similarity search for related content. The plugin supports configurable caching behavior
// via the VectorStore abstraction, including TTL management and streaming response handling.
//
// Fields:
//   - store: VectorStore instance for semantic cache operations
//   - config: Plugin configuration including semantic cache and caching settings
//   - logger: Logger instance for plugin operations
//   - client: Bifrost client set by Init only when a provider and keys are
//     configured; nil when the plugin runs with direct search only
//   - streamAccumulators: per-request stream accumulators, keyed by request ID
//   - waitGroup: NOTE(review): presumably tracks background work started by the
//     plugin so shutdown can wait for it; its use is not visible here, confirm
type Plugin struct {
store vectorstore.VectorStore
config *Config
logger schemas.Logger
client *bifrost.Bifrost
streamAccumulators sync.Map // Track stream accumulators by request ID
waitGroup sync.WaitGroup
}
// Plugin constants: identifiers, timeouts and the defaults Init applies to
// zero-valued Config fields.
const (
PluginName string = "semantic_cache" // Name reported by Plugin.GetName
DefaultVectorStoreNamespace string = "BifrostSemanticCachePlugin" // Used by Init when Config.VectorStoreNamespace is empty
PluginLoggerPrefix string = "[Semantic Cache]" // Prefix prepended to the plugin's log messages
CacheConnectionTimeout time.Duration = 5 * time.Second // NOTE(review): not referenced in the code shown here; confirm where it applies
CreateNamespaceTimeout time.Duration = 30 * time.Second // Deadline Init puts on the CreateNamespace call
CacheSetTimeout time.Duration = 30 * time.Second // NOTE(review): presumably bounds cache writes; usage not shown here, confirm
DefaultCacheTTL time.Duration = 5 * time.Minute // Used by Init when Config.TTL is zero
DefaultCacheThreshold float64 = 0.8 // Used by Init when Config.Threshold is zero
DefaultConversationHistoryThreshold int = 3 // Used by Init when Config.ConversationHistoryThreshold is zero
)
// SelectFields lists property names of a cache entry. Every name here is also a
// key of VectorStoreProperties; "params_hash" and
// "from_bifrost_semantic_cache_plugin" are declared there but not listed here.
// NOTE(review): presumably the fields requested when reading entries back from
// the vector store; the read path is not visible here, confirm against callers.
var SelectFields = []string{"request_hash", "response", "stream_chunks", "expires_at", "cache_key", "provider", "model"}
// VectorStoreProperties describes the properties (name, data type, description)
// of a cache entry. Init passes this map to VectorStore.CreateNamespace when it
// creates the plugin's namespace.
var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{
"request_hash": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The hash of the request",
},
"response": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The response from the provider",
},
"stream_chunks": {
DataType: vectorstore.VectorStorePropertyTypeStringArray,
Description: "The stream chunks from the provider",
},
"expires_at": {
DataType: vectorstore.VectorStorePropertyTypeInteger,
Description: "The expiration time of the cache entry",
},
"cache_key": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The cache key from the request",
},
"provider": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The provider used for the request",
},
"model": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The model used for the request",
},
"params_hash": {
DataType: vectorstore.VectorStorePropertyTypeString,
Description: "The hash of the parameters used for the request",
},
// Marker property flagging entries written by this plugin.
"from_bifrost_semantic_cache_plugin": {
DataType: vectorstore.VectorStorePropertyTypeBoolean,
Description: "Whether the cache entry was created by the BifrostSemanticCachePlugin",
},
}
// PluginAccount is the single-provider account that Init hands to the Bifrost
// client it creates for the semantic cache. It exposes exactly one provider and
// one fixed set of keys.
type PluginAccount struct {
	provider schemas.ModelProvider
	keys     []schemas.Key
}

// GetConfiguredProviders reports the one provider this account was built with.
func (pa *PluginAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error) {
	configured := []schemas.ModelProvider{pa.provider}
	return configured, nil
}

// GetKeysForProvider returns the account's keys. The same keys are returned
// regardless of ctx or providerKey.
func (pa *PluginAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error) {
	return pa.keys, nil
}

// GetConfigForProvider returns a fresh provider configuration populated with
// the default network and concurrency settings from the schemas package.
// providerKey is not consulted.
func (pa *PluginAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error) {
	providerConfig := new(schemas.ProviderConfig)
	providerConfig.NetworkConfig = schemas.DefaultNetworkConfig
	providerConfig.ConcurrencyAndBufferSize = schemas.DefaultConcurrencyAndBufferSize
	return providerConfig, nil
}
// Dependencies lists the framework dependencies this plugin requires; the
// semantic cache needs only the vector store.
var Dependencies = []framework.FrameworkDependency{
	framework.FrameworkDependencyVectorStore,
}
// ProvidersWithEmbeddingSupport lists all providers that support embedding operations.
// Init consults this map for standard providers (per bifrost.IsStandardProvider)
// and refuses to start semantic mode with one that is missing from it.
// The error message in Init spells out the same provider names, so keep the two in sync.
var ProvidersWithEmbeddingSupport = map[schemas.ModelProvider]bool{
schemas.OpenAI: true,
schemas.Azure: true,
schemas.Bedrock: true,
schemas.Cohere: true,
schemas.Gemini: true,
schemas.Vertex: true,
schemas.Mistral: true,
schemas.Ollama: true,
schemas.Nebius: true,
schemas.HuggingFace: true,
schemas.SGL: true,
}
// Context keys understood by the semantic cache plugin. The exported keys are
// set by callers on a request's context; the unexported ones are for the
// plugin's internal usage.
const (
CacheKey schemas.BifrostContextKey = "semantic_cache_key" // To set the cache key for a request; per the Config.DefaultCacheKey docs, caching is disabled when neither this nor the default key is set
CacheTTLKey schemas.BifrostContextKey = "semantic_cache_ttl" // To explicitly set the TTL for a request
CacheThresholdKey schemas.BifrostContextKey = "semantic_cache_threshold" // To explicitly set the threshold for a request
CacheTypeKey schemas.BifrostContextKey = "semantic_cache_cache_type" // To explicitly set the cache type for a request (see CacheType)
CacheNoStoreKey schemas.BifrostContextKey = "semantic_cache_no_store" // To explicitly disable storing the response in the cache
// context keys for internal usage
requestIDKey schemas.BifrostContextKey = "semantic_cache_request_id"
requestStorageIDKey schemas.BifrostContextKey = "semantic_cache_request_storage_id"
requestHashKey schemas.BifrostContextKey = "semantic_cache_request_hash"
requestEmbeddingKey schemas.BifrostContextKey = "semantic_cache_embedding"
requestEmbeddingTokensKey schemas.BifrostContextKey = "semantic_cache_embedding_tokens"
requestParamsHashKey schemas.BifrostContextKey = "semantic_cache_params_hash"
requestModelKey schemas.BifrostContextKey = "semantic_cache_model"
requestProviderKey schemas.BifrostContextKey = "semantic_cache_provider"
isCacheHitKey schemas.BifrostContextKey = "semantic_cache_is_cache_hit"
cacheHitTypeKey schemas.BifrostContextKey = "semantic_cache_cache_hit_type"
)
// CacheType names one of the two lookup strategies the package provides:
// direct hash matching or semantic similarity search.
type CacheType string
const (
CacheTypeDirect CacheType = "direct" // Direct hash matching
CacheTypeSemantic CacheType = "semantic" // Semantic similarity search
)
// Init creates a new semantic cache plugin instance with the provided configuration.
// It uses the VectorStore abstraction for cache operations and returns a configured plugin.
//
// The VectorStore handles the underlying storage implementation and its defaults.
// The plugin only sets defaults for its own behavior (namespace, TTL, threshold,
// conversation history threshold, cache-by-model/provider). These defaults are
// written into config itself, and the plugin keeps the config pointer.
//
// Operating mode is chosen from config:
//   - no provider and dimension 1: direct-only mode, no embedding client;
//   - provider or keys missing: warns and falls back to direct search only;
//   - otherwise: semantic mode, with a dedicated Bifrost client for embeddings.
//
// Parameters:
//   - ctx: Context used for namespace creation and Bifrost client initialization
//   - config: Semantic cache and plugin configuration
//   - logger: Logger instance for the plugin
//   - store: VectorStore instance for cache operations
//
// Returns:
//   - schemas.LLMPlugin: A configured semantic cache plugin instance
//   - error: Any error that occurred during plugin initialization
func Init(ctx context.Context, config *Config, logger schemas.Logger, store vectorstore.VectorStore) (schemas.LLMPlugin, error) {
	if config == nil {
		return nil, fmt.Errorf("config is required")
	}
	if store == nil {
		return nil, fmt.Errorf("store is required")
	}
	// The logger is used unconditionally below; reject nil instead of panicking.
	if logger == nil {
		return nil, fmt.Errorf("logger is required")
	}

	// Set plugin-specific defaults
	if config.VectorStoreNamespace == "" {
		logger.Debug(PluginLoggerPrefix + " Vector store namespace is not set, using default of " + DefaultVectorStoreNamespace)
		config.VectorStoreNamespace = DefaultVectorStoreNamespace
	}
	if config.TTL == 0 {
		logger.Debug(PluginLoggerPrefix + " TTL is not set, using default of 5 minutes")
		config.TTL = DefaultCacheTTL
	}
	if config.Threshold == 0 {
		logger.Debug(PluginLoggerPrefix + " Threshold is not set, using default of " + strconv.FormatFloat(DefaultCacheThreshold, 'f', -1, 64))
		config.Threshold = DefaultCacheThreshold
	}
	if config.ConversationHistoryThreshold == 0 {
		logger.Debug(PluginLoggerPrefix + " Conversation history threshold is not set, using default of " + strconv.Itoa(DefaultConversationHistoryThreshold))
		config.ConversationHistoryThreshold = DefaultConversationHistoryThreshold
	}

	// Set cache behavior defaults
	if config.CacheByModel == nil {
		config.CacheByModel = bifrost.Ptr(true)
	}
	if config.CacheByProvider == nil {
		config.CacheByProvider = bifrost.Ptr(true)
	}

	plugin := &Plugin{
		store:  store,
		config: config,
		logger: logger,
	}

	// Decide the operating mode and validate the provider before touching any
	// external resource.
	semanticMode := false
	switch {
	case config.Provider == "" && config.Dimension == 1:
		logger.Info(PluginLoggerPrefix + " Starting in direct-only mode (dimension=1, no embedding provider)")
	case config.Provider == "" || len(config.Keys) == 0:
		logger.Warn(PluginLoggerPrefix + " Incomplete semantic mode config: missing provider or keys, falling back to direct search only")
	default:
		// Validate that the provider supports embeddings
		if bifrost.IsStandardProvider(config.Provider) && !ProvidersWithEmbeddingSupport[config.Provider] {
			return nil, fmt.Errorf("provider '%s' does not support embedding operations required for semantic cache. Supported providers: openai, azure, bedrock, cohere, gemini, vertex, mistral, ollama, nebius, huggingface, sgl. Note: custom providers based on embedding-capable providers are also supported", config.Provider)
		}
		semanticMode = true
	}

	createCtx, cancel := context.WithTimeout(ctx, CreateNamespaceTimeout)
	defer cancel()
	if err := store.CreateNamespace(createCtx, config.VectorStoreNamespace, config.Dimension, VectorStoreProperties); err != nil {
		return nil, fmt.Errorf("failed to create namespace for semantic cache: %w", err)
	}

	// Create the embedding client last, so that a namespace failure above never
	// discards an already-initialized client without shutting it down.
	if semanticMode {
		client, err := bifrost.Init(ctx, schemas.BifrostConfig{
			Logger: logger,
			Account: &PluginAccount{
				provider: config.Provider,
				keys:     config.Keys,
			},
		})
		if err != nil {
			return nil, fmt.Errorf("failed to initialize bifrost for semantic cache: %w", err)
		}
		plugin.client = client
	}

	return plugin, nil
}
// GetName reports the name of the semantic cache plugin.
//
// Returns:
//   - string: the package-level PluginName value
func (plugin *Plugin) GetName() string {
	return PluginName
}
// HTTPTransportPreHook is a no-op for the semantic cache plugin: it ignores
// its arguments and returns neither a response nor an error.
func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error) {
	return nil, nil
}
// HTTPTransportPostHook is a no-op for the semantic cache plugin: it ignores
// its arguments and always returns nil.
func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, resp *schemas.HTTPResponse) error {
	return nil
}
// HTTPTransportStreamChunkHook hands the streaming chunk it receives straight
// back to the caller, unmodified and with a nil error.
func (plugin *Plugin) HTTPTransportStreamChunkHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, chunk *schemas.BifrostStreamChunk) (*schemas.BifrostStreamChunk, error) {
	return chunk, nil
}
// clearRequestScopedContext removes every per-request value this plugin keeps
// on the Bifrost context, so that state left by an earlier request cannot be
// picked up when the same context is reused.
//
// Parameters:
//   - ctx: the Bifrost context whose plugin-owned values are cleared
func (plugin *Plugin) clearRequestScopedContext(ctx *schemas.BifrostContext) {
// Request identity: generated ID and the storage ID used for the cache entry.
ctx.ClearValue(requestIDKey)
ctx.ClearValue(requestStorageIDKey)
// Hashes computed for lookup.
ctx.ClearValue(requestHashKey)
ctx.ClearValue(requestParamsHashKey)
// Model and provider recorded for the post-hook.
ctx.ClearValue(requestModelKey)
ctx.ClearValue(requestProviderKey)
// Embedding vector and its token count.
ctx.ClearValue(requestEmbeddingKey)
ctx.ClearValue(requestEmbeddingTokensKey)
// Cache-hit markers.
ctx.ClearValue(isCacheHitKey)
ctx.ClearValue(cacheHitTypeKey)
}
// PreLLMHook runs before Bifrost processes a request and tries to serve it
// from the cache. Lookup has up to two stages: a direct hash match first and,
// when an embedding client is configured, a semantic similarity search second.
// A CacheTypeKey value on the context can restrict the lookup to one stage.
//
// The hook also records the generated request ID, the model and the provider
// on the context so that PostLLMHook can store the eventual response.
//
// Parameters:
//   - ctx: Pointer to the schemas.BifrostContext
//   - req: The incoming Bifrost request
//
// Returns:
//   - *schemas.BifrostRequest: The original request, unmodified
//   - *schemas.LLMPluginShortCircuit: The short-circuit produced by a cache hit, nil on a miss
//   - error: Always nil; lookup failures are logged and treated as a miss
func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error) {
	provider, model, _ := req.GetRequestFields()

	// Resolve the cache key: the context value wins, the configured default
	// is the fallback, and with neither there is nothing to cache under.
	cacheKey, hasKey := ctx.Value(CacheKey).(string)
	if !hasKey || cacheKey == "" {
		if plugin.config.DefaultCacheKey == "" {
			plugin.logger.Debug(PluginLoggerPrefix + " No cache key found in context, continuing without caching")
			return req, nil, nil
		}
		cacheKey = plugin.config.DefaultCacheKey
		plugin.logger.Debug(PluginLoggerPrefix + " Using default cache key: " + cacheKey)
	}

	// Drop any state left behind by a previous request on a reused context.
	plugin.clearRequestScopedContext(ctx)

	if !isSemanticCacheSupportedRequestType(req.RequestType) {
		plugin.logger.Debug(PluginLoggerPrefix + " Skipping caching for unsupported request type: " + string(req.RequestType))
		return req, nil, nil
	}
	if plugin.isConversationHistoryThresholdExceeded(req) {
		plugin.logger.Debug(PluginLoggerPrefix + " Skipping caching for request with conversation history threshold exceeded")
		return req, nil, nil
	}

	// Publish the per-request identity for PostLLMHook.
	ctx.SetValue(requestIDKey, uuid.New().String())
	ctx.SetValue(requestModelKey, model)
	ctx.SetValue(requestProviderKey, provider)

	// Both stages run unless the context pins a single cache type.
	wantDirect, wantSemantic := true, true
	if rawCacheType := ctx.Value(CacheTypeKey); rawCacheType != nil {
		if cacheType, isCacheType := rawCacheType.(CacheType); isCacheType {
			wantDirect = cacheType == CacheTypeDirect
			wantSemantic = cacheType == CacheTypeSemantic
		} else {
			plugin.logger.Warn(PluginLoggerPrefix + " Cache type is not a CacheType, using all available cache types")
		}
	}

	if wantDirect {
		hit, err := plugin.performDirectSearch(ctx, req, cacheKey)
		if err != nil {
			// A failed direct lookup is not fatal: fall through to the
			// semantic stage and ignore whatever was returned with the error.
			plugin.logger.Warn(PluginLoggerPrefix + " Direct search failed: " + err.Error() + " (" + describeRequestShape(req) + ")")
		} else if hit != nil {
			return req, hit, nil
		}
	}

	// storeZeroVector records an all-zero embedding of the configured
	// dimension on the context (when the dimension is positive) and logs the
	// given message.
	storeZeroVector := func(debugSuffix string) {
		if plugin.config.Dimension <= 0 {
			return
		}
		ctx.SetValue(requestEmbeddingKey, make([]float32, plugin.config.Dimension))
		plugin.logger.Debug(PluginLoggerPrefix + debugSuffix)
	}

	isEmbeddingOrTranscription := req.EmbeddingRequest != nil || req.TranscriptionRequest != nil

	switch {
	case wantSemantic && plugin.client != nil:
		if isEmbeddingOrTranscription {
			plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic search for embedding/transcription input")
			// Stores that demand a vector on every entry get a zero-vector
			// placeholder, so the entry can still be stored for hash matching
			// without generating a real embedding.
			if plugin.store.RequiresVectors() {
				storeZeroVector(" Using zero vector placeholder for embedding/transcription request storage")
			}
			return req, nil, nil
		}

		// Semantic lookup is the fallback after the direct stage.
		hit, err := plugin.performSemanticSearch(ctx, req, cacheKey)
		if err != nil {
			plugin.logger.Debug(PluginLoggerPrefix + " Semantic search skipped: " + err.Error() + " (" + describeRequestShape(req) + ")")
			return req, nil, nil
		}
		if hit != nil {
			return req, hit, nil
		}

	case !wantSemantic && plugin.store.RequiresVectors() && plugin.client != nil:
		// Direct-only request against a store that requires vectors: a vector
		// is needed for storage only, never for searching.
		if isEmbeddingOrTranscription {
			plugin.logger.Debug(PluginLoggerPrefix + " Skipping embedding generation for embedding/transcription input")
			storeZeroVector(" Using zero vector placeholder for embedding/transcription request storage")
			return req, nil, nil
		}
		// A zero vector keeps direct-only entries from being matched by a
		// later semantic search, preserving cache-type isolation.
		storeZeroVector(" Using zero vector for direct-only cache storage (preserves isolation)")
	}

	return req, nil, nil
}
// PostLLMHook is called after a response is received from a provider.
// It stores the response in the VectorStore under the request's storage ID,
// together with unified metadata (provider, model, params hash, request hash,
// cache key and TTL). Both single and streaming responses are handled.
//
// The function performs the following operations:
//  1. Returns immediately, without caching, when the provider returned an error,
//     when there is no response, in large payload/response mode, on a cache hit,
//     or when CacheNoStoreKey is set to true on the context
//  2. Retrieves the cache key, request ID, hash, embedding, provider and model
//     from the context (set during PreLLMHook)
//  3. Fills in cache debug information on the response when embedding tokens were recorded
//  4. Stores the cache entry in the VectorStore asynchronously (non-blocking)
//
// The store operation runs in a separate goroutine, tracked by the plugin's
// wait group, so it never blocks the response. Any problem while preparing the
// entry is logged and caching is skipped; the response is always passed through.
//
// Parameters:
//   - ctx: Pointer to the schemas.BifrostContext containing the request hash and ID
//   - res: The response from the provider to be cached
//   - bifrostErr: The error from the provider, if any; a non-nil error disables caching
//
// Returns:
//   - *schemas.BifrostResponse: The original response
//   - *schemas.BifrostError: The original error
//   - error: Always nil, as caching problems are handled gracefully
func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) {
	if bifrostErr != nil {
		return res, bifrostErr, nil
	}
	// Nothing to cache without a response. Guarding here also keeps the
	// res.GetExtraFields() call below from being made on a nil response.
	if res == nil {
		return res, nil, nil
	}

	// Skip caching for large payloads — body is too large to materialize for cache storage
	if isLargePayload, ok := ctx.Value(schemas.BifrostContextKeyLargePayloadMode).(bool); ok && isLargePayload {
		plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic cache for large payload request")
		return res, nil, nil
	}
	if isLargeResponse, ok := ctx.Value(schemas.BifrostContextKeyLargeResponseMode).(bool); ok && isLargeResponse {
		plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic cache for large payload response")
		return res, nil, nil
	}

	// A response that was itself served from the cache must not be re-stored.
	if isCacheHit, ok := ctx.Value(isCacheHitKey).(bool); ok && isCacheHit {
		return res, nil, nil
	}

	// Check if caching is explicitly disabled
	if noStore, ok := ctx.Value(CacheNoStoreKey).(bool); ok && noStore {
		plugin.logger.Debug(PluginLoggerPrefix + " Caching is explicitly disabled for this request, continuing without caching")
		return res, nil, nil
	}

	// Get the cache key from context, falling back to the configured default
	cacheKey, ok := ctx.Value(CacheKey).(string)
	if !ok || cacheKey == "" {
		if plugin.config.DefaultCacheKey == "" {
			return res, nil, nil
		}
		cacheKey = plugin.config.DefaultCacheKey
	}

	// Get the request ID from context; it is absent when PreLLMHook bailed out early
	requestID, ok := ctx.Value(requestIDKey).(string)
	if !ok {
		return res, nil, nil
	}

	// When direct lookup prepared a deterministic storage ID, reuse it here so
	// default-mode traffic warms the GetChunk fast path instead of only the
	// legacy search path.
	storageID := requestID
	if v, ok := ctx.Value(requestStorageIDKey).(string); ok && v != "" {
		storageID = v
	}

	// Check cache type to optimize embedding handling
	shouldStoreEmbeddings := true
	shouldStoreHash := true
	if cacheTypeVal, ok := ctx.Value(CacheTypeKey).(CacheType); ok {
		switch cacheTypeVal {
		case CacheTypeDirect:
			// For direct-only caching, skip embedding operations entirely
			// unless the vector store requires vectors for all entries
			if plugin.store.RequiresVectors() {
				// Vector stores like Qdrant and Pinecone require vectors for all entries
				// Keep embeddings enabled for storage, but lookups will still use direct hash matching
				plugin.logger.Debug(PluginLoggerPrefix + " Vector store requires vectors, keeping embedding generation enabled for storage")
			} else {
				shouldStoreEmbeddings = false
				plugin.logger.Debug(PluginLoggerPrefix + " Skipping embedding operations for direct-only cache type")
			}
		case CacheTypeSemantic:
			shouldStoreHash = false
			plugin.logger.Debug(PluginLoggerPrefix + " Skipping hash operations for semantic cache type")
		}
	}

	var hash string
	if shouldStoreHash {
		// Get the hash from context
		h, ok := ctx.Value(requestHashKey).(string)
		if !ok {
			plugin.logger.Warn(PluginLoggerPrefix + " Hash is not a string. Continuing without caching")
			return res, nil, nil
		}
		hash = h
	}

	extraFields := res.GetExtraFields()
	requestType := extraFields.RequestType

	// Get embedding from context if available and needed
	// For embedding/transcription requests, we still need to retrieve the zero vector placeholder
	// if the vector store requires vectors for all entries
	isEmbeddingOrTranscription := requestType == schemas.EmbeddingRequest || requestType == schemas.TranscriptionRequest
	needsEmbedding := shouldStoreEmbeddings && !isEmbeddingOrTranscription
	needsZeroVector := isEmbeddingOrTranscription && plugin.store.RequiresVectors()
	var embedding []float32
	if needsEmbedding || needsZeroVector {
		// Note: the embedding can be absent for direct cache hits or when semantic search is disabled
		// This is fine - we can still cache using direct hash matching (unless store requires vectors)
		if embeddingValue := ctx.Value(requestEmbeddingKey); embeddingValue != nil {
			vec, ok := embeddingValue.([]float32)
			if !ok {
				plugin.logger.Warn(PluginLoggerPrefix + " Embedding is not a []float32, continuing without caching")
				return res, nil, nil
			}
			embedding = vec
		}
	}

	// Get the provider from context
	provider, ok := ctx.Value(requestProviderKey).(schemas.ModelProvider)
	if !ok {
		plugin.logger.Warn(PluginLoggerPrefix + " Provider is not a schemas.ModelProvider, continuing without caching")
		return res, nil, nil
	}
	// Get the model from context
	model, ok := ctx.Value(requestModelKey).(string)
	if !ok {
		plugin.logger.Warn(PluginLoggerPrefix + " Model is not a string, continuing without caching")
		return res, nil, nil
	}

	isFinalChunk := bifrost.IsFinalChunk(ctx)

	// Get the input tokens from context (absent when no embedding was generated).
	// Cache debug info is attached to non-streaming responses and to the final
	// chunk of a streaming response only.
	if inputTokens, ok := ctx.Value(requestEmbeddingTokensKey).(int); ok {
		if !bifrost.IsStreamRequestType(requestType) || isFinalChunk {
			if extraFields.CacheDebug == nil {
				extraFields.CacheDebug = &schemas.BifrostCacheDebug{}
			}
			extraFields.CacheDebug.CacheHit = false
			extraFields.CacheDebug.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider))
			extraFields.CacheDebug.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel)
			extraFields.CacheDebug.InputTokens = &inputTokens
		}
	}

	// A per-request TTL on the context overrides the configured TTL
	cacheTTL := plugin.config.TTL
	if ttlValue := ctx.Value(CacheTTLKey); ttlValue != nil {
		if ttl, ok := ttlValue.(time.Duration); ok {
			cacheTTL = ttl
		} else {
			plugin.logger.Warn(PluginLoggerPrefix + " TTL is not a time.Duration, using default TTL")
		}
	}

	// Get metadata from context BEFORE goroutine to avoid race conditions
	// when the same context is reused across multiple requests
	paramsHash, _ := ctx.Value(requestParamsHashKey).(string)

	// Pass nil for embedding if we're in direct-only mode to optimize storage
	embeddingToStore := embedding
	if !shouldStoreEmbeddings {
		embeddingToStore = nil
	}

	// Cache everything in a unified VectorEntry asynchronously to avoid blocking the response
	plugin.waitGroup.Add(1)
	go func() {
		defer plugin.waitGroup.Done()

		// Create a background context with timeout for the cache operation
		cacheCtx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout)
		defer cancel()

		// Build unified metadata with provider, model, and all params
		unifiedMetadata := plugin.buildUnifiedMetadata(provider, model, paramsHash, hash, cacheKey, cacheTTL)

		// Handle streaming vs non-streaming responses
		if bifrost.IsStreamRequestType(requestType) {
			if err := plugin.addStreamingResponse(cacheCtx, requestID, storageID, res, bifrostErr, embeddingToStore, unifiedMetadata, cacheTTL, isFinalChunk); err != nil {
				plugin.logger.Warn("%s Failed to cache streaming response: %v", PluginLoggerPrefix, err)
			}
			return
		}
		if err := plugin.addSingleResponse(cacheCtx, storageID, res, embeddingToStore, unifiedMetadata, cacheTTL); err != nil {
			plugin.logger.Warn("%s Failed to cache single response: %v", PluginLoggerPrefix, err)
		}
	}()

	return res, nil, nil
}
// WaitForPendingOperations waits on the plugin's wait group, returning once
// every in-flight background cache write has finished. Tests call it so that
// entries are stored before they check for cache hits.
func (plugin *Plugin) WaitForPendingOperations() {
	plugin.waitGroup.Wait()
}
// Cleanup shuts the semantic cache plugin down.
//
// It always performs these steps:
//  1. Waits for pending background cache writes
//  2. Cleans up old stream accumulators
//  3. Shuts down the internal Bifrost client used for embeddings, if one exists
//
// When CleanUpOnShutdown is enabled it additionally:
//  4. Deletes every entry whose "from_bifrost_semantic_cache_plugin" field is true
//     from the plugin's namespace
//  5. Deletes the namespace itself
//
// Per-entry deletion failures are logged as warnings and do not abort cleanup.
//
// Returns:
//   - error: A wrapped error when the bulk delete or the namespace delete fails, nil otherwise
func (plugin *Plugin) Cleanup() error {
	// Let in-flight cache writes finish before tearing anything down.
	plugin.waitGroup.Wait()

	// Clean up old stream accumulators first
	plugin.cleanupOldStreamAccumulators()

	// Shutdown the internal Bifrost client used for embeddings
	if plugin.client != nil {
		plugin.client.Shutdown()
	}

	// Stored entries are only removed when the config asks for it.
	if !plugin.config.CleanUpOnShutdown {
		plugin.logger.Debug(PluginLoggerPrefix + " Cleanup on shutdown is disabled, skipping cache cleanup")
		return nil
	}

	// One timeout covers both the bulk delete and the namespace delete.
	ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout)
	defer cancel()

	plugin.logger.Debug(PluginLoggerPrefix + " Starting cleanup of cache entries...")

	// Select only the entries this plugin created.
	ownedByPlugin := vectorstore.Query{
		Field:    "from_bifrost_semantic_cache_plugin",
		Operator: vectorstore.QueryOperatorEqual,
		Value:    true,
	}
	results, err := plugin.store.DeleteAll(ctx, plugin.config.VectorStoreNamespace, []vectorstore.Query{ownedByPlugin})
	if err != nil {
		return fmt.Errorf("failed to delete cache entries: %w", err)
	}
	for _, result := range results {
		if result.Status != vectorstore.DeleteStatusError {
			continue
		}
		plugin.logger.Warn("%s Failed to delete cache entry: %s", PluginLoggerPrefix, result.Error)
	}
	plugin.logger.Info("%s Cleanup completed - deleted all cache entries", PluginLoggerPrefix)

	err = plugin.store.DeleteNamespace(ctx, plugin.config.VectorStoreNamespace)
	if err != nil {
		return fmt.Errorf("failed to delete namespace: %w", err)
	}
	return nil
}
// Public Methods for External Use
// ClearCacheForKey deletes cache entries for a specific cache key.
// It issues a single DeleteAll against the plugin's namespace, matching entries
// whose "cache_key" equals cacheKey and that were created by this plugin.
// Per-entry deletion failures are logged as warnings and do not fail the call.
//
// Parameters:
//   - cacheKey: The specific cache key to delete
//
// Returns:
//   - error: The error returned by the bulk delete, nil otherwise
func (plugin *Plugin) ClearCacheForKey(cacheKey string) error {
	// Delete all entries with "cache_key" equal to the given cacheKey,
	// restricted to entries created by this plugin.
	queries := []vectorstore.Query{
		{
			Field:    "cache_key",
			Operator: vectorstore.QueryOperatorEqual,
			Value:    cacheKey,
		},
		{
			Field:    "from_bifrost_semantic_cache_plugin",
			Operator: vectorstore.QueryOperatorEqual,
			Value:    true,
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout)
	defer cancel()

	results, err := plugin.store.DeleteAll(ctx, plugin.config.VectorStoreNamespace, queries)
	if err != nil {
		plugin.logger.Warn("%s Failed to delete cache entries for key '%s': %v", PluginLoggerPrefix, cacheKey, err)
		return err
	}
	for _, result := range results {
		if result.Status == vectorstore.DeleteStatusError {
			plugin.logger.Warn("%s Failed to delete cache entry for key %s: %s", PluginLoggerPrefix, result.ID, result.Error)
		}
	}

	// Pass the key as a format argument, like the Warn calls above. A
	// pre-formatted string used as the format would be mangled whenever the
	// caller-supplied key contains a '%'.
	plugin.logger.Debug("%s Deleted all cache entries for key %s", PluginLoggerPrefix, cacheKey)
	return nil
}
// ClearCacheForRequestID deletes the cache entry stored under a specific request ID.
// It issues a single Delete against the plugin's namespace using requestID as the entry ID.
//
// Parameters:
//   - requestID: The UUID-based request ID to delete the cache entry for
//
// Returns:
//   - error: The error returned by the delete, nil otherwise
func (plugin *Plugin) ClearCacheForRequestID(requestID string) error {
	ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout)
	defer cancel()

	if err := plugin.store.Delete(ctx, plugin.config.VectorStoreNamespace, requestID); err != nil {
		plugin.logger.Warn("%s Failed to delete cache entry: %v", PluginLoggerPrefix, err)
		return err
	}

	// Pass the ID as a format argument, like the Warn call above. A
	// pre-formatted string used as the format would be mangled whenever the
	// caller-supplied ID contains a '%'.
	plugin.logger.Debug("%s Deleted cache entry for key %s", PluginLoggerPrefix, requestID)
	return nil
}

View File

@@ -0,0 +1,924 @@
package semanticcache
import (
"context"
"errors"
"sync"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/vectorstore"
)
// TestCacheTypeDirectOnly tests that CacheTypeKey set to "direct" only performs direct hash matching.
// The test is skipped when the cache-populating request fails, since there is
// then nothing to look up.
func TestCacheTypeDirectOnly(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// First, cache a response using CacheTypeDirect so it is stored under the deterministic ID
	ctx1 := CreateContextWithCacheKeyAndType("test-cache-type-direct", CacheTypeDirect)
	testRequest := CreateBasicChatRequest("What is Bifrost?", 0.7, 50)

	t.Log("Making first request to populate cache...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: first request failed, cache could not be populated: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Now test with CacheTypeKey set to direct only
	ctx2 := CreateContextWithCacheKeyAndType("test-cache-type-direct", CacheTypeDirect)
	t.Log("Making second request with CacheTypeKey=direct...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		// err2.Error may be nil; guard before reading its message.
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		} else {
			t.Fatalf("Second request failed: %v", err2)
		}
	}

	// Should be a cache hit from direct search
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
	t.Log("✅ CacheTypeKey=direct correctly performs only direct hash matching")
}
// TestCacheTypeSemanticOnly tests that CacheTypeKey set to "semantic" only performs semantic search.
// A semantic hit is not guaranteed for the reworded prompt, so both a hit of
// type "semantic" and a miss are accepted. The test is skipped when the
// cache-populating request fails.
func TestCacheTypeSemanticOnly(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// First, cache a response using normal behavior
	ctx1 := CreateContextWithCacheKey("test-cache-type-semantic")
	testRequest := CreateBasicChatRequest("Explain machine learning concepts", 0.7, 50)

	t.Log("Making first request to populate cache...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: first request failed, cache could not be populated: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test with slightly different wording that should match semantically but not directly
	similarRequest := CreateBasicChatRequest("Can you explain concepts in machine learning", 0.7, 50)

	// Try with semantic-only search
	ctx2 := CreateContextWithCacheKeyAndType("test-cache-type-semantic", CacheTypeSemantic)
	t.Log("Making second request with similar content and CacheTypeKey=semantic...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, similarRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		} else {
			t.Fatalf("Second request failed: %v", err2)
		}
	}

	// This might be a cache hit if semantic similarity is high enough
	// The test validates that semantic search is attempted
	if response2.ExtraFields.CacheDebug != nil && response2.ExtraFields.CacheDebug.CacheHit {
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "semantic")
		t.Log("✅ CacheTypeKey=semantic correctly found semantic match")
	} else {
		t.Log(" No semantic match found (threshold may be too high for these similar phrases)")
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})
	}
	t.Log("✅ CacheTypeKey=semantic correctly performs only semantic search")
}
// TestCacheTypeDirectWithSemanticFallback tests the default behavior (both direct and semantic).
// An identical request must hit the direct cache; a reworded request may hit
// the semantic cache or miss. The test is skipped when the cache-populating
// request fails.
func TestCacheTypeDirectWithSemanticFallback(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Cache a response first
	ctx1 := CreateContextWithCacheKey("test-cache-type-fallback")
	testRequest := CreateBasicChatRequest("Define artificial intelligence", 0.7, 50)

	t.Log("Making first request to populate cache...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: first request failed, cache could not be populated: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test exact match (should hit direct cache)
	ctx2 := CreateContextWithCacheKey("test-cache-type-fallback")
	t.Log("Making second identical request (should hit direct cache)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		} else {
			t.Fatalf("Second request failed: %v", err2)
		}
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")

	// Test similar request (should potentially hit semantic cache).
	// The context from the second request is reused here.
	similarRequest := CreateBasicChatRequest("What is artificial intelligence", 0.7, 50)
	t.Log("Making third similar request (should attempt semantic match)...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx2, similarRequest)
	if err3 != nil {
		t.Fatalf("Third request failed: %v", err3)
	}

	// May or may not be a cache hit depending on semantic similarity
	if response3.ExtraFields.CacheDebug != nil && response3.ExtraFields.CacheDebug.CacheHit {
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "semantic")
		t.Log("✅ Default behavior correctly found semantic match")
	} else {
		t.Log(" No semantic match found (normal for different wording)")
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3})
	}
	t.Log("✅ Default behavior correctly attempts both direct and semantic search")
}
// TestCacheTypeInvalidValue tests behavior with invalid CacheTypeKey values.
// A plain string is stored under CacheTypeKey instead of a CacheType; the
// request is expected to go through as an ordinary cache miss. The test is
// skipped when the request itself fails.
func TestCacheTypeInvalidValue(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Create context with invalid cache type
	ctx := CreateContextWithCacheKey("test-invalid-cache-type")
	ctx = ctx.WithValue(CacheTypeKey, "invalid_type")
	testRequest := CreateBasicChatRequest("Test invalid cache type", 0.7, 50)

	t.Log("Making request with invalid CacheTypeKey value...")
	response, err := setup.Client.ChatCompletionRequest(ctx, testRequest)
	if err != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: request failed: %v", err)
	}

	// Should fall back to default behavior (both direct and semantic)
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response})
	t.Log("✅ Invalid CacheTypeKey value falls back to default behavior")
}
// TestCacheTypeWithEmbeddingRequests tests CacheTypeKey behavior with embedding requests.
// A repeated embedding request must hit the direct cache when CacheTypeKey is
// direct and must miss when CacheTypeKey is semantic. The test is skipped when
// the cache-populating request fails.
func TestCacheTypeWithEmbeddingRequests(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding with cache type"})

	// Cache first request
	ctx1 := CreateContextWithCacheKey("test-embedding-cache-type")
	t.Log("Making first embedding request...")
	response1, err1 := setup.Client.EmbeddingRequest(ctx1, embeddingRequest)
	if err1 != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: first request failed, cache could not be populated: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1})
	WaitForCache(setup.Plugin)

	// Test with direct-only cache type
	ctx2 := CreateContextWithCacheKeyAndType("test-embedding-cache-type", CacheTypeDirect)
	t.Log("Making second embedding request with CacheTypeKey=direct...")
	response2, err2 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		} else {
			t.Fatalf("Second request failed: %v", err2)
		}
	}
	AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct")

	// Test with semantic-only cache type (should not find semantic match for embeddings)
	ctx3 := CreateContextWithCacheKeyAndType("test-embedding-cache-type", CacheTypeSemantic)
	t.Log("Making third embedding request with CacheTypeKey=semantic...")
	response3, err3 := setup.Client.EmbeddingRequest(ctx3, embeddingRequest)
	if err3 != nil {
		t.Fatalf("Third request failed: %v", err3)
	}

	// Semantic search should be skipped for embedding requests
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response3})
	t.Log("✅ CacheTypeKey works correctly with embedding requests")
}
// TestCacheTypePerformanceCharacteristics checks that a repeated request is a
// direct cache hit both with CacheTypeKey=direct and with the default cache
// type, and logs how long each lookup took. The durations are only logged, not
// asserted. The test is skipped when the cache-populating request fails.
func TestCacheTypePerformanceCharacteristics(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("Performance test for cache types", 0.7, 50)

	// Cache first request using CacheTypeDirect so it is stored under the deterministic ID
	ctx1 := CreateContextWithCacheKeyAndType("test-cache-performance", CacheTypeDirect)
	t.Log("Making first request to populate cache...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		// Report the skip explicitly; a bare return would count as a pass
		// even though nothing was verified.
		t.Skipf("Skipping: first request failed, cache could not be populated: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test direct-only performance
	ctx2 := CreateContextWithCacheKeyAndType("test-cache-performance", CacheTypeDirect)
	start2 := time.Now()
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	duration2 := time.Since(start2)
	if err2 != nil {
		t.Fatalf("Direct cache request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
	t.Logf("Direct cache lookup took: %v", duration2)

	// Test default behavior (both direct and semantic) performance
	ctx3 := CreateContextWithCacheKey("test-cache-performance")
	start3 := time.Now()
	response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest)
	duration3 := time.Since(start3)
	if err3 != nil {
		t.Fatalf("Default cache request failed: %v", err3)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct")
	t.Logf("Default cache lookup took: %v", duration3)

	// Both should be fast since they hit direct cache
	// Direct-only might be slightly faster as it doesn't need to prepare for semantic fallback
	t.Log("✅ Cache type performance characteristics validated")
}
// directFastPathStore is an in-memory vector store stand-in for tests. It keeps
// added entries in a map keyed by ID and records how GetChunk and GetAll were
// called. It performs no locking of its own.
type directFastPathStore struct {
	chunks         map[string]vectorstore.SearchResult // entries written by Add, keyed by ID
	addIDs         []string                            // IDs passed to Add, in call order
	getChunkCalls  int                                 // number of GetChunk calls
	getAllCalls    int                                 // number of GetAll calls
	lastGetChunkID string                              // ID of the most recent GetChunk call
	lastGetAllCtx  context.Context                     // context of the most recent GetAll call
	getAllErr      error                               // when non-nil, returned by GetAll
}

// newDirectFastPathStore returns an empty store with its chunk map allocated.
func newDirectFastPathStore() *directFastPathStore {
	store := &directFastPathStore{}
	store.chunks = make(map[string]vectorstore.SearchResult)
	return store
}

// Ping always succeeds.
func (s *directFastPathStore) Ping(ctx context.Context) error {
	return nil
}

// CreateNamespace is a no-op that always succeeds.
func (s *directFastPathStore) CreateNamespace(ctx context.Context, namespace string, dimension int, properties map[string]vectorstore.VectorStoreProperties) error {
	return nil
}

// DeleteNamespace is a no-op that always succeeds.
func (s *directFastPathStore) DeleteNamespace(ctx context.Context, namespace string) error {
	return nil
}

// GetChunk records the call and returns the entry stored under id, or
// vectorstore.ErrNotFound when no such entry exists.
func (s *directFastPathStore) GetChunk(ctx context.Context, namespace string, id string) (vectorstore.SearchResult, error) {
	s.getChunkCalls++
	s.lastGetChunkID = id
	if entry, found := s.chunks[id]; found {
		return entry, nil
	}
	return vectorstore.SearchResult{}, vectorstore.ErrNotFound
}

// GetChunks is not supported by this store.
func (s *directFastPathStore) GetChunks(ctx context.Context, namespace string, ids []string) ([]vectorstore.SearchResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// GetAll records the call and its context, then fails: with getAllErr when it
// is set, otherwise with vectorstore.ErrNotSupported.
func (s *directFastPathStore) GetAll(ctx context.Context, namespace string, queries []vectorstore.Query, selectFields []string, cursor *string, limit int64) ([]vectorstore.SearchResult, *string, error) {
	s.getAllCalls++
	s.lastGetAllCtx = ctx
	failure := s.getAllErr
	if failure == nil {
		failure = vectorstore.ErrNotSupported
	}
	return nil, nil, failure
}

// GetNearest is not supported by this store.
func (s *directFastPathStore) GetNearest(ctx context.Context, namespace string, vector []float32, queries []vectorstore.Query, selectFields []string, threshold float64, limit int64) ([]vectorstore.SearchResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// RequiresVectors reports false: entries may be stored without an embedding.
func (s *directFastPathStore) RequiresVectors() bool {
	return false
}

// Add records id and stores the metadata under it, replacing any previous
// entry with the same ID. The embedding is ignored.
func (s *directFastPathStore) Add(ctx context.Context, namespace string, id string, embedding []float32, metadata map[string]any) error {
	entry := vectorstore.SearchResult{ID: id, Properties: metadata}
	s.addIDs = append(s.addIDs, id)
	s.chunks[id] = entry
	return nil
}

// Delete is a no-op that always succeeds; stored entries are left in place.
func (s *directFastPathStore) Delete(ctx context.Context, namespace string, id string) error {
	return nil
}

// DeleteAll is not supported by this store.
func (s *directFastPathStore) DeleteAll(ctx context.Context, namespace string, queries []vectorstore.Query) ([]vectorstore.DeleteResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// Close is a no-op that always succeeds.
func (s *directFastPathStore) Close(ctx context.Context, namespace string) error {
	return nil
}
// newCrossProviderChatRequest builds a request of the given request type whose
// chat payload targets provider/model and carries a single user message with
// prompt as its text content.
func newCrossProviderChatRequest(provider schemas.ModelProvider, model string, requestType schemas.RequestType, prompt string) *schemas.BifrostRequest {
	userMessage := schemas.ChatMessage{
		Role:    schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr(prompt)},
	}
	chatRequest := &schemas.BifrostChatRequest{
		Provider: provider,
		Model:    model,
		Input:    []schemas.ChatMessage{userMessage},
	}
	return &schemas.BifrostRequest{
		RequestType: requestType,
		ChatRequest: chatRequest,
	}
}
// TestDirectCacheHitPreservesCachedProviderMetadataAcrossProviders seeds the
// direct cache with an OpenAI response while CacheByProvider and CacheByModel
// are turned off, then sends the same prompt as an Anthropic request. The hit
// must report the cached response's provider and model in ExtraFields, while
// cache_debug must carry the provider and model that were actually requested.
func TestDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t *testing.T) {
	const (
		cacheKey       = "cross-provider-direct-single"
		prompt         = "Explain green threading in Go in one short sentence."
		cachedModel    = "gpt-5.2"
		requestedModel = "claude-sonnet-4-6"
	)

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	config := getDefaultTestConfig()
	config.CacheByProvider = bifrost.Ptr(false)
	config.CacheByModel = bifrost.Ptr(false)
	config.ConversationHistoryThreshold = DefaultConversationHistoryThreshold
	plugin := &Plugin{
		store:  store,
		config: config,
		logger: logger,
	}

	// Seed: the first request must miss, then its response is written through PostLLMHook.
	seedCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	seedReq := newCrossProviderChatRequest(schemas.OpenAI, cachedModel, schemas.ChatCompletionRequest, prompt)
	_, shortCircuit, err := plugin.PreLLMHook(seedCtx, seedReq)
	if err != nil {
		t.Fatalf("seed PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("expected seed request to miss cache")
	}
	seedResponse := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "cross-provider-direct-single",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("Go schedules lightweight goroutines in user space onto a smaller pool of OS threads."),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: cachedModel,
				ResolvedModelUsed:      cachedModel,
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}
	if _, _, err = plugin.PostLLMHook(seedCtx, seedResponse, nil); err != nil {
		t.Fatalf("seed PostLLMHook failed: %v", err)
	}
	plugin.WaitForPendingOperations()

	// Hit: same cache key and prompt, different provider and model.
	hitCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	hitReq := newCrossProviderChatRequest(schemas.Anthropic, requestedModel, schemas.ChatCompletionRequest, prompt)
	_, shortCircuit, err = plugin.PreLLMHook(hitCtx, hitReq)
	if err != nil {
		t.Fatalf("hit PreLLMHook failed: %v", err)
	}
	if shortCircuit == nil || shortCircuit.Response == nil || shortCircuit.Response.ChatResponse == nil {
		t.Fatal("expected cross-provider direct cache hit to return a response")
	}

	extraFields := shortCircuit.Response.ChatResponse.ExtraFields
	if extraFields.Provider != schemas.OpenAI {
		t.Fatalf("expected cached provider %q, got %q", schemas.OpenAI, extraFields.Provider)
	}
	if extraFields.OriginalModelRequested != cachedModel {
		t.Fatalf("expected OriginalModelRequested %q, got %q", cachedModel, extraFields.OriginalModelRequested)
	}
	if extraFields.ResolvedModelUsed != cachedModel {
		t.Fatalf("expected ResolvedModelUsed %q, got %q", cachedModel, extraFields.ResolvedModelUsed)
	}

	debug := extraFields.CacheDebug
	if debug == nil {
		t.Fatal("expected cache_debug on cache hit")
	}
	if !debug.CacheHit {
		t.Fatal("expected cache hit to be marked in cache_debug")
	}

	// The cache_debug fields below are pointers. Dereference them before
	// formatting so a failure shows the actual value rather than an address.
	switch got := debug.HitType; {
	case got == nil:
		t.Fatalf("expected hit_type %q, got nil", CacheTypeDirect)
	case *got != string(CacheTypeDirect):
		t.Fatalf("expected hit_type %q, got %q", CacheTypeDirect, *got)
	}
	switch got := debug.RequestedProvider; {
	case got == nil:
		t.Fatalf("expected requested_provider %q, got nil", schemas.Anthropic)
	case *got != string(schemas.Anthropic):
		t.Fatalf("expected requested_provider %q, got %q", schemas.Anthropic, *got)
	}
	switch got := debug.RequestedModel; {
	case got == nil:
		t.Fatalf("expected requested_model %q, got nil", requestedModel)
	case *got != requestedModel:
		t.Fatalf("expected requested_model %q, got %q", requestedModel, *got)
	}
}
// TestStreamingDirectCacheHitPreservesCachedProviderMetadataAcrossProviders is
// the streaming variant of the cross-provider direct-cache test. It seeds the
// cache with two OpenAI stream chunks, then replays the same prompt as an
// Anthropic streaming request. Every replayed chunk must keep the cached
// provider and model, and the final chunk must carry cache_debug describing a
// direct hit for the requested provider and model.
func TestStreamingDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t *testing.T) {
	const (
		cacheKey       = "cross-provider-direct-stream"
		prompt         = "Explain green threading in Go in one short sentence."
		cachedModel    = "gpt-5.2"
		requestedModel = "claude-sonnet-4-6"
	)

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	config := getDefaultTestConfig()
	config.CacheByProvider = bifrost.Ptr(false)
	config.CacheByModel = bifrost.Ptr(false)
	config.ConversationHistoryThreshold = DefaultConversationHistoryThreshold
	plugin := &Plugin{
		store:  store,
		config: config,
		logger: logger,
	}

	seedCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	seedReq := newCrossProviderChatRequest(schemas.OpenAI, cachedModel, schemas.ChatCompletionStreamRequest, prompt)
	_, shortCircuit, err := plugin.PreLLMHook(seedCtx, seedReq)
	if err != nil {
		t.Fatalf("seed PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("expected seed request to miss cache")
	}

	chunks := []struct {
		content      string
		chunkIndex   int
		finishReason *string
		streamEnd    bool
	}{
		{content: "Go schedules lightweight goroutines", chunkIndex: 0, finishReason: nil, streamEnd: false},
		{content: " onto a smaller pool of OS threads.", chunkIndex: 1, finishReason: bifrost.Ptr("stop"), streamEnd: true},
	}
	for _, seedChunk := range chunks {
		// The stream-end indicator is set on the context before each chunk is
		// handed to PostLLMHook; only the last chunk sets it to true.
		seedCtx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, seedChunk.streamEnd)
		chunkResponse := &schemas.BifrostResponse{
			ChatResponse: &schemas.BifrostChatResponse{
				ID: "cross-provider-direct-stream",
				Choices: []schemas.BifrostResponseChoice{
					{
						Index:        seedChunk.chunkIndex,
						FinishReason: seedChunk.finishReason,
						ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{
							Delta: &schemas.ChatStreamResponseChoiceDelta{
								Content: bifrost.Ptr(seedChunk.content),
							},
						},
					},
				},
				ExtraFields: schemas.BifrostResponseExtraFields{
					Provider:               schemas.OpenAI,
					OriginalModelRequested: cachedModel,
					ResolvedModelUsed:      cachedModel,
					RequestType:            schemas.ChatCompletionStreamRequest,
					ChunkIndex:             seedChunk.chunkIndex,
				},
			},
		}
		if _, _, err = plugin.PostLLMHook(seedCtx, chunkResponse, nil); err != nil {
			t.Fatalf("seed PostLLMHook failed for chunk %d: %v", seedChunk.chunkIndex, err)
		}
		plugin.WaitForPendingOperations()
	}

	hitCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	hitReq := newCrossProviderChatRequest(schemas.Anthropic, requestedModel, schemas.ChatCompletionStreamRequest, prompt)
	_, shortCircuit, err = plugin.PreLLMHook(hitCtx, hitReq)
	if err != nil {
		t.Fatalf("hit PreLLMHook failed: %v", err)
	}
	if shortCircuit == nil || shortCircuit.Stream == nil {
		t.Fatal("expected cross-provider streaming direct cache hit to return a stream")
	}

	chunkCount := 0
	for streamed := range shortCircuit.Stream {
		if streamed.BifrostChatResponse == nil {
			t.Fatal("expected cached chat stream chunk")
		}
		extraFields := streamed.BifrostChatResponse.ExtraFields
		if extraFields.Provider != schemas.OpenAI {
			t.Fatalf("expected cached provider %q on chunk %d, got %q", schemas.OpenAI, chunkCount, extraFields.Provider)
		}
		if extraFields.OriginalModelRequested != cachedModel {
			t.Fatalf("expected OriginalModelRequested %q on chunk %d, got %q", cachedModel, chunkCount, extraFields.OriginalModelRequested)
		}
		if extraFields.ResolvedModelUsed != cachedModel {
			t.Fatalf("expected ResolvedModelUsed %q on chunk %d, got %q", cachedModel, chunkCount, extraFields.ResolvedModelUsed)
		}

		// cache_debug is only asserted on the last chunk of the replayed stream.
		if chunkCount == len(chunks)-1 {
			debug := extraFields.CacheDebug
			if debug == nil || !debug.CacheHit {
				t.Fatal("expected final cached stream chunk to include cache_debug cache_hit=true")
			}
			// Dereference pointer fields before formatting so a failure shows
			// the actual value rather than an address.
			switch got := debug.HitType; {
			case got == nil:
				t.Fatalf("expected final stream hit_type %q, got nil", CacheTypeDirect)
			case *got != string(CacheTypeDirect):
				t.Fatalf("expected final stream hit_type %q, got %q", CacheTypeDirect, *got)
			}
			switch got := debug.RequestedProvider; {
			case got == nil:
				t.Fatalf("expected final stream requested_provider %q, got nil", schemas.Anthropic)
			case *got != string(schemas.Anthropic):
				t.Fatalf("expected final stream requested_provider %q, got %q", schemas.Anthropic, *got)
			}
			switch got := debug.RequestedModel; {
			case got == nil:
				t.Fatalf("expected final stream requested_model %q, got nil", requestedModel)
			case *got != requestedModel:
				t.Fatalf("expected final stream requested_model %q, got %q", requestedModel, *got)
			}
		}
		chunkCount++
	}
	if chunkCount != len(chunks) {
		t.Fatalf("expected %d cached stream chunks, got %d", len(chunks), chunkCount)
	}
}
// TestCacheTypeDirectUsesChunkLookup plants a serialized response in the stub
// store under the id returned by prepareDirectCacheLookup and verifies that
// performDirectChunkLookup serves it with exactly one GetChunk call on that id
// and no GetAll call.
func TestCacheTypeDirectUsesChunkLookup(t *testing.T) {
	const cacheKey = "chunk-fast-path"

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}
	ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)

	directID, err := plugin.prepareDirectCacheLookup(ctx, request, cacheKey)
	if err != nil {
		t.Fatalf("prepareDirectCacheLookup failed: %v", err)
	}

	// Build the cached payload the store will hand back.
	assistantMessage := &schemas.ChatMessage{
		Role:    schemas.ChatMessageRoleAssistant,
		Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("cached response")},
	}
	cachedResponse := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Choices: []schemas.BifrostResponseChoice{
				{ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{Message: assistantMessage}},
			},
		},
	}
	payload, err := schemas.MarshalDeeplySorted(cachedResponse)
	if err != nil {
		t.Fatalf("failed to marshal cached response: %v", err)
	}

	// Plant the entry directly in the stub, expiring one minute from now.
	properties := map[string]interface{}{
		"response":   string(payload),
		"expires_at": time.Now().Add(time.Minute).Unix(),
	}
	store.chunks[directID] = vectorstore.SearchResult{ID: directID, Properties: properties}

	hit, err := plugin.performDirectChunkLookup(ctx, request, cacheKey)
	if err != nil {
		t.Fatalf("performDirectChunkLookup failed: %v", err)
	}
	if hit == nil || hit.Response == nil || hit.Response.ChatResponse == nil {
		t.Fatal("expected direct chunk lookup to return cached response")
	}

	if calls := store.getChunkCalls; calls != 1 {
		t.Fatalf("expected one GetChunk call, got %d", calls)
	}
	if calls := store.getAllCalls; calls != 0 {
		t.Fatalf("expected no GetAll calls, got %d", calls)
	}
	if lookedUp := store.lastGetChunkID; lookedUp != directID {
		t.Fatalf("expected GetChunk to use %q, got %q", directID, lookedUp)
	}
}
// TestDefaultDirectSearchSetsStorageIDForDeterministicWrites runs
// performDirectSearch with a context that carries only a cache key (no cache
// type) and checks that it leaves a non-empty requestStorageIDKey on the
// context and issues exactly one GetChunk call.
func TestDefaultDirectSearchSetsStorageIDForDeterministicWrites(t *testing.T) {
	const cacheKey = "default-mode"

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}
	ctx := CreateContextWithCacheKey(cacheKey)

	// ErrNotSupported is tolerated here; presumably it surfaces from the
	// stub's GetAll, which does not implement listing.
	if _, err := plugin.performDirectSearch(ctx, request, cacheKey); err != nil && !errors.Is(err, vectorstore.ErrNotSupported) {
		t.Fatalf("performDirectSearch failed: %v", err)
	}

	if id, _ := ctx.Value(requestStorageIDKey).(string); id == "" {
		t.Fatal("expected default direct search to set requestStorageIDKey")
	}
	if calls := store.getChunkCalls; calls != 1 {
		t.Fatalf("expected one GetChunk call, got %d", calls)
	}
}
// TestPreLLMHookClearsStaleStorageIDOnReusedContext pre-populates the context
// with a leftover requestStorageIDKey value and checks that PreLLMHook ends up
// with a different, non-empty storage id on that context.
func TestPreLLMHookClearsStaleStorageIDOnReusedContext(t *testing.T) {
	const staleID = "stale-storage-id"

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	config := getDefaultTestConfig()
	config.ConversationHistoryThreshold = 3
	plugin := &Plugin{store: store, config: config, logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}

	// Simulate a context reused from an earlier request.
	ctx := CreateContextWithCacheKey("reused-context")
	ctx.SetValue(requestStorageIDKey, staleID)

	if _, _, err := plugin.PreLLMHook(ctx, request); err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}

	switch current, _ := ctx.Value(requestStorageIDKey).(string); current {
	case "":
		t.Fatal("expected PreLLMHook to replace stale requestStorageIDKey with a deterministic id")
	case staleID:
		t.Fatal("expected PreLLMHook to clear stale requestStorageIDKey before setting a deterministic id")
	}
}
// TestCacheTypeDirectStoresDeterministicID checks that, in direct cache mode,
// PostLLMHook writes the response to the store under the id placed in
// requestStorageIDKey (the value returned by prepareDirectCacheLookup) and not
// under the request UUID.
func TestCacheTypeDirectStoresDeterministicID(t *testing.T) {
	const (
		cacheKey    = "deterministic-store"
		requestUUID = "request-uuid"
	)

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}

	// Populate the per-request values by hand, since PreLLMHook is not called here.
	ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	ctx.SetValue(requestIDKey, requestUUID)
	ctx.SetValue(requestProviderKey, schemas.OpenAI)
	ctx.SetValue(requestModelKey, request.ChatRequest.Model)

	directID, err := plugin.prepareDirectCacheLookup(ctx, request, cacheKey)
	if err != nil {
		t.Fatalf("prepareDirectCacheLookup failed: %v", err)
	}
	ctx.SetValue(requestStorageIDKey, directID)

	assistantMessage := &schemas.ChatMessage{
		Role:    schemas.ChatMessageRoleAssistant,
		Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("stored response")},
	}
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Choices: []schemas.BifrostResponseChoice{
				{ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{Message: assistantMessage}},
			},
		},
	}
	response.ChatResponse.ExtraFields.RequestType = schemas.ChatCompletionRequest

	if _, _, hookErr := plugin.PostLLMHook(ctx, response, nil); hookErr != nil {
		t.Fatalf("PostLLMHook failed: %v", hookErr)
	}
	plugin.WaitForPendingOperations()

	if n := len(store.addIDs); n != 1 {
		t.Fatalf("expected one store.Add call, got %d", n)
	}
	storedID := store.addIDs[0]
	if storedID != directID {
		t.Fatalf("expected deterministic storage id %q, got %q", directID, storedID)
	}
	if storedID == requestUUID {
		t.Fatal("expected storage id to differ from request UUID")
	}
}
// TestPostLLMHookUsesDeterministicStorageIDOutsideDirectMode checks that
// PostLLMHook stores the response under the id found in requestStorageIDKey
// even when the context carries no explicit cache type.
func TestPostLLMHookUsesDeterministicStorageIDOutsideDirectMode(t *testing.T) {
	const (
		cacheKey    = "default-mode-store"
		model       = "openai/gpt-4o-mini"
		requestHash = "request-hash"
		paramsHash  = "params-hash"
	)

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	assistantMessage := &schemas.ChatMessage{
		Role:    schemas.ChatMessageRoleAssistant,
		Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("stored response")},
	}
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			Choices: []schemas.BifrostResponseChoice{
				{ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{Message: assistantMessage}},
			},
		},
	}
	response.ChatResponse.ExtraFields.RequestType = schemas.ChatCompletionRequest

	// Populate the per-request values by hand, since PreLLMHook is not called here.
	ctx := CreateContextWithCacheKey(cacheKey)
	ctx.SetValue(requestIDKey, "request-uuid")
	ctx.SetValue(requestProviderKey, schemas.OpenAI)
	ctx.SetValue(requestModelKey, model)
	ctx.SetValue(requestHashKey, requestHash)
	ctx.SetValue(requestParamsHashKey, paramsHash)

	directID := plugin.generateDirectCacheID(schemas.OpenAI, model, cacheKey, requestHash, paramsHash)
	ctx.SetValue(requestStorageIDKey, directID)

	if _, _, err := plugin.PostLLMHook(ctx, response, nil); err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}
	plugin.WaitForPendingOperations()

	if n := len(store.addIDs); n != 1 {
		t.Fatalf("expected one store.Add call, got %d", n)
	}
	if storedID := store.addIDs[0]; storedID != directID {
		t.Fatalf("expected PostLLMHook to use deterministic storage id outside direct mode, got %q", storedID)
	}
}
// TestPerformDirectSearchDisablesScanFallbackForLegacyLookup checks that
// performDirectSearch reaches the store's GetAll exactly once and that the
// context passed to it satisfies vectorstore.IsScanFallbackDisabled.
func TestPerformDirectSearchDisablesScanFallbackForLegacyLookup(t *testing.T) {
	const cacheKey = "legacy-no-scan"

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}
	ctx := CreateContextWithCacheKey(cacheKey)

	// The stub's GetAll answers ErrNotSupported, so that error is tolerated here.
	if _, err := plugin.performDirectSearch(ctx, request, cacheKey); err != nil && !errors.Is(err, vectorstore.ErrNotSupported) {
		t.Fatalf("performDirectSearch failed: %v", err)
	}

	if calls := store.getAllCalls; calls != 1 {
		t.Fatalf("expected one legacy GetAll call, got %d", calls)
	}
	if scanDisabled := vectorstore.IsScanFallbackDisabled(store.lastGetAllCtx); !scanDisabled {
		t.Fatal("expected legacy direct lookup to disable scan fallback")
	}
}
// TestPerformLegacyDirectSearchTreatsQuerySyntaxErrorAsMiss configures the
// stub's GetAll to fail with vectorstore.ErrQuerySyntax and checks that
// performLegacyDirectSearch reports neither an error nor a hit, after calling
// GetAll exactly once.
func TestPerformLegacyDirectSearchTreatsQuerySyntaxErrorAsMiss(t *testing.T) {
	const cacheKey = "legacy-query-syntax"

	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
	store := newDirectFastPathStore()
	store.getAllErr = vectorstore.ErrQuerySyntax
	plugin := &Plugin{store: store, config: getDefaultTestConfig(), logger: logger}

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50),
	}
	ctx := CreateContextWithCacheKey(cacheKey)

	// Run the preparation step first; only its error is of interest here.
	if _, err := plugin.prepareDirectCacheLookup(ctx, request, cacheKey); err != nil {
		t.Fatalf("prepareDirectCacheLookup failed: %v", err)
	}

	hit, err := plugin.performLegacyDirectSearch(ctx, request, cacheKey)
	if err != nil {
		t.Fatalf("performLegacyDirectSearch failed: %v", err)
	}
	if hit != nil {
		t.Fatal("expected query syntax incompatibility to be treated as a miss")
	}
	if calls := store.getAllCalls; calls != 1 {
		t.Fatalf("expected one legacy GetAll call, got %d", calls)
	}
}
func TestGetOrCreateStreamAccumulatorUsesSingleAccumulatorPerRequest(t *testing.T) {
logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)
plugin := &Plugin{
logger: logger,
}
requestID := "stream-request"
storageID := "stream-storage"
embedding := []float32{1, 2, 3}
metadata := map[string]interface{}{"cache_key": "stream-cache"}
ttl := time.Minute
const workers = 8
results := make(chan *StreamAccumulator, workers)
var wg sync.WaitGroup
wg.Add(workers)
for range workers {
go func() {
defer wg.Done()
results <- plugin.getOrCreateStreamAccumulator(requestID, storageID, embedding, metadata, ttl)
}()
}
wg.Wait()
close(results)
var first *StreamAccumulator
for accumulator := range results {
if accumulator == nil {
t.Fatal("expected accumulator")
}
if first == nil {
first = accumulator
continue
}
if accumulator != first {
t.Fatal("expected all callers to receive the same accumulator instance")
}
}
stored, ok := plugin.streamAccumulators.Load(requestID)
if !ok {
t.Fatal("expected accumulator to be stored")
}
if stored.(*StreamAccumulator) != first {
t.Fatal("expected stored accumulator to match returned accumulator")
}
if first.StorageID != storageID {
t.Fatalf("expected storage id %q, got %q", storageID, first.StorageID)
}
if first.TTL != ttl {
t.Fatalf("expected ttl %v, got %v", ttl, first.TTL)
}
}

View File

@@ -0,0 +1,454 @@
package semanticcache
import (
"strconv"
"testing"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// TestConversationHistoryThresholdBasic exercises the conversation history
// threshold with a limit of 2 messages: a 2-message conversation is expected
// to be served from the direct cache on repeat, while a 5-message conversation
// is expected to bypass the cache on both attempts.
//
// When a request that is a precondition for the assertions fails, the test is
// reported as skipped rather than silently passing.
func TestConversationHistoryThresholdBasic(t *testing.T) {
	// Test with threshold of 2 messages
	setup := CreateTestSetupWithConversationThreshold(t, 2)
	defer setup.Cleanup()
	ctx := CreateContextWithCacheKey("test-conversation-threshold-basic")

	// Test 1: Conversation with exactly 2 messages (should cache)
	conversation1 := BuildConversationHistory("",
		[]string{"Hello", "Hi there!"},
	)
	request1 := CreateConversationRequest(conversation1, 0.7, 50)
	t.Log("Testing conversation with exactly 2 messages (at threshold)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, request1)
	if err1 != nil {
		// Nothing can be verified without a first response; surface a skip
		// instead of returning, which would be reported as a pass.
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request
	WaitForCache(setup.Plugin)

	// Verify it was cached
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, request1)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should be cached

	// Test 2: Conversation with 5 messages (exceeds threshold, should NOT cache)
	conversation2 := BuildConversationHistory("",
		[]string{"Hello", "Hi there!"},
		[]string{"How are you?", "I'm doing well!"},
	)
	messages2 := AddUserMessage(conversation2, "What's the weather?")
	request2 := CreateConversationRequest(messages2, 0.7, 50) // 5 messages total > 2
	t.Log("Testing conversation with 5 messages (exceeds threshold)...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx, request2)
	if err3 != nil {
		t.Skipf("Skipping: third request failed: %v", err3)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) // Should not cache
	WaitForCache(setup.Plugin)

	// Verify it was NOT cached
	t.Log("Verifying conversation exceeding threshold was not cached...")
	response4, err4 := setup.Client.ChatCompletionRequest(ctx, request2)
	if err4 != nil {
		t.Skipf("Skipping: fourth request failed: %v", err4)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) // Should still be fresh (not cached)
	t.Log("✅ Conversation history threshold works correctly")
}
// TestConversationHistoryThresholdWithSystemPrompt checks that a conversation
// of one system message plus two user/assistant pairs (5 messages) is not
// cached when the threshold is 3: both the first and the repeated request are
// expected to miss.
//
// When a request fails, the test is reported as skipped rather than silently
// passing.
func TestConversationHistoryThresholdWithSystemPrompt(t *testing.T) {
	// Test with threshold of 3, ExcludeSystemPrompt = false
	setup := CreateTestSetupWithConversationThreshold(t, 3)
	defer setup.Cleanup()
	ctx := CreateContextWithCacheKey("test-threshold-system-prompt")

	// System prompt + 2 user/assistant pairs = 5 messages total > 3
	conversation := BuildConversationHistory(
		"You are a helpful assistant", // System message (counts toward threshold)
		[]string{"Hello", "Hi there!"},
		[]string{"How are you?", "I'm doing well!"},
	)
	request := CreateConversationRequest(conversation, 0.7, 50)
	t.Log("Testing conversation with system prompt (5 total messages > 3 threshold)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, request)
	if err1 != nil {
		// Nothing can be verified without a response; surface a skip instead
		// of returning, which would be reported as a pass.
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Should not cache (exceeds threshold)
	WaitForCache(setup.Plugin)

	// Verify not cached
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, request)
	if err2 != nil {
		t.Skipf("Skipping: second request failed: %v", err2)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should not be cached
	t.Log("✅ Conversation threshold correctly counts system messages")
}
// TestConversationHistoryThresholdWithExcludeSystemPrompt tests the
// interaction between the conversation threshold and ExcludeSystemPrompt: with
// threshold=3 and ExcludeSystemPrompt=true, a conversation of one system
// message plus three non-system messages is expected to be cached and served
// as a direct hit on repeat.
//
// When the first request fails, the test is reported as skipped rather than
// silently passing.
func TestConversationHistoryThresholdWithExcludeSystemPrompt(t *testing.T) {
	// Create setup with both threshold=3 and ExcludeSystemPrompt=true
	setup := CreateTestSetupWithThresholdAndExcludeSystem(t, 3, true)
	defer setup.Cleanup()
	ctx := CreateContextWithCacheKey("test-threshold-exclude-system")

	// Create conversation with exactly 3 non-system messages to test threshold boundary
	// System + 1.5 user/assistant pairs = 4 messages total
	// With ExcludeSystemPrompt=true, should only count 3 non-system messages for threshold
	conversation := BuildConversationHistory(
		"You are helpful",       // System (excluded from count)
		[]string{"Hello", "Hi"}, // User + Assistant = 2 messages
		[]string{"Thanks", ""},  // User only = 1 message (no assistant response)
	)
	// No slicing needed; BuildConversationHistory skips empty assistant entries.
	request := CreateConversationRequest(conversation, 0.7, 50) // 3 non-system messages exactly
	t.Log("Testing threshold with ExcludeSystemPrompt=true (3 non-system messages = at threshold)...")

	// Test logic:
	// - Total messages: 4 (1 system + 3 others)
	// - With ExcludeSystemPrompt=true: counts as 3 non-system messages
	// - Threshold is 3, so 3 <= 3 should allow caching
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, request)
	if err1 != nil {
		// Nothing can be verified without a first response; surface a skip
		// instead of returning, which would be reported as a pass.
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request, should not hit cache
	WaitForCache(setup.Plugin)

	// Second request should hit cache (3 non-system messages <= 3 threshold)
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, request)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should cache since 3 <= 3 after excluding system
	t.Log("✅ Conversation threshold respects ExcludeSystemPrompt setting")
}
// TestConversationHistoryThresholdDifferentValues runs a table of
// threshold/message-count pairs. Each case sends the same conversation twice
// and expects the second response to be a direct cache hit only when the
// message count does not exceed the threshold.
//
// When a request fails, the affected subtest is reported as skipped rather
// than silently passing.
func TestConversationHistoryThresholdDifferentValues(t *testing.T) {
	testCases := []struct {
		name        string
		threshold   int
		messages    int
		shouldCache bool
	}{
		{"Threshold 1, 1 message", 1, 1, true},
		{"Threshold 1, 2 messages", 1, 2, false},
		{"Threshold 5, 4 messages", 5, 4, true},
		{"Threshold 5, 6 messages", 5, 6, false},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			setup := CreateTestSetupWithConversationThreshold(t, tc.threshold)
			defer setup.Cleanup()
			ctx := CreateContextWithCacheKey("test-threshold-" + tc.name)

			// Build conversation with specified number of messages,
			// alternating user and assistant roles starting with user.
			conversation := make([]schemas.ChatMessage, 0, tc.messages)
			for i := 0; i < tc.messages; i++ {
				role := schemas.ChatMessageRoleUser
				if i%2 == 1 {
					role = schemas.ChatMessageRoleAssistant
				}
				conversation = append(conversation, schemas.ChatMessage{
					Role: role,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Message " + strconv.Itoa(i+1)),
					},
				})
			}
			request := CreateConversationRequest(conversation, 0.7, 50)

			response1, err1 := setup.Client.ChatCompletionRequest(ctx, request)
			if err1 != nil {
				// Nothing can be verified without a response; surface a skip
				// instead of returning, which would be reported as a pass.
				t.Skipf("Skipping: first request failed: %v", err1)
			}
			AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Always fresh first time
			WaitForCache(setup.Plugin)

			response2, err2 := setup.Client.ChatCompletionRequest(ctx, request)
			if err2 != nil {
				t.Skipf("Skipping: second request failed: %v", err2)
			}
			if tc.shouldCache {
				AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
			} else {
				AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})
			}
		})
	}
	t.Log("✅ Different conversation threshold values work correctly")
}
// TestExcludeSystemPromptBasic checks that, with ExcludeSystemPrompt=true, two
// conversations that differ only in their system prompt share a cache entry:
// the second conversation is expected to be a direct hit on the entry created
// by the first.
//
// When the first request fails, the test is reported as skipped rather than
// silently passing.
func TestExcludeSystemPromptBasic(t *testing.T) {
	// Test with ExcludeSystemPrompt = true
	setup := CreateTestSetupWithExcludeSystemPrompt(t, true)
	defer setup.Cleanup()
	ctx := CreateContextWithCacheKey("test-exclude-system-basic")

	// Create two conversations with different system prompts but same user/assistant messages
	conversation1 := BuildConversationHistory(
		"You are a helpful assistant",
		[]string{"What is AI?", "AI is artificial intelligence."},
	)
	conversation2 := BuildConversationHistory(
		"You are a technical expert", // Different system prompt
		[]string{"What is AI?", "AI is artificial intelligence."}, // Same user/assistant
	)
	request1 := CreateConversationRequest(conversation1, 0.7, 50)
	request2 := CreateConversationRequest(conversation2, 0.7, 50)

	t.Log("Caching conversation with system prompt 1...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, request1)
	if err1 != nil {
		// Nothing can be verified without a first response; surface a skip
		// instead of returning, which would be reported as a pass.
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Testing conversation with different system prompt (should hit cache due to ExcludeSystemPrompt=true)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, request2)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	// Should hit cache because system prompts are excluded from cache key
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
	t.Log("✅ ExcludeSystemPrompt=true correctly ignores system prompts in cache keys")
}
// TestExcludeSystemPromptComparison contrasts ExcludeSystemPrompt=false with
// ExcludeSystemPrompt=true using two conversations that differ only in their
// system prompt. With false, a direct hit on the second conversation is an
// error (a semantic hit is tolerated). With true, the second conversation is
// expected to be a direct hit.
//
// When a cache-priming request fails, the test is reported as skipped rather
// than silently passing.
func TestExcludeSystemPromptComparison(t *testing.T) {
	// Test 1: ExcludeSystemPrompt = false (default)
	setup1 := CreateTestSetupWithExcludeSystemPrompt(t, false)
	defer setup1.Cleanup()
	ctx1 := CreateContextWithCacheKey("test-exclude-system-false")
	conversation1 := BuildConversationHistory(
		"You are helpful",
		[]string{"Hello", "Hi there!"},
	)
	conversation2 := BuildConversationHistory(
		"You are an expert", // Different system prompt
		[]string{"Hello", "Hi there!"}, // Same user/assistant
	)
	request1 := CreateConversationRequest(conversation1, 0.7, 50)
	request2 := CreateConversationRequest(conversation2, 0.7, 50)

	t.Log("Testing ExcludeSystemPrompt=false...")
	response1, err1 := setup1.Client.ChatCompletionRequest(ctx1, request1)
	if err1 != nil {
		// Nothing can be verified without a first response; surface a skip
		// instead of returning, which would be reported as a pass.
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup1.Plugin)

	response2, err2 := setup1.Client.ChatCompletionRequest(ctx1, request2)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	// Should NOT hit direct cache, but might hit semantic cache due to similar content
	if response2.ExtraFields.CacheDebug != nil && response2.ExtraFields.CacheDebug.CacheHit {
		if response2.ExtraFields.CacheDebug.HitType != nil && *response2.ExtraFields.CacheDebug.HitType == "semantic" {
			t.Log("✅ Found semantic cache match (expected with similar content)")
		} else {
			t.Error("❌ Unexpected direct cache hit with different system prompts")
		}
	} else {
		t.Log("✅ No cache hit (system prompts create different cache keys)")
	}

	// Test 2: ExcludeSystemPrompt = true
	setup2 := CreateTestSetupWithExcludeSystemPrompt(t, true)
	defer setup2.Cleanup()
	ctx2 := CreateContextWithCacheKey("test-exclude-system-true")

	t.Log("Testing ExcludeSystemPrompt=true...")
	response3, err3 := setup2.Client.ChatCompletionRequest(ctx2, request1)
	if err3 != nil {
		t.Skipf("Skipping: third request failed: %v", err3)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3})
	WaitForCache(setup2.Plugin)

	response4, err4 := setup2.Client.ChatCompletionRequest(ctx2, request2)
	if err4 != nil {
		t.Fatalf("Fourth request failed: %v", err4)
	}
	// Should hit cache because system prompts are excluded from cache key
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}, "direct")
	t.Log("✅ ExcludeSystemPrompt true vs false comparison works correctly")
}
// TestExcludeSystemPromptWithMultipleSystemMessages checks that two conversations
// which differ only in their (multiple) system messages are served from the same
// direct cache entry when the setup is created with ExcludeSystemPrompt=true.
func TestExcludeSystemPromptWithMultipleSystemMessages(t *testing.T) {
	setup := CreateTestSetupWithExcludeSystemPrompt(t, true)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-multiple-system-messages")

	// Hand-built conversations: the user/assistant turns are identical, only
	// the two leading system messages differ between them.
	seedMessages := []schemas.ChatMessage{
		{
			Role:    schemas.ChatMessageRoleSystem,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("You are helpful")},
		},
		{
			Role:    schemas.ChatMessageRoleSystem,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Be concise")},
		},
		{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hello")},
		},
		{
			Role:    schemas.ChatMessageRoleAssistant,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hi!")},
		},
	}
	probeMessages := []schemas.ChatMessage{
		{
			Role:    schemas.ChatMessageRoleSystem,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("You are an expert")},
		},
		{
			Role:    schemas.ChatMessageRoleSystem,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Be detailed")},
		},
		{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hello")},
		},
		{
			Role:    schemas.ChatMessageRoleAssistant,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hi!")},
		},
	}

	seedRequest := CreateConversationRequest(seedMessages, 0.7, 50)
	probeRequest := CreateConversationRequest(probeMessages, 0.7, 50)

	// Prime the cache with the first conversation.
	t.Log("Caching conversation with multiple system messages...")
	seedResp, seedErr := setup.Client.ChatCompletionRequest(ctx, seedRequest)
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Replay with different system messages under the same cache key.
	t.Log("Testing conversation with different multiple system messages...")
	probeResp, probeErr := setup.Client.ChatCompletionRequest(ctx, probeRequest)
	if probeErr != nil {
		if probeErr.Error != nil {
			t.Fatalf("Second request failed: %v", probeErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", probeErr)
	}

	// Every system message is excluded, so this must be a direct hit.
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: probeResp}, "direct")
	t.Log("✅ ExcludeSystemPrompt works with multiple system messages")
}
// TestExcludeSystemPromptWithNoSystemMessages confirms that a conversation
// containing no system messages is still cached and served as a direct hit when
// the setup is created with ExcludeSystemPrompt=true.
func TestExcludeSystemPromptWithNoSystemMessages(t *testing.T) {
	setup := CreateTestSetupWithExcludeSystemPrompt(t, true)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-no-system-messages")

	// Only user and assistant turns; nothing for the exclusion to strip.
	messages := []schemas.ChatMessage{
		{
			Role:    schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hello")},
		},
		{
			Role:    schemas.ChatMessageRoleAssistant,
			Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("Hi there!")},
		},
	}
	request := CreateConversationRequest(messages, 0.7, 50)

	t.Log("Testing conversation with no system messages...")
	firstResp, firstErr := setup.Client.ChatCompletionRequest(ctx, request)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: firstResp})
	WaitForCache(setup.Plugin)

	// The identical request must now be answered from the cache.
	secondResp, secondErr := setup.Client.ChatCompletionRequest(ctx, request)
	if secondErr != nil {
		if secondErr.Error != nil {
			t.Fatalf("Second request failed: %v", secondErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", secondErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: secondResp}, "direct")
	t.Log("✅ ExcludeSystemPrompt works correctly when no system messages present")
}

View File

@@ -0,0 +1,601 @@
package semanticcache
import (
"context"
"strings"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/vectorstore"
)
// TestSemanticCacheBasicFunctionality exercises the core round trip: an initial
// request is sent and cached, an identical second request must come back as a
// direct cache hit, faster than the first, with the same content and provider.
func TestSemanticCacheBasicFunctionality(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-basic-value")
	testRequest := CreateBasicChatRequest(
		"What is Bifrost? Answer in one short sentence.",
		0.7,
		50,
	)

	// First call: expected to reach the provider and populate the cache.
	t.Log("Making first request (should go to OpenAI and be cached)...")
	firstStart := time.Now()
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
	duration1 := time.Since(firstStart)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("First response is invalid")
	}
	content1 := *response1.Choices[0].Message.Content.ContentStr
	t.Logf("First request completed in %v", duration1)
	t.Logf("Response: %s", content1)

	// Give the plugin time to write the entry before replaying.
	WaitForCache(setup.Plugin)

	// Second call: same request, same cache key.
	t.Log("Making second identical request (should be served from cache)...")
	secondStart := time.Now()
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, testRequest)
	duration2 := time.Since(secondStart)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	if response2 == nil || len(response2.Choices) == 0 || response2.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("Second response is invalid")
	}
	content2 := *response2.Choices[0].Message.Content.ContentStr
	t.Logf("Second request completed in %v", duration2)
	t.Logf("Response: %s", content2)

	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect))

	// Timing comparison between the provider call and the cached call.
	t.Logf("Performance Summary:")
	t.Logf("First request (OpenAI): %v", duration1)
	t.Logf("Second request (Cache): %v", duration2)
	switch {
	case duration2 >= duration1:
		t.Errorf("Cache request took longer than original request: cache=%v, original=%v", duration2, duration1)
	default:
		speedup := float64(duration1) / float64(duration2)
		t.Logf("Cache speedup: %.2fx faster", speedup)
		// The cached path is required to be at least 1.5x faster.
		if speedup < 1.5 {
			t.Errorf("Cache speedup is less than 1.5x: got %.2fx", speedup)
		}
	}

	// The cached body must match what the provider originally returned.
	if content1 != content2 {
		t.Errorf("Response content differs between cached and original:\nOriginal: %s\nCached: %s", content1, content2)
	}

	// Provider metadata must survive the cache round trip.
	if response2.ExtraFields.Provider != testRequest.Provider {
		t.Errorf("Provider mismatch in cached response: expected %s, got %s",
			testRequest.Provider, response2.ExtraFields.Provider)
	}

	t.Log("✅ Basic semantic caching test completed successfully!")
}
// TestSemanticSearch caches one prompt and then sends a near-identical prompt
// under the same cache key, requiring the second response to be reported as a
// semantic cache hit.
func TestSemanticSearch(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Lower threshold for more flexible matching
	setup.Config.Threshold = 0.5

	ctx := CreateContextWithCacheKey("semantic-test-value")

	// Seed request; temperature 0 keeps results consistent.
	firstRequest := CreateBasicChatRequest(
		"What is machine learning? Explain briefly.",
		0.0,
		50,
	)

	t.Log("Making first request (should go to OpenAI and be cached)...")
	firstStart := time.Now()
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, firstRequest)
	duration1 := time.Since(firstStart)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("First response is invalid")
	}
	t.Logf("First request completed in %v", duration1)
	t.Logf("Response: %s", *response1.Choices[0].Message.Content.ContentStr)

	// Wait for cache to be written (async PostLLMHook needs time to complete)
	WaitForCache(setup.Plugin)

	// Probe request: wording differs only slightly from the seed.
	secondRequest := CreateBasicChatRequest(
		"What is machine learning? Explain it briefly.",
		0.0,
		50,
	)

	t.Log("Making semantically similar request (should be served from semantic cache)...")
	secondStart := time.Now()
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, secondRequest)
	duration2 := time.Since(secondStart)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	if response2 == nil || len(response2.Choices) == 0 || response2.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("Second response is invalid")
	}
	t.Logf("Second request completed in %v", duration2)
	t.Logf("Response: %s", *response2.Choices[0].Message.Content.ContentStr)

	// The debug block must report a hit whose type is "semantic".
	debug := response2.ExtraFields.CacheDebug
	semanticMatch := debug != nil && debug.CacheHit &&
		debug.HitType != nil && *debug.HitType == string(CacheTypeSemantic)
	if !semanticMatch {
		t.Error("Semantic match expected but not found")
		return
	}

	// Threshold and similarity are optional; report zero when absent.
	var threshold, similarity float64
	if debug.Threshold != nil {
		threshold = *debug.Threshold
	}
	if debug.Similarity != nil {
		similarity = *debug.Similarity
	}
	t.Logf("✅ Second request was served from semantic cache! Cache threshold: %f, Cache similarity: %f", threshold, similarity)

	// Timing is informational only here; no assertion is made on it.
	t.Logf("Semantic Cache Performance:")
	t.Logf("First request (OpenAI): %v", duration1)
	t.Logf("Second request (Semantic): %v", duration2)
	if duration2 < duration1 {
		speedup := float64(duration1) / float64(duration2)
		t.Logf("Semantic cache speedup: %.2fx faster", speedup)
	}

	t.Log("✅ Semantic search test completed successfully!")
}
// TestToFloat32Embedding checks that toFloat32Embedding preserves length and
// that each output element equals the float32 conversion of its input.
func TestToFloat32Embedding(t *testing.T) {
	input := []float64{0.12345678901234568, -0.875, 1.5}

	got := toFloat32Embedding(input)
	if len(got) != len(input) {
		t.Fatalf("expected %d elements, got %d", len(input), len(got))
	}

	for i := range input {
		// Expected value is the plain float32 narrowing of the source element.
		want := float32(input[i])
		if got[i] != want {
			t.Fatalf("expected element %d to be %v, got %v", i, want, got[i])
		}
	}
}
// TestFlattenToFloat32Embedding checks that flattenToFloat32Embedding
// concatenates the rows in order (an empty row contributes nothing) and
// converts every element to float32.
func TestFlattenToFloat32Embedding(t *testing.T) {
	rows := [][]float64{
		{0.25, 0.5},
		{-0.75},
		{}, // empty row in the middle
		{1.25, 2.5},
	}
	want := []float32{0.25, 0.5, -0.75, 1.25, 2.5}

	got := flattenToFloat32Embedding(rows)
	if len(got) != len(want) {
		t.Fatalf("expected %d elements, got %d", len(want), len(got))
	}

	for i, expected := range want {
		if got[i] != expected {
			t.Fatalf("expected element %d to be %v, got %v", i, expected, got[i])
		}
	}
}
// TestDirectVsSemanticSearch contrasts the two lookup paths under one cache key:
// an exact repeat of a request must be a direct hit, while a reworded request
// with the same parameters must be reported as a semantic hit.
func TestDirectVsSemanticSearch(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Lower threshold for more flexible semantic matching
	setup.Config.Threshold = 0.2

	ctx := CreateContextWithCacheKey("direct-vs-semantic-test")

	// Test Case 1: Exact same request (should use direct hash matching)
	t.Log("=== Test Case 1: Exact Same Request (Direct Hash Match) ===")
	exactRequest := CreateBasicChatRequest(
		"What is artificial intelligence?",
		0.1,
		100,
	)

	t.Log("Making first request...")
	_, err1 := setup.Client.ChatCompletionRequest(ctx, exactRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	WaitForCache(setup.Plugin)

	t.Log("Making exact same request (should hit direct cache)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, exactRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	// Should be a direct cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect))

	// Test Case 2: Similar but different request (should use semantic search)
	t.Log("\n=== Test Case 2: Semantically Similar Request ===")
	semanticRequest := CreateBasicChatRequest(
		"Can you explain what AI is?", // Similar but different wording
		0.1,                           // Same parameters
		100,
	)

	t.Log("Making semantically similar request...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx, semanticRequest)
	if err3 != nil {
		// Report the nested message when present, matching the handling of
		// the second request; printing the raw error alone hides it behind
		// a pointer.
		if err3.Error != nil {
			t.Fatalf("Third request failed: %v", err3.Error.Message)
		}
		t.Fatalf("Third request failed: %v", err3)
	}

	// The debug block must report a hit whose type is "semantic".
	debug := response3.ExtraFields.CacheDebug
	semanticMatch := debug != nil && debug.CacheHit &&
		debug.HitType != nil && *debug.HitType == string(CacheTypeSemantic)
	if !semanticMatch {
		t.Error("Semantic match expected but not found")
		return
	}

	// Threshold and similarity are optional; report zero when absent.
	var threshold, similarity float64
	if debug.Threshold != nil {
		threshold = *debug.Threshold
	}
	if debug.Similarity != nil {
		similarity = *debug.Similarity
	}
	t.Logf("✅ Third request was served from semantic cache! Cache threshold: %f, Cache similarity: %f", threshold, similarity)

	t.Log("✅ Direct vs semantic search test completed!")
}
// TestNoCacheScenarios verifies that changing request parameters (temperature
// or max tokens) while keeping the prompt and cache key fixed does not produce
// a cache hit.
func TestNoCacheScenarios(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("no-cache-test")
	const basePrompt = "What is the capital of France?"

	// Test Case 1: Different parameters should NOT cache hit
	t.Log("=== Test Case 1: Different Parameters ===")

	// Baseline request that seeds the cache.
	baseline := CreateBasicChatRequest(basePrompt, 0.1, 50)
	if _, err := setup.Client.ChatCompletionRequest(ctx, baseline); err != nil {
		return // Test will be skipped by retry function
	}
	WaitForCache(setup.Plugin)

	// Same prompt, different temperature.
	hotterRequest := CreateBasicChatRequest(basePrompt, 0.9, 50)
	hotterResp, hotterErr := setup.Client.ChatCompletionRequest(ctx, hotterRequest)
	if hotterErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: hotterResp})

	// Test Case 2: Different max_tokens should NOT cache hit
	t.Log("\n=== Test Case 2: Different MaxTokens ===")
	longerRequest := CreateBasicChatRequest(basePrompt, 0.1, 200)
	longerResp, longerErr := setup.Client.ChatCompletionRequest(ctx, longerRequest)
	if longerErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: longerResp})

	t.Log("✅ No cache scenarios test completed!")
}
// TestCacheConfiguration tests different cache configuration options
func TestCacheConfiguration(t *testing.T) {
tests := []struct {
name string
config *Config
expectedBehavior string
}{
{
name: "High Threshold",
config: &Config{
Provider: schemas.OpenAI,
EmbeddingModel: "text-embedding-3-small",
Dimension: 1536,
Threshold: 0.95, // Very high threshold
Keys: []schemas.Key{
{Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"), Models: schemas.WhiteList{"*"}, Weight: 1.0},
},
},
expectedBehavior: "strict_matching",
},
{
name: "Low Threshold",
config: &Config{
Provider: schemas.OpenAI,
EmbeddingModel: "text-embedding-3-small",
Dimension: 1536,
Threshold: 0.1, // Very low threshold
Keys: []schemas.Key{
{Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"), Models: schemas.WhiteList{"*"}, Weight: 1.0},
},
},
expectedBehavior: "loose_matching",
},
{
name: "Custom TTL",
config: &Config{
Provider: schemas.OpenAI,
EmbeddingModel: "text-embedding-3-small",
Dimension: 1536,
Threshold: 0.8,
TTL: 1 * time.Hour, // Custom TTL
Keys: []schemas.Key{
{Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"), Models: schemas.WhiteList{"*"}, Weight: 1.0},
},
},
expectedBehavior: "custom_ttl",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
setup := NewTestSetupWithConfig(t, tt.config)
defer setup.Cleanup()
ctx := CreateContextWithCacheKey("config-test-" + tt.name)
// Basic functionality test with the configuration
testRequest := CreateBasicChatRequest("Test configuration: "+tt.name, 0.5, 50)
_, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
if err1 != nil {
return // Test will be skipped by retry function
}
WaitForCache(setup.Plugin)
_, err2 := setup.Client.ChatCompletionRequest(ctx, testRequest)
if err2 != nil {
if err2.Error != nil {
t.Fatalf("Second request failed: %v", err2.Error.Message)
} else {
t.Fatalf("Second request failed: %v", err2)
}
}
t.Logf("✅ Configuration test '%s' completed", tt.name)
})
}
}
// MockUnsupportedStore is a stateless vector-store test double. Ping,
// DeleteNamespace and Close succeed and RequiresVectors reports false; every
// other operation fails with vectorstore.ErrNotSupported.
type MockUnsupportedStore struct{}

// Ping always succeeds.
func (s *MockUnsupportedStore) Ping(ctx context.Context) error { return nil }

// CreateNamespace always fails with vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) CreateNamespace(ctx context.Context, namespace string, dimension int, properties map[string]vectorstore.VectorStoreProperties) error {
	return vectorstore.ErrNotSupported
}

// DeleteNamespace always succeeds.
func (s *MockUnsupportedStore) DeleteNamespace(ctx context.Context, namespace string) error {
	return nil
}

// GetChunk returns an empty result and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) GetChunk(ctx context.Context, namespace, id string) (vectorstore.SearchResult, error) {
	return vectorstore.SearchResult{}, vectorstore.ErrNotSupported
}

// GetChunks returns no results and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) GetChunks(ctx context.Context, namespace string, ids []string) ([]vectorstore.SearchResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// GetAll returns no results, no cursor, and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) GetAll(ctx context.Context, namespace string, queries []vectorstore.Query, selectFields []string, cursor *string, limit int64) ([]vectorstore.SearchResult, *string, error) {
	return nil, nil, vectorstore.ErrNotSupported
}

// GetNearest returns no results and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) GetNearest(ctx context.Context, namespace string, vector []float32, queries []vectorstore.Query, selectFields []string, threshold float64, limit int64) ([]vectorstore.SearchResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// RequiresVectors always reports false.
func (s *MockUnsupportedStore) RequiresVectors() bool { return false }

// Add always fails with vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) Add(ctx context.Context, namespace, id string, embedding []float32, metadata map[string]any) error {
	return vectorstore.ErrNotSupported
}

// Delete always fails with vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) Delete(ctx context.Context, namespace, id string) error {
	return vectorstore.ErrNotSupported
}

// DeleteAll returns no results and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) DeleteAll(ctx context.Context, namespace string, queries []vectorstore.Query) ([]vectorstore.DeleteResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// SearchSemanticCache returns no results and vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) SearchSemanticCache(ctx context.Context, queryEmbedding []float32, metadata map[string]any, threshold float64, limit int64) ([]vectorstore.SearchResult, error) {
	return nil, vectorstore.ErrNotSupported
}

// AddSemanticCache always fails with vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) AddSemanticCache(ctx context.Context, key string, embedding []float32, metadata map[string]any, ttl time.Duration) error {
	return vectorstore.ErrNotSupported
}

// EnsureSemanticIndex always fails with vectorstore.ErrNotSupported.
func (s *MockUnsupportedStore) EnsureSemanticIndex(ctx context.Context, keyPrefix string, embeddingDim int, metadataFields []string) error {
	return vectorstore.ErrNotSupported
}

// Close always succeeds.
func (s *MockUnsupportedStore) Close(ctx context.Context, namespace string) error { return nil }
// TestInvalidProviderRejection checks that Init returns an error mentioning
// missing embedding support for each provider in the unsupported list.
func TestInvalidProviderRejection(t *testing.T) {
	const expectedErrSubstring = "does not support embedding operations"

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)

	// Mock vector store shared by all sub-tests.
	mockStore := &MockUnsupportedStore{}

	// Providers expected to be rejected for lacking embedding support.
	unsupportedProviders := []schemas.ModelProvider{
		schemas.Anthropic,
		schemas.Cerebras,
		schemas.Groq,
		schemas.OpenRouter,
		schemas.Parasail,
		schemas.Perplexity,
		schemas.Replicate,
		schemas.XAI,
		schemas.Elevenlabs,
	}

	for _, provider := range unsupportedProviders {
		t.Run(string(provider), func(t *testing.T) {
			cfg := &Config{
				Provider:          provider,
				EmbeddingModel:    "some-model",
				Dimension:         1536,
				Threshold:         0.8,
				CleanUpOnShutdown: false,
				Keys: []schemas.Key{
					{
						Value:  *schemas.NewEnvVar("env.TEST_API_KEY"),
						Models: schemas.WhiteList{"*"},
						Weight: 1.0,
					},
				},
			}

			_, err := Init(ctx, cfg, logger, mockStore)
			if err == nil {
				t.Errorf("Expected error for provider '%s' but got none", provider)
				return
			}
			// An error was returned; it must be the embedding-support one.
			if !strings.Contains(err.Error(), expectedErrSubstring) {
				t.Errorf("Expected error message to contain '%s', but got: %v", expectedErrSubstring, err)
			}
		})
	}
}
// TestValidProviderAccepted checks that Init does not reject the OpenAI
// provider for lacking embedding support. Any other Init error is tolerated.
func TestValidProviderAccepted(t *testing.T) {
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)

	// Mock vector store; its CreateNamespace returns ErrNotSupported, so Init
	// may still fail for a reason unrelated to provider validation.
	mockStore := &MockUnsupportedStore{}

	openAIConfig := &Config{
		Provider:          schemas.OpenAI,
		EmbeddingModel:    "text-embedding-3-small",
		Dimension:         1536,
		Threshold:         0.8,
		CleanUpOnShutdown: false,
		Keys: []schemas.Key{
			{
				Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
				Models: schemas.WhiteList{"*"},
				Weight: 1.0,
			},
		},
	}

	_, err := Init(ctx, openAIConfig, logger, mockStore)
	if err == nil {
		return
	}
	// Only the embedding-support rejection counts as a failure here.
	if strings.Contains(err.Error(), "does not support embedding operations") {
		t.Errorf("Valid provider OpenAI should not be rejected for embedding support, but got: %v", err)
	}
}

View File

@@ -0,0 +1,327 @@
package semanticcache
import (
"testing"
"github.com/maximhq/bifrost/core/schemas"
)
// TestCrossCacheTypeAccessibility caches a request with the default cache
// behavior and then requires the same request to be found both when the
// context restricts lookup to the direct cache and when it restricts lookup to
// the semantic cache.
func TestCrossCacheTypeAccessibility(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("What is artificial intelligence?", 0.7, 100)

	// Test 1: Cache with default behavior (both direct + semantic)
	ctx1 := CreateContextWithCacheKey("test-cross-cache-access")
	t.Log("Caching with default behavior (both direct + semantic)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test 2: Retrieve with direct-only cache type
	ctx2 := CreateContextWithCacheKeyAndType("test-cross-cache-access", CacheTypeDirect)
	t.Log("Retrieving with CacheTypeKey=direct...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should find direct match

	// Test 3: Retrieve with semantic-only cache type
	ctx3 := CreateContextWithCacheKeyAndType("test-cross-cache-access", CacheTypeSemantic)
	t.Log("Retrieving with CacheTypeKey=semantic...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest)
	if err3 != nil {
		// Surface the nested message when available, consistent with the
		// second request; the raw error alone hides it behind a pointer.
		if err3.Error != nil {
			t.Fatalf("Third request failed: %v", err3.Error.Message)
		}
		t.Fatalf("Third request failed: %v", err3)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "semantic") // Should find semantic match

	t.Log("✅ Entries cached with default behavior are accessible via both cache types")
}
// TestCacheTypeIsolation caches a request with the direct-only cache type and
// then checks that a semantic-only lookup misses while direct-only and default
// lookups both report a direct hit.
func TestCacheTypeIsolation(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("Define blockchain technology", 0.7, 100)

	// Clear cache to start fresh
	clearTestKeysWithStore(t, setup.Store)

	// Test 1: Cache with direct-only
	ctx1 := CreateContextWithCacheKeyAndType("test-cache-isolation", CacheTypeDirect)
	t.Log("Caching with CacheTypeKey=direct only...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request
	WaitForCache(setup.Plugin)

	// Test 2: Try to retrieve with semantic-only (should miss because no semantic entry)
	ctx2 := CreateContextWithCacheKeyAndType("test-cache-isolation", CacheTypeSemantic)
	t.Log("Retrieving same request with CacheTypeKey=semantic (should miss)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should miss - no semantic cache entry
	WaitForCache(setup.Plugin)

	// Test 3: Retrieve with direct-only (should hit)
	t.Log("Retrieving with CacheTypeKey=direct (should hit)...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err3 != nil {
		// Prefer the nested message; the raw error alone hides it behind a
		// pointer.
		if err3.Error != nil {
			t.Fatalf("Third request failed: %v", err3.Error.Message)
		}
		t.Fatalf("Third request failed: %v", err3)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct") // Should hit direct cache

	// Test 4: Default behavior (should find the direct cache)
	ctx4 := CreateContextWithCacheKey("test-cache-isolation")
	t.Log("Retrieving with default behavior (should find direct cache)...")
	response4, err4 := setup.Client.ChatCompletionRequest(ctx4, testRequest)
	if err4 != nil {
		if err4.Error != nil {
			t.Fatalf("Fourth request failed: %v", err4.Error.Message)
		}
		t.Fatalf("Fourth request failed: %v", err4)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}, "direct") // Should find existing direct cache

	t.Log("✅ Cache type isolation works correctly")
}
// TestCacheTypeFallbackBehavior caches one prompt with the default behavior and
// then sends reworded prompts under direct-only, semantic-only and default
// cache types. The direct-only lookup must miss; the semantic-only and default
// lookups may hit or miss, but any hit must be of type "semantic".
func TestCacheTypeFallbackBehavior(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Cache an entry with default behavior
	originalRequest := CreateBasicChatRequest("Explain machine learning", 0.7, 100)
	ctx1 := CreateContextWithCacheKey("test-fallback-behavior")
	t.Log("Caching with default behavior...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, originalRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test similar request with direct-only (should miss direct, no fallback, but should cache response)
	similarRequest := CreateBasicChatRequest("Explain machine learning concepts", 0.7, 100)
	ctx2 := CreateContextWithCacheKeyAndType("test-fallback-behavior", CacheTypeDirect)
	t.Log("Testing similar request with CacheTypeKey=direct (should miss, make request, cache without embeddings)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, similarRequest)
	if err2 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should miss - no direct match, no semantic search
	WaitForCache(setup.Plugin)                                             // Let the response get cached

	// Test same similar request with semantic-only (should hit original entry)
	ctx3 := CreateContextWithCacheKeyAndType("test-fallback-behavior", CacheTypeSemantic)
	t.Log("Testing similar request with CacheTypeKey=semantic (should find semantic match from step 1)...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx3, similarRequest)
	if err3 != nil {
		// Prefer the nested message; the raw error alone hides it behind a
		// pointer.
		if err3.Error != nil {
			t.Fatalf("Third request failed: %v", err3.Error.Message)
		}
		t.Fatalf("Third request failed: %v", err3)
	}

	// Should find semantic match from step 1's cached entry (which has embeddings)
	if response3.ExtraFields.CacheDebug != nil && response3.ExtraFields.CacheDebug.CacheHit {
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "semantic")
		t.Log("✅ Semantic search found similar entry from step 1")
	} else {
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3})
		t.Log(" No semantic match found (threshold may be too high or semantic similarity low)")
	}

	// Test a different similar request with default behavior (try both, fallback to semantic)
	// Use a slightly different request to avoid hitting the cached response from step 2
	differentSimilarRequest := CreateBasicChatRequest("Explain the basics of machine learning", 0.7, 100)
	ctx4 := CreateContextWithCacheKey("test-fallback-behavior")
	t.Log("Testing different similar request with default behavior (direct miss -> semantic fallback)...")
	response4, err4 := setup.Client.ChatCompletionRequest(ctx4, differentSimilarRequest)
	if err4 != nil {
		if err4.Error != nil {
			t.Fatalf("Fourth request failed: %v", err4.Error.Message)
		}
		t.Fatalf("Fourth request failed: %v", err4)
	}

	// Should try direct first (miss), then semantic (might hit)
	if response4.ExtraFields.CacheDebug != nil && response4.ExtraFields.CacheDebug.CacheHit {
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}, "semantic")
		t.Log("✅ Default behavior found semantic fallback")
	} else {
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4})
		t.Log(" No fallback match found")
	}

	t.Log("✅ Cache type fallback behavior verified")
}
// TestMultipleCacheEntriesPriority caches a request with the default behavior
// and then checks that it is served with identical content from the direct
// cache (default and direct-only contexts) and from the semantic cache
// (semantic-only context).
func TestMultipleCacheEntriesPriority(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("What is deep learning?", 0.7, 100)

	// Create cache entry with default behavior first
	ctx1 := CreateContextWithCacheKey("test-cache-priority")
	t.Log("Creating cache entry with default behavior...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	// Validate before dereferencing so a malformed response fails the test
	// cleanly instead of panicking.
	if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("First response is invalid")
	}
	originalContent := *response1.Choices[0].Message.Content.ContentStr
	WaitForCache(setup.Plugin)

	// Verify it hits cache with default behavior
	t.Log("Verifying cache hit with default behavior...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should hit direct cache
	if response2 == nil || len(response2.Choices) == 0 || response2.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("Second response is invalid")
	}
	cachedContent := *response2.Choices[0].Message.Content.ContentStr

	// Verify content is the same
	if originalContent != cachedContent {
		t.Errorf("Cache content mismatch:\nOriginal: %s\nCached: %s", originalContent, cachedContent)
	}

	// Test with direct-only access
	ctx2 := CreateContextWithCacheKeyAndType("test-cache-priority", CacheTypeDirect)
	t.Log("Accessing with CacheTypeKey=direct...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err3 != nil {
		// Prefer the nested message; the raw error alone hides it behind a
		// pointer.
		if err3.Error != nil {
			t.Fatalf("Third request failed: %v", err3.Error.Message)
		}
		t.Fatalf("Third request failed: %v", err3)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct") // Should find direct cache

	// Test with semantic-only access
	ctx3 := CreateContextWithCacheKeyAndType("test-cache-priority", CacheTypeSemantic)
	t.Log("Accessing with CacheTypeKey=semantic...")
	response4, err4 := setup.Client.ChatCompletionRequest(ctx3, testRequest)
	if err4 != nil {
		if err4.Error != nil {
			t.Fatalf("Fourth request failed: %v", err4.Error.Message)
		}
		t.Fatalf("Fourth request failed: %v", err4)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}, "semantic") // Should find semantic cache

	t.Log("✅ Multiple cache entries accessible correctly")
}
// TestCrossCacheTypeWithDifferentParameters checks how request parameters
// interact with cache types: identical parameters hit the direct cache, while
// changed temperature/max tokens miss on both direct and semantic lookups.
func TestCrossCacheTypeWithDifferentParameters(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	const (
		cacheKey    = "test-cross-cache-params"
		baseMessage = "Explain quantum computing"
	)

	// Seed the cache with temp=0.7, max_tokens=100 using default behavior.
	seedRequest := CreateBasicChatRequest(baseMessage, 0.7, 100)
	defaultCtx := CreateContextWithCacheKey(cacheKey)
	t.Log("Caching with temp=0.7, max_tokens=100...")
	seedResp, seedErr := setup.Client.ChatCompletionRequest(defaultCtx, seedRequest)
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Same parameters, direct-only lookup: must hit.
	directCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
	t.Log("Retrieving same parameters with CacheTypeKey=direct...")
	sameResp, sameErr := setup.Client.ChatCompletionRequest(directCtx, seedRequest)
	if sameErr != nil {
		if sameErr.Error != nil {
			t.Fatalf("Second request failed: %v", sameErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", sameErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: sameResp}, "direct")

	// Same message, different temperature and max tokens: must miss.
	changedRequest := CreateBasicChatRequest(baseMessage, 0.5, 200)
	t.Log("Testing different parameters (should miss)...")
	changedResp, changedErr := setup.Client.ChatCompletionRequest(directCtx, changedRequest)
	if changedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: changedResp})

	// Similar message with the changed parameters, semantic-only lookup.
	semanticCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeSemantic)
	similarRequest := CreateBasicChatRequest("Can you explain quantum computing", 0.5, 200)
	t.Log("Testing semantic search with different params and similar message...")
	similarResp, similarErr := setup.Client.ChatCompletionRequest(semanticCtx, similarRequest)
	if similarErr != nil {
		return // Test will be skipped by retry function
	}
	// Should miss semantic search due to different parameters (params_hash different)
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: similarResp})

	t.Log("✅ Cross-cache-type parameter handling works correctly")
}
// TestCacheTypeErrorHandling tests error scenarios with cache types.
//
// Two malformed CacheTypeKey values are exercised under the same cache key: an
// unrecognized string and nil. The first request is expected to miss because nothing
// is cached yet; the second is expected to be served from the direct cache entry
// written by the first.
//
// If the first request fails, the test is marked as skipped rather than silently
// passing.
func TestCacheTypeErrorHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("Test error handling", 0.7, 50)

	// Test invalid cache type (should fallback to default)
	ctx1 := CreateContextWithCacheKey("test-cache-error-handling")
	ctx1 = ctx1.WithValue(CacheTypeKey, "invalid_cache_type")
	t.Log("Testing invalid cache type (should fallback to default behavior)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Should work with fallback behavior
	WaitForCache(setup.Plugin)

	// Test nil cache type (should use default)
	ctx2 := CreateContextWithCacheKey("test-cache-error-handling")
	ctx2 = ctx2.WithValue(CacheTypeKey, nil)
	t.Log("Testing nil cache type (should use default behavior)...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should find cached entry from first request

	t.Log("✅ Cache type error handling works correctly")
}

View File

@@ -0,0 +1,133 @@
package semanticcache
import (
"context"
"testing"
"github.com/maximhq/bifrost/core/schemas"
)
// TestDefaultCacheKey_CachesWithoutPerRequestKey verifies that when DefaultCacheKey
// is configured, requests without an explicit cache key are cached automatically.
//
// The same request is sent twice on fresh contexts that carry no per-request key:
// the first must not be a cache hit, the second must be a direct cache hit.
// If the first request fails, the test is marked as skipped rather than silently
// passing.
func TestDefaultCacheKey_CachesWithoutPerRequestKey(t *testing.T) {
	config := getDefaultTestConfig()
	config.DefaultCacheKey = "test-default-key"

	setup := NewTestSetupWithConfig(t, config)
	defer setup.Cleanup()

	// Context with NO per-request cache key
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	testRequest := CreateBasicChatRequest("What is Bifrost? Answer in one short sentence.", 0.7, 50)

	t.Log("Making first request without per-request cache key (should use default and be cached)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
	if err1 != nil {
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil {
		t.Fatal("First response is invalid")
	}
	// First request should NOT be a cache hit
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Making second identical request without per-request cache key (should hit cache)...")
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect))

	t.Log("Default cache key correctly enabled caching without per-request key")
}
// TestDefaultCacheKey_PerRequestKeyOverridesDefault verifies that an explicit
// per-request cache key takes precedence over the configured default.
//
// The request is first cached through the default key and confirmed to hit on a
// second key-less context. The same request sent with the per-request key
// "override-key" must then miss. If the initial request fails, the test is marked
// as skipped rather than silently passing.
func TestDefaultCacheKey_PerRequestKeyOverridesDefault(t *testing.T) {
	config := getDefaultTestConfig()
	config.DefaultCacheKey = "test-default-key"

	setup := NewTestSetupWithConfig(t, config)
	defer setup.Cleanup()

	testRequest := CreateBasicChatRequest("What is the capital of France?", 0.5, 50)

	// Cache with the default key (no per-request key)
	ctx1 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	_, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
	if err1 != nil {
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	WaitForCache(setup.Plugin)

	// Verify the cache was actually populated with the default key
	ctxDefault2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	responseDefault2, errDefault2 := setup.Client.ChatCompletionRequest(ctxDefault2, testRequest)
	if errDefault2 != nil {
		if errDefault2.Error != nil {
			t.Fatalf("Default-key verification request failed: %v", errDefault2.Error.Message)
		}
		t.Fatalf("Default-key verification request failed: %v", errDefault2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: responseDefault2}, string(CacheTypeDirect))

	// Same request but with a DIFFERENT per-request key — should miss
	ctx2 := CreateContextWithCacheKey("override-key")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})

	t.Log("Per-request cache key correctly overrides default (different namespace = cache miss)")
}
// TestDefaultCacheKey_EmptyDefault_NoCaching verifies that when DefaultCacheKey
// is empty (default zero value), requests without a per-request key bypass caching.
//
// The same request is sent twice on key-less contexts and neither response may be
// a cache hit. If the first request fails, the test is marked as skipped rather
// than silently passing.
func TestDefaultCacheKey_EmptyDefault_NoCaching(t *testing.T) {
	config := getDefaultTestConfig()
	// DefaultCacheKey is intentionally left empty (zero value)

	setup := NewTestSetupWithConfig(t, config)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	testRequest := CreateBasicChatRequest("What is deep learning", 0.7, 50)

	t.Log("Making first request without any cache key and no default (should not cache)...")
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
	if err1 != nil {
		t.Skipf("Skipping: first request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Making second identical request (should still not cache)...")
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})

	t.Log("Empty default cache key correctly preserves opt-in behavior")
}

View File

@@ -0,0 +1,622 @@
package semanticcache
import (
"context"
"strings"
"testing"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// TestParameterVariations tests that different parameters don't cache hit inappropriately.
//
// Each case sends request1, waits for the cache write, then sends request2 under the
// same cache key. shouldCache states whether request2 is expected to be a direct
// cache hit (identical parameters) or a miss (temperature or max tokens differ).
//
// If request1 fails, the subtest is marked as skipped rather than silently passing.
func TestParameterVariations(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	basePrompt := "What is the capital of France?"

	tests := []struct {
		name        string
		request1    *schemas.BifrostChatRequest
		request2    *schemas.BifrostChatRequest
		shouldCache bool
	}{
		{
			name:        "Same Parameters",
			request1:    CreateBasicChatRequest(basePrompt, 0.5, 50),
			request2:    CreateBasicChatRequest(basePrompt, 0.5, 50),
			shouldCache: true,
		},
		{
			name:        "Different Temperature",
			request1:    CreateBasicChatRequest(basePrompt, 0.1, 50),
			request2:    CreateBasicChatRequest(basePrompt, 0.9, 50),
			shouldCache: false,
		},
		{
			name:        "Different MaxTokens",
			request1:    CreateBasicChatRequest(basePrompt, 0.5, 50),
			request2:    CreateBasicChatRequest(basePrompt, 0.5, 200),
			shouldCache: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create a fresh context for each subtest to avoid context pollution
			ctx := CreateContextWithCacheKey("param-variations-test")

			// Clear cache for this subtest
			clearTestKeysWithStore(t, setup.Store)

			// Make first request
			_, err1 := setup.Client.ChatCompletionRequest(ctx, tt.request1)
			if err1 != nil {
				t.Skipf("Skipping: first request failed: %v", err1)
			}
			WaitForCache(setup.Plugin)

			// Make second request
			response2, err2 := setup.Client.ChatCompletionRequest(ctx, tt.request2)
			if err2 != nil {
				if err2.Error != nil {
					t.Fatalf("Second request failed: %v", err2.Error.Message)
				}
				t.Fatalf("Second request failed: %v", err2)
			}

			// Check cache behavior
			if tt.shouldCache {
				AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect))
			} else {
				AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})
			}
		})
	}
}
// TestToolVariations tests caching behavior with different tool configurations.
//
// The same weather prompt is sent four times under one cache key:
//  1. without tools (populates the cache),
//  2. with a "get_weather" tool (must miss),
//  3. with the same tool again (must hit),
//  4. with a differently named/parameterized tool (must miss).
//
// Requests that are expected to miss the cache mark the test as skipped when they
// fail, instead of returning silently and being reported as a pass.
func TestToolVariations(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("tool-variations-test")

	// newRequest builds the shared prompt and parameters; a nil tools slice leaves
	// Params.Tools unset, which is the "no tools" request.
	newRequest := func(tools []schemas.ChatTool) *schemas.BifrostChatRequest {
		return &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What's the weather like today?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(100),
				Temperature:         bifrost.Ptr(0.5),
				Tools:               tools,
			},
		}
	}

	// functionTool builds a non-strict function tool with a single string parameter.
	functionTool := func(name, description, paramName, paramDescription string) schemas.ChatTool {
		return schemas.ChatTool{
			Type: schemas.ChatToolTypeFunction,
			Function: &schemas.ChatToolFunction{
				Name:        name,
				Description: bifrost.Ptr(description),
				Parameters: &schemas.ToolFunctionParameters{
					Type: "object",
					Properties: schemas.NewOrderedMapFromPairs(
						schemas.KV(paramName, map[string]interface{}{
							"type":        "string",
							"description": paramDescription,
						}),
					),
				},
				Strict: bifrost.Ptr(false),
			},
		}
	}

	// Base request without tools
	baseRequest := newRequest(nil)

	// Request with tools
	requestWithTools := newRequest([]schemas.ChatTool{
		functionTool("get_weather", "Get the current weather", "location", "The city and state"),
	})

	// Request with different tools (different function name, description and parameter name)
	requestWithDifferentTools := newRequest([]schemas.ChatTool{
		functionTool("get_current_weather", "Get current weather information", "city", "The city name"),
	})

	// Test 1: Request without tools
	t.Log("Making request without tools...")
	_, err1 := setup.Client.ChatCompletionRequest(ctx, baseRequest)
	if err1 != nil {
		t.Fatalf("Request without tools failed: %v", err1)
	}
	WaitForCache(setup.Plugin)

	// Test 2: Request with tools (should NOT cache hit)
	t.Log("Making request with tools...")
	response2, err2 := setup.Client.ChatCompletionRequest(ctx, requestWithTools)
	if err2 != nil {
		t.Skipf("Skipping: request with tools failed: %v", err2)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})
	WaitForCache(setup.Plugin)

	// Test 3: Same request with tools (should cache hit)
	t.Log("Making same request with tools again...")
	response3, err3 := setup.Client.ChatCompletionRequest(ctx, requestWithTools)
	if err3 != nil {
		t.Fatalf("Second request with tools failed: %v", err3)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "")

	// Test 4: Request with different tools (should NOT cache hit)
	t.Log("Making request with different tools...")
	response4, err4 := setup.Client.ChatCompletionRequest(ctx, requestWithDifferentTools)
	if err4 != nil {
		t.Skipf("Skipping: request with different tools failed: %v", err4)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4})

	t.Log("✅ Tool variations test completed!")
}
// TestContentVariations tests caching behavior with different content types.
//
// For each content shape (single image, multiple images, very long text, multi-turn
// conversation) the identical request is sent twice under the same cache key and the
// second response must be a direct cache hit. A failure of the first request only
// logs a warning and ends that subtest.
func TestContentVariations(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	const (
		boardwalkImageURL = "https://pub-cdead89c2f004d8f963fd34010c479d0.r2.dev/Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
		sceneryImageURL   = "https://upload.wikimedia.org/wikipedia/commons/b/b5/Scenery_.jpg"
	)

	// userText builds a user message whose content is a plain string.
	userText := func(text string) schemas.ChatMessage {
		return schemas.ChatMessage{
			Role: schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{
				ContentStr: bifrost.Ptr(text),
			},
		}
	}

	// userBlocks builds a user message made of a leading text block followed by
	// one image block per URL.
	userBlocks := func(text string, imageURLs ...string) schemas.ChatMessage {
		blocks := []schemas.ChatContentBlock{
			{
				Type: schemas.ChatContentBlockTypeText,
				Text: bifrost.Ptr(text),
			},
		}
		for _, imageURL := range imageURLs {
			blocks = append(blocks, schemas.ChatContentBlock{
				Type: schemas.ChatContentBlockTypeImage,
				ImageURLStruct: &schemas.ChatInputImage{
					URL: imageURL,
				},
			})
		}
		return schemas.ChatMessage{
			Role: schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{
				ContentBlocks: blocks,
			},
		}
	}

	// chatRequest wraps messages in a gpt-4o-mini request with the given limits.
	chatRequest := func(maxTokens int, temperature float64, messages ...schemas.ChatMessage) *schemas.BifrostChatRequest {
		return &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    messages,
			Params: &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(maxTokens),
				Temperature:         bifrost.Ptr(temperature),
			},
		}
	}

	cases := []struct {
		name    string
		request *schemas.BifrostChatRequest
	}{
		{
			name:    "Image URL Content",
			request: chatRequest(200, 0.3, userBlocks("Analyze this image", boardwalkImageURL)),
		},
		{
			name:    "Multiple Images",
			request: chatRequest(200, 0.3, userBlocks("Compare these images", boardwalkImageURL, sceneryImageURL)),
		},
		{
			name:    "Very Long Content",
			request: chatRequest(50, 0.2, userText(strings.Repeat("This is a very long prompt. ", 100))),
		},
		{
			name: "Multi-turn Conversation",
			request: chatRequest(150, 0.5,
				userText("What is AI?"),
				schemas.ChatMessage{
					Role: schemas.ChatMessageRoleAssistant,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("AI stands for Artificial Intelligence..."),
					},
				},
				userText("Can you give me examples?"),
			),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Logf("Testing content variation: %s", tc.name)

			// Every subtest gets its own context so nothing leaks between cases.
			ctx := CreateContextWithCacheKey("content-variations-test")

			// Prime the cache.
			if _, firstErr := setup.Client.ChatCompletionRequest(ctx, tc.request); firstErr != nil {
				t.Logf("⚠️  First %s request failed: %v", tc.name, firstErr)
				return // Nothing to verify for this case
			}
			WaitForCache(setup.Plugin)

			// Replay the identical request; it must come from the cache.
			cached, secondErr := setup.Client.ChatCompletionRequest(ctx, tc.request)
			if secondErr != nil {
				t.Fatalf("Second %s request failed: %v", tc.name, secondErr)
			}
			AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: cached}, string(CacheTypeDirect))

			t.Logf("✅ %s content variation successful", tc.name)
		})
	}
}
// TestBoundaryParameterValues tests edge case parameter values.
//
// Each case sends one request with extreme sampling parameters and only logs
// whether the call succeeded; a failing request is reported as a warning, not as a
// test failure.
func TestBoundaryParameterValues(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// singlePrompt wraps one user prompt and the given parameters in a request.
	singlePrompt := func(prompt string, params *schemas.ChatParameters) *schemas.BifrostChatRequest {
		return &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr(prompt),
					},
				},
			},
			Params: params,
		}
	}

	cases := []struct {
		name    string
		request *schemas.BifrostChatRequest
	}{
		{
			name: "Maximum Parameter Values",
			request: singlePrompt("Test max parameters", &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(4096),
				PresencePenalty:     bifrost.Ptr(2.0),
				FrequencyPenalty:    bifrost.Ptr(2.0),
				Temperature:         bifrost.Ptr(2.0),
				TopP:                bifrost.Ptr(1.0),
			}),
		},
		{
			name: "Minimum Parameter Values",
			request: singlePrompt("Test min parameters", &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(1),
				PresencePenalty:     bifrost.Ptr(-2.0),
				FrequencyPenalty:    bifrost.Ptr(-2.0),
				Temperature:         bifrost.Ptr(0.0),
				TopP:                bifrost.Ptr(0.01),
			}),
		},
		{
			name: "Edge Case Parameters",
			request: singlePrompt("Test edge case parameters", &schemas.ChatParameters{
				MaxCompletionTokens: bifrost.Ptr(1),
				User:                bifrost.Ptr("test-user-id-12345"),
				Temperature:         bifrost.Ptr(0.0),
				TopP:                bifrost.Ptr(0.1),
			}),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Logf("Testing boundary parameters: %s", tc.name)

			// Every subtest gets its own context so nothing leaks between cases.
			ctx := CreateContextWithCacheKey("boundary-params-test")

			if _, reqErr := setup.Client.ChatCompletionRequest(ctx, tc.request); reqErr != nil {
				t.Logf("⚠️  %s request failed (may be expected): %v", tc.name, reqErr)
				return
			}
			t.Logf("✅ %s handled gracefully", tc.name)
		})
	}
}
// TestSemanticSimilarityEdgeCases tests edge cases in semantic similarity matching.
//
// For each prompt pair the cache is cleared, prompt1 is sent to populate it, and
// prompt2 is sent with the same parameters. The response's CacheDebug fields decide
// whether a semantic hit occurred. An unexpected semantic hit fails the subtest; a
// missing expected hit is only logged together with the observed threshold and
// similarity.
//
// If the first request of a pair fails, the subtest is marked as skipped rather
// than silently passing.
func TestSemanticSimilarityEdgeCases(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// NOTE(review): the threshold is changed after NewTestSetup has returned;
	// confirm the plugin reads this Config value at lookup time.
	setup.Config.Threshold = 0.9

	// Test case: Similar questions with different wording
	similarTests := []struct {
		prompt1     string
		prompt2     string
		shouldMatch bool
		description string
	}{
		{
			prompt1:     "What is machine learning?",
			prompt2:     "Can you explain machine learning?",
			shouldMatch: true,
			description: "Similar questions about ML",
		},
		{
			prompt1:     "How does AI work?",
			prompt2:     "Explain artificial intelligence",
			shouldMatch: true,
			description: "AI-related questions",
		},
		{
			prompt1:     "What is the weather today?",
			prompt2:     "What do you know about bifrost?",
			shouldMatch: false,
			description: "Completely different topics",
		},
		{
			prompt1:     "Hello, how are you?",
			prompt2:     "Hi, how are you doing?",
			shouldMatch: true,
			description: "Similar greetings",
		},
	}

	for i, test := range similarTests {
		t.Run(test.description, func(t *testing.T) {
			// Create a fresh context for each subtest to avoid context pollution
			ctx := CreateContextWithCacheKey("semantic-edge-test")

			// Clear cache for this subtest
			clearTestKeysWithStore(t, setup.Store)

			// Make first request
			request1 := CreateBasicChatRequest(test.prompt1, 0.1, 50)
			_, err1 := setup.Client.ChatCompletionRequest(ctx, request1)
			if err1 != nil {
				t.Skipf("Skipping: first request failed: %v", err1)
			}

			// Wait for cache to be written
			WaitForCache(setup.Plugin)

			// Make second request with similar content
			request2 := CreateBasicChatRequest(test.prompt2, 0.1, 50) // Same parameters
			response2, err2 := setup.Client.ChatCompletionRequest(ctx, request2)
			if err2 != nil {
				if err2.Error != nil {
					t.Fatalf("Second request failed: %v", err2.Error.Message)
				}
				t.Fatalf("Second request failed: %v", err2)
			}

			// Threshold and similarity stay at zero unless a semantic hit reports them.
			var cacheThresholdFloat float64
			var cacheSimilarityFloat float64

			// Check if semantic matching occurred
			semanticMatch := false
			debug := response2.ExtraFields.CacheDebug
			if debug != nil && debug.CacheHit && debug.HitType != nil && *debug.HitType == string(CacheTypeSemantic) {
				semanticMatch = true
				if debug.Threshold != nil {
					cacheThresholdFloat = *debug.Threshold
				}
				if debug.Similarity != nil {
					cacheSimilarityFloat = *debug.Similarity
				}
			}

			switch {
			case test.shouldMatch && semanticMatch:
				t.Logf("✅ Test %d: Semantic match found as expected for '%s'", i+1, test.description)
			case test.shouldMatch:
				// A missed expected match is informational only.
				t.Logf(" Test %d: No semantic match found for '%s', check with threshold: %f and found similarity: %f", i+1, test.description, cacheThresholdFloat, cacheSimilarityFloat)
			case semanticMatch:
				t.Errorf("❌ Test %d: Unexpected semantic match for different topics: '%s', check with threshold: %f and found similarity: %f", i+1, test.description, cacheThresholdFloat, cacheSimilarityFloat)
			default:
				t.Logf("✅ Test %d: Correctly no semantic match for different topics: '%s'", i+1, test.description)
			}
		})
	}
}
// TestErrorHandlingEdgeCases tests various error scenarios.
//
// Two subtests send the same request through contexts that should not be usable
// for caching: one without any cache key, and one whose CacheKey value is an int
// instead of a string. In both cases the response must not be a cache hit.
func TestErrorHandlingEdgeCases(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	request := CreateBasicChatRequest("Test error handling scenarios", 0.5, 50)

	// A context that carries no cache key must not crash and must bypass the cache.
	t.Run("Request without cache key", func(t *testing.T) {
		bareCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)

		resp, reqErr := setup.Client.ChatCompletionRequest(bareCtx, request)
		if reqErr != nil {
			t.Errorf("Request without cache key failed: %v", reqErr)
			return
		}

		// No cache key means the cache is never consulted.
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: resp})
		t.Log("✅ Request without cache key correctly bypassed cache")
	})

	// A cache key of the wrong type must be ignored even when an entry exists.
	t.Run("Request with invalid cache key type", func(t *testing.T) {
		// Seed the cache through a context with a valid string key.
		seedCtx := CreateContextWithCacheKey("error-handling-test")
		if _, seedErr := setup.Client.ChatCompletionRequest(seedCtx, request); seedErr != nil {
			t.Fatalf("First request with valid cache key failed: %v", seedErr)
		}
		WaitForCache(setup.Plugin)

		// Repeat the request with an int stored under CacheKey.
		badKeyCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, 12345)
		resp, reqErr := setup.Client.ChatCompletionRequest(badKeyCtx, request)
		if reqErr != nil {
			t.Errorf("Request with invalid cache key type failed: %v", reqErr)
			return
		}

		// The malformed key must lead to a cache bypass.
		AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: resp})
		t.Log("✅ Request with invalid cache key type correctly bypassed cache")
	})

	t.Log("✅ Error handling edge cases completed!")
}

View File

@@ -0,0 +1,174 @@
package semanticcache
import (
"testing"
"time"
"github.com/maximhq/bifrost/core/schemas"
)
// TestEmbeddingRequestsCaching tests that embedding requests are properly cached using direct hash matching.
//
// The same two-text embedding request is sent twice under one cache key. The second
// response must be a direct cache hit and contain the same number of embeddings as
// the first. Request durations are logged for information only; they are never
// asserted.
//
// If the first request fails, the test is marked as skipped rather than silently
// passing.
func TestEmbeddingRequestsCaching(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-embedding-cache")

	// Create embedding request
	embeddingRequest := CreateEmbeddingRequest([]string{
		"What is machine learning?",
		"Explain artificial intelligence in simple terms.",
	})

	t.Log("Making first embedding request (should go to OpenAI and be cached)...")

	// Make first request (will go to OpenAI and be cached)
	start1 := time.Now()
	response1, err1 := setup.Client.EmbeddingRequest(ctx, embeddingRequest)
	duration1 := time.Since(start1)
	if err1 != nil {
		t.Skipf("Skipping: first embedding request failed: %v", err1)
	}
	if response1 == nil || len(response1.Data) == 0 {
		t.Fatal("First embedding response is invalid")
	}

	t.Logf("First embedding request completed in %v", duration1)
	t.Logf("Response contains %d embeddings", len(response1.Data))

	// Wait for cache to be written
	WaitForCache(setup.Plugin)

	t.Log("Making second identical embedding request (should be served from cache)...")

	// Make second identical request (should be cached)
	start2 := time.Now()
	response2, err2 := setup.Client.EmbeddingRequest(ctx, embeddingRequest)
	duration2 := time.Since(start2)
	if err2 != nil {
		t.Fatalf("Second embedding request failed: %v", err2)
	}
	if response2 == nil || len(response2.Data) == 0 {
		t.Fatal("Second embedding response is invalid")
	}

	// Verify cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct")

	t.Logf("Second embedding request completed in %v", duration2)

	// The cached request is expected to be faster, but timing depends on the
	// environment, so a slower cache is only logged and never fails the test.
	if duration2 >= duration1 {
		t.Log("⚠️  Cache doesn't seem faster, but this could be due to test environment")
	}

	// Only the number of embeddings is compared between the two responses.
	if len(response1.Data) != len(response2.Data) {
		t.Errorf("Response lengths differ: %d vs %d", len(response1.Data), len(response2.Data))
	}

	t.Log("✅ Embedding requests properly cached using direct hash matching")
}
// TestEmbeddingRequestsNoCacheWithoutCacheKey tests that embedding requests without cache key are not cached.
//
// A single embedding request is sent with an empty cache key and the response must
// not be reported as a cache hit.
func TestEmbeddingRequestsNoCacheWithoutCacheKey(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// The helper is called with an empty string, so the key is set but blank.
	// NOTE(review): presumably an empty key is treated like a missing one — confirm
	// against CreateContextWithCacheKey and the plugin's key lookup.
	emptyKeyCtx := CreateContextWithCacheKey("")

	request := CreateEmbeddingRequest([]string{"Test embedding without cache key"})

	t.Log("Making embedding request without cache key...")

	resp, reqErr := setup.Client.EmbeddingRequest(emptyKeyCtx, request)
	if reqErr != nil {
		t.Fatalf("Embedding request failed: %v", reqErr)
	}

	// The response must come from the provider, not the cache.
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: resp})

	t.Log("✅ Embedding requests without cache key are properly not cached")
}
// TestEmbeddingRequestsDifferentTexts tests that different embedding texts produce different cache entries.
//
// Two embedding requests with different input text are sent under the same cache
// key; neither response may be a cache hit. If either request fails, the test is
// marked as skipped rather than silently passing.
func TestEmbeddingRequestsDifferentTexts(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-embedding-different")

	// Create two different embedding requests
	request1 := CreateEmbeddingRequest([]string{"First set of texts"})
	request2 := CreateEmbeddingRequest([]string{"Second set of texts"})

	t.Log("Making first embedding request...")
	response1, err1 := setup.Client.EmbeddingRequest(ctx, request1)
	if err1 != nil {
		t.Skipf("Skipping: first embedding request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Making second different embedding request...")
	response2, err2 := setup.Client.EmbeddingRequest(ctx, request2)
	if err2 != nil {
		t.Skipf("Skipping: second embedding request failed: %v", err2)
	}
	// Should not be a cache hit since texts are different
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2})

	t.Log("✅ Different embedding texts produce different cache entries")
}
// TestEmbeddingRequestsCacheExpiration tests TTL functionality for embedding requests.
//
// The same embedding request is sent three times on a context created with a
// 5-second TTL: the first must miss, the second (sent immediately) must be a direct
// hit, and the third (sent after sleeping TTL + 1s) must miss again.
//
// Requests that are expected to miss the cache mark the test as skipped when they
// fail, rather than returning silently and being reported as a pass.
func TestEmbeddingRequestsCacheExpiration(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Set very short TTL for testing
	shortTTL := 5 * time.Second
	ctx := CreateContextWithCacheKeyAndTTL("test-embedding-ttl", shortTTL)

	embeddingRequest := CreateEmbeddingRequest([]string{"TTL test embedding"})

	t.Log("Making first embedding request with short TTL...")
	response1, err1 := setup.Client.EmbeddingRequest(ctx, embeddingRequest)
	if err1 != nil {
		t.Skipf("Skipping: first embedding request failed: %v", err1)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1})
	WaitForCache(setup.Plugin)

	t.Log("Making second request before TTL expiration...")
	response2, err2 := setup.Client.EmbeddingRequest(ctx, embeddingRequest)
	if err2 != nil {
		if err2.Error != nil {
			t.Fatalf("Second request failed: %v", err2.Error.Message)
		}
		t.Fatalf("Second request failed: %v", err2)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct")

	t.Logf("Waiting for TTL expiration (%v)...", shortTTL)
	time.Sleep(shortTTL + 1*time.Second) // Wait for TTL to expire

	t.Log("Making third request after TTL expiration...")
	response3, err3 := setup.Client.EmbeddingRequest(ctx, embeddingRequest)
	if err3 != nil {
		t.Skipf("Skipping: third embedding request failed: %v", err3)
	}
	// Should not be a cache hit since TTL expired
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response3})

	t.Log("✅ Embedding requests properly handle TTL expiration")
}

View File

@@ -0,0 +1,427 @@
package semanticcache
import (
"os"
"testing"
"time"
"github.com/maximhq/bifrost/core/schemas"
)
// TestImageGenerationCacheBasicFunctionality tests basic image generation caching.
//
// The test is skipped in -short mode and when OPENAI_API_KEY is unset. Otherwise the
// same image generation request is issued twice under one cache key; the second
// response must be a direct cache hit, carry the same number of images, and report
// the request's provider. Timings are logged, and the test only fails on timing
// when the cached call is more than ten times slower than the original or its
// duration measures as zero.
func TestImageGenerationCacheBasicFunctionality(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in -short mode")
	}
	if os.Getenv("OPENAI_API_KEY") == "" {
		t.Skip("OPENAI_API_KEY not set")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-image-gen-value")

	// Request that is issued twice below.
	imageRequest := CreateImageGenerationRequest(
		"A serene Japanese garden with cherry blossoms in spring",
		"1024x1024",
		"low",
	)

	t.Log("Making first image generation request (should go to OpenAI and be cached)...")

	// First call: expected to reach the provider and populate the cache.
	firstStart := time.Now()
	original, originalErr := setup.Client.ImageGenerationRequest(ctx, imageRequest)
	originalDuration := time.Since(firstStart)
	if originalErr != nil {
		t.Skipf("First image generation request failed (may be rate limited): %v", originalErr)
	}
	if original == nil || len(original.Data) == 0 {
		t.Fatal("First response is invalid or has no image data")
	}

	t.Logf("First request completed in %v", originalDuration)
	t.Logf("Response: ID=%s, Images=%d", original.ID, len(original.Data))

	// Give the plugin time to persist the entry.
	WaitForCache(setup.Plugin)

	t.Log("Making second identical request (should be served from cache)...")

	// Second call: identical request, expected to be answered from the cache.
	secondStart := time.Now()
	cached, cachedErr := setup.Client.ImageGenerationRequest(ctx, imageRequest)
	cachedDuration := time.Since(secondStart)
	if cachedErr != nil {
		if cachedErr.Error != nil {
			t.Fatalf("Second request failed: %v", cachedErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", cachedErr)
	}
	if cached == nil || len(cached.Data) == 0 {
		t.Fatal("Second response is invalid or has no image data")
	}

	t.Logf("Second request completed in %v", cachedDuration)

	// The second response must be flagged as a direct cache hit.
	AssertCacheHit(t, &schemas.BifrostResponse{ImageGenerationResponse: cached}, string(CacheTypeDirect))

	// Timing report.
	t.Logf("Performance Summary:")
	t.Logf("First request (OpenAI): %v", originalDuration)
	t.Logf("Second request (Cache): %v", cachedDuration)

	// A zero duration cannot be used as a divisor; report it and stop here.
	if cachedDuration == 0 {
		t.Errorf("Second request duration too small to compute speedup (duration2=0)")
		return
	}
	speedup := float64(originalDuration) / float64(cachedDuration)
	if cachedDuration < originalDuration {
		t.Logf("Cache speedup: %.2fx faster", speedup)
	} else {
		t.Logf("Cache was slower than original: speedup=%.2fx (this can happen due to system load)", speedup)
		// Tolerate moderate slowness; only a 10x+ slowdown is treated as a real problem.
		if cachedDuration > originalDuration*10 {
			t.Errorf("Cache is extremely slow compared to original: cache=%v, original=%v (cache may not be working)", cachedDuration, originalDuration)
		}
	}

	// The cached response must carry as many images as the original.
	if len(cached.Data) != len(original.Data) {
		t.Errorf("Image count differs between cached and original: original=%d, cached=%d",
			len(original.Data), len(cached.Data))
	}

	// The provider recorded on the cached response must match the request.
	if cached.ExtraFields.Provider != imageRequest.Provider {
		t.Errorf("Provider mismatch in cached response: expected %s, got %s",
			imageRequest.Provider, cached.ExtraFields.Provider)
	}

	t.Log("✅ Basic image generation caching test completed successfully!")
}
// TestImageGenerationSemanticSearch tests semantic similarity search for image generation
func TestImageGenerationSemanticSearch(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in -short mode")
}
if os.Getenv("OPENAI_API_KEY") == "" {
t.Skip("OPENAI_API_KEY not set")
}
// Initialize test with custom threshold
config := &Config{
Provider: schemas.OpenAI,
EmbeddingModel: "text-embedding-3-small",
Dimension: 1536,
Threshold: 0.5,
Keys: []schemas.Key{
{Value: *schemas.NewEnvVar("env.OPENAI_API_KEY"), Models: []string{"*"}, Weight: 1.0},
},
}
setup := NewTestSetupWithConfig(t, config)
defer setup.Cleanup()
ctx := CreateContextWithCacheKey("image-semantic-test-value")
// First request - this will be cached
firstRequest := CreateImageGenerationRequest(
"A beautiful sunset over the ocean with golden clouds",
"1024x1024",
"low",
)
t.Log("Making first image generation request (should go to OpenAI and be cached)...")
start1 := time.Now()
response1, err1 := setup.Client.ImageGenerationRequest(ctx, firstRequest)
duration1 := time.Since(start1)
if err1 != nil {
t.Skipf("First image generation request failed (may be rate limited): %v", err1)
return
}
if response1 == nil || len(response1.Data) == 0 {
t.Fatal("First response is invalid or has no image data")
}
t.Logf("First request completed in %v", duration1)
// Wait for cache to be written
WaitForCache(setup.Plugin)
// Second request - very similar text to test semantic matching
secondRequest := CreateImageGenerationRequest(
"A gorgeous sunset over the sea with orange clouds",
"1024x1024",
"low",
)
t.Log("Making semantically similar request (should be served from semantic cache)...")
start2 := time.Now()
response2, err2 := setup.Client.ImageGenerationRequest(ctx, secondRequest)
duration2 := time.Since(start2)
if err2 != nil {
if err2.Error != nil {
t.Fatalf("Second request failed: %v", err2.Error.Message)
} else {
t.Fatalf("Second request failed: %v", err2)
}
}
if response2 == nil || len(response2.Data) == 0 {
t.Fatal("Second response is invalid or has no image data")
}
t.Logf("Second request completed in %v", duration2)
// Check if second request was served from semantic cache
semanticMatch := false
if response2.ExtraFields.CacheDebug != nil && response2.ExtraFields.CacheDebug.CacheHit {
if response2.ExtraFields.CacheDebug.HitType != nil && *response2.ExtraFields.CacheDebug.HitType == string(CacheTypeSemantic) {
semanticMatch = true
threshold := 0.0
similarity := 0.0
if response2.ExtraFields.CacheDebug.Threshold != nil {
threshold = *response2.ExtraFields.CacheDebug.Threshold
}
if response2.ExtraFields.CacheDebug.Similarity != nil {
similarity = *response2.ExtraFields.CacheDebug.Similarity
}
t.Logf("✅ Second request was served from semantic cache! Cache threshold: %f, Cache similarity: %f", threshold, similarity)
}
}
if !semanticMatch {
t.Error("Semantic match expected but not found")
return
}
// Performance comparison
t.Logf("Semantic Cache Performance:")
t.Logf("First request (OpenAI): %v", duration1)
t.Logf("Second request (Semantic): %v", duration2)
if duration2 < duration1 {
speedup := float64(duration1) / float64(duration2)
t.Logf("Semantic cache speedup: %.2fx faster", speedup)
} else {
slowdown := float64(duration2) / float64(duration1)
t.Logf("Semantic cache was slower than original: %.2fx slower (this can happen due to system load)", slowdown)
// Only fail if cache is extremely slow (10x+ slower), indicating a real problem
if slowdown > 10 {
t.Errorf("Semantic cache is extremely slow compared to original: slowdown=%.2fx, cache=%v, original=%v (cache may not be working)", slowdown, duration2, duration1)
}
}
t.Log("✅ Image generation semantic search test completed successfully!")
}
// TestImageGenerationDifferentParameters verifies that image generation
// requests sharing a prompt but differing in size or quality are not answered
// from each other's cache entries. The test is skipped in -short mode and
// when OPENAI_API_KEY is not set.
func TestImageGenerationDifferentParameters(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in -short mode")
	}
	if os.Getenv("OPENAI_API_KEY") == "" {
		t.Skip("OPENAI_API_KEY not set")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("image-params-test")
	prompt := "A cute cat sitting on a windowsill"

	// Baseline: 1024x1024 at low quality. Only its cache entry matters here,
	// so the response itself is discarded.
	baseline := CreateImageGenerationRequest(prompt, "1024x1024", "low")
	t.Log("Making first request with 1024x1024...")
	if _, baselineErr := setup.Client.ImageGenerationRequest(ctx, baseline); baselineErr != nil {
		t.Skipf("First image generation request failed (may be rate limited): %v", baselineErr)
	}
	WaitForCache(setup.Plugin)

	// Same prompt, different size: must miss the cache.
	resized := CreateImageGenerationRequest(prompt, "1024x1536", "low")
	t.Log("Making second request with different size (1024x1536)...")
	resizedResponse, resizedErr := setup.Client.ImageGenerationRequest(ctx, resized)
	if resizedErr != nil {
		t.Skipf("Second image generation request failed (may be rate limited): %v", resizedErr)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ImageGenerationResponse: resizedResponse})
	WaitForCache(setup.Plugin)

	// Same prompt and size as the baseline, different quality: must miss the cache.
	upgraded := CreateImageGenerationRequest(prompt, "1024x1024", "high")
	t.Log("Making third request with different quality (high)...")
	upgradedResponse, upgradedErr := setup.Client.ImageGenerationRequest(ctx, upgraded)
	if upgradedErr != nil {
		t.Skipf("Third image generation request failed (may be rate limited): %v", upgradedErr)
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ImageGenerationResponse: upgradedResponse})

	t.Log("✅ Image generation different parameters test completed!")
}
// TestImageGenerationStreamCaching verifies that an identical streaming image
// generation request is replayed from the cache: the second stream must carry
// a cache hit in the debug info of its last chunk. The test is skipped in
// -short mode and when OPENAI_API_KEY is not set.
func TestImageGenerationStreamCaching(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in -short mode")
	}
	if os.Getenv("OPENAI_API_KEY") == "" {
		t.Skip("OPENAI_API_KEY not set")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("image-stream-test")

	// The same request is sent twice; only the first should reach the provider.
	testRequest := CreateImageGenerationRequest(
		"A futuristic city skyline at night with neon lights",
		"1024x1024",
		"low",
	)

	t.Log("Making first streaming image generation request...")
	firstStart := time.Now()
	firstStream, firstErr := setup.Client.ImageGenerationStreamRequest(ctx, testRequest)
	if firstErr != nil {
		t.Skipf("First streaming request failed (may be rate limited): %v", firstErr)
	}

	// Drain the first stream, keeping every image generation chunk.
	var originalChunks []schemas.BifrostImageGenerationStreamResponse
	for msg := range firstStream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in first stream: %v", msg.BifrostError)
		}
		if msg.BifrostImageGenerationStreamResponse != nil {
			originalChunks = append(originalChunks, *msg.BifrostImageGenerationStreamResponse)
		}
	}
	firstElapsed := time.Since(firstStart)

	if len(originalChunks) == 0 {
		t.Fatal("First streaming request returned no responses")
	}
	t.Logf("First streaming request completed in %v with %d chunks", firstElapsed, len(originalChunks))

	// Wait for the cache entry to be written before repeating the request.
	WaitForCache(setup.Plugin)

	t.Log("Making second identical streaming request (should be served from cache)...")
	secondStart := time.Now()
	secondStream, secondErr := setup.Client.ImageGenerationStreamRequest(ctx, testRequest)
	if secondErr != nil {
		t.Fatalf("Second streaming request failed: %v", secondErr)
	}

	// Drain the second stream the same way.
	var cachedChunks []schemas.BifrostImageGenerationStreamResponse
	for msg := range secondStream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in second stream: %v", msg.BifrostError)
		}
		if msg.BifrostImageGenerationStreamResponse != nil {
			cachedChunks = append(cachedChunks, *msg.BifrostImageGenerationStreamResponse)
		}
	}
	secondElapsed := time.Since(secondStart)

	if len(cachedChunks) == 0 {
		t.Fatal("Second streaming request returned no responses")
	}
	t.Logf("Second streaming request completed in %v with %d chunks", secondElapsed, len(cachedChunks))

	// A replayed stream should contain as many chunks as the original.
	if len(originalChunks) != len(cachedChunks) {
		t.Errorf("Stream chunk count mismatch: original=%d, cached=%d", len(originalChunks), len(cachedChunks))
	}

	// Cache debug info is only on the last chunk for streaming responses.
	// cachedChunks is known to be non-empty at this point.
	finalChunk := cachedChunks[len(cachedChunks)-1]
	debug := finalChunk.ExtraFields.CacheDebug
	if debug == nil || !debug.CacheHit {
		// Dump per-chunk debug state to help diagnose the miss, then fail.
		for i, chunk := range cachedChunks {
			if chunk.ExtraFields.CacheDebug != nil {
				t.Logf("Chunk %d: CacheDebug present, CacheHit=%v", i, chunk.ExtraFields.CacheDebug.CacheHit)
			} else {
				t.Logf("Chunk %d: No CacheDebug info", i)
			}
		}
		t.Fatal("Second streaming request was not served from cache (CacheDebug not found on last chunk)")
	}

	hitType, cacheID := "unknown", "unknown"
	if debug.HitType != nil {
		hitType = *debug.HitType
	}
	if debug.CacheID != nil {
		cacheID = *debug.CacheID
	}
	t.Logf("✅ Cache hit confirmed on last chunk: HitType=%s, CacheID=%s", hitType, cacheID)

	// Timing comparison (informational unless the cache is drastically slower).
	t.Logf("Streaming Performance Summary:")
	t.Logf("First request (OpenAI): %v", firstElapsed)
	t.Logf("Second request (Cache): %v", secondElapsed)

	speedup := float64(firstElapsed) / float64(secondElapsed)
	if secondElapsed < firstElapsed {
		t.Logf("Streaming cache speedup: %.2fx faster", speedup)
	} else {
		t.Logf("Streaming cache was slower than original: speedup=%.2fx (this can happen due to system load)", speedup)
		// Only a 10x+ slowdown is treated as a real problem.
		if secondElapsed > firstElapsed*10 {
			t.Errorf("Streaming cache is extremely slow compared to original: cache=%v, original=%v (cache may not be working)", secondElapsed, firstElapsed)
		}
	}

	t.Log("✅ Image generation streaming cache test completed successfully!")
}

View File

@@ -0,0 +1,736 @@
package semanticcache
import (
"context"
"strings"
"testing"
"time"
"github.com/google/uuid"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// TestSemanticCacheBasicFlow drives the plugin hooks directly through a full
// miss -> store -> hit cycle: the first PreLLMHook call must miss, the response
// passed to PostLLMHook must be stored, and a second identical PreLLMHook call
// must short-circuit with the same content.
func TestSemanticCacheBasicFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	// Request used for both the miss and the hit.
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Hello, world!"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	t.Log("Testing first request (cache miss)...")

	// First request - nothing has been stored yet, so no short circuit is expected.
	modifiedReq, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}
	if modifiedReq == nil {
		t.Fatal("Modified request is nil")
	}
	t.Log("✅ Cache miss handled correctly")

	// Hand-built provider response that the plugin is asked to store.
	response := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					Index: 0,
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("Hello! How can I help you today?"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	// Capture the original content for the comparison at the end.
	var originalContent string
	if len(response.ChatResponse.Choices) > 0 && response.ChatResponse.Choices[0].Message.Content.ContentStr != nil {
		originalContent = *response.ChatResponse.Choices[0].Message.Content.ContentStr
	}
	if originalContent == "" {
		t.Fatal("Original response content is empty")
	}
	t.Logf("Original response content: %s", originalContent)

	// Store the response.
	t.Log("Caching response...")
	_, _, err = setup.Plugin.PostLLMHook(ctx, response, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	// Wait for async caching to complete.
	WaitForCache(setup.Plugin)
	t.Log("✅ Response cached successfully")

	// Second request - identical, so it should be answered from the cache.
	t.Log("Testing second identical request (expecting cache hit)...")

	// Use a fresh context so nothing from the first request leaks into the lookup.
	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	modifiedReq2, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}
	if shortCircuit2 == nil {
		t.Fatal("expected cache hit on identical request")
	}
	if shortCircuit2.Response == nil {
		t.Fatal("Cache hit but response is nil")
	}
	if modifiedReq2 == nil {
		t.Fatal("Modified request is nil on cache hit")
	}
	t.Log("✅ Cache hit detected and response returned")

	// Validate the shape of the cached response before dereferencing it. A nil
	// pointer anywhere along this chain would otherwise panic and abort the
	// whole test binary instead of failing just this test.
	cachedChat := shortCircuit2.Response.ChatResponse
	if cachedChat == nil {
		t.Fatal("Cached response is not a chat response")
	}
	if len(cachedChat.Choices) == 0 {
		t.Fatal("Cached response has no choices")
	}
	cachedChoice := cachedChat.Choices[0]
	if cachedChoice.ChatNonStreamResponseChoice == nil ||
		cachedChoice.Message == nil ||
		cachedChoice.Message.Content == nil {
		t.Fatal("Cached response choice has no message content")
	}

	cachedContent := cachedChoice.Message.Content.ContentStr
	if cachedContent == nil || *cachedContent == "" {
		t.Fatal("Cached response content is empty")
	}
	t.Logf("✅ Cached response content: %s", *cachedContent)

	// Compare original and cached content, ignoring surrounding whitespace.
	originalContentTrimmed := strings.TrimSpace(originalContent)
	cachedContentTrimmed := strings.TrimSpace(*cachedContent)
	if originalContentTrimmed != cachedContentTrimmed {
		t.Fatalf("❌ Content mismatch: original='%s', cached='%s'", originalContentTrimmed, cachedContentTrimmed)
	}

	t.Log("✅ Content verification passed - original and cached responses match")
	t.Log("🎉 Basic semantic cache flow test passed!")
}
// TestSemanticCacheStrictFiltering verifies that a stored response is not
// returned for requests that share the prompt but differ in temperature or in
// model.
func TestSemanticCacheStrictFiltering(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	seedCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	seedCtx.SetValue(CacheKey, "test-cache-enabled")

	// Reference request: gpt-4o-mini at temperature 0.7.
	seedRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	t.Log("Testing first request with temperature=0.7...")

	// The reference request must miss: nothing has been stored yet.
	_, seedHit, err := setup.Plugin.PreLLMHook(seedCtx, seedRequest)
	if err != nil {
		t.Fatalf("First PreLLMHook failed: %v", err)
	}
	if seedHit != nil {
		t.Fatal("Expected cache miss for first request")
	}

	// Store a hand-built response for the reference request.
	seedResponse := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: uuid.New().String(),
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: schemas.ChatMessageRoleAssistant,
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("It's sunny today!"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	_, _, err = setup.Plugin.PostLLMHook(seedCtx, seedResponse, nil)
	if err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ First response cached")

	// Variation 1: identical except for the temperature.
	t.Log("Testing second request with temperature=0.5 (expecting cache miss)...")

	tempCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	tempCtx.SetValue(CacheKey, "test-cache-enabled")

	tempRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.5), // the only difference from the reference request
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	_, tempHit, err := setup.Plugin.PreLLMHook(tempCtx, tempRequest)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}
	if tempHit != nil {
		t.Fatal("Expected cache miss due to different temperature, but got cache hit")
	}
	t.Log("✅ Strict filtering working - different parameters result in cache miss")

	// Variation 2: identical except for the model.
	t.Log("Testing third request with different model (expecting cache miss)...")

	modelCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	modelCtx.SetValue(CacheKey, "test-cache-enabled")

	modelRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-3.5-turbo", // the only difference from the reference request
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("What is the weather like?"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	_, modelHit, err := setup.Plugin.PreLLMHook(modelCtx, modelRequest)
	if err != nil {
		t.Fatalf("Third PreLLMHook failed: %v", err)
	}
	if modelHit != nil {
		t.Fatal("Expected cache miss due to different model, but got cache hit")
	}
	t.Log("✅ Strict filtering working - different model results in cache miss")

	t.Log("🎉 Strict filtering test passed!")
}
// TestSemanticCacheStreamingFlow drives the plugin hooks directly for a
// streaming chat request: the first PreLLMHook call must miss, three simulated
// chunks are passed to PostLLMHook, and a second identical PreLLMHook call is
// then expected to short-circuit with a replayable stream.
//
// If the second lookup misses, the test is marked as skipped rather than
// passed, so that a run which verified nothing about replay is visible in the
// test results.
func TestSemanticCacheStreamingFlow(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx.SetValue(CacheKey, "test-cache-enabled")

	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionStreamRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Tell me a short story"),
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature: bifrost.Ptr(0.8),
			},
		},
	}

	t.Log("Testing streaming request (cache miss)...")

	// First request - nothing has been stored yet, so no short circuit is expected.
	_, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if shortCircuit != nil {
		t.Fatal("Expected cache miss for streaming request")
	}
	t.Log("✅ Streaming cache miss handled correctly")

	// Simulate the provider's streaming response, one PostLLMHook call per chunk.
	t.Log("Caching streaming response chunks...")

	chunks := []string{
		"Once upon a time,",
		" there was a brave",
		" knight who saved the day.",
	}

	for i, chunk := range chunks {
		// Only the final chunk carries a finish reason.
		var finishReason *string
		if i == len(chunks)-1 {
			finishReason = bifrost.Ptr("stop")
		}

		chunkResponse := &schemas.BifrostResponse{
			ChatResponse: &schemas.BifrostChatResponse{
				ID: uuid.New().String(),
				Choices: []schemas.BifrostResponseChoice{
					{
						Index:        i,
						FinishReason: finishReason,
						ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{
							Delta: &schemas.ChatStreamResponseChoiceDelta{
								Content: bifrost.Ptr(chunk),
							},
						},
					},
				},
				ExtraFields: schemas.BifrostResponseExtraFields{
					Provider:               schemas.OpenAI,
					OriginalModelRequested: "gpt-4o-mini",
					RequestType:            schemas.ChatCompletionStreamRequest,
					ChunkIndex:             i,
				},
			},
		}

		_, _, err = setup.Plugin.PostLLMHook(ctx, chunkResponse, nil)
		if err != nil {
			t.Fatalf("PostLLMHook failed for chunk %d: %v", i, err)
		}
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ Streaming response chunks cached")

	// Repeat the request with a fresh context and expect a replayed stream.
	t.Log("Testing streaming cache retrieval...")

	ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ctx2.SetValue(CacheKey, "test-cache-enabled")

	_, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	if shortCircuit2 == nil {
		// A miss is tolerated, but report it as a skip: returning normally would
		// record a pass even though stream replay was never exercised.
		t.Skip("⚠️  Expected streaming cache hit, but got cache miss - this may be expected with the new unified storage")
	}

	if shortCircuit2.Stream == nil {
		t.Fatal("Cache hit but stream is nil")
	}

	t.Log("✅ Streaming cache hit detected")

	// Drain the cached stream, counting only messages that carry a chat response.
	chunkCount := 0
	for chunk := range shortCircuit2.Stream {
		if chunk.BifrostChatResponse == nil {
			continue
		}
		chunkCount++
		t.Logf("Received cached chunk %d", chunkCount)
	}

	if chunkCount == 0 {
		t.Fatal("No chunks received from cached stream")
	}

	t.Logf("✅ Received %d cached chunks", chunkCount)
	t.Log("🎉 Streaming cache test passed!")
}
// TestSemanticCache_NoCacheWhenKeyMissing verifies that PreLLMHook does not
// short-circuit a request whose context carries no cache key.
func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
	t.Log("Testing cache behavior when cache key is missing...")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// CacheKey is deliberately left unset on this context.
	keylessCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)

	userMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Test message"),
		},
	}
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{userMessage},
		},
	}

	_, hit, err := setup.Plugin.PreLLMHook(keylessCtx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if hit != nil {
		t.Fatal("Expected no caching when cache key is not set, but got cache hit")
	}

	t.Log("✅ Cache properly disabled when no cache key is set")
	t.Log("🎉 No cache key test passed!")
}
// TestSemanticCache_CustomTTLHandling runs a miss-then-store cycle with a
// custom TTL placed in the request context and checks that both hooks succeed.
// It does not read the stored entry back, so the TTL value itself is not
// asserted here.
func TestSemanticCache_CustomTTLHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Context carrying both the cache key and a one-minute TTL override.
	ttlCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	ttlCtx.SetValue(CacheKey, "test-cache-enabled")
	ttlCtx.SetValue(CacheTTLKey, time.Minute)

	userMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("TTL test message"),
		},
	}
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{userMessage},
		},
	}

	// Nothing is stored yet, so the lookup must miss.
	_, hit, err := setup.Plugin.PreLLMHook(ttlCtx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if hit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Hand-built response passed to the plugin for storage.
	stored := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "ttl-test-response",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: "assistant",
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("TTL test response"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	if _, _, err = setup.Plugin.PostLLMHook(ttlCtx, stored, nil); err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)
	t.Log("✅ Custom TTL configuration test passed!")
}
// TestSemanticCache_CustomThresholdHandling checks that PreLLMHook accepts a
// custom similarity threshold supplied through the context and reports a miss
// on an empty cache. Exercising the threshold itself would need a stored entry
// and a semantic lookup, which this test does not set up.
func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Context carrying the cache key and a very high (0.95) threshold override.
	thresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	thresholdCtx.SetValue(CacheKey, "test-cache-enabled")
	thresholdCtx.SetValue(CacheThresholdKey, 0.95)

	userMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: bifrost.Ptr("Threshold test message"),
		},
	}
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input:    []schemas.ChatMessage{userMessage},
		},
	}

	_, hit, err := setup.Plugin.PreLLMHook(thresholdCtx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if hit != nil {
		t.Fatal("Expected cache miss with high threshold, but got cache hit")
	}

	t.Log("✅ Custom threshold configuration test passed!")
}
// TestSemanticCache_ProviderModelCachingFlags stores a response with the
// CacheByProvider and CacheByModel flags set to false, then repeats the same
// prompt against a different provider and model. Whether the second lookup
// hits is only logged, not asserted; the test fails only if a hook returns an
// error or the very first lookup unexpectedly hits.
func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Turn off provider- and model-based cache partitioning on the test config.
	setup.Config.CacheByProvider = bifrost.Ptr(false)
	setup.Config.CacheByModel = bifrost.Ptr(false)

	openAICtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	openAICtx.SetValue(CacheKey, "test-cache-enabled")

	openAIRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Provider model flags test"),
					},
				},
			},
		},
	}

	// The OpenAI request goes first and must miss on an empty cache.
	_, openAIHit, err := setup.Plugin.PreLLMHook(openAICtx, openAIRequest)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if openAIHit != nil {
		t.Fatal("Expected cache miss, but got cache hit")
	}

	// Hand-built response stored for the OpenAI request.
	stored := &schemas.BifrostResponse{
		ChatResponse: &schemas.BifrostChatResponse{
			ID: "provider-model-test",
			Choices: []schemas.BifrostResponseChoice{
				{
					ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
						Message: &schemas.ChatMessage{
							Role: "assistant",
							Content: &schemas.ChatMessageContent{
								ContentStr: bifrost.Ptr("Provider model test response"),
							},
						},
					},
				},
			},
			ExtraFields: schemas.BifrostResponseExtraFields{
				Provider:               schemas.OpenAI,
				OriginalModelRequested: "gpt-4o-mini",
				RequestType:            schemas.ChatCompletionRequest,
			},
		},
	}

	if _, _, err = setup.Plugin.PostLLMHook(openAICtx, stored, nil); err != nil {
		t.Fatalf("PostLLMHook failed: %v", err)
	}

	WaitForCache(setup.Plugin)

	// Same prompt, but aimed at a different provider and model.
	anthropicRequest := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.Anthropic,
			Model:    "claude-3-haiku",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Provider model flags test"),
					},
				},
			},
		},
	}

	anthropicCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	anthropicCtx.SetValue(CacheKey, "test-cache-enabled")

	_, anthropicHit, err := setup.Plugin.PreLLMHook(anthropicCtx, anthropicRequest)
	if err != nil {
		t.Fatalf("Second PreLLMHook failed: %v", err)
	}

	// With the flags off a cross-provider hit may occur; the outcome depends on
	// how the plugin derives its hashes, so it is reported rather than asserted.
	crossProviderHit := anthropicHit != nil
	t.Logf("Cache behavior with disabled provider/model flags: hit=%v", crossProviderHit)

	t.Log("✅ Provider/model caching flags test passed!")
}
// TestSemanticCache_ConfigurationEdgeCases verifies that PreLLMHook neither
// errors nor reports a hit when the context carries a TTL or a threshold value
// of the wrong type (a string in both cases).
func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// One request is reused for both lookups; nothing is ever stored for it.
	request := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Edge case test"),
					},
				},
			},
		},
	}

	// Case 1: the TTL value is a string instead of a duration.
	badTTLCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	badTTLCtx.SetValue(CacheKey, "test-cache-enabled")
	badTTLCtx.SetValue(CacheTTLKey, "not-a-duration")

	_, ttlHit, err := setup.Plugin.PreLLMHook(badTTLCtx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed with invalid TTL: %v", err)
	}
	if ttlHit != nil {
		t.Fatal("Unexpected cache hit with invalid TTL")
	}

	// Case 2: the threshold value is a string instead of a float.
	badThresholdCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	badThresholdCtx.SetValue(CacheKey, "test-cache-enabled")
	badThresholdCtx.SetValue(CacheThresholdKey, "not-a-float")

	_, thresholdHit, err := setup.Plugin.PreLLMHook(badThresholdCtx, request)
	if err != nil {
		t.Fatalf("PreLLMHook failed with invalid threshold: %v", err)
	}
	if thresholdHit != nil {
		t.Fatal("Unexpected cache hit with invalid threshold")
	}

	t.Log("✅ Configuration edge cases test passed!")
}

View File

@@ -0,0 +1,306 @@
package semanticcache
import (
"testing"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
// TestExtractTextForEmbedding_NilContent is a no-panic regression test: it
// calls extractTextForEmbedding with requests whose messages have nil Content
// (for example assistant tool-call messages). Results and errors are only
// logged; the test passes as long as the call returns.
func TestExtractTextForEmbedding_NilContent(t *testing.T) {
	plugin := &Plugin{config: &Config{}}

	// A user prompt followed by an assistant tool-call message without content.
	toolCallConversation := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: bifrost.Ptr("Call the get_weather function"),
					},
				},
				{
					Role:    schemas.ChatMessageRoleAssistant,
					Content: nil, // tool-call message with no content
					ChatAssistantMessage: &schemas.ChatAssistantMessage{
						ToolCalls: []schemas.ChatAssistantMessageToolCall{
							{
								ID:   bifrost.Ptr("call_123"),
								Type: bifrost.Ptr("function"),
								Function: schemas.ChatAssistantMessageToolCallFunction{
									Name:      bifrost.Ptr("get_weather"),
									Arguments: `{"location": "San Francisco"}`,
								},
							},
						},
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	// A conversation in which no message has any content at all.
	contentlessConversation := &schemas.BifrostRequest{
		RequestType: schemas.ChatCompletionRequest,
		ChatRequest: &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role:    schemas.ChatMessageRoleAssistant,
					Content: nil,
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         bifrost.Ptr(0.7),
				MaxCompletionTokens: bifrost.Ptr(100),
			},
		},
	}

	// The Responses-API counterpart, built by a shared helper.
	contentlessResponses := &schemas.BifrostRequest{
		RequestType:      schemas.ResponsesRequest,
		ResponsesRequest: createResponsesRequestWithNilContent(),
	}

	cases := []struct {
		name    string
		request *schemas.BifrostRequest
	}{
		{name: "ChatRequest with nil Content in assistant tool-call message", request: toolCallConversation},
		{name: "ChatRequest where all messages have nil Content", request: contentlessConversation},
		{name: "ResponsesRequest with nil Content", request: contentlessResponses},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Returning at all is the success criterion; the error is deliberately
			// not checked because only a panic would indicate a regression.
			text, hash, err := plugin.extractTextForEmbedding(tc.request)
			t.Logf("text=%q, hash=%q, err=%v", text, hash, err)
		})
	}
}
// TestPrepareDirectCacheLookup_ResponsesStreamRequest checks that
// prepareDirectCacheLookup accepts a streaming Responses request: it must
// return a non-empty direct cache id and leave both the request hash and the
// params hash in the context.
func TestPrepareDirectCacheLookup_ResponsesStreamRequest(t *testing.T) {
	const cacheKey = "responses-stream-direct"

	p := &Plugin{
		config: getDefaultTestConfig(),
		logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug),
	}
	streamReq := &schemas.BifrostRequest{
		RequestType:      schemas.ResponsesStreamRequest,
		ResponsesRequest: CreateStreamingResponsesRequest("Explain cache invalidation", 0.2, 200),
	}
	ctx := CreateContextWithCacheKey(cacheKey)

	directID, err := p.prepareDirectCacheLookup(ctx, streamReq, cacheKey)
	if err != nil {
		t.Fatalf("prepareDirectCacheLookup failed: %v", err)
	}
	if directID == "" {
		t.Fatal("expected deterministic direct cache id")
	}

	// A missing or non-string context value yields "" from the type
	// assertion, which is treated the same as "not stored".
	requestHash, _ := ctx.Value(requestHashKey).(string)
	if requestHash == "" {
		t.Fatal("expected request hash to be stored in context")
	}
	paramsHash, _ := ctx.Value(requestParamsHashKey).(string)
	if paramsHash == "" {
		t.Fatal("expected params hash to be stored in context")
	}
}
// TestPrepareDirectCacheLookup_UnsupportedRequestTypeFailsClosed checks that
// prepareDirectCacheLookup rejects a passthrough request: it must return an
// error, no direct cache id, and must not write request hash, params hash or
// storage id into the context.
func TestPrepareDirectCacheLookup_UnsupportedRequestTypeFailsClosed(t *testing.T) {
	const cacheKey = "unsupported-direct"

	p := &Plugin{
		config: getDefaultTestConfig(),
		logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug),
	}
	passthroughReq := &schemas.BifrostRequest{
		RequestType: schemas.PassthroughRequest,
		PassthroughRequest: &schemas.BifrostPassthroughRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Method:   "GET",
			Path:     "/v1/models",
		},
	}
	ctx := CreateContextWithCacheKey(cacheKey)

	directID, err := p.prepareDirectCacheLookup(ctx, passthroughReq, cacheKey)
	if err == nil {
		t.Fatal("expected prepareDirectCacheLookup to reject unsupported request type")
	}
	if directID != "" {
		t.Fatalf("expected no direct cache id, got %q", directID)
	}

	// None of the per-request bookkeeping values may have been written.
	requestHash, _ := ctx.Value(requestHashKey).(string)
	if requestHash != "" {
		t.Fatalf("expected request hash to remain unset, got %q", requestHash)
	}
	paramsHash, _ := ctx.Value(requestParamsHashKey).(string)
	if paramsHash != "" {
		t.Fatalf("expected params hash to remain unset, got %q", paramsHash)
	}
	storageID, _ := ctx.Value(requestStorageIDKey).(string)
	if storageID != "" {
		t.Fatalf("expected storage id to remain unset, got %q", storageID)
	}
}
// TestPreLLMHookSkipsUnsupportedCountTokensRequest seeds the context with
// stale cache bookkeeping values, runs PreLLMHook on a count-tokens request,
// and checks that the request is returned untouched, that nothing is
// short-circuited, and that none of the seeded values is still visible
// afterwards.
func TestPreLLMHookSkipsUnsupportedCountTokensRequest(t *testing.T) {
	p := &Plugin{
		config: getDefaultTestConfig(),
		logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug),
	}

	countTokensReq := &schemas.BifrostRequest{
		RequestType: schemas.CountTokensRequest,
		CountTokensRequest: &schemas.BifrostResponsesRequest{
			Provider: schemas.Anthropic,
			Model:    "claude-sonnet-4-5",
			Input: []schemas.ResponsesMessage{
				{
					Role: bifrost.Ptr(schemas.ResponsesInputMessageRoleUser),
					Content: &schemas.ResponsesMessageContent{
						ContentStr: bifrost.Ptr("How many tokens is this message?"),
					},
				},
			},
		},
	}

	// Seed every bookkeeping key with a recognisable stale value.
	ctx := CreateContextWithCacheKey("count-tokens-test")
	ctx.SetValue(requestIDKey, "stale-request-id")
	ctx.SetValue(requestStorageIDKey, "stale-storage-id")
	ctx.SetValue(requestHashKey, "stale-request-hash")
	ctx.SetValue(requestParamsHashKey, "stale-params-hash")
	ctx.SetValue(requestModelKey, "stale-model")
	ctx.SetValue(requestProviderKey, schemas.OpenAI)
	ctx.SetValue(requestEmbeddingKey, []float32{1, 2, 3})
	ctx.SetValue(requestEmbeddingTokensKey, 99)
	ctx.SetValue(isCacheHitKey, true)
	ctx.SetValue(cacheHitTypeKey, CacheTypeDirect)

	returnedReq, shortCircuit, err := p.PreLLMHook(ctx, countTokensReq)
	if err != nil {
		t.Fatalf("PreLLMHook failed: %v", err)
	}
	if returnedReq != countTokensReq {
		t.Fatal("expected original request to be returned unchanged")
	}
	if shortCircuit != nil {
		t.Fatal("expected no short-circuit for unsupported count tokens request")
	}

	// String-valued keys: a failed type assertion yields "", i.e. "unset".
	requestID, _ := ctx.Value(requestIDKey).(string)
	if requestID != "" {
		t.Fatalf("expected requestIDKey to remain unset, got %q", requestID)
	}
	requestHash, _ := ctx.Value(requestHashKey).(string)
	if requestHash != "" {
		t.Fatalf("expected requestHashKey to remain unset, got %q", requestHash)
	}
	paramsHash, _ := ctx.Value(requestParamsHashKey).(string)
	if paramsHash != "" {
		t.Fatalf("expected requestParamsHashKey to remain unset, got %q", paramsHash)
	}
	storageID, _ := ctx.Value(requestStorageIDKey).(string)
	if storageID != "" {
		t.Fatalf("expected requestStorageIDKey to remain unset, got %q", storageID)
	}
	model, _ := ctx.Value(requestModelKey).(string)
	if model != "" {
		t.Fatalf("expected requestModelKey to remain unset, got %q", model)
	}

	// Typed keys: when the assertion fails the zero value is produced, so
	// comparing against the zero value covers both "absent" and "cleared".
	provider, _ := ctx.Value(requestProviderKey).(schemas.ModelProvider)
	if provider != "" {
		t.Fatalf("expected requestProviderKey to remain unset, got %q", provider)
	}
	if embedding := ctx.Value(requestEmbeddingKey); embedding != nil {
		t.Fatalf("expected requestEmbeddingKey to remain unset, got %#v", embedding)
	}
	embeddingTokens, _ := ctx.Value(requestEmbeddingTokensKey).(int)
	if embeddingTokens != 0 {
		t.Fatalf("expected requestEmbeddingTokensKey to remain unset, got %d", embeddingTokens)
	}
	cacheHit, _ := ctx.Value(isCacheHitKey).(bool)
	if cacheHit {
		t.Fatal("expected isCacheHitKey to remain unset")
	}
	hitType, _ := ctx.Value(cacheHitTypeKey).(CacheType)
	if hitType != "" {
		t.Fatalf("expected cacheHitTypeKey to remain unset, got %q", hitType)
	}
}
// TestGetNormalizedInputForCaching_NilContent verifies that getNormalizedInputForCaching
// does not panic when chat messages have nil Content.
func TestGetNormalizedInputForCaching_NilContent(t *testing.T) {
plugin := &Plugin{
config: &Config{},
}
request := &schemas.BifrostRequest{
RequestType: schemas.ChatCompletionRequest,
ChatRequest: &schemas.BifrostChatRequest{
Provider: schemas.OpenAI,
Model: "gpt-4o-mini",
Input: []schemas.ChatMessage{
{
Role: schemas.ChatMessageRoleUser,
Content: &schemas.ChatMessageContent{
ContentStr: bifrost.Ptr("Call the get_weather function"),
},
},
{
Role: schemas.ChatMessageRoleAssistant,
Content: nil,
ChatAssistantMessage: &schemas.ChatAssistantMessage{
ToolCalls: []schemas.ChatAssistantMessageToolCall{
{
ID: bifrost.Ptr("call_123"),
Type: bifrost.Ptr("function"),
Function: schemas.ChatAssistantMessageToolCallFunction{
Name: bifrost.Ptr("get_weather"),
Arguments: `{"location": "San Francisco"}`,
},
},
},
},
},
},
Params: &schemas.ChatParameters{
Temperature: bifrost.Ptr(0.7),
MaxCompletionTokens: bifrost.Ptr(100),
},
},
}
// This should not panic
result := plugin.getNormalizedInputForCaching(request)
t.Logf("result type: %T", result)
}
// createResponsesRequestWithNilContent returns a Responses request whose
// input is a user message with text followed by an assistant message whose
// Content is nil. It is a fixture for the nil-content tests.
func createResponsesRequestWithNilContent() *schemas.BifrostResponsesRequest {
	userMsg := schemas.ResponsesMessage{
		Role: bifrost.Ptr(schemas.ResponsesInputMessageRoleUser),
		Content: &schemas.ResponsesMessageContent{
			ContentStr: bifrost.Ptr("Hello"),
		},
	}
	contentlessAssistantMsg := schemas.ResponsesMessage{
		Role:    bifrost.Ptr(schemas.ResponsesInputMessageRoleAssistant),
		Content: nil,
	}
	params := &schemas.ResponsesParameters{
		Temperature:     bifrost.Ptr(0.7),
		MaxOutputTokens: bifrost.Ptr(100),
	}

	return &schemas.BifrostResponsesRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o-mini",
		Input:    []schemas.ResponsesMessage{userMsg, contentlessAssistantMsg},
		Params:   params,
	}
}

View File

@@ -0,0 +1,326 @@
package semanticcache
import (
"testing"
"github.com/maximhq/bifrost/core/schemas"
)
// TestCacheNoStoreBasicFunctionality runs the same chat request through three
// phases, each under its own cache key: without the no-store flag (control),
// with the flag set to true, and with the flag set to false. It asserts a
// direct cache hit on the repeat request only in the first and third phase.
func TestCacheNoStoreBasicFunctionality(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	req := CreateBasicChatRequest("What is artificial intelligence?", 0.7, 100)

	// Phase 1: control run without the no-store flag.
	controlCtx := CreateContextWithCacheKey("test-no-store-control")
	t.Log("Making normal request (should be cached)...")
	controlFirst, controlFirstErr := setup.Client.ChatCompletionRequest(controlCtx, req)
	if controlFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: controlFirst}) // fresh request
	WaitForCache(setup.Plugin)

	// The repeat of the control request must come from the cache.
	t.Log("Verifying normal caching worked...")
	controlRepeat, controlRepeatErr := setup.Client.ChatCompletionRequest(controlCtx, req)
	if controlRepeatErr != nil {
		if controlRepeatErr.Error != nil {
			t.Fatalf("Second request failed: %v", controlRepeatErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", controlRepeatErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: controlRepeat}, "direct")

	// Phase 2: no-store set to true, so the response must not be stored.
	noStoreCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-disabled", true)
	t.Log("Making request with CacheNoStoreKey=true (should not be cached)...")
	noStoreFirst, noStoreFirstErr := setup.Client.ChatCompletionRequest(noStoreCtx, req)
	if noStoreFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: noStoreFirst}) // fresh request
	WaitForCache(setup.Plugin)

	// The repeat must still miss, because nothing was written.
	t.Log("Verifying no-store request was not cached...")
	noStoreRepeat, noStoreRepeatErr := setup.Client.ChatCompletionRequest(noStoreCtx, req)
	if noStoreRepeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: noStoreRepeat})

	// Phase 3: no-store explicitly false, so caching behaves normally.
	storeCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-enabled", false)
	t.Log("Making request with CacheNoStoreKey=false (should be cached)...")
	storeFirst, storeFirstErr := setup.Client.ChatCompletionRequest(storeCtx, req)
	if storeFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: storeFirst}) // fresh request
	WaitForCache(setup.Plugin)

	// The repeat must be served from the cache.
	t.Log("Verifying no-store=false request was cached...")
	storeRepeat, storeRepeatErr := setup.Client.ChatCompletionRequest(storeCtx, req)
	if storeRepeatErr != nil {
		t.Fatalf("Sixth request failed: %v", storeRepeatErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: storeRepeat}, "direct")

	t.Log("✅ CacheNoStoreKey basic functionality works correctly")
}
// TestCacheNoStoreWithDifferentRequestTypes covers no-store for a chat
// completion request and an embedding request. The test is currently skipped
// unconditionally by its first statement; the body is kept so it still
// compiles.
func TestCacheNoStoreWithDifferentRequestTypes(t *testing.T) {
	t.Skip("Skipping Embedding Tests")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Chat completion with no-store enabled.
	chatReq := CreateBasicChatRequest("Test no-store with chat", 0.7, 50)
	chatCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-chat", true)
	t.Log("Testing no-store with chat completion...")
	chatFirst, chatFirstErr := setup.Client.ChatCompletionRequest(chatCtx, chatReq)
	if chatFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: chatFirst})
	WaitForCache(setup.Plugin)

	// The repeat must miss as well.
	chatRepeat, chatRepeatErr := setup.Client.ChatCompletionRequest(chatCtx, chatReq)
	if chatRepeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: chatRepeat})

	// Embedding request with no-store enabled.
	embedReq := CreateEmbeddingRequest([]string{"Test no-store with embeddings"})
	embedCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-embedding", true)
	t.Log("Testing no-store with embedding request...")
	embedFirst, embedFirstErr := setup.Client.EmbeddingRequest(embedCtx, embedReq)
	if embedFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: embedFirst})
	WaitForCache(setup.Plugin)

	// The repeat must miss as well.
	embedRepeat, embedRepeatErr := setup.Client.EmbeddingRequest(embedCtx, embedReq)
	if embedRepeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: embedRepeat})

	t.Log("✅ CacheNoStoreKey works with different request types")
}
// TestCacheNoStoreWithConversationHistory sends the same multi-turn
// conversation twice with no-store enabled and asserts that neither call is
// reported as a cache hit.
func TestCacheNoStoreWithConversationHistory(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// System prompt plus one prior exchange, then the new user question.
	history := BuildConversationHistory(
		"You are a helpful assistant",
		[]string{"Hello", "Hi! How can I help?"},
	)
	req := CreateConversationRequest(AddUserMessage(history, "What is machine learning?"), 0.7, 100)

	noStoreCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-conversation", true)
	t.Log("Testing no-store with conversation history...")
	first, firstErr := setup.Client.ChatCompletionRequest(noStoreCtx, req)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: first})
	WaitForCache(setup.Plugin)

	// The identical conversation must still miss because of no-store.
	repeat, repeatErr := setup.Client.ChatCompletionRequest(noStoreCtx, req)
	if repeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: repeat})

	t.Log("✅ CacheNoStoreKey works with conversation history")
}
// TestCacheNoStoreWithCacheTypes combines CacheNoStoreKey=true with an
// explicit CacheTypeKey (first direct, then semantic) and asserts that no
// request in either phase is reported as a cache hit.
func TestCacheNoStoreWithCacheTypes(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	req := CreateBasicChatRequest("Test no-store with cache types", 0.7, 50)

	// Phase 1: no-store together with the direct cache type.
	directCtx := CreateContextWithCacheKey("test-no-store-cache-types")
	directCtx = directCtx.WithValue(CacheNoStoreKey, true)
	directCtx = directCtx.WithValue(CacheTypeKey, CacheTypeDirect)
	t.Log("Testing no-store with CacheTypeKey=direct...")
	directFirst, directFirstErr := setup.Client.ChatCompletionRequest(directCtx, req)
	if directFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: directFirst})
	WaitForCache(setup.Plugin)

	// No-store is expected to win over the cache type: the repeat misses.
	directRepeat, directRepeatErr := setup.Client.ChatCompletionRequest(directCtx, req)
	if directRepeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: directRepeat})

	// Phase 2: no-store together with the semantic cache type (same cache key).
	semanticCtx := CreateContextWithCacheKey("test-no-store-cache-types")
	semanticCtx = semanticCtx.WithValue(CacheNoStoreKey, true)
	semanticCtx = semanticCtx.WithValue(CacheTypeKey, CacheTypeSemantic)
	t.Log("Testing no-store with CacheTypeKey=semantic...")
	semanticFirst, semanticFirstErr := setup.Client.ChatCompletionRequest(semanticCtx, req)
	if semanticFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: semanticFirst})
	WaitForCache(setup.Plugin)

	// Again the repeat must miss.
	semanticRepeat, semanticRepeatErr := setup.Client.ChatCompletionRequest(semanticCtx, req)
	if semanticRepeatErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: semanticRepeat})

	t.Log("✅ CacheNoStoreKey correctly overrides cache type settings")
}
// TestCacheNoStoreErrorHandling sets CacheNoStoreKey to values that are not
// booleans (a string, then nil) and asserts that the repeat request in each
// phase is a direct cache hit, i.e. that such values do not disable caching.
func TestCacheNoStoreErrorHandling(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	req := CreateBasicChatRequest("Test no-store error handling", 0.7, 50)

	// Phase 1: a non-boolean string stored under the no-store key.
	invalidCtx := CreateContextWithCacheKey("test-no-store-errors")
	invalidCtx = invalidCtx.WithValue(CacheNoStoreKey, "invalid")
	t.Log("Testing no-store with invalid value (should cache normally)...")
	invalidFirst, invalidFirstErr := setup.Client.ChatCompletionRequest(invalidCtx, req)
	if invalidFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: invalidFirst})
	WaitForCache(setup.Plugin)

	// The invalid value is expected to be ignored, so the repeat hits.
	invalidRepeat, invalidRepeatErr := setup.Client.ChatCompletionRequest(invalidCtx, req)
	if invalidRepeatErr != nil {
		if invalidRepeatErr.Error != nil {
			t.Fatalf("Second request failed: %v", invalidRepeatErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", invalidRepeatErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: invalidRepeat}, "direct")

	// Phase 2: nil stored under the no-store key.
	nilCtx := CreateContextWithCacheKey("test-no-store-nil")
	nilCtx = nilCtx.WithValue(CacheNoStoreKey, nil)
	t.Log("Testing no-store with nil value (should cache normally)...")
	nilFirst, nilFirstErr := setup.Client.ChatCompletionRequest(nilCtx, req)
	if nilFirstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: nilFirst})
	WaitForCache(setup.Plugin)

	// nil is expected to behave like "flag absent", so the repeat hits.
	nilRepeat, nilRepeatErr := setup.Client.ChatCompletionRequest(nilCtx, req)
	if nilRepeatErr != nil {
		t.Fatalf("Fourth request failed: %v", nilRepeatErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: nilRepeat}, "direct")

	t.Log("✅ CacheNoStoreKey error handling works correctly")
}
// TestCacheNoStoreReadButNoWrite populates the cache with a normal request
// and then issues requests under the same cache key with no-store enabled.
// It asserts that those no-store requests are still answered from the cache:
// a direct hit for the identical prompt and a semantic hit for a paraphrase.
func TestCacheNoStoreReadButNoWrite(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	seedReq := CreateBasicChatRequest("Describe Isaac Newton's three laws of motion", 0.7, 50)

	// Step 1: populate the cache with a regular request.
	writeCtx := CreateContextWithCacheKey("test-no-store-read")
	t.Log("Caching response normally...")
	seedResp, seedErr := setup.Client.ChatCompletionRequest(writeCtx, seedReq)
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Step 2: same prompt, same cache key, but with no-store enabled.
	readOnlyCtx := CreateContextWithCacheKeyAndNoStore("test-no-store-read", true)
	t.Log("Reading with no-store enabled (should still hit cache for reads)...")
	directResp, directErr := setup.Client.ChatCompletionRequest(readOnlyCtx, seedReq)
	if directErr != nil {
		if directErr.Error != nil {
			t.Fatalf("Second request failed: %v", directErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", directErr)
	}
	// No-store is expected to affect writes only, so this read must hit.
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: directResp}, "direct")

	// Step 3: a close paraphrase with no-store, chosen to yield a semantic hit.
	paraphraseReq := CreateBasicChatRequest("Describe the three laws of motion by Isaac Newton", 0.7, 50)
	t.Log("Making semantically similar request with no-store (should get semantic hit, but not cache response)...")
	semanticResp, semanticErr := setup.Client.ChatCompletionRequest(readOnlyCtx, paraphraseReq)
	if semanticErr != nil {
		t.Fatalf("Third request failed: %v", semanticErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: semanticResp}, "semantic")
	WaitForCache(setup.Plugin)

	// Step 4: repeating the paraphrase must again be a semantic hit.
	t.Log("Repeating similar request with no-store (should still get semantic hit)...")
	semanticRepeat, semanticRepeatErr := setup.Client.ChatCompletionRequest(readOnlyCtx, paraphraseReq)
	if semanticRepeatErr != nil {
		t.Fatalf("Fourth request failed: %v", semanticRepeatErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: semanticRepeat}, "semantic")

	t.Log("✅ CacheNoStoreKey allows reading but prevents writing")
}

View File

@@ -0,0 +1,332 @@
package semanticcache
import (
"testing"
"github.com/maximhq/bifrost/core/schemas"
)
// TestTextNormalizationDirectCache runs the direct-cache normalization checks
// for each supported input kind as a subtest; all subtests share one
// TestSetup.
func TestTextNormalizationDirectCache(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	subtests := []struct {
		name string
		run  func(*testing.T, *TestSetup)
	}{
		{"ChatCompletion", testChatCompletionNormalization},
		{"Speech", testSpeechNormalization},
	}
	for _, st := range subtests {
		t.Run(st.name, func(t *testing.T) {
			st.run(t, setup)
		})
	}
}
// testChatCompletionNormalization sends one chat request, waits for it to be
// cached, and then sends case and whitespace variants of the same system and
// user messages. Every variant is expected to be a direct cache hit.
//
// If the very first request fails the function returns without failing the
// test; failures of later requests are fatal.
func testChatCompletionNormalization(t *testing.T, setup *TestSetup) {
	ctx := CreateContextWithCacheKey("test-chat-normalization")

	// Test cases with different case and whitespace variations
	testCases := []struct {
		name      string
		userMsg   string
		systemMsg string
	}{
		{
			name:      "Original",
			userMsg:   "Explain quantum physics",
			systemMsg: "You are a helpful science teacher",
		},
		{
			name:      "Lowercase",
			userMsg:   "explain quantum physics",
			systemMsg: "you are a helpful science teacher",
		},
		{
			name:      "Uppercase",
			userMsg:   "EXPLAIN QUANTUM PHYSICS",
			systemMsg: "YOU ARE A HELPFUL SCIENCE TEACHER",
		},
		{
			name:      "Mixed Case",
			userMsg:   "ExPlAiN QuAnTuM PhYsIcS",
			systemMsg: "YoU aRe A hElPfUl ScIeNcE tEaChEr",
		},
		{
			name:      "With Whitespace",
			userMsg:   " Explain quantum physics ",
			systemMsg: " You are a helpful science teacher ",
		},
		{
			name:      "Extra Whitespace",
			userMsg:   " Explain quantum physics ",
			systemMsg: " You are a helpful science teacher ",
		},
	}

	// Create chat completion requests for all test cases
	requests := make([]*schemas.BifrostChatRequest, len(testCases))
	for i, tc := range testCases {
		// Take the addresses of per-iteration copies, not of the range
		// variable's fields. Before Go 1.22 the range variable is a single
		// variable reused by every iteration, so &tc.userMsg would make all
		// requests point at the last test case and the cache-hit assertions
		// below would pass vacuously.
		systemMsg, userMsg := tc.systemMsg, tc.userMsg
		requests[i] = &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleSystem,
					Content: &schemas.ChatMessageContent{
						ContentStr: &systemMsg,
					},
				},
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentStr: &userMsg,
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         PtrFloat64(0.5),
				MaxCompletionTokens: PtrInt(50),
			},
		}
	}

	// Make first request (should miss cache and be stored)
	t.Logf("Making first request with user: '%s', system: '%s'", testCases[0].userMsg, testCases[0].systemMsg)
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, requests[0])
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	if response1 == nil || len(response1.Choices) == 0 {
		t.Fatal("First response is invalid")
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test all other variations should hit cache due to normalization
	for i := 1; i < len(testCases); i++ {
		tc := testCases[i]
		t.Logf("Testing variation '%s' with user: '%s', system: '%s'", tc.name, tc.userMsg, tc.systemMsg)
		response, err := setup.Client.ChatCompletionRequest(ctx, requests[i])
		if err != nil {
			t.Fatalf("Request for case '%s' failed: %v", tc.name, err)
		}
		if response == nil || len(response.Choices) == 0 {
			t.Fatalf("Response for case '%s' is invalid", tc.name)
		}
		// Should be cache hit due to normalization
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response}, "direct")
		t.Logf("✓ Cache hit for '%s' variation", tc.name)
	}
}
// testSpeechNormalization sends one speech request, waits for it to be
// cached, and then sends case and whitespace variants of the same input text.
// Every variant is expected to be a direct cache hit.
//
// If the very first request fails the function returns without failing the
// test; failures of later requests are fatal.
func testSpeechNormalization(t *testing.T, setup *TestSetup) {
	ctx := CreateContextWithCacheKey("test-speech-normalization")

	// The first entry seeds the cache; all others are variants of it.
	variants := []struct {
		name  string
		input string
	}{
		{"Original", "Hello, this is a test speech synthesis"},
		{"Lowercase", "hello, this is a test speech synthesis"},
		{"Uppercase", "HELLO, THIS IS A TEST SPEECH SYNTHESIS"},
		{"Mixed Case", "HeLLo, ThIs Is A tEsT sPeEcH sYnThEsIs"},
		{"Leading Whitespace", " Hello, this is a test speech synthesis"},
		{"Trailing Whitespace", "Hello, this is a test speech synthesis "},
		{"Both Whitespace", " Hello, this is a test speech synthesis "},
		{"Extra Spaces", " Hello, this is a test speech synthesis "},
	}

	// Build every request up front, index-aligned with variants.
	speechReqs := make([]*schemas.BifrostSpeechRequest, len(variants))
	for idx := range variants {
		speechReqs[idx] = CreateSpeechRequest(variants[idx].input, "alloy")
	}

	// Seed request: expected to miss and be stored.
	t.Logf("Making first speech request with: '%s'", variants[0].input)
	seedResp, seedErr := setup.Client.SpeechRequest(ctx, speechReqs[0])
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	if seedResp == nil {
		t.Fatal("First response is invalid")
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{SpeechResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Every remaining variant is expected to hit the cache.
	for idx, v := range variants[1:] {
		t.Logf("Testing variation '%s' with input: '%s'", v.name, v.input)
		resp, err := setup.Client.SpeechRequest(ctx, speechReqs[idx+1])
		if err != nil {
			t.Fatalf("Request for case '%s' failed: %v", v.name, err)
		}
		if resp == nil {
			t.Fatalf("Response for case '%s' is invalid", v.name)
		}
		AssertCacheHit(t, &schemas.BifrostResponse{SpeechResponse: resp}, "direct")
		t.Logf("✓ Cache hit for '%s' variation", v.name)
	}
}
// TestChatCompletionContentBlocksNormalization sends a chat request whose
// user message is made of text content blocks, waits for it to be cached, and
// then sends case and whitespace variants of the same blocks. Every variant
// is expected to be a direct cache hit.
//
// If the very first request fails the test returns without failing; failures
// of later requests are fatal.
func TestChatCompletionContentBlocksNormalization(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-content-blocks-normalization")

	// Test cases with content blocks having different text normalization
	testCases := []struct {
		name       string
		textBlocks []string
	}{
		{
			name:       "Original",
			textBlocks: []string{"Hello World", "How are you today?"},
		},
		{
			name:       "Lowercase",
			textBlocks: []string{"hello world", "how are you today?"},
		},
		{
			name:       "With Whitespace",
			textBlocks: []string{" Hello World ", " How are you today? "},
		},
		{
			name:       "Mixed Case",
			textBlocks: []string{"HeLLo WoRLd", "HoW aRe YoU tOdAy?"},
		},
	}

	// Create chat completion requests with content blocks
	requests := make([]*schemas.BifrostChatRequest, len(testCases))
	for i, tc := range testCases {
		contentBlocks := make([]schemas.ChatContentBlock, len(tc.textBlocks))
		for j := range tc.textBlocks {
			// Declare a fresh variable per block before taking its address.
			// Before Go 1.22 a range variable is shared by all iterations,
			// so taking its address would make every block point at the last
			// string of the case.
			text := tc.textBlocks[j]
			contentBlocks[j] = schemas.ChatContentBlock{
				Type: schemas.ChatContentBlockTypeText,
				Text: &text,
			}
		}
		requests[i] = &schemas.BifrostChatRequest{
			Provider: schemas.OpenAI,
			Model:    "gpt-4o-mini",
			Input: []schemas.ChatMessage{
				{
					Role: schemas.ChatMessageRoleUser,
					Content: &schemas.ChatMessageContent{
						ContentBlocks: contentBlocks,
					},
				},
			},
			Params: &schemas.ChatParameters{
				Temperature:         PtrFloat64(0.5),
				MaxCompletionTokens: PtrInt(50),
			},
		}
	}

	// Make first request (should miss cache and be stored)
	t.Logf("Making first request with content blocks: %v", testCases[0].textBlocks)
	response1, err1 := setup.Client.ChatCompletionRequest(ctx, requests[0])
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	if response1 == nil || len(response1.Choices) == 0 {
		t.Fatal("First response is invalid")
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
	WaitForCache(setup.Plugin)

	// Test all other variations should hit cache due to normalization
	for i := 1; i < len(testCases); i++ {
		tc := testCases[i]
		t.Logf("Testing variation '%s' with content blocks: %v", tc.name, tc.textBlocks)
		response, err := setup.Client.ChatCompletionRequest(ctx, requests[i])
		if err != nil {
			t.Fatalf("Request for case '%s' failed: %v", tc.name, err)
		}
		if response == nil || len(response.Choices) == 0 {
			t.Fatalf("Response for case '%s' is invalid", tc.name)
		}
		// Should be cache hit due to normalization
		AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response}, "direct")
		t.Logf("✓ Cache hit for '%s' variation", tc.name)
	}
}
// TestNormalizationWithSemanticCache caches one prompt and then checks two
// follow-ups under the same cache key: a case-only variant, expected to be a
// direct hit, and a reworded prompt, expected to be a semantic hit.
func TestNormalizationWithSemanticCache(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-normalization-semantic")

	// Seed the cache with the original wording.
	seedReq := CreateBasicChatRequest("What is Machine Learning?", 0.5, 50)
	t.Log("Making first request with original text...")
	seedResp, seedErr := setup.Client.ChatCompletionRequest(ctx, seedReq)
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Same text in lower case: after normalization it is identical to the
	// seed, so a direct (not semantic) hit is expected.
	lowerReq := CreateBasicChatRequest("what is machine learning?", 0.5, 50)
	t.Log("Making semantic request with normalized case...")
	lowerResp, lowerErr := setup.Client.ChatCompletionRequest(ctx, lowerReq)
	if lowerErr != nil {
		if lowerErr.Error != nil {
			t.Fatalf("Second request failed: %v", lowerErr.Error.Message)
		}
		t.Fatalf("Second request failed: %v", lowerErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: lowerResp}, "direct")
	t.Log("✓ Direct cache hit with normalized text")

	// Different wording with similar meaning: expected to be a semantic hit.
	rewordedReq := CreateBasicChatRequest("can you explain machine learning concepts?", 0.5, 50)
	t.Log("Making semantically similar request...")
	rewordedResp, rewordedErr := setup.Client.ChatCompletionRequest(ctx, rewordedReq)
	if rewordedErr != nil {
		t.Fatalf("Third request failed: %v", rewordedErr)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: rewordedResp}, "semantic")
	t.Log("✓ Semantic cache hit with similar content")
}
// Helper functions for pointer creation

// PtrFloat64 returns a pointer to a float64 holding the value f. Each call
// yields a distinct pointer.
func PtrFloat64(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// PtrInt returns a pointer to an int holding the value i. Each call yields a
// distinct pointer.
func PtrInt(i int) *int {
	p := new(int)
	*p = i
	return p
}

View File

@@ -0,0 +1,451 @@
package semanticcache
import (
"testing"
"time"
"github.com/maximhq/bifrost/core/schemas"
)
// TestResponsesAPIBasicFunctionality issues the same Responses API request
// twice under one cache key and asserts that the second call is a direct
// cache hit whose ExtraFields.Provider matches the request's provider.
//
// If the first request fails the test returns without failing; a failure of
// the second request is fatal. Timing of both calls is logged but never
// asserted.
func TestResponsesAPIBasicFunctionality(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-basic")

	// Create test request
	testRequest := CreateBasicResponsesRequest(
		"What is Bifrost? Answer in one short sentence.",
		0.7,
		500,
	)

	t.Log("Making first Responses API request (should go to OpenAI and be cached)...")

	// Make first request (will go to OpenAI and be cached) - with retries
	start1 := time.Now()
	response1, err1 := setup.Client.ResponsesRequest(ctx, testRequest)
	duration1 := time.Since(start1)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	if response1 == nil || len(response1.Output) == 0 {
		t.Fatal("First Responses response is invalid")
	}

	t.Logf("First request completed in %v", duration1)
	t.Logf("Response contains %d output messages", len(response1.Output))
	// Content, ContentStr and the first block's Text may each be absent, so
	// every level is checked before dereferencing.
	switch c := response1.Output[0].Content; {
	case c != nil && c.ContentStr != nil:
		t.Logf("Response: %s", *c.ContentStr)
	case c != nil && len(c.ContentBlocks) > 0 && c.ContentBlocks[0].Text != nil:
		t.Logf("Response: %s", *c.ContentBlocks[0].Text)
	default:
		t.Log("Response: <no text>")
	}

	// Wait for cache to be written
	WaitForCache(setup.Plugin)

	t.Log("Making second identical Responses API request (should be served from cache)...")

	// Make second identical request (should be cached)
	start2 := time.Now()
	response2, err2 := setup.Client.ResponsesRequest(ctx, testRequest)
	duration2 := time.Since(start2)
	if err2 != nil {
		t.Fatalf("Second Responses request failed: %v", err2)
	}
	if response2 == nil || len(response2.Output) == 0 {
		t.Fatal("Second Responses response is invalid")
	}

	// Use the same guarded logging as for the first response: an output
	// message without Content, or with an empty or text-less first block,
	// must not panic the test on what is only a diagnostic log line.
	switch c := response2.Output[0].Content; {
	case c != nil && c.ContentStr != nil:
		t.Logf("Response: %s", *c.ContentStr)
	case c != nil && len(c.ContentBlocks) > 0 && c.ContentBlocks[0].Text != nil:
		t.Logf("Response: %v", *c.ContentBlocks[0].Text)
	default:
		t.Log("Response: <no text>")
	}

	t.Logf("Second request completed in %v", duration2)

	// Verify cache hit
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}, string(CacheTypeDirect))

	// Performance comparison (informational only)
	t.Logf("Performance Summary:")
	t.Logf("First request (OpenAI): %v", duration1)
	t.Logf("Second request (Cache): %v", duration2)
	if duration2 >= duration1 {
		t.Log("⚠️ Cache doesn't seem faster, but this could be due to test environment")
	}

	// Verify provider information is maintained in cached response
	if response2.ExtraFields.Provider != testRequest.Provider {
		t.Errorf("Provider mismatch in cached response: expected %s, got %s",
			testRequest.Provider, response2.ExtraFields.Provider)
	}

	t.Log("✅ Basic Responses API semantic caching test completed successfully!")
}
// TestResponsesAPIDifferentParameters sends pairs of Responses requests for
// the same prompt. Only the identical pair is expected to produce a direct
// cache hit on the second request; pairs differing in temperature, max
// output tokens or instructions are expected to miss.
func TestResponsesAPIDifferentParameters(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-params")
	const prompt = "Explain quantum computing"

	scenarios := []struct {
		name      string
		first     *schemas.BifrostResponsesRequest
		second    *schemas.BifrostResponsesRequest
		expectHit bool
	}{
		{
			name:      "Identical Requests",
			first:     CreateBasicResponsesRequest(prompt, 0.5, 500),
			second:    CreateBasicResponsesRequest(prompt, 0.5, 500),
			expectHit: true,
		},
		{
			name:      "Different Temperature",
			first:     CreateBasicResponsesRequest(prompt, 0.1, 500),
			second:    CreateBasicResponsesRequest(prompt, 0.9, 500),
			expectHit: false,
		},
		{
			name:      "Different MaxOutputTokens",
			first:     CreateBasicResponsesRequest(prompt, 0.5, 500),
			second:    CreateBasicResponsesRequest(prompt, 0.5, 200),
			expectHit: false,
		},
		{
			name:      "Different Instructions",
			first:     CreateResponsesRequestWithInstructions(prompt, "Be concise", 0.5, 500),
			second:    CreateResponsesRequestWithInstructions(prompt, "Be detailed", 0.5, 500),
			expectHit: false,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			// Start each scenario from an empty cache.
			clearTestKeysWithStore(t, setup.Store)

			// First request of the pair.
			if _, firstErr := setup.Client.ResponsesRequest(ctx, sc.first); firstErr != nil {
				return // Test will be skipped by retry function
			}
			WaitForCache(setup.Plugin)

			// Second request of the pair.
			secondResp, secondErr := setup.Client.ResponsesRequest(ctx, sc.second)
			if secondErr != nil {
				if secondErr.Error != nil {
					t.Fatalf("Second request failed: %v", secondErr.Error.Message)
				}
				t.Fatalf("Second request failed: %v", secondErr)
			}

			if !sc.expectHit {
				AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp})
				t.Log("✓ Parameters differ: no cache hit as expected")
				return
			}
			AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp}, "direct")
			t.Log("✓ Parameters match: cache hit as expected")
		})
	}
}
// TestResponsesAPISemanticMatching sends a Responses API request, then a
// differently worded request on the same topic under the same cache key
// (context created with CacheTypeSemantic), and asserts that the second one
// is reported as a "semantic" cache hit.
func TestResponsesAPISemanticMatching(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKeyAndType("test-responses-semantic", CacheTypeSemantic)

	// Seed the cache with the original wording.
	seedRequest := CreateBasicResponsesRequest("What is machine learning?", 0.5, 500)
	t.Log("Making first Responses request with original text...")
	seedResp, seedErr := setup.Client.ResponsesRequest(ctx, seedRequest)
	if seedErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: seedResp})
	WaitForCache(setup.Plugin)

	// Ask a similar question using different text.
	similarRequest := CreateBasicResponsesRequest("Can you explain machine learning concepts?", 0.5, 500)
	t.Log("Making semantically similar Responses request...")
	similarResp, similarErr := setup.Client.ResponsesRequest(ctx, similarRequest)
	if similarErr != nil {
		if similarErr.Error == nil {
			t.Fatalf("Second request failed: %v", similarErr)
		}
		t.Fatalf("Second request failed: %v", similarErr.Error.Message)
	}

	// The reworded request is expected to be a semantic cache hit.
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: similarResp}, "semantic")
	t.Log("✓ Semantic cache hit with similar content")
}
// TestResponsesAPIWithInstructions checks that a Responses API request that
// carries instructions is cached: the first request must miss, and a second
// request built from the same prompt, instructions, and parameters must be a
// direct cache hit.
func TestResponsesAPIWithInstructions(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-instructions")

	// Both requests are built from the same inputs.
	prompt := "Explain artificial intelligence"
	instructions := "You are a helpful assistant. Be concise and accurate."

	firstRequest := CreateResponsesRequestWithInstructions(prompt, instructions, 0.7, 500)
	t.Log("Making first Responses request with instructions...")
	firstResp, firstErr := setup.Client.ResponsesRequest(ctx, firstRequest)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: firstResp})
	WaitForCache(setup.Plugin)

	// Build a fresh but identical request.
	secondRequest := CreateResponsesRequestWithInstructions(prompt, instructions, 0.7, 500)
	t.Log("Making second identical Responses request with instructions...")
	secondResp, secondErr := setup.Client.ResponsesRequest(ctx, secondRequest)
	if secondErr != nil {
		if secondErr.Error == nil {
			t.Fatalf("Second request failed: %v", secondErr)
		}
		t.Fatalf("Second request failed: %v", secondErr.Error.Message)
	}

	// The repeat request is expected to come from the cache.
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp}, "direct")
	t.Log("✓ Responses API with instructions cached correctly")
}
// TestResponsesAPICacheExpiration exercises the per-request TTL for Responses
// API requests. With a 5 second TTL in the context it expects a miss on the
// first request, a direct hit on an immediate repeat, and a miss again after
// sleeping for the TTL plus two seconds.
func TestResponsesAPICacheExpiration(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// Keep the TTL short so the test does not have to wait long.
	const ttl = 5 * time.Second
	ctx := CreateContextWithCacheKeyAndTTL("test-responses-ttl", ttl)
	request := CreateBasicResponsesRequest("TTL test for Responses API", 0.5, 500)

	t.Log("Making first Responses request with short TTL...")
	firstResp, firstErr := setup.Client.ResponsesRequest(ctx, request)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: firstResp})
	WaitForCache(setup.Plugin)

	t.Log("Making second Responses request before TTL expiration...")
	secondResp, secondErr := setup.Client.ResponsesRequest(ctx, request)
	if secondErr != nil {
		if secondErr.Error == nil {
			t.Fatalf("Second request failed: %v", secondErr)
		}
		t.Fatalf("Second request failed: %v", secondErr.Error.Message)
	}
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp}, "direct")

	// Sleep past the TTL (with a two second margin) before the final request.
	t.Logf("Waiting for TTL expiration (%v)...", ttl)
	time.Sleep(ttl + 2*time.Second)

	t.Log("Making third Responses request after TTL expiration...")
	thirdResp, thirdErr := setup.Client.ResponsesRequest(ctx, request)
	if thirdErr != nil {
		return // Test will be skipped by retry function
	}

	// The entry should have expired, so this must not be a cache hit.
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: thirdResp})
	t.Log("✅ Responses API requests properly handle TTL expiration")
}
// TestResponsesAPIWithoutCacheKey sends a single Responses API request with a
// context whose cache key is the empty string and asserts that the response
// is not reported as a cache hit.
func TestResponsesAPIWithoutCacheKey(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	// The context is created with an empty cache key.
	ctx := CreateContextWithCacheKey("")
	request := CreateBasicResponsesRequest("Test Responses without cache key", 0.5, 500)

	t.Log("Making Responses request without cache key...")
	resp, reqErr := setup.Client.ResponsesRequest(ctx, request)
	if reqErr != nil {
		return // Test will be skipped by retry function
	}

	// No cache hit is expected for this request.
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: resp})
	t.Log("✅ Responses requests without cache key are properly not cached")
}
// TestResponsesAPINoStoreFlag sends the same Responses API request twice with
// a context created with the no-store option enabled and asserts that neither
// response is reported as a cache hit.
func TestResponsesAPINoStoreFlag(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	request := CreateBasicResponsesRequest("Test no-store with Responses API", 0.7, 500)
	ctx := CreateContextWithCacheKeyAndNoStore("test-no-store-responses", true)

	t.Log("Testing no-store with Responses API...")
	firstResp, firstErr := setup.Client.ResponsesRequest(ctx, request)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: firstResp})
	WaitForCache(setup.Plugin)

	// Repeat the request: with no-store set, nothing should have been cached.
	secondResp, secondErr := setup.Client.ResponsesRequest(ctx, request)
	if secondErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp}) // Should not be cached
	t.Log("✅ Responses API no-store flag working correctly")
}
// TestResponsesAPIStreaming makes a non-streaming Responses API request and
// then a streaming request with the same prompt and parameters under the same
// cache key. It requires the stream to deliver at least one chunk without
// error; whether any chunk is flagged as a cache hit is only logged, not
// asserted.
func TestResponsesAPIStreaming(t *testing.T) {
	t.Log("Responses streaming not supported yet")

	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-streaming")
	prompt := "Explain the basics of quantum computing in simple terms"

	// Non-streaming request goes first.
	t.Log("Making non-streaming Responses request...")
	plainRequest := CreateBasicResponsesRequest(prompt, 0.5, 500)
	if _, plainErr := setup.Client.ResponsesRequest(ctx, plainRequest); plainErr != nil {
		return // Test will be skipped by retry function
	}
	WaitForCache(setup.Plugin)

	// Streaming request reuses the prompt and parameters.
	t.Log("Making streaming Responses request with same prompt...")
	streamingRequest := CreateStreamingResponsesRequest(prompt, 0.5, 500)
	stream, streamErr := setup.Client.ResponsesStreamRequest(ctx, streamingRequest)
	if streamErr != nil {
		t.Fatalf("Streaming Responses request failed: %v", streamErr)
	}

	// Drain the stream, failing on the first error message.
	var chunks []schemas.BifrostResponsesStreamResponse
	for msg := range stream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in Responses stream: %v", msg.BifrostError)
		}
		if chunk := msg.BifrostResponsesStreamResponse; chunk != nil {
			chunks = append(chunks, *chunk)
		}
	}
	if len(chunks) == 0 {
		t.Fatal("No streaming responses received")
	}

	// Look for any chunk whose cache debug info reports a hit.
	sawCacheHit := false
	for i := range chunks {
		if debug := chunks[i].ExtraFields.CacheDebug; debug != nil && debug.CacheHit {
			sawCacheHit = true
			break
		}
	}
	if sawCacheHit {
		t.Log("✓ Cache hit detected in streaming Responses API")
	} else {
		t.Log("⚠️ No cache hit detected in streaming responses - this could be expected behavior")
	}
	t.Log("✅ Streaming Responses API test completed")
}
// TestResponsesAPIComplexParameters checks caching of a Responses API request
// that sets several optional parameters (TopP, Background, ParallelToolCalls,
// ServiceTier, Store). The first request must miss and a second, separately
// constructed request with the same values must be a direct cache hit.
func TestResponsesAPIComplexParameters(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-responses-complex-params")

	// buildRequest returns a new request with the full parameter set; each
	// call allocates its own values for the pointer-typed fields.
	buildRequest := func() *schemas.BifrostResponsesRequest {
		background, parallelToolCalls, serviceTier, store := true, false, "default", true
		req := CreateBasicResponsesRequest("Test complex parameters", 0.8, 500)
		req.Params.TopP = PtrFloat64(0.9)
		req.Params.Background = &background
		req.Params.ParallelToolCalls = &parallelToolCalls
		req.Params.ServiceTier = &serviceTier
		req.Params.Store = &store
		return req
	}

	firstRequest := buildRequest()
	t.Log("Making first Responses request with complex parameters...")
	firstResp, firstErr := setup.Client.ResponsesRequest(ctx, firstRequest)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: firstResp})
	WaitForCache(setup.Plugin)

	// Construct the identical request again from scratch.
	secondRequest := buildRequest()
	t.Log("Making second identical Responses request with complex parameters...")
	secondResp, secondErr := setup.Client.ResponsesRequest(ctx, secondRequest)
	if secondErr != nil {
		if secondErr.Error == nil {
			t.Fatalf("Second request failed: %v", secondErr)
		}
		t.Fatalf("Second request failed: %v", secondErr.Error.Message)
	}

	// The identical request is expected to be served from the cache.
	AssertCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: secondResp}, "direct")
	t.Log("✓ Responses API with complex parameters cached correctly")
}

View File

@@ -0,0 +1,333 @@
package semanticcache
import (
"testing"
"time"
"github.com/maximhq/bifrost/core/schemas"
)
// TestStreamingCacheBasicFunctionality sends the same streaming chat request
// twice under one cache key and verifies that the second stream:
//   - delivers at least one chunk,
//   - has the same number of chunks as the first,
//   - contains a chunk whose CacheDebug reports a cache hit,
//   - completes faster than the first stream, and
//   - carries the same ChunkIndex values, position by position.
func TestStreamingCacheBasicFunctionality(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("test-stream-value")

	// Create a test streaming request
	testRequest := CreateStreamingChatRequest(
		"Count from 1 to 3, each number on a new line.",
		0.0, // Use 0 temperature for more predictable responses
		20,
	)

	t.Log("Making first streaming request (should go to OpenAI and be cached)...")

	// Make first streaming request; the timing covers draining the stream.
	start1 := time.Now()
	stream1, err1 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
	if err1 != nil {
		return // Test will be skipped by retry function
	}
	var responses1 []schemas.BifrostChatResponse
	for streamMsg := range stream1 {
		if streamMsg.BifrostError != nil {
			t.Fatalf("Error in first stream: %v", streamMsg.BifrostError)
		}
		if streamMsg.BifrostChatResponse != nil {
			responses1 = append(responses1, *streamMsg.BifrostChatResponse)
		}
	}
	duration1 := time.Since(start1)
	if len(responses1) == 0 {
		t.Fatal("First streaming request returned no responses")
	}
	t.Logf("First streaming request completed in %v with %d chunks", duration1, len(responses1))

	// Wait for cache to be written
	WaitForCache(setup.Plugin)

	t.Log("Making second identical streaming request (should be served from cache)...")

	// Make second identical streaming request
	start2 := time.Now()
	stream2, err2 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest)
	if err2 != nil {
		t.Fatalf("Second streaming request failed: %v", err2)
	}
	var responses2 []schemas.BifrostChatResponse
	for streamMsg := range stream2 {
		if streamMsg.BifrostError != nil {
			t.Fatalf("Error in second stream: %v", streamMsg.BifrostError)
		}
		if streamMsg.BifrostChatResponse != nil {
			responses2 = append(responses2, *streamMsg.BifrostChatResponse)
		}
	}
	duration2 := time.Since(start2)
	if len(responses2) == 0 {
		t.Fatal("Second streaming request returned no responses")
	}
	t.Logf("Second streaming request completed in %v with %d chunks", duration2, len(responses2))

	// Validate that both streams have the same number of chunks. This is a
	// non-fatal check, so the code below must tolerate differing lengths.
	if len(responses1) != len(responses2) {
		t.Errorf("Stream chunk count mismatch: original=%d, cached=%d", len(responses1), len(responses2))
	}

	// Validate that the second stream was cached
	cached := false
	for _, response := range responses2 {
		if response.ExtraFields.CacheDebug != nil && response.ExtraFields.CacheDebug.CacheHit {
			cached = true
			break
		}
	}
	if !cached {
		t.Fatal("Second streaming request was not served from cache")
	}

	// Validate performance improvement
	if duration2 >= duration1 {
		t.Errorf("Cached stream took longer than original: cache=%v, original=%v", duration2, duration1)
	} else {
		speedup := float64(duration1) / float64(duration2)
		t.Logf("Streaming cache speedup: %.2fx faster", speedup)
	}

	// Validate chunk ordering is maintained. Only positions present in both
	// streams are compared: the count mismatch above is reported with Errorf,
	// so indexing responses1 by every position of responses2 would panic with
	// an index-out-of-range error when the cached stream is longer.
	for i := 0; i < len(responses1) && i < len(responses2); i++ {
		if responses2[i].ExtraFields.ChunkIndex != responses1[i].ExtraFields.ChunkIndex {
			t.Errorf("Chunk index mismatch at position %d: original=%d, cached=%d",
				i, responses1[i].ExtraFields.ChunkIndex, responses2[i].ExtraFields.ChunkIndex)
		}
	}

	t.Log("✅ Streaming cache test completed successfully!")
}
// TestStreamingVsNonStreaming makes a non-streaming chat request followed by a
// streaming request with the same prompt and parameters under the same cache
// key. It fails if any streamed chunk carries a raw response map whose
// "bifrost_cached" entry is true, and asserts that the earlier non-streaming
// response is not flagged as a cache hit.
//
// NOTE(review): other tests in this package detect cache hits through
// ExtraFields.CacheDebug rather than RawResponse["bifrost_cached"]; confirm
// that the plugin still sets this raw-response flag, otherwise the streaming
// check here can never fail.
func TestStreamingVsNonStreaming(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("stream-vs-non-test")
	prompt := "What is the meaning of life?"

	// Non-streaming request goes first.
	t.Log("Making non-streaming request...")
	plainRequest := CreateBasicChatRequest(prompt, 0.5, 50)
	plainResponse, plainErr := setup.Client.ChatCompletionRequest(ctx, plainRequest)
	if plainErr != nil {
		return // Test will be skipped by retry function
	}
	WaitForCache(setup.Plugin)

	// Streaming request reuses the prompt and parameters.
	t.Log("Making streaming request with same prompt...")
	streamingRequest := CreateStreamingChatRequest(prompt, 0.5, 50)
	stream, streamErr := setup.Client.ChatCompletionStreamRequest(ctx, streamingRequest)
	if streamErr != nil {
		t.Fatalf("Streaming request failed: %v", streamErr)
	}

	var chunks []schemas.BifrostChatResponse
	for msg := range stream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in stream: %v", msg.BifrostError)
		}
		if chunk := msg.BifrostChatResponse; chunk != nil {
			chunks = append(chunks, *chunk)
		}
	}
	if len(chunks) == 0 {
		t.Fatal("Streaming request returned no responses")
	}

	// The streaming request must NOT be answered from the non-streaming cache
	// entry (the two kinds are expected to be cached separately).
	servedFromCache := false
	for i := range chunks {
		// A nil RawResponse simply fails the comma-ok assertion.
		raw, isMap := chunks[i].ExtraFields.RawResponse.(map[string]interface{})
		if !isMap {
			continue
		}
		flag, present := raw["bifrost_cached"]
		if !present {
			continue
		}
		if isCached, isBool := flag.(bool); isBool && isCached {
			servedFromCache = true
			break
		}
	}
	if servedFromCache {
		t.Error("Streaming request should not be cached from non-streaming cache")
	} else {
		t.Log("✅ Streaming request correctly not cached from non-streaming cache")
	}

	// The non-streaming response itself must not be flagged as a cache hit.
	AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: plainResponse})
	t.Log("✅ Streaming vs non-streaming test completed!")
}
// TestStreamingChunkOrdering streams the same chat request twice under one
// cache key and compares the two runs: chunk counts must match, ChunkIndex
// values must agree position by position, the final cached chunk must report
// a direct cache hit, and the cached ChunkIndex values must be strictly
// increasing. The test is skipped when the first stream yields fewer than two
// chunks.
func TestStreamingChunkOrdering(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("chunk-order-test")

	// A prompt that is expected to produce several chunks.
	request := CreateStreamingChatRequest(
		"List the first 5 prime numbers, one per line with explanation.",
		0.0,
		100,
	)

	t.Log("Making first streaming request to establish cache...")
	liveStream, liveErr := setup.Client.ChatCompletionStreamRequest(ctx, request)
	if liveErr != nil {
		return // Test will be skipped by retry function
	}
	var liveChunks []schemas.BifrostChatResponse
	for msg := range liveStream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in first stream: %v", msg.BifrostError)
		}
		if chunk := msg.BifrostChatResponse; chunk != nil {
			liveChunks = append(liveChunks, *chunk)
		}
	}
	if len(liveChunks) < 2 {
		t.Skipf("Need at least 2 chunks to test ordering, got %d", len(liveChunks))
	}
	t.Logf("Original stream had %d chunks", len(liveChunks))
	WaitForCache(setup.Plugin)

	t.Log("Making second streaming request to test cached chunk ordering...")
	replayStream, replayErr := setup.Client.ChatCompletionStreamRequest(ctx, request)
	if replayErr != nil {
		t.Fatalf("Second streaming request failed: %v", replayErr)
	}
	var replayChunks []schemas.BifrostChatResponse
	for msg := range replayStream {
		if msg.BifrostError != nil {
			t.Fatalf("Error in second stream: %v", msg.BifrostError)
		}
		if chunk := msg.BifrostChatResponse; chunk != nil {
			replayChunks = append(replayChunks, *chunk)
		}
	}
	if len(replayChunks) != len(liveChunks) {
		t.Errorf("Cached stream chunk count mismatch: original=%d, cached=%d",
			len(liveChunks), len(replayChunks))
	}

	// Compare indices over the positions both streams have in common.
	common := len(replayChunks)
	if len(liveChunks) < common {
		common = len(liveChunks)
	}
	lastReplay := len(replayChunks) - 1
	for pos := 0; pos < common; pos++ {
		liveIdx := liveChunks[pos].ExtraFields.ChunkIndex
		replayIdx := replayChunks[pos].ExtraFields.ChunkIndex
		if liveIdx != replayIdx {
			t.Errorf("Chunk index mismatch at position %d: original=%d, cached=%d",
				pos, liveIdx, replayIdx)
		}
		// Only verify cache hit on the last chunk (where CacheDebug is set)
		if pos == lastReplay {
			AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: &replayChunks[pos]}, string(CacheTypeDirect))
		}
	}

	// Each cached chunk index must be greater than the one before it.
	for next := 1; next < len(replayChunks); next++ {
		before := replayChunks[next-1].ExtraFields.ChunkIndex
		after := replayChunks[next].ExtraFields.ChunkIndex
		if after <= before {
			t.Errorf("Chunks not in sequential order: chunk %d has index %d, chunk %d has index %d",
				next-1, before, next, after)
		}
	}
	t.Log("✅ Streaming chunk ordering test completed successfully!")
}
// TestSpeechSynthesisStreaming issues the same speech synthesis request twice
// through Client.SpeechRequest under one cache key. Both responses must be
// non-nil and the second must be a direct cache hit; the timings of the two
// calls are logged for comparison but not asserted.
func TestSpeechSynthesisStreaming(t *testing.T) {
	setup := NewTestSetup(t)
	defer setup.Cleanup()

	ctx := CreateContextWithCacheKey("speech-stream-test")

	// Build the speech synthesis request used for both calls.
	request := CreateSpeechRequest(
		"This is a test of speech synthesis streaming cache.",
		"alloy",
	)

	t.Log("Making first speech synthesis request...")
	firstStart := time.Now()
	firstResp, firstErr := setup.Client.SpeechRequest(ctx, request)
	firstElapsed := time.Since(firstStart)
	if firstErr != nil {
		return // Test will be skipped by retry function
	}
	if firstResp == nil {
		t.Fatal("First speech response is nil")
	}
	t.Logf("First speech request completed in %v", firstElapsed)
	WaitForCache(setup.Plugin)

	t.Log("Making second identical speech synthesis request...")
	secondStart := time.Now()
	secondResp, secondErr := setup.Client.SpeechRequest(ctx, request)
	secondElapsed := time.Since(secondStart)
	switch {
	case secondErr != nil:
		t.Fatalf("Second speech request failed: %v", secondErr)
	case secondResp == nil:
		t.Fatal("Second speech response is nil")
	}
	t.Logf("Second speech request completed in %v", secondElapsed)

	// The repeat request is expected to be served from the cache.
	AssertCacheHit(t, &schemas.BifrostResponse{SpeechResponse: secondResp}, string(CacheTypeDirect))

	// Report timings; a speedup is logged only when the second call was faster.
	t.Logf("Speech Synthesis Performance:")
	t.Logf("First request: %v", firstElapsed)
	t.Logf("Second request: %v", secondElapsed)
	if secondElapsed < firstElapsed {
		t.Logf("Speech cache speedup: %.2fx faster", float64(firstElapsed)/float64(secondElapsed))
	}
	t.Log("✅ Speech synthesis streaming test completed successfully!")
}

View File

@@ -0,0 +1,428 @@
package semanticcache
import (
"context"
"os"
"strings"
"testing"
"github.com/google/uuid"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/vectorstore"
)
// requiresVectors reports whether storeType is one of the vector stores this
// test suite treats as needing a vector for every stored entry: Qdrant,
// Pinecone, or Weaviate. Every other store type (for example Redis) yields
// false.
func requiresVectors(storeType vectorstore.VectorStoreType) bool {
	return storeType == vectorstore.VectorStoreTypeQdrant ||
		storeType == vectorstore.VectorStoreTypePinecone ||
		storeType == vectorstore.VectorStoreTypeWeaviate
}
// skipIfNoAPIKey skips the calling test when storeType requires vectors (see
// requiresVectors) and the OPENAI_API_KEY environment variable is empty or
// unset. In every other case it returns without doing anything.
func skipIfNoAPIKey(t *testing.T, storeType vectorstore.VectorStoreType) {
	if !requiresVectors(storeType) {
		return
	}
	if os.Getenv("OPENAI_API_KEY") != "" {
		return
	}
	t.Skipf("Skipping %s test: OPENAI_API_KEY not set (required for embedding generation)", storeType)
}
// VectorStoreTestCase describes one vector store backend that the
// cross-store tests in this file are run against. The table returned by
// getVectorStoreTestCases builds these values with positional literals, so
// the field order must not change.
type VectorStoreTestCase struct {
// Name is used as the subtest name and, lowercased, as part of the cache
// keys the tests build.
Name string
// StoreType is passed to NewTestSetupWithVectorStore and skipIfNoAPIKey to
// select the backend.
StoreType vectorstore.VectorStoreType
}
// getVectorStoreTestCases returns the list of vector store backends the
// cross-store tests iterate over, in the order Weaviate, Redis, Qdrant,
// Pinecone. A new slice is built on every call.
func getVectorStoreTestCases() []VectorStoreTestCase {
	cases := []VectorStoreTestCase{
		{Name: "Weaviate", StoreType: vectorstore.VectorStoreTypeWeaviate},
		{Name: "Redis", StoreType: vectorstore.VectorStoreTypeRedis},
		{Name: "Qdrant", StoreType: vectorstore.VectorStoreTypeQdrant},
		{Name: "Pinecone", StoreType: vectorstore.VectorStoreTypePinecone},
	}
	return cases
}
// getDefaultTestConfig builds the plugin configuration shared by the
// cross-store tests: OpenAI as provider with the "text-embedding-3-small"
// embedding model, dimension 1536, threshold 0.8, cleanup on shutdown
// enabled, and a single key that takes its value from "env.OPENAI_API_KEY",
// uses the "*" model whitelist, and has weight 1.0. A new Config is allocated
// on every call.
func getDefaultTestConfig() *Config {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: schemas.WhiteList{"*"},
		Weight: 1.0,
	}

	cfg := Config{
		Provider:          schemas.OpenAI,
		EmbeddingModel:    "text-embedding-3-small",
		Dimension:         1536,
		Threshold:         0.8,
		CleanUpOnShutdown: true,
		Keys:              []schemas.Key{openAIKey},
	}
	return &cfg
}
// TestSemanticCache_AllVectorStores_BasicFlow drives the plugin hooks directly
// for every vector store returned by getVectorStoreTestCases. For each store
// it expects PreLLMHook to miss on the first request, stores a hand-built
// chat response through PostLLMHook, and then expects PreLLMHook on a fresh
// context with the same cache key to short-circuit with a non-nil response.
func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) {
	for _, storeCase := range getVectorStoreTestCases() {
		t.Run(storeCase.Name, func(t *testing.T) {
			skipIfNoAPIKey(t, storeCase.StoreType)

			setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), storeCase.StoreType)
			defer setup.Cleanup()

			label := storeCase.Name
			cacheKey := "test-" + strings.ToLower(label) + "-basic"

			missCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
			missCtx.SetValue(CacheKey, cacheKey)

			// Chat request used for both the miss and the hit.
			userMessage := schemas.ChatMessage{
				Role: schemas.ChatMessageRoleUser,
				Content: &schemas.ChatMessageContent{
					ContentStr: bifrost.Ptr("Hello from " + label + " test!"),
				},
			}
			request := &schemas.BifrostRequest{
				RequestType: schemas.ChatCompletionRequest,
				ChatRequest: &schemas.BifrostChatRequest{
					Provider: schemas.OpenAI,
					Model:    "gpt-4o-mini",
					Input:    []schemas.ChatMessage{userMessage},
					Params: &schemas.ChatParameters{
						Temperature:         bifrost.Ptr(0.7),
						MaxCompletionTokens: bifrost.Ptr(100),
					},
				},
			}

			// First pass through the pre-hook: nothing is cached yet.
			t.Logf("[%s] Testing first request (cache miss)...", label)
			modified, firstHit, err := setup.Plugin.PreLLMHook(missCtx, request)
			switch {
			case err != nil:
				t.Fatalf("[%s] PreHook failed: %v", label, err)
			case firstHit != nil:
				t.Fatalf("[%s] Expected cache miss, but got cache hit", label)
			case modified == nil:
				t.Fatalf("[%s] Modified request is nil", label)
			}
			t.Logf("[%s] Cache miss handled correctly", label)

			// Hand-built response standing in for the provider's answer.
			assistantMessage := &schemas.ChatMessage{
				Role: schemas.ChatMessageRoleAssistant,
				Content: &schemas.ChatMessageContent{
					ContentStr: bifrost.Ptr("Hello! Response from " + label + " test."),
				},
			}
			response := &schemas.BifrostResponse{
				ChatResponse: &schemas.BifrostChatResponse{
					ID: uuid.New().String(),
					Choices: []schemas.BifrostResponseChoice{
						{
							Index: 0,
							ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
								Message: assistantMessage,
							},
						},
					},
					ExtraFields: schemas.BifrostResponseExtraFields{
						Provider:               schemas.OpenAI,
						OriginalModelRequested: "gpt-4o-mini",
						RequestType:            schemas.ChatCompletionRequest,
					},
				},
			}

			// Store the response via the post-hook.
			t.Logf("[%s] Caching response...", label)
			if _, _, err = setup.Plugin.PostLLMHook(missCtx, response, nil); err != nil {
				t.Fatalf("[%s] PostHook failed: %v", label, err)
			}

			// Wait for async caching to complete
			WaitForCache(setup.Plugin)
			t.Logf("[%s] Response cached successfully", label)

			// Second pass on a fresh context with the same key: expect a hit.
			t.Logf("[%s] Testing second identical request (expecting cache hit)...", label)
			hitCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
			hitCtx.SetValue(CacheKey, cacheKey)
			_, secondHit, err := setup.Plugin.PreLLMHook(hitCtx, request)
			if err != nil {
				t.Fatalf("[%s] Second PreHook failed: %v", label, err)
			}
			if secondHit == nil {
				t.Fatalf("[%s] Expected cache hit on identical request, but got cache miss", label)
			}
			if secondHit.Response == nil {
				t.Fatalf("[%s] Cache hit but response is nil", label)
			}
			t.Logf("[%s] Cache hit detected and response returned", label)
			t.Logf("[%s] Basic flow test passed!", label)
		})
	}
}
// TestSemanticCache_AllVectorStores_DirectHashMatch checks, for every vector
// store returned by getVectorStoreTestCases, that repeating an identical chat
// request under the same cache key with CacheTypeDirect produces a "direct"
// cache hit. The first request must miss; if it fails outright the subtest is
// skipped.
func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) {
	for _, tc := range getVectorStoreTestCases() {
		t.Run(tc.Name, func(t *testing.T) {
			skipIfNoAPIKey(t, tc.StoreType)

			setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
			defer setup.Cleanup()

			// Use unique cache key per test run to avoid stale data from previous runs
			// (Pinecone Local doesn't support deletion by metadata filter)
			testRunID := uuid.New().String()[:8]
			cacheKey := "test-" + strings.ToLower(tc.Name) + "-direct-" + testRunID
			ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
			testRequest := CreateBasicChatRequest("Direct hash test for "+tc.Name+" "+testRunID, 0.7, 50)

			t.Logf("[%s] Making first request to populate cache...", tc.Name)
			response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest)
			if err1 != nil {
				// Skipf stops the subtest, so no explicit return is needed.
				t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1)
			}
			AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
			WaitForCache(setup.Plugin)

			// Second request with direct-only cache type
			ctx2 := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect)
			t.Logf("[%s] Making second request with CacheTypeDirect...", tc.Name)
			response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
			if err2 != nil {
				// err2.Error may be nil; guard before reading Message so a
				// failure is reported instead of panicking on a nil pointer.
				if err2.Error != nil {
					t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message)
				}
				t.Fatalf("[%s] Second request failed: %v", tc.Name, err2)
			}
			AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct")
			t.Logf("[%s] Direct hash match test passed!", tc.Name)
		})
	}
}
// TestSemanticCache_AllVectorStores_NamespaceIsolation checks, for every
// vector store returned by getVectorStoreTestCases, that cache entries are
// scoped by cache key: a request cached under one key must miss when repeated
// under a second key and must be a "direct" hit when repeated under the
// original key. If the first request fails outright the subtest is skipped.
func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) {
	for _, tc := range getVectorStoreTestCases() {
		t.Run(tc.Name, func(t *testing.T) {
			skipIfNoAPIKey(t, tc.StoreType)

			setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
			defer setup.Cleanup()

			// Use unique cache keys per test run to avoid stale data from previous runs
			// (Pinecone Local doesn't support deletion by metadata filter)
			testRunID := uuid.New().String()[:8]
			cacheKey1 := "test-" + strings.ToLower(tc.Name) + "-namespace-1-" + testRunID
			cacheKey2 := "test-" + strings.ToLower(tc.Name) + "-namespace-2-" + testRunID

			// Cache with first key
			ctx1 := CreateContextWithCacheKey(cacheKey1)
			testRequest := CreateBasicChatRequest("Namespace isolation test for "+tc.Name+" "+testRunID, 0.7, 50)
			t.Logf("[%s] Making request with cache key 1...", tc.Name)
			response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest)
			if err1 != nil {
				// Skipf stops the subtest, so no explicit return is needed.
				t.Skipf("[%s] First request failed (likely no API key): %v", tc.Name, err1)
			}
			AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1})
			WaitForCache(setup.Plugin)

			// Try with different cache key - should miss
			ctx2 := CreateContextWithCacheKey(cacheKey2)
			t.Logf("[%s] Making same request with different cache key (expecting miss)...", tc.Name)
			response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest)
			if err2 != nil {
				// err2.Error may be nil; guard before reading Message so a
				// failure is reported instead of panicking on a nil pointer.
				if err2.Error != nil {
					t.Fatalf("[%s] Second request failed: %v", tc.Name, err2.Error.Message)
				}
				t.Fatalf("[%s] Second request failed: %v", tc.Name, err2)
			}
			// Should be a cache miss because different namespace
			AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2})

			// Try with original key - should hit
			ctx3 := CreateContextWithCacheKey(cacheKey1)
			t.Logf("[%s] Making same request with original cache key (expecting hit)...", tc.Name)
			response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest)
			if err3 != nil {
				// Same nil guard as for the second request.
				if err3.Error != nil {
					t.Fatalf("[%s] Third request failed: %v", tc.Name, err3.Error.Message)
				}
				t.Fatalf("[%s] Third request failed: %v", tc.Name, err3)
			}
			AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct")
			t.Logf("[%s] Namespace isolation test passed!", tc.Name)
		})
	}
}
// TestSemanticCache_AllVectorStores_ParameterFiltering drives the plugin hooks
// directly for every vector store returned by getVectorStoreTestCases. It
// caches a hand-built response for a chat request with temperature 0.7 and
// then expects PreLLMHook to miss for an otherwise identical request with
// temperature 0.5 under the same cache key.
func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) {
	for _, storeCase := range getVectorStoreTestCases() {
		t.Run(storeCase.Name, func(t *testing.T) {
			skipIfNoAPIKey(t, storeCase.StoreType)

			setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), storeCase.StoreType)
			defer setup.Cleanup()

			label := storeCase.Name
			cacheKey := "test-" + strings.ToLower(label) + "-params"

			// requestAt builds the chat request; only the temperature varies
			// between the two requests of this test.
			requestAt := func(temperature float64) *schemas.BifrostRequest {
				return &schemas.BifrostRequest{
					RequestType: schemas.ChatCompletionRequest,
					ChatRequest: &schemas.BifrostChatRequest{
						Provider: schemas.OpenAI,
						Model:    "gpt-4o-mini",
						Input: []schemas.ChatMessage{
							{
								Role: schemas.ChatMessageRoleUser,
								Content: &schemas.ChatMessageContent{
									ContentStr: bifrost.Ptr("Parameter test for " + label),
								},
							},
						},
						Params: &schemas.ChatParameters{
							Temperature:         bifrost.Ptr(temperature),
							MaxCompletionTokens: bifrost.Ptr(100),
						},
					},
				}
			}

			seedCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
			seedCtx.SetValue(CacheKey, cacheKey)

			// First request (temperature 0.7): nothing cached yet.
			seedRequest := requestAt(0.7)
			t.Logf("[%s] Testing first request with temperature=0.7...", label)
			_, seedHit, err := setup.Plugin.PreLLMHook(seedCtx, seedRequest)
			if err != nil {
				t.Fatalf("[%s] First PreHook failed: %v", label, err)
			}
			if seedHit != nil {
				t.Fatalf("[%s] Expected cache miss for first request", label)
			}

			// Store a hand-built response for the first request.
			cachedResponse := &schemas.BifrostResponse{
				ChatResponse: &schemas.BifrostChatResponse{
					ID: uuid.New().String(),
					Choices: []schemas.BifrostResponseChoice{
						{
							ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{
								Message: &schemas.ChatMessage{
									Role: schemas.ChatMessageRoleAssistant,
									Content: &schemas.ChatMessageContent{
										ContentStr: bifrost.Ptr("Response for " + label),
									},
								},
							},
						},
					},
					ExtraFields: schemas.BifrostResponseExtraFields{
						Provider:               schemas.OpenAI,
						OriginalModelRequested: "gpt-4o-mini",
						RequestType:            schemas.ChatCompletionRequest,
					},
				},
			}
			if _, _, err = setup.Plugin.PostLLMHook(seedCtx, cachedResponse, nil); err != nil {
				t.Fatalf("[%s] PostHook failed: %v", label, err)
			}
			WaitForCache(setup.Plugin)
			t.Logf("[%s] First response cached", label)

			// Second request differs only in temperature and should miss.
			t.Logf("[%s] Testing second request with temperature=0.5 (expecting cache miss)...", label)
			probeCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
			probeCtx.SetValue(CacheKey, cacheKey)
			probeRequest := requestAt(0.5) // Different temperature
			_, probeHit, err := setup.Plugin.PreLLMHook(probeCtx, probeRequest)
			if err != nil {
				t.Fatalf("[%s] Second PreHook failed: %v", label, err)
			}
			if probeHit != nil {
				t.Fatalf("[%s] Expected cache miss due to different temperature, but got cache hit", label)
			}
			t.Logf("[%s] Parameter filtering test passed!", label)
		})
	}
}
// TestSemanticCache_AllVectorStores_EmbeddingRequest sends the same embedding
// request twice for every vector store test case and expects the first call to
// miss the cache and the second to be served as a "direct" cache hit.
func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) {
	for _, tc := range getVectorStoreTestCases() {
		t.Run(tc.Name, func(t *testing.T) {
			skipIfNoAPIKey(t, tc.StoreType)
			setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType)
			defer setup.Cleanup()

			// A per-run suffix keeps entries left behind by earlier runs from
			// matching (Pinecone Local doesn't support deletion by metadata filter).
			runSuffix := uuid.New().String()[:8]
			runCacheKey := "test-" + strings.ToLower(tc.Name) + "-embedding-" + runSuffix
			request := CreateEmbeddingRequest([]string{"Test embedding with " + tc.Name + " " + runSuffix})

			// First call populates the cache; a failure here skips the subtest.
			t.Logf("[%s] Making first embedding request...", tc.Name)
			firstResponse, firstErr := setup.Client.EmbeddingRequest(CreateContextWithCacheKey(runCacheKey), request)
			if firstErr != nil {
				// Skipf stops the subtest, so nothing after it runs.
				t.Skipf("[%s] First embedding request failed (likely no API key): %v", tc.Name, firstErr)
			}
			AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: firstResponse})
			WaitForCache(setup.Plugin)

			// Second, identical call must be answered from the cache.
			t.Logf("[%s] Making second embedding request (expecting cache hit)...", tc.Name)
			secondResponse, secondErr := setup.Client.EmbeddingRequest(CreateContextWithCacheKey(runCacheKey), request)
			if secondErr != nil {
				t.Fatalf("[%s] Second embedding request failed: %v", tc.Name, secondErr.Error.Message)
			}
			AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: secondResponse}, "direct")
			t.Logf("[%s] Embedding request caching test passed!", tc.Name)
		})
	}
}

View File

@@ -0,0 +1,466 @@
package semanticcache
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/vectorstore"
)
// prepareDirectCacheLookup computes the request hash and the params hash for
// req, records both on ctx (under requestHashKey and requestParamsHashKey), and
// returns the direct cache ID built by generateDirectCacheID from the provider,
// model, cacheKey and the two hashes.
//
// An error is returned if either hash cannot be computed; in that case nothing
// is written to ctx.
func (plugin *Plugin) prepareDirectCacheLookup(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (string, error) {
	requestHash, err := plugin.generateRequestHash(req)
	if err != nil {
		return "", fmt.Errorf("failed to generate request hash: %w", err)
	}
	plugin.logger.Debug(PluginLoggerPrefix + " Generated Hash for Request: " + requestHash)

	requestParamsHash, err := plugin.computeRequestParamsHash(req)
	if err != nil {
		return "", fmt.Errorf("failed to compute direct lookup params hash: %w", err)
	}

	// Publish both hashes so later stages can read them back from the context.
	ctx.SetValue(requestHashKey, requestHash)
	ctx.SetValue(requestParamsHashKey, requestParamsHash)

	provider, model, _ := req.GetRequestFields()
	return plugin.generateDirectCacheID(provider, model, cacheKey, requestHash, requestParamsHash), nil
}
// performLegacyDirectSearch looks for a direct (exact hash) cache entry by
// filtering the vector store on metadata instead of fetching by ID.
//
// The request hash and params hash are read from ctx (requestHashKey,
// requestParamsHashKey). Provider and model filters are added only when the
// corresponding CacheByProvider / CacheByModel options are enabled. The search
// context is wrapped with vectorstore.WithDisableScanFallback.
//
// Returns (nil, nil) on a miss, including when the store reports
// vectorstore.ErrNotFound or vectorstore.ErrQuerySyntax.
func (plugin *Plugin) performLegacyDirectSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) {
	requestHash, _ := ctx.Value(requestHashKey).(string)
	requestParamsHash, _ := ctx.Value(requestParamsHashKey).(string)
	provider, model, _ := req.GetRequestFields()

	// equals builds an equality filter on a single metadata field.
	equals := func(field string, value interface{}) vectorstore.Query {
		return vectorstore.Query{Field: field, Operator: vectorstore.QueryOperatorEqual, Value: value}
	}
	filters := []vectorstore.Query{
		equals("request_hash", requestHash),
		equals("cache_key", cacheKey),
		equals("params_hash", requestParamsHash),
		equals("from_bifrost_semantic_cache_plugin", true),
	}
	if byProvider := plugin.config.CacheByProvider; byProvider != nil && *byProvider {
		filters = append(filters, equals("provider", string(provider)))
	}
	if byModel := plugin.config.CacheByModel; byModel != nil && *byModel {
		filters = append(filters, equals("model", model))
	}
	plugin.logger.Debug(fmt.Sprintf("%s Searching for legacy direct hash match with %d filters", PluginLoggerPrefix, len(filters)))

	// Work on a copy of SelectFields and drop the payload field that does not
	// apply to this request type.
	unusedField := "stream_chunks"
	if bifrost.IsStreamRequestType(req.RequestType) {
		unusedField = "response"
	}
	selectFields := removeField(append([]string(nil), SelectFields...), unusedField)

	searchCtx := vectorstore.WithDisableScanFallback(ctx)
	var cursor *string
	matches, _, err := plugin.store.GetAll(searchCtx, plugin.config.VectorStoreNamespace, filters, selectFields, cursor, 1)
	switch {
	case err == nil:
		// fall through to result handling below
	case errors.Is(err, vectorstore.ErrNotFound), errors.Is(err, vectorstore.ErrQuerySyntax):
		return nil, nil
	default:
		return nil, fmt.Errorf("failed to search for legacy direct hash match: %w", err)
	}

	if len(matches) == 0 {
		plugin.logger.Debug(PluginLoggerPrefix + " No legacy direct hash match found")
		return nil, nil
	}
	match := matches[0]
	plugin.logger.Debug(fmt.Sprintf("%s Found legacy direct hash match with ID: %s", PluginLoggerPrefix, match.ID))
	return plugin.buildResponseFromResult(ctx, req, match, CacheTypeDirect, 1.0, 0)
}
// performDirectChunkLookup fetches a direct cache entry by its deterministic ID.
//
// The ID comes from prepareDirectCacheLookup and is also stored on ctx under
// requestStorageIDKey. A store error is treated as a cache miss (nil, nil) when
// it is vectorstore.ErrNotFound or when its lower-cased message contains
// "not found" or "status code: 404"; any other error is wrapped and returned.
func (plugin *Plugin) performDirectChunkLookup(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) {
	cacheID, err := plugin.prepareDirectCacheLookup(ctx, req, cacheKey)
	if err != nil {
		return nil, err
	}
	ctx.SetValue(requestStorageIDKey, cacheID)

	chunk, err := plugin.store.GetChunk(ctx, plugin.config.VectorStoreNamespace, cacheID)
	if err != nil {
		lowered := strings.ToLower(err.Error())
		switch {
		case errors.Is(err, vectorstore.ErrNotFound),
			strings.Contains(lowered, "not found"),
			strings.Contains(lowered, "status code: 404"):
			plugin.logger.Debug(PluginLoggerPrefix + " No direct chunk match found")
			return nil, nil
		default:
			return nil, fmt.Errorf("failed to fetch direct cache chunk: %w", err)
		}
	}

	plugin.logger.Debug(fmt.Sprintf("%s Found direct chunk match with ID: %s", PluginLoggerPrefix, chunk.ID))
	return plugin.buildResponseFromResult(ctx, req, chunk, CacheTypeDirect, 1.0, 0)
}
// performDirectSearch tries the ID-based chunk lookup first and falls back to
// the metadata-filter ("legacy") search only when that lookup succeeds without
// finding an entry. An error from the chunk lookup is returned as-is.
func (plugin *Plugin) performDirectSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) {
	hit, err := plugin.performDirectChunkLookup(ctx, req, cacheKey)
	switch {
	case err != nil:
		return nil, err
	case hit != nil:
		return hit, nil
	}
	return plugin.performLegacyDirectSearch(ctx, req, cacheKey)
}
// generateEmbeddingsForStorage computes an embedding for req and parks it on
// ctx so the post hook can persist it alongside the cache entry.
//
// It is meant for direct-only cache mode with a vector store that still
// requires vectors. No search is performed here, in contrast to
// performSemanticSearch. On success ctx holds the embedding
// (requestEmbeddingKey), its input-token count (requestEmbeddingTokensKey) and
// the params hash (requestParamsHashKey).
func (plugin *Plugin) generateEmbeddingsForStorage(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) error {
	embeddingText, requestParamsHash, err := plugin.extractTextForEmbedding(req)
	if err != nil {
		return fmt.Errorf("failed to extract text for embedding: %w", err)
	}

	vector, tokenCount, err := plugin.generateEmbedding(ctx, embeddingText)
	if err != nil {
		return fmt.Errorf("failed to generate embedding: %w", err)
	}

	// Hand the results to the post hook through the request context.
	ctx.SetValue(requestEmbeddingKey, vector)
	ctx.SetValue(requestEmbeddingTokensKey, tokenCount)
	ctx.SetValue(requestParamsHashKey, requestParamsHash)
	return nil
}
// performSemanticSearch looks up a semantically similar cached response for req.
//
// It extracts the embedding text and params hash from the request, generates an
// embedding, and stores the embedding, its input-token count and the params
// hash on ctx (requestEmbeddingKey, requestEmbeddingTokensKey,
// requestParamsHashKey) for PostLLMHook. It then asks the vector store for the
// single nearest entry that satisfies the metadata filters and the threshold.
//
// The threshold defaults to plugin.config.Threshold and can be overridden per
// request by a float64 stored on ctx under CacheThresholdKey; a value of any
// other type is ignored with a warning.
//
// Returns (nil, nil) when nothing matches, otherwise the result of
// buildResponseFromResult for the best match.
func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) {
	// Extract text and metadata for embedding
	text, paramsHash, err := plugin.extractTextForEmbedding(req)
	if err != nil {
		return nil, fmt.Errorf("failed to extract text for embedding: %w", err)
	}

	// Generate embedding
	embedding, inputTokens, err := plugin.generateEmbedding(ctx, text)
	if err != nil {
		return nil, fmt.Errorf("failed to generate embedding: %w", err)
	}

	// Store embedding and metadata in context for PostLLMHook
	ctx.SetValue(requestEmbeddingKey, embedding)
	ctx.SetValue(requestEmbeddingTokensKey, inputTokens)
	ctx.SetValue(requestParamsHashKey, paramsHash)

	// Resolve the similarity threshold: config default, optional per-request override.
	cacheThreshold := plugin.config.Threshold
	if thresholdValue := ctx.Value(CacheThresholdKey); thresholdValue != nil {
		if threshold, ok := thresholdValue.(float64); ok {
			cacheThreshold = threshold
		} else {
			plugin.logger.Warn(PluginLoggerPrefix + " Threshold is not a float64, using default threshold")
		}
	}

	provider, model, _ := req.GetRequestFields()

	// Build strict metadata filters as Query slices (provider, model, and all params)
	strictFilters := []vectorstore.Query{
		{Field: "cache_key", Operator: vectorstore.QueryOperatorEqual, Value: cacheKey},
		{Field: "params_hash", Operator: vectorstore.QueryOperatorEqual, Value: paramsHash},
		{Field: "from_bifrost_semantic_cache_plugin", Operator: vectorstore.QueryOperatorEqual, Value: true},
	}
	if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider {
		strictFilters = append(strictFilters, vectorstore.Query{Field: "provider", Operator: vectorstore.QueryOperatorEqual, Value: string(provider)})
	}
	if plugin.config.CacheByModel != nil && *plugin.config.CacheByModel {
		strictFilters = append(strictFilters, vectorstore.Query{Field: "model", Operator: vectorstore.QueryOperatorEqual, Value: model})
	}
	plugin.logger.Debug(fmt.Sprintf("%s Performing semantic search with %d metadata filters", PluginLoggerPrefix, len(strictFilters)))

	// Make a full copy so we don't mutate the original backing array
	selectFields := append([]string(nil), SelectFields...)
	if bifrost.IsStreamRequestType(req.RequestType) {
		selectFields = removeField(selectFields, "response")
	} else {
		selectFields = removeField(selectFields, "stream_chunks")
	}

	// For semantic search, we want semantic similarity in content but exact parameter matching
	results, err := plugin.store.GetNearest(ctx, plugin.config.VectorStoreNamespace, embedding, strictFilters, selectFields, cacheThreshold, 1)
	if err != nil {
		return nil, fmt.Errorf("failed to search semantic cache: %w", err)
	}
	if len(results) == 0 {
		plugin.logger.Debug(PluginLoggerPrefix + " No semantic match found")
		return nil, nil
	}

	// Found a semantically similar entry
	result := results[0]
	// Score is a pointer and buildResponseFromResult already treats it as
	// optional; guard the dereference so a missing score cannot panic here.
	if result.Score != nil {
		plugin.logger.Debug(fmt.Sprintf("%s Found semantic match with ID: %s, Score: %f", PluginLoggerPrefix, result.ID, *result.Score))
	} else {
		plugin.logger.Debug(fmt.Sprintf("%s Found semantic match with ID: %s, Score: n/a", PluginLoggerPrefix, result.ID))
	}

	// Build response from cached result
	return plugin.buildResponseFromResult(ctx, req, result, CacheTypeSemantic, cacheThreshold, inputTokens)
}
// buildResponseFromResult turns a cached search result into a short-circuit
// response.
//
// Steps:
//  1. Fail if the result carries no properties.
//  2. If "expires_at" is present and parses as a Unix timestamp in the past,
//     delete the entry in the background and report a cache miss (nil, nil).
//     Values of an unrecognised type or unparsable strings skip the check.
//  3. Dispatch to the streaming or single-response builder depending on which
//     of "stream_chunks" / "response" holds usable data. Exactly one of them
//     must be usable, otherwise an error is returned.
func (plugin *Plugin) buildResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, cacheType CacheType, threshold float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) {
	props := result.Properties
	if props == nil {
		return nil, fmt.Errorf("no properties found in cached result")
	}

	// TTL check: expires_at may arrive as a string or as one of several numeric types.
	if rawExpiry, present := props["expires_at"]; present && rawExpiry != nil {
		var expiry int64
		parsed := true
		switch value := rawExpiry.(type) {
		case string:
			seconds, convErr := strconv.ParseInt(value, 10, 64)
			expiry, parsed = seconds, convErr == nil
		case float64:
			expiry = int64(value)
		case int64:
			expiry = value
		case int:
			expiry = int64(value)
		default:
			parsed = false
		}
		if parsed && expiry < time.Now().Unix() {
			// Expired: remove the entry without blocking the request path.
			go func() {
				deleteCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
				defer cancel()
				if delErr := plugin.store.Delete(deleteCtx, plugin.config.VectorStoreNamespace, result.ID); delErr != nil {
					plugin.logger.Warn("%s Failed to delete expired entry %s: %v", PluginLoggerPrefix, result.ID, delErr)
				}
			}()
			// Report a cache miss.
			return nil, nil
		}
	}

	// A payload field counts only when the key exists and its value is non-null.
	streamRaw, streamPresent := props["stream_chunks"]
	singleRaw, singlePresent := props["response"]
	singleUsable := singlePresent && singleRaw != nil
	streamUsable := streamPresent && streamRaw != nil

	// stream_chunks must additionally parse into at least one chunk.
	if chunks, parseErr := plugin.parseStreamChunks(streamRaw); parseErr != nil || len(chunks) == 0 {
		streamUsable = false
	}

	var similarity float64
	if result.Score != nil {
		similarity = *result.Score
	}

	switch {
	case streamUsable && !singleUsable:
		return plugin.buildStreamingResponseFromResult(ctx, req, result, streamRaw, cacheType, threshold, similarity, inputTokens)
	case singleUsable && !streamUsable:
		return plugin.buildSingleResponseFromResult(ctx, req, result, singleRaw, cacheType, threshold, similarity, inputTokens)
	default:
		return nil, fmt.Errorf("cached result has invalid response data: both or neither response/stream_chunks are present (response: %v, stream_chunks: %v)", singleRaw, streamRaw)
	}
}
// buildSingleResponseFromResult decodes a cached non-streaming response and
// wraps it in a short-circuit.
//
// responseData must be a JSON string holding a schemas.BifrostResponse. The
// decoded response's CacheDebug block is created if absent and filled with
// hit information; the embedding provider/model, threshold, similarity and
// input tokens are set only for semantic hits and cleared otherwise. The
// cache-hit markers (isCacheHitKey, cacheHitTypeKey) are written to ctx.
func (plugin *Plugin) buildSingleResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, responseData interface{}, cacheType CacheType, threshold float64, similarity float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) {
	requestedProvider, requestedModel, _ := req.GetRequestFields()

	serialized, isString := responseData.(string)
	if !isString {
		return nil, fmt.Errorf("cached response is not a string")
	}

	var cachedResponse schemas.BifrostResponse
	if err := json.Unmarshal([]byte(serialized), &cachedResponse); err != nil {
		return nil, fmt.Errorf("failed to unmarshal cached response: %w", err)
	}

	extraFields := cachedResponse.GetExtraFields()
	if extraFields.CacheDebug == nil {
		extraFields.CacheDebug = &schemas.BifrostCacheDebug{}
	}
	debugInfo := extraFields.CacheDebug

	// Fields reported for every kind of hit.
	debugInfo.CacheHit = true
	debugInfo.HitType = bifrost.Ptr(string(cacheType))
	debugInfo.CacheID = bifrost.Ptr(result.ID)
	debugInfo.RequestedProvider = bifrost.Ptr(string(requestedProvider))
	debugInfo.RequestedModel = bifrost.Ptr(requestedModel)

	// Embedding-related fields only make sense for semantic hits; anything
	// carried over from the stored response is cleared for other hit types.
	switch cacheType {
	case CacheTypeSemantic:
		debugInfo.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider))
		debugInfo.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel)
		debugInfo.Threshold = &threshold
		debugInfo.Similarity = &similarity
		debugInfo.InputTokens = &inputTokens
	default:
		debugInfo.ProviderUsed = nil
		debugInfo.ModelUsed = nil
		debugInfo.Threshold = nil
		debugInfo.Similarity = nil
		debugInfo.InputTokens = nil
	}

	ctx.SetValue(isCacheHitKey, true)
	ctx.SetValue(cacheHitTypeKey, cacheType)

	shortCircuit := &schemas.LLMPluginShortCircuit{Response: &cachedResponse}
	return shortCircuit, nil
}
// buildStreamingResponseFromResult replays cached stream chunks over a new
// channel and returns it wrapped in a short-circuit.
//
// streamData is parsed with parseStreamChunks. A goroutine then decodes each
// chunk (a JSON-encoded schemas.BifrostResponse) and sends it on an unbuffered
// channel, closing the channel when done. Chunks that are not strings or fail
// to decode are logged and skipped. When the chunk at the last index is
// processed, the stream-end indicator is set on ctx and cache debug
// information is attached to that chunk.
func (plugin *Plugin) buildStreamingResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, streamData interface{}, cacheType CacheType, threshold float64, similarity float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) {
	requestedProvider, requestedModel, _ := req.GetRequestFields()

	chunks, err := plugin.parseStreamChunks(streamData)
	if err != nil {
		return nil, fmt.Errorf("failed to parse stream_chunks: %w", err)
	}

	// The cache-hit markers are written here, before the goroutine starts...
	ctx.SetValue(isCacheHitKey, true)
	ctx.SetValue(cacheHitTypeKey, cacheType)

	// finalChunkDebug assembles the debug block attached to the last chunk.
	// Embedding-related fields stay nil unless this is a semantic hit.
	finalChunkDebug := func() *schemas.BifrostCacheDebug {
		debugInfo := &schemas.BifrostCacheDebug{
			CacheHit:          true,
			HitType:           bifrost.Ptr(string(cacheType)),
			CacheID:           bifrost.Ptr(result.ID),
			RequestedProvider: bifrost.Ptr(string(requestedProvider)),
			RequestedModel:    bifrost.Ptr(requestedModel),
		}
		if cacheType == CacheTypeSemantic {
			debugInfo.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider))
			debugInfo.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel)
			debugInfo.Threshold = &threshold
			debugInfo.Similarity = &similarity
			debugInfo.InputTokens = &inputTokens
		}
		return debugInfo
	}

	out := make(chan *schemas.BifrostStreamChunk)
	lastIndex := len(chunks) - 1
	go func() {
		defer close(out)

		// ...and once more from inside the streaming goroutine.
		ctx.SetValue(isCacheHitKey, true)
		ctx.SetValue(cacheHitTypeKey, cacheType)

		for idx, raw := range chunks {
			encoded, isString := raw.(string)
			if !isString {
				plugin.logger.Warn("%s Stream chunk %d is not a string, skipping", PluginLoggerPrefix, idx)
				continue
			}

			// Each chunk was stored as a serialized BifrostResponse.
			var decoded schemas.BifrostResponse
			if decodeErr := json.Unmarshal([]byte(encoded), &decoded); decodeErr != nil {
				plugin.logger.Warn("%s Failed to unmarshal stream chunk %d, skipping: %v", PluginLoggerPrefix, idx, decodeErr)
				continue
			}

			// Only the last chunk carries cache debug and marks the stream end.
			if idx == lastIndex {
				ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
				decoded.GetExtraFields().CacheDebug = finalChunkDebug()
			}

			out <- &schemas.BifrostStreamChunk{
				BifrostTextCompletionResponse:        decoded.TextCompletionResponse,
				BifrostChatResponse:                  decoded.ChatResponse,
				BifrostResponsesStreamResponse:       decoded.ResponsesStreamResponse,
				BifrostSpeechStreamResponse:          decoded.SpeechStreamResponse,
				BifrostTranscriptionStreamResponse:   decoded.TranscriptionStreamResponse,
				BifrostImageGenerationStreamResponse: decoded.ImageGenerationStreamResponse,
			}
		}
	}()

	return &schemas.LLMPluginShortCircuit{Stream: out}, nil
}
// parseStreamChunks normalises stored stream_chunks data into []interface{}.
//
// Accepted inputs:
//   - []interface{}: returned unchanged
//   - []string: each element copied into a new []interface{}
//   - string: decoded as a JSON array of strings, then converted as above
//
// A nil input, a string that is not a valid JSON string array, or any other
// type yields an error.
func (plugin *Plugin) parseStreamChunks(streamData interface{}) ([]interface{}, error) {
	if streamData == nil {
		return nil, fmt.Errorf("stream data is nil")
	}

	// box copies a string slice into a freshly allocated []interface{}.
	box := func(items []string) []interface{} {
		boxed := make([]interface{}, len(items))
		for idx := range items {
			boxed[idx] = items[idx]
		}
		return boxed
	}

	switch data := streamData.(type) {
	case []interface{}:
		return data, nil
	case []string:
		return box(data), nil
	case string:
		// Some stores hand the array back as a JSON-encoded string.
		var decoded []string
		if err := json.Unmarshal([]byte(data), &decoded); err != nil {
			return nil, fmt.Errorf("failed to parse JSON string: %w", err)
		}
		return box(decoded), nil
	}
	return nil, fmt.Errorf("unsupported stream data type: %T", streamData)
}

View File

@@ -0,0 +1,201 @@
package semanticcache
import (
"context"
"encoding/json"
"fmt"
"sort"
"sync"
"time"
)
// Streaming State Management Methods
// createStreamAccumulator builds a fresh, incomplete StreamAccumulator for the
// given request, carrying the storage ID, embedding, metadata and TTL that are
// needed once the stream is finally cached. It starts with an empty, non-nil
// chunk list.
func (plugin *Plugin) createStreamAccumulator(requestID string, storageID string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) *StreamAccumulator {
	// IsComplete and the mutex are left at their zero values.
	acc := &StreamAccumulator{
		RequestID: requestID,
		StorageID: storageID,
		Embedding: embedding,
		Metadata:  metadata,
		TTL:       ttl,
	}
	acc.Chunks = make([]*StreamChunk, 0)
	return acc
}
// getOrCreateStreamAccumulator returns the accumulator registered for
// requestID, creating and registering one from the remaining arguments when
// none exists yet. If another goroutine registers one between the lookup and
// the store, that one is returned instead.
func (plugin *Plugin) getOrCreateStreamAccumulator(requestID string, storageID string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) *StreamAccumulator {
	// Fast path: skip building a new accumulator when one is already registered.
	if found, loaded := plugin.streamAccumulators.Load(requestID); loaded {
		return found.(*StreamAccumulator)
	}

	candidate := plugin.createStreamAccumulator(requestID, storageID, embedding, metadata, ttl)
	winner, _ := plugin.streamAccumulators.LoadOrStore(requestID, candidate)
	return winner.(*StreamAccumulator)
}
// addStreamChunk appends chunk to the accumulator registered for requestID.
//
// The accumulator must already exist; otherwise an error is returned. When
// isFinalChunk is true the chunk's timestamp is recorded as the accumulator's
// FinalTimestamp. The accumulator's mutex is held for the whole update.
func (plugin *Plugin) addStreamChunk(requestID string, chunk *StreamChunk, isFinalChunk bool) error {
	stored, found := plugin.streamAccumulators.Load(requestID)
	if !found {
		return fmt.Errorf("stream accumulator not found for request %s", requestID)
	}
	acc := stored.(*StreamAccumulator)

	acc.mu.Lock()
	defer acc.mu.Unlock()

	// Chunks are appended in arrival order.
	acc.Chunks = append(acc.Chunks, chunk)
	if isFinalChunk {
		// The caller decides what counts as final (a completion chunk or a
		// usage-only last chunk); remember when it arrived.
		acc.FinalTimestamp = chunk.Timestamp
	}

	plugin.logger.Debug(fmt.Sprintf("%s Added chunk to stream accumulator for request %s", PluginLoggerPrefix, requestID))
	return nil
}
// processAccumulatedStream caches the complete stream collected for requestID.
//
// Flow: look up the accumulator → drop everything if any chunk was flagged as
// an error → order the chunks → serialize them → store one unified entry via
// the vector store's Add. The accumulator is always removed from the plugin's
// registry before this function returns, whether or not anything was stored.
//
// Returns an error when no accumulator exists for requestID or when the store
// rejects the entry. A stream with errors or without any serializable chunk is
// skipped and reported as success (nil).
func (plugin *Plugin) processAccumulatedStream(ctx context.Context, requestID string) error {
	accumulatorInterface, exists := plugin.streamAccumulators.Load(requestID)
	if !exists {
		return fmt.Errorf("stream accumulator not found for request %s", requestID)
	}
	accumulator := accumulatorInterface.(*StreamAccumulator)
	accumulator.mu.Lock()
	// Deferred calls run last-in-first-out: the registry cleanup below runs
	// first, then the mutex is released.
	defer accumulator.mu.Unlock()
	defer plugin.cleanupStreamAccumulator(requestID)

	// STEP 1: Check if any chunk in the entire stream had an error
	if accumulator.HasError {
		plugin.logger.Debug(fmt.Sprintf("%s Stream for request %s had errors, dropping entire operation (not caching)", PluginLoggerPrefix, requestID))
		return nil
	}

	// STEP 2: All chunks are clean, now sort and build ordered stream for caching
	plugin.logger.Debug(fmt.Sprintf("%s Stream for request %s completed successfully, processing %d chunks for caching", PluginLoggerPrefix, requestID, len(accumulator.Chunks)))

	// Sort chunks by their ChunkIndex to ensure proper order (stable + nil-safe).
	// Each payload comparison requires BOTH chunks to carry that payload, so a
	// stream that mixes payload kinds cannot trigger a nil dereference; such
	// pairs compare as equal and keep their arrival order.
	sort.SliceStable(accumulator.Chunks, func(i, j int) bool {
		a, b := accumulator.Chunks[i].Response, accumulator.Chunks[j].Response
		if a == nil || b == nil {
			// Push nils to the end deterministically: a non-nil response sorts
			// before a nil one, and two nils compare as equal.
			return a != nil
		}
		switch {
		case a.TextCompletionResponse != nil && b.TextCompletionResponse != nil:
			return a.TextCompletionResponse.ExtraFields.ChunkIndex < b.TextCompletionResponse.ExtraFields.ChunkIndex
		case a.ChatResponse != nil && b.ChatResponse != nil:
			return a.ChatResponse.ExtraFields.ChunkIndex < b.ChatResponse.ExtraFields.ChunkIndex
		case a.ResponsesResponse != nil && b.ResponsesResponse != nil:
			return a.ResponsesResponse.ExtraFields.ChunkIndex < b.ResponsesResponse.ExtraFields.ChunkIndex
		case a.ResponsesStreamResponse != nil && b.ResponsesStreamResponse != nil:
			return a.ResponsesStreamResponse.ExtraFields.ChunkIndex < b.ResponsesStreamResponse.ExtraFields.ChunkIndex
		case a.SpeechResponse != nil && b.SpeechResponse != nil:
			return a.SpeechResponse.ExtraFields.ChunkIndex < b.SpeechResponse.ExtraFields.ChunkIndex
		case a.SpeechStreamResponse != nil && b.SpeechStreamResponse != nil:
			return a.SpeechStreamResponse.ExtraFields.ChunkIndex < b.SpeechStreamResponse.ExtraFields.ChunkIndex
		case a.TranscriptionResponse != nil && b.TranscriptionResponse != nil:
			return a.TranscriptionResponse.ExtraFields.ChunkIndex < b.TranscriptionResponse.ExtraFields.ChunkIndex
		case a.TranscriptionStreamResponse != nil && b.TranscriptionStreamResponse != nil:
			return a.TranscriptionStreamResponse.ExtraFields.ChunkIndex < b.TranscriptionStreamResponse.ExtraFields.ChunkIndex
		case a.ImageGenerationStreamResponse != nil && b.ImageGenerationStreamResponse != nil:
			// For image generation, sort by Index first, then ChunkIndex
			if a.ImageGenerationStreamResponse.Index != b.ImageGenerationStreamResponse.Index {
				return a.ImageGenerationStreamResponse.Index < b.ImageGenerationStreamResponse.Index
			}
			return a.ImageGenerationStreamResponse.ChunkIndex < b.ImageGenerationStreamResponse.ChunkIndex
		}
		return false
	})

	// Serialize every chunk that carries a response; chunks that fail to
	// marshal are logged and left out.
	streamResponses := make([]string, 0, len(accumulator.Chunks))
	for i, chunk := range accumulator.Chunks {
		if chunk.Response == nil {
			continue
		}
		chunkData, err := json.Marshal(chunk.Response)
		if err != nil {
			plugin.logger.Warn("%s Failed to marshal stream chunk %d: %v", PluginLoggerPrefix, i, err)
			continue
		}
		streamResponses = append(streamResponses, string(chunkData))
	}

	// STEP 3: Validate we have valid chunks to cache
	if len(streamResponses) == 0 {
		plugin.logger.Warn("%s Stream for request %s has no valid response chunks, skipping cache storage", PluginLoggerPrefix, requestID)
		return nil
	}

	// STEP 4: Build final metadata and submit to .Add() method.
	// The accumulator's metadata is copied so the original map is not mutated.
	finalMetadata := make(map[string]interface{}, len(accumulator.Metadata)+1)
	for k, v := range accumulator.Metadata {
		finalMetadata[k] = v
	}
	finalMetadata["stream_chunks"] = streamResponses

	// Store complete unified entry using the final cache storage ID.
	if err := plugin.store.Add(ctx, plugin.config.VectorStoreNamespace, accumulator.StorageID, accumulator.Embedding, finalMetadata); err != nil {
		return fmt.Errorf("failed to store complete streaming cache entry: %w", err)
	}
	plugin.logger.Debug(fmt.Sprintf("%s Successfully cached complete stream with %d ordered chunks, ID: %s", PluginLoggerPrefix, len(streamResponses), accumulator.StorageID))
	return nil
}
// cleanupStreamAccumulator drops the accumulator registered under requestID
// from the plugin's streamAccumulators map.
func (plugin *Plugin) cleanupStreamAccumulator(requestID string) {
	plugin.streamAccumulators.Delete(requestID)
}
// cleanupOldStreamAccumulators removes every stream accumulator whose first
// recorded chunk is more than five minutes old. Accumulators that hold no
// chunks are left untouched.
func (plugin *Plugin) cleanupOldStreamAccumulators() {
	cutoff := time.Now().Add(-5 * time.Minute)

	// Pass 1: collect the IDs of stale accumulators.
	var staleIDs []string
	plugin.streamAccumulators.Range(func(key, value interface{}) bool {
		id := key.(string)
		acc := value.(*StreamAccumulator)

		// Age is judged by the timestamp of the first chunk in the list.
		acc.mu.Lock()
		stale := len(acc.Chunks) > 0 && acc.Chunks[0].Timestamp.Before(cutoff)
		acc.mu.Unlock()

		if stale {
			staleIDs = append(staleIDs, id)
			plugin.logger.Debug(fmt.Sprintf("%s Cleaned up old stream accumulator for request %s", PluginLoggerPrefix, id))
		}
		return true
	})

	// Pass 2: delete after the Range call has finished.
	for _, id := range staleIDs {
		plugin.streamAccumulators.Delete(id)
	}
	if removed := len(staleIDs); removed > 0 {
		plugin.logger.Debug(fmt.Sprintf("%s Cleaned up %d old stream accumulators", PluginLoggerPrefix, removed))
	}
}

View File

@@ -0,0 +1,781 @@
package semanticcache
import (
"context"
"os"
"strconv"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/vectorstore"
mocker "github.com/maximhq/bifrost/plugins/mocker"
)
// getWeaviateConfigFromEnv assembles a Weaviate configuration for tests.
//
// Sources and fallbacks:
//   - WEAVIATE_SCHEME: scheme, "http" when unset
//   - env.WEAVIATE_HOST: host, "localhost:9000" when it resolves to empty
//   - env.WEAVIATE_API_KEY: API key, no fallback
//   - WEAVIATE_TIMEOUT: whole seconds, 30 when unset or not an integer
func getWeaviateConfigFromEnv() vectorstore.WeaviateConfig {
	const defaultTimeoutSeconds = 30

	scheme := "http"
	if fromEnv := os.Getenv("WEAVIATE_SCHEME"); fromEnv != "" {
		scheme = fromEnv
	}

	host := schemas.NewEnvVar("env.WEAVIATE_HOST")
	if host.GetValue() == "" {
		host = schemas.NewEnvVar("localhost:9000")
	}
	apiKey := schemas.NewEnvVar("env.WEAVIATE_API_KEY")

	seconds := defaultTimeoutSeconds
	if raw := os.Getenv("WEAVIATE_TIMEOUT"); raw != "" {
		// An unparsable value silently keeps the default.
		if parsed, err := strconv.Atoi(raw); err == nil {
			seconds = parsed
		}
	}

	return vectorstore.WeaviateConfig{
		Scheme:  scheme,
		Host:    host,
		APIKey:  apiKey,
		Timeout: time.Duration(seconds) * time.Second,
	}
}
// getRedisConfigFromEnv assembles a Redis configuration for tests.
//
// The address comes from env.REDIS_ADDR ("localhost:6379" when it resolves to
// empty); username, password and DB come from env.REDIS_USERNAME,
// env.REDIS_PASSWORD and env.REDIS_DB with no fallback. REDIS_TIMEOUT is
// parsed as a Go duration string and falls back to 10s when unset or invalid.
func getRedisConfigFromEnv() vectorstore.RedisConfig {
	const defaultTimeout = 10 * time.Second

	addr := schemas.NewEnvVar("env.REDIS_ADDR")
	if addr.GetValue() == "" {
		addr = schemas.NewEnvVar("localhost:6379")
	}

	timeout := defaultTimeout
	if raw := os.Getenv("REDIS_TIMEOUT"); raw != "" {
		if parsed, err := time.ParseDuration(raw); err == nil {
			timeout = parsed
		}
	}

	return vectorstore.RedisConfig{
		Addr:           addr,
		Username:       schemas.NewEnvVar("env.REDIS_USERNAME"),
		Password:       schemas.NewEnvVar("env.REDIS_PASSWORD"),
		DB:             schemas.NewEnvVar("env.REDIS_DB"),
		ContextTimeout: timeout,
	}
}
// getQdrantConfigFromEnv assembles a Qdrant configuration for tests.
//
// Each setting is read through schemas.NewEnvVar; when one resolves to an
// empty value its fallback is used instead:
//   - env.QDRANT_HOST: "localhost"
//   - env.QDRANT_PORT: "6334"
//   - env.QDRANT_USE_TLS: "false"
//   - env.QDRANT_API_KEY: no fallback
func getQdrantConfigFromEnv() vectorstore.QdrantConfig {
	host := schemas.NewEnvVar("env.QDRANT_HOST")
	port := schemas.NewEnvVar("env.QDRANT_PORT")
	apiKey := schemas.NewEnvVar("env.QDRANT_API_KEY")
	useTLS := schemas.NewEnvVar("env.QDRANT_USE_TLS")

	// Substitute defaults for anything that came back empty.
	if host.GetValue() == "" {
		host = schemas.NewEnvVar("localhost")
	}
	if port.GetValue() == "" {
		port = schemas.NewEnvVar("6334")
	}
	if useTLS.GetValue() == "" {
		useTLS = schemas.NewEnvVar("false")
	}

	var cfg vectorstore.QdrantConfig
	cfg.Host = *host
	cfg.Port = *port
	cfg.APIKey = *apiKey
	cfg.UseTLS = *useTLS
	return cfg
}
// getPineconeConfigFromEnv assembles a Pinecone configuration for tests.
//
// When env.PINECONE_API_KEY or env.PINECONE_INDEX_HOST resolves to an empty
// value, the Pinecone Local defaults are used: API key "pclocal" (Pinecone
// Local doesn't validate API keys) and index host "localhost:5081".
func getPineconeConfigFromEnv() vectorstore.PineconeConfig {
	apiKey := schemas.NewEnvVar("env.PINECONE_API_KEY")
	indexHost := schemas.NewEnvVar("env.PINECONE_INDEX_HOST")

	if apiKey.GetValue() == "" {
		apiKey = schemas.NewEnvVar("pclocal")
	}
	if indexHost.GetValue() == "" {
		indexHost = schemas.NewEnvVar("localhost:5081")
	}

	var cfg vectorstore.PineconeConfig
	cfg.APIKey = *apiKey
	cfg.IndexHost = *indexHost
	return cfg
}
// BaseAccount implements the schemas.Account interface for testing purposes.
// It holds no state; its methods return fixed values (OpenAI as the only
// provider, a single key read from env.OPENAI_API_KEY, and a fixed provider
// configuration).
type BaseAccount struct{}
// GetConfiguredProviders reports OpenAI as the only configured provider and
// never returns an error.
func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error) {
	providers := []schemas.ModelProvider{schemas.OpenAI}
	return providers, nil
}
// GetKeysForProvider returns a single key backed by env.OPENAI_API_KEY with
// weight 1.0 that is allowed for all models. The same key is returned for
// every providerKey; ctx is not used.
func (baseAccount *BaseAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error) {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: schemas.WhiteList{"*"}, // "*" means allow all models
		Weight: 1.0,
	}
	return []schemas.Key{openAIKey}, nil
}
func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error) {
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
DefaultRequestTimeoutInSeconds: 60,
MaxRetries: 5,
RetryBackoffInitial: 100 * time.Millisecond,
RetryBackoffMax: 30 * time.Second,
},
ConcurrencyAndBufferSize: schemas.ConcurrencyAndBufferSize{
Concurrency: 10,
BufferSize: 10,
},
}, nil
}
// getMockRules returns the mock rules used by the semantic cache tests.
//
// All rules except the last are enabled, fire with probability 1.0, match on a
// message regex and answer with a single success response. The final
// "default-mock" rule has no conditions and priority -1, acting as the
// catch-all.
func getMockRules() []mocker.MockRule {
	// reply wraps a message in the one-element success response list every rule uses.
	reply := func(message string) []mocker.Response {
		return []mocker.Response{
			{Type: mocker.ResponseTypeSuccess, Content: &mocker.SuccessResponse{Message: message}},
		}
	}

	// regexRule builds an enabled, always-firing rule keyed on a message regex.
	regexRule := func(name, pattern, message string) mocker.MockRule {
		return mocker.MockRule{
			Name:        name,
			Enabled:     true,
			Conditions:  mocker.Conditions{MessageRegex: bifrost.Ptr(pattern)},
			Probability: 1.0,
			Responses:   reply(message),
		}
	}

	return []mocker.MockRule{
		// Core test prompts
		regexRule(
			"bifrost-definition",
			"(?i)What is Bifrost.*",
			"Bifrost is a unified API for interacting with multiple AI providers.",
		),
		regexRule(
			"machine-learning-explanation",
			"(?i)what is machine learning\\?|explain machine learning|machine learning concepts|can you explain machine learning|explain the basics of machine learning",
			"Machine learning is a field of AI that uses statistical techniques to give computer systems the ability to learn from data.",
		),
		regexRule(
			"ai-explanation",
			"(?i)what is artificial intelligence\\?|can you explain what ai is\\?|define artificial intelligence",
			"Artificial intelligence is the simulation of human intelligence in machines.",
		),
		regexRule(
			"capital-of-france",
			"What is the capital of France\\?",
			"The capital of France is Paris.",
		),
		regexRule(
			"newton-laws",
			"(?i)describe.*newton.*three laws|describe.*three laws.*newton",
			"Newton's three laws of motion are: 1. An object at rest stays at rest and an object in motion stays in motion with the same speed and in the same direction unless acted upon by an unbalanced force. 2. The acceleration of an object as produced by a net force is directly proportional to the magnitude of the net force, in the same direction as the net force, and inversely proportional to the mass of the object. 3. For every action, there is an equal and opposite reaction.",
		),

		// Weather-related prompts
		regexRule(
			"weather-question",
			"(?i)what.*weather|weather.*like",
			"It's sunny today with a temperature of 72°F.",
		),

		// Blockchain and deep learning
		regexRule(
			"blockchain-definition",
			"(?i)define blockchain|blockchain technology",
			"Blockchain is a distributed ledger technology that maintains a continuously growing list of records.",
		),
		regexRule(
			"deep-learning",
			"(?i)what is deep learning",
			"Deep learning is a subset of machine learning that uses neural networks with multiple layers.",
		),

		// Quantum computing
		regexRule(
			"quantum-computing",
			"(?i)quantum computing|explain quantum",
			"Quantum computing uses quantum mechanical phenomena to process information in ways that classical computers cannot.",
		),

		// Conversation prompts
		regexRule(
			"hello-greeting",
			"(?i)^hello$|^hi$|hello.*world",
			"Hello! How can I help you today?",
		),
		regexRule(
			"how-are-you",
			"(?i)how are you",
			"I'm doing well, thank you for asking!",
		),
		regexRule(
			"meaning-of-life",
			"(?i)meaning of life",
			"The meaning of life is a philosophical question that has been pondered for centuries. Some say it's 42!",
		),
		regexRule(
			"short-story",
			"(?i)tell me.*short story",
			"Once upon a time, there was a brave knight who saved the day.",
		),

		// Test-specific prompts
		regexRule(
			"test-configuration",
			"(?i)test configuration",
			"This is a test configuration response.",
		),
		regexRule(
			"test-messages",
			"(?i)test.*message|test.*no-store|test.*cache|test.*error|ttl test|threshold test|provider.*test|edge case test",
			"This is a test response for various test scenarios.",
		),
		regexRule(
			"long-prompt",
			"(?i)very long prompt",
			"This is a response to a very long prompt.",
		),
		regexRule(
			"parameter-tests",
			"(?i)test.*parameters|performance test",
			"Parameter test response with various settings.",
		),

		// Dynamic message patterns (for conversation tests)
		regexRule(
			"message-pattern",
			"(?i)message \\d+",
			"Response to numbered message.",
		),

		// Default catch-all rule (lowest priority)
		{
			Name:        "default-mock",
			Enabled:     true,
			Priority:    -1, // Lower priority
			Conditions:  mocker.Conditions{},
			Probability: 1.0,
			Responses:   reply("This is a generic mocked response."),
		},
	}
}
// getMockedBifrostClient initializes a Bifrost client backed by a BaseAccount
// and two LLM plugins, in this order: the supplied semantic cache plugin, then
// a mocker plugin loaded with getMockRules. The test is aborted with t.Fatalf
// if either the mocker plugin or the client cannot be initialized.
func getMockedBifrostClient(t *testing.T, ctx *schemas.BifrostContext, logger schemas.Logger, semanticCachePlugin schemas.LLMPlugin) *bifrost.Bifrost {
	mockPlugin, err := mocker.Init(mocker.MockerConfig{
		Enabled: true,
		Rules:   getMockRules(),
	})
	if err != nil {
		t.Fatalf("Failed to initialize mocker plugin: %v", err)
	}

	bifrostCfg := schemas.BifrostConfig{
		Account:    &BaseAccount{},
		LLMPlugins: []schemas.LLMPlugin{semanticCachePlugin, mockPlugin},
		Logger:     logger,
	}
	client, err := bifrost.Init(ctx, bifrostCfg)
	if err != nil {
		t.Fatalf("Error initializing Bifrost with mocker: %v", err)
	}
	return client
}
// TestSetup bundles the components a semantic cache test works with.
// Instances are assembled by NewTestSetupWithVectorStore; call Cleanup when
// the test is done.
type TestSetup struct {
// Logger is the logger handed to the vector store, the plugin and the client.
Logger schemas.Logger
// Store is the vector store created for the requested store type.
Store vectorstore.VectorStore
// Plugin is the semantic cache plugin returned by Init.
Plugin schemas.LLMPlugin
// Client is the Bifrost client from getMockedBifrostClient.
Client *bifrost.Bifrost
// Config is the plugin configuration the setup was created with.
Config *Config
}
// NewTestSetup builds a TestSetup with the default plugin configuration:
// OpenAI as provider, the "text-embedding-3-small" embedding model with
// dimension 1536, threshold 0.8, cleanup on shutdown, and one key read from
// the env.OPENAI_API_KEY reference.
func NewTestSetup(t *testing.T) *TestSetup {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: schemas.WhiteList{"*"},
		Weight: 1.0,
	}
	defaults := &Config{
		Provider:          schemas.OpenAI,
		EmbeddingModel:    "text-embedding-3-small",
		Dimension:         1536,
		Threshold:         0.8,
		CleanUpOnShutdown: true,
		Keys:              []schemas.Key{openAIKey},
	}
	return NewTestSetupWithConfig(t, defaults)
}
// NewTestSetupWithConfig builds a TestSetup from the given plugin
// configuration, using Weaviate as the vector store.
func NewTestSetupWithConfig(t *testing.T, config *Config) *TestSetup {
	setup := NewTestSetupWithVectorStore(t, config, vectorstore.VectorStoreTypeWeaviate)
	return setup
}
// NewTestSetupWithVectorStore builds a TestSetup from the given plugin
// configuration and vector store type.
//
// The store configuration is taken from the environment helper matching
// storeType; an unsupported type fails the test. If the vector store cannot be
// created the test is skipped, whereas a plugin initialization failure is
// fatal.
func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectorstore.VectorStoreType) *TestSetup {
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug)

	// Pick the backend-specific configuration for the requested store.
	var backendCfg any
	switch storeType {
	case vectorstore.VectorStoreTypeWeaviate:
		backendCfg = getWeaviateConfigFromEnv()
	case vectorstore.VectorStoreTypeRedis:
		backendCfg = getRedisConfigFromEnv()
	case vectorstore.VectorStoreTypeQdrant:
		backendCfg = getQdrantConfigFromEnv()
	case vectorstore.VectorStoreTypePinecone:
		backendCfg = getPineconeConfigFromEnv()
	default:
		t.Fatalf("Unsupported vector store type: %s", storeType)
	}

	storeCfg := &vectorstore.Config{
		Type:    storeType,
		Config:  backendCfg,
		Enabled: true,
	}
	store, err := vectorstore.NewVectorStore(context.Background(), storeCfg, logger)
	if err != nil {
		// An unreachable backend is an environment problem, not a test failure.
		t.Skipf("Vector store %s not available or failed to connect: %v", storeType, err)
	}

	pluginCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	plugin, err := Init(pluginCtx, config, logger, store)
	if err != nil {
		t.Fatalf("Failed to initialize plugin: %v", err)
	}

	// Clear test keys through the store held by the concrete plugin.
	clearTestKeysWithStore(t, plugin.(*Plugin).store)

	setup := &TestSetup{
		Logger: logger,
		Store:  store,
		Plugin: plugin,
		Config: config,
	}
	// The client runs the cache plugin in front of the mocker plugin.
	setup.Client = getMockedBifrostClient(t, ctx, logger, plugin)
	return setup
}
// Cleanup shuts down the Bifrost client, if one was created. It is a no-op
// when Client is nil.
func (ts *TestSetup) Cleanup() {
	if ts.Client == nil {
		return
	}
	ts.Client.Shutdown()
}
// clearTestKeysWithStore is a placeholder for per-test key cleanup. It does not
// touch the store: with the unified VectorStore interface, cleanup is left to
// the vector store implementation (e.g. dropping entire classes), so this only
// logs that fact.
func clearTestKeysWithStore(t *testing.T, store vectorstore.VectorStore) {
	t.Log("Test cleanup delegated to vector store implementation")
}
// CreateBasicChatRequest builds an OpenAI "gpt-4o-mini" chat completion request
// containing a single user message with the given content, plus the given
// temperature and max completion tokens.
func CreateBasicChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest {
	userMessage := schemas.ChatMessage{
		Role:    "user",
		Content: &schemas.ChatMessageContent{ContentStr: &content},
	}
	params := &schemas.ChatParameters{
		Temperature:         &temperature,
		MaxCompletionTokens: &maxTokens,
	}
	return &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o-mini",
		Input:    []schemas.ChatMessage{userMessage},
		Params:   params,
	}
}
// CreateStreamingChatRequest builds the chat request used by streaming tests.
// It returns exactly what CreateBasicChatRequest returns for the same
// arguments; no streaming-specific field is set here.
func CreateStreamingChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest {
	request := CreateBasicChatRequest(content, temperature, maxTokens)
	return request
}
// CreateSpeechRequest builds an OpenAI "tts-1" speech synthesis request for the
// given input text and voice, asking for the "mp3" response format.
func CreateSpeechRequest(input string, voice string) *schemas.BifrostSpeechRequest {
	speechInput := &schemas.SpeechInput{Input: input}
	params := &schemas.SpeechParameters{
		VoiceConfig:    &schemas.SpeechVoiceInput{Voice: &voice},
		ResponseFormat: "mp3",
	}
	return &schemas.BifrostSpeechRequest{
		Provider: schemas.OpenAI,
		Model:    "tts-1",
		Input:    speechInput,
		Params:   params,
	}
}
// AssertCacheHit verifies that a response was served from cache.
//
// It reports a test error when the response carries no cache debug metadata or
// when that metadata says the response was not a cache hit. If
// expectedCacheType is non-empty and the response records a hit type, the two
// must match. A nil hit type is tolerated. The success message is logged
// once when all checks pass.
func AssertCacheHit(t *testing.T, response *schemas.BifrostResponse, expectedCacheType string) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	extraFields := response.GetExtraFields()
	if extraFields.CacheDebug == nil {
		t.Error("Cache metadata missing 'cache_debug'")
		return
	}

	// Check that it's actually a cache hit
	if !extraFields.CacheDebug.CacheHit {
		t.Error("❌ Expected cache hit but response was not cached")
		return
	}

	if expectedCacheType != "" {
		cacheType := extraFields.CacheDebug.HitType
		if cacheType != nil && *cacheType != expectedCacheType {
			t.Errorf("Expected cache type '%s', got '%s'", expectedCacheType, *cacheType)
			return
		}
	}

	t.Log("✅ Response correctly served from cache")
}
// AssertNoCacheHit verifies that a response was NOT served from cache.
//
// A response without cache debug metadata counts as not cached. When the
// metadata is present, its CacheHit flag decides: true is reported as a test
// error, false passes.
func AssertNoCacheHit(t *testing.T, response *schemas.BifrostResponse) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	extraFields := response.GetExtraFields()
	if extraFields.CacheDebug == nil {
		t.Log("✅ Response correctly not served from cache (no 'cache_debug' flag)")
		return
	}

	// Check the actual CacheHit field instead of just checking if CacheDebug exists
	if extraFields.CacheDebug.CacheHit {
		t.Error("❌ Response was cached when it shouldn't be")
		return
	}

	t.Log("✅ Response correctly not served from cache (cache_debug present but CacheHit=false)")
}
// WaitForCache blocks until async cache operations have finished. When the
// plugin is a *Plugin it first waits for its pending operations; in every case
// it then sleeps 500ms as a small buffer for Weaviate index consistency.
func WaitForCache(plugin schemas.LLMPlugin) {
	cachePlugin, isCachePlugin := plugin.(*Plugin)
	if isCachePlugin {
		cachePlugin.WaitForPendingOperations()
	}

	const settleDelay = 500 * time.Millisecond
	time.Sleep(settleDelay)
}
// CreateEmbeddingRequest builds an OpenAI "text-embedding-3-small" embedding
// request for the given texts.
func CreateEmbeddingRequest(texts []string) *schemas.BifrostEmbeddingRequest {
	embeddingInput := &schemas.EmbeddingInput{Texts: texts}
	return &schemas.BifrostEmbeddingRequest{
		Provider: schemas.OpenAI,
		Model:    "text-embedding-3-small",
		Input:    embeddingInput,
	}
}
// CreateBasicResponsesRequest builds an OpenAI "gpt-4o" Responses API request
// containing a single user message with the given content, plus the given
// temperature and max output tokens.
func CreateBasicResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest {
	userRole := schemas.ResponsesInputMessageRoleUser
	userMessage := schemas.ResponsesMessage{
		Role:    &userRole,
		Content: &schemas.ResponsesMessageContent{ContentStr: &content},
	}
	params := &schemas.ResponsesParameters{
		Temperature:     &temperature,
		MaxOutputTokens: &maxTokens,
	}
	return &schemas.BifrostResponsesRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o",
		Input:    []schemas.ResponsesMessage{userMessage},
		Params:   params,
	}
}
// CreateResponsesRequestWithTools builds the basic Responses API request and
// attaches the given tools to its parameters.
func CreateResponsesRequestWithTools(content string, temperature float64, maxTokens int, tools []schemas.ResponsesTool) *schemas.BifrostResponsesRequest {
	request := CreateBasicResponsesRequest(content, temperature, maxTokens)
	request.Params.Tools = tools
	return request
}
// CreateResponsesRequestWithInstructions builds the basic Responses API request
// and sets the given system instructions on its parameters.
func CreateResponsesRequestWithInstructions(content string, instructions string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest {
	request := CreateBasicResponsesRequest(content, temperature, maxTokens)
	request.Params.Instructions = &instructions
	return request
}
// CreateStreamingResponsesRequest builds the Responses API request used by
// streaming tests. It returns exactly what CreateBasicResponsesRequest returns
// for the same arguments; no streaming-specific field is set here.
func CreateStreamingResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest {
	request := CreateBasicResponsesRequest(content, temperature, maxTokens)
	return request
}
// CreateImageGenerationRequest builds an OpenAI "gpt-image-1" image generation
// request for the given prompt, size and quality, asking for one image (N = 1).
func CreateImageGenerationRequest(prompt string, size string, quality string) *schemas.BifrostImageGenerationRequest {
	imageInput := &schemas.ImageGenerationInput{Prompt: prompt}
	params := &schemas.ImageGenerationParameters{
		Size:    bifrost.Ptr(size),
		Quality: bifrost.Ptr(quality),
		N:       bifrost.Ptr(1),
	}
	return &schemas.BifrostImageGenerationRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-image-1",
		Input:    imageInput,
		Params:   params,
	}
}
// CreateContextWithCacheKey returns a deadline-free Bifrost context, rooted at
// context.Background, that carries value under CacheKey.
func CreateContextWithCacheKey(value string) *schemas.BifrostContext {
	ctx := schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value)
	return ctx
}
// CreateContextWithCacheKeyAndType returns a deadline-free Bifrost context that
// carries value under CacheKey and cacheType under CacheTypeKey.
func CreateContextWithCacheKeyAndType(value string, cacheType CacheType) *schemas.BifrostContext {
	ctx := schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value)
	return ctx.WithValue(CacheTypeKey, cacheType)
}
// CreateContextWithCacheKeyAndTTL returns a deadline-free Bifrost context that
// carries value under CacheKey and ttl under CacheTTLKey.
func CreateContextWithCacheKeyAndTTL(value string, ttl time.Duration) *schemas.BifrostContext {
	ctx := schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value)
	return ctx.WithValue(CacheTTLKey, ttl)
}
// CreateContextWithCacheKeyAndThreshold returns a deadline-free Bifrost context
// that carries value under CacheKey and threshold under CacheThresholdKey.
func CreateContextWithCacheKeyAndThreshold(value string, threshold float64) *schemas.BifrostContext {
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	withKey := ctx.WithValue(CacheKey, value)
	return withKey.WithValue(CacheThresholdKey, threshold)
}
// CreateContextWithCacheKeyAndNoStore returns a deadline-free Bifrost context
// that carries value under CacheKey and noStore under CacheNoStoreKey.
func CreateContextWithCacheKeyAndNoStore(value string, noStore bool) *schemas.BifrostContext {
	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
	withKey := ctx.WithValue(CacheKey, value)
	return withKey.WithValue(CacheNoStoreKey, noStore)
}
// CreateTestSetupWithConversationThreshold builds a TestSetup whose plugin
// configuration uses the given conversation history threshold. All other
// settings are fixed: OpenAI, "text-embedding-3-small", dimension 1536,
// threshold 0.8, cleanup on shutdown.
func CreateTestSetupWithConversationThreshold(t *testing.T, threshold int) *TestSetup {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: []string{"*"},
		Weight: 1.0,
	}
	return NewTestSetupWithConfig(t, &Config{
		Provider:                     schemas.OpenAI,
		EmbeddingModel:               "text-embedding-3-small",
		Dimension:                    1536,
		CleanUpOnShutdown:            true,
		Threshold:                    0.8,
		ConversationHistoryThreshold: threshold,
		Keys:                         []schemas.Key{openAIKey},
	})
}
// CreateTestSetupWithExcludeSystemPrompt builds a TestSetup whose plugin
// configuration has ExcludeSystemPrompt set to the given value. All other
// settings are fixed: OpenAI, "text-embedding-3-small", dimension 1536,
// threshold 0.8, cleanup on shutdown.
func CreateTestSetupWithExcludeSystemPrompt(t *testing.T, excludeSystem bool) *TestSetup {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: []string{"*"},
		Weight: 1.0,
	}
	return NewTestSetupWithConfig(t, &Config{
		Provider:            schemas.OpenAI,
		EmbeddingModel:      "text-embedding-3-small",
		Dimension:           1536,
		CleanUpOnShutdown:   true,
		Threshold:           0.8,
		ExcludeSystemPrompt: &excludeSystem,
		Keys:                []schemas.Key{openAIKey},
	})
}
// CreateTestSetupWithThresholdAndExcludeSystem builds a TestSetup whose plugin
// configuration uses both the given conversation history threshold and the
// given ExcludeSystemPrompt value. All other settings are fixed: OpenAI,
// "text-embedding-3-small", dimension 1536, threshold 0.8, cleanup on shutdown.
func CreateTestSetupWithThresholdAndExcludeSystem(t *testing.T, threshold int, excludeSystem bool) *TestSetup {
	openAIKey := schemas.Key{
		Value:  *schemas.NewEnvVar("env.OPENAI_API_KEY"),
		Models: []string{"*"},
		Weight: 1.0,
	}
	return NewTestSetupWithConfig(t, &Config{
		Provider:                     schemas.OpenAI,
		EmbeddingModel:               "text-embedding-3-small",
		Dimension:                    1536,
		CleanUpOnShutdown:            true,
		Threshold:                    0.8,
		ConversationHistoryThreshold: threshold,
		ExcludeSystemPrompt:          &excludeSystem,
		Keys:                         []schemas.Key{openAIKey},
	})
}
// CreateConversationRequest builds an OpenAI "gpt-4o-mini" chat request whose
// input is the given message history, with the given temperature and max
// completion tokens. The messages slice is used as-is, not copied.
func CreateConversationRequest(messages []schemas.ChatMessage, temperature float64, maxTokens int) *schemas.BifrostChatRequest {
	params := &schemas.ChatParameters{
		Temperature:         &temperature,
		MaxCompletionTokens: &maxTokens,
	}
	return &schemas.BifrostChatRequest{
		Provider: schemas.OpenAI,
		Model:    "gpt-4o-mini",
		Input:    messages,
		Params:   params,
	}
}
// BuildConversationHistory assembles a chat history from an optional system
// prompt and a sequence of user/assistant pairs.
//
// A non-empty systemPrompt becomes the first message. For each pair, element 0
// is added as a user message and element 1 as an assistant message; empty
// strings are skipped and elements beyond the second are ignored. The result
// is always non-nil, even when no message is produced.
func BuildConversationHistory(systemPrompt string, userAssistantPairs ...[]string) []schemas.ChatMessage {
	history := make([]schemas.ChatMessage, 0)

	if systemPrompt != "" {
		history = append(history, schemas.ChatMessage{
			Role:    schemas.ChatMessageRoleSystem,
			Content: &schemas.ChatMessageContent{ContentStr: &systemPrompt},
		})
	}

	for _, pair := range userAssistantPairs {
		for idx, text := range pair {
			if idx > 1 {
				// Only the user and assistant slots are meaningful.
				break
			}
			if text == "" {
				continue
			}

			// Copy so each message points at its own string.
			content := text
			msg := schemas.ChatMessage{
				Content: &schemas.ChatMessageContent{ContentStr: &content},
			}
			if idx == 0 {
				msg.Role = schemas.ChatMessageRoleUser
			} else {
				msg.Role = schemas.ChatMessageRoleAssistant
			}
			history = append(history, msg)
		}
	}

	return history
}
// AddUserMessage returns the conversation extended by one user message with
// the given text.
//
// The input slice is never written to: the append always goes into a fresh
// backing array. Histories built with append usually have spare capacity, so a
// plain append would let two conversations extended from the same base
// overwrite each other's final message.
func AddUserMessage(messages []schemas.ChatMessage, userMessage string) []schemas.ChatMessage {
	newMessage := schemas.ChatMessage{
		Role: schemas.ChatMessageRoleUser,
		Content: &schemas.ChatMessageContent{
			ContentStr: &userMessage,
		},
	}
	// Capping capacity at the length forces append to allocate and copy.
	return append(messages[:len(messages):len(messages)], newMessage)
}
// RetryConfig holds the retry settings for API requests made by the tests.
type RetryConfig struct {
	// MaxRetries is presumably the number of retry attempts — confirm against callers.
	MaxRetries int
	// BaseDelay is presumably the base wait between attempts — confirm against callers.
	BaseDelay time.Duration
}

// DefaultRetryConfig returns the default retry settings: MaxRetries 2 and a
// BaseDelay of 5 milliseconds.
func DefaultRetryConfig() RetryConfig {
	var cfg RetryConfig
	cfg.MaxRetries = 2
	cfg.BaseDelay = 5 * time.Millisecond
	return cfg
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
1.5.4