first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1,812 @@
package vectorstore
import (
"context"
"testing"
"time"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate-go-client/v5/weaviate/filters"
"github.com/weaviate/weaviate/entities/models"
)
// Test constants shared by the Weaviate store tests in this file.
const (
// TestTimeout bounds the context that NewTestSetup creates for a test.
TestTimeout = 30 * time.Second
// TestClassName is the Weaviate class the integration tests read and write.
TestClassName = "TestWeaviate"
// TestEmbeddingDim is the dimension requested from generateTestEmbedding.
TestEmbeddingDim = 384
// DefaultTestScheme is the fallback handed to getEnvWithDefault for WEAVIATE_SCHEME.
DefaultTestScheme = "http"
// DefaultTestHost is the fallback handed to getEnvWithDefault for WEAVIATE_HOST.
DefaultTestHost = "localhost:9000"
// DefaultTestTimeout is the client timeout used when WEAVIATE_TIMEOUT cannot be parsed.
DefaultTestTimeout = 10 * time.Second
)
// TestSetup bundles the store, logger, configuration and context that the
// integration tests in this file share.
type TestSetup struct {
// Store is the WeaviateStore created by NewTestSetup.
Store *WeaviateStore
// Logger is the logger the store was created with.
Logger schemas.Logger
// Config is the configuration the store was created from.
Config WeaviateConfig
// ctx is created in NewTestSetup with a TestTimeout deadline and is passed
// to the store calls made through this setup.
ctx context.Context
// cancel releases ctx; Cleanup defers it.
cancel context.CancelFunc
}
// NewTestSetup creates a TestSetup from environment-driven configuration.
//
// WEAVIATE_SCHEME, WEAVIATE_HOST and WEAVIATE_TIMEOUT are read through
// getEnvWithDefault, falling back to DefaultTestScheme, DefaultTestHost and
// DefaultTestTimeout. An unparsable WEAVIATE_TIMEOUT is reported in the test
// log and replaced by DefaultTestTimeout instead of being silently ignored.
//
// The test is failed immediately (t.Fatalf) when the store cannot be created.
// Outside of -short mode the test class is created if it does not exist yet.
// Callers are expected to defer Cleanup on the returned setup.
func NewTestSetup(t *testing.T) *TestSetup {
	// Attribute failures and log lines to the calling test, not this helper.
	t.Helper()

	// Get configuration from environment variables.
	scheme := getEnvWithDefault("WEAVIATE_SCHEME", DefaultTestScheme)
	host := schemas.NewEnvVar(getEnvWithDefault("WEAVIATE_HOST", DefaultTestHost))

	// Derive the textual default from the constant so the two cannot drift apart.
	timeoutStr := getEnvWithDefault("WEAVIATE_TIMEOUT", DefaultTestTimeout.String())
	timeout, err := time.ParseDuration(timeoutStr)
	if err != nil {
		t.Logf("Warning: invalid WEAVIATE_TIMEOUT %q, using default %s: %v", timeoutStr, DefaultTestTimeout, err)
		timeout = DefaultTestTimeout
	}

	config := WeaviateConfig{
		Scheme:  scheme,
		Host:    host,
		APIKey:  schemas.NewEnvVar("env.WEAVIATE_API_KEY"),
		Timeout: timeout,
	}

	logger := bifrost.NewDefaultLogger(schemas.LogLevelInfo)
	ctx, cancel := context.WithTimeout(context.Background(), TestTimeout)

	store, err := newWeaviateStore(ctx, &config, logger)
	if err != nil {
		// Release the context before aborting; Cleanup will never run for this setup.
		cancel()
		t.Fatalf("Failed to create Weaviate store: %v", err)
	}

	setup := &TestSetup{
		Store:  store,
		Logger: logger,
		Config: config,
		ctx:    ctx,
		cancel: cancel,
	}

	// Ensure class exists for integration tests.
	if !testing.Short() {
		setup.ensureClassExists(t)
	}

	return setup
}
// Cleanup removes the test data (outside of -short mode), closes the store
// and finally cancels the setup's context. A failure to close the store is
// logged as a warning and does not fail the test.
func (ts *TestSetup) Cleanup(t *testing.T) {
	// Cancel last, so the context is still usable for the calls below.
	defer ts.cancel()

	integration := !testing.Short()
	if integration {
		ts.cleanupTestData(t)
	}

	closeErr := ts.Store.Close(ts.ctx, TestClassName)
	if closeErr == nil {
		return
	}
	t.Logf("Warning: Failed to close store: %v", closeErr)
}
// ensureClassExists makes sure the TestClassName class is present in Weaviate.
//
// If the class lookup succeeds and returns a class, nothing is created.
// Otherwise the class is created with four explicit properties (key,
// test_type, size, public) and a cosine-distance vector index. A creation
// failure is only logged; the test continues.
func (ts *TestSetup) ensureClassExists(t *testing.T) {
	// Look the class up first; only a successful, non-nil answer counts as "exists".
	existing, lookupErr := ts.Store.client.Schema().ClassGetter().WithClassName(TestClassName).Do(ts.ctx)
	if lookupErr == nil && existing != nil {
		t.Logf("Class %s already exists", TestClassName)
		return
	}

	// Declared properties of the test class, in creation order.
	propertySpecs := []struct {
		name     string
		dataType string
	}{
		{"key", "text"},
		{"test_type", "text"},
		{"size", "int"},
		{"public", "boolean"},
	}

	properties := make([]*models.Property, 0, len(propertySpecs))
	for _, spec := range propertySpecs {
		properties = append(properties, &models.Property{
			Name:     spec.name,
			DataType: []string{spec.dataType},
		})
	}

	testClass := &models.Class{
		Class:      TestClassName,
		Properties: properties,
		VectorIndexConfig: map[string]interface{}{
			"distance": "cosine",
		},
	}

	createErr := ts.Store.client.Schema().ClassCreator().WithClass(testClass).Do(ts.ctx)
	if createErr == nil {
		t.Logf("Created test class: %s", TestClassName)
		return
	}

	// Creation failures are tolerated: the run continues with a warning.
	t.Logf("Warning: Failed to create test class %s: %v", TestClassName, createErr)
	t.Logf("This might be due to auto-schema creation. Continuing...")
}
// cleanupTestData removes all test objects from the TestClassName class.
//
// Objects are fetched from the start of the class in pages of pageSize and
// deleted one by one. Fetching is repeated while a full page came back and at
// least one delete succeeded, so classes holding more than one page of
// objects are emptied as well. The number of passes is capped so that a store
// that keeps returning the same objects cannot loop forever.
//
// Failures are logged as warnings and never fail the test: a listing error
// aborts the cleanup, a delete error skips that object.
func (ts *TestSetup) cleanupTestData(t *testing.T) {
	const (
		pageSize  = 1000 // objects requested per GetAll call
		maxPasses = 100  // hard upper bound on fetch/delete rounds
	)

	for pass := 0; pass < maxPasses; pass++ {
		// Always read from the start: everything returned earlier has been deleted.
		objects, _, err := ts.Store.GetAll(ts.ctx, TestClassName, []Query{}, []string{}, nil, pageSize)
		if err != nil {
			t.Logf("Warning: Failed to get all test keys: %v", err)
			return
		}

		deleted := 0
		for _, obj := range objects {
			if err := ts.Store.Delete(ts.ctx, TestClassName, obj.ID); err != nil {
				t.Logf("Warning: Failed to delete test key %s: %v", obj.ID, err)
				continue
			}
			deleted++
		}

		// A short page means the class is exhausted; no progress means
		// another pass would only repeat the same failures.
		if len(objects) < pageSize || deleted == 0 {
			break
		}
	}

	t.Logf("Cleaned up test class: %s", TestClassName)
}
// ============================================================================
// UNIT TESTS
// ============================================================================
// TestWeaviateConfig_Validation feeds a set of configurations to
// newWeaviateStore and checks the outcome.
//
// Cases with expectError must be rejected with an error containing errorMsg.
// The remaining cases are also expected to return an error and a nil store,
// because the test assumes no Weaviate instance is reachable at the
// configured hosts; they will fail if one is.
func TestWeaviateConfig_Validation(t *testing.T) {
	logger := bifrost.NewDefaultLogger(schemas.LogLevelInfo)
	ctx := context.Background()

	tests := []struct {
		name        string
		config      WeaviateConfig
		expectError bool
		errorMsg    string
	}{
		{
			name: "valid config",
			config: WeaviateConfig{
				Scheme: "http",
				Host:   schemas.NewEnvVar("localhost:8080"),
			},
			expectError: false,
		},
		{
			name: "missing scheme",
			config: WeaviateConfig{
				Host: schemas.NewEnvVar("localhost:8080"),
			},
			expectError: true,
			errorMsg:    "scheme and host are required",
		},
		{
			name: "missing host",
			config: WeaviateConfig{
				Scheme: "http",
			},
			expectError: true,
			errorMsg:    "scheme and host are required",
		},
		{
			name: "with api key",
			config: WeaviateConfig{
				Scheme: "https",
				Host:   schemas.NewEnvVar("cluster.weaviate.network"),
				APIKey: schemas.NewEnvVar("test-key"),
			},
			expectError: false,
		},
		{
			name: "with custom headers",
			config: WeaviateConfig{
				Scheme: "http",
				Host:   schemas.NewEnvVar("localhost:8080"),
				Headers: map[string]string{
					"Custom-Header": "value",
				},
			},
			expectError: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			store, err := newWeaviateStore(ctx, &tt.config, logger)

			if tt.expectError {
				// require (not assert) stops the subtest when err is nil, so
				// the err.Error() call below can never dereference a nil error.
				require.Error(t, err)
				assert.Nil(t, store)
				if tt.errorMsg != "" {
					assert.Contains(t, err.Error(), tt.errorMsg)
				}
				return
			}

			// Note: this fails with a connection error in unit tests but
			// should pass config validation.
			assert.Nil(t, store) // Expected due to no real Weaviate instance
			assert.Error(t, err) // Connection error expected
		})
	}
}
// TestDefaultClassName creates a store for http://localhost:8080 and only
// asserts that an error comes back. The default class name itself is not
// inspected: doing so would require mocking the Weaviate client.
func TestDefaultClassName(t *testing.T) {
	cfg := &WeaviateConfig{
		Scheme: "http",
		Host:   schemas.NewEnvVar("localhost:8080"),
	}
	logger := bifrost.NewDefaultLogger(schemas.LogLevelInfo)

	// The connection is expected to fail; the returned store is irrelevant here.
	_, connectErr := newWeaviateStore(context.Background(), cfg, logger)
	assert.Error(t, connectErr)
}
// TestBuildWeaviateFilter checks whether buildWeaviateFilter returns a filter
// at all for a given query list: nil for an empty list, non-nil otherwise.
// The structure of the produced filter is not inspected.
func TestBuildWeaviateFilter(t *testing.T) {
	tests := []struct {
		name    string
		queries []Query
		wantNil bool
	}{
		{
			name:    "empty queries",
			queries: []Query{},
			wantNil: true,
		},
		{
			name: "single string query",
			queries: []Query{
				{Field: "category", Operator: QueryOperatorEqual, Value: "tech"},
			},
		},
		{
			name: "single numeric query",
			queries: []Query{
				{Field: "size", Operator: QueryOperatorGreaterThan, Value: 1000},
			},
		},
		{
			name: "multiple queries (AND)",
			queries: []Query{
				{Field: "category", Operator: QueryOperatorEqual, Value: "tech"},
				{Field: "public", Operator: QueryOperatorEqual, Value: true},
			},
		},
		{
			name: "mixed types",
			queries: []Query{
				{Field: "name", Operator: QueryOperatorLike, Value: "test%"},
				{Field: "count", Operator: QueryOperatorLessThan, Value: int64(100)},
				{Field: "active", Operator: QueryOperatorEqual, Value: true},
				{Field: "score", Operator: QueryOperatorGreaterThanOrEqual, Value: 95.5},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := buildWeaviateFilter(tt.queries)

			if tt.wantNil {
				assert.Nil(t, result)
				return
			}
			// Only presence is verified; checking the internal structure
			// would need reflection or String() methods on the builder.
			assert.NotNil(t, result)
		})
	}
}
// TestConvertOperator verifies the mapping from each QueryOperator to the
// Weaviate filter operator returned by convertOperator. One subtest is run
// per operator, named after the operator's string value.
func TestConvertOperator(t *testing.T) {
	type mapping struct {
		op   QueryOperator
		want filters.WhereOperator
	}

	mappings := []mapping{
		{op: QueryOperatorEqual, want: filters.Equal},
		{op: QueryOperatorNotEqual, want: filters.NotEqual},
		{op: QueryOperatorLessThan, want: filters.LessThan},
		{op: QueryOperatorLessThanOrEqual, want: filters.LessThanEqual},
		{op: QueryOperatorGreaterThan, want: filters.GreaterThan},
		{op: QueryOperatorGreaterThanOrEqual, want: filters.GreaterThanEqual},
		{op: QueryOperatorLike, want: filters.Like},
		{op: QueryOperatorContainsAny, want: filters.ContainsAny},
		{op: QueryOperatorContainsAll, want: filters.ContainsAll},
		{op: QueryOperatorIsNull, want: filters.IsNull},
		// NOTE(review): IsNotNull is expected to map to IsNull as well;
		// presumably the negation is carried by the filter value - confirm
		// against convertOperator and buildWeaviateFilter.
		{op: QueryOperatorIsNotNull, want: filters.IsNull},
	}

	for _, m := range mappings {
		t.Run(string(m.op), func(t *testing.T) {
			got := convertOperator(m.op)
			assert.Equal(t, m.want, got)
		})
	}
}
// ============================================================================
// INTEGRATION TESTS (require real Weaviate instance)
// ============================================================================
// TestWeaviateStore_Integration stores objects in the test class and reads
// them back by key, once with an embedding and once with metadata only.
// Skipped in -short mode.
func TestWeaviateStore_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration tests in short mode")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup(t)

	// Pause between a write and the read that follows it.
	const settle = 100 * time.Millisecond

	t.Run("Add and GetChunk", func(t *testing.T) {
		id := generateUUID()
		vector := generateTestEmbedding(TestEmbeddingDim)
		props := map[string]interface{}{
			"type":   "document",
			"size":   1024,
			"public": true,
		}

		// Store the object, then give the store a moment before reading.
		require.NoError(t, setup.Store.Add(setup.ctx, TestClassName, id, vector, props))
		time.Sleep(settle)

		// Read the single chunk back and check its metadata.
		chunk, err := setup.Store.GetChunk(setup.ctx, TestClassName, id)
		require.NoError(t, err)
		assert.NotEmpty(t, chunk)
		assert.Equal(t, "document", chunk.Properties["type"])
	})

	t.Run("Add without embedding", func(t *testing.T) {
		id := generateUUID()
		props := map[string]interface{}{
			"type": "metadata-only",
		}

		// A nil embedding stores the object with metadata only.
		require.NoError(t, setup.Store.Add(setup.ctx, TestClassName, id, nil, props))
		time.Sleep(settle)

		chunk, err := setup.Store.GetChunk(setup.ctx, TestClassName, id)
		require.NoError(t, err)
		assert.Equal(t, "metadata-only", chunk.Properties["type"])
	})
}
// TestWeaviateStore_FilteringScenarios stores four documents and checks how
// many of them GetAll returns for several metadata filters, followed by a
// two-item pagination check. Skipped in -short mode.
func TestWeaviateStore_FilteringScenarios(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration tests in short mode")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup(t)

	type fixture struct {
		key      string
		metadata map[string]interface{}
	}

	// doc builds one fixture with a fresh key and the four filterable fields.
	doc := func(kind string, size int, public bool, author string) fixture {
		return fixture{
			key: generateUUID(),
			metadata: map[string]interface{}{
				"type":   kind,
				"size":   size,
				"public": public,
				"author": author,
			},
		}
	}

	fixtures := []fixture{
		doc("pdf", 1024, true, "alice"),   // doc1
		doc("docx", 2048, false, "bob"),   // doc2
		doc("pdf", 512, true, "alice"),    // doc3
		doc("txt", 256, true, "charlie"),  // doc4
	}

	filterFields := []string{"type", "size", "public", "author"}

	// Store every fixture with its own generated embedding.
	for _, f := range fixtures {
		vector := generateTestEmbedding(TestEmbeddingDim)
		require.NoError(t, setup.Store.Add(setup.ctx, TestClassName, f.key, vector, f.metadata))
	}

	time.Sleep(500 * time.Millisecond) // Wait for consistency

	filterCases := []struct {
		name    string
		queries []Query
		want    int
	}{
		{
			name: "Filter by numeric comparison",
			queries: []Query{
				{Field: "size", Operator: "GreaterThan", Value: 1000},
			},
			want: 2, // doc1 (1024) and doc2 (2048)
		},
		{
			name: "Filter by boolean",
			queries: []Query{
				{Field: "public", Operator: "Equal", Value: true},
			},
			want: 3, // doc1, doc3, doc4
		},
		{
			name: "Multiple filters (AND)",
			queries: []Query{
				{Field: "type", Operator: "Equal", Value: "pdf"},
				{Field: "public", Operator: "Equal", Value: true},
			},
			want: 2, // doc1 and doc3
		},
		{
			name: "Complex multi-condition filter",
			queries: []Query{
				{Field: "author", Operator: "Equal", Value: "alice"},
				{Field: "size", Operator: "LessThan", Value: 2000},
				{Field: "public", Operator: "Equal", Value: true},
			},
			want: 2, // doc1 and doc3 (both by alice, < 2000 size, public)
		},
	}

	for _, fc := range filterCases {
		t.Run(fc.name, func(t *testing.T) {
			matches, _, err := setup.Store.GetAll(setup.ctx, TestClassName, fc.queries, filterFields, nil, 10)
			require.NoError(t, err)
			assert.Len(t, matches, fc.want)
		})
	}

	t.Run("Pagination test", func(t *testing.T) {
		// First page, limited to two objects.
		firstPage, cursor, err := setup.Store.GetAll(setup.ctx, TestClassName, nil, filterFields, nil, 2)
		require.NoError(t, err)
		assert.Len(t, firstPage, 2)

		if cursor == nil {
			return
		}

		// A cursor was returned: fetch the page that follows it.
		secondPage, _, err := setup.Store.GetAll(setup.ctx, TestClassName, nil, filterFields, cursor, 2)
		require.NoError(t, err)
		assert.LessOrEqual(t, len(secondPage), 2)
		t.Logf("First page: %d results, Next page: %d results", len(firstPage), len(secondPage))
	})
}
// TestWeaviateStore_CompleteUseCases runs three end-to-end scenarios against
// the test class: document storage and retrieval, per-user content
// management, and a semantic-cache-like workflow. Each scenario stores its
// own objects and then combines metadata filters with vector search.
//
// The scenarios share one class and run in order without cleanup in between,
// so the expected counts rely on the scenarios using disjoint filter values.
// Skipped in -short mode.
func TestWeaviateStore_CompleteUseCases(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration tests in short mode")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup(t)

	type entry struct {
		key       string
		embedding []float32
		metadata  map[string]interface{}
	}

	// newEntry pairs a fresh key and a fresh embedding with the given metadata.
	newEntry := func(metadata map[string]interface{}) entry {
		return entry{
			key:       generateUUID(),
			embedding: generateTestEmbedding(TestEmbeddingDim),
			metadata:  metadata,
		}
	}

	// seed stores all entries and then waits before the scenario queries them.
	seed := func(t *testing.T, entries []entry) {
		for _, e := range entries {
			require.NoError(t, setup.Store.Add(setup.ctx, TestClassName, e.key, e.embedding, e.metadata))
		}
		time.Sleep(300 * time.Millisecond)
	}

	// expectCount runs a filtered GetAll (limit 10) and asserts the result size.
	expectCount := func(t *testing.T, queries []Query, fields []string, want int) {
		found, _, err := setup.Store.GetAll(setup.ctx, TestClassName, queries, fields, nil, 10)
		require.NoError(t, err)
		assert.Len(t, found, want)
	}

	t.Run("Document Storage & Retrieval Scenario", func(t *testing.T) {
		// Documents of different types.
		documents := []entry{
			newEntry(map[string]interface{}{"type": "pdf", "size": 1024, "public": true}),   // doc1
			newEntry(map[string]interface{}{"type": "docx", "size": 2048, "public": false}), // doc2
			newEntry(map[string]interface{}{"type": "pdf", "size": 512, "public": true}),    // doc3
		}
		fields := []string{"type", "size", "public", "author"}
		seed(t, documents)

		// PDF documents: doc1, doc3.
		expectCount(t, []Query{{Field: "type", Operator: "Equal", Value: "pdf"}}, fields, 2)

		// Large documents (size > 1000): doc1, doc2.
		expectCount(t, []Query{{Field: "size", Operator: "GreaterThan", Value: 1000}}, fields, 2)

		// Public PDFs: doc1, doc3.
		expectCount(t, []Query{
			{Field: "public", Operator: "Equal", Value: true},
			{Field: "type", Operator: "Equal", Value: "pdf"},
		}, fields, 2)

		// Vector similarity search using doc1's own embedding as the query.
		probe := documents[0].embedding
		nearest, err := setup.Store.GetNearest(setup.ctx, TestClassName, probe, nil, fields, 0.8, 10)
		require.NoError(t, err)
		assert.GreaterOrEqual(t, len(nearest), 1)
	})

	t.Run("User Content Management Scenario", func(t *testing.T) {
		// User content tagged with owner, language and category.
		userContent := []entry{
			newEntry(map[string]interface{}{"user": "alice", "lang": "en", "category": "tech"}),
			newEntry(map[string]interface{}{"user": "bob", "lang": "es", "category": "tech"}),
			newEntry(map[string]interface{}{"user": "alice", "lang": "en", "category": "sports"}),
		}
		fields := []string{"user", "lang", "category"}
		seed(t, userContent)

		// User-specific filtering: Alice's content.
		expectCount(t, []Query{{Field: "user", Operator: "Equal", Value: "alice"}}, fields, 2)

		// English tech content: only the first entry.
		expectCount(t, []Query{
			{Field: "lang", Operator: "Equal", Value: "en"},
			{Field: "category", Operator: "Equal", Value: "tech"},
		}, fields, 1)

		// Semantic search restricted to Alice: both of her entries are expected.
		onlyAlice := []Query{{Field: "user", Operator: "Equal", Value: "alice"}}
		probe := userContent[0].embedding
		nearest, err := setup.Store.GetNearest(setup.ctx, TestClassName, probe, onlyAlice, fields, 0.1, 10)
		require.NoError(t, err)
		assert.Len(t, nearest, 2)
	})

	t.Run("Semantic Cache-like Workflow", func(t *testing.T) {
		// Request/response pairs with their parameters.
		cacheEntries := []entry{
			newEntry(map[string]interface{}{
				"request_hash": "abc123",
				"user":         "u1",
				"lang":         "en",
				"response":     "answer1",
			}),
			newEntry(map[string]interface{}{
				"request_hash": "def456",
				"user":         "u1",
				"lang":         "es",
				"response":     "answer2",
			}),
		}
		fields := []string{"request_hash", "user", "lang", "response"}
		seed(t, cacheEntries)

		// Hash-based direct retrieval (exact match).
		expectCount(t, []Query{{Field: "request_hash", Operator: "Equal", Value: "abc123"}}, fields, 1)

		// Semantic search narrowed by user and language: the English entry for u1.
		userAndLang := []Query{
			{Field: "user", Operator: "Equal", Value: "u1"},
			{Field: "lang", Operator: "Equal", Value: "en"},
		}
		probe := generateSimilarEmbedding(cacheEntries[0].embedding, 0.9)
		nearest, err := setup.Store.GetNearest(setup.ctx, TestClassName, probe, userAndLang, fields, 0.7, 10)
		require.NoError(t, err)
		assert.Len(t, nearest, 1)
	})
}
// ============================================================================
// INTERFACE COMPLIANCE TESTS
// ============================================================================
// TestWeaviateStore_InterfaceCompliance is a compile-time check that
// *WeaviateStore satisfies the VectorStore interface; it asserts nothing at
// run time.
func TestWeaviateStore_InterfaceCompliance(t *testing.T) {
	var store VectorStore = (*WeaviateStore)(nil)
	_ = store
}
// TestVectorStoreFactory_Weaviate checks that NewVectorStore returns a
// *WeaviateStore when configured with VectorStoreTypeWeaviate.
//
// Skipped in -short mode, and also skipped (not failed) when the store cannot
// be created, e.g. because no Weaviate instance is reachable.
func TestVectorStoreFactory_Weaviate(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration tests in short mode")
	}

	logger := bifrost.NewDefaultLogger(schemas.LogLevelInfo)
	config := &Config{
		Enabled: true,
		Type:    VectorStoreTypeWeaviate,
		Config: WeaviateConfig{
			Scheme: getEnvWithDefault("WEAVIATE_SCHEME", DefaultTestScheme),
			Host:   schemas.NewEnvVar(getEnvWithDefault("WEAVIATE_HOST", DefaultTestHost)),
			APIKey: schemas.NewEnvVar("env.WEAVIATE_API_KEY"),
		},
	}

	store, err := NewVectorStore(context.Background(), config, logger)
	if err != nil {
		t.Skipf("Could not create Weaviate store: %v", err)
	}
	// Surface a failing Close in the test log instead of dropping the error.
	defer func() {
		if closeErr := store.Close(context.Background(), TestClassName); closeErr != nil {
			t.Logf("Warning: Failed to close store: %v", closeErr)
		}
	}()

	// Verify it's actually a WeaviateStore.
	weaviateStore, ok := store.(*WeaviateStore)
	assert.True(t, ok)
	assert.NotNil(t, weaviateStore)
}
// TestWeaviateStore_NamespaceDimensionHandling verifies that a namespace can
// be deleted and recreated under the same name with a different vector
// dimension (512, then 1024), and that adding, reading and vector search work
// after the recreation. Skipped in -short mode.
func TestWeaviateStore_NamespaceDimensionHandling(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration tests in short mode")
	}

	setup := NewTestSetup(t)
	defer setup.Cleanup(t)

	testClassName := "TestDimensionHandling"

	t.Run("Recreate class with different dimension should not crash", func(t *testing.T) {
		// Always remove the namespace when the subtest ends, including when a
		// require below aborts it, so a failed run does not leave the class
		// behind for the next one. Deferred calls still run on FailNow.
		defer func() {
			if err := setup.Store.DeleteNamespace(setup.ctx, testClassName); err != nil {
				t.Logf("Warning: Failed to cleanup class: %v", err)
			}
		}()

		properties := map[string]VectorStoreProperties{
			"type": {DataType: VectorStorePropertyTypeString},
			"test": {DataType: VectorStorePropertyTypeString},
		}

		// Step 1: Create class with dimension 512.
		err := setup.Store.CreateNamespace(setup.ctx, testClassName, 512, properties)
		require.NoError(t, err)

		// Add a document with 512-dimensional embedding.
		testKey512 := generateUUID()
		embedding512 := generateTestEmbedding(512)
		metadata := map[string]interface{}{
			"type": "test_doc",
			"test": "dimension_512",
		}
		err = setup.Store.Add(setup.ctx, testClassName, testKey512, embedding512, metadata)
		require.NoError(t, err)

		// Verify it was added.
		result, err := setup.Store.GetChunk(setup.ctx, testClassName, testKey512)
		require.NoError(t, err)
		assert.Equal(t, "dimension_512", result.Properties["test"])

		// Step 2: Delete the class/namespace.
		err = setup.Store.DeleteNamespace(setup.ctx, testClassName)
		require.NoError(t, err)

		// Step 3: Create class with same name but different dimension - should not crash.
		err = setup.Store.CreateNamespace(setup.ctx, testClassName, 1024, properties)
		require.NoError(t, err)

		// Add a document with 1024-dimensional embedding.
		testKey1024 := generateUUID()
		embedding1024 := generateTestEmbedding(1024)
		metadata1024 := map[string]interface{}{
			"type": "test_doc",
			"test": "dimension_1024",
		}
		err = setup.Store.Add(setup.ctx, testClassName, testKey1024, embedding1024, metadata1024)
		require.NoError(t, err)

		// Verify new document exists.
		result, err = setup.Store.GetChunk(setup.ctx, testClassName, testKey1024)
		require.NoError(t, err)
		assert.Equal(t, "dimension_1024", result.Properties["test"])

		// Verify vector search works with new dimension.
		vectorResults, err := setup.Store.GetNearest(setup.ctx, testClassName, embedding1024, nil, []string{"type", "test"}, 0.8, 10)
		require.NoError(t, err)
		// require (not assert): indexing element 0 below must not happen on
		// an empty result, which would panic instead of failing cleanly.
		require.GreaterOrEqual(t, len(vectorResults), 1)
		assert.NotNil(t, vectorResults[0].Score)
	})
}