Files
bifrost/framework/tracing/tracer_test.go
Beyhan Oğur 880f412e2c first commit
2026-04-26 21:52:23 +03:00

440 lines
14 KiB
Go

package tracing
import (
"context"
"testing"
"time"
"github.com/maximhq/bifrost/core/schemas"
)
type testRealtimeObservabilityPlugin struct {
injected chan *schemas.Trace
}
func (p *testRealtimeObservabilityPlugin) GetName() string { return "test-observability" }
func (p *testRealtimeObservabilityPlugin) Cleanup() error { return nil }
func (p *testRealtimeObservabilityPlugin) PreLLMHook(_ *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error) {
return req, nil, nil
}
func (p *testRealtimeObservabilityPlugin) PostLLMHook(_ *schemas.BifrostContext, resp *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) {
return resp, bifrostErr, nil
}
func (p *testRealtimeObservabilityPlugin) Inject(_ context.Context, trace *schemas.Trace) error {
if trace == nil {
p.injected <- nil
return nil
}
traceCopy := *trace
p.injected <- &traceCopy
return nil
}
func TestTracer_CompleteAndFlushTraceInjectsObservabilityPlugins(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
traceID := tracer.CreateTrace("")
plugin := &testRealtimeObservabilityPlugin{
injected: make(chan *schemas.Trace, 1),
}
tracer.SetObservabilityPlugins([]schemas.ObservabilityPlugin{plugin})
tracer.CompleteAndFlushTrace(traceID)
select {
case trace := <-plugin.injected:
if trace == nil || trace.TraceID != traceID {
t.Fatalf("injected trace = %+v, want trace %q", trace, traceID)
}
case <-time.After(time.Second):
t.Fatal("timed out waiting for observability inject")
}
if got := tracer.store.GetTrace(traceID); got != nil {
t.Fatalf("trace %q was not released after flush", traceID)
}
}
func TestTracer_StartSpan_RootSpanWithW3CParent(t *testing.T) {
// This is the key test: verifies that when an incoming request has a W3C traceparent header,
// the root span in Bifrost correctly links to the upstream service's span.
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
// Simulate incoming W3C traceparent: 00-{traceID}-{parentSpanID}-01
inheritedTraceID := "69538b980000000079943934f90c1d40"
externalParentSpanID := "aad09d1659b4c7e3"
// Create trace with inherited trace ID
traceID := tracer.CreateTrace(inheritedTraceID)
if traceID != inheritedTraceID {
t.Errorf("CreateTrace() = %q, want inherited trace ID %q", traceID, inheritedTraceID)
}
// Set up context with trace ID and parent span ID (as middleware would do)
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID)
// Create root span - this should link to the external parent
newCtx, handle := tracer.StartSpan(ctx, "bifrost-http-request", schemas.SpanKindHTTPRequest)
if handle == nil {
t.Fatal("StartSpan() returned nil handle")
}
// Verify the span was created with correct parent
trace := store.GetTrace(traceID)
if trace == nil {
t.Fatal("Trace not found in store")
}
if trace.RootSpan == nil {
t.Fatal("Root span not set on trace")
}
// THE CRITICAL CHECK: Root span should have the external parent span ID
if trace.RootSpan.ParentID != externalParentSpanID {
t.Errorf("Root span ParentID = %q, want external parent span ID %q", trace.RootSpan.ParentID, externalParentSpanID)
}
// Verify trace ID is preserved
if trace.RootSpan.TraceID != inheritedTraceID {
t.Errorf("Root span TraceID = %q, want %q", trace.RootSpan.TraceID, inheritedTraceID)
}
// Verify context has span ID for child span creation
spanID, ok := newCtx.Value(schemas.BifrostContextKeySpanID).(string)
if !ok || spanID == "" {
t.Error("Context should have span ID after StartSpan()")
}
if spanID != trace.RootSpan.SpanID {
t.Errorf("Context span ID = %q, want %q", spanID, trace.RootSpan.SpanID)
}
}
func TestTracer_StartSpan_RootSpanWithoutW3CParent(t *testing.T) {
// When there's no incoming W3C context, root span should have no parent
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
// Create new trace (no inherited trace ID)
traceID := tracer.CreateTrace("")
// Set up context with only trace ID (no parent span ID)
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
// Create root span
_, handle := tracer.StartSpan(ctx, "local-request", schemas.SpanKindHTTPRequest)
if handle == nil {
t.Fatal("StartSpan() returned nil handle")
}
trace := store.GetTrace(traceID)
if trace == nil {
t.Fatal("Trace not found in store")
}
// Root span should have no parent
if trace.RootSpan.ParentID != "" {
t.Errorf("Root span ParentID = %q, want empty string (no W3C parent)", trace.RootSpan.ParentID)
}
}
func TestTracer_StartSpan_ChildSpanLinking(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
inheritedTraceID := "69538b980000000079943934f90c1d40"
externalParentSpanID := "aad09d1659b4c7e3"
traceID := tracer.CreateTrace(inheritedTraceID)
// Set up context with W3C parent span ID
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID)
// Create root span
rootCtx, rootHandle := tracer.StartSpan(ctx, "http-request", schemas.SpanKindHTTPRequest)
if rootHandle == nil {
t.Fatal("StartSpan() returned nil handle for root span")
}
// Create child span using the context from root span
childCtx, childHandle := tracer.StartSpan(rootCtx, "llm-call", schemas.SpanKindLLMCall)
if childHandle == nil {
t.Fatal("StartSpan() returned nil handle for child span")
}
trace := store.GetTrace(traceID)
// Find the child span
var childSpan *schemas.Span
for _, span := range trace.Spans {
if span.Name == "llm-call" {
childSpan = span
break
}
}
if childSpan == nil {
t.Fatal("Child span not found in trace")
}
// Child span should have root span as parent (not the external parent)
if childSpan.ParentID != trace.RootSpan.SpanID {
t.Errorf("Child span ParentID = %q, want root span ID %q", childSpan.ParentID, trace.RootSpan.SpanID)
}
// Create grandchild span
_, grandchildHandle := tracer.StartSpan(childCtx, "plugin-call", schemas.SpanKindPlugin)
if grandchildHandle == nil {
t.Fatal("StartSpan() returned nil handle for grandchild span")
}
// Find the grandchild span
var grandchildSpan *schemas.Span
for _, span := range trace.Spans {
if span.Name == "plugin-call" {
grandchildSpan = span
break
}
}
if grandchildSpan == nil {
t.Fatal("Grandchild span not found in trace")
}
// Grandchild should have child as parent
if grandchildSpan.ParentID != childSpan.SpanID {
t.Errorf("Grandchild span ParentID = %q, want child span ID %q", grandchildSpan.ParentID, childSpan.SpanID)
}
}
func TestTracer_StartSpan_NoTraceID(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
// Context without trace ID
ctx := context.Background()
newCtx, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest)
if handle != nil {
t.Error("StartSpan() should return nil handle when no trace ID in context")
}
// Context should be unchanged
if newCtx != ctx {
t.Error("Context should be unchanged when StartSpan() fails")
}
}
func TestTracer_EndTrace_ReturnsTraceData(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
inheritedTraceID := "69538b980000000079943934f90c1d40"
externalParentSpanID := "aad09d1659b4c7e3"
traceID := tracer.CreateTrace(inheritedTraceID)
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID)
_, rootHandle := tracer.StartSpan(ctx, "http-request", schemas.SpanKindHTTPRequest)
tracer.EndSpan(rootHandle, schemas.SpanStatusOk, "")
trace := tracer.EndTrace(traceID)
if trace == nil {
t.Fatal("EndTrace() returned nil")
}
if trace.TraceID != inheritedTraceID {
t.Errorf("trace.TraceID = %q, want %q", trace.TraceID, inheritedTraceID)
}
if len(trace.Spans) != 1 {
t.Errorf("len(trace.Spans) = %d, want 1", len(trace.Spans))
}
// Root span should still have external parent
if trace.RootSpan.ParentID != externalParentSpanID {
t.Errorf("Root span ParentID = %q, want %q", trace.RootSpan.ParentID, externalParentSpanID)
}
}
func TestTracer_SetAttribute(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
traceID := tracer.CreateTrace("")
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
_, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest)
tracer.SetAttribute(handle, "http.method", "POST")
tracer.SetAttribute(handle, "http.status_code", 200)
trace := store.GetTrace(traceID)
span := trace.RootSpan
if span.Attributes["http.method"] != "POST" {
t.Errorf("span attribute http.method = %v, want POST", span.Attributes["http.method"])
}
if span.Attributes["http.status_code"] != 200 {
t.Errorf("span attribute http.status_code = %v, want 200", span.Attributes["http.status_code"])
}
}
func TestTracer_AddEvent(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
traceID := tracer.CreateTrace("")
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
_, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest)
tracer.AddEvent(handle, "request.received", map[string]any{
"size": 1024,
})
trace := store.GetTrace(traceID)
span := trace.RootSpan
if len(span.Events) != 1 {
t.Fatalf("len(span.Events) = %d, want 1", len(span.Events))
}
if span.Events[0].Name != "request.received" {
t.Errorf("event name = %q, want request.received", span.Events[0].Name)
}
if span.Events[0].Attributes["size"] != 1024 {
t.Errorf("event attribute size = %v, want 1024", span.Events[0].Attributes["size"])
}
}
// TestIntegration_FullDistributedTraceFlow tests the complete flow of receiving
// a distributed trace from an upstream service and properly linking spans.
func TestIntegration_FullDistributedTraceFlow(t *testing.T) {
store := NewTraceStore(5*time.Minute, nil)
defer store.Stop()
tracer := NewTracer(store, nil, nil)
defer tracer.Stop()
// Simulating headers from user's actual Datadog request:
// traceparent: 00-69538b980000000079943934f90c1d40-aad09d1659b4c7e3-01
inheritedTraceID := "69538b980000000079943934f90c1d40"
externalParentSpanID := "aad09d1659b4c7e3"
// Step 1: Middleware extracts trace context and creates trace
traceID := tracer.CreateTrace(inheritedTraceID)
// Step 2: Middleware sets up context (simulating what TracingMiddleware does)
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID)
ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID)
// Step 3: Middleware creates root span
httpCtx, httpHandle := tracer.StartSpan(ctx, "/v1/chat/completions", schemas.SpanKindHTTPRequest)
tracer.SetAttribute(httpHandle, "http.method", "POST")
// Step 4: Bifrost creates LLM call span
llmCtx, llmHandle := tracer.StartSpan(httpCtx, "openai.chat.completions", schemas.SpanKindLLMCall)
tracer.SetAttribute(llmHandle, "llm.model", "gpt-4")
tracer.SetAttribute(llmHandle, "llm.provider", "openai")
// Step 5: Plugin creates its own span
_, pluginHandle := tracer.StartSpan(llmCtx, "governance-plugin", schemas.SpanKindPlugin)
tracer.SetAttribute(pluginHandle, "plugin.name", "governance")
// Step 6: Complete spans (in reverse order)
tracer.EndSpan(pluginHandle, schemas.SpanStatusOk, "")
tracer.EndSpan(llmHandle, schemas.SpanStatusOk, "")
tracer.EndSpan(httpHandle, schemas.SpanStatusOk, "")
// Step 7: Complete trace
trace := tracer.EndTrace(traceID)
// Verify the trace structure for Datadog
if trace.TraceID != inheritedTraceID {
t.Errorf("Trace ID should match inherited ID from Datadog: got %q, want %q", trace.TraceID, inheritedTraceID)
}
// Find spans by name
var httpSpan, llmSpan, pluginSpan *schemas.Span
for _, span := range trace.Spans {
switch span.Name {
case "/v1/chat/completions":
httpSpan = span
case "openai.chat.completions":
llmSpan = span
case "governance-plugin":
pluginSpan = span
}
}
if httpSpan == nil || llmSpan == nil || pluginSpan == nil {
t.Fatal("Not all spans found in trace")
}
// Verify span hierarchy for Datadog linking:
// External Parent (aad09d1659b4c7e3) -> HTTP Span -> LLM Span -> Plugin Span
// HTTP span should link to Datadog's parent span
if httpSpan.ParentID != externalParentSpanID {
t.Errorf("HTTP span should link to Datadog parent: got ParentID %q, want %q",
httpSpan.ParentID, externalParentSpanID)
}
// LLM span should be child of HTTP span
if llmSpan.ParentID != httpSpan.SpanID {
t.Errorf("LLM span should be child of HTTP span: got ParentID %q, want %q",
llmSpan.ParentID, httpSpan.SpanID)
}
// Plugin span should be child of LLM span
if pluginSpan.ParentID != llmSpan.SpanID {
t.Errorf("Plugin span should be child of LLM span: got ParentID %q, want %q",
pluginSpan.ParentID, llmSpan.SpanID)
}
// All spans should have the same trace ID
if httpSpan.TraceID != inheritedTraceID || llmSpan.TraceID != inheritedTraceID || pluginSpan.TraceID != inheritedTraceID {
t.Error("All spans should have the inherited trace ID")
}
t.Logf("Trace structure (for Datadog):")
t.Logf(" Trace ID: %s", trace.TraceID)
t.Logf(" External Parent Span: %s (from Datadog)", externalParentSpanID)
t.Logf(" -> HTTP Span: %s (ParentID: %s)", httpSpan.SpanID, httpSpan.ParentID)
t.Logf(" -> LLM Span: %s (ParentID: %s)", llmSpan.SpanID, llmSpan.ParentID)
t.Logf(" -> Plugin Span: %s (ParentID: %s)", pluginSpan.SpanID, pluginSpan.ParentID)
}