package tracing import ( "context" "testing" "time" "github.com/maximhq/bifrost/core/schemas" ) type testRealtimeObservabilityPlugin struct { injected chan *schemas.Trace } func (p *testRealtimeObservabilityPlugin) GetName() string { return "test-observability" } func (p *testRealtimeObservabilityPlugin) Cleanup() error { return nil } func (p *testRealtimeObservabilityPlugin) PreLLMHook(_ *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error) { return req, nil, nil } func (p *testRealtimeObservabilityPlugin) PostLLMHook(_ *schemas.BifrostContext, resp *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) { return resp, bifrostErr, nil } func (p *testRealtimeObservabilityPlugin) Inject(_ context.Context, trace *schemas.Trace) error { if trace == nil { p.injected <- nil return nil } traceCopy := *trace p.injected <- &traceCopy return nil } func TestTracer_CompleteAndFlushTraceInjectsObservabilityPlugins(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() traceID := tracer.CreateTrace("") plugin := &testRealtimeObservabilityPlugin{ injected: make(chan *schemas.Trace, 1), } tracer.SetObservabilityPlugins([]schemas.ObservabilityPlugin{plugin}) tracer.CompleteAndFlushTrace(traceID) select { case trace := <-plugin.injected: if trace == nil || trace.TraceID != traceID { t.Fatalf("injected trace = %+v, want trace %q", trace, traceID) } case <-time.After(time.Second): t.Fatal("timed out waiting for observability inject") } if got := tracer.store.GetTrace(traceID); got != nil { t.Fatalf("trace %q was not released after flush", traceID) } } func TestTracer_StartSpan_RootSpanWithW3CParent(t *testing.T) { // This is the key test: verifies that when an incoming request has a W3C traceparent header, // the root span in Bifrost correctly links to the upstream service's span. store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() // Simulate incoming W3C traceparent: 00-{traceID}-{parentSpanID}-01 inheritedTraceID := "69538b980000000079943934f90c1d40" externalParentSpanID := "aad09d1659b4c7e3" // Create trace with inherited trace ID traceID := tracer.CreateTrace(inheritedTraceID) if traceID != inheritedTraceID { t.Errorf("CreateTrace() = %q, want inherited trace ID %q", traceID, inheritedTraceID) } // Set up context with trace ID and parent span ID (as middleware would do) ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID) // Create root span - this should link to the external parent newCtx, handle := tracer.StartSpan(ctx, "bifrost-http-request", schemas.SpanKindHTTPRequest) if handle == nil { t.Fatal("StartSpan() returned nil handle") } // Verify the span was created with correct parent trace := store.GetTrace(traceID) if trace == nil { t.Fatal("Trace not found in store") } if trace.RootSpan == nil { t.Fatal("Root span not set on trace") } // THE CRITICAL CHECK: Root span should have the external parent span ID if trace.RootSpan.ParentID != externalParentSpanID { t.Errorf("Root span ParentID = %q, want external parent span ID %q", trace.RootSpan.ParentID, externalParentSpanID) } // Verify trace ID is preserved if trace.RootSpan.TraceID != inheritedTraceID { t.Errorf("Root span TraceID = %q, want %q", trace.RootSpan.TraceID, inheritedTraceID) } // Verify context has span ID for child span creation spanID, ok := newCtx.Value(schemas.BifrostContextKeySpanID).(string) if !ok || spanID == "" { t.Error("Context should have span ID after StartSpan()") } if spanID != trace.RootSpan.SpanID { t.Errorf("Context span ID = %q, want %q", spanID, trace.RootSpan.SpanID) } } func TestTracer_StartSpan_RootSpanWithoutW3CParent(t *testing.T) { // When there's no incoming W3C context, root span should have no parent store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() // Create new trace (no inherited trace ID) traceID := tracer.CreateTrace("") // Set up context with only trace ID (no parent span ID) ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) // Create root span _, handle := tracer.StartSpan(ctx, "local-request", schemas.SpanKindHTTPRequest) if handle == nil { t.Fatal("StartSpan() returned nil handle") } trace := store.GetTrace(traceID) if trace == nil { t.Fatal("Trace not found in store") } // Root span should have no parent if trace.RootSpan.ParentID != "" { t.Errorf("Root span ParentID = %q, want empty string (no W3C parent)", trace.RootSpan.ParentID) } } func TestTracer_StartSpan_ChildSpanLinking(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() inheritedTraceID := "69538b980000000079943934f90c1d40" externalParentSpanID := "aad09d1659b4c7e3" traceID := tracer.CreateTrace(inheritedTraceID) // Set up context with W3C parent span ID ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID) // Create root span rootCtx, rootHandle := tracer.StartSpan(ctx, "http-request", schemas.SpanKindHTTPRequest) if rootHandle == nil { t.Fatal("StartSpan() returned nil handle for root span") } // Create child span using the context from root span childCtx, childHandle := tracer.StartSpan(rootCtx, "llm-call", schemas.SpanKindLLMCall) if childHandle == nil { t.Fatal("StartSpan() returned nil handle for child span") } trace := store.GetTrace(traceID) // Find the child span var childSpan *schemas.Span for _, span := range trace.Spans { if span.Name == "llm-call" { childSpan = span break } } if childSpan == nil { t.Fatal("Child span not found in trace") } // Child span should have root span as parent (not the external parent) if childSpan.ParentID != trace.RootSpan.SpanID { t.Errorf("Child span ParentID = %q, want root span ID %q", childSpan.ParentID, trace.RootSpan.SpanID) } // Create grandchild span _, grandchildHandle := tracer.StartSpan(childCtx, "plugin-call", schemas.SpanKindPlugin) if grandchildHandle == nil { t.Fatal("StartSpan() returned nil handle for grandchild span") } // Find the grandchild span var grandchildSpan *schemas.Span for _, span := range trace.Spans { if span.Name == "plugin-call" { grandchildSpan = span break } } if grandchildSpan == nil { t.Fatal("Grandchild span not found in trace") } // Grandchild should have child as parent if grandchildSpan.ParentID != childSpan.SpanID { t.Errorf("Grandchild span ParentID = %q, want child span ID %q", grandchildSpan.ParentID, childSpan.SpanID) } } func TestTracer_StartSpan_NoTraceID(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() // Context without trace ID ctx := context.Background() newCtx, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest) if handle != nil { t.Error("StartSpan() should return nil handle when no trace ID in context") } // Context should be unchanged if newCtx != ctx { t.Error("Context should be unchanged when StartSpan() fails") } } func TestTracer_EndTrace_ReturnsTraceData(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() inheritedTraceID := "69538b980000000079943934f90c1d40" externalParentSpanID := "aad09d1659b4c7e3" traceID := tracer.CreateTrace(inheritedTraceID) ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID) _, rootHandle := tracer.StartSpan(ctx, "http-request", schemas.SpanKindHTTPRequest) tracer.EndSpan(rootHandle, schemas.SpanStatusOk, "") trace := tracer.EndTrace(traceID) if trace == nil { t.Fatal("EndTrace() returned nil") } if trace.TraceID != inheritedTraceID { t.Errorf("trace.TraceID = %q, want %q", trace.TraceID, inheritedTraceID) } if len(trace.Spans) != 1 { t.Errorf("len(trace.Spans) = %d, want 1", len(trace.Spans)) } // Root span should still have external parent if trace.RootSpan.ParentID != externalParentSpanID { t.Errorf("Root span ParentID = %q, want %q", trace.RootSpan.ParentID, externalParentSpanID) } } func TestTracer_SetAttribute(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() traceID := tracer.CreateTrace("") ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) _, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest) tracer.SetAttribute(handle, "http.method", "POST") tracer.SetAttribute(handle, "http.status_code", 200) trace := store.GetTrace(traceID) span := trace.RootSpan if span.Attributes["http.method"] != "POST" { t.Errorf("span attribute http.method = %v, want POST", span.Attributes["http.method"]) } if span.Attributes["http.status_code"] != 200 { t.Errorf("span attribute http.status_code = %v, want 200", span.Attributes["http.status_code"]) } } func TestTracer_AddEvent(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() traceID := tracer.CreateTrace("") ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) _, handle := tracer.StartSpan(ctx, "operation", schemas.SpanKindHTTPRequest) tracer.AddEvent(handle, "request.received", map[string]any{ "size": 1024, }) trace := store.GetTrace(traceID) span := trace.RootSpan if len(span.Events) != 1 { t.Fatalf("len(span.Events) = %d, want 1", len(span.Events)) } if span.Events[0].Name != "request.received" { t.Errorf("event name = %q, want request.received", span.Events[0].Name) } if span.Events[0].Attributes["size"] != 1024 { t.Errorf("event attribute size = %v, want 1024", span.Events[0].Attributes["size"]) } } // TestIntegration_FullDistributedTraceFlow tests the complete flow of receiving // a distributed trace from an upstream service and properly linking spans. func TestIntegration_FullDistributedTraceFlow(t *testing.T) { store := NewTraceStore(5*time.Minute, nil) defer store.Stop() tracer := NewTracer(store, nil, nil) defer tracer.Stop() // Simulating headers from user's actual Datadog request: // traceparent: 00-69538b980000000079943934f90c1d40-aad09d1659b4c7e3-01 inheritedTraceID := "69538b980000000079943934f90c1d40" externalParentSpanID := "aad09d1659b4c7e3" // Step 1: Middleware extracts trace context and creates trace traceID := tracer.CreateTrace(inheritedTraceID) // Step 2: Middleware sets up context (simulating what TracingMiddleware does) ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyTraceID, traceID) ctx = context.WithValue(ctx, schemas.BifrostContextKeyParentSpanID, externalParentSpanID) // Step 3: Middleware creates root span httpCtx, httpHandle := tracer.StartSpan(ctx, "/v1/chat/completions", schemas.SpanKindHTTPRequest) tracer.SetAttribute(httpHandle, "http.method", "POST") // Step 4: Bifrost creates LLM call span llmCtx, llmHandle := tracer.StartSpan(httpCtx, "openai.chat.completions", schemas.SpanKindLLMCall) tracer.SetAttribute(llmHandle, "llm.model", "gpt-4") tracer.SetAttribute(llmHandle, "llm.provider", "openai") // Step 5: Plugin creates its own span _, pluginHandle := tracer.StartSpan(llmCtx, "governance-plugin", schemas.SpanKindPlugin) tracer.SetAttribute(pluginHandle, "plugin.name", "governance") // Step 6: Complete spans (in reverse order) tracer.EndSpan(pluginHandle, schemas.SpanStatusOk, "") tracer.EndSpan(llmHandle, schemas.SpanStatusOk, "") tracer.EndSpan(httpHandle, schemas.SpanStatusOk, "") // Step 7: Complete trace trace := tracer.EndTrace(traceID) // Verify the trace structure for Datadog if trace.TraceID != inheritedTraceID { t.Errorf("Trace ID should match inherited ID from Datadog: got %q, want %q", trace.TraceID, inheritedTraceID) } // Find spans by name var httpSpan, llmSpan, pluginSpan *schemas.Span for _, span := range trace.Spans { switch span.Name { case "/v1/chat/completions": httpSpan = span case "openai.chat.completions": llmSpan = span case "governance-plugin": pluginSpan = span } } if httpSpan == nil || llmSpan == nil || pluginSpan == nil { t.Fatal("Not all spans found in trace") } // Verify span hierarchy for Datadog linking: // External Parent (aad09d1659b4c7e3) -> HTTP Span -> LLM Span -> Plugin Span // HTTP span should link to Datadog's parent span if httpSpan.ParentID != externalParentSpanID { t.Errorf("HTTP span should link to Datadog parent: got ParentID %q, want %q", httpSpan.ParentID, externalParentSpanID) } // LLM span should be child of HTTP span if llmSpan.ParentID != httpSpan.SpanID { t.Errorf("LLM span should be child of HTTP span: got ParentID %q, want %q", llmSpan.ParentID, httpSpan.SpanID) } // Plugin span should be child of LLM span if pluginSpan.ParentID != llmSpan.SpanID { t.Errorf("Plugin span should be child of LLM span: got ParentID %q, want %q", pluginSpan.ParentID, llmSpan.SpanID) } // All spans should have the same trace ID if httpSpan.TraceID != inheritedTraceID || llmSpan.TraceID != inheritedTraceID || pluginSpan.TraceID != inheritedTraceID { t.Error("All spans should have the inherited trace ID") } t.Logf("Trace structure (for Datadog):") t.Logf(" Trace ID: %s", trace.TraceID) t.Logf(" External Parent Span: %s (from Datadog)", externalParentSpanID) t.Logf(" -> HTTP Span: %s (ParentID: %s)", httpSpan.SpanID, httpSpan.ParentID) t.Logf(" -> LLM Span: %s (ParentID: %s)", llmSpan.SpanID, llmSpan.ParentID) t.Logf(" -> Plugin Span: %s (ParentID: %s)", pluginSpan.SpanID, pluginSpan.ParentID) }