first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1,318 @@
package logstore
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/bytedance/sonic"
"github.com/google/uuid"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
"github.com/valyala/fasthttp"
)
const (
// DefaultAsyncJobResultTTL is the default TTL for async job results in seconds (1 hour).
DefaultAsyncJobResultTTL = 3600
)
const (
asyncJobCleanupInterval = 1 * time.Minute
asyncJobCleanupTimeout = 1 * time.Minute
asyncJobStaleProcessingHours = 24
)
// --- AsyncJobExecutor ---
// AsyncOperation represents a function that can be executed asynchronously.
// It returns the response and an optional BifrostError.
type AsyncOperation func(ctx *schemas.BifrostContext) (any, *schemas.BifrostError)
// GovernanceStore is an interface that provides access to the governance store.
type GovernanceStore interface {
GetVirtualKey(ctx context.Context, vkValue string) (*configstoreTables.TableVirtualKey, bool)
}
// AsyncJobExecutor manages async job creation and background execution.
type AsyncJobExecutor struct {
logstore LogStore
governanceStore GovernanceStore
logger schemas.Logger
}
// NewAsyncJobExecutor creates a new AsyncJobExecutor.
func NewAsyncJobExecutor(logstore LogStore, governanceStore GovernanceStore, logger schemas.Logger) *AsyncJobExecutor {
return &AsyncJobExecutor{
logstore: logstore,
governanceStore: governanceStore,
logger: logger,
}
}
// RetrieveJob retrieves a job by its ID.
func (e *AsyncJobExecutor) RetrieveJob(ctx context.Context, jobID string, vkValue *string, operationType schemas.RequestType) (*AsyncJob, error) {
job, err := e.logstore.FindAsyncJobByID(ctx, jobID)
if err != nil {
if errors.Is(err, ErrNotFound) {
return nil, fmt.Errorf("job not found or expired")
}
return nil, fmt.Errorf("%w: %w", ErrJobInternal, err)
}
if job.VirtualKeyID != nil {
if vkValue == nil {
return nil, fmt.Errorf("virtual key is required")
}
vk, ok := e.governanceStore.GetVirtualKey(ctx, *vkValue)
if !ok {
return nil, fmt.Errorf("virtual key not found")
}
if *job.VirtualKeyID != vk.ID {
return nil, fmt.Errorf("virtual key mismatch")
}
}
if job.RequestType != operationType {
return nil, fmt.Errorf("operation type mismatch")
}
return job, nil
}
// SubmitJob creates a pending job, starts background execution, and returns the job record.
func (e *AsyncJobExecutor) SubmitJob(bifrostCtx *schemas.BifrostContext, resultTTL int, operation AsyncOperation, operationType schemas.RequestType) (*AsyncJob, error) {
if resultTTL <= 0 {
resultTTL = DefaultAsyncJobResultTTL
}
virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
var virtualKeyID *string
if virtualKeyValue != nil {
vk, ok := e.governanceStore.GetVirtualKey(bifrostCtx, *virtualKeyValue)
if !ok {
return nil, fmt.Errorf("virtual key not found")
}
virtualKeyID = &vk.ID
}
now := time.Now().UTC()
job := &AsyncJob{
ID: uuid.New().String(),
Status: schemas.AsyncJobStatusPending,
RequestType: operationType,
VirtualKeyID: virtualKeyID,
ResultTTL: resultTTL,
CreatedAt: now,
}
ctx := context.Background()
if err := e.logstore.CreateAsyncJob(ctx, job); err != nil {
return nil, fmt.Errorf("failed to create async job: %w", err)
}
var contextValues map[any]any
if bifrostCtx != nil {
contextValues = bifrostCtx.GetUserValues()
}
go e.executeJob(job.ID, job.ResultTTL, operation, contextValues)
return job, nil
}
// executeJob runs the operation in the background and updates the job record.
func (e *AsyncJobExecutor) executeJob(jobID string, resultTTL int, operation AsyncOperation, contextValues map[any]any) {
ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
// Restore original request context values (virtual key, tracing headers, etc.)
for k, v := range contextValues {
ctx.SetValue(k, v)
}
// Clear trace context inherited from the original HTTP request.
ctx.ClearValue(schemas.BifrostContextKeyTraceID)
ctx.ClearValue(schemas.BifrostContextKeyParentSpanID)
ctx.ClearValue(schemas.BifrostContextKeySpanID)
markFailed := func(msg string) {
now := time.Now().UTC()
expiresAt := now.Add(time.Duration(resultTTL) * time.Second)
errJSON, _ := sonic.Marshal(&schemas.BifrostError{Error: &schemas.ErrorField{Message: msg}})
if err := e.logstore.UpdateAsyncJob(ctx, jobID, map[string]any{
"status": schemas.AsyncJobStatusFailed,
"status_code": fasthttp.StatusInternalServerError,
"error": string(errJSON),
"completed_at": now,
"expires_at": expiresAt,
}); err != nil {
e.logger.Warn("failed to update async job to failed: %v", err)
}
}
// The bifrost execution flow is very stable and panics are not expected.
// This recover is purely defensive to ensure the job always reaches a terminal
// state rather than being stuck in "processing" if an unexpected panic occurs.
defer func() {
if r := recover(); r != nil {
e.logger.Warn("async job %s panicked: %v", jobID, r)
markFailed(fmt.Sprintf("internal error: %v", r))
}
}()
// Mark as processing
if err := e.logstore.UpdateAsyncJob(ctx, jobID, map[string]interface{}{
"status": schemas.AsyncJobStatusProcessing,
}); err != nil {
e.logger.Warn("failed to update async job: %v", err)
}
ctx.SetValue(schemas.BifrostIsAsyncRequest, true)
// Execute the operation
resp, bifrostErr := operation(ctx)
now := time.Now().UTC()
expiresAt := now.Add(time.Duration(resultTTL) * time.Second)
if bifrostErr != nil {
errJSON, err := sonic.Marshal(bifrostErr)
if err != nil {
e.logger.Warn("failed to marshal bifrost error: %v", err)
markFailed(fmt.Sprintf("failed to serialize error response: %v", err))
return
}
statusCode := fasthttp.StatusInternalServerError
if bifrostErr.StatusCode != nil {
statusCode = *bifrostErr.StatusCode
}
if err := e.logstore.UpdateAsyncJob(ctx, jobID, map[string]interface{}{
"status": schemas.AsyncJobStatusFailed,
"status_code": statusCode,
"error": string(errJSON),
"completed_at": now,
"expires_at": expiresAt,
}); err != nil {
e.logger.Warn("failed to update async job: %v", err)
}
return
}
respJSON, err := sonic.Marshal(resp)
if err != nil {
e.logger.Warn("failed to marshal result: %v", err)
markFailed(fmt.Sprintf("failed to serialize result: %v", err))
return
}
if err := e.logstore.UpdateAsyncJob(ctx, jobID, map[string]interface{}{
"status": schemas.AsyncJobStatusCompleted,
"status_code": fasthttp.StatusOK,
"response": string(respJSON),
"completed_at": now,
"expires_at": expiresAt,
}); err != nil {
e.logger.Warn("failed to update async job: %v", err)
}
}
// --- Cleaner ---
// AsyncJobCleaner manages the cleanup of expired async jobs.
type AsyncJobCleaner struct {
store LogStore
logger schemas.Logger
stopCleanup chan struct{}
mu sync.Mutex
}
// NewAsyncJobCleaner creates a new AsyncJobCleaner instance.
func NewAsyncJobCleaner(store LogStore, logger schemas.Logger) *AsyncJobCleaner {
return &AsyncJobCleaner{
store: store,
logger: logger,
}
}
// StartCleanupRoutine starts a goroutine that periodically cleans up expired async jobs.
func (c *AsyncJobCleaner) StartCleanupRoutine() {
c.mu.Lock()
defer c.mu.Unlock()
if c.stopCleanup != nil {
return
}
c.stopCleanup = make(chan struct{})
stopCh := c.stopCleanup
go func() {
// Run initial cleanup
ctx, cancel := context.WithTimeout(context.Background(), asyncJobCleanupTimeout)
c.cleanupExpiredJobs(ctx)
cancel()
ticker := time.NewTicker(asyncJobCleanupInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
ctx, cancel := context.WithTimeout(context.Background(), asyncJobCleanupTimeout)
c.cleanupExpiredJobs(ctx)
cancel()
case <-stopCh:
c.logger.Debug("async job cleanup routine stopped")
return
}
}
}()
c.logger.Debug("async job cleanup routine started (interval: %s)", asyncJobCleanupInterval)
}
// StopCleanupRoutine gracefully stops the cleanup goroutine.
func (c *AsyncJobCleaner) StopCleanupRoutine() {
c.mu.Lock()
defer c.mu.Unlock()
if c.stopCleanup == nil {
c.logger.Debug("async job cleanup routine already stopped")
return
}
close(c.stopCleanup)
c.stopCleanup = nil
}
// cleanupExpiredJobs deletes expired async jobs and stale processing jobs.
func (c *AsyncJobCleaner) cleanupExpiredJobs(ctx context.Context) {
deleted, err := c.store.DeleteExpiredAsyncJobs(ctx)
if err != nil {
c.logger.Warn("failed to delete expired async jobs: %v", err)
} else if deleted > 0 {
c.logger.Debug("async job cleanup completed: deleted %d expired jobs", deleted)
}
// Clean up jobs stuck in "processing" for more than 24 hours
// This handles edge cases like marshal failures or server crashes
staleSince := time.Now().UTC().Add(-asyncJobStaleProcessingHours * time.Hour)
staleDeleted, err := c.store.DeleteStaleAsyncJobs(ctx, staleSince)
if err != nil {
c.logger.Warn("failed to delete stale processing async jobs: %v", err)
} else if staleDeleted > 0 {
c.logger.Warn("async job cleanup: deleted %d stale processing jobs (stuck > %dh)", staleDeleted, asyncJobStaleProcessingHours)
}
}
// getVirtualKeyFromContext extracts the virtual key value from context.
// Returns nil if no VK is present (e.g., direct key mode or no governance),
// or if the context itself is nil (callers like SubmitJob may be invoked with
// a nil ctx by background paths that don't carry a VK).
func getVirtualKeyFromContext(ctx *schemas.BifrostContext) *string {
if ctx == nil {
return nil
}
vkValue := bifrost.GetStringFromContext(ctx, schemas.BifrostContextKeyVirtualKey)
if vkValue == "" {
return nil
}
return &vkValue
}