package configstore

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/google/uuid"
	"github.com/maximhq/bifrost/core/schemas"
	"github.com/maximhq/bifrost/framework/configstore/tables"
)

// Default lock configuration values.
const (
	// DefaultLockTTL is the TTL a manager gives its locks unless WithDefaultTTL overrides it.
	DefaultLockTTL = 30 * time.Second
	// DefaultRetryInterval is the pause between acquisition attempts unless
	// WithRetryInterval overrides it.
	DefaultRetryInterval = 100 * time.Millisecond
	// DefaultMaxRetries is the retry budget for acquisition unless WithMaxRetries overrides it.
	DefaultMaxRetries = 100
	// DefaultCleanupInterval is presumably the period between expired-lock cleanup runs;
	// nothing in this part of the file reads it.
	DefaultCleanupInterval = 5 * time.Minute
)

// Lock errors.
var (
	// ErrLockNotAcquired reports that the lock could not be acquired.
	ErrLockNotAcquired = errors.New("failed to acquire lock")
	// ErrLockNotHeld reports that the lock is not held by this holder.
	ErrLockNotHeld = errors.New("lock not held by this holder")
	// ErrLockExpired reports that the lock has expired.
	ErrLockExpired = errors.New("lock has expired")
	// ErrEmptyLockKey reports that an empty lock key was supplied.
	ErrEmptyLockKey = errors.New("empty lock key")
)

// LockStore defines the storage operations required for distributed locking.
// This interface abstracts the database operations, making the lock implementation
// testable and decoupled from the specific database implementation.
type LockStore interface {
	// TryAcquireLock attempts to insert a lock row. Returns true if the lock was acquired.
	// If the lock already exists and is not expired, returns false.
	TryAcquireLock(ctx context.Context, lock *tables.TableDistributedLock) (bool, error)

	// GetLock retrieves a lock by its key. Returns nil if the lock doesn't exist.
	GetLock(ctx context.Context, lockKey string) (*tables.TableDistributedLock, error)

	// UpdateLockExpiry updates the expiration time for an existing lock.
	// Only succeeds if the holder ID matches the current lock holder.
	UpdateLockExpiry(ctx context.Context, lockKey, holderID string, expiresAt time.Time) error

	// ReleaseLock deletes a lock if the holder ID matches.
	// Returns true if the lock was released, false if it wasn't held by the given holder.
	ReleaseLock(ctx context.Context, lockKey, holderID string) (bool, error)

	// CleanupExpiredLocks removes all locks that have expired.
	// Returns the number of locks cleaned up.
	CleanupExpiredLocks(ctx context.Context) (int64, error)

	// CleanupExpiredLockByKey atomically deletes a lock only if it has expired.
	// Returns true if an expired lock was deleted, false if the lock doesn't exist or hasn't expired.
	CleanupExpiredLockByKey(ctx context.Context, lockKey string) (bool, error)
}

// DistributedLockManager creates and manages distributed locks.
// It provides a factory for creating locks with consistent configuration.
type DistributedLockManager struct {
	store         LockStore      // backing storage handed to every lock it creates
	logger        schemas.Logger // logger handed to every lock it creates
	defaultTTL    time.Duration  // TTL copied into locks created by NewLock
	retryInterval time.Duration  // retry pause copied into created locks
	maxRetries    int            // retry budget copied into created locks
}

// DistributedLockManagerOption is a function that configures a DistributedLockManager.
type DistributedLockManagerOption func(*DistributedLockManager)

// WithDefaultTTL sets the default TTL for locks created by this manager.
func WithDefaultTTL(ttl time.Duration) DistributedLockManagerOption {
	return func(m *DistributedLockManager) {
		m.defaultTTL = ttl
	}
}

// WithRetryInterval sets the interval between lock acquisition retries.
func WithRetryInterval(interval time.Duration) DistributedLockManagerOption {
	return func(m *DistributedLockManager) {
		m.retryInterval = interval
	}
}

// WithMaxRetries sets the maximum number of retries for lock acquisition.
func WithMaxRetries(maxRetries int) DistributedLockManagerOption {
	return func(m *DistributedLockManager) {
		m.maxRetries = maxRetries
	}
}

// NewDistributedLockManager creates a new lock manager with the given store and options.
// The manager starts from DefaultLockTTL, DefaultRetryInterval and DefaultMaxRetries;
// options are then applied in the order given, so a later option overrides an earlier one.
// Nil options are ignored.
func NewDistributedLockManager(store LockStore, logger schemas.Logger, opts ...DistributedLockManagerOption) *DistributedLockManager {
	m := &DistributedLockManager{
		store:         store,
		logger:        logger,
		defaultTTL:    DefaultLockTTL,
		retryInterval: DefaultRetryInterval,
		maxRetries:    DefaultMaxRetries,
	}
	for _, opt := range opts {
		// Calling a nil function value panics, so skip options that were
		// left unset by the caller (e.g. a conditionally built option list).
		if opt == nil {
			continue
		}
		opt(m)
	}
	return m
}

// NewLock creates a new DistributedLock for the given key.
// The lock is not acquired until Lock() or TryLock() is called.
// Returns an error if the lock key is empty.
func (m *DistributedLockManager) NewLock(lockKey string) (*DistributedLock, error) { if lockKey == "" { return nil, ErrEmptyLockKey } return &DistributedLock{ store: m.store, logger: m.logger, lockKey: lockKey, holderID: uuid.New().String(), ttl: m.defaultTTL, retryInterval: m.retryInterval, maxRetries: m.maxRetries, }, nil } // NewLockWithTTL creates a new DistributedLock with a custom TTL. // Returns an error if the lock key is empty. func (m *DistributedLockManager) NewLockWithTTL(lockKey string, ttl time.Duration) (*DistributedLock, error) { lock, err := m.NewLock(lockKey) if err != nil { return nil, err } lock.ttl = ttl return lock, nil } // CleanupExpiredLocks removes all expired locks from the store. // This can be called periodically to clean up stale locks. func (m *DistributedLockManager) CleanupExpiredLocks(ctx context.Context) (int64, error) { return m.store.CleanupExpiredLocks(ctx) } // DistributedLock represents a distributed lock that can be acquired and released // across multiple processes or instances. type DistributedLock struct { store LockStore logger schemas.Logger lockKey string holderID string ttl time.Duration retryInterval time.Duration maxRetries int acquired bool } // Lock acquires the lock, blocking until it's available or the context is cancelled. // It will make up to (maxRetries + 1) attempts, sleeping retryInterval between failed attempts. 
func (l *DistributedLock) Lock(ctx context.Context) error { // if config_store is not present, return true if l.store == nil { return nil } for i := 0; i <= l.maxRetries; i++ { select { case <-ctx.Done(): return ctx.Err() default: } acquired, err := l.TryLock(ctx) if err != nil { return fmt.Errorf("error acquiring lock: %w", err) } if acquired { return nil } // Wait before retrying if i < l.maxRetries { select { case <-ctx.Done(): return ctx.Err() case <-time.After(l.retryInterval): } } } return ErrLockNotAcquired } // LockWithRetry acquires the lock, blocking until it's available or the context is cancelled. // It will retry up to maxRetries times with retryInterval between attempts. func (l *DistributedLock) LockWithRetry(ctx context.Context, maxRetries int) error { // if config_store is not present, return true if l.store == nil { return nil } for i := 0; i <= maxRetries; i++ { select { case <-ctx.Done(): return ctx.Err() default: } acquired, err := l.TryLock(ctx) if err != nil { return fmt.Errorf("error acquiring lock: %w", err) } if acquired { return nil } // Wait before retrying if i < maxRetries { // Exponential backoff capped to avoid overflow (max 32s). exp := i if exp > 5 { exp = 5 } backoff := time.Duration(1<