mirror of
https://github.com/openimsdk/open-im-server.git
synced 2025-06-26 22:40:40 +08:00
Update logic.
This commit is contained in:
parent
dae7c072ed
commit
1389c8b80d
@ -63,10 +63,6 @@ func Start(ctx context.Context, conf *Config, client discovery.Conn, service grp
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := locker.Start(ctx); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
srv := &cronServer{
|
srv := &cronServer{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
config: conf,
|
config: conf,
|
||||||
@ -92,8 +88,6 @@ func Start(ctx context.Context, conf *Config, client discovery.Conn, service grp
|
|||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
log.ZDebug(ctx, "cron task server is shutting down")
|
log.ZDebug(ctx, "cron task server is shutting down")
|
||||||
|
|
||||||
locker.Stop() // release distributed lock
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,7 +107,7 @@ func (c *cronServer) registerClearS3() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
||||||
c.locker.ExecuteWithLock(c.ctx, c.clearS3)
|
c.locker.ExecuteWithLock(c.ctx, "clearS3", c.clearS3)
|
||||||
})
|
})
|
||||||
return errs.WrapMsg(err, "failed to register clear s3 cron task")
|
return errs.WrapMsg(err, "failed to register clear s3 cron task")
|
||||||
}
|
}
|
||||||
@ -124,14 +118,14 @@ func (c *cronServer) registerDeleteMsg() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
||||||
c.locker.ExecuteWithLock(c.ctx, c.deleteMsg)
|
c.locker.ExecuteWithLock(c.ctx, "deleteMsg", c.deleteMsg)
|
||||||
})
|
})
|
||||||
return errs.WrapMsg(err, "failed to register delete msg cron task")
|
return errs.WrapMsg(err, "failed to register delete msg cron task")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cronServer) registerClearUserMsg() error {
|
func (c *cronServer) registerClearUserMsg() error {
|
||||||
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
_, err := c.cron.AddFunc(c.config.CronTask.CronExecuteTime, func() {
|
||||||
c.locker.ExecuteWithLock(c.ctx, c.clearUserMsg)
|
c.locker.ExecuteWithLock(c.ctx, "clearUserMsg", c.clearUserMsg)
|
||||||
})
|
})
|
||||||
return errs.WrapMsg(err, "failed to register clear user msg cron task")
|
return errs.WrapMsg(err, "failed to register clear user msg cron task")
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"sync/atomic"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/openimsdk/tools/log"
|
"github.com/openimsdk/tools/log"
|
||||||
@ -12,22 +11,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
lockKey = "openim/crontask/dist-lock"
|
lockLeaseTTL = 3000
|
||||||
lockLeaseTTL = 15 // Lease TTL in seconds
|
|
||||||
acquireRetryDelay = 500 * time.Millisecond
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type EtcdLocker struct {
|
type EtcdLocker struct {
|
||||||
client *clientv3.Client
|
client *clientv3.Client
|
||||||
instanceID string
|
instanceID string
|
||||||
leaseID clientv3.LeaseID
|
|
||||||
isLockOwner int32 // Using atomic for lock ownership check
|
|
||||||
watchCh clientv3.WatchChan
|
|
||||||
watchCancel context.CancelFunc
|
|
||||||
leaseTTL int64
|
|
||||||
stopCh chan struct{}
|
|
||||||
stoppedCh chan struct{}
|
|
||||||
acquireDelay time.Duration
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewEtcdLocker creates a new etcd distributed lock
|
// NewEtcdLocker creates a new etcd distributed lock
|
||||||
@ -39,53 +28,17 @@ func NewEtcdLocker(client *clientv3.Client) (*EtcdLocker, error) {
|
|||||||
locker := &EtcdLocker{
|
locker := &EtcdLocker{
|
||||||
client: client,
|
client: client,
|
||||||
instanceID: instanceID,
|
instanceID: instanceID,
|
||||||
leaseTTL: lockLeaseTTL,
|
|
||||||
stopCh: make(chan struct{}),
|
|
||||||
stoppedCh: make(chan struct{}),
|
|
||||||
acquireDelay: acquireRetryDelay,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return locker, nil
|
return locker, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *EtcdLocker) Start(ctx context.Context) error {
|
func (e *EtcdLocker) tryAcquireTaskLock(ctx context.Context, taskName string) (clientv3.LeaseID, bool, error) {
|
||||||
log.ZInfo(ctx, "Starting etcd distributed lock", "instanceID", e.instanceID)
|
lockKey := fmt.Sprintf("openim/crontask/%s-lock", taskName)
|
||||||
go e.runLockLoop(ctx)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) runLockLoop(ctx context.Context) {
|
lease, err := e.client.Grant(ctx, lockLeaseTTL)
|
||||||
defer close(e.stoppedCh)
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-e.stopCh:
|
|
||||||
e.releaseLock(ctx)
|
|
||||||
return
|
|
||||||
case <-ctx.Done():
|
|
||||||
e.releaseLock(ctx)
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
acquired, err := e.tryAcquireLock(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.ZWarn(ctx, "Failed to acquire lock", err, "instanceID", e.instanceID)
|
return 0, false, fmt.Errorf("failed to create lease: %w", err)
|
||||||
time.Sleep(e.acquireDelay)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if acquired {
|
|
||||||
e.runKeepAlive(ctx)
|
|
||||||
time.Sleep(e.acquireDelay)
|
|
||||||
} else {
|
|
||||||
e.watchLock(ctx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) tryAcquireLock(ctx context.Context) (bool, error) {
|
|
||||||
lease, err := e.client.Grant(ctx, e.leaseTTL)
|
|
||||||
if err != nil {
|
|
||||||
return false, fmt.Errorf("failed to create lease: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
txnResp, err := e.client.Txn(ctx).
|
txnResp, err := e.client.Txn(ctx).
|
||||||
@ -96,151 +49,119 @@ func (e *EtcdLocker) tryAcquireLock(ctx context.Context) (bool, error) {
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
e.client.Revoke(ctx, lease.ID)
|
e.client.Revoke(ctx, lease.ID)
|
||||||
return false, fmt.Errorf("transaction failed: %w", err)
|
return 0, false, fmt.Errorf("transaction failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !txnResp.Succeeded {
|
if !txnResp.Succeeded {
|
||||||
rangeResp := txnResp.Responses[0].GetResponseRange()
|
rangeResp := txnResp.Responses[0].GetResponseRange()
|
||||||
if len(rangeResp.Kvs) > 0 {
|
if len(rangeResp.Kvs) > 0 {
|
||||||
currentOwner := string(rangeResp.Kvs[0].Value)
|
currentOwner := string(rangeResp.Kvs[0].Value)
|
||||||
log.ZInfo(ctx, "Lock already owned", "instanceID", e.instanceID, "owner", currentOwner)
|
log.ZInfo(ctx, "Task lock already owned, skipping execution",
|
||||||
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID,
|
||||||
|
"currentOwner", currentOwner)
|
||||||
}
|
}
|
||||||
|
|
||||||
e.client.Revoke(ctx, lease.ID)
|
e.client.Revoke(ctx, lease.ID)
|
||||||
return false, nil
|
return 0, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
e.leaseID = lease.ID
|
log.ZInfo(ctx, "Successfully acquired task lock",
|
||||||
atomic.StoreInt32(&e.isLockOwner, 1)
|
"taskName", taskName,
|
||||||
log.ZInfo(ctx, "Successfully acquired lock", "instanceID", e.instanceID, "leaseID", lease.ID)
|
"instanceID", e.instanceID,
|
||||||
return true, nil
|
"leaseID", lease.ID)
|
||||||
|
return lease.ID, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *EtcdLocker) runKeepAlive(ctx context.Context) {
|
func (e *EtcdLocker) releaseTaskLock(ctx context.Context, taskName string, leaseID clientv3.LeaseID) {
|
||||||
keepAliveCh, err := e.client.KeepAlive(ctx, e.leaseID)
|
if leaseID == 0 {
|
||||||
if err != nil {
|
|
||||||
log.ZError(ctx, "Failed to start lease keepalive", err, "instanceID", e.instanceID)
|
|
||||||
e.releaseLock(ctx)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_, err := e.client.Revoke(ctx, leaseID)
|
||||||
|
if err != nil {
|
||||||
|
log.ZWarn(ctx, "Failed to revoke task lease", err,
|
||||||
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID,
|
||||||
|
"leaseID", leaseID)
|
||||||
|
} else {
|
||||||
|
log.ZInfo(ctx, "Successfully released task lock",
|
||||||
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *EtcdLocker) startLeaseKeepAlive(ctx context.Context, taskName string, leaseID clientv3.LeaseID) (context.CancelFunc, error) {
|
||||||
|
keepAliveCtx, cancel := context.WithCancel(ctx)
|
||||||
|
|
||||||
|
keepAliveCh, err := e.client.KeepAlive(keepAliveCtx, leaseID)
|
||||||
|
if err != nil {
|
||||||
|
cancel()
|
||||||
|
return nil, fmt.Errorf("failed to start keepalive: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer cancel()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case _, ok := <-keepAliveCh:
|
case _, ok := <-keepAliveCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
log.ZWarn(ctx, "Keepalive channel closed, lock lost", nil, "instanceID", e.instanceID)
|
log.ZWarn(keepAliveCtx, "KeepAlive channel closed, lease may have expired", nil,
|
||||||
atomic.StoreInt32(&e.isLockOwner, 0) // Set to false atomically
|
"taskName", taskName,
|
||||||
return
|
"instanceID", e.instanceID,
|
||||||
}
|
"leaseID", leaseID)
|
||||||
case <-ctx.Done():
|
|
||||||
log.ZInfo(ctx, "Context canceled, releasing lock", "instanceID", e.instanceID)
|
|
||||||
e.releaseLock(ctx)
|
|
||||||
return
|
|
||||||
case <-e.stopCh:
|
|
||||||
log.ZInfo(ctx, "Stop signal received, releasing lock", "instanceID", e.instanceID)
|
|
||||||
e.releaseLock(ctx)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) watchLock(ctx context.Context) {
|
|
||||||
log.ZInfo(ctx, "Starting to watch lock status", "instanceID", e.instanceID)
|
|
||||||
watchCtx, cancel := context.WithCancel(ctx)
|
|
||||||
e.watchCancel = cancel
|
|
||||||
defer e.cancelWatch()
|
|
||||||
|
|
||||||
e.watchCh = e.client.Watch(watchCtx, lockKey)
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case resp, ok := <-e.watchCh:
|
|
||||||
if !ok {
|
|
||||||
log.ZWarn(ctx, "Watch channel closed", nil, "instanceID", e.instanceID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
for _, event := range resp.Events {
|
|
||||||
if event.Type == clientv3.EventTypeDelete {
|
|
||||||
log.ZInfo(ctx, "Lock released, attempting to acquire", "instanceID", e.instanceID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-e.stopCh:
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) releaseLock(ctx context.Context) {
|
|
||||||
if atomic.LoadInt32(&e.isLockOwner) == 0 {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
leaseID := e.leaseID
|
case <-keepAliveCtx.Done():
|
||||||
atomic.StoreInt32(&e.isLockOwner, 0)
|
log.ZDebug(keepAliveCtx, "KeepAlive stopped",
|
||||||
e.leaseID = 0
|
"taskName", taskName,
|
||||||
if leaseID != 0 {
|
"instanceID", e.instanceID)
|
||||||
_, err := e.client.Revoke(context.Background(), leaseID)
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return cancel, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *EtcdLocker) ExecuteWithLock(ctx context.Context, taskName string, task func()) {
|
||||||
|
leaseID, acquired, err := e.tryAcquireTaskLock(ctx, taskName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.ZWarn(ctx, "Failed to revoke lease", err, "instanceID", e.instanceID, "error", err)
|
log.ZWarn(ctx, "Failed to acquire task lock", err,
|
||||||
} else {
|
"taskName", taskName,
|
||||||
log.ZInfo(ctx, "Successfully released lock", "instanceID", e.instanceID)
|
"instanceID", e.instanceID)
|
||||||
}
|
return
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *EtcdLocker) CheckLockOwnership(ctx context.Context) (bool, error) {
|
if !acquired {
|
||||||
if atomic.LoadInt32(&e.isLockOwner) == 0 {
|
log.ZDebug(ctx, "Task is being executed by another instance, skipping",
|
||||||
return false, nil
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := e.client.Get(ctx, lockKey)
|
cancelKeepAlive, err := e.startLeaseKeepAlive(ctx, taskName, leaseID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to check lock status: %w", err)
|
log.ZWarn(ctx, "Failed to start lease keepalive", err,
|
||||||
}
|
"taskName", taskName,
|
||||||
if len(resp.Kvs) > 0 && string(resp.Kvs[0].Value) == e.instanceID {
|
"instanceID", e.instanceID)
|
||||||
return true, nil
|
e.releaseTaskLock(ctx, taskName, leaseID)
|
||||||
}
|
|
||||||
if atomic.LoadInt32(&e.isLockOwner) == 1 {
|
|
||||||
log.ZWarn(ctx, "Lock ownership lost unexpectedly", nil, "instanceID", e.instanceID)
|
|
||||||
atomic.StoreInt32(&e.isLockOwner, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) cancelWatch() {
|
|
||||||
if e.watchCancel != nil {
|
|
||||||
e.watchCancel()
|
|
||||||
e.watchCancel = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) Stop() {
|
|
||||||
e.cancelWatch()
|
|
||||||
close(e.stopCh)
|
|
||||||
<-e.stoppedCh
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) IsLockOwner() bool {
|
|
||||||
return atomic.LoadInt32(&e.isLockOwner) == 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *EtcdLocker) ExecuteWithLock(ctx context.Context, task func()) {
|
|
||||||
if atomic.LoadInt32(&e.isLockOwner) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
isOwner, err := e.CheckLockOwnership(ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.ZWarn(ctx, "Failed to verify lock ownership", err, "instanceID", e.instanceID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !isOwner {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.ZInfo(ctx, "Starting lock-protected task execution", "instanceID", e.instanceID)
|
defer func() {
|
||||||
|
cancelKeepAlive()
|
||||||
|
e.releaseTaskLock(ctx, taskName, leaseID)
|
||||||
|
}()
|
||||||
|
|
||||||
|
log.ZInfo(ctx, "Starting task execution with lease keepalive",
|
||||||
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID,
|
||||||
|
"leaseID", leaseID)
|
||||||
|
|
||||||
task()
|
task()
|
||||||
log.ZInfo(ctx, "Lock-protected task execution completed", "instanceID", e.instanceID)
|
|
||||||
|
log.ZInfo(ctx, "Task execution completed",
|
||||||
|
"taskName", taskName,
|
||||||
|
"instanceID", e.instanceID)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user