refactor: improve webhook processing with enhanced logging and retry mechanisms
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any, cast
|
||||
@@ -14,6 +15,9 @@ from app.core.logging import get_logger
|
||||
|
||||
# Module-level logger; call sites in this module pass an event name as the
# message and attach details through ``extra``.
logger = get_logger(__name__)
|
||||
|
||||
# Appended to a queue name to form the Redis key of its delayed-task sorted set.
_SCHEDULED_SUFFIX = ":scheduled"
|
||||
# Default cap on how many due scheduled tasks one drain pass moves to the live queue.
# NOTE(review): despite the "dry run" name, the only use visible here is as the
# default ``max_items`` of ``_drain_ready_scheduled_tasks`` - confirm the naming.
_DRY_RUN_BATCH_SIZE = 100
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QueuedTask:
|
||||
@@ -40,7 +44,84 @@ def _redis_client(redis_url: str | None = None) -> redis.Redis:
|
||||
return redis.Redis.from_url(redis_url or settings.rq_redis_url)
|
||||
|
||||
|
||||
def enqueue_task(task: QueuedTask, queue_name: str, *, redis_url: str | None = None) -> bool:
|
||||
def _scheduled_queue_name(queue_name: str) -> str:
    """Return the Redis key that holds the delayed tasks for *queue_name*.

    The key is the queue name with ``_SCHEDULED_SUFFIX`` appended.
    """
    delayed_key = f"{queue_name}{_SCHEDULED_SUFFIX}"
    return delayed_key
|
||||
|
||||
|
||||
def _now_seconds() -> float:
    """Return the current wall-clock time in seconds since the epoch.

    Kept as a separate function so every scheduling computation in this
    module reads the clock through a single place.
    """
    current = time.time()
    return current
|
||||
|
||||
|
||||
def _drain_ready_scheduled_tasks(
    client: redis.Redis,
    queue_name: str,
    *,
    max_items: int = _DRY_RUN_BATCH_SIZE,
) -> float | None:
    """Move due scheduled tasks onto the live queue and report the next wait.

    Reads up to ``max_items`` members of the scheduled sorted set whose score
    is at or below the current time, pushes them onto ``queue_name`` with
    LPUSH, and removes them from the sorted set.

    Args:
        client: Redis connection used for every command.
        queue_name: Name of the live list-backed queue; the scheduled key is
            derived from it via ``_scheduled_queue_name``.
        max_items: Upper bound on members promoted in this pass.

    Returns:
        ``None`` when no member has a score at or after the sampled time;
        otherwise the number of seconds (never negative) until the earliest
        such member becomes due.
    """
    delayed_key = _scheduled_queue_name(queue_name)
    # Sample the clock once so both range queries share the same cut-off.
    cutoff = _now_seconds()

    due = client.zrangebyscore(delayed_key, "-inf", cutoff, start=0, num=max_items)
    if due:
        # NOTE(review): LPUSH and ZREM are issued as two separate commands, so
        # two callers draining at the same time could both promote the same
        # members - confirm whether duplicate delivery is acceptable.
        client.lpush(queue_name, *due)
        client.zrem(delayed_key, *due)
        log_fields = {
            "queue_name": queue_name,
            "count": len(due),
        }
        logger.debug("rq.queue.drain_ready_scheduled", extra=log_fields)

    # Look at the earliest member scored at or after the cut-off to learn how
    # long a blocking consumer may sleep before the next task is due.
    upcoming = client.zrangebyscore(
        delayed_key,
        cutoff,
        "+inf",
        start=0,
        num=1,
        withscores=True,
    )
    if upcoming:
        head = cast(tuple[str | bytes, float], upcoming[0])
        remaining = float(head[1]) - cutoff
        return remaining if remaining > 0.0 else 0.0
    return None
|
||||
|
||||
|
||||
def _schedule_for_later(
    task: QueuedTask,
    queue_name: str,
    delay_seconds: float,
    *,
    redis_url: str | None = None,
) -> bool:
    """Park *task* in the scheduled sorted set until its delay has elapsed.

    The task's JSON envelope is stored as the member and the current time plus
    ``delay_seconds`` as its score.

    Args:
        task: Task envelope to delay.
        queue_name: Live queue the task belongs to; the scheduled key is
            derived from it via ``_scheduled_queue_name``.
        delay_seconds: Seconds from now before the task becomes due.
        redis_url: Optional Redis URL forwarded to ``_redis_client``.

    Returns:
        Always ``True``. Nothing is caught here, so any error raised by the
        Redis client propagates to the caller.
    """
    redis_conn = _redis_client(redis_url=redis_url)
    delayed_key = _scheduled_queue_name(queue_name)
    due_at = _now_seconds() + delay_seconds
    # NOTE(review): the member is the task's JSON text, so scheduling an
    # identical envelope twice presumably updates one entry's score rather
    # than adding a second entry - confirm against Redis ZADD semantics.
    redis_conn.zadd(delayed_key, {task.to_json(): due_at})
    log_fields = {
        "task_type": task.task_type,
        "queue_name": queue_name,
        "delay_seconds": delay_seconds,
    }
    logger.info("rq.queue.scheduled", extra=log_fields)
    return True
|
||||
|
||||
|
||||
def enqueue_task(
|
||||
task: QueuedTask,
|
||||
queue_name: str,
|
||||
*,
|
||||
redis_url: str | None = None,
|
||||
) -> bool:
|
||||
"""Persist a task envelope in a Redis list-backed queue."""
|
||||
try:
|
||||
client = _redis_client(redis_url=redis_url)
|
||||
@@ -87,14 +168,22 @@ def dequeue_task(
|
||||
) -> QueuedTask | None:
|
||||
"""Pop one task envelope from the queue."""
|
||||
client = _redis_client(redis_url=redis_url)
|
||||
timeout = max(0.0, float(block_timeout))
|
||||
if block:
|
||||
raw = cast(tuple[bytes | str, bytes | str] | None, client.brpop(queue_name, timeout=block_timeout))
|
||||
next_delay = _drain_ready_scheduled_tasks(client, queue_name)
|
||||
if timeout == 0:
|
||||
timeout = next_delay if next_delay is not None else 0
|
||||
else:
|
||||
timeout = min(timeout, next_delay) if next_delay is not None else timeout
|
||||
raw = cast(tuple[bytes | str, bytes | str] | None, client.brpop(queue_name, timeout=timeout))
|
||||
if raw is None:
|
||||
_drain_ready_scheduled_tasks(client, queue_name)
|
||||
return None
|
||||
raw = raw[1]
|
||||
else:
|
||||
raw = cast(str | bytes | None, client.rpop(queue_name))
|
||||
if raw is None:
|
||||
_drain_ready_scheduled_tasks(client, queue_name)
|
||||
return None
|
||||
return _decode_task(raw, queue_name)
|
||||
|
||||
@@ -141,19 +230,32 @@ def requeue_if_failed(
|
||||
*,
|
||||
max_retries: int,
|
||||
redis_url: str | None = None,
|
||||
delay_seconds: float = 0,
|
||||
) -> bool:
|
||||
"""Requeue a failed task with capped retries.
|
||||
|
||||
Returns True if requeued.
|
||||
"""
|
||||
if task.attempts >= max_retries:
|
||||
requeued_task = _requeue_with_attempt(task)
|
||||
if requeued_task.attempts > max_retries:
|
||||
logger.warning(
|
||||
"rq.queue.drop_failed_task",
|
||||
extra={
|
||||
"task_type": task.task_type,
|
||||
"queue_name": queue_name,
|
||||
"attempts": task.attempts,
|
||||
"attempts": requeued_task.attempts,
|
||||
},
|
||||
)
|
||||
return False
|
||||
return enqueue_task(_requeue_with_attempt(task), queue_name, redis_url=redis_url)
|
||||
if delay_seconds > 0:
|
||||
return _schedule_for_later(
|
||||
requeued_task,
|
||||
queue_name,
|
||||
delay_seconds,
|
||||
redis_url=redis_url,
|
||||
)
|
||||
return enqueue_task(
|
||||
requeued_task,
|
||||
queue_name,
|
||||
redis_url=redis_url,
|
||||
)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import random
|
||||
import time
|
||||
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
@@ -198,8 +199,13 @@ async def flush_webhook_delivery_queue(*, block: bool = False, block_timeout: fl
|
||||
"error": str(exc),
|
||||
},
|
||||
)
|
||||
requeue_if_failed(item)
|
||||
time.sleep(settings.rq_dispatch_throttle_seconds)
|
||||
delay = min(
|
||||
settings.rq_dispatch_retry_base_seconds * (2 ** max(0, item.attempts)),
|
||||
settings.rq_dispatch_retry_max_seconds,
|
||||
)
|
||||
jitter = random.uniform(0, min(settings.rq_dispatch_retry_max_seconds / 10, delay * 0.1))
|
||||
requeue_if_failed(item, delay_seconds=delay + jitter)
|
||||
await asyncio.sleep(settings.rq_dispatch_throttle_seconds)
|
||||
if processed > 0:
|
||||
logger.info("webhook.dispatch.batch_complete", extra={"count": processed})
|
||||
return processed
|
||||
|
||||
@@ -119,7 +119,11 @@ def dequeue_webhook_delivery(
|
||||
raise
|
||||
|
||||
|
||||
def requeue_if_failed(payload: QueuedInboundDelivery) -> bool:
|
||||
def requeue_if_failed(
|
||||
payload: QueuedInboundDelivery,
|
||||
*,
|
||||
delay_seconds: float = 0,
|
||||
) -> bool:
|
||||
"""Requeue payload delivery with capped retries.
|
||||
|
||||
Returns True if requeued.
|
||||
@@ -130,6 +134,7 @@ def requeue_if_failed(payload: QueuedInboundDelivery) -> bool:
|
||||
settings.rq_queue_name,
|
||||
max_retries=settings.rq_dispatch_max_retries,
|
||||
redis_url=settings.rq_redis_url,
|
||||
delay_seconds=delay_seconds,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
|
||||
Reference in New Issue
Block a user