Enhance debug logs (#1752)

* Add debug logs to dequeue

There were reports where a worker wouldn't fetch new jobs.
Since one possible cause is the infinite hanging of the `BLPOP` command,
this adds a couple of logs surrounding the `dequeue_any` function,
that interacts with Redis using `BLPOP`,

* Update worker.py

Fix Typo

* More logs

* Logs

* Update worker.py

* Add debug logs to the queue

* Reviewed logs

* Remove queue count before job push

* New log wording

* Remove logs
main
lowercase00 2 years ago committed by GitHub
parent 0f5f18bec7
commit 3be814c6d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,6 +2,7 @@ import uuid
import sys import sys
import warnings import warnings
import typing as t import typing as t
import logging
from collections import namedtuple from collections import namedtuple
from datetime import datetime, timezone from datetime import datetime, timezone
@ -17,7 +18,14 @@ from .defaults import DEFAULT_RESULT_TTL
from .exceptions import DequeueTimeout, NoSuchJobError from .exceptions import DequeueTimeout, NoSuchJobError
from .job import Job, JobStatus from .job import Job, JobStatus
from .serializers import resolve_serializer from .serializers import resolve_serializer
from .utils import backend_class, get_version, import_attribute, parse_timeout, utcnow from .utils import backend_class, get_version, import_attribute, make_colorizer, parse_timeout, utcnow
green = make_colorizer('darkgreen')
yellow = make_colorizer('darkyellow')
blue = make_colorizer('darkblue')
logger = logging.getLogger("rq.queue")
def compact(lst): def compact(lst):
@ -78,6 +86,7 @@ class Queue:
self._key = '{0}{1}'.format(prefix, name) self._key = '{0}{1}'.format(prefix, name)
self._default_timeout = parse_timeout(default_timeout) or self.DEFAULT_TIMEOUT self._default_timeout = parse_timeout(default_timeout) or self.DEFAULT_TIMEOUT
self._is_async = is_async self._is_async = is_async
self.log = logger
if 'async' in kwargs: if 'async' in kwargs:
self._is_async = kwargs['async'] self._is_async = kwargs['async']
@ -204,8 +213,13 @@ class Queue:
end = offset + (length - 1) end = offset + (length - 1)
else: else:
end = length end = length
return [as_text(job_id) for job_id in job_ids = [
self.connection.lrange(self.key, start, end)] as_text(job_id)
for job_id
in self.connection.lrange(self.key, start, end)
]
self.log.debug(f"Getting jobs for queue {green(self.name)}: {len(job_ids)} found.")
return job_ids
def get_jobs(self, offset: int = 0, length: int = -1): def get_jobs(self, offset: int = 0, length: int = -1):
"""Returns a slice of jobs in the queue.""" """Returns a slice of jobs in the queue."""
@ -293,9 +307,10 @@ class Queue:
'at_front' allows you to push the job onto the front instead of the back of the queue""" 'at_front' allows you to push the job onto the front instead of the back of the queue"""
connection = pipeline if pipeline is not None else self.connection connection = pipeline if pipeline is not None else self.connection
if at_front: if at_front:
connection.lpush(self.key, job_id) result = connection.lpush(self.key, job_id)
else: else:
connection.rpush(self.key, job_id) result = connection.rpush(self.key, job_id)
self.log.debug(f"Pushed job {blue(job_id)} into {green(self.name)}, {result} job(s) are in queue.")
def create_job(self, func: t.Callable[..., t.Any], args=None, kwargs=None, timeout=None, def create_job(self, func: t.Callable[..., t.Any], args=None, kwargs=None, timeout=None,
result_ttl=None, ttl=None, failure_ttl=None, result_ttl=None, ttl=None, failure_ttl=None,
@ -682,7 +697,7 @@ class Queue:
return as_text(self.connection.lpop(self.key)) return as_text(self.connection.lpop(self.key))
@classmethod @classmethod
def lpop(cls, queue_keys, timeout, connection: t.Optional['Redis'] = None): def lpop(cls, queue_keys, timeout: int, connection: t.Optional['Redis'] = None):
"""Helper method. Intermediate method to abstract away from some """Helper method. Intermediate method to abstract away from some
Redis API details, where LPOP accepts only a single key, whereas BLPOP Redis API details, where LPOP accepts only a single key, whereas BLPOP
accepts multiple. So if we want the non-blocking LPOP, we need to accepts multiple. So if we want the non-blocking LPOP, we need to
@ -699,10 +714,13 @@ class Queue:
if timeout is not None: # blocking variant if timeout is not None: # blocking variant
if timeout == 0: if timeout == 0:
raise ValueError('RQ does not support indefinite timeouts. Please pick a timeout value > 0') raise ValueError('RQ does not support indefinite timeouts. Please pick a timeout value > 0')
logger.debug(f"Starting BLPOP operation for queues {green(queue_keys)} with timeout of {timeout}")
result = connection.blpop(queue_keys, timeout) result = connection.blpop(queue_keys, timeout)
if result is None: if result is None:
logger.debug(f"BLPOP Timeout, no jobs found on queues {green(queue_keys)}")
raise DequeueTimeout(timeout, queue_keys) raise DequeueTimeout(timeout, queue_keys)
queue_key, job_id = result queue_key, job_id = result
logger.debug(f"Dequeued job {blue(job_id)} from queue {green(queue_key)}")
return queue_key, job_id return queue_key, job_id
else: # non-blocking variant else: # non-blocking variant
for queue_key in queue_keys: for queue_key in queue_keys:

@ -60,7 +60,7 @@ green = make_colorizer('darkgreen')
yellow = make_colorizer('darkyellow') yellow = make_colorizer('darkyellow')
blue = make_colorizer('darkblue') blue = make_colorizer('darkblue')
logger = logging.getLogger(__name__) logger = logging.getLogger("rq.worker")
class StopRequested(Exception): class StopRequested(Exception):
@ -690,10 +690,12 @@ class Worker:
if self.should_run_maintenance_tasks: if self.should_run_maintenance_tasks:
self.run_maintenance_tasks() self.run_maintenance_tasks()
self.log.debug(f"Dequeueing jobs on queues {self._ordered_queues} and timeout {timeout}")
result = self.queue_class.dequeue_any(self._ordered_queues, timeout, result = self.queue_class.dequeue_any(self._ordered_queues, timeout,
connection=self.connection, connection=self.connection,
job_class=self.job_class, job_class=self.job_class,
serializer=self.serializer) serializer=self.serializer)
self.log.debug(f"Dequeued job {result[1]} from {result[0]}")
if result is not None: if result is not None:
job, queue = result job, queue = result
@ -946,7 +948,7 @@ class Worker:
"""Performs misc bookkeeping like updating states prior to """Performs misc bookkeeping like updating states prior to
job execution. job execution.
""" """
self.log.debug(f"Preparing for execution of Job ID {job.id}")
with self.connection.pipeline() as pipeline: with self.connection.pipeline() as pipeline:
self.set_current_job_id(job.id, pipeline=pipeline) self.set_current_job_id(job.id, pipeline=pipeline)
self.set_current_job_working_time(0, pipeline=pipeline) self.set_current_job_working_time(0, pipeline=pipeline)
@ -957,6 +959,7 @@ class Worker:
job.prepare_for_execution(self.name, pipeline=pipeline) job.prepare_for_execution(self.name, pipeline=pipeline)
pipeline.execute() pipeline.execute()
self.log.debug(f"Job preparation finished.")
msg = 'Processing {0} from {1} since {2}' msg = 'Processing {0} from {1} since {2}'
self.procline(msg.format(job.func_name, job.origin, time.time())) self.procline(msg.format(job.func_name, job.origin, time.time()))
@ -1084,12 +1087,14 @@ class Worker:
def execute_success_callback(self, job: 'Job', result): def execute_success_callback(self, job: 'Job', result):
"""Executes success_callback with timeout""" """Executes success_callback with timeout"""
self.log.debug(f"Running success callbacks for {job.id}")
job.heartbeat(utcnow(), CALLBACK_TIMEOUT) job.heartbeat(utcnow(), CALLBACK_TIMEOUT)
with self.death_penalty_class(CALLBACK_TIMEOUT, JobTimeoutException, job_id=job.id): with self.death_penalty_class(CALLBACK_TIMEOUT, JobTimeoutException, job_id=job.id):
job.success_callback(job, self.connection, result) job.success_callback(job, self.connection, result)
def execute_failure_callback(self, job): def execute_failure_callback(self, job):
"""Executes failure_callback with timeout""" """Executes failure_callback with timeout"""
self.log.debug(f"Running failure callbacks for {job.id}")
job.heartbeat(utcnow(), CALLBACK_TIMEOUT) job.heartbeat(utcnow(), CALLBACK_TIMEOUT)
with self.death_penalty_class(CALLBACK_TIMEOUT, JobTimeoutException, job_id=job.id): with self.death_penalty_class(CALLBACK_TIMEOUT, JobTimeoutException, job_id=job.id):
job.failure_callback(job, self.connection, *sys.exc_info()) job.failure_callback(job, self.connection, *sys.exc_info())
@ -1101,6 +1106,7 @@ class Worker:
push_connection(self.connection) push_connection(self.connection)
started_job_registry = queue.started_job_registry started_job_registry = queue.started_job_registry
self.log.debug("Started Job Registry set.")
try: try:
self.prepare_job_execution(job) self.prepare_job_execution(job)
@ -1108,7 +1114,9 @@ class Worker:
job.started_at = utcnow() job.started_at = utcnow()
timeout = job.timeout or self.queue_class.DEFAULT_TIMEOUT timeout = job.timeout or self.queue_class.DEFAULT_TIMEOUT
with self.death_penalty_class(timeout, JobTimeoutException, job_id=job.id): with self.death_penalty_class(timeout, JobTimeoutException, job_id=job.id):
self.log.debug("Performing Job...")
rv = job.perform() rv = job.perform()
self.log.debug(f"Finished performing Job ID {job.id}")
job.ended_at = utcnow() job.ended_at = utcnow()
@ -1123,6 +1131,7 @@ class Worker:
queue=queue, queue=queue,
started_job_registry=started_job_registry) started_job_registry=started_job_registry)
except: # NOQA except: # NOQA
self.log.debug(f"Job {job.id} raised an exception.")
job.ended_at = utcnow() job.ended_at = utcnow()
exc_info = sys.exc_info() exc_info = sys.exc_info()
exc_string = ''.join(traceback.format_exception(*exc_info)) exc_string = ''.join(traceback.format_exception(*exc_info))
@ -1164,7 +1173,7 @@ class Worker:
def handle_exception(self, job: 'Job', *exc_info): def handle_exception(self, job: 'Job', *exc_info):
"""Walks the exception handler stack to delegate exception handling.""" """Walks the exception handler stack to delegate exception handling."""
self.log.debug(f"Handling exception for {job.id}.")
exc_string = ''.join(traceback.format_exception(*exc_info)) exc_string = ''.join(traceback.format_exception(*exc_info))
# If the job cannot be deserialized, it will raise when func_name or # If the job cannot be deserialized, it will raise when func_name or

Loading…
Cancel
Save