scheduler: now operates with chunks of jobs (#1355)

* scheduler: now operates with chunks of jobs

* scheduler: set default chunk_size for ScheduledJobRegistry.get_jobs_to_schedule

* scheduler: fixed missing indent

* scheduler: added test for get_jobs_to_schedule() with chunk_size parameter

* scheduler: fixed test to pass on Python 3.5 (no f-strings)

* scheduler: fixed chunk_size in test to make it lighter to run
main
Nikita Romaniuk 4 years ago committed by GitHub
parent 9adcd7e50c
commit 2da957a68d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -286,11 +286,11 @@ class ScheduledJobRegistry(BaseRegistry):
score = timestamp if timestamp is not None else current_timestamp() score = timestamp if timestamp is not None else current_timestamp()
return connection.zremrangebyscore(self.key, 0, score) return connection.zremrangebyscore(self.key, 0, score)
def get_jobs_to_schedule(self, timestamp=None): def get_jobs_to_schedule(self, timestamp=None, chunk_size=1000):
"""Remove jobs whose timestamp is in the past from registry.""" """Remove jobs whose timestamp is in the past from registry."""
score = timestamp if timestamp is not None else current_timestamp() score = timestamp if timestamp is not None else current_timestamp()
return [as_text(job_id) for job_id in return [as_text(job_id) for job_id in
self.connection.zrangebyscore(self.key, 0, score)] self.connection.zrangebyscore(self.key, 0, score, start=0, num=chunk_size)]
def get_scheduled_time(self, job_or_id): def get_scheduled_time(self, job_or_id):
"""Returns datetime (UTC) at which job is scheduled to be enqueued""" """Returns datetime (UTC) at which job is scheduled to be enqueued"""

@ -28,7 +28,6 @@ setup_loghandlers(
class RQScheduler(object): class RQScheduler(object):
# STARTED: scheduler has been started but sleeping # STARTED: scheduler has been started but sleeping
# WORKING: scheduler is in the midst of scheduling jobs # WORKING: scheduler is in the midst of scheduling jobs
# STOPPED: scheduler is in stopped condition # STOPPED: scheduler is in stopped condition
@ -137,11 +136,11 @@ class RQScheduler(object):
queue = Queue(registry.name, connection=self.connection) queue = Queue(registry.name, connection=self.connection)
with self.connection.pipeline() as pipeline: with self.connection.pipeline() as pipeline:
# This should be done in bulk jobs = Job.fetch_many(job_ids, connection=self.connection)
for job_id in job_ids: for job in jobs:
job = Job.fetch(job_id, connection=self.connection) if job is not None:
queue.enqueue_job(job, pipeline=pipeline) queue.enqueue_job(job, pipeline=pipeline)
registry.remove_jobs(timestamp) registry.remove(job, pipeline=pipeline)
pipeline.execute() pipeline.execute()
self._status = self.Status.STARTED self._status = self.Status.STARTED

@ -35,6 +35,21 @@ class TestScheduledJobRegistry(RQTestCase):
self.assertEqual(registry.get_jobs_to_enqueue(timestamp + 20), self.assertEqual(registry.get_jobs_to_enqueue(timestamp + 20),
['foo', 'bar']) ['foo', 'bar'])
def test_get_jobs_to_schedule_with_chunk_size(self):
"""Max amount of jobs returns by get_jobs_to_schedule() equal to chunk_size"""
queue = Queue(connection=self.testconn)
registry = ScheduledJobRegistry(queue=queue)
timestamp = current_timestamp()
chunk_size = 5
for index in range(0, chunk_size * 2):
self.testconn.zadd(registry.key, {'foo_{}'.format(index): 1})
self.assertEqual(len(registry.get_jobs_to_schedule(timestamp, chunk_size)),
chunk_size)
self.assertEqual(len(registry.get_jobs_to_schedule(timestamp, chunk_size * 2)),
chunk_size * 2)
def test_get_scheduled_time(self): def test_get_scheduled_time(self):
"""get_scheduled_time() returns job's scheduled datetime""" """get_scheduled_time() returns job's scheduled datetime"""
queue = Queue(connection=self.testconn) queue = Queue(connection=self.testconn)
@ -102,7 +117,6 @@ class TestScheduledJobRegistry(RQTestCase):
self.assertEqual(self.testconn.zscore(registry.key, job.id), self.assertEqual(self.testconn.zscore(registry.key, job.id),
1546300800 + 14400) # 2019-01-01 UTC in Unix timestamp 1546300800 + 14400) # 2019-01-01 UTC in Unix timestamp
# Score is always stored in UTC even if datetime is in a different tz # Score is always stored in UTC even if datetime is in a different tz
tz = timezone(timedelta(hours=7)) tz = timezone(timedelta(hours=7))
job = Job.create('myfunc', connection=self.testconn) job = Job.create('myfunc', connection=self.testconn)

Loading…
Cancel
Save