From e750134e8aa8ee6735666df292767e8a6940e04f Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 14 Oct 2015 19:03:59 +0100 Subject: [PATCH 1/4] move request_(force_)stop out of _install_signal_handlers --- rq/worker.py | 82 ++++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/rq/worker.py b/rq/worker.py index 64efdba..ab3a39a 100644 --- a/rq/worker.py +++ b/rq/worker.py @@ -323,47 +323,47 @@ class Worker(object): gracefully. """ - def request_force_stop(signum, frame): - """Terminates the application (cold shutdown). - """ - self.log.warning('Cold shut down') - - # Take down the horse with the worker - if self.horse_pid: - msg = 'Taking down horse {0} with me'.format(self.horse_pid) - self.log.debug(msg) - try: - os.kill(self.horse_pid, signal.SIGKILL) - except OSError as e: - # ESRCH ("No such process") is fine with us - if e.errno != errno.ESRCH: - self.log.debug('Horse already down') - raise - raise SystemExit() - - def request_stop(signum, frame): - """Stops the current worker loop but waits for child processes to - end gracefully (warm shutdown). - """ - self.log.debug('Got signal {0}'.format(signal_name(signum))) - - signal.signal(signal.SIGINT, request_force_stop) - signal.signal(signal.SIGTERM, request_force_stop) - - msg = 'Warm shut down requested' - self.log.warning(msg) - - # If shutdown is requested in the middle of a job, wait until - # finish before shutting down - if self.get_state() == 'busy': - self._stop_requested = True - self.log.debug('Stopping after current horse is finished. ' - 'Press Ctrl+C again for a cold shutdown.') - else: - raise StopRequested() - - signal.signal(signal.SIGINT, request_stop) - signal.signal(signal.SIGTERM, request_stop) + signal.signal(signal.SIGINT, self.request_stop) + signal.signal(signal.SIGTERM, self.request_stop) + + def request_force_stop(self, signum, frame): + """Terminates the application (cold shutdown). 
+ """ + self.log.warning('Cold shut down') + + # Take down the horse with the worker + if self.horse_pid: + msg = 'Taking down horse {0} with me'.format(self.horse_pid) + self.log.debug(msg) + try: + os.kill(self.horse_pid, signal.SIGKILL) + except OSError as e: + # ESRCH ("No such process") is fine with us + if e.errno != errno.ESRCH: + self.log.debug('Horse already down') + raise + raise SystemExit() + + def request_stop(self, signum, frame): + """Stops the current worker loop but waits for child processes to + end gracefully (warm shutdown). + """ + self.log.debug('Got signal {0}'.format(signal_name(signum))) + + signal.signal(signal.SIGINT, self.request_force_stop) + signal.signal(signal.SIGTERM, self.request_force_stop) + + msg = 'Warm shut down requested' + self.log.warning(msg) + + # If shutdown is requested in the middle of a job, wait until + # finish before shutting down + if self.get_state() == 'busy': + self._stop_requested = True + self.log.debug('Stopping after current horse is finished. 
' + 'Press Ctrl+C again for a cold shutdown.') + else: + raise StopRequested() def check_for_suspension(self, burst): """Check to see if workers have been suspended by `rq suspend`""" From aada162a4d4ce461cf8376811be62ca7db9ec785 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 14 Oct 2015 19:48:57 +0100 Subject: [PATCH 2/4] worker shutdown tests --- tests/test_worker.py | 68 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/test_worker.py b/tests/test_worker.py index 40b84b4..f3032ea 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -5,10 +5,13 @@ from __future__ import (absolute_import, division, print_function, import os from datetime import timedelta from time import sleep +import signal +import time +from multiprocessing import Process from tests import RQTestCase, slow from tests.fixtures import (create_file, create_file_after_timeout, - div_by_zero, do_nothing, say_hello, say_pid) + div_by_zero, do_nothing, say_hello, say_pid, long_running_job) from tests.helpers import strip_microseconds from rq import get_failed_queue, Queue, SimpleWorker, Worker @@ -468,3 +471,66 @@ class TestWorker(RQTestCase): worker = Worker(queue, connection=self.testconn) worker.work(burst=True) self.assertEqual(self.testconn.zcard(registry.key), 0) + + +def kill_worker(pid, double_kill): + # wait for the worker to be started over on the main process + time.sleep(0.5) + os.kill(pid, signal.SIGTERM) + if double_kill: + # give the worker time to switch signal handler + time.sleep(0.5) + os.kill(pid, signal.SIGTERM) + + +class TestWorkerShutdown(RQTestCase): + def setUp(self): + # we want tests to fail if signals are ignored and the worker remains running, + # so set a signal to kill them after 5 seconds + signal.signal(signal.SIGALRM, self._timeout) + signal.alarm(5) + + def _timeout(self, signal, frame): + raise AssertionError("test still running after 5 seconds, " + "likely the worker wasn't shutdown 
correctly") + + @slow + def test_idle_worker_warm_shutdown(self): + w = Worker('foo') + self.assertFalse(w._stop_requested) + p = Process(target=kill_worker, args=(os.getpid(), False)) + p.start() + + w.work() + + p.join(1) + self.assertFalse(w._stop_requested) + + @slow + def test_working_worker_warm_shutdown(self): + fooq = Queue('foo') + w = Worker(fooq) + fooq.enqueue(long_running_job, 2) + self.assertFalse(w._stop_requested) + p = Process(target=kill_worker, args=(os.getpid(), False)) + p.start() + + w.work() + + p.join(1) + self.assertTrue(w._stop_requested) + + @slow + def test_working_worker_cold_shutdown(self): + fooq = Queue('foo') + w = Worker(fooq) + fooq.enqueue(long_running_job, 10) + self.assertFalse(w._stop_requested) + p = Process(target=kill_worker, args=(os.getpid(), True)) + p.start() + + self.assertRaises(SystemExit, w.work) + + p.join(1) + self.assertTrue(w._stop_requested) + From fa48751fedbfeb5b1b6010304960710d78874cbb Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 14 Oct 2015 20:08:55 +0100 Subject: [PATCH 3/4] correct SLOW env var, run slow tests on ci --- .travis.yml | 2 +- run_tests | 4 ++-- tests/__init__.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0a9a69e..2d1529d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,6 @@ install: - pip install coveralls --use-mirrors #- pip install pytest # installed by Travis by default already script: - - py.test --cov rq + - RUN_SLOW_TESTS_TOO=1 py.test --cov rq after_success: - coveralls diff --git a/run_tests b/run_tests index 50736cc..3b454d1 100755 --- a/run_tests +++ b/run_tests @@ -17,9 +17,9 @@ else safe_rg=cat fi -export ONLY_RUN_FAST_TESTS=1 +export RUN_SLOW_TESTS_TOO=1 if [ "$1" = '-f' ]; then # Poor man's argparse - unset ONLY_RUN_FAST_TESTS + unset RUN_SLOW_TESTS_TOO shift 1 fi diff --git a/tests/__init__.py b/tests/__init__.py index 93f11b3..7371a92 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -32,7 
+32,7 @@ def slow(f): @wraps(f) def _inner(*args, **kwargs): - if os.environ.get('ONLY_RUN_FAST_TESTS'): + if os.environ.get('RUN_SLOW_TESTS_TOO'): f(*args, **kwargs) return _inner From 1349e90ba2094de6d42a0cc75dfaf36b1fa2cd4f Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 14 Oct 2015 21:22:36 +0100 Subject: [PATCH 4/4] test docstrings, and sentinel test --- tests/test_worker.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_worker.py b/tests/test_worker.py index f3032ea..4528ae1 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -11,7 +11,7 @@ from multiprocessing import Process from tests import RQTestCase, slow from tests.fixtures import (create_file, create_file_after_timeout, - div_by_zero, do_nothing, say_hello, say_pid, long_running_job) + div_by_zero, do_nothing, say_hello, say_pid) from tests.helpers import strip_microseconds from rq import get_failed_queue, Queue, SimpleWorker, Worker @@ -496,6 +496,7 @@ class TestWorkerShutdown(RQTestCase): @slow def test_idle_worker_warm_shutdown(self): + """worker with no ongoing job receiving single SIGTERM signal and shutting down""" w = Worker('foo') self.assertFalse(w._stop_requested) p = Process(target=kill_worker, args=(os.getpid(), False)) @@ -508,23 +509,29 @@ class TestWorkerShutdown(RQTestCase): @slow def test_working_worker_warm_shutdown(self): + """worker with an ongoing job receiving single SIGTERM signal, allowing job to finish then shutting down""" fooq = Queue('foo') w = Worker(fooq) - fooq.enqueue(long_running_job, 2) + + sentinel_file = '/tmp/.rq_sentinel_warm' + fooq.enqueue(create_file_after_timeout, sentinel_file, 2) self.assertFalse(w._stop_requested) p = Process(target=kill_worker, args=(os.getpid(), False)) p.start() w.work() - p.join(1) + p.join(2) self.assertTrue(w._stop_requested) + self.assertTrue(os.path.exists(sentinel_file)) @slow def test_working_worker_cold_shutdown(self): + """worker with an ongoing job receiving 
double SIGTERM signal and shutting down immediately""" fooq = Queue('foo') w = Worker(fooq) - fooq.enqueue(long_running_job, 10) + sentinel_file = '/tmp/.rq_sentinel_cold' + fooq.enqueue(create_file_after_timeout, sentinel_file, 2) self.assertFalse(w._stop_requested) p = Process(target=kill_worker, args=(os.getpid(), True)) p.start() @@ -533,4 +540,5 @@ class TestWorkerShutdown(RQTestCase): p.join(1) self.assertTrue(w._stop_requested) + self.assertFalse(os.path.exists(sentinel_file))