Fix paused handler exception handling
If a request handler is paused and an exception is thrown during normal events, that exception handling code was not properly unpausing the request and removing it from the list of active handlers. Change-Id: I0cfd8294f9ce88e5918654a4847776c981ee4fb3
This commit is contained in:
parent
f3f7e41f21
commit
58ccd2c718
|
@ -496,16 +496,7 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
|
|||
self.log.debug("Declining node request %s because %s",
|
||||
self.request.id, ', '.join(declined_reasons))
|
||||
self.decline_request()
|
||||
self.unlockNodeSet(clear_allocation=True)
|
||||
|
||||
# If conditions have changed for a paused request to now cause us
|
||||
# to decline it, we need to unpause so we don't keep trying it
|
||||
if self.paused:
|
||||
self.paused = False
|
||||
|
||||
self.zk.storeNodeRequest(self.request)
|
||||
self.zk.unlockNodeRequest(self.request)
|
||||
self.done = True
|
||||
self._declinedHandlerCleanup()
|
||||
return
|
||||
|
||||
if self.paused:
|
||||
|
@ -517,6 +508,27 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
|
|||
|
||||
self._waitForNodeSet()
|
||||
|
||||
def _declinedHandlerCleanup(self):
|
||||
"""
|
||||
After declining a request, do necessary cleanup actions.
|
||||
"""
|
||||
self.unlockNodeSet(clear_allocation=True)
|
||||
|
||||
# If conditions have changed for a paused request to now cause us
|
||||
# to decline it, we need to unpause so we don't keep trying it
|
||||
if self.paused:
|
||||
self.paused = False
|
||||
|
||||
try:
|
||||
self.zk.storeNodeRequest(self.request)
|
||||
self.zk.unlockNodeRequest(self.request)
|
||||
except Exception:
|
||||
# If the request is gone for some reason, we need to make
|
||||
# sure that self.done still gets set.
|
||||
self.log.exception("Unable to modify missing request %s",
|
||||
self.request.id)
|
||||
self.done = True
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Public methods
|
||||
# ---------------------------------------------------------------
|
||||
|
@ -576,16 +588,7 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
|
|||
"Declining node request %s due to exception in "
|
||||
"NodeRequestHandler:", self.request.id)
|
||||
self.decline_request()
|
||||
self.unlockNodeSet(clear_allocation=True)
|
||||
try:
|
||||
self.zk.storeNodeRequest(self.request)
|
||||
self.zk.unlockNodeRequest(self.request)
|
||||
except Exception:
|
||||
# If the request is gone for some reason, we need to make
|
||||
# sure that self.done still gets set.
|
||||
self.log.exception("Unable to decline missing request %s",
|
||||
self.request.id)
|
||||
self.done = True
|
||||
self._declinedHandlerCleanup()
|
||||
|
||||
def poll(self):
|
||||
if self.paused:
|
||||
|
|
|
@ -1370,3 +1370,54 @@ class TestLauncher(tests.DBTestCase):
|
|||
self.assertEqual(1, mock_poll.call_count)
|
||||
self.assertEqual(0, len(
|
||||
pool._pool_threads["fake-provider-main"].request_handlers))
|
||||
|
||||
def test_exception_causing_decline_of_paused_request(self):
|
||||
"""
|
||||
Test that a paused request, that later gets declined because of
|
||||
an exception (say, thrown from a provider operation), unpauses
|
||||
and removes the request handler.
|
||||
"""
|
||||
|
||||
# First config has max-servers set to 2
|
||||
configfile = self.setup_config('pause_declined_1.yaml')
|
||||
self.useBuilder(configfile)
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
|
||||
# Create a request that uses all capacity (2 servers)
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
req.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req)
|
||||
req = self.waitForNodeRequest(req)
|
||||
self.assertEqual(req.state, zk.FULFILLED)
|
||||
self.assertEqual(len(req.nodes), 2)
|
||||
|
||||
# Now that we have 2 nodes in use, create another request that
|
||||
# requests two nodes, which should cause the request to pause.
|
||||
req2 = zk.NodeRequest()
|
||||
req2.state = zk.REQUESTED
|
||||
req2.node_types.append('fake-label')
|
||||
req2.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req2)
|
||||
req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
|
||||
|
||||
# Force an exception within the run handler.
|
||||
pool_worker = pool.getPoolWorkers('fake-provider')
|
||||
while not pool_worker[0].paused_handler:
|
||||
time.sleep(0.1)
|
||||
pool_worker[0].paused_handler.hasProviderQuota = mock.Mock(
|
||||
side_effect=Exception('mock exception')
|
||||
)
|
||||
|
||||
# The above exception should cause us to fail the paused request.
|
||||
req2 = self.waitForNodeRequest(req2, (zk.FAILED,))
|
||||
self.assertNotEqual(req2.declined_by, [])
|
||||
|
||||
# The exception handling should make sure that we unpause AND remove
|
||||
# the request handler.
|
||||
while pool_worker[0].paused_handler:
|
||||
time.sleep(0.1)
|
||||
self.assertEqual(0, len(pool_worker[0].request_handlers))
|
||||
|
|
Loading…
Reference in New Issue