diff options
author | Tristan Cacqueray <tdecacqu@redhat.com> | 2018-12-04 08:44:28 +0000 |
---|---|---|
committer | Tobias Henkel <tobias.henkel@bmw.de> | 2018-12-05 10:30:20 +0100 |
commit | 6fe861f42a22eb9e8617ce2faebb76ae6aee4552 (patch) | |
tree | 9f9737f8cb0c015c4b6571cca543eeacd1bcb480 | |
parent | 1b5d416f36dc027474dcc3eccfd5665e30884cc7 (diff) |
Set type for error'ed instances
When a server creation fails but has an external id we create a new
znode to offload the deletion of that node. This currently misses the
node type which will trigger an exception during node launch [1]. This
wedges the provider until the node deleter kicked in and deleted that
node successfully. Fix this by storing the node type in this znode.
[1] Exception
Traceback (most recent call last):
File "nodepool/driver/__init__.py", line 639, in run
self._runHandler()
File "nodepool/driver/__init__.py", line 563, in _runHandler
self._waitForNodeSet()
File "nodepool/driver/__init__.py", line 463, in _waitForNodeSet
if not self.hasRemainingQuota(ntype):
File "nodepool/driver/openstack/handler.py", line 314, in hasRemainingQuota
self.manager.estimatedNodepoolQuotaUsed())
File "nodepool/driver/openstack/provider.py", line 164, in estimatedNodepoolQuotaUsed
if node.type[0] not in provider_pool.labels:
IndexError: list index out of range
Change-Id: I67b269069dddb8349959802d7b1ee049a826d0c5
Co-authored-by: Tobias Henkel <tobias.henkel@bmw.de>
Notes
Notes (review):
Code-Review+2: James E. Blair <corvus@inaugust.com>
Code-Review+2: David Shrewsbury <shrewsbury.dave@gmail.com>
Workflow+1: David Shrewsbury <shrewsbury.dave@gmail.com>
Verified+2: Zuul
Submitted-by: Zuul
Submitted-at: Wed, 05 Dec 2018 16:14:50 +0000
Reviewed-on: https://review.openstack.org/622101
Project: openstack-infra/nodepool
Branch: refs/heads/master
-rw-r--r-- | nodepool/driver/fake/provider.py | 5 | ||||
-rw-r--r-- | nodepool/driver/openstack/handler.py | 1 | ||||
-rw-r--r-- | nodepool/tests/unit/test_launcher.py | 36 |
3 files changed, 42 insertions, 0 deletions
diff --git a/nodepool/driver/fake/provider.py b/nodepool/driver/fake/provider.py index 6e6fdcd..9a52c87 100644 --- a/nodepool/driver/fake/provider.py +++ b/nodepool/driver/fake/provider.py | |||
@@ -24,6 +24,7 @@ import openstack.exceptions | |||
24 | from nodepool import exceptions | 24 | from nodepool import exceptions |
25 | from nodepool.driver.openstack.provider import OpenStackProvider | 25 | from nodepool.driver.openstack.provider import OpenStackProvider |
26 | from nodepool.driver.fake.handler import FakeNodeRequestHandler | 26 | from nodepool.driver.fake.handler import FakeNodeRequestHandler |
27 | from openstack.cloud.exc import OpenStackCloudCreateException | ||
27 | 28 | ||
28 | 29 | ||
29 | class Dummy(object): | 30 | class Dummy(object): |
@@ -340,6 +341,7 @@ class FakeProvider(OpenStackProvider): | |||
340 | 341 | ||
341 | def __init__(self, provider, use_taskmanager): | 342 | def __init__(self, provider, use_taskmanager): |
342 | self.createServer_fails = 0 | 343 | self.createServer_fails = 0 |
344 | self.createServer_fails_with_external_id = 0 | ||
343 | self.__client = FakeProvider.fake_cloud() | 345 | self.__client = FakeProvider.fake_cloud() |
344 | super(FakeProvider, self).__init__(provider, use_taskmanager) | 346 | super(FakeProvider, self).__init__(provider, use_taskmanager) |
345 | 347 | ||
@@ -350,6 +352,9 @@ class FakeProvider(OpenStackProvider): | |||
350 | while self.createServer_fails: | 352 | while self.createServer_fails: |
351 | self.createServer_fails -= 1 | 353 | self.createServer_fails -= 1 |
352 | raise Exception("Expected createServer exception") | 354 | raise Exception("Expected createServer exception") |
355 | while self.createServer_fails_with_external_id: | ||
356 | self.createServer_fails_with_external_id -= 1 | ||
357 | raise OpenStackCloudCreateException('server', 'fakeid') | ||
353 | return super(FakeProvider, self).createServer(*args, **kwargs) | 358 | return super(FakeProvider, self).createServer(*args, **kwargs) |
354 | 359 | ||
355 | def getRequestHandler(self, poolworker, request): | 360 | def getRequestHandler(self, poolworker, request): |
diff --git a/nodepool/driver/openstack/handler.py b/nodepool/driver/openstack/handler.py index 3a8247f..f1e82dd 100644 --- a/nodepool/driver/openstack/handler.py +++ b/nodepool/driver/openstack/handler.py | |||
@@ -254,6 +254,7 @@ class OpenStackNodeLauncher(NodeLauncher): | |||
254 | deleting_node = zk.Node() | 254 | deleting_node = zk.Node() |
255 | deleting_node.provider = self.node.provider | 255 | deleting_node.provider = self.node.provider |
256 | deleting_node.pool = self.node.pool | 256 | deleting_node.pool = self.node.pool |
257 | deleting_node.type = self.node.type | ||
257 | deleting_node.external_id = self.node.external_id | 258 | deleting_node.external_id = self.node.external_id |
258 | deleting_node.state = zk.DELETING | 259 | deleting_node.state = zk.DELETING |
259 | self.zk.storeNode(deleting_node) | 260 | self.zk.storeNode(deleting_node) |
diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py index 49bb688..3422b10 100644 --- a/nodepool/tests/unit/test_launcher.py +++ b/nodepool/tests/unit/test_launcher.py | |||
@@ -729,6 +729,42 @@ class TestLauncher(tests.DBTestCase): | |||
729 | # retries in config is set to 2, so 2 attempts to create a server | 729 | # retries in config is set to 2, so 2 attempts to create a server |
730 | self.assertEqual(0, manager.createServer_fails) | 730 | self.assertEqual(0, manager.createServer_fails) |
731 | 731 | ||
732 | def test_node_launch_retries_with_external_id(self): | ||
733 | configfile = self.setup_config('node_launch_retry.yaml') | ||
734 | pool = self.useNodepool(configfile, watermark_sleep=1) | ||
735 | self.useBuilder(configfile) | ||
736 | pool.start() | ||
737 | self.wait_for_config(pool) | ||
738 | manager = pool.getProviderManager('fake-provider') | ||
739 | manager.createServer_fails_with_external_id = 2 | ||
740 | self.waitForImage('fake-provider', 'fake-image') | ||
741 | |||
742 | # Stop the DeletedNodeWorker so we can make sure the fake znode that | ||
743 | # is used to delete the failed servers is still around when requesting. | ||
744 | # the second node. | ||
745 | pool._delete_thread.stop() | ||
746 | time.sleep(1) | ||
747 | |||
748 | req = zk.NodeRequest() | ||
749 | req.state = zk.REQUESTED | ||
750 | req.node_types.append('fake-label') | ||
751 | self.zk.storeNodeRequest(req) | ||
752 | |||
753 | req = self.waitForNodeRequest(req) | ||
754 | self.assertEqual(req.state, zk.FAILED) | ||
755 | |||
756 | # retries in config is set to 2, so 2 attempts to create a server | ||
757 | self.assertEqual(0, manager.createServer_fails_with_external_id) | ||
758 | |||
759 | # Request another node to check if nothing is wedged | ||
760 | req = zk.NodeRequest() | ||
761 | req.state = zk.REQUESTED | ||
762 | req.node_types.append('fake-label') | ||
763 | self.zk.storeNodeRequest(req) | ||
764 | |||
765 | req = self.waitForNodeRequest(req) | ||
766 | self.assertEqual(req.state, zk.FULFILLED) | ||
767 | |||
732 | def test_node_delete_failure(self): | 768 | def test_node_delete_failure(self): |
733 | def fail_delete(self, name): | 769 | def fail_delete(self, name): |
734 | raise RuntimeError('Fake Error') | 770 | raise RuntimeError('Fake Error') |