summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Cacqueray <tdecacqu@redhat.com>2018-12-04 08:44:28 +0000
committerTobias Henkel <tobias.henkel@bmw.de>2018-12-05 10:30:20 +0100
commit6fe861f42a22eb9e8617ce2faebb76ae6aee4552 (patch)
tree9f9737f8cb0c015c4b6571cca543eeacd1bcb480
parent1b5d416f36dc027474dcc3eccfd5665e30884cc7 (diff)
Set type for error'ed instances
When a server creation fails but has an external id we create a new znode to offload the deletion of that node. This currently misses the node type which will trigger an exception during node launch [1]. This wedges the provider until the node deleter kicked in and deleted that node successfully. Fix this by storing the node type in this znode. [1] Exception Traceback (most recent call last): File "nodepool/driver/__init__.py", line 639, in run self._runHandler() File "nodepool/driver/__init__.py", line 563, in _runHandler self._waitForNodeSet() File "nodepool/driver/__init__.py", line 463, in _waitForNodeSet if not self.hasRemainingQuota(ntype): File "nodepool/driver/openstack/handler.py", line 314, in hasRemainingQuota self.manager.estimatedNodepoolQuotaUsed()) File "nodepool/driver/openstack/provider.py", line 164, in estimatedNodepoolQuotaUsed if node.type[0] not in provider_pool.labels: IndexError: list index out of range Change-Id: I67b269069dddb8349959802d7b1ee049a826d0c5 Co-authored-by: Tobias Henkel <tobias.henkel@bmw.de>
Notes
Notes (review): Code-Review+2: James E. Blair <corvus@inaugust.com> Code-Review+2: David Shrewsbury <shrewsbury.dave@gmail.com> Workflow+1: David Shrewsbury <shrewsbury.dave@gmail.com> Verified+2: Zuul Submitted-by: Zuul Submitted-at: Wed, 05 Dec 2018 16:14:50 +0000 Reviewed-on: https://review.openstack.org/622101 Project: openstack-infra/nodepool Branch: refs/heads/master
-rw-r--r--nodepool/driver/fake/provider.py5
-rw-r--r--nodepool/driver/openstack/handler.py1
-rw-r--r--nodepool/tests/unit/test_launcher.py36
3 files changed, 42 insertions, 0 deletions
diff --git a/nodepool/driver/fake/provider.py b/nodepool/driver/fake/provider.py
index 6e6fdcd..9a52c87 100644
--- a/nodepool/driver/fake/provider.py
+++ b/nodepool/driver/fake/provider.py
@@ -24,6 +24,7 @@ import openstack.exceptions
24from nodepool import exceptions 24from nodepool import exceptions
25from nodepool.driver.openstack.provider import OpenStackProvider 25from nodepool.driver.openstack.provider import OpenStackProvider
26from nodepool.driver.fake.handler import FakeNodeRequestHandler 26from nodepool.driver.fake.handler import FakeNodeRequestHandler
27from openstack.cloud.exc import OpenStackCloudCreateException
27 28
28 29
29class Dummy(object): 30class Dummy(object):
@@ -340,6 +341,7 @@ class FakeProvider(OpenStackProvider):
340 341
341 def __init__(self, provider, use_taskmanager): 342 def __init__(self, provider, use_taskmanager):
342 self.createServer_fails = 0 343 self.createServer_fails = 0
344 self.createServer_fails_with_external_id = 0
343 self.__client = FakeProvider.fake_cloud() 345 self.__client = FakeProvider.fake_cloud()
344 super(FakeProvider, self).__init__(provider, use_taskmanager) 346 super(FakeProvider, self).__init__(provider, use_taskmanager)
345 347
@@ -350,6 +352,9 @@ class FakeProvider(OpenStackProvider):
350 while self.createServer_fails: 352 while self.createServer_fails:
351 self.createServer_fails -= 1 353 self.createServer_fails -= 1
352 raise Exception("Expected createServer exception") 354 raise Exception("Expected createServer exception")
355 while self.createServer_fails_with_external_id:
356 self.createServer_fails_with_external_id -= 1
357 raise OpenStackCloudCreateException('server', 'fakeid')
353 return super(FakeProvider, self).createServer(*args, **kwargs) 358 return super(FakeProvider, self).createServer(*args, **kwargs)
354 359
355 def getRequestHandler(self, poolworker, request): 360 def getRequestHandler(self, poolworker, request):
diff --git a/nodepool/driver/openstack/handler.py b/nodepool/driver/openstack/handler.py
index 3a8247f..f1e82dd 100644
--- a/nodepool/driver/openstack/handler.py
+++ b/nodepool/driver/openstack/handler.py
@@ -254,6 +254,7 @@ class OpenStackNodeLauncher(NodeLauncher):
254 deleting_node = zk.Node() 254 deleting_node = zk.Node()
255 deleting_node.provider = self.node.provider 255 deleting_node.provider = self.node.provider
256 deleting_node.pool = self.node.pool 256 deleting_node.pool = self.node.pool
257 deleting_node.type = self.node.type
257 deleting_node.external_id = self.node.external_id 258 deleting_node.external_id = self.node.external_id
258 deleting_node.state = zk.DELETING 259 deleting_node.state = zk.DELETING
259 self.zk.storeNode(deleting_node) 260 self.zk.storeNode(deleting_node)
diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py
index 49bb688..3422b10 100644
--- a/nodepool/tests/unit/test_launcher.py
+++ b/nodepool/tests/unit/test_launcher.py
@@ -729,6 +729,42 @@ class TestLauncher(tests.DBTestCase):
729 # retries in config is set to 2, so 2 attempts to create a server 729 # retries in config is set to 2, so 2 attempts to create a server
730 self.assertEqual(0, manager.createServer_fails) 730 self.assertEqual(0, manager.createServer_fails)
731 731
732 def test_node_launch_retries_with_external_id(self):
733 configfile = self.setup_config('node_launch_retry.yaml')
734 pool = self.useNodepool(configfile, watermark_sleep=1)
735 self.useBuilder(configfile)
736 pool.start()
737 self.wait_for_config(pool)
738 manager = pool.getProviderManager('fake-provider')
739 manager.createServer_fails_with_external_id = 2
740 self.waitForImage('fake-provider', 'fake-image')
741
742 # Stop the DeletedNodeWorker so we can make sure the fake znode that
743 # is used to delete the failed servers is still around when requesting.
744 # the second node.
745 pool._delete_thread.stop()
746 time.sleep(1)
747
748 req = zk.NodeRequest()
749 req.state = zk.REQUESTED
750 req.node_types.append('fake-label')
751 self.zk.storeNodeRequest(req)
752
753 req = self.waitForNodeRequest(req)
754 self.assertEqual(req.state, zk.FAILED)
755
756 # retries in config is set to 2, so 2 attempts to create a server
757 self.assertEqual(0, manager.createServer_fails_with_external_id)
758
759 # Request another node to check if nothing is wedged
760 req = zk.NodeRequest()
761 req.state = zk.REQUESTED
762 req.node_types.append('fake-label')
763 self.zk.storeNodeRequest(req)
764
765 req = self.waitForNodeRequest(req)
766 self.assertEqual(req.state, zk.FULFILLED)
767
732 def test_node_delete_failure(self): 768 def test_node_delete_failure(self):
733 def fail_delete(self, name): 769 def fail_delete(self, name):
734 raise RuntimeError('Fake Error') 770 raise RuntimeError('Fake Error')