summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames E. Blair <jeblair@redhat.com>2019-02-26 11:15:18 -0800
committerJames E. Blair <jeblair@redhat.com>2019-02-26 16:00:35 -0800
commit3561e278c6178436aa1d8d673f839a676598ea17 (patch)
tree4a4730f186d09e9083a89b2339f5674753a24772
parentff186a53d3c164f7051a25f1db98ef047a00a124 (diff)
Support requests for specific providers
In Zuul we support a paradigm where a job can be paused, whereupon dependent jobs then request nodes and are started. It is nearly always the case that the user would like the nodes for the dependent jobs to be in the same provider as the parent, as the use cases generally involve transferring data between the two. To support this, add a 'provider' attribute to the node request which, if present, means that all on-line launchers for that provider must decline the request before anyone else processes it. This will cause the desired behavior if everything is working, and if some calamity befalls that launcher, other launchers can still attempt to fulfill the request, which might work, or might not, but performing that last ditch effort is fine once there are no alternatives. Change-Id: I91fe05081695d454651f6068eac5c08ac30ff899
Notes
Notes (review): Code-Review+2: Clark Boylan <cboylan@sapwetik.org> Code-Review+2: Tobias Henkel <tobias.henkel@bmw.de> Code-Review+2: Monty Taylor <mordred@inaugust.com> Workflow+1: Monty Taylor <mordred@inaugust.com> Verified+2: Zuul Submitted-by: Zuul Submitted-at: Thu, 28 Feb 2019 11:02:28 +0000 Reviewed-on: https://review.openstack.org/639418 Project: openstack-infra/nodepool Branch: refs/heads/master
-rwxr-xr-xnodepool/launcher.py22
-rw-r--r--nodepool/tests/unit/test_launcher.py57
-rwxr-xr-xnodepool/zk.py4
3 files changed, 83 insertions, 0 deletions
diff --git a/nodepool/launcher.py b/nodepool/launcher.py
index 90548b8..b301a56 100755
--- a/nodepool/launcher.py
+++ b/nodepool/launcher.py
@@ -168,6 +168,12 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
168 if provider.max_concurrency == 0: 168 if provider.max_concurrency == 0:
169 return True 169 return True
170 170
171 # Get the launchers which are currently online. This may
172 # become out of date as the loop progresses, but it should be
173 # good enough to determine whether we should process requests
174 # which express a preference for a specific provider.
175 launchers = self.zk.getRegisteredLaunchers()
176
171 # Sort requests by queue priority, then, for all requests at 177 # Sort requests by queue priority, then, for all requests at
172 # the same priority, use the relative_priority field to 178 # the same priority, use the relative_priority field to
173 # further sort, then finally, the submission order. 179 # further sort, then finally, the submission order.
@@ -210,6 +216,22 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
210 if self.launcher_id in req.declined_by: 216 if self.launcher_id in req.declined_by:
211 continue 217 continue
212 218
219 # Skip this request if it is requesting another provider
220 # which is online
221 if req.provider and req.provider != self.provider_name:
222 # The request is asking for a specific provider
223 candidate_launchers = set(
224 [x.id for x in launchers
225 if x.provider_name == req.provider])
226 if candidate_launchers:
227 # There is a launcher online which can satisfy the request
228 if not candidate_launchers.issubset(set(req.declined_by)):
229 # It has not yet declined the request, so yield to it.
230 self.log.debug(
231 "Yielding request %s to provider %s %s",
232 req.id, req.provider, candidate_launchers)
233 continue
234
213 self.log.debug("Locking request %s", req.id) 235 self.log.debug("Locking request %s", req.id)
214 try: 236 try:
215 self.zk.lockNodeRequest(req, blocking=False) 237 self.zk.lockNodeRequest(req, blocking=False)
diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py
index 753cd72..79ba957 100644
--- a/nodepool/tests/unit/test_launcher.py
+++ b/nodepool/tests/unit/test_launcher.py
@@ -536,6 +536,63 @@ class TestLauncher(tests.DBTestCase):
536 self.assertEqual(nodes[0].username, 'zuul') 536 self.assertEqual(nodes[0].username, 'zuul')
537 self.assertNotEqual(nodes[0].host_keys, []) 537 self.assertNotEqual(nodes[0].host_keys, [])
538 538
539 def test_node_request_provider(self):
540 """Test that a node request for a specific provider is honored"""
541 configfile = self.setup_config('node_two_provider.yaml')
542 self.useBuilder(configfile)
543 pool = self.useNodepool(configfile, watermark_sleep=1)
544 pool.start()
545 # Validate we have images in both providers
546 self.waitForImage('fake-provider', 'fake-image')
547 self.waitForImage('fake-provider2', 'fake-image')
548 self.waitForNodes('fake-label', 1)
549
550 req1 = zk.NodeRequest()
551 req1.state = zk.REQUESTED
552 req1.provider = 'fake-provider'
553 req1.node_types.append('fake-label')
554 self.zk.storeNodeRequest(req1)
555
556 req2 = zk.NodeRequest()
557 req2.state = zk.REQUESTED
558 req2.provider = 'fake-provider2'
559 req2.node_types.append('fake-label')
560 self.zk.storeNodeRequest(req2)
561
562 req1 = self.waitForNodeRequest(req1)
563 self.assertEqual(req1.state, zk.FULFILLED)
564 self.assertEqual(len(req1.nodes), 1)
565 node = self.zk.getNode(req1.nodes[0])
566 self.assertEqual(node.provider, 'fake-provider')
567
568 req2 = self.waitForNodeRequest(req2)
569 self.assertEqual(req2.state, zk.FULFILLED)
570 self.assertEqual(len(req2.nodes), 1)
571 node = self.zk.getNode(req2.nodes[0])
572 self.assertEqual(node.provider, 'fake-provider2')
573
574 def test_node_request_invalid_provider(self):
575 """Test that a node request for a missing provider is handled"""
576 configfile = self.setup_config('node_two_provider.yaml')
577 self.useBuilder(configfile)
578 pool = self.useNodepool(configfile, watermark_sleep=1)
579 pool.start()
580 # Validate we have images in both providers
581 self.waitForImage('fake-provider', 'fake-image')
582 self.waitForImage('fake-provider2', 'fake-image')
583 self.waitForNodes('fake-label', 1)
584
585 req1 = zk.NodeRequest()
586 req1.state = zk.REQUESTED
587 req1.provider = 'missing-provider'
588 req1.node_types.append('fake-label')
589 self.zk.storeNodeRequest(req1)
590
591 req1 = self.waitForNodeRequest(req1)
592 self.assertEqual(req1.state, zk.FULFILLED)
593 self.assertEqual(len(req1.nodes), 1)
594 self.zk.getNode(req1.nodes[0])
595
539 def test_node_boot_from_volume(self): 596 def test_node_boot_from_volume(self):
540 """Test that an image and node are created from a volume""" 597 """Test that an image and node are created from a volume"""
541 configfile = self.setup_config('node_boot_from_volume.yaml') 598 configfile = self.setup_config('node_boot_from_volume.yaml')
diff --git a/nodepool/zk.py b/nodepool/zk.py
index ee456c4..9d0a85f 100755
--- a/nodepool/zk.py
+++ b/nodepool/zk.py
@@ -442,6 +442,7 @@ class NodeRequest(BaseModel):
442 self.nodes = [] 442 self.nodes = []
443 self.reuse = True 443 self.reuse = True
444 self.requestor = None 444 self.requestor = None
445 self.provider = None
445 self.relative_priority = 0 446 self.relative_priority = 0
446 447
447 def __repr__(self): 448 def __repr__(self):
@@ -458,6 +459,7 @@ class NodeRequest(BaseModel):
458 self.nodes == other.nodes and 459 self.nodes == other.nodes and
459 self.reuse == other.reuse and 460 self.reuse == other.reuse and
460 self.requestor == other.requestor and 461 self.requestor == other.requestor and
462 self.provider == other.provider and
461 self.relative_priority == other.relative_priority) 463 self.relative_priority == other.relative_priority)
462 else: 464 else:
463 return False 465 return False
@@ -472,6 +474,7 @@ class NodeRequest(BaseModel):
472 d['nodes'] = self.nodes 474 d['nodes'] = self.nodes
473 d['reuse'] = self.reuse 475 d['reuse'] = self.reuse
474 d['requestor'] = self.requestor 476 d['requestor'] = self.requestor
477 d['provider'] = self.provider
475 d['relative_priority'] = self.relative_priority 478 d['relative_priority'] = self.relative_priority
476 return d 479 return d
477 480
@@ -497,6 +500,7 @@ class NodeRequest(BaseModel):
497 self.nodes = d.get('nodes', []) 500 self.nodes = d.get('nodes', [])
498 self.reuse = d.get('reuse', True) 501 self.reuse = d.get('reuse', True)
499 self.requestor = d.get('requestor') 502 self.requestor = d.get('requestor')
503 self.provider = d.get('provider')
500 self.relative_priority = d.get('relative_priority', 0) 504 self.relative_priority = d.get('relative_priority', 0)
501 505
502 506