Support requests for specific providers

In Zuul we support a paradigm where a job can be paused, whereupon
dependent jobs then request nodes and are started.  It is nearly
always the case that the user would like the nodes for the dependent
jobs to be in the same provider as the parent, as the use cases
generally involve transferring data between the two.

To support this, add a 'provider' attribute to the node request which,
if present, means that all on-line launchers for that provider must
decline the request before anyone else processes it.  This will cause
the desired behavior if everything is working, and if some calamity
befalls that launcher, other launchers can still attempt to fulfill
the request, which might work, or might not, but performing that
last-ditch effort is fine once there are no alternatives.

Change-Id: I91fe05081695d454651f6068eac5c08ac30ff899
This commit is contained in:
James E. Blair 2019-02-26 11:15:18 -08:00
parent ff186a53d3
commit 3561e278c6
3 changed files with 83 additions and 0 deletions

View File

@ -168,6 +168,12 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
if provider.max_concurrency == 0:
return True
# Get the launchers which are currently online. This may
# become out of date as the loop progresses, but it should be
# good enough to determine whether we should process requests
# which express a preference for a specific provider.
launchers = self.zk.getRegisteredLaunchers()
# Sort requests by queue priority, then, for all requests at
# the same priority, use the relative_priority field to
# further sort, then finally, the submission order.
@ -210,6 +216,22 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
if self.launcher_id in req.declined_by:
continue
# Skip this request if it is requesting another provider
# which is online
if req.provider and req.provider != self.provider_name:
# The request is asking for a specific provider
candidate_launchers = set(
[x.id for x in launchers
if x.provider_name == req.provider])
if candidate_launchers:
# There is a launcher online which can satisfy the request
if not candidate_launchers.issubset(set(req.declined_by)):
# It has not yet declined the request, so yield to it.
self.log.debug(
"Yielding request %s to provider %s %s",
req.id, req.provider, candidate_launchers)
continue
self.log.debug("Locking request %s", req.id)
try:
self.zk.lockNodeRequest(req, blocking=False)

View File

@ -536,6 +536,63 @@ class TestLauncher(tests.DBTestCase):
self.assertEqual(nodes[0].username, 'zuul')
self.assertNotEqual(nodes[0].host_keys, [])
def test_node_request_provider(self):
    """Check that provider-specific node requests land on that provider.

    Two requests for 'fake-label' are stored, one naming 'fake-provider'
    and one naming 'fake-provider2'.  Each is expected to end up
    FULFILLED with exactly one node whose ``provider`` attribute equals
    the provider named in the request.
    """
    configfile = self.setup_config('node_two_provider.yaml')
    self.useBuilder(configfile)
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    provider_names = ('fake-provider', 'fake-provider2')

    # Validate we have images in both providers
    for provider_name in provider_names:
        self.waitForImage(provider_name, 'fake-image')
    self.waitForNodes('fake-label', 1)

    # Submit every request up front, before waiting on any of them,
    # so both are outstanding at the same time.
    submitted = []
    for provider_name in provider_names:
        request = zk.NodeRequest()
        request.state = zk.REQUESTED
        request.provider = provider_name
        request.node_types.append('fake-label')
        self.zk.storeNodeRequest(request)
        submitted.append(request)

    # Wait for each request in submission order and verify where its
    # node was placed.
    for provider_name, request in zip(provider_names, submitted):
        fulfilled = self.waitForNodeRequest(request)
        self.assertEqual(fulfilled.state, zk.FULFILLED)
        self.assertEqual(len(fulfilled.nodes), 1)
        node = self.zk.getNode(fulfilled.nodes[0])
        self.assertEqual(node.provider, provider_name)
def test_node_request_invalid_provider(self):
    """Check that a request naming an unknown provider is still served.

    The request asks for 'fake-label' from 'missing-provider', a name
    that differs from both providers whose images are awaited below.
    The test expects the request to reach FULFILLED with exactly one
    node.
    """
    configfile = self.setup_config('node_two_provider.yaml')
    self.useBuilder(configfile)
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    # Validate we have images in both providers
    for provider_name in ('fake-provider', 'fake-provider2'):
        self.waitForImage(provider_name, 'fake-image')
    self.waitForNodes('fake-label', 1)

    request = zk.NodeRequest()
    request.state = zk.REQUESTED
    request.provider = 'missing-provider'
    request.node_types.append('fake-label')
    self.zk.storeNodeRequest(request)

    fulfilled = self.waitForNodeRequest(request)
    self.assertEqual(fulfilled.state, zk.FULFILLED)
    self.assertEqual(len(fulfilled.nodes), 1)
    # The node is looked up but its attributes are not inspected; which
    # provider supplied it is deliberately left unchecked here.
    self.zk.getNode(fulfilled.nodes[0])
def test_node_boot_from_volume(self):
"""Test that an image and node are created from a volume"""
configfile = self.setup_config('node_boot_from_volume.yaml')

View File

@ -442,6 +442,7 @@ class NodeRequest(BaseModel):
self.nodes = []
self.reuse = True
self.requestor = None
self.provider = None
self.relative_priority = 0
def __repr__(self):
@ -458,6 +459,7 @@ class NodeRequest(BaseModel):
self.nodes == other.nodes and
self.reuse == other.reuse and
self.requestor == other.requestor and
self.provider == other.provider and
self.relative_priority == other.relative_priority)
else:
return False
@ -472,6 +474,7 @@ class NodeRequest(BaseModel):
d['nodes'] = self.nodes
d['reuse'] = self.reuse
d['requestor'] = self.requestor
d['provider'] = self.provider
d['relative_priority'] = self.relative_priority
return d
@ -497,6 +500,7 @@ class NodeRequest(BaseModel):
self.nodes = d.get('nodes', [])
self.reuse = d.get('reuse', True)
self.requestor = d.get('requestor')
self.provider = d.get('provider')
self.relative_priority = d.get('relative_priority', 0)