Request child nodes of paused jobs at higher priority

When requesting nodes for jobs which depend on paused parent jobs,
do so at a priority slightly higher than the pipeline priority
for that job.  A numerically lower value means a higher priority,
so, e.g., if a gate pipeline priority is 200, then the child job
nodes will be requested at 199.

This is based on the theory that by the time the parent job has
run and paused, the queue item has already spent its time in line
waiting for nodes.  It's fair for it to get the additional
required nodes ASAP, and doing so means that the paused job may
finish quicker, reducing wasted node-time.

Change-Id: I5a210d37ef223e93361004a26ef327ffeb530a3c
This commit is contained in:
James E. Blair 2019-02-22 17:46:01 -08:00
parent d60346181b
commit 8e09a758cf
8 changed files with 104 additions and 2 deletions

View File

@ -0,0 +1,17 @@
# Independent "check" pipeline: triggered by Gerrit patchset-created
# events, reporting Verified +1 on success and Verified -1 on failure.
- pipeline:
    name: check
    manager: independent
    post-review: true
    trigger:
      gerrit:
        - event: patchset-created
    success:
      gerrit:
        Verified: 1
    failure:
      gerrit:
        Verified: -1

# Root job with no parent.
- job:
    name: base
    parent: null

View File

@ -0,0 +1 @@
test

View File

@ -0,0 +1,7 @@
# Returns zuul.pause: true through zuul_return so that the job pauses
# and its child job can run.
- hosts: all
  tasks:
    - name: Pause and let child run
      zuul_return:
        data:
          zuul:
            pause: true

View File

@ -0,0 +1,2 @@
# Playbook with an empty task list: the play runs against all hosts
# but performs no work.
- hosts: all
  tasks: []

View File

@ -0,0 +1,23 @@
# Parent job: requests a single node labelled compile_label.
- job:
    name: compile
    run: playbooks/compile.yaml
    nodeset:
      nodes:
        - name: compile_node
          label: compile_label

# Child job: requests a single node labelled test_label.
- job:
    name: test
    run: playbooks/test.yaml
    nodeset:
      nodes:
        - name: test_node
          label: test_label

# In the check pipeline, "test" depends on "compile".
- project:
    check:
      jobs:
        - compile
        - test:
            dependencies:
              - compile

View File

@ -0,0 +1,7 @@
# Single tenant loading both projects from Gerrit as config-projects.
- tenant:
    name: tenant-one
    source:
      gerrit:
        config-projects:
          - common-config
          - org/project

View File

@ -31,6 +31,7 @@ from tests.base import (
ZuulDBTestCase,
FIXTURE_DIR,
simple_layout,
iterate_timeout,
)
@ -4637,7 +4638,7 @@ class TestJobPause(AnsibleZuulTestCase):
self.executor_server.verbose = True
# Second node request should fail
fail = {'_oid': '200-0000000001'}
fail = {'_oid': '199-0000000001'}
self.fake_nodepool.addFailRequest(fail)
A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
@ -5101,3 +5102,38 @@ class TestForceMergeMissingTemplate(ZuulTestCase):
self.assertHistory([
dict(name='other-job', result='SUCCESS', changes='2,1'),
])
class TestJobPausePriority(AnsibleZuulTestCase):
    """Check the node request priority used for children of paused jobs."""

    tenant_config_file = 'config/job-pause-priority/main.yaml'

    def test_paused_job_priority(self):
        """Test that nodes for children of paused jobs have a higher priority.

        The first node request is expected at priority 200; the request
        that shows up after the held build is released is expected at 199.
        """
        nodepool = self.fake_nodepool
        executor = self.executor_server

        # Keep nodepool paused so the initial request stays pending and
        # can be inspected; hold builds so the first job does not finish.
        nodepool.pause()
        executor.hold_jobs_in_build = True

        change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
        self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
        self.waitUntilSettled()

        pending = nodepool.getNodeRequests()
        self.assertEqual(len(pending), 1)
        self.assertEqual(pending[0]['_oid'], '200-0000000000')

        # Let the first request be fulfilled.
        nodepool.unpause()
        self.waitUntilSettled()

        # Pause nodepool again before releasing the held build so the
        # next request remains pending long enough to be checked.
        nodepool.pause()
        executor.hold_jobs_in_build = False
        executor.release()

        # Poll (up to 60 seconds) until a new node request appears.
        for _ in iterate_timeout(60, 'paused job'):
            pending = nodepool.getNodeRequests()
            if pending:
                break

        self.assertEqual(len(pending), 1)
        self.assertEqual(pending[0]['_oid'], '199-0000000001')

        nodepool.unpause()
        self.waitUntilSettled()

View File

@ -723,11 +723,20 @@ class NodeRequest(object):
@property
def priority(self):
    """Return the numeric priority to use for this node request.

    The base value is looked up in PRIORITY_MAP using the precedence of
    the pipeline owning this request's build set, or PRECEDENCE_NORMAL
    when the request has no build set.  If any (recursive) parent of
    this request's job has a paused build, the value is lowered by one
    so the request is made at a slightly higher priority.  The result
    is never below 0.
    """
    precedence_adjustment = 0
    if self.build_set:
        precedence = self.build_set.item.pipeline.precedence
        job_graph = self.build_set.item.job_graph
        if job_graph:
            for parent in job_graph.getParentJobsRecursively(
                    self.job.name):
                build = self.build_set.getBuild(parent.name)
                # NOTE(review): presumably getBuild returns None for a
                # parent that has no build; guard so such a parent is
                # skipped instead of raising AttributeError.
                if build is not None and build.paused:
                    precedence_adjustment = -1
                    # One paused parent is enough; stop scanning.
                    break
    else:
        precedence = PRECEDENCE_NORMAL
    initial_precedence = PRIORITY_MAP[precedence]
    # Clamp at 0 so the adjustment cannot yield a negative priority.
    return max(0, initial_precedence + precedence_adjustment)
@property
def fulfilled(self):