Merge "Resume paused job with skipped children"
This commit is contained in:
commit
4d91ea3bef
|
@ -1502,6 +1502,9 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
build = self.executor_server.job_builds[self.job.unique]
|
||||
|
||||
if self.executor_server._run_ansible:
|
||||
# Call run on the fake build omitting the result so we also can
|
||||
# hold real ansible jobs.
|
||||
build.run()
|
||||
result = super(RecordingAnsibleJob, self).runAnsible(
|
||||
cmd, timeout, playbook, wrapped)
|
||||
else:
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
test
|
|
@ -0,0 +1,7 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- name: Pause and let child run
|
||||
zuul_return:
|
||||
data:
|
||||
zuul:
|
||||
pause: true
|
|
@ -0,0 +1,4 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- fail:
|
||||
msg: pre-test fails
|
|
@ -0,0 +1,4 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- debug:
|
||||
msg: Dummy
|
|
@ -0,0 +1,22 @@
|
|||
- job:
|
||||
name: compile
|
||||
run: playbooks/compile.yaml
|
||||
|
||||
- job:
|
||||
name: test
|
||||
run: playbooks/test.yaml
|
||||
|
||||
- job:
|
||||
name: pre-test
|
||||
run: playbooks/pre-test.yaml
|
||||
|
||||
|
||||
- project:
|
||||
check:
|
||||
jobs:
|
||||
- compile
|
||||
- pre-test
|
||||
- test:
|
||||
dependencies:
|
||||
- compile
|
||||
- pre-test
|
|
@ -6,3 +6,4 @@
|
|||
- common-config
|
||||
- org/project
|
||||
- org/project2
|
||||
- org/project3
|
||||
|
|
|
@ -4272,3 +4272,82 @@ class TestJobPause(AnsibleZuulTestCase):
|
|||
self.assertHistory([
|
||||
dict(name='just-pause', result='SUCCESS', changes='1,1'),
|
||||
], ordered=False)
|
||||
|
||||
def test_job_pause_skipped_child(self):
|
||||
"""
|
||||
Tests that a paused job is resumed with externally skipped jobs.
|
||||
|
||||
Tests that this situation won't lead to stuck buildsets.
|
||||
Compile pauses before pre-test fails.
|
||||
|
||||
1. compile (pauses) --+
|
||||
|
|
||||
+--> test (skipped because of pre-test)
|
||||
|
|
||||
2. pre-test (fails) --+
|
||||
"""
|
||||
self.wait_timeout = 120
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
# Output extra ansible info so we might see errors.
|
||||
self.executor_server.verbose = True
|
||||
self.executor_server.keep_jobdir = True
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project3', 'master', 'A')
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('compile')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='pre-test', result='FAILURE', changes='1,1'),
|
||||
dict(name='compile', result='SUCCESS', changes='1,1'),
|
||||
])
|
||||
|
||||
self.assertIn('test : SKIPPED', A.messages[0])
|
||||
|
||||
def test_job_pause_pre_skipped_child(self):
|
||||
"""
|
||||
Tests that a paused job is resumed with pre-existing skipped jobs.
|
||||
|
||||
Tests that this situation won't lead to stuck buildsets.
|
||||
The pre-test fails before compile pauses so test is already skipped
|
||||
when compile pauses.
|
||||
|
||||
1. pre-test (fails) --+
|
||||
|
|
||||
+--> test (skipped because of pre-test)
|
||||
|
|
||||
2. compile (pauses) --+
|
||||
"""
|
||||
self.wait_timeout = 120
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
# Output extra ansible info so we might see errors.
|
||||
self.executor_server.verbose = True
|
||||
self.executor_server.keep_jobdir = True
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project3', 'master', 'A')
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('pre-test')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='pre-test', result='FAILURE', changes='1,1'),
|
||||
dict(name='compile', result='SUCCESS', changes='1,1'),
|
||||
])
|
||||
|
||||
self.assertIn('test : SKIPPED', A.messages[0])
|
||||
|
|
|
@ -721,42 +721,34 @@ class PipelineManager(object):
|
|||
def onBuildPaused(self, build):
|
||||
item = build.build_set.item
|
||||
self.log.debug("Build %s of %s paused", build, item.change)
|
||||
|
||||
child_jobs = item.job_graph.getDependentJobsRecursively(build.job.name)
|
||||
if not child_jobs:
|
||||
# if there are no child jobs we need to directly resume the build
|
||||
self.log.debug("Resuming build %s of %s with no child jobs",
|
||||
build, item.change)
|
||||
self.sched.executor.resumeBuild(build)
|
||||
|
||||
item.setResult(build)
|
||||
|
||||
# We need to resume builds because we could either have no children
|
||||
# or have children that are already skipped.
|
||||
self._resumeBuilds(build.build_set)
|
||||
return True
|
||||
|
||||
def _resumeParents(self, build):
|
||||
def _resumeBuilds(self, build_set):
|
||||
"""
|
||||
Resumes all parent jobs where all children are finished.
|
||||
Resumes all paused builds of a buildset that may be resumed.
|
||||
"""
|
||||
item = build.build_set.item
|
||||
builds = item.current_build_set.builds
|
||||
jobgraph = item.job_graph
|
||||
parent_builds = [builds.get(x.name) for x in
|
||||
jobgraph.getParentJobsRecursively(
|
||||
build.job.name, soft=True)]
|
||||
for parent_build in parent_builds:
|
||||
if parent_build and parent_build.paused:
|
||||
# check if all child jobs are finished
|
||||
child_builds = [builds.get(x.name) for x in
|
||||
jobgraph.getDependentJobsRecursively(
|
||||
parent_build.job.name)]
|
||||
all_completed = True
|
||||
for child_build in child_builds:
|
||||
if not child_build or not child_build.result:
|
||||
all_completed = False
|
||||
break
|
||||
jobgraph = build_set.item.job_graph
|
||||
for build in build_set.builds.values():
|
||||
if not build.paused:
|
||||
continue
|
||||
# check if all child jobs are finished
|
||||
child_builds = [build_set.builds.get(x.name) for x in
|
||||
jobgraph.getDependentJobsRecursively(
|
||||
build.job.name)]
|
||||
all_completed = True
|
||||
for child_build in child_builds:
|
||||
if not child_build or not child_build.result:
|
||||
all_completed = False
|
||||
break
|
||||
|
||||
if all_completed:
|
||||
self.sched.executor.resumeBuild(parent_build)
|
||||
parent_build.paused = False
|
||||
if all_completed:
|
||||
self.sched.executor.resumeBuild(build)
|
||||
build.paused = False
|
||||
|
||||
def onBuildCompleted(self, build):
|
||||
item = build.build_set.item
|
||||
|
@ -771,7 +763,7 @@ class PipelineManager(object):
|
|||
if build.retry:
|
||||
build.build_set.removeJobNodeSet(build.job.name)
|
||||
|
||||
self._resumeParents(build)
|
||||
self._resumeBuilds(build.build_set)
|
||||
return True
|
||||
|
||||
def onMergeCompleted(self, event):
|
||||
|
@ -799,7 +791,7 @@ class PipelineManager(object):
|
|||
self.log.info("Node request %s: failure for %s" %
|
||||
(request, request.job.name,))
|
||||
build_set.item.setNodeRequestFailure(request.job)
|
||||
self._resumeParents(request)
|
||||
self._resumeBuilds(request.build_set)
|
||||
self.log.info("Completed node request %s for job %s of item %s "
|
||||
"with nodes %s" %
|
||||
(request, request.job, build_set.item,
|
||||
|
|
Loading…
Reference in New Issue