Merge "Fix rare semaphore leak during reconfiguration"
This commit is contained in:
commit
b12d0d6164
|
@ -0,0 +1,110 @@
|
|||
- pipeline:
|
||||
name: check
|
||||
manager: independent
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: patchset-created
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 1
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -1
|
||||
|
||||
- semaphore:
|
||||
name: test-semaphore-two
|
||||
max: 2
|
||||
|
||||
- job:
|
||||
name: base
|
||||
parent: null
|
||||
nodeset:
|
||||
nodes:
|
||||
- name: controller
|
||||
label: label1
|
||||
|
||||
- job:
|
||||
name: project-test1
|
||||
run: playbooks/project-test1.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-one-test1
|
||||
semaphore: test-semaphore
|
||||
run: playbooks/semaphore-one-test1.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-one-test2
|
||||
semaphore: test-semaphore
|
||||
run: playbooks/semaphore-one-test2.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-two-test1
|
||||
semaphore: test-semaphore-two
|
||||
run: playbooks/semaphore-two-test1.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-two-test2
|
||||
semaphore: test-semaphore-two
|
||||
run: playbooks/semaphore-two-test2.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-one-test3
|
||||
semaphore: test-semaphore
|
||||
run: playbooks/semaphore-two-test1.yaml
|
||||
nodeset:
|
||||
nodes:
|
||||
- name: controller
|
||||
label: label1
|
||||
|
||||
- job:
|
||||
name: semaphore-one-test1-resources-first
|
||||
semaphore:
|
||||
name: test-semaphore
|
||||
resources-first: True
|
||||
run: playbooks/semaphore-one-test1.yaml
|
||||
|
||||
- job:
|
||||
name: semaphore-one-test2-resources-first
|
||||
semaphore:
|
||||
name: test-semaphore
|
||||
resources-first: True
|
||||
run: playbooks/semaphore-one-test1.yaml
|
||||
|
||||
- project:
|
||||
name: org/project
|
||||
check:
|
||||
jobs:
|
||||
- project-test1
|
||||
# This is the difference to zuul.yaml that is used in
|
||||
# test_semaphore_reconfigure_job_removal:
|
||||
# - semaphore-one-test1
|
||||
# - semaphore-one-test2
|
||||
|
||||
|
||||
- project:
|
||||
name: org/project1
|
||||
check:
|
||||
jobs:
|
||||
- project-test1
|
||||
- semaphore-two-test1
|
||||
- semaphore-two-test2
|
||||
|
||||
- project:
|
||||
name: org/project2
|
||||
check:
|
||||
jobs:
|
||||
- semaphore-one-test3
|
||||
|
||||
- project:
|
||||
name: org/project3
|
||||
check:
|
||||
jobs:
|
||||
- project-test1
|
||||
- semaphore-one-test1-resources-first
|
||||
- semaphore-one-test2-resources-first
|
||||
|
||||
- project:
|
||||
name: org/project4
|
||||
check:
|
||||
jobs:
|
||||
- semaphore-one-test1-resources-first
|
|
@ -6584,6 +6584,96 @@ class TestSemaphore(ZuulTestCase):
|
|||
self.assertFalse('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
def test_semaphore_reconfigure_job_removal(self):
|
||||
"Test job removal during reconfiguration with semaphores"
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
self.assertFalse('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertTrue('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.commitConfigUpdate(
|
||||
'common-config',
|
||||
'config/semaphore/git/common-config/zuul-remove-job.yaml')
|
||||
self.sched.reconfigure(self.config)
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Release job project-test1 which should be the only job left
|
||||
self.executor_server.release('project-test1')
|
||||
self.waitUntilSettled()
|
||||
|
||||
# The check pipeline should be empty
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
check_pipeline = tenant.layout.pipelines['check']
|
||||
items = check_pipeline.getAllItems()
|
||||
self.assertEqual(len(items), 0)
|
||||
|
||||
# The semaphore should be released
|
||||
self.assertFalse('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
def test_semaphore_reconfigure_job_removal_pending_node_request(self):
|
||||
"""
|
||||
Test job removal during reconfiguration with semaphores and pending
|
||||
node request.
|
||||
"""
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
# Pause nodepool so we can block the job in node request state during
|
||||
# reconfiguration.
|
||||
self.fake_nodepool.pause()
|
||||
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
self.assertFalse('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertTrue('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.commitConfigUpdate(
|
||||
'common-config',
|
||||
'config/semaphore/git/common-config/zuul-remove-job.yaml')
|
||||
self.sched.reconfigure(self.config)
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Now we can unpause nodepool
|
||||
self.fake_nodepool.unpause()
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Release job project-test1 which should be the only job left
|
||||
self.executor_server.release('project-test1')
|
||||
self.waitUntilSettled()
|
||||
|
||||
# The check pipeline should be empty
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
check_pipeline = tenant.layout.pipelines['check']
|
||||
items = check_pipeline.getAllItems()
|
||||
self.assertEqual(len(items), 0)
|
||||
|
||||
# The semaphore should be released
|
||||
self.assertFalse('test-semaphore' in
|
||||
tenant.semaphore_handler.semaphores)
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
|
||||
class TestSemaphoreMultiTenant(ZuulTestCase):
|
||||
tenant_config_file = 'config/multi-tenant-semaphore/main.yaml'
|
||||
|
|
|
@ -836,7 +836,7 @@ class PipelineManager(object):
|
|||
self.log.debug("Item %s status is now:\n %s" %
|
||||
(item, item.formatStatus()))
|
||||
|
||||
if build.retry:
|
||||
if build.retry and build.build_set.getJobNodeSet(build.job.name):
|
||||
build.build_set.removeJobNodeSet(build.job.name)
|
||||
|
||||
self._resumeBuilds(build.build_set)
|
||||
|
|
|
@ -830,6 +830,8 @@ class Scheduler(threading.Thread):
|
|||
item.current_build_set.node_requests.items():
|
||||
requests_to_cancel.append(
|
||||
(item.current_build_set, request))
|
||||
|
||||
semaphores_to_release = []
|
||||
for build in builds_to_cancel:
|
||||
self.log.info(
|
||||
"Canceling build %s during reconfiguration" % (build,))
|
||||
|
@ -848,14 +850,16 @@ class Scheduler(threading.Thread):
|
|||
self.log.exception(
|
||||
"Exception while removing nodeset from build %s "
|
||||
"for change %s" % (build, build.build_set.item.change))
|
||||
tenant.semaphore_handler.release(
|
||||
build.build_set.item, build.job)
|
||||
semaphores_to_release.append((build.build_set.item, build.job))
|
||||
for build_set, request in requests_to_cancel:
|
||||
self.log.info(
|
||||
"Canceling node request %s during reconfiguration",
|
||||
request)
|
||||
self.nodepool.cancelRequest(request)
|
||||
build_set.removeJobNodeRequest(request.job.name)
|
||||
semaphores_to_release.append((build_set.item, request.job))
|
||||
for item, job in semaphores_to_release:
|
||||
tenant.semaphore_handler.release(item, job)
|
||||
|
||||
def _reconfigureTenant(self, tenant):
|
||||
# This is called from _doReconfigureEvent while holding the
|
||||
|
|
Loading…
Reference in New Issue