summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.openstack.org>2019-02-05 19:41:22 +0000
committerGerrit Code Review <review@openstack.org>2019-02-05 19:41:22 +0000
commit069a550f81bd1e647400c3e05c6cb86de8c9a5b7 (patch)
tree2b79934164570c0ee2eed5e40eda241813e575e2
parentd010bec30ab5a6a0a454110b766260f2d8a42fc1 (diff)
parent2dde4404e4a2048f6d1963c1823b2d9e657bbf85 (diff)
Merge "Fix missing semaphore release on node failure"
-rw-r--r--tests/fixtures/config/semaphore/git/common-config/zuul.yaml6
-rw-r--r--tests/fixtures/config/semaphore/git/org_project4/README1
-rw-r--r--tests/fixtures/config/semaphore/main.yaml1
-rw-r--r--tests/unit/test_scheduler.py64
-rw-r--r--zuul/manager/__init__.py3
5 files changed, 75 insertions, 0 deletions
diff --git a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
index dcb0cbd..01ca390 100644
--- a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
@@ -103,3 +103,9 @@
103 - project-test1 103 - project-test1
104 - semaphore-one-test1-resources-first 104 - semaphore-one-test1-resources-first
105 - semaphore-one-test2-resources-first 105 - semaphore-one-test2-resources-first
106
107- project:
108 name: org/project4
109 check:
110 jobs:
111 - semaphore-one-test1-resources-first
diff --git a/tests/fixtures/config/semaphore/git/org_project4/README b/tests/fixtures/config/semaphore/git/org_project4/README
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/tests/fixtures/config/semaphore/git/org_project4/README
@@ -0,0 +1 @@
test
diff --git a/tests/fixtures/config/semaphore/main.yaml b/tests/fixtures/config/semaphore/main.yaml
index 919921e..3d7a5c9 100644
--- a/tests/fixtures/config/semaphore/main.yaml
+++ b/tests/fixtures/config/semaphore/main.yaml
@@ -9,3 +9,4 @@
9 - org/project1 9 - org/project1
10 - org/project2 10 - org/project2
11 - org/project3 11 - org/project3
12 - org/project4
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 0848103..6258398 100644
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -6251,6 +6251,38 @@ class TestSemaphore(ZuulTestCase):
6251 self.assertEqual(A.reported, 1) 6251 self.assertEqual(A.reported, 1)
6252 self.assertEqual(B.reported, 1) 6252 self.assertEqual(B.reported, 1)
6253 6253
6254 def test_semaphore_node_failure(self):
6255 "Test semaphore and node failure"
6256 tenant = self.sched.abide.tenants.get('tenant-one')
6257
6258 # Pause nodepool so we can fail the node request later
6259 self.fake_nodepool.pause()
6260
6261 A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
6262 self.assertFalse('test-semaphore' in
6263 tenant.semaphore_handler.semaphores)
6264 self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
6265 self.waitUntilSettled()
6266
6267 # By default we first lock the semaphore and then get the nodes
6268 # so at this point the semaphore needs to be aquired.
6269 self.assertTrue('test-semaphore' in
6270 tenant.semaphore_handler.semaphores)
6271
6272 # Fail the node request and unpause
6273 req = self.fake_nodepool.getNodeRequests()[0]
6274 self.fake_nodepool.addFailRequest(req)
6275 self.fake_nodepool.unpause()
6276 self.waitUntilSettled()
6277
6278 # At this point the job that holds the semaphore failed with
6279 # node_failure and the semaphore must be released.
6280 self.assertFalse('test-semaphore' in
6281 tenant.semaphore_handler.semaphores)
6282 self.assertEquals(1, A.reported)
6283 self.assertIn('semaphore-one-test3 semaphore-one-test3 : NODE_FAILURE',
6284 A.messages[0])
6285
6254 def test_semaphore_resources_first(self): 6286 def test_semaphore_resources_first(self):
6255 "Test semaphores with max=1 (mutex) and get resources first" 6287 "Test semaphores with max=1 (mutex) and get resources first"
6256 tenant = self.sched.abide.tenants.get('tenant-one') 6288 tenant = self.sched.abide.tenants.get('tenant-one')
@@ -6298,6 +6330,38 @@ class TestSemaphore(ZuulTestCase):
6298 self.executor_server.release() 6330 self.executor_server.release()
6299 self.waitUntilSettled() 6331 self.waitUntilSettled()
6300 6332
6333 def test_semaphore_resources_first_node_failure(self):
6334 "Test semaphore and node failure"
6335 tenant = self.sched.abide.tenants.get('tenant-one')
6336
6337 # Pause nodepool so we can fail the node request later
6338 self.fake_nodepool.pause()
6339
6340 A = self.fake_gerrit.addFakeChange('org/project4', 'master', 'A')
6341 self.assertFalse('test-semaphore' in
6342 tenant.semaphore_handler.semaphores)
6343 self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
6344 self.waitUntilSettled()
6345
6346 # With resources first we first get the nodes so at this point the
6347 # semaphore must not be aquired.
6348 self.assertFalse('test-semaphore' in
6349 tenant.semaphore_handler.semaphores)
6350
6351 # Fail the node request and unpause
6352 req = self.fake_nodepool.getNodeRequests()[0]
6353 self.fake_nodepool.addFailRequest(req)
6354 self.fake_nodepool.unpause()
6355 self.waitUntilSettled()
6356
6357 # At this point the job should never have acuired a semaphore so check
6358 # that it still has not locked a semaphore.
6359 self.assertFalse('test-semaphore' in
6360 tenant.semaphore_handler.semaphores)
6361 self.assertEquals(1, A.reported)
6362 self.assertIn('semaphore-one-test1-resources-first : NODE_FAILURE',
6363 A.messages[0])
6364
6301 def test_semaphore_zk_error(self): 6365 def test_semaphore_zk_error(self):
6302 "Test semaphore release with zk error" 6366 "Test semaphore release with zk error"
6303 tenant = self.sched.abide.tenants.get('tenant-one') 6367 tenant = self.sched.abide.tenants.get('tenant-one')
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 416f3f1..d3ad0d6 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -864,6 +864,9 @@ class PipelineManager(object):
864 (request, request.job.name,)) 864 (request, request.job.name,))
865 build_set.item.setNodeRequestFailure(request.job) 865 build_set.item.setNodeRequestFailure(request.job)
866 self._resumeBuilds(request.build_set) 866 self._resumeBuilds(request.build_set)
867 tenant = build_set.item.pipeline.tenant
868 tenant.semaphore_handler.release(build_set.item, request.job)
869
867 self.log.info("Completed node request %s for job %s of item %s " 870 self.log.info("Completed node request %s for job %s of item %s "
868 "with nodes %s" % 871 "with nodes %s" %
869 (request, request.job, build_set.item, 872 (request, request.job, build_set.item,