summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.openstack.org>2019-01-03 23:00:24 +0000
committerGerrit Code Review <review@openstack.org>2019-01-03 23:00:24 +0000
commitc1006610fdaa52ad77e223b63e1ee0fa35483a51 (patch)
treef36013f4fc0599a040df9d82f2a97f2b4b540443
parent3f03cf43b92bc390fdc0ec70eceb6646f82ed822 (diff)
parent31a7ddc7916a3e9c88e73fd52124fa6c1720ca26 (diff)
Merge "Fix items stuck in queue pending node requests"
-rw-r--r--tests/unit/test_scheduler.py75
-rw-r--r--zuul/model.py7
-rw-r--r--zuul/scheduler.py6
3 files changed, 84 insertions, 4 deletions
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 06d7b8d..2c0b93a 100644
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -4484,7 +4484,7 @@ class TestScheduler(ZuulTestCase):
4484 tenant = self.sched.abide.tenants.get('tenant-one') 4484 tenant = self.sched.abide.tenants.get('tenant-one')
4485 queue = tenant.layout.pipelines['gate'].queues[0] 4485 queue = tenant.layout.pipelines['gate'].queues[0]
4486 self.assertEqual(queue.window, 2) 4486 self.assertEqual(queue.window, 2)
4487 self.assertTrue(len(self.builds), 4) 4487 self.assertEqual(len(self.builds), 4)
4488 4488
4489 self.waitUntilSettled() 4489 self.waitUntilSettled()
4490 self.commitConfigUpdate('org/common-config', 4490 self.commitConfigUpdate('org/common-config',
@@ -4499,7 +4499,7 @@ class TestScheduler(ZuulTestCase):
4499 # B is outside the window, but still marked active until the 4499 # B is outside the window, but still marked active until the
4500 # next pass through the queue processor, so its builds haven't 4500 # next pass through the queue processor, so its builds haven't
4501 # been canceled. 4501 # been canceled.
4502 self.assertTrue(len(self.builds), 4) 4502 self.assertEqual(len(self.builds), 4)
4503 4503
4504 self.sched.reconfigure(self.config) 4504 self.sched.reconfigure(self.config)
4505 tenant = self.sched.abide.tenants.get('tenant-one') 4505 tenant = self.sched.abide.tenants.get('tenant-one')
@@ -4507,7 +4507,7 @@ class TestScheduler(ZuulTestCase):
4507 self.assertEqual(queue.window, 1) 4507 self.assertEqual(queue.window, 1)
4508 self.waitUntilSettled() 4508 self.waitUntilSettled()
4509 # B's builds have been canceled now 4509 # B's builds have been canceled now
4510 self.assertTrue(len(self.builds), 2) 4510 self.assertEqual(len(self.builds), 2)
4511 4511
4512 self.executor_server.hold_jobs_in_build = False 4512 self.executor_server.hold_jobs_in_build = False
4513 self.executor_server.release() 4513 self.executor_server.release()
@@ -4524,6 +4524,75 @@ class TestScheduler(ZuulTestCase):
4524 dict(name='job2', result='SUCCESS', changes='1,1 2,1'), 4524 dict(name='job2', result='SUCCESS', changes='1,1 2,1'),
4525 ], ordered=False) 4525 ], ordered=False)
4526 4526
4527 @simple_layout('layouts/reconfigure-window-fixed.yaml')
4528 def test_reconfigure_window_fixed_requests(self):
4529 # Test the active window shrinking during reconfiguration with
4530 # outstanding node requests
4531 self.executor_server.hold_jobs_in_build = True
4532
4533 # Start the jobs for A
4534 A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
4535 A.addApproval('Code-Review', 2)
4536 self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
4537 self.waitUntilSettled()
4538 self.log.debug("A complete")
4539
4540 # Hold node requests for B
4541 self.fake_nodepool.pause()
4542 B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
4543 B.addApproval('Code-Review', 2)
4544 self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
4545 self.waitUntilSettled()
4546 self.log.debug("B complete")
4547
4548 tenant = self.sched.abide.tenants.get('tenant-one')
4549 queue = tenant.layout.pipelines['gate'].queues[0]
4550 self.assertEqual(queue.window, 2)
4551 self.assertEqual(len(self.builds), 2)
4552
4553 self.waitUntilSettled()
4554 self.commitConfigUpdate('org/common-config',
4555 'layouts/reconfigure-window-fixed2.yaml')
4556 self.sched.reconfigure(self.config)
4557 self.waitUntilSettled()
4558 self.log.debug("Reconfiguration complete")
4559
4560 tenant = self.sched.abide.tenants.get('tenant-one')
4561 queue = tenant.layout.pipelines['gate'].queues[0]
4562 # Because we have configured a static window, it should
4563 # be allowed to shrink on reconfiguration.
4564 self.assertEqual(queue.window, 1)
4565 self.assertEqual(len(self.builds), 2)
4566
4567 # After the previous reconfig, the queue processor will have
4568 # run and marked B inactive; run another reconfiguration so
4569 # that we're testing what happens when we reconfigure after
4570 # the active window having shrunk.
4571 self.sched.reconfigure(self.config)
4572
4573 # Unpause the node requests now
4574 self.fake_nodepool.unpause()
4575 self.waitUntilSettled()
4576 self.log.debug("Nodepool unpause complete")
4577
4578 # Allow A to merge and B to enter the active window and complete
4579 self.executor_server.hold_jobs_in_build = False
4580 self.executor_server.release()
4581 self.waitUntilSettled()
4582 self.log.debug("Executor unpause complete")
4583
4584 tenant = self.sched.abide.tenants.get('tenant-one')
4585 queue = tenant.layout.pipelines['gate'].queues[0]
4586 self.assertEqual(queue.window, 1)
4587
4588 self.waitUntilSettled()
4589 self.assertHistory([
4590 dict(name='job1', result='SUCCESS', changes='1,1'),
4591 dict(name='job2', result='SUCCESS', changes='1,1'),
4592 dict(name='job1', result='SUCCESS', changes='1,1 2,1'),
4593 dict(name='job2', result='SUCCESS', changes='1,1 2,1'),
4594 ], ordered=False)
4595
4527 @simple_layout('layouts/reconfigure-remove-add.yaml') 4596 @simple_layout('layouts/reconfigure-remove-add.yaml')
4528 def test_reconfigure_remove_add(self): 4597 def test_reconfigure_remove_add(self):
4529 # Test removing, then adding a job while in queue 4598 # Test removing, then adding a job while in queue
diff --git a/zuul/model.py b/zuul/model.py
index 3b972b9..09726ac 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -1849,7 +1849,7 @@ class BuildSet(object):
1849 1849
1850 def removeJobNodeSet(self, job_name): 1850 def removeJobNodeSet(self, job_name):
1851 if job_name not in self.nodesets: 1851 if job_name not in self.nodesets:
1852 raise Exception("No job set for %s" % (job_name)) 1852 raise Exception("No job nodeset for %s" % (job_name))
1853 del self.nodesets[job_name] 1853 del self.nodesets[job_name]
1854 1854
1855 def setJobNodeRequest(self, job_name, req): 1855 def setJobNodeRequest(self, job_name, req):
@@ -1860,6 +1860,11 @@ class BuildSet(object):
1860 def getJobNodeRequest(self, job_name): 1860 def getJobNodeRequest(self, job_name):
1861 return self.node_requests.get(job_name) 1861 return self.node_requests.get(job_name)
1862 1862
1863 def removeJobNodeRequest(self, job_name):
1864 if job_name not in self.node_requests:
1865 raise Exception("No node request for %s" % (job_name))
1866 del self.node_requests[job_name]
1867
1863 def jobNodeRequestComplete(self, job_name, req, nodeset): 1868 def jobNodeRequestComplete(self, job_name, req, nodeset):
1864 if job_name in self.nodesets: 1869 if job_name in self.nodesets:
1865 raise Exception("Prior node request for %s" % (job_name)) 1870 raise Exception("Prior node request for %s" % (job_name))
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 0be456e..0303429 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -1313,6 +1313,12 @@ class Scheduler(threading.Thread):
1313 self.log.warning("Item %s does not contain job %s " 1313 self.log.warning("Item %s does not contain job %s "
1314 "for node request %s", 1314 "for node request %s",
1315 build_set.item, request.job.name, request) 1315 build_set.item, request.job.name, request)
1316 try:
1317 build_set.removeJobNodeRequest(request.job.name)
1318 except Exception:
1319 self.log.exception("Unable to remove obsolete node request "
1320 "%s for %s job %s",
1321 request, build_set.item, request.job.name)
1316 if request.fulfilled: 1322 if request.fulfilled:
1317 self.nodepool.returnNodeSet(request.nodeset) 1323 self.nodepool.returnNodeSet(request.nodeset)
1318 return 1324 return