summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.openstack.org>2018-11-27 22:34:31 +0000
committerGerrit Code Review <review@openstack.org>2018-11-27 22:34:31 +0000
commitccbe7b10df77adaf587e2b45cd764927f8f03d24 (patch)
tree3f89bc9bdd999048f21f20b4cf6eb4b7d7e84b4d
parentbe4dc6e9d20cf36ae6f7fb24cd3449e6655acb68 (diff)
parentae887dab58d0aa9a4ed375fbbf4b9a69d8043f8a (diff)
Merge "Improve resource usage with semaphores"
-rw-r--r--doc/source/user/config.rst20
-rw-r--r--releasenotes/notes/semaphore-resources-295dceaf7ddbab0d.yaml14
-rw-r--r--tests/fixtures/config/semaphore/git/common-config/zuul.yaml26
-rw-r--r--tests/fixtures/config/semaphore/git/org_project3/README1
-rw-r--r--tests/fixtures/config/semaphore/main.yaml1
-rw-r--r--tests/unit/test_scheduler.py58
-rw-r--r--zuul/configloader.py15
-rw-r--r--zuul/manager/__init__.py2
-rw-r--r--zuul/model.py62
9 files changed, 187 insertions, 12 deletions
diff --git a/doc/source/user/config.rst b/doc/source/user/config.rst
index e0235b8..61830a0 100644
--- a/doc/source/user/config.rst
+++ b/doc/source/user/config.rst
@@ -629,7 +629,25 @@ Here is an example of two job definitions:
629 The name of a :ref:`semaphore` which should be acquired and 629 The name of a :ref:`semaphore` which should be acquired and
630 released when the job begins and ends. If the semaphore is at 630 released when the job begins and ends. If the semaphore is at
631 maximum capacity, then Zuul will wait until it can be acquired 631 maximum capacity, then Zuul will wait until it can be acquired
632 before starting the job. 632 before starting the job. The format is either a string or a
633 dictionary. If it's a string it references a semaphore using the
634 default value for :attr:`job.semaphore.resources-first`.
635
636 .. attr:: name
637 :required:
638
639 The name of the referenced semaphore
640
641 .. attr:: resources-first
642 :default: False
643
644 By default a semaphore is acquired before the resources are
645 requested. However in some cases the user wants to run cheap
646 jobs as quickly as possible in a consecutive manner. In this
647 case :attr:`job.semaphore.resources-first` can be enabled to
648 request the resources before locking the semaphore. This can
649 lead to some amount of blocked resources while waiting for the
650 semaphore so this should be used with caution.
633 651
634 .. attr:: tags 652 .. attr:: tags
635 653
diff --git a/releasenotes/notes/semaphore-resources-295dceaf7ddbab0d.yaml b/releasenotes/notes/semaphore-resources-295dceaf7ddbab0d.yaml
new file mode 100644
index 0000000..9d3b7d4
--- /dev/null
+++ b/releasenotes/notes/semaphore-resources-295dceaf7ddbab0d.yaml
@@ -0,0 +1,14 @@
1---
2features:
3 - |
4 A job using a semaphore can now configure whether it should acquire it
5 before requesting resources or just before running.
6upgrade:
7 - |
8 The acquiring behavior of jobs with semaphores has been changed. Up to now
9 a job requested resources and acquired the semaphore just before it started
10 to run. However this could lead to a high amount of resource waste. Instead
11 jobs now acquire the semaphore before requesting the resources by default.
12 This behavior can be overridden by jobs using
13 :attr:`job.semaphore.resources-first` if some waste of resources is
14 acceptable.
diff --git a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
index 600543c..dcb0cbd 100644
--- a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml
@@ -22,6 +22,10 @@
22- job: 22- job:
23 name: base 23 name: base
24 parent: null 24 parent: null
25 nodeset:
26 nodes:
27 - name: controller
28 label: label1
25 29
26- job: 30- job:
27 name: project-test1 31 name: project-test1
@@ -56,6 +60,20 @@
56 - name: controller 60 - name: controller
57 label: label1 61 label: label1
58 62
63- job:
64 name: semaphore-one-test1-resources-first
65 semaphore:
66 name: test-semaphore
67 resources-first: True
68 run: playbooks/semaphore-one-test1.yaml
69
70- job:
71 name: semaphore-one-test2-resources-first
72 semaphore:
73 name: test-semaphore
74 resources-first: True
75 run: playbooks/semaphore-one-test1.yaml
76
59- project: 77- project:
60 name: org/project 78 name: org/project
61 check: 79 check:
@@ -77,3 +95,11 @@
77 check: 95 check:
78 jobs: 96 jobs:
79 - semaphore-one-test3 97 - semaphore-one-test3
98
99- project:
100 name: org/project3
101 check:
102 jobs:
103 - project-test1
104 - semaphore-one-test1-resources-first
105 - semaphore-one-test2-resources-first
diff --git a/tests/fixtures/config/semaphore/git/org_project3/README b/tests/fixtures/config/semaphore/git/org_project3/README
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/tests/fixtures/config/semaphore/git/org_project3/README
@@ -0,0 +1 @@
test
diff --git a/tests/fixtures/config/semaphore/main.yaml b/tests/fixtures/config/semaphore/main.yaml
index 83ed092..919921e 100644
--- a/tests/fixtures/config/semaphore/main.yaml
+++ b/tests/fixtures/config/semaphore/main.yaml
@@ -8,3 +8,4 @@
8 - org/project 8 - org/project
9 - org/project1 9 - org/project1
10 - org/project2 10 - org/project2
11 - org/project3
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index c4bd99d..a07c259 100644
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -5849,6 +5849,10 @@ class TestSemaphore(ZuulTestCase):
5849 5849
5850 self.executor_server.hold_jobs_in_build = True 5850 self.executor_server.hold_jobs_in_build = True
5851 5851
5852 # Pause nodepool so we can check the ordering of getting the nodes
5853 # and acquiring the semaphore.
5854 self.fake_nodepool.paused = True
5855
5852 A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') 5856 A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
5853 B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B') 5857 B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
5854 self.assertFalse('test-semaphore' in 5858 self.assertFalse('test-semaphore' in
@@ -5858,6 +5862,13 @@ class TestSemaphore(ZuulTestCase):
5858 self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1)) 5862 self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
5859 self.waitUntilSettled() 5863 self.waitUntilSettled()
5860 5864
5865 # By default we first lock the semaphore and then get the nodes
5866 # so at this point the semaphore needs to be acquired.
5867 self.assertTrue('test-semaphore' in
5868 tenant.semaphore_handler.semaphores)
5869 self.fake_nodepool.paused = False
5870 self.waitUntilSettled()
5871
5861 self.assertEqual(len(self.builds), 3) 5872 self.assertEqual(len(self.builds), 3)
5862 self.assertEqual(self.builds[0].name, 'project-test1') 5873 self.assertEqual(self.builds[0].name, 'project-test1')
5863 self.assertEqual(self.builds[1].name, 'semaphore-one-test1') 5874 self.assertEqual(self.builds[1].name, 'semaphore-one-test1')
@@ -5993,6 +6004,53 @@ class TestSemaphore(ZuulTestCase):
5993 self.assertEqual(A.reported, 1) 6004 self.assertEqual(A.reported, 1)
5994 self.assertEqual(B.reported, 1) 6005 self.assertEqual(B.reported, 1)
5995 6006
6007 def test_semaphore_resources_first(self):
6008 "Test semaphores with max=1 (mutex) and get resources first"
6009 tenant = self.sched.abide.tenants.get('tenant-one')
6010
6011 self.executor_server.hold_jobs_in_build = True
6012
6013 # Pause nodepool so we can check the ordering of getting the nodes
6014 # and acquiring the semaphore.
6015 self.fake_nodepool.paused = True
6016
6017 A = self.fake_gerrit.addFakeChange('org/project3', 'master', 'A')
6018 B = self.fake_gerrit.addFakeChange('org/project3', 'master', 'B')
6019 self.assertFalse('test-semaphore' in
6020 tenant.semaphore_handler.semaphores)
6021
6022 self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
6023 self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
6024 self.waitUntilSettled()
6025
6026 # Here we first get the resources and then lock the semaphore
6027 # so at this point the semaphore should not be acquired.
6028 self.assertFalse('test-semaphore' in
6029 tenant.semaphore_handler.semaphores)
6030 self.fake_nodepool.paused = False
6031 self.waitUntilSettled()
6032
6033 self.assertEqual(len(self.builds), 3)
6034 self.assertEqual(self.builds[0].name, 'project-test1')
6035 self.assertEqual(self.builds[1].name,
6036 'semaphore-one-test1-resources-first')
6037 self.assertEqual(self.builds[2].name, 'project-test1')
6038
6039 self.executor_server.release('semaphore-one-test1')
6040 self.waitUntilSettled()
6041
6042 self.assertEqual(len(self.builds), 3)
6043 self.assertEqual(self.builds[0].name, 'project-test1')
6044 self.assertEqual(self.builds[1].name, 'project-test1')
6045 self.assertEqual(self.builds[2].name,
6046 'semaphore-one-test2-resources-first')
6047 self.assertTrue('test-semaphore' in
6048 tenant.semaphore_handler.semaphores)
6049
6050 self.executor_server.hold_jobs_in_build = False
6051 self.executor_server.release()
6052 self.waitUntilSettled()
6053
5996 def test_semaphore_zk_error(self): 6054 def test_semaphore_zk_error(self):
5997 "Test semaphore release with zk error" 6055 "Test semaphore release with zk error"
5998 tenant = self.sched.abide.tenants.get('tenant-one') 6056 tenant = self.sched.abide.tenants.get('tenant-one')
diff --git a/zuul/configloader.py b/zuul/configloader.py
index 73d1aec..a476a35 100644
--- a/zuul/configloader.py
+++ b/zuul/configloader.py
@@ -517,6 +517,9 @@ class JobParser(object):
517 secret = {vs.Required('name'): str, 517 secret = {vs.Required('name'): str,
518 vs.Required('secret'): str} 518 vs.Required('secret'): str}
519 519
520 semaphore = {vs.Required('name'): str,
521 'resources-first': bool}
522
520 # Attributes of a job that can also be used in Project and ProjectTemplate 523 # Attributes of a job that can also be used in Project and ProjectTemplate
521 job_attributes = {'parent': vs.Any(str, None), 524 job_attributes = {'parent': vs.Any(str, None),
522 'final': bool, 525 'final': bool,
@@ -528,7 +531,7 @@ class JobParser(object):
528 'success-url': str, 531 'success-url': str,
529 'hold-following-changes': bool, 532 'hold-following-changes': bool,
530 'voting': bool, 533 'voting': bool,
531 'semaphore': str, 534 'semaphore': vs.Any(semaphore, str),
532 'tags': to_list(str), 535 'tags': to_list(str),
533 'branches': to_list(str), 536 'branches': to_list(str),
534 'files': to_list(str), 537 'files': to_list(str),
@@ -573,7 +576,6 @@ class JobParser(object):
573 'workspace', 576 'workspace',
574 'voting', 577 'voting',
575 'hold-following-changes', 578 'hold-following-changes',
576 'semaphore',
577 'attempts', 579 'attempts',
578 'failure-message', 580 'failure-message',
579 'success-message', 581 'success-message',
@@ -728,6 +730,15 @@ class JobParser(object):
728 new_projects[project.canonical_name] = job_project 730 new_projects[project.canonical_name] = job_project
729 job.required_projects = new_projects 731 job.required_projects = new_projects
730 732
733 if 'semaphore' in conf:
734 semaphore = conf.get('semaphore')
735 if isinstance(semaphore, str):
736 job.semaphore = model.JobSemaphore(semaphore)
737 else:
738 job.semaphore = model.JobSemaphore(
739 semaphore.get('name'),
740 semaphore.get('resources-first', False))
741
731 tags = conf.get('tags') 742 tags = conf.get('tags')
732 if tags: 743 if tags:
733 job.tags = set(tags) 744 job.tags = set(tags)
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 7c0ff66..da38818 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -322,7 +322,7 @@ class PipelineManager(object):
322 change.commit_needs_changes = dependencies 322 change.commit_needs_changes = dependencies
323 323
324 def provisionNodes(self, item): 324 def provisionNodes(self, item):
325 jobs = item.findJobsToRequest() 325 jobs = item.findJobsToRequest(item.pipeline.tenant.semaphore_handler)
326 if not jobs: 326 if not jobs:
327 return False 327 return False
328 build_set = item.current_build_set 328 build_set = item.current_build_set
diff --git a/zuul/model.py b/zuul/model.py
index 5de9cd6..d938b4a 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -1112,7 +1112,12 @@ class Job(ConfigObject):
1112 d['required_projects'] = [] 1112 d['required_projects'] = []
1113 for project in self.required_projects.values(): 1113 for project in self.required_projects.values():
1114 d['required_projects'].append(project.toDict()) 1114 d['required_projects'].append(project.toDict())
1115 d['semaphore'] = self.semaphore 1115 if self.semaphore:
1116 # For now just leave the semaphore name here until we really need
1117 # more information in zuul-web about this
1118 d['semaphore'] = self.semaphore.name
1119 else:
1120 d['semaphore'] = None
1116 d['variables'] = self.variables 1121 d['variables'] = self.variables
1117 d['final'] = self.final 1122 d['final'] = self.final
1118 d['abstract'] = self.abstract 1123 d['abstract'] = self.abstract
@@ -1511,6 +1516,21 @@ class JobProject(ConfigObject):
1511 return d 1516 return d
1512 1517
1513 1518
1519class JobSemaphore(ConfigObject):
1520 """ A reference to a semaphore from a job. """
1521
1522 def __init__(self, semaphore_name, resources_first=False):
1523 super().__init__()
1524 self.name = semaphore_name
1525 self.resources_first = resources_first
1526
1527 def toDict(self):
1528 d = dict()
1529 d['name'] = self.name
1530 d['resources_first'] = self.resources_first
1531 return d
1532
1533
1514class JobList(ConfigObject): 1534class JobList(ConfigObject):
1515 """ A list of jobs in a project's pipeline. """ 1535 """ A list of jobs in a project's pipeline. """
1516 1536
@@ -2135,13 +2155,13 @@ class QueueItem(object):
2135 # The nodes for this job are not ready, skip 2155 # The nodes for this job are not ready, skip
2136 # it for now. 2156 # it for now.
2137 continue 2157 continue
2138 if semaphore_handler.acquire(self, job): 2158 if semaphore_handler.acquire(self, job, False):
2139 # If this job needs a semaphore, either acquire it or 2159 # If this job needs a semaphore, either acquire it or
2140 # make sure that we have it before running the job. 2160 # make sure that we have it before running the job.
2141 torun.append(job) 2161 torun.append(job)
2142 return torun 2162 return torun
2143 2163
2144 def findJobsToRequest(self): 2164 def findJobsToRequest(self, semaphore_handler):
2145 build_set = self.current_build_set 2165 build_set = self.current_build_set
2146 toreq = [] 2166 toreq = []
2147 if not self.live: 2167 if not self.live:
@@ -2177,7 +2197,10 @@ class QueueItem(object):
2177 all_parent_jobs_successful = False 2197 all_parent_jobs_successful = False
2178 break 2198 break
2179 if all_parent_jobs_successful: 2199 if all_parent_jobs_successful:
2180 toreq.append(job) 2200 if semaphore_handler.acquire(self, job, True):
2201 # If this job needs a semaphore, either acquire it or
2202 # make sure that we have it before requesting the nodes.
2203 toreq.append(job)
2181 return toreq 2204 return toreq
2182 2205
2183 def setResult(self, build): 2206 def setResult(self, build):
@@ -3596,11 +3619,34 @@ class SemaphoreHandler(object):
3596 def __init__(self): 3619 def __init__(self):
3597 self.semaphores = {} 3620 self.semaphores = {}
3598 3621
3599 def acquire(self, item, job): 3622 def acquire(self, item, job, request_resources):
3623 """
3624 Acquires a semaphore for an item job combination. This gets called twice
3625 during the lifecycle of a job. The first call is before requesting
3626 build resources. The second call is before running the job. In which
3627 call we really acquire the semaphore is defined by the job.
3628
3629 :param item: The item
3630 :param job: The job
3631 :param request_resources: True if we want to acquire for the request
3632 resources phase, False if we want to acquire
3633 for the run phase.
3634 """
3600 if not job.semaphore: 3635 if not job.semaphore:
3601 return True 3636 return True
3602 3637
3603 semaphore_key = job.semaphore 3638 if job.semaphore.resources_first and request_resources:
3639 # We're currently in the resource request phase and want to get the
3640 # resources before locking. So we don't need to do anything here.
3641 return True
3642 else:
3643 # As a safety net we want to acquire the semaphore at least in the
3644 # run phase so don't filter this here as re-acquiring the semaphore
3645 # is not a problem here if it has been already acquired before in
3646 # the resources phase.
3647 pass
3648
3649 semaphore_key = job.semaphore.name
3604 3650
3605 m = self.semaphores.get(semaphore_key) 3651 m = self.semaphores.get(semaphore_key)
3606 if not m: 3652 if not m:
@@ -3612,7 +3658,7 @@ class SemaphoreHandler(object):
3612 return True 3658 return True
3613 3659
3614 # semaphore is there, check max 3660 # semaphore is there, check max
3615 if len(m) < self._max_count(item, job.semaphore): 3661 if len(m) < self._max_count(item, job.semaphore.name):
3616 self._acquire(semaphore_key, item, job.name) 3662 self._acquire(semaphore_key, item, job.name)
3617 return True 3663 return True
3618 3664
@@ -3622,7 +3668,7 @@ class SemaphoreHandler(object):
3622 if not job.semaphore: 3668 if not job.semaphore:
3623 return 3669 return
3624 3670
3625 semaphore_key = job.semaphore 3671 semaphore_key = job.semaphore.name
3626 3672
3627 m = self.semaphores.get(semaphore_key) 3673 m = self.semaphores.get(semaphore_key)
3628 if not m: 3674 if not m: