summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tobias Henkel <tobias.henkel@bmw.de> 2018-10-12 15:04:19 +0200
committer Tobias Henkel <tobias.henkel@bmw.de> 2018-11-30 12:07:51 +0000
commit 9296de9bf555674caa8d6e41acd325f300b3e8cd (patch)
tree 9eb9829f118c56bf994717d05dc070a99330e36c
parent 7111fcb407a11baa1ea0d2bbee7ba76ec748e884 (diff)
Ensure that completed handlers are removed frequently
On a busy system, _assignHandlers can take quite some time (we saw occurrences of more than 10 minutes). During this time no node request is marked as fulfilled, even if its nodes are already there. A possible solution is to return from _assignHandlers frequently during the iteration so that we can remove completed handlers and then proceed with assigning handlers.

Change-Id: I10f40504c81d532e6953d7af63c5c58fd5283573
Notes
Notes (review): Code-Review+2: James E. Blair <corvus@inaugust.com> Code-Review+2: David Shrewsbury <shrewsbury.dave@gmail.com> Workflow+1: James E. Blair <corvus@inaugust.com> Verified+2: Zuul Submitted-by: Zuul Submitted-at: Sat, 01 Dec 2018 01:16:55 +0000 Reviewed-on: https://review.openstack.org/610029 Project: openstack-infra/nodepool Branch: refs/heads/master
-rwxr-xr-x nodepool/launcher.py 30
1 files changed, 24 insertions, 6 deletions
diff --git a/nodepool/launcher.py b/nodepool/launcher.py
index 9e0f15f..bdc1f39 100755
--- a/nodepool/launcher.py
+++ b/nodepool/launcher.py
@@ -143,21 +143,26 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
143 # Private methods 143 # Private methods
144 # --------------------------------------------------------------- 144 # ---------------------------------------------------------------
145 145
146 def _assignHandlers(self): 146 def _assignHandlers(self, timeout=15):
147 ''' 147 '''
148 For each request we can grab, create a NodeRequestHandler for it. 148 For each request we can grab, create a NodeRequestHandler for it.
149 149
150 The NodeRequestHandler object will kick off any threads needed to 150 The NodeRequestHandler object will kick off any threads needed to
151 satisfy the request, then return. We will need to periodically poll 151 satisfy the request, then return. We will need to periodically poll
152 the handler for completion. 152 the handler for completion.
153
154 If it exceeds the timeout, it stops further iteration and returns False
155 in order to give us time to call _removeCompletedHandlers. Otherwise
156 it returns True to signal that it is finished for now.
153 ''' 157 '''
158 start = time.monotonic()
154 provider = self.getProviderConfig() 159 provider = self.getProviderConfig()
155 if not provider: 160 if not provider:
156 self.log.info("Missing config. Deleted provider?") 161 self.log.info("Missing config. Deleted provider?")
157 return 162 return True
158 163
159 if provider.max_concurrency == 0: 164 if provider.max_concurrency == 0:
160 return 165 return True
161 166
162 # Sort requests by queue priority, then, for all requests at 167 # Sort requests by queue priority, then, for all requests at
163 # the same priority, use the relative_priority field to 168 # the same priority, use the relative_priority field to
@@ -168,8 +173,11 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
168 r.id.split('-')[1])) 173 r.id.split('-')[1]))
169 174
170 for req in requests: 175 for req in requests:
176 if not self.running:
177 return True
178
171 if self.paused_handler: 179 if self.paused_handler:
172 return 180 return True
173 181
174 # Get active threads for all pools for this provider 182 # Get active threads for all pools for this provider
175 active_threads = sum([ 183 active_threads = sum([
@@ -183,7 +191,7 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
183 self.log.debug("Request handling limited: %s active threads ", 191 self.log.debug("Request handling limited: %s active threads ",
184 "with max concurrency of %s", 192 "with max concurrency of %s",
185 active_threads, provider.max_concurrency) 193 active_threads, provider.max_concurrency)
186 return 194 return True
187 195
188 req = self.zk.getNodeRequest(req.id) 196 req = self.zk.getNodeRequest(req.id)
189 if not req: 197 if not req:
@@ -217,6 +225,11 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
217 self.paused_handler = rh 225 self.paused_handler = rh
218 self.request_handlers.append(rh) 226 self.request_handlers.append(rh)
219 227
228 # if we exceeded the timeout stop iterating here
229 if time.monotonic() - start > timeout:
230 return False
231 return True
232
220 def _removeCompletedHandlers(self): 233 def _removeCompletedHandlers(self):
221 ''' 234 '''
222 Poll handlers to see which have completed. 235 Poll handlers to see which have completed.
@@ -305,7 +318,12 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
305 318
306 try: 319 try:
307 if not self.paused_handler: 320 if not self.paused_handler:
308 self._assignHandlers() 321 while not self._assignHandlers():
322 # _assignHandlers can take quite some time on a busy
323 # system so sprinkle _removeCompletedHandlers in
324 # between such that we have a chance to fulfill
325 # requests that already have all nodes.
326 self._removeCompletedHandlers()
309 else: 327 else:
310 # If we are paused, one request handler could not 328 # If we are paused, one request handler could not
311 # satisfy its assigned request, so give it 329 # satisfy its assigned request, so give it