Add memory awareness to system load governor
This will unregister for concurrent jobs whenever available system memory drops below 5% by default. Buffers and cache, which could be reclaimed, are not counted as used memory; they are treated as available. Users can tune this up or down as necessary. This is a very conservative default and will likely need tuning once observed in production. Change-Id: Iab6469c0173d9f5635769d4ab0e8034a41355cd4 Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
parent
22c5b7155b
commit
1754b2caf0
|
@ -575,6 +575,16 @@ The following sections of ``zuul.conf`` are used by the executor:
|
|||
The executor will observe system load and determine whether
|
||||
to accept more jobs every 30 seconds.
|
||||
|
||||
.. attr:: min_avail_mem
|
||||
:default: 5.0
|
||||
|
||||
This is the minimum percentage of system RAM available. The
|
||||
executor will stop accepting more than 1 job at a time until
|
||||
more memory is available. The available memory percentage is
|
||||
calculated from the total available memory divided by the
|
||||
total real memory multiplied by 100. Buffers and cache are
|
||||
considered available in the calculation.
|
||||
|
||||
.. attr:: hostname
|
||||
:default: hostname of the server
|
||||
|
||||
|
|
|
@ -27,3 +27,4 @@ pyjwt
|
|||
iso8601
|
||||
aiohttp
|
||||
uvloop;python_version>='3.5'
|
||||
psutil
|
||||
|
|
|
@ -18,6 +18,7 @@ import json
|
|||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import psutil
|
||||
import shutil
|
||||
import signal
|
||||
import shlex
|
||||
|
@ -1949,6 +1950,7 @@ class ExecutorServer(object):
|
|||
''' Apply some heuristics to decide whether or not we should
|
||||
be asking for more jobs '''
|
||||
load_avg = os.getloadavg()[0]
|
||||
avail_mem_pct = 100.0 - psutil.virtual_memory().percent
|
||||
if self.accepting_work:
|
||||
# Don't unregister if we don't have any active jobs.
|
||||
if load_avg > self.max_load_avg and self.job_workers:
|
||||
|
@ -1956,10 +1958,19 @@ class ExecutorServer(object):
|
|||
"Unregistering due to high system load {} > {}".format(
|
||||
load_avg, self.max_load_avg))
|
||||
self.unregister_work()
|
||||
elif load_avg <= self.max_load_avg:
|
||||
elif avail_mem_pct < self.min_avail_mem:
|
||||
self.log.info(
|
||||
"Unregistering due to low memory {:3.1f}% < {}".format(
|
||||
avail_mem_pct, self.min_avail_mem))
|
||||
self.unregister_work()
|
||||
elif (load_avg <= self.max_load_avg and
|
||||
avail_mem_pct >= self.min_avail_mem):
|
||||
self.log.info(
|
||||
"Re-registering as load is within limits {} <= {}".format(
|
||||
load_avg, self.max_load_avg))
|
||||
"Re-registering as job is within limits "
|
||||
"{} <= {} {:3.1f}% <= {}".format(load_avg,
|
||||
self.max_load_avg,
|
||||
avail_mem_pct,
|
||||
self.min_avail_mem))
|
||||
self.register_work()
|
||||
if self.statsd:
|
||||
base_key = 'zuul.executor.%s' % self.hostname
|
||||
|
|
Loading…
Reference in New Issue