summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ian Wienand <iwienand@redhat.com> 2019-02-21 07:56:30 +1100
committer: Ian Wienand <iwienand@redhat.com> 2019-02-22 11:36:27 +1100
commit: c68dbb9636c8c1424f67e0920132d4047bcafc99 (patch)
tree: d9122687e1dc462e79b63bd610e104655635828d
parent: e8ac13027e35e804712467f46740542287d0992f (diff)
Use a pipeline for dib stats
I noticed that in OpenStack production we don't seem to be getting all the stats from dib, particularly from our very remote builder. This is likely because some packets are lost when the stats are quickly blasted out as many small UDP packets. A pipeline bundles the stats together into the largest-size packets it can (this has been a problem before; see I3f68450c7164d1cf0f1f57f9a31e5dca2f72bc43). Add some additional checks for the size stats, which did not seem to be covered by existing testing. I also noticed that the documentation had an extra ".builder." in the key, which isn't actually there in the stats output. Change-Id: Ib744f19385906d1e72231958d11c98f15b72d6bd
Notes
Notes (review): Code-Review+2: David Shrewsbury <dshrewsb@redhat.com> Code-Review+2: Tobias Henkel <tobias.henkel@bmw.de> Workflow+1: Tobias Henkel <tobias.henkel@bmw.de> Verified+2: Zuul Submitted-by: Zuul Submitted-at: Fri, 22 Feb 2019 21:07:54 +0000 Reviewed-on: https://review.openstack.org/638265 Project: openstack-infra/nodepool Branch: refs/heads/master
-rw-r--r-- doc/source/operation.rst 6
-rwxr-xr-x nodepool/builder.py 12
-rw-r--r-- nodepool/tests/unit/test_builder.py 7
3 files changed, 18 insertions, 7 deletions
diff --git a/doc/source/operation.rst b/doc/source/operation.rst
index 470e9f9..6e2b51e 100644
--- a/doc/source/operation.rst
+++ b/doc/source/operation.rst
@@ -294,15 +294,15 @@ Nodepool builder
294 Number of image uploads to a specific provider in the cloud plus the time in 294 Number of image uploads to a specific provider in the cloud plus the time in
295 seconds spent to upload the image. 295 seconds spent to upload the image.
296 296
297.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.rc 297.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.rc
298 :type: gauge 298 :type: gauge
299 299
300 Return code of the DIB. 300 Return code of the DIB.
301 301
302.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.duration 302.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.duration
303 :type: timer 303 :type: timer
304 304
305 Time the DIB run took. 305 Time the DIB run took in ms
306 306
307Nodepool launcher 307Nodepool launcher
308~~~~~~~~~~~~~~~~~ 308~~~~~~~~~~~~~~~~~
diff --git a/nodepool/builder.py b/nodepool/builder.py
index 196e6e0..2f6f4de 100755
--- a/nodepool/builder.py
+++ b/nodepool/builder.py
@@ -854,6 +854,9 @@ class BuildWorker(BaseWorker):
854 build_data.builder = self._hostname 854 build_data.builder = self._hostname
855 build_data.username = diskimage.username 855 build_data.username = diskimage.username
856 856
857 if self._statsd:
858 pipeline = self._statsd.pipeline()
859
857 if self._zk.didLoseConnection: 860 if self._zk.didLoseConnection:
858 self.log.info("ZooKeeper lost while building %s" % diskimage.name) 861 self.log.info("ZooKeeper lost while building %s" % diskimage.name)
859 self._zk.resetLostFlag() 862 self._zk.resetLostFlag()
@@ -881,16 +884,17 @@ class BuildWorker(BaseWorker):
881 size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512 884 size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512
882 self.log.debug("%s created image %s.%s (size: %d) " % 885 self.log.debug("%s created image %s.%s (size: %d) " %
883 (diskimage.name, filename, ext, size)) 886 (diskimage.name, filename, ext, size))
884 self._statsd.gauge(key, size) 887 pipeline.gauge(key, size)
885 888
886 if self._statsd: 889 if self._statsd:
887 # report result to statsd 890 # report result to statsd
888 for ext in img_types.split(','): 891 for ext in img_types.split(','):
889 key_base = 'nodepool.dib_image_build.%s.%s' % ( 892 key_base = 'nodepool.dib_image_build.%s.%s' % (
890 diskimage.name, ext) 893 diskimage.name, ext)
891 self._statsd.gauge(key_base + '.rc', rc) 894 pipeline.gauge(key_base + '.rc', rc)
892 self._statsd.timing(key_base + '.duration', 895 pipeline.timing(key_base + '.duration',
893 int(build_time * 1000)) 896 int(build_time * 1000))
897 pipeline.send()
894 898
895 return build_data 899 return build_data
896 900
diff --git a/nodepool/tests/unit/test_builder.py b/nodepool/tests/unit/test_builder.py
index 1379ec0..9a7dad4 100644
--- a/nodepool/tests/unit/test_builder.py
+++ b/nodepool/tests/unit/test_builder.py
@@ -326,6 +326,8 @@ class TestNodePoolBuilder(tests.DBTestCase):
326 '0', 'g') 326 '0', 'g')
327 self.assertReportedStat('nodepool.dib_image_build.' 327 self.assertReportedStat('nodepool.dib_image_build.'
328 'fake-image.tar.duration', None, 'ms') 328 'fake-image.tar.duration', None, 'ms')
329 self.assertReportedStat('nodepool.dib_image_build.'
330 'fake-image.tar.size', '4096', 'g')
329 331
330 def test_diskimage_build_formats(self): 332 def test_diskimage_build_formats(self):
331 configfile = self.setup_config('node_diskimage_formats.yaml') 333 configfile = self.setup_config('node_diskimage_formats.yaml')
@@ -336,6 +338,11 @@ class TestNodePoolBuilder(tests.DBTestCase):
336 338
337 self.assertEqual(build_default._formats, ['qcow2']) 339 self.assertEqual(build_default._formats, ['qcow2'])
338 self.assertEqual(build_vhd._formats, ['vhd']) 340 self.assertEqual(build_vhd._formats, ['vhd'])
341 self.assertReportedStat('nodepool.dib_image_build.'
342 'fake-image-default-format.qcow2.size',
343 '4096', 'g')
344 self.assertReportedStat('nodepool.dib_image_build.'
345 'fake-image-vhd.vhd.size', '4096', 'g')
339 346
340 @mock.patch('select.poll') 347 @mock.patch('select.poll')
341 def test_diskimage_build_timeout(self, mock_poll): 348 def test_diskimage_build_timeout(self, mock_poll):