Use a pipeline for dib stats

I noticed in OpenStack production we don't seem to be getting all the
stats from dib, particularly from our very remote builder.  This is
likely because of packet loss when we quickly blast out the stats as
many small UDP packets.  A pipeline bundles the stats together into
the largest packets it can (this has been a problem before; see
I3f68450c7164d1cf0f1f57f9a31e5dca2f72bc43).
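
As a rough illustration of the difference, here is a minimal sketch
using the statsd Python client; the host, port, and stat names below
are only examples, not what nodepool actually configures:

    import statsd

    client = statsd.StatsClient('statsd.example.com', 8125)

    # Without a pipeline, every call goes out immediately as its own
    # small UDP datagram, and each one can be dropped independently on
    # a lossy link.
    client.gauge('nodepool.dib_image_build.fake-image.qcow2.rc', 0)
    client.timing('nodepool.dib_image_build.fake-image.qcow2.duration', 1234)

    # With a pipeline, the stats are buffered and packed into as few
    # (maximally sized) datagrams as possible, then flushed together.
    pipeline = client.pipeline()
    pipeline.gauge('nodepool.dib_image_build.fake-image.qcow2.rc', 0)
    pipeline.timing('nodepool.dib_image_build.fake-image.qcow2.duration', 1234)
    pipeline.send()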

Add some additional checks for the size stats, which did not seem to
be covered by the existing tests.

I also noticed that the documentation had an extra ".builder." in the
stat key, which isn't actually present in the stats output.

Change-Id: Ib744f19385906d1e72231958d11c98f15b72d6bd
Author: Ian Wienand
Date:   2019-02-21 07:56:30 +11:00
Commit: c68dbb9636
Parent: e8ac13027e
3 changed files with 18 additions and 7 deletions

@@ -294,15 +294,15 @@ Nodepool builder
    Number of image uploads to a specific provider in the cloud plus the time in
    seconds spent to upload the image.
 
-.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.rc
+.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.rc
    :type: gauge
 
    Return code of the DIB.
 
-.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.duration
+.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.duration
    :type: timer
 
-   Time the DIB run took.
+   Time the DIB run took in ms
 
 Nodepool launcher
 ~~~~~~~~~~~~~~~~~

@@ -854,6 +854,9 @@ class BuildWorker(BaseWorker):
         build_data.builder = self._hostname
         build_data.username = diskimage.username
 
+        if self._statsd:
+            pipeline = self._statsd.pipeline()
+
         if self._zk.didLoseConnection:
             self.log.info("ZooKeeper lost while building %s" % diskimage.name)
             self._zk.resetLostFlag()
@@ -881,16 +884,17 @@ class BuildWorker(BaseWorker):
                     size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512
                     self.log.debug("%s created image %s.%s (size: %d) " %
                                    (diskimage.name, filename, ext, size))
-                    self._statsd.gauge(key, size)
+                    pipeline.gauge(key, size)
 
         if self._statsd:
             # report result to statsd
             for ext in img_types.split(','):
                 key_base = 'nodepool.dib_image_build.%s.%s' % (
                     diskimage.name, ext)
-                self._statsd.gauge(key_base + '.rc', rc)
-                self._statsd.timing(key_base + '.duration',
-                                    int(build_time * 1000))
+                pipeline.gauge(key_base + '.rc', rc)
+                pipeline.timing(key_base + '.duration',
+                                int(build_time * 1000))
+            pipeline.send()
 
         return build_data

@@ -326,6 +326,8 @@ class TestNodePoolBuilder(tests.DBTestCase):
                                 '0', 'g')
         self.assertReportedStat('nodepool.dib_image_build.'
                                 'fake-image.tar.duration', None, 'ms')
+        self.assertReportedStat('nodepool.dib_image_build.'
+                                'fake-image.tar.size', '4096', 'g')
 
     def test_diskimage_build_formats(self):
         configfile = self.setup_config('node_diskimage_formats.yaml')
@@ -336,6 +338,11 @@ class TestNodePoolBuilder(tests.DBTestCase):
 
         self.assertEqual(build_default._formats, ['qcow2'])
         self.assertEqual(build_vhd._formats, ['vhd'])
+        self.assertReportedStat('nodepool.dib_image_build.'
+                                'fake-image-default-format.qcow2.size',
+                                '4096', 'g')
+        self.assertReportedStat('nodepool.dib_image_build.'
+                                'fake-image-vhd.vhd.size', '4096', 'g')
 
     @mock.patch('select.poll')
     def test_diskimage_build_timeout(self, mock_poll):