summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Shrewsbury <shrewsbury.dave@gmail.com>2019-01-25 17:36:31 +0000
committerDavid Shrewsbury <shrewsbury.dave@gmail.com>2019-01-25 17:36:31 +0000
commitccf40a462a4805c7162bc34434cfe718cd96c448 (patch)
tree24a1dced4e0b7215618ca3e5de4c1ad418f38c27
parent7225354ec04f0466d95b6d68e51002723d6529c9 (diff)
Revert "Add a timeout for the image build"
This reverts commit 7225354ec04f0466d95b6d68e51002723d6529c9. The disk-image-create command does not appear to be starting. Change-Id: I81abe25a253a385cae08a57561129a678546f18f
Notes
Notes (review): Code-Review+2: Tobias Henkel <tobias.henkel@bmw.de> Code-Review+2: James E. Blair <corvus@inaugust.com> Workflow+1: James E. Blair <corvus@inaugust.com> Verified+2: Zuul Submitted-by: Zuul Submitted-at: Fri, 25 Jan 2019 22:37:34 +0000 Reviewed-on: https://review.openstack.org/633252 Project: openstack-infra/nodepool Branch: refs/heads/master
-rw-r--r--doc/source/configuration.rst6
-rwxr-xr-xnodepool/builder.py45
-rw-r--r--nodepool/cmd/config_validator.py1
-rwxr-xr-xnodepool/config.py5
-rw-r--r--nodepool/tests/__init__.py19
-rwxr-xr-xnodepool/tests/fake-image-create78
-rw-r--r--nodepool/tests/fixtures/config_validate/good.yaml1
-rw-r--r--nodepool/tests/fixtures/diskimage_build_timeout.yaml26
-rw-r--r--nodepool/tests/unit/test_builder.py9
-rw-r--r--releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml6
10 files changed, 49 insertions, 147 deletions
diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst
index 7bec670..27abacf 100644
--- a/doc/source/configuration.rst
+++ b/doc/source/configuration.rst
@@ -277,12 +277,6 @@ Options
277 Specifies the distro to be used as a base image to build the image using 277 Specifies the distro to be used as a base image to build the image using
278 diskimage-builder. 278 diskimage-builder.
279 279
280 .. attr:: build-timeout
281 :type: int
282
283 How long (in seconds) to wait for the diskimage build before giving up.
284 The default is 8 hours.
285
286 .. attr:: elements 280 .. attr:: elements
287 :type: list 281 :type: list
288 282
diff --git a/nodepool/builder.py b/nodepool/builder.py
index 16fe714..dce709e 100755
--- a/nodepool/builder.py
+++ b/nodepool/builder.py
@@ -739,28 +739,21 @@ class BuildWorker(BaseWorker):
739 if 'qcow2' in img_types: 739 if 'qcow2' in img_types:
740 qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS 740 qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS
741 741
742 log_fn = self._getBuildLog(diskimage.name, build_id) 742 cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' %
743
744 cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s --logfile %s %s' %
745 (self.dib_cmd, img_types, qemu_img_options, filename, 743 (self.dib_cmd, img_types, qemu_img_options, filename,
746 log_fn, img_elements)) 744 img_elements))
747 745
748 self._pruneBuildLogs(diskimage.name) 746 self._pruneBuildLogs(diskimage.name)
747 log_fn = self._getBuildLog(diskimage.name, build_id)
749 748
750 self.log.info('Running %s' % (cmd,)) 749 self.log.info('Running %s' % (cmd,))
751 self.log.info('Logging to %s' % (log_fn,)) 750 self.log.info('Logging to %s' % (log_fn,))
752 751
753 start_time = time.monotonic() 752 start_time = time.monotonic()
754
755 # We used to use readline() on stdout to output the lines to the
756 # build log. Unfortunately, this would block as long as the process
757 # ran (with no easy way to timeout the read) and wedge the builder.
758 # Now we use --logfile option to the dib command and set a timeout
759 # on the wait() call to prevent the wedge.
760 did_timeout = False
761 try: 753 try:
762 p = subprocess.Popen( 754 p = subprocess.Popen(
763 shlex.split(cmd), 755 shlex.split(cmd),
756 stdout=subprocess.PIPE,
764 stderr=subprocess.STDOUT, 757 stderr=subprocess.STDOUT,
765 env=env) 758 env=env)
766 except OSError as e: 759 except OSError as e:
@@ -768,20 +761,17 @@ class BuildWorker(BaseWorker):
768 "Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror) 761 "Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
769 ) 762 )
770 763
771 try: 764 with open(log_fn, 'wb') as log:
772 rc = p.wait(timeout=diskimage.build_timeout) 765 while True:
773 except subprocess.TimeoutExpired: 766 ln = p.stdout.readline()
774 p.kill() 767 log.write(ln)
775 did_timeout = True 768 log.flush()
776 rc = 1 769 if not ln:
777 self.log.error( 770 break
778 "Build timeout for image %s, build %s (log: %s)", 771
779 diskimage.name, build_id, log_fn) 772 rc = p.wait()
780 else: 773 m = "Exit code: %s\n" % rc
781 # Append return code to dib's log file 774 log.write(m.encode('utf8'))
782 with open(log_fn, 'ab') as log:
783 m = "Exit code: %s\n" % rc
784 log.write(m.encode('utf8'))
785 775
786 # It's possible the connection to the ZK cluster could have been 776 # It's possible the connection to the ZK cluster could have been
787 # interrupted during the build. If so, wait for it to return. 777 # interrupted during the build. If so, wait for it to return.
@@ -806,10 +796,9 @@ class BuildWorker(BaseWorker):
806 self.log.info("ZooKeeper lost while building %s" % diskimage.name) 796 self.log.info("ZooKeeper lost while building %s" % diskimage.name)
807 self._zk.resetLostFlag() 797 self._zk.resetLostFlag()
808 build_data.state = zk.FAILED 798 build_data.state = zk.FAILED
809 elif p.returncode or did_timeout: 799 elif p.returncode:
810 self.log.info( 800 self.log.info(
811 "DIB failed creating %s (%s) (timeout=%s)" % ( 801 "DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
812 diskimage.name, p.returncode, did_timeout))
813 build_data.state = zk.FAILED 802 build_data.state = zk.FAILED
814 else: 803 else:
815 self.log.info("DIB image %s is built" % diskimage.name) 804 self.log.info("DIB image %s is built" % diskimage.name)
diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py
index f2646af..c3dde57 100644
--- a/nodepool/cmd/config_validator.py
+++ b/nodepool/cmd/config_validator.py
@@ -44,7 +44,6 @@ class ConfigValidator:
44 'rebuild-age': int, 44 'rebuild-age': int,
45 'env-vars': {str: str}, 45 'env-vars': {str: str},
46 'username': str, 46 'username': str,
47 'build-timeout': int,
48 } 47 }
49 48
50 webapp = { 49 webapp = {
diff --git a/nodepool/config.py b/nodepool/config.py
index 7f09961..2b5e4eb 100755
--- a/nodepool/config.py
+++ b/nodepool/config.py
@@ -118,7 +118,6 @@ class Config(ConfigValue):
118 d.image_types = set(diskimage.get('formats', [])) 118 d.image_types = set(diskimage.get('formats', []))
119 d.pause = bool(diskimage.get('pause', False)) 119 d.pause = bool(diskimage.get('pause', False))
120 d.username = diskimage.get('username', 'zuul') 120 d.username = diskimage.get('username', 'zuul')
121 d.build_timeout = diskimage.get('build-timeout', (8 * 60 * 60))
122 self.diskimages[d.name] = d 121 self.diskimages[d.name] = d
123 122
124 def setSecureDiskimageEnv(self, diskimages, secure_config_path): 123 def setSecureDiskimageEnv(self, diskimages, secure_config_path):
@@ -180,7 +179,6 @@ class DiskImage(ConfigValue):
180 self.image_types = None 179 self.image_types = None
181 self.pause = False 180 self.pause = False
182 self.username = None 181 self.username = None
183 self.build_timeout = None
184 182
185 def __eq__(self, other): 183 def __eq__(self, other):
186 if isinstance(other, DiskImage): 184 if isinstance(other, DiskImage):
@@ -191,8 +189,7 @@ class DiskImage(ConfigValue):
191 other.env_vars == self.env_vars and 189 other.env_vars == self.env_vars and
192 other.image_types == self.image_types and 190 other.image_types == self.image_types and
193 other.pause == self.pause and 191 other.pause == self.pause and
194 other.username == self.username and 192 other.username == self.username)
195 other.build_timeout == self.build_timeout)
196 return False 193 return False
197 194
198 def __repr__(self): 195 def __repr__(self):
diff --git a/nodepool/tests/__init__.py b/nodepool/tests/__init__.py
index 376b4fb..4a8e83d 100644
--- a/nodepool/tests/__init__.py
+++ b/nodepool/tests/__init__.py
@@ -410,25 +410,20 @@ class DBTestCase(BaseTestCase):
410 time.sleep(1) 410 time.sleep(1)
411 self.wait_for_threads() 411 self.wait_for_threads()
412 412
413 def waitForBuild(self, image_name, build_id, states=None): 413 def waitForBuild(self, image_name, build_id):
414 if states is None:
415 states = (zk.READY,)
416
417 base = "-".join([image_name, build_id]) 414 base = "-".join([image_name, build_id])
418
419 while True: 415 while True:
420 self.wait_for_threads() 416 self.wait_for_threads()
421 build = self.zk.getBuild(image_name, build_id) 417 files = builder.DibImageFile.from_image_id(
422 if build and build.state in states: 418 self._config_images_dir.path, base)
419 if files:
423 break 420 break
424 time.sleep(1) 421 time.sleep(1)
425 422
426 # We should only expect a dib manifest with a successful build. 423 while True:
427 while build.state == zk.READY:
428 self.wait_for_threads() 424 self.wait_for_threads()
429 files = builder.DibImageFile.from_image_id( 425 build = self.zk.getBuild(image_name, build_id)
430 self._config_images_dir.path, base) 426 if build and build.state == zk.READY:
431 if files:
432 break 427 break
433 time.sleep(1) 428 time.sleep(1)
434 429
diff --git a/nodepool/tests/fake-image-create b/nodepool/tests/fake-image-create
index 1a2406f..9d236ff 100755
--- a/nodepool/tests/fake-image-create
+++ b/nodepool/tests/fake-image-create
@@ -1,49 +1,10 @@
1#!/bin/bash 1#!/bin/bash
2 2
3outfile=
4outtypes=("qcow2")
5
6all_args=$*
7logfile=
8checksum=
9no_tmpfs=
10qemu_img_options=
11x=
12
13TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum,logfile: -- "$@")
14if [ $? -ne 0 ]; then
15 echo "Invalid option"
16 exit 1
17fi
18eval set -- "$TEMP"
19while true ; do
20 case "$1" in
21 --checksum)
22 checksum=1; shift 1;;
23 --no-tmpfs)
24 no_tmpfs=1; shift 1;;
25 --qemu-img-options)
26 qemu_img_options=$2; shift 2;;
27 --logfile)
28 logfile=$2; shift 2;;
29 -o) outfile=$2; shift 2;;
30 -t) IFS="," read -a outtypes <<< "$2"; shift 2;;
31 -x) x=1; shift;;
32 --) shift ; break ;;
33 *) echo "Unknown option : $1"; exit 1;;
34 esac
35done
36
37# If --logfile was given, direct stdout to it, as well
38if [ ! -z "$logfile" ]; then
39 exec > >(tee -a ${logfile})
40fi
41
42echo "*** fake-image-create: start" 3echo "*** fake-image-create: start"
43 4
44echo "arguments:" 5echo "arguments:"
45echo "----" 6echo "----"
46echo "$all_args" 7echo $*
47echo "----" 8echo "----"
48 9
49if [[ "${SHOULD_FAIL}" == 'true' ]]; then 10if [[ "${SHOULD_FAIL}" == 'true' ]]; then
@@ -77,21 +38,30 @@ if [[ "${BASE_IMAGE_FILE}" != "Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2"
77 exit 1 38 exit 1
78fi 39fi
79 40
80if [ ! -z "$logfile" ]; then 41outfile=
81 echo " -> logfile: $logfile" 42outtypes=("qcow2")
82fi 43
83if [ ! -z "$checksum" ]; then 44TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum -- "$@")
84 echo " -> set --checksum" 45if [ $? -ne 0 ]; then
85fi 46 echo "Invalid option"
86if [ ! -z "$no_tmpfs" ]; then 47 exit 1
87 echo " -> set --no-tmpfs"
88fi
89if [ ! -z "$qemu_img_options" ]; then
90 echo " -> qemu-img-options: $qemu_img_options"
91fi
92if [ ! -z "$x" ]; then
93 echo " -> debugging enabled"
94fi 48fi
49eval set -- "$TEMP"
50while true ; do
51 case "$1" in
52 --checksum)
53 echo " -> set --checksum"; shift 1;;
54 --no-tmpfs)
55 echo " -> set --no-tmpfs"; shift 1;;
56 --qemu-img-options)
57 echo " -> qemu-img-options: $2"; shift 2;;
58 -o) outfile=$2; shift 2;;
59 -t) IFS="," read -a outtypes <<< "$2"; shift 2;;
60 -x) echo " -> debugging enabled"; shift;;
61 --) shift ; break ;;
62 *) echo "Unknown option : $1"; exit 1;;
63 esac
64done
95 65
96if [ -z "$outfile" ]; then 66if [ -z "$outfile" ]; then
97 echo "No output file specified." 67 echo "No output file specified."
diff --git a/nodepool/tests/fixtures/config_validate/good.yaml b/nodepool/tests/fixtures/config_validate/good.yaml
index fc59937..db7d1ec 100644
--- a/nodepool/tests/fixtures/config_validate/good.yaml
+++ b/nodepool/tests/fixtures/config_validate/good.yaml
@@ -130,7 +130,6 @@ diskimages:
130 - cache-devstack 130 - cache-devstack
131 release: trusty 131 release: trusty
132 rebuild-age: 3600 132 rebuild-age: 3600
133 build-timeout: 3600
134 env-vars: 133 env-vars:
135 TMPDIR: /opt/dib_tmp 134 TMPDIR: /opt/dib_tmp
136 DIB_IMAGE_CACHE: /opt/dib_cache 135 DIB_IMAGE_CACHE: /opt/dib_cache
diff --git a/nodepool/tests/fixtures/diskimage_build_timeout.yaml b/nodepool/tests/fixtures/diskimage_build_timeout.yaml
deleted file mode 100644
index 01770bd..0000000
--- a/nodepool/tests/fixtures/diskimage_build_timeout.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
1elements-dir: .
2images-dir: '{images_dir}'
3build-log-dir: '{build_log_dir}'
4
5zookeeper-servers:
6 - host: {zookeeper_host}
7 port: {zookeeper_port}
8 chroot: {zookeeper_chroot}
9
10labels: []
11
12providers: []
13
14diskimages:
15 - name: fake-image
16 formats:
17 - tar
18 elements:
19 - fedora
20 - vm
21 release: 21
22 env-vars:
23 TMPDIR: /opt/dib_tmp
24 DIB_IMAGE_CACHE: /opt/dib_cache
25 DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
26 BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
diff --git a/nodepool/tests/unit/test_builder.py b/nodepool/tests/unit/test_builder.py
index cfea4b8..c992bf7 100644
--- a/nodepool/tests/unit/test_builder.py
+++ b/nodepool/tests/unit/test_builder.py
@@ -16,8 +16,6 @@
16import os 16import os
17import uuid 17import uuid
18import fixtures 18import fixtures
19import mock
20import subprocess
21 19
22from nodepool import builder, exceptions, tests 20from nodepool import builder, exceptions, tests
23from nodepool.driver.fake import provider as fakeprovider 21from nodepool.driver.fake import provider as fakeprovider
@@ -337,10 +335,3 @@ class TestNodePoolBuilder(tests.DBTestCase):
337 335
338 self.assertEqual(build_default._formats, ['qcow2']) 336 self.assertEqual(build_default._formats, ['qcow2'])
339 self.assertEqual(build_vhd._formats, ['vhd']) 337 self.assertEqual(build_vhd._formats, ['vhd'])
340
341 @mock.patch.object(subprocess.Popen, 'wait')
342 def test_diskimage_build_timeout(self, mock_wait):
343 mock_wait.side_effect = subprocess.TimeoutExpired('dib_cmd', 1)
344 configfile = self.setup_config('diskimage_build_timeout.yaml')
345 self.useBuilder(configfile, cleanup_interval=0)
346 self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,))
diff --git a/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml b/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml
deleted file mode 100644
index 6901903..0000000
--- a/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
1---
2features:
3 - |
4 A new option (build-timeout) has been added to the builder diskimage
5 configuration to control how long the builder should wait for image
6 builds before giving up. The default is 8 hours.