Merge "zk: skip node already being deleted in cleanup leaked instance task"

This commit is contained in:
Zuul 2018-08-24 22:04:40 +00:00 committed by Gerrit Code Review
commit 168a12a0a6
3 changed files with 102 additions and 0 deletions

View File

@ -474,6 +474,13 @@ class CleanupWorker(BaseCleanupWorker):
'''
zk_conn = self._nodepool.getZK()
deleting_nodes = {}
for node in zk_conn.nodeIterator():
if node.state == zk.DELETING:
if node.provider not in deleting_nodes:
deleting_nodes[node.provider] = []
deleting_nodes[node.provider].append(node.external_id)
for provider in self._nodepool.config.providers.values():
manager = self._nodepool.getProviderManager(provider.name)
@ -488,6 +495,11 @@ class CleanupWorker(BaseCleanupWorker):
# with a different name, owns this.
continue
if (provider.name in deleting_nodes and
server.id in deleting_nodes[provider.name]):
# Already deleting this node
continue
if not zk_conn.getNode(meta['nodepool_node_id']):
self.log.warning(
"Marking for delete leaked instance %s (%s) in %s "

View File

@ -0,0 +1,49 @@
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
build-log-retention: 1
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -725,6 +725,47 @@ class TestLauncher(tests.DBTestCase):
self.assertEqual(len(nodes), 1)
self.assertEqual(nodes[0].provider, 'fake-provider')
def test_node_delete_error(self):
def error_delete(self, name):
# Set ERROR status instead of deleting the node
self._getClient()._server_list[0].status = 'ERROR'
self.useFixture(fixtures.MockPatchObject(
fakeprovider.FakeProvider, 'deleteServer', error_delete))
configfile = self.setup_config('node_delete_error.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
# request a node
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('fake-label')
self.zk.storeNodeRequest(req)
self.log.debug("Wait for request")
req = self.waitForNodeRequest(req)
self.assertEqual(req.state, zk.FULFILLED)
self.assertEqual(len(req.nodes), 1)
# remove the node from db
self.log.debug("deleting node %s", req.nodes[0])
node = self.zk.getNode(req.nodes[0])
self.zk.deleteNode(node)
# wait the cleanup thread to kick in
time.sleep(5)
zk_nodes = self.zk.getNodes()
self.assertEqual(len(zk_nodes), 1)
node = self.zk.getNode(zk_nodes[0])
self.assertEqual(node.state, zk.DELETING)
# remove error nodes
pool.getProviderManager(
'fake-provider')._getClient()._server_list.clear()
def test_leaked_node(self):
"""Test that a leaked node is deleted"""
configfile = self.setup_config('leaked_node.yaml')