Merge "Fix race when deleting Node znodes"

This commit is contained in:
Zuul 2018-12-04 19:20:05 +00:00 committed by Gerrit Code Review
commit 1b5d416f36
2 changed files with 32 additions and 1 deletions

View File

@ -704,6 +704,27 @@ class DeletedNodeWorker(BaseCleanupWorker):
except exceptions.ZKLockException:
continue
if (node.state == zk.DELETED or
node.provider is None):
# The node has been deleted out from under us --
# we only obtained the lock because in the
# recursive delete, the lock is deleted first and
# we locked the node between the time of the lock
# delete and the node delete. We need to clean up
# the mess.
try:
# This should delete the lock as well
zk_conn.deleteNode(node)
except Exception:
self.log.exception(
"Error deleting already deleted znode:")
try:
zk_conn.unlockNode(node)
except Exception:
self.log.exception(
"Error unlocking already deleted znode:")
continue
# Double check the state now that we have a lock since it
# may have changed on us.
if node.state not in cleanup_states:

View File

@ -57,6 +57,8 @@ INIT = 'init'
# Aborted due to a transient error like overquota that should not count as a
# failed launch attempt
ABORTED = 'aborted'
# The node has actually been deleted and the Znode should be deleted
DELETED = 'deleted'
# NOTE(Shrews): Importing this from nodepool.config causes an import error
@ -503,7 +505,8 @@ class Node(BaseModel):
Class representing a launched node.
'''
VALID_STATES = set([BUILDING, TESTING, READY, IN_USE, USED,
HOLD, DELETING, FAILED, INIT, ABORTED])
HOLD, DELETING, FAILED, INIT, ABORTED,
DELETED])
def __init__(self, id=None):
super(Node, self).__init__(id)
@ -1889,6 +1892,13 @@ class ZooKeeper(object):
return
path = self._nodePath(node.id)
# Set the node state to deleted before we start deleting
# anything so that we can detect a race condition where the
# lock is removed before the node deletion occurs.
node.state = DELETED
self.client.set(path, node.serialize())
try:
self.client.delete(path, recursive=True)
except kze.NoNodeError: