Merge "Fix race when deleting Node znodes"
This commit is contained in:
commit
1b5d416f36
|
@ -704,6 +704,27 @@ class DeletedNodeWorker(BaseCleanupWorker):
|
|||
except exceptions.ZKLockException:
|
||||
continue
|
||||
|
||||
if (node.state == zk.DELETED or
|
||||
node.provider is None):
|
||||
# The node has been deleted out from under us --
|
||||
# we only obtained the lock because in the
|
||||
# recursive delete, the lock is deleted first and
|
||||
# we locked the node between the time of the lock
|
||||
# delete and the node delete. We need to clean up
|
||||
# the mess.
|
||||
try:
|
||||
# This should delete the lock as well
|
||||
zk_conn.deleteNode(node)
|
||||
except Exception:
|
||||
self.log.exception(
|
||||
"Error deleting already deleted znode:")
|
||||
try:
|
||||
zk_conn.unlockNode(node)
|
||||
except Exception:
|
||||
self.log.exception(
|
||||
"Error unlocking already deleted znode:")
|
||||
continue
|
||||
|
||||
# Double check the state now that we have a lock since it
|
||||
# may have changed on us.
|
||||
if node.state not in cleanup_states:
|
||||
|
|
|
@ -57,6 +57,8 @@ INIT = 'init'
|
|||
# Aborted due to a transient error like overquota that should not count as a
|
||||
# failed launch attempt
|
||||
ABORTED = 'aborted'
|
||||
# The node has actually been deleted and the Znode should be deleted
|
||||
DELETED = 'deleted'
|
||||
|
||||
|
||||
# NOTE(Shrews): Importing this from nodepool.config causes an import error
|
||||
|
@ -503,7 +505,8 @@ class Node(BaseModel):
|
|||
Class representing a launched node.
|
||||
'''
|
||||
VALID_STATES = set([BUILDING, TESTING, READY, IN_USE, USED,
|
||||
HOLD, DELETING, FAILED, INIT, ABORTED])
|
||||
HOLD, DELETING, FAILED, INIT, ABORTED,
|
||||
DELETED])
|
||||
|
||||
def __init__(self, id=None):
|
||||
super(Node, self).__init__(id)
|
||||
|
@ -1889,6 +1892,13 @@ class ZooKeeper(object):
|
|||
return
|
||||
|
||||
path = self._nodePath(node.id)
|
||||
|
||||
# Set the node state to deleted before we start deleting
|
||||
# anything so that we can detect a race condition where the
|
||||
# lock is removed before the node deletion occurs.
|
||||
node.state = DELETED
|
||||
self.client.set(path, node.serialize())
|
||||
|
||||
try:
|
||||
self.client.delete(path, recursive=True)
|
||||
except kze.NoNodeError:
|
||||
|
|
Loading…
Reference in New Issue