Fix permanently broken git cache

In some cases, such as VM crashes, the cached git repository can be
broken without the git.Repo object detecting it. In this case the
cached repo is permanently broken and the executor cannot recover
except by taking it out of service and manually deleting all repos
before starting it again.

Instead, catch the 'fatal: not a git repository' message from git and
delete the broken repository completely before advancing to
_ensure_cloned, which will create a fresh clone. Without the fix,
_ensure_cloned will still detect a 'valid' repo and won't do anything
with it.

Change-Id: Ic1f3db5afc321712ab2d42f25482e9299fd2f408
This commit is contained in:
Tobias Henkel 2018-08-10 14:41:53 +02:00
parent 04524010a1
commit 477fe80dc0
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2
2 changed files with 17 additions and 1 deletions

View File

@ -179,6 +179,14 @@ class TestMergerRepo(ZuulTestCase):
# And now reset the repo again. This should not crash
work_repo.reset()
# Now open a cache repo and break it in a way that git.Repo is happy
# at first but git won't be.
merger = self.executor_server.merger
cache_repo = merger.getRepo('gerrit', 'org/project')
with open(os.path.join(cache_repo.local_path, '.git/HEAD'), 'w'):
pass
cache_repo.update()
def test_broken_gitmodules(self):
parent_path = os.path.join(self.upstream_root, 'org/project1')
work_repo = Repo(parent_path, self.workspace_root,

View File

@ -173,13 +173,21 @@ class Repo(object):
break
except Exception as e:
if attempt < self.retry_attempts:
if 'fatal: bad config' in e.stderr:
if 'fatal: bad config' in e.stderr.lower():
# This error can be introduced by a merge conflict
# in the .gitmodules which was left by the last
# merge operation. In this case reset and clean
# the repo and try again immediately.
reset_repo_to_head(repo)
repo.git.clean('-x', '-f', '-d')
elif 'fatal: not a git repository' in e.stderr.lower():
# If we get here the git.Repo object was happy with its
# lightweight way of checking if this is a valid git
# repository. However if e.g. the .git/HEAD file is
# empty git operations fail. So there is something
# fundamentally broken with the repo and we need to
# delete it before advancing to _ensure_cloned.
shutil.rmtree(self.local_path)
else:
time.sleep(self.retry_interval)
self.log.exception("Retry %s: Fetch %s %s %s" % (