Fix error reporting for special task failures

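For some tasks the Ansible log does not contain enough information to debug failures (e.g. a missing role referenced via include_role). Ansible treats such problems not as an error (exit code 1) but as a failed task, which results in an exit code of 2. For that exit code, copy the Ansible output starting at the first line that begins with 'ERROR!' into the job output file so the failure can be debugged.

Change-Id: Iea754814e3d55be6be1c2de7f2d45ceda757f480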
2019-02-06 08:57:42 +01:00 · 2019-02-06 08:57:42 +01:00 · 6e424878ae
parent 85c70008d6
commit 6e424878ae
4 changed files with 37 additions and 11 deletions
--- a/tests/fixtures/config/job-output/git/common-config/playbooks/job-output-missing-role-include.yaml
+++ b/tests/fixtures/config/job-output/git/common-config/playbooks/job-output-missing-role-include.yaml
@@ -0,0 +1,4 @@
+- hosts: all
+  tasks:
+    - include_role:
+        name: not_existing
--- a/tests/fixtures/config/job-output/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/job-output/git/common-config/zuul.yaml
@@ -30,6 +30,10 @@
    name: job-output-missing-role
    run: playbooks/job-output-missing-role.yaml

+- job:
+    name: job-output-missing-role-include
+    run: playbooks/job-output-missing-role-include.yaml
+
 - project:
    name: org/project
    check:
@@ -47,3 +51,4 @@
    check:
      jobs:
        - job-output-missing-role
+        - job-output-missing-role-include
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -4360,11 +4360,15 @@ class TestJobOutput(AnsibleZuulTestCase):
        self.assertHistory([
            dict(name='job-output-missing-role', result='FAILURE',
                 changes='1,1'),
+            dict(name='job-output-missing-role-include', result='FAILURE',
+                 changes='1,1'),
        ], ordered=False)

-        job_output = self._get_file(self.history[0],
-                                    'work/logs/job-output.txt')
-        self.assertIn('the role \'not_existing\' was not found', job_output)
+        for history in self.history:
+            job_output = self._get_file(history,
+                                        'work/logs/job-output.txt')
+            self.assertIn('the role \'not_existing\' was not found',
+                          job_output)

    def test_job_output_failure_log(self):
        logger = logging.getLogger('zuul.AnsibleJob')
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -1948,14 +1948,27 @@ class AnsibleJob(object):
                        now=datetime.datetime.now(),
                        line=line.decode('utf-8').rstrip()))
        elif ret == 2:
-            # This is a workaround to detect winrm connection failures that are
-            # not detected by ansible. These can be detected if the string
-            # 'FATAL ERROR DURING FILE TRANSFER' is in the ansible output.
-            # In this case we should treat the host as unreachable and retry
-            # the job.
-            for line in syntax_buffer:
-                if b'FATAL ERROR DURING FILE TRANSFER' in line:
-                    return self.RESULT_UNREACHABLE, None
+            with open(self.jobdir.job_output_file, 'a') as job_output:
+                found_marker = False
+                for line in syntax_buffer:
+                    # This is a workaround to detect winrm connection failures
+                    # that are not detected by ansible. These can be detected
+                    # if the string 'FATAL ERROR DURING FILE TRANSFER' is in
+                    # the ansible output. In this case we should treat the
+                    # host as unreachable and retry the job.
+                    if b'FATAL ERROR DURING FILE TRANSFER' in line:
+                        return self.RESULT_UNREACHABLE, None
+
+                    # Extract errors for special cases that are treated like
+                    # task errors by Ansible (e.g. missing role when using
+                    # 'include_role').
+                    if line.startswith(b'ERROR!'):
+                        found_marker = True
+                    if not found_marker:
+                        continue
+                    job_output.write("{now} | {line}\n".format(
+                        now=datetime.datetime.now(),
+                        line=line.decode('utf-8').rstrip()))

        return (self.RESULT_NORMAL, ret)