validate-host: retry network tests and include unbound logs

Split the network testing component of the validate-host rule into a
separate task, so it can be retried a couple of times in case
something is a bit slow about bringing up external networking.  On
failure, also collect the unbound logs if they are found in one of the
common locations (such as they will be on infra nodes).

Change-Id: Id12f1ba064fa2e5f75b9a5cfba76d238d23d3f57
This commit is contained in:
Ian Wienand 2018-11-06 11:33:22 +11:00
parent 76fdb33658
commit 7e00ba32da
4 changed files with 139 additions and 46 deletions

View File

@ -18,15 +18,10 @@
import os
import shlex
import subprocess
import traceback
command_map = {
'uname': 'uname -a',
'network_interfaces': 'ip address show',
'network_routing_v4': 'ip route show',
'network_routing_v6': 'ip -6 route show',
'network_neighbors': 'ip neighbor show',
}
@ -44,16 +39,14 @@ def main():
argument_spec=dict(
image_manifest=dict(required=False, type='str'),
image_manifest_files=dict(required=False, type='list'),
traceroute_host=dict(required=False, type='str'),
)
)
image_manifest = module.params['image_manifest']
traceroute_host = module.params['traceroute_host']
image_manifest_files = module.params['image_manifest_files']
if not image_manifest_files and image_manifest:
image_manifest_files = [image_manifest]
ret = {'image_manifest_files': [], 'traceroute': None}
ret = {'image_manifest_files': []}
for image_manifest in image_manifest_files:
if image_manifest and os.path.exists(image_manifest):
@ -63,31 +56,6 @@ def main():
'underline': len(image_manifest) * '-',
'content': open(image_manifest, 'r').read(),
})
if traceroute_host:
passed = False
try:
ret['traceroute_v6'] = run_command(
'traceroute6 -n {host}'.format(host=traceroute_host))
passed = True
except (subprocess.CalledProcessError, OSError) as e:
ret['traceroute_v6_exception'] = traceback.format_exc(e)
ret['traceroute_v6_output'] = e.output
ret['traceroute_v6_return'] = e.returncode
pass
try:
ret['traceroute_v4'] = run_command(
'traceroute -n {host}'.format(host=traceroute_host))
passed = True
except (subprocess.CalledProcessError, OSError) as e:
ret['traceroute_v4_exception'] = traceback.format_exc(e)
ret['traceroute_v4_output'] = e.output
ret['traceroute_v4_return'] = e.returncode
pass
if not passed:
module.fail_json(
msg="No viable v4 or v6 route found to {traceroute_host}."
" The build node is assumed to be invalid.".format(
traceroute_host=traceroute_host), **ret)
for key, command in command_map.items():
try:

View File

@ -0,0 +1,118 @@
#!/usr/bin/python
# Copyright (c) 2018 Red Hat
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this software. If not, see <http://www.gnu.org/licenses/>.
import os
import shlex
import subprocess
import traceback
# Diagnostic commands to run on the node.  The keys name the kind of
# network information gathered; the values are the command lines used
# to gather it.
command_map = dict(
    network_interfaces='ip address show',
    network_routing_v4='ip route show',
    network_routing_v6='ip -6 route show',
    network_neighbors='ip neighbor show',
)
def run_command(command):
    '''Run a command and return its output

    The command string is split with shlex and executed directly (no
    shell).  stderr is folded into stdout so the returned output
    contains both.  /sbin and /usr/sbin are appended to PATH for the
    child process.

    :param command: the command line to run, as a single string
    :returns: the output of the command, as returned by
              subprocess.check_output
    :raises subprocess.CalledProcessError: if the command exits non-zero
    :raises OSError: if the command could not be started at all
    '''
    env = os.environ.copy()
    # PATH may be missing from the environment entirely; fall back to
    # the platform default search path instead of raising KeyError.
    env['PATH'] = '{path}:/sbin:/usr/sbin'.format(
        path=env.get('PATH', os.defpath))
    return subprocess.check_output(
        shlex.split(command),
        stderr=subprocess.STDOUT,
        env=env)
def collect_unbound_logs():
    '''Look for unbound logs

    This looks for unbound logs in common places and returns the
    contents.  Intended for the failure path to add more info if the
    traceroutes have failed.

    :returns: a dict with the keys ``unbound_log_file`` (the path that
              was read) and ``unbound_log_file_content`` (its
              contents), or an empty dict if no log could be read
    '''
    # NOTE(ianw): keep this one first, the other exists but isn't
    # populated on infra rpm images for ... reasons
    candidates = (
        '/var/lib/unbound/unbound.log',
        '/var/log/unbound.log',
    )

    for unbound_log_file in candidates:
        try:
            with open(unbound_log_file) as f:
                # NOTE(ianw): At high verbosity this can be big ... but
                # this is also intended to be used early which should
                # limit its size.  We could tail it ...
                content = f.read()
        except (IOError, OSError):
            # Missing or unreadable; this runs on the failure path, so
            # never let log collection raise and hide the real error.
            continue
        return {
            'unbound_log_file': unbound_log_file,
            'unbound_log_file_content': content,
        }

    return {}
def _run_traceroute(ret, key, command):
    '''Run one traceroute command and record the outcome in ret

    On success the output is stored under ``key``.  On failure the
    traceback, output and return code are stored under
    ``<key>_exception``, ``<key>_output`` and ``<key>_return``.

    :returns: True if the command succeeded, False otherwise
    '''
    try:
        ret[key] = run_command(command)
    except (subprocess.CalledProcessError, OSError) as e:
        # format_exc() takes no exception argument; it formats the
        # exception currently being handled.
        ret[key + '_exception'] = traceback.format_exc()
        # OSError (e.g. the binary is not installed) has neither of
        # these attributes, so don't assume they exist.
        ret[key + '_output'] = getattr(e, 'output', None)
        ret[key + '_return'] = getattr(e, 'returncode', None)
        return False
    return True


def main():
    '''Collect network information and check for a route to a known host

    Runs every command in command_map, then traceroutes to
    ``traceroute_host`` over both v6 and v4.  The module fails only if
    neither traceroute succeeds; in that case any unbound logs found
    are added to the returned data.
    '''
    module = AnsibleModule(
        argument_spec=dict(
            traceroute_host=dict(required=True, type='str'),
        )
    )

    traceroute_host = module.params['traceroute_host']

    ret = {}

    # Best effort: a diagnostic command that fails or is not installed
    # is simply left out of the results.
    for key, command in command_map.items():
        try:
            ret[key] = run_command(command)
        except (subprocess.CalledProcessError, OSError):
            pass

    # Always run both, so the results contain whatever each one found;
    # one working address family is enough to pass.
    passed_v6 = _run_traceroute(
        ret, 'traceroute_v6',
        'traceroute6 -n {host}'.format(host=traceroute_host))
    passed_v4 = _run_traceroute(
        ret, 'traceroute_v4',
        'traceroute -n {host}'.format(host=traceroute_host))

    if not (passed_v6 or passed_v4):
        ret.update(collect_unbound_logs())
        module.fail_json(
            msg="No viable v4 or v6 route found to {traceroute_host}."
            " The build node is assumed to be invalid.".format(
                traceroute_host=traceroute_host), **ret)

    # NOTE(review): _zuul_nolog_return presumably keeps this (large)
    # return value out of the Zuul job log -- confirm against Zuul.
    module.exit_json(changed=False, _zuul_nolog_return=True, **ret)
from ansible.module_utils.basic import * # noqa
from ansible.module_utils.basic import AnsibleModule
# Run the module only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@ -23,9 +23,16 @@
zuul_debug_info:
image_manifest: "{{ zuul_site_image_manifest|default(omit) }}"
image_manifest_files: "{{ zuul_site_image_manifest_files|default(omit) }}"
traceroute_host: "{{ zuul_site_traceroute_host|default(omit) }}"
register: zdi
- name: Collect network information from zuul worker
  zuul_network_validate:
    # NOTE(review): the zuul_network_validate module declares
    # traceroute_host as required, so omitting it would fail argument
    # validation -- confirm zuul_site_traceroute_host is always set.
    traceroute_host: "{{ zuul_site_traceroute_host|default(omit) }}"
  register: znetinfo
  # External networking can be slow to come up, so retry a few times
  # before giving up on the node.
  retries: 3
  delay: 5
  until: znetinfo is succeeded
- name: Write out all zuul information for each host
delegate_to: localhost
template:

View File

@ -15,40 +15,40 @@ Host & kernel
{{ zdi.uname }}
{% endif %}
{% if 'network_interfaces' in zdi %}
{% if 'network_interfaces' in znetinfo %}
Network interface addresses
===========================
{{ zdi.network_interfaces }}
{{ znetinfo.network_interfaces }}
{% endif %}
{% if 'network_routing_v4' in zdi %}
{% if 'network_routing_v4' in znetinfo %}
Network routing tables v4
=========================
{{ zdi.network_routing_v4 }}
{{ znetinfo.network_routing_v4 }}
{% endif %}
{% if 'network_routing_v6' in zdi %}
{% if 'network_routing_v6' in znetinfo %}
Network routing tables v6
=========================
{{ zdi.network_routing_v6 }}
{{ znetinfo.network_routing_v6 }}
{% endif %}
{% if 'network_neighbors' in zdi %}
{% if 'network_neighbors' in znetinfo %}
Network neighbors
=================
{{ zdi.network_neighbors }}
{{ znetinfo.network_neighbors }}
{% endif %}
{% if 'traceroute_v4' in zdi %}
{% if 'traceroute_v4' in znetinfo %}
Route to Known Host v4
======================
Known Host: {{ zuul_site_traceroute_host }}
{{ zdi.traceroute_v4 }}
{{ znetinfo.traceroute_v4 }}
{% endif %}
{% if 'traceroute_v6' in zdi %}
{% if 'traceroute_v6' in znetinfo %}
Route to Known Host v6
======================
Known Host: {{ zuul_site_traceroute_host }}
{{ zdi.traceroute_v6 }}
{{ znetinfo.traceroute_v6 }}
{% endif %}