From 071426c223fe1e8afedf8f67736a2cb08d765573 Mon Sep 17 00:00:00 2001
From: James Parker
Date: Fri, 14 Jan 2022 12:59:36 -0500
Subject: [PATCH] Test resize with mem_page_size in flavor

This change addresses issue [1]. It adds three new test cases:

* test_hugepage_resize_large_to_small
* test_hugepage_resize_size_to_small
* test_hugepage_resize_size_to_size

All three tests follow the same basic procedure: spawn a guest with a
flavor that sets hw:mem_page_size, resize the guest to a flavor with a
different hw:mem_page_size value, and then resize the guest back to the
original flavor. Throughout the tests, XML checks are conducted to
ensure the page size is accurate for the current flavor (a standalone
sketch of this check is included after the test_cpu_pinning.py diff
below).

Instead of trying to dynamically determine the hugepage sizes
configured on the computes, a new config parameter was added to define
what hugepage sizes are available on the host. To avoid dynamically
calculating guest RAM sizes based on available hugepages, a guest RAM
size parameter was also added so users may define the size to use when
spawning guests.

We also need a new job that has multiple hugepage sizes configured. We
cannot use our existing whitebox-devstack-multinode job because that
job runs tests that dynamically turn on file-backed memory, which is
incompatible with hugepages. This commit adds tasks to the job setup
that configure hugepages on the compute hosts.

In our devstack plugin.sh, we set track_instance_changes to True
(devstack defaults it to False) to make sure the scheduler has the
latest information about available huge pages, and to avoid a race
wherein instances failed to schedule because our lone 1G page still
appeared to be in use by an instance that had actually been fully
deleted.

[1] https://bugs.launchpad.net/nova/+bug/1831269

Change-Id: I5282df3b20c24a909f3b7bb97214206bc07e5b91
---
 .zuul.yaml                                  |  32 ++-
 devstack/plugin.sh                          |   3 +
 devstack/settings                           |   2 +
 playbooks/whitebox/pre.yaml                 |  43 +++-
 whitebox_tempest_plugin/api/compute/base.py |   8 +
 .../api/compute/test_cpu_pinning.py         |   8 -
 .../api/compute/test_hugepages.py           | 196 ++++++++++++++++++
 whitebox_tempest_plugin/config.py           |  13 +-
 8 files changed, 289 insertions(+), 16 deletions(-)
 create mode 100644 whitebox_tempest_plugin/api/compute/test_hugepages.py

diff --git a/.zuul.yaml b/.zuul.yaml
index 39ba9813..9e0c55ff 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -3,7 +3,11 @@
     nodes:
       - name: controller
         label: nested-virt-ubuntu-jammy
-      - name: compute
+      # NOTE(artom) We can't name the node 'compute' because that seems to
+      # take precedence over the 'compute' group in playbooks, so things we
+      # want to run on all hosts in the 'compute' group would only run on the
+      # subnode.
+      - name: compute-host
         label: nested-virt-ubuntu-jammy
     groups:
       # Node where tests are executed and test results collected
@@ -14,11 +18,11 @@
       - name: compute
         nodes:
           - controller
-          - compute
+          - compute-host
       # Nodes that are not the controller
       - name: subnode
         nodes:
-          - compute
+          - compute-host
       # Switch node for multinode networking setup
       - name: switch
         nodes:
@@ -26,7 +30,7 @@
       # Peer nodes for multinode networking setup
       - name: peers
         nodes:
-          - compute
+          - compute-host
 
 - job:
     name: whitebox-devstack-multinode
@@ -49,6 +53,7 @@
       # open source implementation of UEFI for VMs via the OVMF package. In
       # addition to test vTPM hosts need swtpm as well
       extra_packages: ovmf,swtpm-tools
+      tempest_exclude_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages
       devstack_localrc:
         MAX_COMPUTE_NODES: 2
         NOVA_SERVICE_REPORT_INTERVAL: 10
@@ -86,7 +91,6 @@
         swtpm_group: swtpm
     group-vars:
       subnode:
-        num_hugepages: 2048
         devstack_localrc:
           LIBVIRT_TYPE: kvm
           NOVA_SERVICE_REPORT_INTERVAL: 10
@@ -109,11 +113,24 @@
             swtpm_user: swtpm
             swtpm_group: swtpm
       tempest:
-        num_hugepages: 512
         devstack_plugins:
           barbican: https://opendev.org/openstack/barbican.git
           whitebox-tempest-plugin: https://opendev.org/openstack/whitebox-tempest-plugin.git
 
+- job:
+    name: whitebox-devstack-multinode-hugepages
+    parent: whitebox-devstack-multinode
+    description: |
+      Runs the hugepages tests on a deployment that has set up hugepages on the hosts.
+    vars:
+      tempest_test_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages
+      # NOTE(artom) The parent job's exclude regex excludes the hugepages
+      # tests, so we need to overwrite it here with a regex that matches
+      # *nothing*.
+      tempest_exclude_regex: $^
+      num_2M_pages: 512
+      num_1G_pages: 1
+
 - job:
     name: whitebox-devstack-ceph-multinode
     parent: devstack-plugin-ceph-multinode-tempest-py3
@@ -174,3 +191,6 @@
       - whitebox-devstack-multinode
       - whitebox-devstack-ceph-multinode
       - openstack-tox-pep8
+    experimental:
+      jobs:
+        - whitebox-devstack-multinode-hugepages
diff --git a/devstack/plugin.sh b/devstack/plugin.sh
index 44a9e063..c8940541 100644
--- a/devstack/plugin.sh
+++ b/devstack/plugin.sh
@@ -19,6 +19,7 @@
 function configure {
     iniset $TEMPEST_CONFIG whitebox default_video_model $WHITEBOX_DEFAULT_VIDEO_MODEL
     iniset $TEMPEST_CONFIG whitebox max_disk_devices_to_attach $WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH
     iniset $TEMPEST_CONFIG whitebox nodes_yaml $WHITEBOX_NODES_YAML
+    iniset $TEMPEST_CONFIG whitebox hugepage_guest_ram_size $WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE
     iniset $TEMPEST_CONFIG whitebox-database user $DATABASE_USER
     iniset $TEMPEST_CONFIG whitebox-database password $DATABASE_PASSWORD
@@ -27,6 +28,7 @@
     iniset $TEMPEST_CONFIG whitebox-hardware cpu_topology "$WHITEBOX_CPU_TOPOLOGY"
     iniset $TEMPEST_CONFIG whitebox-hardware dedicated_cpus_per_numa "$WHITEBOX_DEDICATED_CPUS_PER_NUMA"
     iniset $TEMPEST_CONFIG whitebox-hardware shared_cpus_per_numa "$WHITEBOX_SHARED_CPUS_PER_NUMA"
+    iniset $TEMPEST_CONFIG whitebox-hardware configured_hugepage_sizes "$WHITEBOX_CONFIGURED_HUGEPAGES"
 
     iniset $TEMPEST_CONFIG compute-feature-enabled virtio_rng "$COMPUTE_FEATURE_VIRTIO_RNG"
     iniset $TEMPEST_CONFIG compute-feature-enabled rbd_download "$COMPUTE_FEATURE_RBD_DOWNLOAD"
@@ -39,6 +41,7 @@
     # https://github.com/openstack/devstack/blob/6b0f055b4ed407f8a190f768d0e654235ac015dd/lib/nova#L46C36-L46C50
     iniset $TEMPEST_CONFIG whitebox-nova-compute state_path $DATA_DIR/nova
 
+    iniset $NOVA_CONF filter_scheduler track_instance_changes True
 }
 
 if [[ "$1" == "stack" ]]; then
diff --git a/devstack/settings b/devstack/settings
index 23a946e8..17f3dc58 100644
--- a/devstack/settings
+++ b/devstack/settings
@@ -7,10 +7,12 @@
 WHITEBOX_RX_QUEUE_SIZE=${WHITEBOX_RX_QUEUE_SIZE:-1024}
 WHITEBOX_DEFAULT_VIDEO_MODEL=${WHITEBOX_DEFAULT_VIDEO_MODEL:-'virtio'}
 WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH=${WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH:-7}
 WHITEBOX_NODES_YAML=${WHITEBOX_NODES_YAML:-'/home/zuul/compute_nodes.yaml'}
+WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE=${WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE:-1024}
 WHITEBOX_CPU_TOPOLOGY=${WHITEBOX_CPU_TOPOLOGY:-''}
 WHITEBOX_DEDICATED_CPUS_PER_NUMA=${WHITEBOX_DEDICATED_CPUS_PER_NUMA:-4}
 WHITEBOX_SHARED_CPUS_PER_NUMA=${WHITEBOX_SHARED_CPUS_PER_NUMA:-2}
+WHITEBOX_CONFIGURED_HUGEPAGES=${WHITEBOX_CONFIGURED_HUGEPAGES:-'2048,1048576'}
 
 COMPUTE_FEATURE_VIRTIO_RNG=${COMPUTE_FEATURE_VIRTIO_RNG:-'True'}
 COMPUTE_FEATURE_RBD_DOWNLOAD=${COMPUTE_FEATURE_RBD_DOWNLOAD:-'False'}
diff --git a/playbooks/whitebox/pre.yaml b/playbooks/whitebox/pre.yaml
index ba4dbe4b..42e2e819 100644
--- a/playbooks/whitebox/pre.yaml
+++ b/playbooks/whitebox/pre.yaml
@@ -44,4 +44,45 @@
       shell: |
         cat /home/zuul/compute_nodes.yaml
       run_once: true
-      delegate_to: controller
\ No newline at end of file
+      delegate_to: controller
+
+- hosts: compute
+  tasks:
+    - name: Create hugepages for computes
+      block:
+
+        - name: Append to GRUB command line
+          lineinfile:
+            path: /etc/default/grub
+            state: present
+            backrefs: yes
+            regexp: GRUB_CMDLINE_LINUX="([^"]*)"
+            line: GRUB_CMDLINE_LINUX="\1 hugepagesz=2M hugepages={{ num_2M_pages }} hugepagesz=1G hugepages={{ num_1G_pages }} transparent_hugepage=never"
+          become: yes
+
+        - name: Update grub.cfg
+          # NOTE(artom) This assumes an Ubuntu host
+          command: update-grub2
+          become: yes
+
+        - name: Reboot
+          reboot:
+          become: yes
+
+        - name: (Re-)start the Zuul console streamer after the reboot
+          # NOTE(artom) The job will still work if we don't do this, but the
+          # console will get spammed with 'Waiting on logger' messages. See
+          # https://bugs.launchpad.net/openstack-gate/+bug/1806655 for more
+          # info.
+          import_role:
+            name: start-zuul-console
+
+        - name: Add 1G hugetlbfs mount
+          # The 2M hugetlbfs is mounted automatically by the OS, but we need to
+          # manually add the 1G mount.
+          shell: |
+            mkdir /dev/hugepages1G
+            mount -t hugetlbfs -o pagesize=1G none /dev/hugepages1G
+          become: yes
+
+      when: num_2M_pages is defined and num_1G_pages is defined
diff --git a/whitebox_tempest_plugin/api/compute/base.py b/whitebox_tempest_plugin/api/compute/base.py
index 84a0f4ff..9ba1c00f 100644
--- a/whitebox_tempest_plugin/api/compute/base.py
+++ b/whitebox_tempest_plugin/api/compute/base.py
@@ -435,3 +435,11 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest):
                   'status = "%s"' % status)
         data = cursor.fetchall()
         return data[0]['COUNT(*)']
+
+    def _get_hugepage_xml_element(self, server_id):
+        """Gather and return all instances of the page element from XML element
+        'memoryBacking/hugepages' in a given server's domain.
+        """
+        root = self.get_server_xml(server_id)
+        huge_pages = root.findall('.memoryBacking/hugepages/page')
+        return huge_pages
diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
index 732a5afc..d59bf30e 100644
--- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
+++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
@@ -612,14 +612,6 @@
         cpuset = root.find('./vcpu').attrib.get('cpuset', None)
         return hardware.parse_cpu_spec(cpuset)
 
-    def _get_hugepage_xml_element(self, server_id):
-        """Gather and return all instances of the page element from XML element
-        'memoryBacking/hugepages' in a given server's domain.
-        """
-        root = self.get_server_xml(server_id)
-        huge_pages = root.findall('.memoryBacking/hugepages/page')
-        return huge_pages
-
     def _validate_hugepage_elements(self, server_id, pagesize):
         """Analyze the hugepage xml element(s) from a provided instance. Expect
         to find only one hugepage element in the domain. Return boolean result
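For context on the helper being moved into base.py above (and reused by
_get_xml_hugepage_size() in the new test file below): libvirt records hugepage
backing for a guest under <memoryBacking>/<hugepages>, and the page size in KiB
is the 'size' attribute of each <page> element. The following is a minimal
standalone sketch of that extraction; the sample domain XML is hand-written for
illustration, not captured from a real guest.

# Minimal sketch of the check performed via _get_hugepage_xml_element():
# parse the guest's libvirt domain XML and read the hugepage size (in KiB)
# from the <page> element. The XML below is an illustrative sample; the real
# helper obtains the domain XML via get_server_xml().
import xml.etree.ElementTree as ET

SAMPLE_DOMAIN_XML = """
<domain type='kvm'>
  <memoryBacking>
    <hugepages>
      <page size='1048576' unit='KiB' nodeset='0'/>
    </hugepages>
  </memoryBacking>
</domain>
"""

root = ET.fromstring(SAMPLE_DOMAIN_XML)
pages = root.findall('./memoryBacking/hugepages/page')
assert len(pages) == 1, 'expected exactly one hugepage element'
page_size_kb = int(pages[0].attrib['size'])
print(page_size_kb)  # 1048576, i.e. a single 1G page

A flavor with hw:mem_page_size:small produces no <memoryBacking> element at
all, which is why the new tests skip the XML check after resizing to the
'small' flavor.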
diff --git a/whitebox_tempest_plugin/api/compute/test_hugepages.py b/whitebox_tempest_plugin/api/compute/test_hugepages.py
new file mode 100644
index 00000000..85a148a6
--- /dev/null
+++ b/whitebox_tempest_plugin/api/compute/test_hugepages.py
@@ -0,0 +1,196 @@
+# Copyright 2022 Red Hat Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from tempest import config
+import testtools
+
+from whitebox_tempest_plugin.api.compute import base
+
+from oslo_log import log as logging
+
+CONF = config.CONF
+LOG = logging.getLogger(__name__)
+
+
+class HugePageResize(base.BaseWhiteboxComputeTest):
+
+    @classmethod
+    def skip_checks(cls):
+        super(HugePageResize, cls).skip_checks()
+        if len(getattr(CONF.whitebox_hardware,
+                       'configured_hugepage_sizes')) == 0:
+            msg = "configured_hugepage_sizes in whitebox-hardware is not " \
+                  "present"
+            raise cls.skipException(msg)
+
+    def _get_xml_hugepage_size(self, server_id):
+        """Analyze the hugepage xml element(s) from a provided instance.
+        Expect to find only one hugepage element in the domain, and return
+        the page size found in that element's 'size' attribute.
+        """
+        huge_pages_list = self._get_hugepage_xml_element(server_id)
+        self.assertEqual(1, len(huge_pages_list), "Expected to find 1 "
+                         "hugepage XML element on server %s but found %s"
+                         % (server_id, len(huge_pages_list)))
+        huge_page_xml = huge_pages_list[0]
+        return int(huge_page_xml.attrib['size'])
+
+    def test_hugepage_resize_large_to_small(self):
+        """Resize a guest with large hugepages to small hugepages and back
+
+        Create a guest using a flavor with hw:mem_page_size:large, resize it
+        to a flavor with hw:mem_page_size:small, and then resize it back to
+        the original flavor
+        """
+        flavor_a = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'large'})
+
+        server = self.create_test_server(flavor=flavor_a['id'],
+                                         wait_until='ACTIVE')
+
+        # We cannot assume the exact pagesize of the guest, so verify that the
+        # backing memory element is present on the guest and that the size
+        # found is greater than or equal to the smallest potential size
+        # configured in the environment
+        large_page_size = self._get_xml_hugepage_size(server['id'])
+        minimum_pagesize_threshold = \
+            min(CONF.whitebox_hardware.configured_hugepage_sizes)
+        self.assertTrue(
+            large_page_size >= minimum_pagesize_threshold,
+            "Pagesize found %s should be greater than or equal to pagesize "
+            "of %s for server %s" %
+            (large_page_size, minimum_pagesize_threshold, server['id'])
+        )
+
+        # Resize the guest using a flavor with hw:mem_page_size:small; the
+        # memory backing element should no longer be present on the guest, so
+        # no XML verification is needed after this resize
+        flavor_b = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'small'})
+        self.resize_server(server['id'], flavor_b['id'])
+
+        # Resize the instance back to the starting flavor and repeat the XML
+        # check of the guest
+        self.resize_server(server['id'], flavor_a['id'])
+        large_page_size = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            large_page_size >= minimum_pagesize_threshold,
+            "After resizing back to the original flavor, pagesize found %s "
+            "should be greater than or equal to pagesize of %s for server %s"
+            % (large_page_size, minimum_pagesize_threshold, server['id'])
+        )
+
+    def test_hugepage_resize_size_to_small(self):
+        """Resize a guest with a specified hugepage size to small hugepages
+
+        Create a guest using a flavor with an explicit hugepage size, based
+        on what is configured in whitebox_hardware. Resize the guest to a
+        flavor with hw:mem_page_size:small, and then resize it back to the
+        original flavor. Repeat this process for every hugepage size
+        configured in whitebox_hardware.configured_hugepage_sizes
+        """
+        flavor_small = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'small'})
+
+        # Create a flavor and launch an instance based on every configured
+        # hugepage size in the deployment.
+        for page_size in CONF.whitebox_hardware.configured_hugepage_sizes:
+            flavor_a = self.create_flavor(
+                ram=str(CONF.whitebox.hugepage_guest_ram_size),
+                extra_specs={'hw:mem_page_size': str(page_size)})
+
+            server = self.create_test_server(flavor=flavor_a['id'],
+                                             wait_until='ACTIVE')
+
+            size_found = self._get_xml_hugepage_size(server['id'])
+            self.assertTrue(
+                page_size == size_found,
+                "Expected pagesize of %s not found on server %s, instead "
+                "found %s" % (page_size, server['id'], size_found)
+            )
+
+            # Resize the guest using the flavor with hw:mem_page_size:small;
+            # the memory backing element will not be present in the guest, so
+            # follow-up XML verification is not necessary
+            self.resize_server(server['id'], flavor_small['id'])
+
+            # Resize back to the original size and confirm the memory backing
+            # element is present and has the correct size
+            self.resize_server(server['id'], flavor_a['id'])
+            size_found = self._get_xml_hugepage_size(server['id'])
+            self.assertTrue(
+                page_size == size_found,
+                "Expected pagesize of %s not found on server %s after "
+                "resizing back to original flavor size, instead found %s" %
+                (page_size, server['id'], size_found)
+            )
+
+            self.delete_server(server['id'])
+
+    @testtools.skipUnless(
+        len(CONF.whitebox_hardware.configured_hugepage_sizes) > 1,
+        'Need at least 2 configured hugepage sizes to execute test')
+    def test_hugepage_resize_size_to_size(self):
+        """Resize a guest with a specified hugepage size to another size
+
+        Create two flavors based on the first two hugepage sizes provided in
+        whitebox_hardware.configured_hugepage_sizes; both flavors use
+        explicit page sizes. Create a server using the first flavor, resize
+        the guest to the second flavor, and resize it back to the original
+        flavor
+        """
+        start_size, target_size = \
+            CONF.whitebox_hardware.configured_hugepage_sizes[:2]
+
+        flavor_a = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': str(start_size)})
+
+        server = self.create_test_server(flavor=flavor_a['id'],
+                                         wait_until='ACTIVE')
+
+        size_found = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            start_size == size_found,
+            "Expected pagesize of %s not found on server %s, instead "
+            "found %s" % (start_size, server['id'], size_found)
+        )
+
+        flavor_b = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': str(target_size)})
+
+        # Resize to the target size and confirm the memory backing element is
+        # present and has the correct size
+        self.resize_server(server['id'], flavor_b['id'])
+        size_found = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            target_size == size_found,
+            "Expected pagesize of %s not found on server %s after resize, "
+            "instead found %s" % (target_size, server['id'], size_found)
+        )
+
+        # Resize back to the original size and confirm the memory backing
+        # element is present and has the correct size
+        self.resize_server(server['id'], flavor_a['id'])
+        size_found = self._get_xml_hugepage_size(server['id'])
+
+        self.assertTrue(
+            start_size == size_found,
+            "Expected pagesize of %s not found on server %s after resizing "
+            "back to original flavor size, instead found %s" %
+            (start_size, server['id'], size_found)
+        )
diff --git a/whitebox_tempest_plugin/config.py b/whitebox_tempest_plugin/config.py
index 8394f415..40fd9cbb 100644
--- a/whitebox_tempest_plugin/config.py
+++ b/whitebox_tempest_plugin/config.py
@@ -133,7 +133,12 @@
         'libvirt_hw_machine_type',
         default='pc',
         choices=["pc", "q35"],
-        help='The machine type configured for the nova computes')
+        help='The machine type configured for the nova computes'),
+    cfg.IntOpt(
+        'hugepage_guest_ram_size',
+        default=64,
+        help="RAM size in MB to use when launching the guests backed "
+             "by hugepages."),
 ]
 
 nova_compute_group = cfg.OptGroup(
@@ -224,6 +229,12 @@
                 '. For example, if NUMA node 0 has '
                 'CPUs 0 and 1, and NUMA node 1 has CPUs 2 and 3, the value to '
                 'set would be `0: [0,1], 1: [2, 3]`.'),
+    cfg.Opt(
+        'configured_hugepage_sizes',
+        type=types.List(types.Integer()),
+        default=[],
+        help='List of hugepage sizes configured on the compute hosts, '
+             'in kB, e.g. 2048,1048576'),
     cfg.IntOpt(
         'dedicated_cpus_per_numa',
         default=0,
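A note on the numbers used by the new whitebox-devstack-multinode-hugepages
job: with num_2M_pages: 512 and num_1G_pages: 1 on the kernel command line,
512 x 2 MiB + 1 x 1 GiB = 2 GiB of RAM is reserved at boot, enough for one
guest at the devstack default WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE of 1024 MB on
either page size. Assuming the standard Linux sysfs layout, the reservation
(and the values to put in [whitebox-hardware]configured_hugepage_sizes) can be
cross-checked on a compute host with a sketch like the following:

# Sketch (assumes the standard Linux sysfs layout): list the hugepage sizes
# and the number of pages reserved for each, as configured by the GRUB
# command line that playbooks/whitebox/pre.yaml sets up.
import os
import re

SYSFS_HUGEPAGES = '/sys/kernel/mm/hugepages'

def reserved_hugepages(sysfs_root=SYSFS_HUGEPAGES):
    """Return a {page_size_in_kB: nr_hugepages} mapping for this host."""
    reserved = {}
    for entry in os.listdir(sysfs_root):
        match = re.match(r'^hugepages-(\d+)kB$', entry)
        if not match:
            continue
        with open(os.path.join(sysfs_root, entry, 'nr_hugepages')) as fh:
            reserved[int(match.group(1))] = int(fh.read())
    return reserved

# Expected on the new job's nodes: {2048: 512, 1048576: 1}, i.e. the keys
# match the devstack default WHITEBOX_CONFIGURED_HUGEPAGES='2048,1048576'.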
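Similarly, the 1G hugetlbfs mount added by pre.yaml (the 2M one is mounted by
the OS at /dev/hugepages) can be confirmed by reading /proc/mounts; a small
sketch, assuming the usual /proc/mounts field order:

# Sketch (assumes the usual /proc/mounts field layout): map hugetlbfs
# mountpoints to their mount options, so the /dev/hugepages1G mount created
# in playbooks/whitebox/pre.yaml can be verified after the reboot.
def hugetlbfs_mounts(proc_mounts='/proc/mounts'):
    """Return a {mountpoint: options} mapping for all hugetlbfs mounts."""
    mounts = {}
    with open(proc_mounts) as fh:
        for line in fh:
            device, mountpoint, fstype, options = line.split()[:4]
            if fstype == 'hugetlbfs':
                mounts[mountpoint] = options
    return mounts

# Expected to contain '/dev/hugepages' (mounted by the OS) and
# '/dev/hugepages1G' with a pagesize option along the lines of
# 'rw,relatime,pagesize=1024M' (exact option strings vary by kernel).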