Merge "Follow-up for NUMA live migration functional tests"
Zuul authored and openstack-gerrit committed May 5, 2020
2 parents 8461c9c + ca8f1f4 commit 6ba06e2
Showing 2 changed files with 77 additions and 53 deletions.
nova/tests/functional/integrated_helpers.py (8 changes: 8 additions & 0 deletions)
@@ -432,6 +432,14 @@ def _resize_server(self, server, new_flavor):
}
self._migrate_or_resize(server, resize_req)

def _live_migrate(self, server, migration_final_status):
self.api.post_server_action(
server['id'],
{'os-migrateLive': {'host': None,
'block_migration': 'auto'}})
self._wait_for_state_change(server, 'ACTIVE')
self._wait_for_migration_status(server, [migration_final_status])


class _IntegratedTestBase(test.TestCase, InstanceHelperMixin):
REQUIRES_LOCKING = True
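
For illustration only (this snippet is not part of the commit): the _live_migrate() helper added above lets any functional test built on the shared helper mixin trigger a live migration and wait for both the server and the migration record to settle. A minimal sketch, reusing the _create_server() call style the NUMA tests below use:

    server = self._create_server(flavor_id=flavor, networks='none')
    # No target host is given, so the scheduler picks the destination;
    # block_migration='auto' lets nova decide how to move the disks.
    self._live_migrate(server, 'completed')
    # Failure-path tests instead wait for the 'error' migration status:
    # self._live_migrate(server, 'error')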
nova/tests/functional/libvirt/test_numa_live_migration.py (122 changes: 69 additions & 53 deletions)
@@ -21,6 +21,7 @@
from nova.compute import manager as compute_manager
from nova import context
from nova import objects
from nova import test
from nova.tests.functional import integrated_helpers
from nova.tests.functional.libvirt import base
from nova.tests.unit.virt.libvirt import fake_os_brick_connector
@@ -66,37 +67,35 @@ def setUp(self):
'nova.virt.libvirt.driver.connector',
fake_os_brick_connector))

def _migrate_stub(self, domain, destination, params, flags):
raise test.TestingException('_migrate_stub() must be implemented in '
' tests that expect the live migration '
' to start.')

def get_host(self, server_id):
server = self.api.get_server(server_id)
return server['OS-EXT-SRV-ATTR:host']

def _get_host_numa_topology(self, host):
ctxt = context.get_admin_context()
return objects.NUMATopology.obj_from_db_obj(
objects.ComputeNode.get_by_nodename(ctxt, host).numa_topology)

def _assert_no_migration_context(self, instance_uuid):
ctxt = context.get_admin_context()
self.assertFalse(
objects.MigrationContext.get_by_instance_uuid(ctxt, instance_uuid))

def _assert_has_migration_context(self, instance_uuid):
def _get_migration_context(self, instance_uuid):
ctxt = context.get_admin_context()
self.assertTrue(
objects.MigrationContext.get_by_instance_uuid(ctxt, instance_uuid))
return objects.MigrationContext.get_by_instance_uuid(ctxt,
instance_uuid)

def _assert_instance_pinned_cpus(self, uuid, instance_cpus, host_cpus):
ctxt = context.get_admin_context()
topology = objects.InstanceNUMATopology.get_by_instance_uuid(
ctxt, uuid)
self.assertEqual(1, len(topology.cells))
self.assertItemsEqual(instance_cpus,
# NOTE(artom) DictOfIntegersField has strings as keys, need to convert
self.assertItemsEqual([str(cpu) for cpu in instance_cpus],
topology.cells[0].cpu_pinning_raw.keys())
self.assertItemsEqual(host_cpus,
topology.cells[0].cpu_pinning_raw.values())

def _assert_host_consumed_cpus(self, host, cpus):
topology = self._get_host_numa_topology(host)
ctxt = context.get_admin_context()
topology = objects.NUMATopology.obj_from_db_obj(
objects.ComputeNode.get_by_nodename(ctxt, host).numa_topology)
self.assertItemsEqual(cpus, topology.cells[0].pinned_cpus)
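
A note on the str() conversion in _assert_instance_pinned_cpus() above, purely for illustration: as the NOTE(artom) comment says, cpu_pinning_raw is a DictOfIntegersField, so its keys are strings while its values are integers. For a guest pinned to host CPUs 0 and 1, the mapping looks roughly like this (assumed shape, not taken from the diff):

    # Guest CPU id (string key) -> host CPU id (int value)
    topology.cells[0].cpu_pinning_raw  # {'0': 0, '1': 1}

which is why the expected guest CPUs are stringified before being compared against keys(), while the host CPUs are compared against values() as plain ints.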


@@ -132,17 +131,12 @@ def start_computes_and_servers(self):
# CPUs 0,1.
for server_name, host in [('server_a', 'host_a'),
('server_b', 'host_b')]:
server = self._build_server(
flavor_id=flavor,
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6')
server.update({'networks': 'none',
'host': host})
post = {'server': server}
server = self.api.post_server(post)
server = self._create_server(flavor_id=flavor, host=host,
networks='none')
setattr(self, server_name,
self._wait_for_state_change(server, 'ACTIVE'))
self.assertEqual(host, self.get_host(server['id']))
self._assert_instance_pinned_cpus(server['id'], ['0', '1'], [0, 1])
self._assert_instance_pinned_cpus(server['id'], [0, 1], [0, 1])

def _rpc_pin_host(self, hostname):
ctxt = context.get_admin_context()
@@ -153,14 +147,6 @@ def _rpc_pin_host(self, hostname):
dest_mgr.compute_rpcapi.router.client(
ctxt).can_send_version('5.3'))

def _live_migrate(self, server, migration_final_status):
self.api.post_server_action(
server['id'],
{'os-migrateLive': {'host': None,
'block_migration': 'auto'}})
self._wait_for_state_change(server, 'ACTIVE')
self._wait_for_migration_status(server, [migration_final_status])


class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
"""Tests that expect the live migration to succeed. Stubs out fakelibvirt's
@@ -177,14 +163,23 @@ def _migrate_stub(self, domain, destination, params, flags):
until this method is done, the last thing we do is make fakelibvirt's
Domain.jobStats() return VIR_DOMAIN_JOB_COMPLETED.
"""
self._assert_has_migration_context(self.server_a['id'])
self.assertIsInstance(
self._get_migration_context(self.server_a['id']),
objects.MigrationContext)

# During the migration, server_a is consuming CPUs 0,1 on host_a, while
# all 4 of host_b's CPU are consumed by server_b and the incoming
# migration.
self._assert_host_consumed_cpus('host_a', [0, 1])
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])

host_a_rp = self._get_provider_uuid_by_name('host_a')
host_b_rp = self._get_provider_uuid_by_name('host_b')
usages_a = self._get_provider_usages(host_a_rp)
usages_b = self._get_provider_usages(host_b_rp)
self.assertEqual(2, usages_a['PCPU'])
self.assertEqual(4, usages_b['PCPU'])

# In a real live migration, libvirt and QEMU on the source and
# destination talk it out, resulting in the instance starting to exist
# on the destination. Fakelibvirt cannot do that, so we have to
@@ -230,7 +225,7 @@ def _test(self, pin_dest=False):
self._rpc_pin_host('host_b')
self._live_migrate(self.server_a, 'completed')
self.assertEqual('host_b', self.get_host(self.server_a['id']))
self._assert_no_migration_context(self.server_a['id'])
self.assertIsNone(self._get_migration_context(self.server_a['id']))

# At this point host_a should have no CPUs consumed (server_a has moved
# to host_b), and host_b should have all of its CPUs consumed. In
@@ -241,14 +236,14 @@
self._assert_host_consumed_cpus('host_a', [])
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
self._assert_instance_pinned_cpus(self.server_a['id'],
['0', '1'], [2, 3])
[0, 1], [2, 3])

self._run_periodics()

self._assert_host_consumed_cpus('host_a', [])
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
self._assert_instance_pinned_cpus(self.server_a['id'],
['0', '1'], [2, 3])
[0, 1], [2, 3])

self.assertTrue(self.migrate_stub_ran)

@@ -262,8 +257,22 @@ def test_numa_live_migration_dest_pinned(self):
self._test(pin_dest=True)

def test_bug_1843639(self):
orig_live_migration = \
compute_manager.ComputeManager.live_migration
"""Live migrations in 'accepted' status were not considered in progress
before the fix for 1845146 merged, and were ignored by the update
available resources periodic task. From the task's POV, live-migrating
instances with migration status 'accepted' were considered to be on the
source, and any resource claims on the destination would get
erroneously removed. For that to happen, the task had to run at just
the "right" time, when the migration was in 'accepted' and had not yet
been moved to 'queued' by live_migration() in the compute manager.
This test triggers this race by wrapping around live_migration() and
running the update available resources periodic task while the
migration is still in 'accepted'.
"""

self.live_migration_ran = False
orig_live_migration = compute_manager.ComputeManager.live_migration

def live_migration(*args, **kwargs):
self._run_periodics()
@@ -272,12 +281,22 @@ def live_migration(*args, **kwargs):
# incoming migration.
self._assert_host_consumed_cpus('host_a', [0, 1])
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])

# The migration should also be in 'accepted' at this point in time.
ctxt = context.get_admin_context()
self.assertIsInstance(
objects.Migration.get_by_instance_and_status(
ctxt, self.server_a['id'], 'accepted'),
objects.Migration)

self.live_migration_ran = True
return orig_live_migration(*args, **kwargs)

self.useFixture(fixtures.MonkeyPatch(
'nova.compute.manager.ComputeManager.live_migration',
live_migration))
self._test()
self.assertTrue(self.live_migration_ran)


class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
@@ -290,7 +309,9 @@ def _migrate_stub(self, domain, destination, params, flags):
"""Designed to stub fakelibvirt's migrateToURI3 and "fail" the
live migration by monkeypatching jobStats() to return an error.
"""
self._assert_has_migration_context(self.server_a['id'])
self.assertIsInstance(
self._get_migration_context(self.server_a['id']),
objects.MigrationContext)

# During the migration, server_a is consuming CPUs 0,1 on host_a, while
# all 4 of host_b's CPU are consumed by server_b and the incoming
@@ -332,15 +353,15 @@ def _test(self, pin_dest=False):
self._rpc_pin_host('host_b')
self._live_migrate(self.server_a, 'error')
self.assertEqual('host_a', self.get_host(self.server_a['id']))
self._assert_no_migration_context(self.server_a['id'])
self.assertIsNone(self._get_migration_context(self.server_a['id']))

# Check consumed and pinned CPUs. Things should be as they were before
# the live migration, with CPUs 0,1 consumed on both hosts by the 2
# servers.
self._assert_host_consumed_cpus('host_a', [0, 1])
self._assert_host_consumed_cpus('host_b', [0, 1])
self._assert_instance_pinned_cpus(self.server_a['id'],
['0', '1'], [0, 1])
[0, 1], [0, 1])

def test_rollback(self):
self._test()
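
Both _migrate_stub() docstrings in this file describe the same underlying trick: nova's libvirt driver monitors a live migration by polling the domain's job stats, so the positive tests finish their stub by making fakelibvirt report VIR_DOMAIN_JOB_COMPLETED, while the rollback tests make jobStats() report an error instead. A rough, hypothetical sketch of the completion side (the exact fakelibvirt helper and return shape are assumptions, not taken from this diff):

    # Assumes fakelibvirt is imported and the source compute is 'host_a',
    # as in the NUMA tests above.
    conn = self.computes['host_a'].driver._host.get_connection()
    dom = conn.lookupByUUIDString(self.server_a['id'])
    # Pretend the migration job has finished; the driver's monitoring loop
    # polls jobStats() and will then treat the live migration as complete.
    dom.jobStats = lambda *args, **kwargs: {
        'type': fakelibvirt.VIR_DOMAIN_JOB_COMPLETED}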
@@ -403,15 +424,8 @@ def _test(self, pin_source, pin_cond, expect_success=True):
extra_spec = {'hw:numa_nodes': 1,
'hw:cpu_policy': 'dedicated'}
flavor = self._create_flavor(vcpu=2, extra_spec=extra_spec)
server = self._build_server(
flavor_id=flavor,
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6')
server['networks'] = 'none'
post = {'server': server}
server1 = self.api.post_server(post)
server2 = self.api.post_server(post)
self._wait_for_state_change(server1, 'ACTIVE')
self._wait_for_state_change(server2, 'ACTIVE')
server1 = self._create_server(flavor_id=flavor, networks='none')
server2 = self._create_server(flavor_id=flavor, networks='none')
if self.get_host(server1['id']) == 'source':
self.migrating_server = server1
else:
@@ -445,7 +459,8 @@ def _migrate_stub(self, domain, destination, params, flags):
# NOTE(artom) This is the crucial bit: by asserting that the migrating
# instance has no migration context, we're making sure that we're
# hitting the old, pre-claims code paths.
self._assert_no_migration_context(self.migrating_server['id'])
self.assertIsNone(
self._get_migration_context(self.migrating_server['id']))
dest = self.computes['dest']
dest.driver._host.get_connection().createXML(
params['destination_xml'],
@@ -475,7 +490,8 @@ def _migrate_stub(self, domain, destination, params, flags):
# NOTE(artom) This is the crucial bit: by asserting that the migrating
# instance has no migration context, we're making sure that we're
# hitting the old, pre-claims code paths.
self._assert_no_migration_context(self.migrating_server['id'])
self.assertIsNone(
self._get_migration_context(self.migrating_server['id']))
source = self.computes['source']
conn = source.driver._host.get_connection()
dom = conn.lookupByUUIDString(self.migrating_server['id'])
@@ -531,7 +547,7 @@ def test_insufficient_resources(self):
check_response_status=[500])
self._wait_for_state_change(server, 'ACTIVE')
self._wait_for_migration_status(server, ['error'])
self._assert_no_migration_context(server['id'])
self.assertIsNone(self._get_migration_context(server['id']))
self.assertEqual('host_a', self.get_host(server['id']))
log_out = self.stdlog.logger.output
self.assertIn('Migration pre-check error: '
@@ -577,7 +593,7 @@ def test_different_page_sizes(self):
self._wait_for_state_change(server, 'ACTIVE')
self._wait_for_migration_status(server, ['error'])
self.assertEqual(initial_host, self.get_host(server['id']))
self._assert_no_migration_context(server['id'])
self.assertIsNone(self._get_migration_context(server['id']))
log_out = self.stdlog.logger.output
self.assertIn('Migration pre-check error: '
'Insufficient compute resources: '
