Skip to content

Commit

Permalink
Enable start/stop of instances with accelerators.
Browse files Browse the repository at this point in the history
. Do not delete accelerator requests in stop code paths.
. In the start code path, get the list of accelerator requests from
  Cyborg in the compute manager 'power_on'.
. Pass accel_info (said list) to the virt driver power_on.
. In the libvirt driver, power_on forwards that accel_info to _hard_reboot.

Change-Id: I8c94504b87aa4450d163fe2b33f6aa0eb5dae5ff
Blueprint: nova-cyborg-interaction
  • Loading branch information
ns-sundar committed Mar 25, 2020
1 parent b5527c0 commit 536d42d
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 14 deletions.
3 changes: 2 additions & 1 deletion nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3099,9 +3099,10 @@ def _power_on(self, context, instance):
network_info = self.network_api.get_instance_nw_info(context, instance)
block_device_info = self._get_instance_block_device_info(context,
instance)
accel_info = self._get_accel_info(context, instance)
self.driver.power_on(context, instance,
network_info,
block_device_info)
block_device_info, accel_info)

def _delete_snapshot_of_shelved_instance(self, context, instance,
snapshot_id):
Expand Down
28 changes: 27 additions & 1 deletion nova/tests/functional/test_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7782,8 +7782,10 @@ def setUp(self):
self._setup_compute_nodes_and_device_rps()

def _setup_compute_nodes_and_device_rps(self):
self.compute_services = []
for i in range(self.NUM_HOSTS):
self._start_compute(host='accel_host' + str(i))
svc = self._start_compute(host='accel_host' + str(i))
self.compute_services.append(svc)
self.compute_rp_uuids = [
rp['uuid'] for rp in self._get_all_providers()
if rp['uuid'] == rp['root_provider_uuid']]
Expand Down Expand Up @@ -7947,6 +7949,30 @@ def throw_error(*args, **kwargs):
# Verify that no allocations/usages remain after deletion
self._check_no_allocs_usage(server_uuid)

def test_create_server_with_local_delete(self):
    """Delete a stopped server while its compute service is down.

    The server is stopped first, then the compute service on
    'accel_host0' is stopped and marked forced-down, and only then is
    the server deleted. The test asserts that the Cyborg ARQ deletion
    mock was called exactly once with the server UUID and that no
    allocations/usages remain after the deletion. Finally the compute
    service is brought back to see if anything fails on restart.
    """
    instance = self._get_server()
    instance_id = instance['id']

    # Stop the server and wait until it reports SHUTOFF.
    stop_action = {'os-stop': {}}
    self.api.post_server_action(instance_id, stop_action)
    self._wait_for_state_change(instance, 'SHUTOFF')
    # NOTE(review): presumably confirms the stopped server still holds
    # its allocations -- confirm against the helper's definition.
    self._check_allocations_usage(instance)

    # Look up the nova-compute service record for the first host, then
    # stop that service and flag it as forced down.
    matching_services = self.admin_api.get_services(
        host='accel_host0', binary='nova-compute')
    service_id = matching_services[0]['id']
    host0_compute = self.compute_services[0]
    host0_compute.stop()
    self.admin_api.put_service(service_id, {'forced_down': 'true'})

    # Delete the server with the compute service down.
    self.api.delete_server(instance_id)
    self.cyborg.mock_del_arqs.assert_called_once_with(instance_id)
    # Verify that no allocations/usages remain after deletion.
    self._check_no_allocs_usage(instance_id)

    # Clear the forced-down flag and restart the compute service to see
    # if anything fails.
    self.admin_api.put_service(service_id, {'forced_down': 'false'})
    host0_compute.start()


class AcceleratorServerReschedTest(AcceleratorServerBase):

Expand Down
21 changes: 20 additions & 1 deletion nova/tests/unit/compute/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -2493,7 +2493,7 @@ def test_power_on(self):
called = {'power_on': False}

def fake_driver_power_on(self, context, instance, network_info,
block_device_info):
block_device_info, accel_device_info=None):
called['power_on'] = True

self.stub_out('nova.virt.fake.FakeDriver.power_on',
Expand All @@ -2512,6 +2512,25 @@ def fake_driver_power_on(self, context, instance, network_info,
self.assertTrue(called['power_on'])
self.compute.terminate_instance(self.context, inst_obj, [])

@mock.patch.object(compute_manager.ComputeManager,
                   '_get_instance_block_device_info')
@mock.patch('nova.network.neutron.API.get_instance_nw_info')
@mock.patch.object(fake.FakeDriver, 'power_on')
@mock.patch('nova.accelerator.cyborg._CyborgClient.get_arqs_for_instance')
def test_power_on_with_accels(self, mock_get_arqs,
                              mock_power_on, mock_nw_info, mock_blockdev):
    """Check that _power_on hands the instance's ARQs to the driver.

    The instance flavor carries an 'accel:device_profile' extra spec,
    and the Cyborg client, network API and block-device lookup are all
    mocked. The test asserts that the ARQs are fetched once for the
    instance UUID and that the driver's power_on receives the network
    info, block device info and the ARQ list as its trailing
    positional arguments.
    """
    # Canned return values for the three mocked lookups.
    fake_arqs = [{'k1': 'v1', 'k2': 'v2'}]
    mock_blockdev.return_value = 'blockdev_info'
    mock_nw_info.return_value = 'nw_info'
    mock_get_arqs.return_value = fake_arqs

    # An instance whose flavor requests a device profile.
    inst_obj = self._create_fake_instance_obj()
    inst_obj.flavor.extra_specs = {'accel:device_profile': 'mydp'}

    self.compute._power_on(self.context, inst_obj)

    mock_get_arqs.assert_called_once_with(inst_obj['uuid'])
    expected_args = (
        self.context, inst_obj, 'nw_info', 'blockdev_info', fake_arqs)
    mock_power_on.assert_called_once_with(*expected_args)

def test_power_off(self):
# Ensure instance can be powered off.

Expand Down
6 changes: 5 additions & 1 deletion nova/virt/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,10 +889,14 @@ def power_off(self, instance, timeout=0, retry_interval=0):
raise NotImplementedError()

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance.
:param instance: nova.objects.instance.Instance
:param network_info: instance network information
:param block_device_info: instance volume block device info
:param accel_info: List of accelerator request dicts. The exact
data struct is doc'd in nova/virt/driver.py::spawn().
"""
raise NotImplementedError()

Expand Down
2 changes: 1 addition & 1 deletion nova/virt/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
raise exception.InstanceNotFound(instance_id=instance.uuid)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
if instance.uuid in self.instances:
self.instances[instance.uuid].state = power_state.RUNNING
else:
Expand Down
2 changes: 1 addition & 1 deletion nova/virt/hyperv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
self._vmops.power_off(instance, timeout, retry_interval)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
self._vmops.power_on(instance, block_device_info, network_info)

def resume_state_on_host_boot(self, context, instance, network_info,
Expand Down
5 changes: 3 additions & 2 deletions nova/virt/ironic/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
node.uuid, instance=instance)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance.

NOTE: Unlike the libvirt driver, this method does not delete
Expand All @@ -1486,7 +1486,8 @@ def power_on(self, context, instance, network_info,
this driver.
:param block_device_info: Instance block device
information. Ignored by this driver.
:param accel_info: List of accelerator requests for this instance.
Ignored by this driver.
"""
LOG.debug('Power on called for instance', instance=instance)
node = self._validate_instance_and_node(instance)
Expand Down
5 changes: 3 additions & 2 deletions nova/virt/libvirt/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3297,12 +3297,13 @@ def power_off(self, instance, timeout=0, retry_interval=0):
self._destroy(instance)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance."""
# We use _hard_reboot here to ensure that all backing files,
# network, and block device connections, etc. are established
# and available before we attempt to start the instance.
self._hard_reboot(context, instance, network_info, block_device_info)
self._hard_reboot(context, instance, network_info, block_device_info,
accel_info)

def trigger_crash_dump(self, instance):

Expand Down
2 changes: 1 addition & 1 deletion nova/virt/powervm/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
timeout=timeout)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance.

:param instance: nova.objects.instance.Instance
Expand Down
2 changes: 1 addition & 1 deletion nova/virt/vmwareapi/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
self._vmops.power_off(instance, timeout, retry_interval)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance."""
self._vmops.power_on(instance)

Expand Down
2 changes: 1 addition & 1 deletion nova/virt/xenapi/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
self._vmops.power_off(instance)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
"""Power on the specified instance."""
self._vmops.power_on(instance)

Expand Down
2 changes: 1 addition & 1 deletion nova/virt/zvm/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def power_off(self, instance, timeout=0, retry_interval=0):
self._hypervisor.guest_softstop(instance.name)

def power_on(self, context, instance, network_info,
block_device_info=None):
block_device_info=None, accel_info=None):
self._hypervisor.guest_start(instance.name)

def pause(self, instance):
Expand Down

0 comments on commit 536d42d

Please sign in to comment.