Skip to content

Commit

Permalink
Merge "Expose instance action event details out of the API"
Browse files Browse the repository at this point in the history
  • Loading branch information
Zuul authored and openstack-gerrit committed Mar 30, 2020
2 parents 403fc67 + 8337bee commit 5497365
Show file tree
Hide file tree
Showing 15 changed files with 381 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/api_samples/versions/v21-version-get-resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}
Expand Down
2 changes: 1 addition & 1 deletion doc/api_samples/versions/versions-get-resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}
Expand Down
3 changes: 2 additions & 1 deletion nova/api/openstack/api_version_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@
from Cyborg with ``GET /v2/accelerator_requests?instance={uuid}``
* 2.83 - Allow more filter parameters for ``GET /servers/detail`` and
``GET /servers`` for non-admin.
* 2.84 - Adds ``details`` field to instance action events.
"""

# The minimum and maximum versions of the API supported
Expand All @@ -234,7 +235,7 @@
# Note(cyeoh): This only applies for the v2.1 API once microversions
# support is fully merged. It does not affect the V2 API.
_MIN_API_VERSION = "2.1"
_MAX_API_VERSION = "2.83"
_MAX_API_VERSION = "2.84"
DEFAULT_API_VERSION = _MIN_API_VERSION

# Almost all proxy APIs which are related to network, images and baremetal
Expand Down
19 changes: 16 additions & 3 deletions nova/api/openstack/compute/instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ def _format_action(self, action_raw, action_keys):
action[key] = action_raw.get(key)
return action

def _format_event(self, event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False):
@staticmethod
def _format_event(event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False, show_details=False):
event = {}
for key in EVENT_KEYS:
# By default, non-admins are not allowed to see traceback details.
Expand All @@ -67,6 +68,8 @@ def _format_event(self, event_raw, project_id, show_traceback=False,
if show_hostid:
event['hostId'] = utils.generate_hostid(event_raw['host'],
project_id)
if show_details:
event['details'] = event_raw['details']
return event

@wsgi.Controller.api_version("2.1", "2.20")
Expand Down Expand Up @@ -178,6 +181,15 @@ def show(self, req, server_id, id):
show_hostid = api_version_request.is_supported(req, '2.62')

if show_events:
# NOTE(brinzhang): Event details are shown since microversion
# 2.84.
show_details = False
support_v284 = api_version_request.is_supported(req, '2.84')
if support_v284:
show_details = context.can(
ia_policies.BASE_POLICY_NAME % 'events:details',
target={'project_id': instance.project_id}, fatal=False)

events_raw = self.action_api.action_events_get(context, instance,
action_id)
# NOTE(takashin): The project IDs of instance action events
Expand All @@ -189,6 +201,7 @@ def show(self, req, server_id, id):
action['events'] = [self._format_event(
evt, action['project_id'] or instance.project_id,
show_traceback=show_traceback,
show_host=show_host, show_hostid=show_hostid
show_host=show_host, show_hostid=show_hostid,
show_details=show_details
) for evt in events_raw]
return {'instanceAction': action}
8 changes: 8 additions & 0 deletions nova/api/openstack/compute/rest_api_version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1105,3 +1105,11 @@ and ``GET /servers`` for non-admin :
* ``vm_state``
* ``progress``
* ``user_id``

2.84
----

The ``GET /servers/{server_id}/os-instance-actions/{request_id}`` API returns
a ``details`` parameter for each failed event with a fault message, similar to
the server ``fault.message`` parameter in ``GET /servers/{server_id}`` for a
server with status ``ERROR``.
21 changes: 21 additions & 0 deletions nova/policies/instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@
"""

instance_actions_policies = [
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events:details',
check_str=base.SYSTEM_READER,
description="""Add "details" key in action events for a server.
This check is performed only after the check
os_compute_api:os-instance-actions:show passes. Beginning with Microversion
2.84, new field 'details' is exposed via API which can have more details about
event failure. That field is controlled by this policy which is system reader
by default. Making the 'details' field visible to the non-admin user helps to
understand the nature of the problem (i.e. if the action can be retried),
but in the other hand it might leak information about the deployment
(e.g. the type of the hypervisor).
""",
operations=[
{
'method': 'GET',
'path': '/servers/{server_id}/os-instance-actions/{request_id}'
}
],
scope_types=['system']),
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events',
check_str=base.SYSTEM_READER,
Expand Down
2 changes: 2 additions & 0 deletions nova/tests/functional/integrated_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,12 @@ def _assert_resize_migrate_action_fail(self, server, action, error_in_tb):
:param server: API response dict of the server being resized/migrated
:param action: Either "resize" or "migrate" instance action.
:param error_in_tb: Some expected part of the error event traceback.
:returns: The instance action event dict from the API response
"""
event = self._wait_for_action_fail_completion(
server, action, 'conductor_migrate_server')
self.assertIn(error_in_tb, event['traceback'])
return event

def _wait_for_migration_status(self, server, expected_statuses):
"""Waits for a migration record with the given statuses to be found
Expand Down
222 changes: 222 additions & 0 deletions nova/tests/functional/test_instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,19 @@
# License for the specific language governing permissions and limitations
# under the License.

import mock
from oslo_policy import policy as oslo_policy

from nova import exception
from nova import policy
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.functional import test_servers
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture


class InstanceActionsTestV2(test_servers.ServersTestBase):
Expand Down Expand Up @@ -49,3 +60,214 @@ def test_get_instance_actions_deleted(self):
actions = self.api.get_instance_actions(server['id'])
self.assertEqual('delete', actions[0]['action'])
self.assertEqual('create', actions[1]['action'])


class HypervisorError(Exception):
"""This is just used to make sure the exception type is in the events."""
pass


class InstanceActionEventFaultsTestCase(
test.TestCase, integrated_helpers.InstanceHelperMixin):
"""Tests for the instance action event details reporting from the API"""

def setUp(self):
super(InstanceActionEventFaultsTestCase, self).setUp()
# Setup the standard fixtures.
fake_image.stub_out_image_service(self)
self.addCleanup(fake_image.FakeImageService_reset)
self.useFixture(nova_fixtures.NeutronFixture(self))
self.useFixture(func_fixtures.PlacementFixture())
self.useFixture(policy_fixture.RealPolicyFixture())

# Start the compute services.
self.start_service('conductor')
self.start_service('scheduler')
self.compute = self.start_service('compute')
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.api
self.admin_api = api_fixture.admin_api

def _set_policy_rules(self, overwrite=True):
rules = {'os_compute_api:os-instance-actions:show': '',
'os_compute_api:os-instance-actions:events:details':
'project_id:%(project_id)s'}
policy.set_rules(oslo_policy.Rules.from_dict(rules),
overwrite=overwrite)

def test_instance_action_event_details_non_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a non-NovaException fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""

# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')

# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')

# Stub out the compute driver reboot method to raise a non-nova
# exception to simulate some error from the underlying hypervisor
# which in this case we are going to say has sensitive content.
error_msg = 'sensitive info'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=HypervisorError(error_msg)) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()

self._set_policy_rules(overwrite=False)

server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']
# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# And the sensitive details from the non-nova exception should not be
# in the details.
self.assertIn('details', reboot_action_events[0])
self.assertNotIn(error_msg, reboot_action_events[0]['details'])
# The exception type class name should be in the details.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])

# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The exception type class name should be in the details for the admin
# user as well since the fault handling code cannot distinguish who
# is going to see the message so it only sets class name.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])

def test_instance_action_event_details_with_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a nova exception fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""

# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')

# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')

# Stub out the compute driver reboot method to raise a nova
# exception 'InstanceRebootFailure' to simulate some error.
exc_reason = 'reboot failure'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=exception.InstanceRebootFailure(reason=exc_reason)
) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()

self._set_policy_rules(overwrite=False)

server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']

# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# The nova exception format message should be in the details.
self.assertIn('details', reboot_action_events[0])
self.assertIn(exc_reason, reboot_action_events[0]['details'])

# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The nova exception format message should be in the details.
self.assertIn(exc_reason, reboot_action_events[0]['details'])
8 changes: 7 additions & 1 deletion nova/tests/functional/test_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1931,10 +1931,16 @@ def test_resize_not_enough_resource(self):

self.api.post_server_action(
server['id'], resize_req, check_response_status=[202])
self._assert_resize_migrate_action_fail(
event = self._assert_resize_migrate_action_fail(
server, instance_actions.RESIZE, 'NoValidHost')
self.assertIn('details', event)
# This test case works in microversion 2.84.
self.assertIn('No valid host was found', event['details'])
server = self.admin_api.get_server(server['id'])
self.assertEqual(source_hostname, server['OS-EXT-SRV-ATTR:host'])
# The server is still ACTIVE and thus there is no fault message.
self.assertEqual('ACTIVE', server['status'])
self.assertNotIn('fault', server)

# only the source host shall have usages after the failed resize
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor1)
Expand Down
Loading

0 comments on commit 5497365

Please sign in to comment.