Skip to content

Commit

Permalink
Expose instance action event details out of the API
Browse files Browse the repository at this point in the history
This adds a new microversion to expose the instance action
event details in the
GET /servers/{server_id}/os-instance-actions/{request_id} API.

With the new microversion the "details" key is always returned
with each event dict but the value may be null because of old
records or events that did not fail.

The details are not constrained by policy like the traceback
field since the details are like a fault message on the server
resource when the server is in ERROR status and the fault
message is likewise not constraint by policy unlike the fault
details which is a traceback like the event traceback field.

This commit add a SYSTEM_READER ('rule: system_reader_api') role
to the Show Server Action Details API. With this default policy,
events fault details can be displayed. And also add some nova and
non-nova exception functional tests for os-instance-actions API.

Co-Authored-By: Brin Zhang <[email protected]>

Implements blueprint action-event-fault-details
Change-Id: I6fe4dd265b0030ce12f92771b255a3d795f03d01
  • Loading branch information
mriedem and 1049965823 committed Mar 27, 2020
1 parent 1dd760a commit 8337bee
Show file tree
Hide file tree
Showing 15 changed files with 381 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/api_samples/versions/v21-version-get-resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}
Expand Down
2 changes: 1 addition & 1 deletion doc/api_samples/versions/versions-get-resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}
Expand Down
3 changes: 2 additions & 1 deletion nova/api/openstack/api_version_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@
from Cyborg with ``GET /v2/accelerator_requests?instance={uuid}``
* 2.83 - Allow more filter parameters for ``GET /servers/detail`` and
``GET /servers`` for non-admin.
* 2.84 - Adds ``details`` field to instance action events.
"""

# The minimum and maximum versions of the API supported
Expand All @@ -234,7 +235,7 @@
# Note(cyeoh): This only applies for the v2.1 API once microversions
# support is fully merged. It does not affect the V2 API.
_MIN_API_VERSION = "2.1"
_MAX_API_VERSION = "2.83"
_MAX_API_VERSION = "2.84"
DEFAULT_API_VERSION = _MIN_API_VERSION

# Almost all proxy APIs which are related to network, images and baremetal
Expand Down
19 changes: 16 additions & 3 deletions nova/api/openstack/compute/instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ def _format_action(self, action_raw, action_keys):
action[key] = action_raw.get(key)
return action

def _format_event(self, event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False):
@staticmethod
def _format_event(event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False, show_details=False):
event = {}
for key in EVENT_KEYS:
# By default, non-admins are not allowed to see traceback details.
Expand All @@ -67,6 +68,8 @@ def _format_event(self, event_raw, project_id, show_traceback=False,
if show_hostid:
event['hostId'] = utils.generate_hostid(event_raw['host'],
project_id)
if show_details:
event['details'] = event_raw['details']
return event

@wsgi.Controller.api_version("2.1", "2.20")
Expand Down Expand Up @@ -178,6 +181,15 @@ def show(self, req, server_id, id):
show_hostid = api_version_request.is_supported(req, '2.62')

if show_events:
# NOTE(brinzhang): Event details are shown since microversion
# 2.84.
show_details = False
support_v284 = api_version_request.is_supported(req, '2.84')
if support_v284:
show_details = context.can(
ia_policies.BASE_POLICY_NAME % 'events:details',
target={'project_id': instance.project_id}, fatal=False)

events_raw = self.action_api.action_events_get(context, instance,
action_id)
# NOTE(takashin): The project IDs of instance action events
Expand All @@ -189,6 +201,7 @@ def show(self, req, server_id, id):
action['events'] = [self._format_event(
evt, action['project_id'] or instance.project_id,
show_traceback=show_traceback,
show_host=show_host, show_hostid=show_hostid
show_host=show_host, show_hostid=show_hostid,
show_details=show_details
) for evt in events_raw]
return {'instanceAction': action}
8 changes: 8 additions & 0 deletions nova/api/openstack/compute/rest_api_version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1105,3 +1105,11 @@ and ``GET /servers`` for non-admin :
* ``vm_state``
* ``progress``
* ``user_id``

2.84
----

The ``GET /servers/{server_id}/os-instance-actions/{request_id}`` API returns
a ``details`` parameter for each failed event with a fault message, similar to
the server ``fault.message`` parameter in ``GET /servers/{server_id}`` for a
server with status ``ERROR``.
21 changes: 21 additions & 0 deletions nova/policies/instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@
"""

instance_actions_policies = [
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events:details',
check_str=base.SYSTEM_READER,
description="""Add "details" key in action events for a server.
This check is performed only after the check
os_compute_api:os-instance-actions:show passes. Beginning with Microversion
2.84, new field 'details' is exposed via API which can have more details about
event failure. That field is controlled by this policy which is system reader
by default. Making the 'details' field visible to the non-admin user helps to
understand the nature of the problem (i.e. if the action can be retried),
but in the other hand it might leak information about the deployment
(e.g. the type of the hypervisor).
""",
operations=[
{
'method': 'GET',
'path': '/servers/{server_id}/os-instance-actions/{request_id}'
}
],
scope_types=['system']),
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events',
check_str=base.SYSTEM_READER,
Expand Down
2 changes: 2 additions & 0 deletions nova/tests/functional/integrated_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,12 @@ def _assert_resize_migrate_action_fail(self, server, action, error_in_tb):
:param server: API response dict of the server being resized/migrated
:param action: Either "resize" or "migrate" instance action.
:param error_in_tb: Some expected part of the error event traceback.
:returns: The instance action event dict from the API response
"""
event = self._wait_for_action_fail_completion(
server, action, 'conductor_migrate_server')
self.assertIn(error_in_tb, event['traceback'])
return event

def _wait_for_migration_status(self, server, expected_statuses):
"""Waits for a migration record with the given statuses to be found
Expand Down
222 changes: 222 additions & 0 deletions nova/tests/functional/test_instance_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,19 @@
# License for the specific language governing permissions and limitations
# under the License.

import mock
from oslo_policy import policy as oslo_policy

from nova import exception
from nova import policy
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.functional import test_servers
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture


class InstanceActionsTestV2(test_servers.ServersTestBase):
Expand Down Expand Up @@ -49,3 +60,214 @@ def test_get_instance_actions_deleted(self):
actions = self.api.get_instance_actions(server['id'])
self.assertEqual('delete', actions[0]['action'])
self.assertEqual('create', actions[1]['action'])


class HypervisorError(Exception):
"""This is just used to make sure the exception type is in the events."""
pass


class InstanceActionEventFaultsTestCase(
test.TestCase, integrated_helpers.InstanceHelperMixin):
"""Tests for the instance action event details reporting from the API"""

def setUp(self):
super(InstanceActionEventFaultsTestCase, self).setUp()
# Setup the standard fixtures.
fake_image.stub_out_image_service(self)
self.addCleanup(fake_image.FakeImageService_reset)
self.useFixture(nova_fixtures.NeutronFixture(self))
self.useFixture(func_fixtures.PlacementFixture())
self.useFixture(policy_fixture.RealPolicyFixture())

# Start the compute services.
self.start_service('conductor')
self.start_service('scheduler')
self.compute = self.start_service('compute')
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.api
self.admin_api = api_fixture.admin_api

def _set_policy_rules(self, overwrite=True):
rules = {'os_compute_api:os-instance-actions:show': '',
'os_compute_api:os-instance-actions:events:details':
'project_id:%(project_id)s'}
policy.set_rules(oslo_policy.Rules.from_dict(rules),
overwrite=overwrite)

def test_instance_action_event_details_non_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a non-NovaException fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""

# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')

# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')

# Stub out the compute driver reboot method to raise a non-nova
# exception to simulate some error from the underlying hypervisor
# which in this case we are going to say has sensitive content.
error_msg = 'sensitive info'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=HypervisorError(error_msg)) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()

self._set_policy_rules(overwrite=False)

server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']
# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# And the sensitive details from the non-nova exception should not be
# in the details.
self.assertIn('details', reboot_action_events[0])
self.assertNotIn(error_msg, reboot_action_events[0]['details'])
# The exception type class name should be in the details.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])

# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The exception type class name should be in the details for the admin
# user as well since the fault handling code cannot distinguish who
# is going to see the message so it only sets class name.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])

def test_instance_action_event_details_with_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a nova exception fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""

# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')

# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')

# Stub out the compute driver reboot method to raise a nova
# exception 'InstanceRebootFailure' to simulate some error.
exc_reason = 'reboot failure'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=exception.InstanceRebootFailure(reason=exc_reason)
) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()

self._set_policy_rules(overwrite=False)

server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']

# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# The nova exception format message should be in the details.
self.assertIn('details', reboot_action_events[0])
self.assertIn(exc_reason, reboot_action_events[0]['details'])

# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The nova exception format message should be in the details.
self.assertIn(exc_reason, reboot_action_events[0]['details'])
8 changes: 7 additions & 1 deletion nova/tests/functional/test_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1931,10 +1931,16 @@ def test_resize_not_enough_resource(self):

self.api.post_server_action(
server['id'], resize_req, check_response_status=[202])
self._assert_resize_migrate_action_fail(
event = self._assert_resize_migrate_action_fail(
server, instance_actions.RESIZE, 'NoValidHost')
self.assertIn('details', event)
# This test case works in microversion 2.84.
self.assertIn('No valid host was found', event['details'])
server = self.admin_api.get_server(server['id'])
self.assertEqual(source_hostname, server['OS-EXT-SRV-ATTR:host'])
# The server is still ACTIVE and thus there is no fault message.
self.assertEqual('ACTIVE', server['status'])
self.assertNotIn('fault', server)

# only the source host shall have usages after the failed resize
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor1)
Expand Down
Loading

0 comments on commit 8337bee

Please sign in to comment.