Remove compute service level check for qos ops
To support move operations with qos ports, both the source and the
destination compute hosts need to be at Ussuri level. We implemented
service level checks for this in Ussuri. In Victoria we could have
removed those checks, as nova only supports compatibility between N
and N-1 computes, but we kept them there for extra safety. In the
meantime we codified [1] the rule that nova does not support N-2
computes any more. So in Wallaby we can assume that the oldest
compute is already on Victoria (Ussuri would be enough too).

So this patch removes the unnecessary service level checks and related
test cases.

[1] Ie15ec8299ae52ae8f5334d591ed3944e9585cf71

Change-Id: I14177e35b9d6d27d49e092604bf0f288cd05f57e
Balazs Gibizer committed Nov 9, 2020
1 parent be752b8 commit c163205
Showing 16 changed files with 9 additions and 1,138 deletions.
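For context, the pattern this commit deletes is a minimum-service-version gate evaluated at the API layer. A minimal sketch of that gate, reconstructed from the removed hunks below (supports_qos_port_move is an illustrative name; the real helper was supports_port_resource_request_during_move in nova/api/openstack/common.py):

from nova import context as nova_context
from nova.objects import service


def supports_qos_port_move():
    # get_minimum_version_all_cells() returns the lowest nova-compute
    # service version across all cells, so a single pre-Ussuri compute
    # (service version < 49) anywhere in the deployment made every move
    # operation on a server with qos ports fail at the API with HTTP 400.
    return service.get_minimum_version_all_cells(
        nova_context.get_admin_context(), ['nova-compute']) >= 49

Once the rule that nova does not support N-2 computes was codified, this global check (and the per-source-host check against version 39 used by resize and cold migrate) could no longer fail on a supported Wallaby deployment, so both became dead code.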
35 changes: 0 additions & 35 deletions nova/api/openstack/common.py
@@ -28,12 +28,9 @@
from nova.compute import task_states
from nova.compute import vm_states
import nova.conf
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova.network import constants
from nova import objects
from nova.objects import service
from nova import quota
from nova import utils

@@ -557,35 +554,3 @@ def supports_port_resource_request(req):
port resource request support, False otherwise.
"""
return api_version_request.is_supported(req, '2.72')


def supports_port_resource_request_during_move():
"""Check to see if the global compute service version is high enough to
support port resource request during move operation.
:returns: True if the compute service version is high enough for
port resource request move support, False otherwise.
"""
return service.get_minimum_version_all_cells(
nova_context.get_admin_context(), ['nova-compute']) >= 49


def instance_has_port_with_resource_request(instance_uuid, network_api):

# TODO(gibi): Use instance.info_cache to see if there is VIFs with
# allocation key in the profile. If there is no such VIF for an instance
# and the instance is not shelve offloaded then we can be sure that the
# instance has no port with resource request. If the instance is shelve
# offloaded then we still have to hit neutron.
search_opts = {'device_id': instance_uuid,
'fields': [constants.RESOURCE_REQUEST]}
# NOTE(gibi): We need to use an admin context to query neutron ports as
# neutron does not fill the resource_request field in the port response if
# we query with a non admin context.
admin_context = nova_context.get_admin_context()
ports = network_api.list_ports(
admin_context, **search_opts).get('ports', [])
for port in ports:
if port.get(constants.RESOURCE_REQUEST):
return True
return False
16 changes: 0 additions & 16 deletions nova/api/openstack/compute/evacuate.py
@@ -120,22 +120,6 @@ def _evacuate(self, req, id, body):
msg = _("The target host can't be the same one.")
raise exc.HTTPBadRequest(explanation=msg)

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and not
common.supports_port_resource_request_during_move()):
LOG.warning("The evacuate action on a server with ports "
"having resource requests, like a port with a QoS "
"minimum bandwidth policy, is not supported until "
"every nova-compute is upgraded to Ussuri")
msg = _("The evacuate action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.evacuate(context, instance, host,
on_shared_storage, password, force)
32 changes: 0 additions & 32 deletions nova/api/openstack/compute/migrate_server.py
@@ -27,13 +27,10 @@
from nova import exception
from nova.i18n import _
from nova.network import neutron
from nova import objects
from nova.policies import migrate_server as ms_policies

LOG = logging.getLogger(__name__)

MIN_COMPUTE_MOVE_BANDWIDTH = 39


class MigrateServerController(wsgi.Controller):
def __init__(self):
@@ -59,19 +56,6 @@ def _migrate(self, req, id, body):
body['migrate'] is not None):
host_name = body['migrate'].get('host')

if common.instance_has_port_with_resource_request(
instance.uuid, self.network_api):
# TODO(gibi): Remove when nova only supports compute newer than
# Train
source_service = objects.Service.get_by_host_and_binary(
context, instance.host, 'nova-compute')
if source_service.version < MIN_COMPUTE_MOVE_BANDWIDTH:
msg = _("The migrate action on a server with ports having "
"resource requests, like a port with a QoS "
"minimum bandwidth policy, is not yet supported "
"on the source compute")
raise exc.HTTPConflict(explanation=msg)

try:
self.compute_api.resize(req.environ['nova.context'], instance,
host_name=host_name)
@@ -134,22 +118,6 @@ def _migrate_live(self, req, id, body):
disk_over_commit = strutils.bool_from_string(disk_over_commit,
strict=True)

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and not
common.supports_port_resource_request_during_move()):
LOG.warning("The os-migrateLive action on a server with ports "
"having resource requests, like a port with a QoS "
"minimum bandwidth policy, is not supported until "
"every nova-compute is upgraded to Ussuri")
msg = _("The os-migrateLive action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.live_migrate(context, instance, block_migration,
disk_over_commit, host, force,
14 changes: 0 additions & 14 deletions nova/api/openstack/compute/servers.py
@@ -92,8 +92,6 @@
exception.InvalidMixedInstanceDedicatedMask,
)

MIN_COMPUTE_MOVE_BANDWIDTH = 39


class ServersController(wsgi.Controller):
"""The Server API base controller class for the OpenStack API."""
@@ -946,18 +944,6 @@ def _resize(self, req, instance_id, flavor_id, auto_disk_config=None):
target={'user_id': instance.user_id,
'project_id': instance.project_id})

if common.instance_has_port_with_resource_request(
instance_id, self.network_api):
# TODO(gibi): Remove when nova only supports compute newer than
# Train
source_service = objects.Service.get_by_host_and_binary(
context, instance.host, 'nova-compute')
if source_service.version < MIN_COMPUTE_MOVE_BANDWIDTH:
msg = _("The resize action on a server with ports having "
"resource requests, like a port with a QoS "
"minimum bandwidth policy, is not yet supported.")
raise exc.HTTPConflict(explanation=msg)

try:
self.compute_api.resize(context, instance, flavor_id,
auto_disk_config=auto_disk_config)
19 changes: 0 additions & 19 deletions nova/api/openstack/compute/shelve.py
@@ -23,9 +23,7 @@
from nova.api.openstack import wsgi
from nova.api import validation
from nova.compute import api as compute
from nova.compute import vm_states
from nova import exception
from nova.i18n import _
from nova.network import neutron
from nova.policies import shelve as shelve_policies

@@ -99,23 +97,6 @@ def _unshelve(self, req, id, body):
if support_az and unshelve_dict:
new_az = unshelve_dict['availability_zone']

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (instance.vm_state == vm_states.SHELVED_OFFLOADED and
common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and
not common.supports_port_resource_request_during_move()):
LOG.warning("The unshelve action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported until every "
"nova-compute is upgraded to Ussuri")
msg = _("The unshelve action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.unshelve(context, instance, new_az=new_az)
except (exception.InstanceIsLocked,
7 changes: 0 additions & 7 deletions nova/compute/rpcapi.py
@@ -934,13 +934,6 @@ def pre_live_migration(self, ctxt, instance, block_migration, disk,
block_migration=block_migration,
disk=disk, migrate_data=migrate_data)

def supports_resize_with_qos_port(self, ctxt):
"""Returns whether we can send 5.2, needed for migrating and resizing
servers with ports having resource request.
"""
client = self.router.client(ctxt)
return client.can_send_version('5.2')

# TODO(mriedem): Drop compat for request_spec being a legacy dict in v6.0.
def prep_resize(self, ctxt, instance, image, instance_type, host,
migration, request_spec, filter_properties, node,
115 changes: 2 additions & 113 deletions nova/conductor/tasks/migrate.py
Expand Up @@ -227,106 +227,6 @@ def _is_selected_host_in_source_cell(self, selection):
instance=self.instance)
return same_cell

def _support_resource_request(self, selection):
"""Returns true if the host is new enough to support resource request
during migration and that the RPC API version is not pinned during
rolling upgrade.
"""
svc = objects.Service.get_by_host_and_binary(
self.context, selection.service_host, 'nova-compute')
return (svc.version >= 39 and
self.compute_rpcapi.supports_resize_with_qos_port(
self.context))

# TODO(gibi): Remove this compat code when nova doesn't need to support
# Train computes any more.
def _get_host_supporting_request(self, selection_list):
"""Return the first compute selection from the selection_list where
the service is new enough to support resource request during migration
and the resources claimed successfully.
:param selection_list: a list of Selection objects returned by the
scheduler
:return: A two tuple. The first item is a Selection object
representing the host that supports the request. The second item
is a list of Selection objects representing the remaining alternate
hosts.
:raises MaxRetriesExceeded: if none of the hosts in the selection_list
is new enough to support the request or we cannot claim resource
on any of the hosts that are new enough.
"""

if not self.request_spec.requested_resources:
return selection_list[0], selection_list[1:]

# Scheduler allocated resources on the first host. So check if the
# first host is new enough
if self._support_resource_request(selection_list[0]):
return selection_list[0], selection_list[1:]

# First host is old, so we need to use an alternate. Therefore we have
# to remove the allocation from the first host.
self.reportclient.delete_allocation_for_instance(
self.context, self.instance.uuid)
LOG.debug(
'Scheduler returned host %(host)s as a possible migration target '
'but that host is not new enough to support the migration with '
'resource request %(request)s or the compute RPC is pinned to '
'less than 5.2. Trying alternate hosts.',
{'host': selection_list[0].service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)

alternates = selection_list[1:]

for i, selection in enumerate(alternates):
if self._support_resource_request(selection):
# this host is new enough so we need to try to claim resources
# on it
if selection.allocation_request:
alloc_req = jsonutils.loads(
selection.allocation_request)
resource_claimed = scheduler_utils.claim_resources(
self.context, self.reportclient, self.request_spec,
self.instance.uuid, alloc_req,
selection.allocation_request_version)

if not resource_claimed:
LOG.debug(
'Scheduler returned alternate host %(host)s as a '
'possible migration target but resource claim '
'failed on that host. Trying another alternate.',
{'host': selection.service_host},
instance=self.instance)
else:
return selection, alternates[i + 1:]

else:
# Some deployments use different schedulers that do not
# use Placement, so they will not have an
# allocation_request to claim with. For those cases,
# there is no concept of claiming, so just assume that
# the resources are available.
return selection, alternates[i + 1:]

else:
LOG.debug(
'Scheduler returned alternate host %(host)s as a possible '
'migration target but that host is not new enough to '
'support the migration with resource request %(request)s '
'or the compute RPC is pinned to less than 5.2. '
'Trying another alternate.',
{'host': selection.service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)

# if we reach this point then none of the hosts was new enough for the
# request or we failed to claim resources on every alternate
reason = ("Exhausted all hosts available during compute service level "
"check for instance %(instance_uuid)s." %
{"instance_uuid": self.instance.uuid})
raise exception.MaxRetriesExceeded(reason=reason)

def _execute(self):
# NOTE(sbauza): Force_hosts/nodes needs to be reset if we want to make
# sure that the next destination is not forced to be the original host.
@@ -436,8 +336,8 @@ def _schedule(self):
# just need the first returned element.
selection_list = selection_lists[0]

selection, self.host_list = self._get_host_supporting_request(
selection_list)
# Scheduler allocated resources on the first host so try that first
selection, self.host_list = selection_list[0], selection_list[1:]

scheduler_utils.fill_provider_mapping(self.request_spec, selection)
return selection
@@ -452,17 +352,6 @@ def _reschedule(self):
selection = None
while self.host_list and not host_available:
selection = self.host_list.pop(0)
if (self.request_spec.requested_resources and not
self._support_resource_request(selection)):
LOG.debug(
'Scheduler returned alternate host %(host)s as a possible '
'migration target for re-schedule but that host is not '
'new enough to support the migration with resource '
'request %(request)s. Trying another alternate.',
{'host': selection.service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)
continue
if selection.allocation_request:
alloc_req = jsonutils.loads(selection.allocation_request)
else:
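With the version checks gone, _reschedule simply pops alternates in scheduler order and tries to claim their allocations in Placement. A rough sketch of the remaining loop, reconstructed from the surrounding hunks (pick_alternate is an illustrative standalone name, not the actual method):

from oslo_serialization import jsonutils

from nova.scheduler import utils as scheduler_utils


def pick_alternate(context, reportclient, request_spec, instance,
                   host_list):
    # Try each alternate host in turn; the first successful claim wins.
    while host_list:
        selection = host_list.pop(0)
        if not selection.allocation_request:
            # Schedulers that do not use Placement return no allocation
            # to claim, so assume the host has capacity.
            return selection
        alloc_req = jsonutils.loads(selection.allocation_request)
        if scheduler_utils.claim_resources(
                context, reportclient, request_spec, instance.uuid,
                alloc_req, selection.allocation_request_version):
            return selection
    return None  # no alternate could be claimed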
