Remove compute service level check for qos ops
To support move operations with qos ports, both the source and the
destination compute hosts need to be at Ussuri level. We implemented
service level checks for this in Ussuri. In Victoria we could have
removed those checks, as nova only supports compatibility between N
and N-1 computes, but we kept them there for extra safety. In the
meantime we codified [1] the rule that nova does not support N-2
computes any more. So in Wallaby we can assume that the oldest
compute is already on Victoria (Ussuri would be enough too).

So this patch removes the unnecessary service level checks and related
test cases.

[1] Ie15ec8299ae52ae8f5334d591ed3944e9585cf71

Change-Id: I14177e35b9d6d27d49e092604bf0f288cd05f57e
Balazs Gibizer committed Nov 9, 2020
1 parent be752b8 commit c163205
Showing 16 changed files with 9 additions and 1,138 deletions.
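For context, the pattern this commit deletes is a minimum-service-version gate evaluated at the API layer. A minimal sketch of that gate, reconstructed from the removed hunks below (supports_qos_port_move is an illustrative name; the real helper was supports_port_resource_request_during_move in nova/api/openstack/common.py):

from nova import context as nova_context
from nova.objects import service


def supports_qos_port_move():
    # get_minimum_version_all_cells() returns the lowest nova-compute
    # service version across all cells, so a single pre-Ussuri compute
    # (service version < 49) anywhere in the deployment made every move
    # operation on a server with qos ports fail at the API with HTTP 400.
    return service.get_minimum_version_all_cells(
        nova_context.get_admin_context(), ['nova-compute']) >= 49

Once the rule that nova does not support N-2 computes was codified, this global check (and the per-source-host check against version 39 used by resize and cold migrate) could no longer fail on a supported Wallaby deployment, so both became dead code.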
35 changes: 0 additions & 35 deletions nova/api/openstack/common.py
@@ -28,12 +28,9 @@
from nova.compute import task_states
from nova.compute import vm_states
import nova.conf
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova.network import constants
from nova import objects
from nova.objects import service
from nova import quota
from nova import utils

@@ -557,35 +554,3 @@ def supports_port_resource_request(req):
port resource request support, False otherwise.
"""
return api_version_request.is_supported(req, '2.72')


def supports_port_resource_request_during_move():
"""Check to see if the global compute service version is high enough to
support port resource request during move operation.
:returns: True if the compute service version is high enough for
port resource request move support, False otherwise.
"""
return service.get_minimum_version_all_cells(
nova_context.get_admin_context(), ['nova-compute']) >= 49


def instance_has_port_with_resource_request(instance_uuid, network_api):

# TODO(gibi): Use instance.info_cache to see if there is VIFs with
# allocation key in the profile. If there is no such VIF for an instance
# and the instance is not shelve offloaded then we can be sure that the
# instance has no port with resource request. If the instance is shelve
# offloaded then we still have to hit neutron.
search_opts = {'device_id': instance_uuid,
'fields': [constants.RESOURCE_REQUEST]}
# NOTE(gibi): We need to use an admin context to query neutron ports as
# neutron does not fill the resource_request field in the port response if
# we query with a non admin context.
admin_context = nova_context.get_admin_context()
ports = network_api.list_ports(
admin_context, **search_opts).get('ports', [])
for port in ports:
if port.get(constants.RESOURCE_REQUEST):
return True
return False
16 changes: 0 additions & 16 deletions nova/api/openstack/compute/evacuate.py
@@ -120,22 +120,6 @@ def _evacuate(self, req, id, body):
msg = _("The target host can't be the same one.")
raise exc.HTTPBadRequest(explanation=msg)

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and not
common.supports_port_resource_request_during_move()):
LOG.warning("The evacuate action on a server with ports "
"having resource requests, like a port with a QoS "
"minimum bandwidth policy, is not supported until "
"every nova-compute is upgraded to Ussuri")
msg = _("The evacuate action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.evacuate(context, instance, host,
on_shared_storage, password, force)
32 changes: 0 additions & 32 deletions nova/api/openstack/compute/migrate_server.py
@@ -27,13 +27,10 @@
from nova import exception
from nova.i18n import _
from nova.network import neutron
from nova import objects
from nova.policies import migrate_server as ms_policies

LOG = logging.getLogger(__name__)

MIN_COMPUTE_MOVE_BANDWIDTH = 39


class MigrateServerController(wsgi.Controller):
def __init__(self):
@@ -59,19 +56,6 @@ def _migrate(self, req, id, body):
body['migrate'] is not None):
host_name = body['migrate'].get('host')

if common.instance_has_port_with_resource_request(
instance.uuid, self.network_api):
# TODO(gibi): Remove when nova only supports compute newer than
# Train
source_service = objects.Service.get_by_host_and_binary(
context, instance.host, 'nova-compute')
if source_service.version < MIN_COMPUTE_MOVE_BANDWIDTH:
msg = _("The migrate action on a server with ports having "
"resource requests, like a port with a QoS "
"minimum bandwidth policy, is not yet supported "
"on the source compute")
raise exc.HTTPConflict(explanation=msg)

try:
self.compute_api.resize(req.environ['nova.context'], instance,
host_name=host_name)
@@ -134,22 +118,6 @@ def _migrate_live(self, req, id, body):
disk_over_commit = strutils.bool_from_string(disk_over_commit,
strict=True)

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and not
common.supports_port_resource_request_during_move()):
LOG.warning("The os-migrateLive action on a server with ports "
"having resource requests, like a port with a QoS "
"minimum bandwidth policy, is not supported until "
"every nova-compute is upgraded to Ussuri")
msg = _("The os-migrateLive action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.live_migrate(context, instance, block_migration,
disk_over_commit, host, force,
14 changes: 0 additions & 14 deletions nova/api/openstack/compute/servers.py
@@ -92,8 +92,6 @@
exception.InvalidMixedInstanceDedicatedMask,
)

MIN_COMPUTE_MOVE_BANDWIDTH = 39


class ServersController(wsgi.Controller):
"""The Server API base controller class for the OpenStack API."""
@@ -946,18 +944,6 @@ def _resize(self, req, instance_id, flavor_id, auto_disk_config=None):
target={'user_id': instance.user_id,
'project_id': instance.project_id})

if common.instance_has_port_with_resource_request(
instance_id, self.network_api):
# TODO(gibi): Remove when nova only supports compute newer than
# Train
source_service = objects.Service.get_by_host_and_binary(
context, instance.host, 'nova-compute')
if source_service.version < MIN_COMPUTE_MOVE_BANDWIDTH:
msg = _("The resize action on a server with ports having "
"resource requests, like a port with a QoS "
"minimum bandwidth policy, is not yet supported.")
raise exc.HTTPConflict(explanation=msg)

try:
self.compute_api.resize(context, instance, flavor_id,
auto_disk_config=auto_disk_config)
19 changes: 0 additions & 19 deletions nova/api/openstack/compute/shelve.py
@@ -23,9 +23,7 @@
from nova.api.openstack import wsgi
from nova.api import validation
from nova.compute import api as compute
from nova.compute import vm_states
from nova import exception
from nova.i18n import _
from nova.network import neutron
from nova.policies import shelve as shelve_policies

@@ -99,23 +97,6 @@ def _unshelve(self, req, id, body):
if support_az and unshelve_dict:
new_az = unshelve_dict['availability_zone']

# We could potentially move this check to conductor and avoid the
# extra API call to neutron when we support move operations with ports
# having resource requests.
if (instance.vm_state == vm_states.SHELVED_OFFLOADED and
common.instance_has_port_with_resource_request(
instance.uuid, self.network_api) and
not common.supports_port_resource_request_during_move()):
LOG.warning("The unshelve action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported until every "
"nova-compute is upgraded to Ussuri")
msg = _("The unshelve action on a server with ports having "
"resource requests, like a port with a QoS minimum "
"bandwidth policy, is not supported by this cluster right "
"now")
raise exc.HTTPBadRequest(explanation=msg)

try:
self.compute_api.unshelve(context, instance, new_az=new_az)
except (exception.InstanceIsLocked,
7 changes: 0 additions & 7 deletions nova/compute/rpcapi.py
@@ -934,13 +934,6 @@ def pre_live_migration(self, ctxt, instance, block_migration, disk,
block_migration=block_migration,
disk=disk, migrate_data=migrate_data)

def supports_resize_with_qos_port(self, ctxt):
"""Returns whether we can send 5.2, needed for migrating and resizing
servers with ports having resource request.
"""
client = self.router.client(ctxt)
return client.can_send_version('5.2')

# TODO(mriedem): Drop compat for request_spec being a legacy dict in v6.0.
def prep_resize(self, ctxt, instance, image, instance_type, host,
migration, request_spec, filter_properties, node,
115 changes: 2 additions & 113 deletions nova/conductor/tasks/migrate.py
Expand Up @@ -227,106 +227,6 @@ def _is_selected_host_in_source_cell(self, selection):
instance=self.instance)
return same_cell

def _support_resource_request(self, selection):
"""Returns true if the host is new enough to support resource request
during migration and that the RPC API version is not pinned during
rolling upgrade.
"""
svc = objects.Service.get_by_host_and_binary(
self.context, selection.service_host, 'nova-compute')
return (svc.version >= 39 and
self.compute_rpcapi.supports_resize_with_qos_port(
self.context))

# TODO(gibi): Remove this compat code when nova doesn't need to support
# Train computes any more.
def _get_host_supporting_request(self, selection_list):
"""Return the first compute selection from the selection_list where
the service is new enough to support resource request during migration
and the resources claimed successfully.
:param selection_list: a list of Selection objects returned by the
scheduler
:return: A two tuple. The first item is a Selection object
representing the host that supports the request. The second item
is a list of Selection objects representing the remaining alternate
hosts.
:raises MaxRetriesExceeded: if none of the hosts in the selection_list
is new enough to support the request or we cannot claim resource
on any of the hosts that are new enough.
"""

if not self.request_spec.requested_resources:
return selection_list[0], selection_list[1:]

# Scheduler allocated resources on the first host. So check if the
# first host is new enough
if self._support_resource_request(selection_list[0]):
return selection_list[0], selection_list[1:]

# First host is old, so we need to use an alternate. Therefore we have
# to remove the allocation from the first host.
self.reportclient.delete_allocation_for_instance(
self.context, self.instance.uuid)
LOG.debug(
'Scheduler returned host %(host)s as a possible migration target '
'but that host is not new enough to support the migration with '
'resource request %(request)s or the compute RPC is pinned to '
'less than 5.2. Trying alternate hosts.',
{'host': selection_list[0].service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)

alternates = selection_list[1:]

for i, selection in enumerate(alternates):
if self._support_resource_request(selection):
# this host is new enough so we need to try to claim resources
# on it
if selection.allocation_request:
alloc_req = jsonutils.loads(
selection.allocation_request)
resource_claimed = scheduler_utils.claim_resources(
self.context, self.reportclient, self.request_spec,
self.instance.uuid, alloc_req,
selection.allocation_request_version)

if not resource_claimed:
LOG.debug(
'Scheduler returned alternate host %(host)s as a '
'possible migration target but resource claim '
'failed on that host. Trying another alternate.',
{'host': selection.service_host},
instance=self.instance)
else:
return selection, alternates[i + 1:]

else:
# Some deployments use different schedulers that do not
# use Placement, so they will not have an
# allocation_request to claim with. For those cases,
# there is no concept of claiming, so just assume that
# the resources are available.
return selection, alternates[i + 1:]

else:
LOG.debug(
'Scheduler returned alternate host %(host)s as a possible '
'migration target but that host is not new enough to '
'support the migration with resource request %(request)s '
'or the compute RPC is pinned to less than 5.2. '
'Trying another alternate.',
{'host': selection.service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)

# if we reach this point then none of the hosts was new enough for the
# request or we failed to claim resources on every alternate
reason = ("Exhausted all hosts available during compute service level "
"check for instance %(instance_uuid)s." %
{"instance_uuid": self.instance.uuid})
raise exception.MaxRetriesExceeded(reason=reason)

def _execute(self):
# NOTE(sbauza): Force_hosts/nodes needs to be reset if we want to make
# sure that the next destination is not forced to be the original host.
@@ -436,8 +336,8 @@ def _schedule(self):
# just need the first returned element.
selection_list = selection_lists[0]

selection, self.host_list = self._get_host_supporting_request(
selection_list)
# Scheduler allocated resources on the first host so try that first
selection, self.host_list = selection_list[0], selection_list[1:]

scheduler_utils.fill_provider_mapping(self.request_spec, selection)
return selection
@@ -452,17 +352,6 @@ def _reschedule(self):
selection = None
while self.host_list and not host_available:
selection = self.host_list.pop(0)
if (self.request_spec.requested_resources and not
self._support_resource_request(selection)):
LOG.debug(
'Scheduler returned alternate host %(host)s as a possible '
'migration target for re-schedule but that host is not '
'new enough to support the migration with resource '
'request %(request)s. Trying another alternate.',
{'host': selection.service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)
continue
if selection.allocation_request:
alloc_req = jsonutils.loads(selection.allocation_request)
else:
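With the version checks gone, _reschedule simply pops alternates in scheduler order and tries to claim their allocations in Placement. A rough sketch of the remaining loop, reconstructed from the surrounding hunks (pick_alternate is an illustrative standalone name, not the actual method):

from oslo_serialization import jsonutils

from nova.scheduler import utils as scheduler_utils


def pick_alternate(context, reportclient, request_spec, instance,
                   host_list):
    # Try each alternate host in turn; the first successful claim wins.
    while host_list:
        selection = host_list.pop(0)
        if not selection.allocation_request:
            # Schedulers that do not use Placement return no allocation
            # to claim, so assume the host has capacity.
            return selection
        alloc_req = jsonutils.loads(selection.allocation_request)
        if scheduler_utils.claim_resources(
                context, reportclient, request_spec, instance.uuid,
                alloc_req, selection.allocation_request_version):
            return selection
    return None  # no alternate could be claimed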
