Skip to content

Commit

Permalink
Add cache_images() to conductor
Browse files Browse the repository at this point in the history
This adds the bulk of the image pre-caching logic to the conductor
task manager. It takes an aggregate and list of image ids from the
API service and handles the process of calling to the relevant compute
nodes to initiate the image downloads, honoring the (new) config knob
for overall task parallelism.

Related to blueprint image-precache-support

Change-Id: Id7c0ab7ae0586d49d88ff2afae149e25e59a3489
  • Loading branch information
kk7ds committed Oct 14, 2019
1 parent ac16511 commit 11d909c
Show file tree
Hide file tree
Showing 11 changed files with 312 additions and 4 deletions.
19 changes: 19 additions & 0 deletions nova/conductor/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from nova import baserpc
from nova.conductor import rpcapi
import nova.conf
from nova import image

CONF = nova.conf.CONF

Expand Down Expand Up @@ -83,6 +84,7 @@ class ComputeTaskAPI(object):

def __init__(self):
self.conductor_compute_rpcapi = rpcapi.ComputeTaskAPI()
self.image_api = image.API()

# TODO(stephenfin): Remove the 'reservations' parameter since we don't use
# reservations anymore
Expand Down Expand Up @@ -155,3 +157,20 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
preserve_ephemeral=preserve_ephemeral,
host=host,
request_spec=request_spec)

def cache_images(self, context, aggregate, image_ids):
"""Request images be pre-cached on hosts within an aggregate.
:param context: The RequestContext
:param aggregate: The objects.Aggregate representing the hosts to
contact
:param image_ids: A list of image ID strings to send to the hosts
"""
for image_id in image_ids:
# Validate that we can get the image by id before we go
# ask a bunch of hosts to do the same. We let this bubble
# up to the API, which catches NovaException for the 4xx and
# otherwise 500s if this fails in some unexpected way.
self.image_api.get(context, image_id)
self.conductor_compute_rpcapi.cache_images(context, aggregate,
image_ids)
70 changes: 69 additions & 1 deletion nova/conductor/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import contextlib
import copy
import eventlet
import functools
import sys

Expand Down Expand Up @@ -230,7 +231,7 @@ class ComputeTaskManager(base.Base):
may involve coordinating activities on multiple compute nodes.
"""

target = messaging.Target(namespace='compute_task', version='1.20')
target = messaging.Target(namespace='compute_task', version='1.21')

def __init__(self):
super(ComputeTaskManager, self).__init__()
Expand Down Expand Up @@ -1629,3 +1630,70 @@ def _delete_build_request(self, context, build_request, instance, cell,
pass
return False
return True

def cache_images(self, context, aggregate, image_ids):
"""Cache a set of images on the set of hosts in an aggregate.
:param context: The RequestContext
:param aggregate: The Aggregate object from the request to constrain
the host list
:param image_id: The IDs of the image to cache
"""

# TODO(danms): Fix notification sample for IMAGE_CACHE action
compute_utils.notify_about_aggregate_action(
context, aggregate,
fields.NotificationAction.IMAGE_CACHE,
fields.NotificationPhase.START)

clock = timeutils.StopWatch()
threads = CONF.image_cache.precache_concurrency
fetch_pool = eventlet.GreenPool(size=threads)

hosts_by_cell = {}
cells_by_uuid = {}
# TODO(danms): Make this a much more efficient bulk query
for hostname in aggregate.hosts:
hmap = objects.HostMapping.get_by_host(context, hostname)
cells_by_uuid.setdefault(hmap.cell_mapping.uuid, hmap.cell_mapping)
hosts_by_cell.setdefault(hmap.cell_mapping.uuid, [])
hosts_by_cell[hmap.cell_mapping.uuid].append(hostname)

LOG.info('Preparing to request pre-caching of image(s) %(image_ids)s '
'on %(hosts)i hosts across %(cells)s cells.',
{'image_ids': ','.join(image_ids),
'hosts': len(aggregate.hosts),
'cells': len(hosts_by_cell)})
clock.start()

for cell_uuid, hosts in hosts_by_cell.items():
cell = cells_by_uuid[cell_uuid]
with nova_context.target_cell(context, cell) as target_ctxt:
for host in hosts:
service = objects.Service.get_by_compute_host(target_ctxt,
host)
if not self.servicegroup_api.service_is_up(service):
LOG.info(
'Skipping image pre-cache request to compute '
'%(host)r because it is not up',
{'host': host})
continue

fetch_pool.spawn_n(self.compute_rpcapi.cache_images,
target_ctxt,
host=host,
image_ids=image_ids)

# Wait until all those things finish
fetch_pool.waitall()

clock.stop()
LOG.info('Image pre-cache operation for image(s) %(image_ids)s '
'completed in %(time).2f seconds',
{'image_ids': ','.join(image_ids),
'time': clock.elapsed()})

compute_utils.notify_about_aggregate_action(
context, aggregate,
fields.NotificationAction.IMAGE_CACHE,
fields.NotificationPhase.END)
11 changes: 11 additions & 0 deletions nova/conductor/rpcapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from oslo_versionedobjects import base as ovo_base

import nova.conf
from nova import exception
from nova.objects import base as objects_base
from nova import profiler
from nova import rpc
Expand Down Expand Up @@ -281,6 +282,7 @@ class ComputeTaskAPI(object):
instance.
1.20 - migrate_server() now gets a 'host_list' parameter that represents
potential alternate hosts for retries within a cell.
1.21 - Added cache_images()
"""

def __init__(self):
Expand Down Expand Up @@ -436,3 +438,12 @@ def rebuild_instance(self, ctxt, instance, new_pass, injected_files,
del kw['request_spec']
cctxt = self.client.prepare(version=version)
cctxt.cast(ctxt, 'rebuild_instance', **kw)

def cache_images(self, ctxt, aggregate, image_ids):
version = '1.21'
if not self.client.can_send_version(version):
raise exception.NovaException('Conductor RPC version pin does not '
'allow cache_images() to be called')
cctxt = self.client.prepare(version=version)
cctxt.cast(ctxt, 'cache_images', aggregate=aggregate,
image_ids=image_ids)
2 changes: 2 additions & 0 deletions nova/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from nova.conf import glance
from nova.conf import guestfs
from nova.conf import hyperv
from nova.conf import imagecache
from nova.conf import ironic
from nova.conf import key_manager
from nova.conf import keystone
Expand Down Expand Up @@ -88,6 +89,7 @@
guestfs.register_opts(CONF)
hyperv.register_opts(CONF)
mks.register_opts(CONF)
imagecache.register_opts(CONF)
ironic.register_opts(CONF)
key_manager.register_opts(CONF)
keystone.register_opts(CONF)
Expand Down
47 changes: 47 additions & 0 deletions nova/conf/imagecache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from oslo_config import cfg

imagecache_group = cfg.OptGroup(
'image_cache',
title='Image Cache Options',
help="""
A collection of options specific to image caching.
""")
imagecache_opts = [
cfg.IntOpt('precache_concurrency',
default=1,
min=1,
help="""
Maximum number of compute hosts to trigger image precaching in parallel.
When an image precache request is made, compute nodes will be contacted
to initiate the download. This number constrains the number of those that
will happen in parallel. Higher numbers will cause more computes to work
in parallel and may result in reduced time to complete the operation, but
may also DDoS the image service. Lower numbers will result in more sequential
operation, lower image service load, but likely longer runtime to completion.
"""),
]


ALL_OPTS = (imagecache_opts,)


def register_opts(conf):
conf.register_group(imagecache_group)
conf.register_opts(imagecache_opts, group=imagecache_group)


def list_opts():
return {imagecache_group: imagecache_opts}
3 changes: 2 additions & 1 deletion nova/notifications/objects/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ class EventType(NotificationObject):
# NotificationActionField enum
# Version 1.19: SELECT_DESTINATIONS is added to the NotificationActionField
# enum
VERSION = '1.19'
# Version 1.20: IMAGE_CACHE is added to the NotificationActionField enum
VERSION = '1.20'

fields = {
'object': fields.StringField(nullable=False),
Expand Down
3 changes: 2 additions & 1 deletion nova/objects/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,7 @@ class NotificationAction(BaseNovaEnum):
BUILD_INSTANCES = 'build_instances'
MIGRATE_SERVER = 'migrate_server'
REBUILD_SERVER = 'rebuild_server'
IMAGE_CACHE = 'cache_images'

ALL = (UPDATE, EXCEPTION, DELETE, PAUSE, UNPAUSE, RESIZE, VOLUME_SWAP,
SUSPEND, POWER_ON, REBOOT, SHUTDOWN, SNAPSHOT, INTERFACE_ATTACH,
Expand All @@ -877,7 +878,7 @@ class NotificationAction(BaseNovaEnum):
REMOVE_HOST, ADD_MEMBER, UPDATE_METADATA, LOCK, UNLOCK,
REBUILD_SCHEDULED, UPDATE_PROP, LIVE_MIGRATION_FORCE_COMPLETE,
CONNECT, USAGE, BUILD_INSTANCES, MIGRATE_SERVER, REBUILD_SERVER,
SELECT_DESTINATIONS)
SELECT_DESTINATIONS, IMAGE_CACHE)


# TODO(rlrossit): These should be changed over to be a StateMachine enum from
Expand Down
76 changes: 76 additions & 0 deletions nova/tests/functional/compute/test_cache_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from oslo_utils.fixture import uuidsentinel as uuids

from nova import context
from nova import objects
from nova import test
from nova.tests.unit import fake_notifier


class ImageCacheTest(test.TestCase):
NUMBER_OF_CELLS = 2

def setUp(self):
super(ImageCacheTest, self).setUp()

self.flags(compute_driver='fake.FakeDriverWithCaching')

fake_notifier.stub_notifier(self)
self.addCleanup(fake_notifier.reset)
self.context = context.get_admin_context()

self.conductor = self.start_service('conductor')
self.compute1 = self.start_service('compute', host='compute1')
self.compute2 = self.start_service('compute', host='compute2')
self.compute3 = self.start_service('compute', host='compute3',
cell='cell2')
self.compute4 = self.start_service('compute', host='compute4',
cell='cell2')
self.compute5 = self.start_service('compute', host='compute5',
cell='cell2')

cell2 = self.cell_mappings['cell2']
with context.target_cell(self.context, cell2) as cctxt:
srv = objects.Service.get_by_compute_host(cctxt, 'compute5')
srv.forced_down = True
srv.save()

def test_cache_image(self):
"""Test caching images by injecting the request directly to
the conductor service and making sure it fans out and calls
the expected nodes.
"""

aggregate = objects.Aggregate(name='test',
uuid=uuids.aggregate,
id=1,
hosts=['compute1', 'compute3',
'compute4', 'compute5'])
self.conductor.compute_task_mgr.cache_images(
self.context, aggregate, ['an-image'])

# NOTE(danms): We expect only three image cache attempts because
# compute5 is marked as forced-down and compute2 is not in the
# requested aggregate.
for host in ['compute1', 'compute3', 'compute4']:
mgr = getattr(self, host)
self.assertEqual(set(['an-image']), mgr.driver.cached_images)
for host in ['compute2', 'compute5']:
mgr = getattr(self, host)
self.assertEqual(set(), mgr.driver.cached_images)

fake_notifier.wait_for_versioned_notifications(
'aggregate.cache_images.start')
fake_notifier.wait_for_versioned_notifications(
'aggregate.cache_images.end')
Loading

0 comments on commit 11d909c

Please sign in to comment.