diff --git a/src/vmm/src/arch/aarch64/fdt.rs b/src/vmm/src/arch/aarch64/fdt.rs index c708add44bf..61200cb2148 100644 --- a/src/vmm/src/arch/aarch64/fdt.rs +++ b/src/vmm/src/arch/aarch64/fdt.rs @@ -12,11 +12,12 @@ use std::fmt::Debug; use vm_fdt::{Error as VmFdtError, FdtWriter, FdtWriterNode}; use vm_memory::GuestMemoryError; -use super::super::{DeviceType, InitrdConfig}; +use super::super::DeviceType; use super::cache_info::{CacheEntry, read_cache_config}; use super::gic::GICDevice; use crate::device_manager::mmio::MMIODeviceInfo; use crate::devices::acpi::vmgenid::{VMGENID_MEM_SIZE, VmGenId}; +use crate::initrd::InitrdConfig; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap}; // This is a value for uniquely identifying the FDT node declaring the interrupt controller. diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs index a09721e1775..10ffe53c8fa 100644 --- a/src/vmm/src/arch/aarch64/mod.rs +++ b/src/vmm/src/arch/aarch64/mod.rs @@ -17,27 +17,38 @@ pub mod vcpu; pub mod vm; use std::cmp::min; -use std::collections::HashMap; -use std::ffi::CString; use std::fmt::Debug; +use std::fs::File; +use linux_loader::loader::pe::PE as Loader; +use linux_loader::loader::{Cmdline, KernelLoader}; use vm_memory::GuestMemoryError; -use self::gic::GICDevice; -use crate::arch::DeviceType; -use crate::device_manager::mmio::MMIODeviceInfo; -use crate::devices::acpi::vmgenid::VmGenId; +use crate::arch::{BootProtocol, EntryPoint}; +use crate::cpu_config::aarch64::{CpuConfiguration, CpuConfigurationError}; +use crate::cpu_config::templates::CustomCpuTemplate; +use crate::initrd::InitrdConfig; +use crate::utils::{align_up, usize_to_u64}; +use crate::vmm_config::machine_config::MachineConfig; use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; +use crate::vstate::vcpu::KvmVcpuError; +use crate::{Vcpu, VcpuConfig, Vmm}; /// Errors thrown while configuring aarch64 system. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ConfigurationError { /// Failed to create a Flattened Device Tree for this aarch64 microVM: {0} SetupFDT(#[from] fdt::FdtError), - /// Failed to compute the initrd address. - InitrdAddress, /// Failed to write to guest memory. - MemoryError(GuestMemoryError), + MemoryError(#[from] GuestMemoryError), + /// Cannot copy kernel file fd + KernelFile, + /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} + KernelLoader(#[from] linux_loader::loader::Error), + /// Error creating vcpu configuration: {0} + VcpuConfig(#[from] CpuConfigurationError), + /// Error configuring the vcpu: {0} + VcpuConfigure(#[from] KvmVcpuError), } /// The start of the memory area reserved for MMIO devices. @@ -52,39 +63,59 @@ pub fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize)> { vec![(GuestAddress(layout::DRAM_MEM_START), dram_size)] } -/// Configures the system and should be called once per vm before starting vcpu threads. -/// For aarch64, we only setup the FDT. -/// -/// # Arguments -/// -/// * `guest_mem` - The memory to be used by the guest. -/// * `cmdline_cstring` - The kernel commandline. -/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu. -/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes. -/// * `gic_device` - The GIC device. -/// * `initrd` - Information about an optional initrd. -pub fn configure_system( - guest_mem: &GuestMemoryMmap, - cmdline_cstring: CString, - vcpu_mpidr: Vec, - device_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, - gic_device: &GICDevice, - vmgenid: &Option, - initrd: &Option, +/// Configures the system for booting Linux. 
+pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + machine_config: &MachineConfig, + cpu_template: &CustomCpuTemplate, + entry_point: EntryPoint, + initrd: &Option, + boot_cmdline: Cmdline, ) -> Result<(), ConfigurationError> { + // Construct the base CpuConfiguration to apply CPU template onto. + let cpu_config = CpuConfiguration::new(cpu_template, vcpus)?; + + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template); + + let vcpu_config = VcpuConfig { + vcpu_count: machine_config.vcpu_count, + smt: machine_config.smt, + cpu_config, + }; + + let optional_capabilities = vmm.kvm.optional_capabilities(); + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu.configure( + &vmm.guest_memory, + entry_point, + &vcpu_config, + &optional_capabilities, + )?; + } + let vcpu_mpidr = vcpus + .iter_mut() + .map(|cpu| cpu.kvm_vcpu.get_mpidr()) + .collect(); + let cmdline = boot_cmdline + .as_cstring() + .expect("Cannot create cstring from cmdline string"); + let fdt = fdt::create_fdt( - guest_mem, + &vmm.guest_memory, vcpu_mpidr, - cmdline_cstring, - device_info, - gic_device, - vmgenid, + cmdline, + vmm.mmio_device_manager.get_device_info(), + vmm.vm.get_irqchip(), + &vmm.acpi_device_manager.vmgenid, initrd, )?; - let fdt_address = GuestAddress(get_fdt_addr(guest_mem)); - guest_mem - .write_slice(fdt.as_slice(), fdt_address) - .map_err(ConfigurationError::MemoryError)?; + + let fdt_address = GuestAddress(get_fdt_addr(&vmm.guest_memory)); + vmm.guest_memory.write_slice(fdt.as_slice(), fdt_address)?; + Ok(()) } @@ -94,21 +125,20 @@ pub fn get_kernel_start() -> u64 { } /// Returns the memory address where the initrd could be loaded. 
-pub fn initrd_load_addr( - guest_mem: &GuestMemoryMmap, - initrd_size: usize, -) -> Result { - let round_to_pagesize = - |size| (size + (super::GUEST_PAGE_SIZE - 1)) & !(super::GUEST_PAGE_SIZE - 1); - match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(round_to_pagesize(initrd_size) as u64) { +pub fn initrd_load_addr(guest_mem: &GuestMemoryMmap, initrd_size: usize) -> Option { + let rounded_size = align_up( + usize_to_u64(initrd_size), + usize_to_u64(super::GUEST_PAGE_SIZE), + ); + match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(rounded_size) { Some(offset) => { if guest_mem.address_in_range(offset) { - Ok(offset.raw_value()) + Some(offset.raw_value()) } else { - Err(ConfigurationError::InitrdAddress) + None } } - None => Err(ConfigurationError::InitrdAddress), + None => None, } } @@ -127,6 +157,30 @@ fn get_fdt_addr(mem: &GuestMemoryMmap) -> u64 { layout::DRAM_MEM_START } +/// Load linux kernel into guest memory. +pub fn load_kernel( + kernel: &File, + guest_memory: &GuestMemoryMmap, +) -> Result { + // Need to clone the File because reading from it + // mutates it. 
+ let mut kernel_file = kernel + .try_clone() + .map_err(|_| ConfigurationError::KernelFile)?; + + let entry_addr = Loader::load( + guest_memory, + Some(GuestAddress(get_kernel_start())), + &mut kernel_file, + None, + )?; + + Ok(EntryPoint { + entry_addr: entry_addr.kernel_load, + protocol: BootProtocol::LinuxBoot, + }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index dd3ae178127..61d65fea1a5 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -20,9 +20,10 @@ pub use aarch64::vcpu::*; pub use aarch64::vm::{ArchVm, ArchVmError, VmState}; #[cfg(target_arch = "aarch64")] pub use aarch64::{ - ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, configure_system, - get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, - layout::IRQ_MAX, layout::SYSTEM_MEM_SIZE, layout::SYSTEM_MEM_START, + ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, + configure_system_for_boot, get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, + layout::IRQ_BASE, layout::IRQ_MAX, layout::SYSTEM_MEM_SIZE, layout::SYSTEM_MEM_START, + load_kernel, }; /// Module for x86_64 related functionality. 
@@ -35,12 +36,13 @@ pub use x86_64::kvm::{Kvm, KvmArchError}; pub use x86_64::vcpu::*; #[cfg(target_arch = "x86_64")] pub use x86_64::vm::{ArchVm, ArchVmError, VmState}; + #[cfg(target_arch = "x86_64")] -pub use x86_64::{ - ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, configure_system, - get_kernel_start, initrd_load_addr, layout::APIC_ADDR, layout::CMDLINE_MAX_SIZE, - layout::IOAPIC_ADDR, layout::IRQ_BASE, layout::IRQ_MAX, layout::SYSTEM_MEM_SIZE, - layout::SYSTEM_MEM_START, +pub use crate::arch::x86_64::{ + ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, + configure_system_for_boot, get_kernel_start, initrd_load_addr, layout::APIC_ADDR, + layout::CMDLINE_MAX_SIZE, layout::IOAPIC_ADDR, layout::IRQ_BASE, layout::IRQ_MAX, + layout::SYSTEM_MEM_SIZE, layout::SYSTEM_MEM_START, load_kernel, }; /// Types of devices that can get attached to this platform. @@ -58,15 +60,6 @@ pub enum DeviceType { BootTimer, } -/// Type for passing information about the initrd in the guest memory. -#[derive(Debug)] -pub struct InitrdConfig { - /// Load address of initrd in guest memory - pub address: crate::vstate::memory::GuestAddress, - /// Size of initrd in guest memory - pub size: usize, -} - /// Default page size for the guest OS. 
pub const GUEST_PAGE_SIZE: usize = 4096; diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index ad680d89cff..fdfce03b069 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -31,20 +31,33 @@ pub mod xstate; #[allow(missing_docs)] pub mod generated; +use std::fs::File; + +use layout::CMDLINE_START; use linux_loader::configurator::linux::LinuxBootConfigurator; use linux_loader::configurator::pvh::PvhBootConfigurator; use linux_loader::configurator::{BootConfigurator, BootParams}; use linux_loader::loader::bootparam::boot_params; +use linux_loader::loader::elf::Elf as Loader; use linux_loader::loader::elf::start_info::{ hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info, }; - -use crate::arch::{BootProtocol, InitrdConfig, SYSTEM_MEM_SIZE, SYSTEM_MEM_START}; -use crate::device_manager::resources::ResourceAllocator; -use crate::utils::{mib_to_bytes, u64_to_usize}; +use linux_loader::loader::{Cmdline, KernelLoader, PvhBootCapability, load_cmdline}; +use log::debug; + +use super::EntryPoint; +use crate::acpi::create_acpi_tables; +use crate::arch::{BootProtocol, SYSTEM_MEM_SIZE, SYSTEM_MEM_START}; +use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; +use crate::cpu_config::x86_64::CpuConfiguration; +use crate::initrd::InitrdConfig; +use crate::utils::{align_down, mib_to_bytes, u64_to_usize, usize_to_u64}; +use crate::vmm_config::machine_config::MachineConfig; use crate::vstate::memory::{ Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, }; +use crate::vstate::vcpu::KvmVcpuConfigureError; +use crate::{Vcpu, VcpuConfig, Vmm}; // Value taken from https://elixir.bootlin.com/linux/v5.10.68/source/arch/x86/include/uapi/asm/e820.h#L31 // Usable normal RAM @@ -55,7 +68,7 @@ const E820_RESERVED: u32 = 2; const MEMMAP_TYPE_RAM: u32 = 1; /// Errors thrown while configuring x86_64 system. 
-#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ConfigurationError { /// Invalid e820 setup params. E820Configuration, @@ -63,14 +76,24 @@ pub enum ConfigurationError { MpTableSetup(#[from] mptable::MptableError), /// Error writing the zero page of guest memory. ZeroPageSetup, - /// Failed to compute initrd address. - InitrdAddress, /// Error writing module entry to guest memory. ModlistSetup, /// Error writing memory map table to guest memory. MemmapTableSetup, /// Error writing hvm_start_info to guest memory. StartInfoSetup, + /// Cannot copy kernel file fd + KernelFile, + /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} + KernelLoader(linux_loader::loader::Error), + /// Cannot load command line string: {0} + LoadCommandline(linux_loader::loader::Error), + /// Failed to create guest config: {0} + CreateGuestConfig(#[from] GuestConfigError), + /// Error configuring the vcpu for boot: {0} + VcpuConfigure(#[from] KvmVcpuConfigureError), + /// Error configuring ACPI: {0} + Acpi(#[from] crate::acpi::AcpiError), } /// First address that cannot be addressed using 32 bit anymore. @@ -107,55 +130,93 @@ pub fn get_kernel_start() -> u64 { } /// Returns the memory address where the initrd could be loaded. 
-pub fn initrd_load_addr( - guest_mem: &GuestMemoryMmap, - initrd_size: usize, -) -> Result { - let first_region = guest_mem - .find_region(GuestAddress::new(0)) - .ok_or(ConfigurationError::InitrdAddress)?; +pub fn initrd_load_addr(guest_mem: &GuestMemoryMmap, initrd_size: usize) -> Option { + let first_region = guest_mem.find_region(GuestAddress::new(0))?; let lowmem_size = u64_to_usize(first_region.len()); if lowmem_size < initrd_size { - return Err(ConfigurationError::InitrdAddress); + return None; } - let align_to_pagesize = |address| address & !(super::GUEST_PAGE_SIZE - 1); - Ok(align_to_pagesize(lowmem_size - initrd_size) as u64) + Some(align_down( + usize_to_u64(lowmem_size - initrd_size), + usize_to_u64(super::GUEST_PAGE_SIZE), + )) } -/// Configures the system and should be called once per vm before starting vcpu threads. -/// -/// # Arguments -/// -/// * `guest_mem` - The memory to be used by the guest. -/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded. -/// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator. -/// * `initrd` - Information about where the ramdisk image was loaded in the `guest_mem`. -/// * `num_cpus` - Number of virtual CPUs the guest will have. -/// * `boot_prot` - Boot protocol that will be used to boot the guest. -pub fn configure_system( - guest_mem: &GuestMemoryMmap, - resource_allocator: &mut ResourceAllocator, - cmdline_addr: GuestAddress, - cmdline_size: usize, +/// Configures the system for booting Linux. +pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + machine_config: &MachineConfig, + cpu_template: &CustomCpuTemplate, + entry_point: EntryPoint, initrd: &Option, - num_cpus: u8, - boot_prot: BootProtocol, + boot_cmdline: Cmdline, ) -> Result<(), ConfigurationError> { + // Construct the base CpuConfiguration to apply CPU template onto. 
+ let cpu_config = + CpuConfiguration::new(vmm.kvm.supported_cpuid.clone(), cpu_template, &vcpus[0])?; + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; + + let vcpu_config = VcpuConfig { + vcpu_count: machine_config.vcpu_count, + smt: machine_config.smt, + cpu_config, + }; + + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(&vmm.guest_memory, entry_point, &vcpu_config)?; + } + + // Write the kernel command line to guest memory. This is x86_64 specific, since on + // aarch64 the command line will be specified through the FDT. + let cmdline_size = boot_cmdline + .as_cstring() + .map(|cmdline_cstring| cmdline_cstring.as_bytes_with_nul().len()) + .expect("Cannot create cstring from cmdline string"); + + load_cmdline( + &vmm.guest_memory, + GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), + &boot_cmdline, + ) + .map_err(ConfigurationError::LoadCommandline)?; + // Note that this puts the mptable at the last 1k of Linux's 640k base RAM - mptable::setup_mptable(guest_mem, resource_allocator, num_cpus) - .map_err(ConfigurationError::MpTableSetup)?; + mptable::setup_mptable( + &vmm.guest_memory, + &mut vmm.resource_allocator, + vcpu_config.vcpu_count, + ) + .map_err(ConfigurationError::MpTableSetup)?; - match boot_prot { + match entry_point.protocol { BootProtocol::PvhBoot => { - configure_pvh(guest_mem, cmdline_addr, initrd)?; + configure_pvh(&vmm.guest_memory, GuestAddress(CMDLINE_START), initrd)?; } BootProtocol::LinuxBoot => { - configure_64bit_boot(guest_mem, cmdline_addr, cmdline_size, initrd)?; + configure_64bit_boot( + &vmm.guest_memory, + GuestAddress(CMDLINE_START), + cmdline_size, + initrd, + )?; } } + // Create ACPI tables and write them in guest memory + // For the time being we only support ACPI in x86_64 + create_acpi_tables( + &vmm.guest_memory, + &mut 
vmm.resource_allocator, + &vmm.mmio_device_manager, + &vmm.acpi_device_manager, + vcpus, + )?; Ok(()) } @@ -360,11 +421,47 @@ fn add_e820_entry( Ok(()) } +/// Load linux kernel into guest memory. +pub fn load_kernel( + kernel: &File, + guest_memory: &GuestMemoryMmap, +) -> Result { + // Need to clone the File because reading from it + // mutates it. + let mut kernel_file = kernel + .try_clone() + .map_err(|_| ConfigurationError::KernelFile)?; + + let entry_addr = Loader::load( + guest_memory, + None, + &mut kernel_file, + Some(GuestAddress(get_kernel_start())), + ) + .map_err(ConfigurationError::KernelLoader)?; + + let mut entry_point_addr: GuestAddress = entry_addr.kernel_load; + let mut boot_prot: BootProtocol = BootProtocol::LinuxBoot; + if let PvhBootCapability::PvhEntryPresent(pvh_entry_addr) = entry_addr.pvh_boot_cap { + // Use the PVH kernel entry point to boot the guest + entry_point_addr = pvh_entry_addr; + boot_prot = BootProtocol::PvhBoot; + } + + debug!("Kernel loaded using {boot_prot}"); + + Ok(EntryPoint { + entry_addr: entry_point_addr, + protocol: boot_prot, + }) +} + #[cfg(test)] mod tests { use linux_loader::loader::bootparam::boot_e820_entry; use super::*; + use crate::device_manager::resources::ResourceAllocator; use crate::test_utils::{arch_mem, single_region_mem}; #[test] @@ -388,94 +485,35 @@ mod tests { let no_vcpus = 4; let gm = single_region_mem(0x10000); let mut resource_allocator = ResourceAllocator::new().unwrap(); - let config_err = configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - 1, - BootProtocol::LinuxBoot, - ); - assert_eq!( - config_err.unwrap_err(), - super::ConfigurationError::MpTableSetup(mptable::MptableError::NotEnoughMemory) - ); + let err = mptable::setup_mptable(&gm, &mut resource_allocator, 1); + assert!(matches!( + err.unwrap_err(), + mptable::MptableError::NotEnoughMemory + )); // Now assigning some memory that falls before the 32bit memory hole. 
let mem_size = mib_to_bytes(128); let gm = arch_mem(mem_size); let mut resource_allocator = ResourceAllocator::new().unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::LinuxBoot, - ) - .unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::PvhBoot, - ) - .unwrap(); + mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap(); + configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap(); + configure_pvh(&gm, GuestAddress(0), &None).unwrap(); // Now assigning some memory that is equal to the start of the 32bit memory hole. let mem_size = mib_to_bytes(3328); let gm = arch_mem(mem_size); let mut resource_allocator = ResourceAllocator::new().unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::LinuxBoot, - ) - .unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::PvhBoot, - ) - .unwrap(); + mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap(); + configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap(); + configure_pvh(&gm, GuestAddress(0), &None).unwrap(); // Now assigning some memory that falls after the 32bit memory hole. 
let mem_size = mib_to_bytes(3330); let gm = arch_mem(mem_size); let mut resource_allocator = ResourceAllocator::new().unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::LinuxBoot, - ) - .unwrap(); - configure_system( - &gm, - &mut resource_allocator, - GuestAddress(0), - 0, - &None, - no_vcpus, - BootProtocol::PvhBoot, - ) - .unwrap(); + mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap(); + configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap(); + configure_pvh(&gm, GuestAddress(0), &None).unwrap(); } #[test] diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index cd1e063fb40..360b255be44 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -3,10 +3,8 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. -#[cfg(target_arch = "x86_64")] -use std::convert::TryFrom; use std::fmt::Debug; -use std::io::{self, Seek, SeekFrom}; +use std::io; #[cfg(feature = "gdb")] use std::sync::mpsc; use std::sync::{Arc, Mutex}; @@ -14,30 +12,18 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use libc::EFD_NONBLOCK; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; -use linux_loader::loader::KernelLoader; -#[cfg(target_arch = "x86_64")] -use linux_loader::loader::elf::Elf as Loader; -#[cfg(target_arch = "x86_64")] -use linux_loader::loader::elf::PvhBootCapability; -#[cfg(target_arch = "aarch64")] -use linux_loader::loader::pe::PE as Loader; use userfaultfd::Uffd; use utils::time::TimestampUs; -use vm_memory::ReadVolatile; #[cfg(target_arch = "aarch64")] use vm_superio::Rtc; use vm_superio::Serial; use vmm_sys_util::eventfd::EventFd; -#[cfg(target_arch = "x86_64")] -use crate::acpi; -use crate::arch::{BootProtocol, EntryPoint, InitrdConfig}; -use crate::builder::StartMicrovmError::Internal; +use crate::arch::{ConfigurationError, configure_system_for_boot, load_kernel}; #[cfg(target_arch = 
"aarch64")] use crate::construct_kvm_mpidrs; use crate::cpu_config::templates::{ - CpuConfiguration, CustomCpuTemplate, GetCpuTemplate, GetCpuTemplateError, GuestConfigError, - KvmCapability, + GetCpuTemplate, GetCpuTemplateError, GuestConfigError, KvmCapability, }; use crate::device_manager::acpi::ACPIDeviceManager; #[cfg(target_arch = "x86_64")] @@ -62,18 +48,17 @@ use crate::devices::virtio::rng::Entropy; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; #[cfg(feature = "gdb")] use crate::gdb; +use crate::initrd::{InitrdConfig, InitrdError}; use crate::logger::{debug, error}; use crate::persist::{MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; use crate::snapshot::Persist; -use crate::utils::u64_to_usize; -use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; -use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError}; +use crate::vmm_config::machine_config::MachineConfigError; use crate::vstate::kvm::Kvm; -use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; -use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuError}; +use crate::vstate::memory::GuestMemoryMmap; +use crate::vstate::vcpu::{Vcpu, VcpuError}; use crate::vstate::vm::Vm; use crate::{EventManager, Vmm, VmmError, device_manager}; @@ -85,7 +70,7 @@ pub enum StartMicrovmError { /// Unable to attach the VMGenID device: {0} AttachVmgenidDevice(kvm_ioctls::Error), /// System configuration error: {0} - ConfigureSystem(crate::arch::ConfigurationError), + ConfigureSystem(#[from] ConfigurationError), /// Failed to create guest config: {0} CreateGuestConfig(#[from] GuestConfigError), /// Cannot create network device: {0} @@ -99,18 +84,14 @@ pub enum StartMicrovmError { CreateVMGenID(VmGenIdError), /// Invalid Memory Configuration: {0} GuestMemory(crate::vstate::memory::MemoryError), - /// Cannot load initrd due to an invalid memory configuration. 
- InitrdLoad, - /// Cannot load initrd due to an invalid image: {0} - InitrdRead(io::Error), + /// Error with initrd initialization: {0}. + Initrd(#[from] InitrdError), /// Internal error while starting microVM: {0} Internal(#[from] VmmError), /// Failed to get CPU template: {0} GetCpuTemplate(#[from] GetCpuTemplateError), /// Invalid kernel command line: {0} KernelCmdline(String), - /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} - KernelLoader(linux_loader::loader::Error), /// Cannot load command line string: {0} LoadCommandline(linux_loader::loader::Error), /// Cannot start microvm without kernel configuration. @@ -131,9 +112,8 @@ pub enum StartMicrovmError { SetVmResources(MachineConfigError), /// Cannot create the entropy device: {0} CreateEntropyDevice(crate::devices::virtio::rng::EntropyError), - /// Error configuring ACPI: {0} - #[cfg(target_arch = "x86_64")] - Acpi(#[from] crate::acpi::AcpiError), + /// Failed to allocate guest resource: {0} + AllocateResources(#[from] vm_allocator::Error), /// Error starting GDB debug session #[cfg(feature = "gdb")] GdbServer(gdb::target::GdbTargetError), @@ -240,8 +220,8 @@ pub fn build_microvm_for_boot( .allocate_guest_memory() .map_err(StartMicrovmError::GuestMemory)?; - let entry_point = load_kernel(boot_config, &guest_memory)?; - let initrd = load_initrd_from_config(boot_config, &guest_memory)?; + let entry_point = load_kernel(&boot_config.kernel_file, &guest_memory)?; + let initrd = InitrdConfig::from_config(boot_config, &guest_memory)?; // Clone the command-line so that a failed boot doesn't pollute the original. 
#[allow(unused_mut)] let mut boot_cmdline = boot_config.cmdline.clone(); @@ -449,7 +429,7 @@ pub fn build_microvm_from_snapshot( vm_resources.machine_config.vcpu_count, microvm_state.kvm_state.kvm_cap_modifiers.clone(), ) - .map_err(Internal)?; + .map_err(StartMicrovmError::Internal)?; #[cfg(target_arch = "x86_64")] { @@ -545,126 +525,6 @@ pub fn build_microvm_from_snapshot( Ok(vmm) } -#[cfg(target_arch = "x86_64")] -fn load_kernel( - boot_config: &BootConfig, - guest_memory: &GuestMemoryMmap, -) -> Result { - let mut kernel_file = boot_config - .kernel_file - .try_clone() - .map_err(VmmError::KernelFile)?; - - let entry_addr = Loader::load::( - guest_memory, - None, - &mut kernel_file, - Some(GuestAddress(crate::arch::get_kernel_start())), - ) - .map_err(StartMicrovmError::KernelLoader)?; - - let mut entry_point_addr: GuestAddress = entry_addr.kernel_load; - let mut boot_prot: BootProtocol = BootProtocol::LinuxBoot; - if let PvhBootCapability::PvhEntryPresent(pvh_entry_addr) = entry_addr.pvh_boot_cap { - // Use the PVH kernel entry point to boot the guest - entry_point_addr = pvh_entry_addr; - boot_prot = BootProtocol::PvhBoot; - } - - debug!("Kernel loaded using {boot_prot}"); - - Ok(EntryPoint { - entry_addr: entry_point_addr, - protocol: boot_prot, - }) -} - -#[cfg(target_arch = "aarch64")] -fn load_kernel( - boot_config: &BootConfig, - guest_memory: &GuestMemoryMmap, -) -> Result { - let mut kernel_file = boot_config - .kernel_file - .try_clone() - .map_err(VmmError::KernelFile)?; - - let entry_addr = Loader::load::( - guest_memory, - Some(GuestAddress(crate::arch::get_kernel_start())), - &mut kernel_file, - None, - ) - .map_err(StartMicrovmError::KernelLoader)?; - - Ok(EntryPoint { - entry_addr: entry_addr.kernel_load, - protocol: BootProtocol::LinuxBoot, - }) -} - -fn load_initrd_from_config( - boot_cfg: &BootConfig, - vm_memory: &GuestMemoryMmap, -) -> Result, StartMicrovmError> { - use self::StartMicrovmError::InitrdRead; - - Ok(match &boot_cfg.initrd_file 
{ - Some(f) => Some(load_initrd( - vm_memory, - &mut f.try_clone().map_err(InitrdRead)?, - )?), - None => None, - }) -} - -/// Loads the initrd from a file into the given memory slice. -/// -/// * `vm_memory` - The guest memory the initrd is written to. -/// * `image` - The initrd image. -/// -/// Returns the result of initrd loading -fn load_initrd( - vm_memory: &GuestMemoryMmap, - image: &mut F, -) -> Result -where - F: ReadVolatile + Seek + Debug, -{ - use self::StartMicrovmError::{InitrdLoad, InitrdRead}; - - // Get the image size - let size = match image.seek(SeekFrom::End(0)) { - Err(err) => return Err(InitrdRead(err)), - Ok(0) => { - return Err(InitrdRead(io::Error::new( - io::ErrorKind::InvalidData, - "Initrd image seek returned a size of zero", - ))); - } - Ok(s) => u64_to_usize(s), - }; - // Go back to the image start - image.seek(SeekFrom::Start(0)).map_err(InitrdRead)?; - - // Get the target address - let address = crate::arch::initrd_load_addr(vm_memory, size).map_err(|_| InitrdLoad)?; - - // Load the image into memory - let mut slice = vm_memory - .get_slice(GuestAddress(address), size) - .map_err(|_| InitrdLoad)?; - - image - .read_exact_volatile(&mut slice) - .map_err(|_| InitrdLoad)?; - - Ok(InitrdConfig { - address: GuestAddress(address), - size, - }) -} - /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -722,133 +582,6 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -/// Configures the system for booting Linux. -#[cfg_attr(target_arch = "aarch64", allow(unused))] -pub fn configure_system_for_boot( - vmm: &mut Vmm, - vcpus: &mut [Vcpu], - machine_config: &MachineConfig, - cpu_template: &CustomCpuTemplate, - entry_point: EntryPoint, - initrd: &Option, - boot_cmdline: LoaderKernelCmdline, -) -> Result<(), StartMicrovmError> { - use self::StartMicrovmError::*; - - // Construct the base CpuConfiguration to apply CPU template onto. 
- #[cfg(target_arch = "x86_64")] - let cpu_config = { - use crate::cpu_config::x86_64::cpuid; - let cpuid = cpuid::Cpuid::try_from(vmm.kvm.supported_cpuid.clone()) - .map_err(GuestConfigError::CpuidFromKvmCpuid)?; - let msrs = vcpus[0] - .kvm_vcpu - .get_msrs(cpu_template.msr_index_iter()) - .map_err(GuestConfigError::VcpuIoctl)?; - CpuConfiguration { cpuid, msrs } - }; - - #[cfg(target_arch = "aarch64")] - let cpu_config = { - use crate::arch::aarch64::regs::Aarch64RegisterVec; - use crate::arch::aarch64::vcpu::get_registers; - - for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .init(&cpu_template.vcpu_features) - .map_err(VmmError::VcpuInit)?; - } - - let mut regs = Aarch64RegisterVec::default(); - get_registers(&vcpus[0].kvm_vcpu.fd, &cpu_template.reg_list(), &mut regs) - .map_err(GuestConfigError)?; - CpuConfiguration { regs } - }; - - // Apply CPU template to the base CpuConfiguration. - let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; - - let vcpu_config = VcpuConfig { - vcpu_count: machine_config.vcpu_count, - smt: machine_config.smt, - cpu_config, - }; - - #[cfg(target_arch = "x86_64")] - { - // Configure vCPUs with normalizing and setting the generated CPU configuration. - for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .configure(vmm.guest_memory(), entry_point, &vcpu_config) - .map_err(VmmError::VcpuConfigure)?; - } - - // Write the kernel command line to guest memory. This is x86_64 specific, since on - // aarch64 the command line will be specified through the FDT. 
- let cmdline_size = boot_cmdline - .as_cstring() - .map(|cmdline_cstring| cmdline_cstring.as_bytes_with_nul().len())?; - - linux_loader::loader::load_cmdline::( - vmm.guest_memory(), - GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), - &boot_cmdline, - ) - .map_err(LoadCommandline)?; - crate::arch::x86_64::configure_system( - &vmm.guest_memory, - &mut vmm.resource_allocator, - crate::vstate::memory::GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), - cmdline_size, - initrd, - vcpu_config.vcpu_count, - entry_point.protocol, - ) - .map_err(ConfigureSystem)?; - - // Create ACPI tables and write them in guest memory - // For the time being we only support ACPI in x86_64 - acpi::create_acpi_tables( - &vmm.guest_memory, - &mut vmm.resource_allocator, - &vmm.mmio_device_manager, - &vmm.acpi_device_manager, - vcpus, - )?; - } - #[cfg(target_arch = "aarch64")] - { - let optional_capabilities = vmm.kvm.optional_capabilities(); - // Configure vCPUs with normalizing and setting the generated CPU configuration. - for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .configure( - vmm.guest_memory(), - entry_point, - &vcpu_config, - &optional_capabilities, - ) - .map_err(VmmError::VcpuConfigure)?; - } - let vcpu_mpidr = vcpus - .iter_mut() - .map(|cpu| cpu.kvm_vcpu.get_mpidr()) - .collect(); - let cmdline = boot_cmdline.as_cstring()?; - crate::arch::aarch64::configure_system( - &vmm.guest_memory, - cmdline, - vcpu_mpidr, - vmm.mmio_device_manager.get_device_info(), - vmm.vm.get_irqchip(), - &vmm.acpi_device_manager.vmgenid, - initrd, - ) - .map_err(ConfigureSystem)?; - } - Ok(()) -} - /// Attaches a VirtioDevice device to the device manager and event manager. fn attach_virtio_device( event_manager: &mut EventManager, @@ -863,7 +596,7 @@ fn attach_virtio_device( event_manager.add_subscriber(device.clone()); // The device mutex mustn't be locked here otherwise it will deadlock. 
- let device = MmioTransport::new(vmm.guest_memory().clone(), device, is_vhost_user); + let device = MmioTransport::new(vmm.guest_memory.clone(), device, is_vhost_user); vmm.mmio_device_manager .register_mmio_virtio_for_boot( vmm.vm.fd(), @@ -1010,7 +743,6 @@ pub(crate) fn set_stdout_nonblocking() { #[cfg(test)] pub(crate) mod tests { - use std::io::Write; use linux_loader::cmdline::Cmdline; use vmm_sys_util::tempfile::TempFile; @@ -1024,7 +756,6 @@ pub(crate) mod tests { use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_RNG}; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; - use crate::test_utils::{single_region_mem, single_region_mem_at}; use crate::utils::mib_to_bytes; use crate::vmm_config::balloon::{BALLOON_DEV_ID, BalloonBuilder, BalloonDeviceConfig}; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; @@ -1270,67 +1001,6 @@ pub(crate) mod tests { ); } - fn make_test_bin() -> Vec { - let mut fake_bin = Vec::new(); - fake_bin.resize(1_000_000, 0xAA); - fake_bin - } - - #[test] - // Test that loading the initrd is successful on different archs. 
- fn test_load_initrd() { - use crate::vstate::memory::GuestMemory; - let image = make_test_bin(); - - let mem_size: usize = image.len() * 2 + crate::arch::GUEST_PAGE_SIZE; - - let tempfile = TempFile::new().unwrap(); - let mut tempfile = tempfile.into_file(); - tempfile.write_all(&image).unwrap(); - - #[cfg(target_arch = "x86_64")] - let gm = single_region_mem(mem_size); - - #[cfg(target_arch = "aarch64")] - let gm = single_region_mem(mem_size + crate::arch::aarch64::layout::FDT_MAX_SIZE); - - let res = load_initrd(&gm, &mut tempfile); - let initrd = res.unwrap(); - assert!(gm.address_in_range(initrd.address)); - assert_eq!(initrd.size, image.len()); - } - - #[test] - fn test_load_initrd_no_memory() { - let gm = single_region_mem(79); - let image = make_test_bin(); - let tempfile = TempFile::new().unwrap(); - let mut tempfile = tempfile.into_file(); - tempfile.write_all(&image).unwrap(); - let res = load_initrd(&gm, &mut tempfile); - assert!( - matches!(res, Err(StartMicrovmError::InitrdLoad)), - "{:?}", - res - ); - } - - #[test] - fn test_load_initrd_unaligned() { - let image = vec![1, 2, 3, 4]; - let tempfile = TempFile::new().unwrap(); - let mut tempfile = tempfile.into_file(); - tempfile.write_all(&image).unwrap(); - let gm = single_region_mem_at(crate::arch::GUEST_PAGE_SIZE as u64 + 1, image.len() * 2); - - let res = load_initrd(&gm, &mut tempfile); - assert!( - matches!(res, Err(StartMicrovmError::InitrdLoad)), - "{:?}", - res - ); - } - #[test] fn test_attach_net_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); diff --git a/src/vmm/src/cpu_config/aarch64/mod.rs b/src/vmm/src/cpu_config/aarch64/mod.rs index 26ca48572ac..786183f3988 100644 --- a/src/vmm/src/cpu_config/aarch64/mod.rs +++ b/src/vmm/src/cpu_config/aarch64/mod.rs @@ -9,13 +9,19 @@ pub mod static_cpu_templates; pub mod test_utils; use super::templates::CustomCpuTemplate; +use crate::Vcpu; use crate::arch::aarch64::regs::{Aarch64RegisterVec, 
RegSize}; -use crate::arch::aarch64::vcpu::VcpuArchError; +use crate::arch::aarch64::vcpu::{VcpuArchError, get_registers}; +use crate::vstate::vcpu::KvmVcpuError; /// Errors thrown while configuring templates. -#[derive(Debug, PartialEq, Eq, thiserror::Error)] -#[error("Failed to create a guest cpu configuration: {0}")] -pub struct CpuConfigurationError(#[from] pub VcpuArchError); +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum CpuConfigurationError { + /// Error initializing the vcpu: {0} + VcpuInit(#[from] KvmVcpuError), + /// Error reading vcpu registers: {0} + VcpuGetRegs(#[from] VcpuArchError), +} /// CPU configuration for aarch64 #[derive(Debug, Default, Clone, PartialEq, Eq)] @@ -25,11 +31,22 @@ pub struct CpuConfiguration { } impl CpuConfiguration { - /// Creates new guest CPU config based on the provided template - pub fn apply_template( - mut self, - template: &CustomCpuTemplate, + /// Create new CpuConfiguration. + pub fn new( + cpu_template: &CustomCpuTemplate, + vcpus: &mut [Vcpu], ) -> Result<Self, CpuConfigurationError> { + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu.init(&cpu_template.vcpu_features)?; + } + + let mut regs = Aarch64RegisterVec::default(); + get_registers(&vcpus[0].kvm_vcpu.fd, &cpu_template.reg_list(), &mut regs)?; + Ok(CpuConfiguration { regs }) + } + + /// Creates new guest CPU config based on the provided template + pub fn apply_template(mut self, template: &CustomCpuTemplate) -> Self { for (modifier, mut reg) in template.reg_modifiers.iter().zip(self.regs.iter_mut()) { match reg.size() { RegSize::U32 => { @@ -50,7 +67,7 @@ impl CpuConfiguration { _ => unreachable!("Only 32, 64 and 128 bit wide registers are supported"), } } - Ok(self) + self } /// Returns ids of registers that are changed diff --git a/src/vmm/src/cpu_config/x86_64/mod.rs b/src/vmm/src/cpu_config/x86_64/mod.rs index 3a26c621194..ddf0b64dea0 100644 --- a/src/vmm/src/cpu_config/x86_64/mod.rs +++ b/src/vmm/src/cpu_config/x86_64/mod.rs @@ -12,8 +12,11 @@ pub 
mod test_utils; use std::collections::BTreeMap; +use kvm_bindings::CpuId; + use self::custom_cpu_template::CpuidRegister; use super::templates::CustomCpuTemplate; +use crate::Vcpu; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidKey}; /// Errors thrown while configuring templates. @@ -24,9 +27,9 @@ pub enum CpuConfigurationError { /// Template changes an MSR entry not supported by KVM: Register Address: {0:0x} MsrNotSupported(u32), /// Can create cpuid from raw: {0} - CpuidFromKvmCpuid(crate::cpu_config::x86_64::cpuid::CpuidTryFromKvmCpuid), + CpuidFromKvmCpuid(#[from] crate::cpu_config::x86_64::cpuid::CpuidTryFromKvmCpuid), /// KVM vcpu ioctl failed: {0} - VcpuIoctl(crate::vstate::vcpu::KvmVcpuError), + VcpuIoctl(#[from] crate::vstate::vcpu::KvmVcpuError), } /// CPU configuration for x86_64 CPUs @@ -41,6 +44,19 @@ pub struct CpuConfiguration { } impl CpuConfiguration { + /// Create new CpuConfiguration. + pub fn new( + supported_cpuid: CpuId, + cpu_template: &CustomCpuTemplate, + first_vcpu: &Vcpu, + ) -> Result<Self, CpuConfigurationError> { + let cpuid = cpuid::Cpuid::try_from(supported_cpuid)?; + let msrs = first_vcpu + .kvm_vcpu + .get_msrs(cpu_template.msr_index_iter())?; + Ok(CpuConfiguration { cpuid, msrs }) + } + /// Modifies provided config with changes from template pub fn apply_template( self, diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index ffab7b81bfd..0b8ad8eb04d 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -805,7 +805,7 @@ mod tests { let device_states: DeviceStates = Snapshot::deserialize(&mut buf.as_slice()).unwrap(); let vm_resources = &mut VmResources::default(); let restore_args = MMIODevManagerConstructorArgs { - mem: vmm.guest_memory(), + mem: &vmm.guest_memory, vm: vmm.vm.fd(), event_manager: &mut event_manager, resource_allocator: &mut resource_allocator, diff --git a/src/vmm/src/devices/virtio/test_utils.rs b/src/vmm/src/devices/virtio/test_utils.rs index 
aa1ede08e58..8642d0a85f4 100644 --- a/src/vmm/src/devices/virtio/test_utils.rs +++ b/src/vmm/src/devices/virtio/test_utils.rs @@ -10,7 +10,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use crate::devices::virtio::queue::Queue; use crate::test_utils::single_region_mem; -use crate::utils::u64_to_usize; +use crate::utils::{align_up, u64_to_usize}; use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemoryMmap}; #[macro_export] @@ -250,7 +250,7 @@ impl<'a> VirtQueue<'a> { const USED_ALIGN: u64 = 4; let mut x = avail.end().0; - x = (x + USED_ALIGN - 1) & !(USED_ALIGN - 1); + x = align_up(x, USED_ALIGN); let used = VirtqUsed::new(GuestAddress(x), mem, qsize, u64_to_usize(USED_ALIGN)); diff --git a/src/vmm/src/gdb/arch/aarch64.rs b/src/vmm/src/gdb/arch/aarch64.rs index efd5ad8ae01..9504a48fcc5 100644 --- a/src/vmm/src/gdb/arch/aarch64.rs +++ b/src/vmm/src/gdb/arch/aarch64.rs @@ -63,7 +63,7 @@ const PTE_ADDRESS_MASK: u64 = !0b111u64; /// Read a u64 value from a guest memory address fn read_address(vmm: &Vmm, address: u64) -> Result { let mut buf = [0; 8]; - vmm.guest_memory().read(&mut buf, GuestAddress(address))?; + vmm.guest_memory.read(&mut buf, GuestAddress(address))?; Ok(u64::from_le_bytes(buf)) } diff --git a/src/vmm/src/gdb/target.rs b/src/vmm/src/gdb/target.rs index c3293db1607..3ff96d0c8b5 100644 --- a/src/vmm/src/gdb/target.rs +++ b/src/vmm/src/gdb/target.rs @@ -399,7 +399,7 @@ impl MultiThreadBase for FirecrackerTarget { GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); - vmm.guest_memory() + vmm.guest_memory .read(&mut data[..read_len], GuestAddress(gpa as u64)) .map_err(|e| { error!("Error reading memory {e:?} gpa is {gpa}"); @@ -433,7 +433,7 @@ impl MultiThreadBase for FirecrackerTarget { GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); - vmm.guest_memory() + vmm.guest_memory .write(&data[..write_len], GuestAddress(gpa)) .map_err(|e| { error!("Error {e:?} writing memory at {gpa:#X}"); diff --git 
a/src/vmm/src/initrd.rs b/src/vmm/src/initrd.rs new file mode 100644 index 00000000000..9dfcd8bc16e --- /dev/null +++ b/src/vmm/src/initrd.rs @@ -0,0 +1,140 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; +use std::os::unix::fs::MetadataExt; + +use vm_memory::{GuestAddress, GuestMemory, ReadVolatile, VolatileMemoryError}; + +use crate::arch::initrd_load_addr; +use crate::utils::u64_to_usize; +use crate::vmm_config::boot_source::BootConfig; +use crate::vstate::memory::GuestMemoryMmap; + +/// Errors associated with initrd loading. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum InitrdError { + /// Failed to compute the initrd address. + Address, + /// Cannot load initrd due to an invalid memory configuration. + Load, + /// Cannot read image metadata: {0} + Metadata(std::io::Error), + /// Cannot copy initrd file fd: {0} + CloneFd(std::io::Error), + /// Cannot load initrd due to an invalid image: {0} + Read(VolatileMemoryError), +} + +/// Type for passing information about the initrd in the guest memory. +#[derive(Debug)] +pub struct InitrdConfig { + /// Load address of initrd in guest memory + pub address: GuestAddress, + /// Size of initrd in guest memory + pub size: usize, +} + +impl InitrdConfig { + /// Load initrd into guest memory based on the boot config. + pub fn from_config( + boot_cfg: &BootConfig, + vm_memory: &GuestMemoryMmap, + ) -> Result<Option<Self>, InitrdError> { + Ok(match &boot_cfg.initrd_file { + Some(f) => { + let f = f.try_clone().map_err(InitrdError::CloneFd)?; + Some(Self::from_file(vm_memory, f)?) + } + None => None, + }) + } + + /// Loads the initrd from a file into guest memory. 
+ pub fn from_file(vm_memory: &GuestMemoryMmap, mut file: File) -> Result<Self, InitrdError> { + let size = file.metadata().map_err(InitrdError::Metadata)?.size(); + let size = u64_to_usize(size); + let Some(address) = initrd_load_addr(vm_memory, size) else { + return Err(InitrdError::Address); + }; + let mut slice = vm_memory + .get_slice(GuestAddress(address), size) + .map_err(|_| InitrdError::Load)?; + file.read_exact_volatile(&mut slice) + .map_err(InitrdError::Read)?; + + Ok(InitrdConfig { + address: GuestAddress(address), + size, + }) + } +} + +#[cfg(test)] +mod tests { + use std::io::{Seek, SeekFrom, Write}; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::arch::GUEST_PAGE_SIZE; + use crate::test_utils::{single_region_mem, single_region_mem_at}; + + fn make_test_bin() -> Vec<u8> { + let mut fake_bin = Vec::new(); + fake_bin.resize(1_000_000, 0xAA); + fake_bin + } + + #[test] + // Test that loading the initrd is successful on different archs. + fn test_load_initrd() { + let image = make_test_bin(); + + let mem_size: usize = image.len() * 2 + GUEST_PAGE_SIZE; + + let tempfile = TempFile::new().unwrap(); + let mut tempfile = tempfile.into_file(); + tempfile.write_all(&image).unwrap(); + + #[cfg(target_arch = "x86_64")] + let gm = single_region_mem(mem_size); + + #[cfg(target_arch = "aarch64")] + let gm = single_region_mem(mem_size + crate::arch::aarch64::layout::FDT_MAX_SIZE); + + // Need to reset the cursor to read initrd properly. + tempfile.seek(SeekFrom::Start(0)).unwrap(); + let initrd = InitrdConfig::from_file(&gm, tempfile).unwrap(); + assert!(gm.address_in_range(initrd.address)); + assert_eq!(initrd.size, image.len()); + } + + #[test] + fn test_load_initrd_no_memory() { + let gm = single_region_mem(79); + let image = make_test_bin(); + let tempfile = TempFile::new().unwrap(); + let mut tempfile = tempfile.into_file(); + tempfile.write_all(&image).unwrap(); + + // Need to reset the cursor to read initrd properly. 
+ tempfile.seek(SeekFrom::Start(0)).unwrap(); + let res = InitrdConfig::from_file(&gm, tempfile); + assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res); + } + + #[test] + fn test_load_initrd_unaligned() { + let image = vec![1, 2, 3, 4]; + let tempfile = TempFile::new().unwrap(); + let mut tempfile = tempfile.into_file(); + tempfile.write_all(&image).unwrap(); + let gm = single_region_mem_at(GUEST_PAGE_SIZE as u64 + 1, image.len() * 2); + + // Need to reset the cursor to read initrd properly. + tempfile.seek(SeekFrom::Start(0)).unwrap(); + let res = InitrdConfig::from_file(&gm, tempfile); + assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res); + } +} diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 993409dc819..5f173d3ae9c 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -111,6 +111,9 @@ pub mod vmm_config; /// Module with virtual state structs. pub mod vstate; +/// Module with initrd. +pub mod initrd; + use std::collections::HashMap; use std::io; use std::os::unix::io::AsRawFd; @@ -128,7 +131,7 @@ use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::terminal::Terminal; use vstate::kvm::Kvm; -use vstate::vcpu::{self, KvmVcpuConfigureError, StartThreadedError, VcpuSendEventError}; +use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError}; use crate::arch::DeviceType; use crate::cpu_config::templates::CpuConfiguration; @@ -218,8 +221,6 @@ pub enum VmmError { EventFd(io::Error), /// I8042 error: {0} I8042Error(devices::legacy::I8042DeviceError), - /// Cannot access kernel file: {0} - KernelFile(io::Error), #[cfg(target_arch = "x86_64")] /// Cannot add devices to the legacy I/O Bus. 
{0} LegacyIOBus(device_manager::legacy::LegacyDeviceError), @@ -233,17 +234,12 @@ pub enum VmmError { Serial(io::Error), /// Error creating timer fd: {0} TimerFd(io::Error), - /// Error configuring the vcpu for boot: {0} - VcpuConfigure(KvmVcpuConfigureError), /// Error creating the vcpu: {0} VcpuCreate(vstate::vcpu::VcpuError), /// Cannot send event to vCPU. {0} VcpuEvent(vstate::vcpu::VcpuError), /// Cannot create a vCPU handle. {0} VcpuHandle(vstate::vcpu::VcpuError), - #[cfg(target_arch = "aarch64")] - /// Error initializing the vcpu: {0} - VcpuInit(vstate::vcpu::KvmVcpuError), /// Failed to start vCPUs VcpuStart(StartVcpusError), /// Failed to pause the vCPUs. @@ -448,11 +444,6 @@ impl Vmm { Ok(()) } - /// Returns a reference to the inner `GuestMemoryMmap` object. - pub fn guest_memory(&self) -> &GuestMemoryMmap { - &self.guest_memory - } - /// Sets RDA bit in serial console pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> { // When restoring from a previously saved state, there is no serial @@ -530,7 +521,7 @@ impl Vmm { }; let device_states = self.mmio_device_manager.save(); - let memory_state = self.guest_memory().describe(); + let memory_state = self.guest_memory.describe(); let acpi_dev_state = self.acpi_device_manager.save(); Ok(MicrovmState { @@ -745,7 +736,7 @@ impl Vmm { pub fn update_balloon_config(&mut self, amount_mib: u32) -> Result<(), BalloonError> { // The balloon cannot have a target size greater than the size of // the guest memory. - if u64::from(amount_mib) > mem_size_mib(self.guest_memory()) { + if u64::from(amount_mib) > mem_size_mib(&self.guest_memory) { return Err(BalloonError::TooManyPagesRequested); } diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 9d245e64821..64292612a16 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -219,7 +219,7 @@ fn snapshot_memory_to_file( .map_err(|err| MemoryBackingFile("open", err))?; // Determine what size our total memory area is. 
- let mem_size_mib = mem_size_mib(vmm.guest_memory()); + let mem_size_mib = mem_size_mib(&vmm.guest_memory); let expected_size = mem_size_mib * 1024 * 1024; if file_existed { @@ -248,15 +248,15 @@ fn snapshot_memory_to_file( match snapshot_type { SnapshotType::Diff => { let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; - vmm.guest_memory() + vmm.guest_memory .dump_dirty(&mut file, &dirty_bitmap) .map_err(Memory) } SnapshotType::Full => { - let dump_res = vmm.guest_memory().dump(&mut file).map_err(Memory); + let dump_res = vmm.guest_memory.dump(&mut file).map_err(Memory); if dump_res.is_ok() { vmm.reset_dirty_bitmap(); - vmm.guest_memory().reset_dirty(); + vmm.guest_memory.reset_dirty(); } dump_res @@ -272,7 +272,7 @@ fn snapshot_memory_to_file( .for_each_virtio_device(|_, _, _, dev| { let d = dev.lock().unwrap(); if d.is_activated() { - d.mark_queue_memory_dirty(vmm.guest_memory()) + d.mark_queue_memory_dirty(&vmm.guest_memory) } else { Ok(()) } @@ -747,7 +747,7 @@ mod tests { assert!(states.vsock_device.is_some()); assert!(states.balloon_device.is_some()); - let memory_state = vmm.guest_memory().describe(); + let memory_state = vmm.guest_memory.describe(); let vcpu_states = vec![VcpuState::default()]; #[cfg(target_arch = "aarch64")] let mpidrs = construct_kvm_mpidrs(&vcpu_states); diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 2d472a3f3e2..837ddd91069 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -600,28 +600,6 @@ mod tests { } } - impl PartialEq for BootConfig { - fn eq(&self, other: &Self) -> bool { - self.cmdline.eq(&other.cmdline) - && self.kernel_file.metadata().unwrap().st_ino() - == other.kernel_file.metadata().unwrap().st_ino() - && self - .initrd_file - .as_ref() - .unwrap() - .metadata() - .unwrap() - .st_ino() - == other - .initrd_file - .as_ref() - .unwrap() - .metadata() - .unwrap() - .st_ino() - } - } - #[test] fn test_from_json() { let kernel_file = TempFile::new().unwrap(); diff --git 
a/src/vmm/src/utils/mod.rs b/src/vmm/src/utils/mod.rs index 9c81c7a3016..430f9fe9a71 100644 --- a/src/vmm/src/utils/mod.rs +++ b/src/vmm/src/utils/mod.rs @@ -53,3 +53,15 @@ pub const fn wrap_usize_to_u32(num: usize) -> Wrapping { pub const fn mib_to_bytes(mib: usize) -> usize { mib << MIB_TO_BYTES_SHIFT } + +/// Align address up to the alignment (`align` must be a non-zero power of two). +pub const fn align_up(addr: u64, align: u64) -> u64 { + debug_assert!(align != 0); + (addr + align - 1) & !(align - 1) +} + +/// Align address down to the alignment (`align` must be a non-zero power of two). +pub const fn align_down(addr: u64, align: u64) -> u64 { + debug_assert!(align != 0); + addr & !(align - 1) +} diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index b7de0017e46..4e1bf8970a0 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -771,7 +771,6 @@ pub(crate) mod tests { use super::*; use crate::RECV_TIMEOUT_SEC; use crate::arch::{BootProtocol, EntryPoint}; - use crate::builder::StartMicrovmError; use crate::devices::BusDevice; use crate::devices::bus::DummyDevice; use crate::seccomp::get_empty_filters; @@ -952,12 +951,11 @@ pub(crate) mod tests { &mut kernel_file, Some(GuestAddress(crate::arch::get_kernel_start())), ) - .map_err(StartMicrovmError::KernelLoader); + .unwrap(); #[cfg(target_arch = "aarch64")] let entry_addr = - linux_loader::loader::pe::PE::load(vm_memory, None, &mut kernel_file, None) - .map_err(StartMicrovmError::KernelLoader); - entry_addr.unwrap().kernel_load + linux_loader::loader::pe::PE::load(vm_memory, None, &mut kernel_file, None).unwrap(); + entry_addr.kernel_load } fn vcpu_configured_for_boot() -> (VcpuHandle, EventFd, GuestMemoryMmap) {