From 3c9f7d9383761cdfe27e454e99fe54cfc163cf2c Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 7 Feb 2025 16:48:46 -0800 Subject: [PATCH] tpuproxy: resolve FIXMEs added by cl/723723714 - Remove the ability to mmap VFIO group FDs. As mentioned by the deleted FIXME, this is consistent with Linux. - Use MemoryTypeUncached for VFIO_GROUP_GET_DEVICE_FD FDs. On the KVM platform, this ensures that e.g. writes to memory-mapped device registers occur in program order and without combining, and no reads are cached or issued speculatively. On other platforms, this has no effect since application page table entries are controlled by the host Linux kernel. (Technically, on the KVM platform, this also has no effect on Intel CPUs for reasons described in gvisor.dev/issue/11436.) PiperOrigin-RevId: 724512151 --- pkg/abi/nvgpu/frontend.go | 14 ++- pkg/hostarch/BUILD | 1 + pkg/hostarch/memory_type.go | 84 +++++++++++++++++ pkg/ring0/pagetables/pagetables_aarch64.go | 59 ++++++------ pkg/ring0/pagetables/pagetables_amd64_test.go | 10 ++ pkg/ring0/pagetables/pagetables_arm64_test.go | 7 ++ pkg/ring0/pagetables/pagetables_x86.go | 32 ++++--- pkg/sentry/devices/nvproxy/frontend.go | 21 ++++- pkg/sentry/devices/nvproxy/frontend_mmap.go | 66 ++++++++++++++ pkg/sentry/devices/nvproxy/uvm_mmap.go | 2 + .../devices/tpuproxy/accel/accel_fd_mmap.go | 11 +-- pkg/sentry/devices/tpuproxy/vfio/BUILD | 1 - .../tpuproxy/vfio/pci_device_fd_mmap.go | 7 ++ pkg/sentry/devices/tpuproxy/vfio/tpu_fd.go | 7 +- .../devices/tpuproxy/vfio/tpu_fd_mmap.go | 91 ------------------- pkg/sentry/devices/tpuproxy/vfio/vfio.go | 1 - .../devices/tpuproxy/vfio/vfio_fd_mmap.go | 11 +-- pkg/sentry/fsimpl/erofs/regular_file.go | 1 + pkg/sentry/fsimpl/gofer/regular_file.go | 1 + pkg/sentry/fsimpl/gofer/special_file.go | 1 + pkg/sentry/fsimpl/kernfs/mmap_util.go | 1 + pkg/sentry/memmap/BUILD | 1 + pkg/sentry/memmap/memmap.go | 58 ++++++++++-- pkg/sentry/mm/debug.go | 2 +- pkg/sentry/pgalloc/pgalloc.go | 1 + 
pkg/sentry/platform/kvm/address_space.go | 12 ++- pkg/sentry/platform/kvm/kvm_const_arm64.go | 7 -- pkg/sentry/platform/kvm/machine_amd64.go | 5 + .../platform/kvm/machine_amd64_unsafe.go | 31 +++++++ .../platform/kvm/machine_arm64_unsafe.go | 8 +- 30 files changed, 371 insertions(+), 183 deletions(-) create mode 100644 pkg/hostarch/memory_type.go delete mode 100644 pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go diff --git a/pkg/abi/nvgpu/frontend.go b/pkg/abi/nvgpu/frontend.go index 0d8dcb96c5..0f80110446 100644 --- a/pkg/abi/nvgpu/frontend.go +++ b/pkg/abi/nvgpu/frontend.go @@ -198,7 +198,7 @@ type NVOS02_PARAMETERS struct { Pad1 [4]byte } -// Bitfields in NVOS02Parameters.Flags: +// Bitfields in NVOS02_PARAMETERS.Flags: const ( NVOS02_FLAGS_ALLOC_SHIFT = 16 NVOS02_FLAGS_ALLOC_MASK = 0x3 @@ -470,6 +470,18 @@ type NVOS33_PARAMETERS struct { Flags uint32 } +// Bitfields in NVOS33_PARAMETERS.Flags: +const ( + NVOS33_FLAGS_CACHING_TYPE_SHIFT = 23 + NVOS33_FLAGS_CACHING_TYPE_MASK = 0x7 + NVOS33_FLAGS_CACHING_TYPE_CACHED = 0 + NVOS33_FLAGS_CACHING_TYPE_UNCACHED = 1 + NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED = 2 + NVOS33_FLAGS_CACHING_TYPE_WRITEBACK = 5 + NVOS33_FLAGS_CACHING_TYPE_DEFAULT = 6 + NVOS33_FLAGS_CACHING_TYPE_UNCACHED_WEAK = 7 +) + // NVOS34_PARAMETERS is the parameter type for NV_ESC_RM_UNMAP_MEMORY. // // +marshal diff --git a/pkg/hostarch/BUILD b/pkg/hostarch/BUILD index 3508c443fb..cde0764559 100644 --- a/pkg/hostarch/BUILD +++ b/pkg/hostarch/BUILD @@ -38,6 +38,7 @@ go_library( "hostarch.go", "hostarch_arm64.go", "hostarch_x86.go", + "memory_type.go", "sizes_util.go", ], visibility = ["//:sandbox"], diff --git a/pkg/hostarch/memory_type.go b/pkg/hostarch/memory_type.go new file mode 100644 index 0000000000..82d530b9b9 --- /dev/null +++ b/pkg/hostarch/memory_type.go @@ -0,0 +1,84 @@ +// Copyright 2025 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hostarch + +import "fmt" + +// MemoryType specifies CPU memory access behavior. +type MemoryType uint8 + +const ( + // MemoryTypeWriteBack is equivalent to Linux's default pgprot, or the + // following architectural memory types: + // + // - x86: Write-back (WB) + // + // - ARM64: Normal write-back cacheable + // + // This memory type is appropriate for typical application memory and must + // be the zero value for MemoryType. + MemoryTypeWriteBack MemoryType = iota + + // MemoryTypeWriteCombine is equivalent to Linux's pgprot_writecombine(), + // or the following architectural memory types: + // + // - x86: Write-combining (WC) + // + // - ARM64: Normal non-cacheable + MemoryTypeWriteCombine + + // MemoryTypeUncached is equivalent to Linux's pgprot_noncached(), or the + // following architectural memory types: + // + // - x86: Strong Uncacheable (UC) or Uncacheable (UC-); these differ in + // that UC- may be "downgraded" to WC by a setting of WC or (Intel only) WP + // in MTRR or EPT/NPT, but gVisor does not use MTRRs and KVM never sets WC + // or WP in EPT/NPT. + // + // - ARM64: Device-nGnRnE + MemoryTypeUncached + + // NumMemoryTypes is the number of memory types. + NumMemoryTypes +) + +// String implements fmt.Stringer.String. 
+func (mt MemoryType) String() string { + switch mt { + case MemoryTypeWriteBack: + return "WriteBack" + case MemoryTypeWriteCombine: + return "WriteCombine" + case MemoryTypeUncached: + return "Uncached" + default: + return fmt.Sprintf("%d", mt) + } +} + +// ShortString returns a two-character string compactly representing the +// MemoryType. +func (mt MemoryType) ShortString() string { + switch mt { + case MemoryTypeWriteBack: + return "WB" + case MemoryTypeWriteCombine: + return "WC" + case MemoryTypeUncached: + return "UC" + default: + return fmt.Sprintf("%02d", mt) + } +} diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go index 6c2fe2a700..97ce934e08 100644 --- a/pkg/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/ring0/pagetables/pagetables_aarch64.go @@ -52,29 +52,26 @@ func (p *PageTables) TTBR1_EL1(noFlush bool, asid uint16) uint64 { // Bits in page table entries. const ( - typeTable = 0x3 << 0 - typeSect = 0x1 << 0 - typePage = 0x3 << 0 - pteValid = 0x1 << 0 - pteTableBit = 0x1 << 1 - pteTypeMask = 0x3 << 0 - present = pteValid | pteTableBit - user = 0x1 << 6 /* AP[1] */ - readOnly = 0x1 << 7 /* AP[2] */ - accessed = 0x1 << 10 - dbm = 0x1 << 51 - writable = dbm - cont = 0x1 << 52 - pxn = 0x1 << 53 - xn = 0x1 << 54 - dirty = 0x1 << 55 - nG = 0x1 << 11 - shared = 0x3 << 8 -) - -const ( - mtDevicenGnRE = 0x1 << 2 - mtNormal = 0x4 << 2 + typeTable = 0x3 << 0 + typeSect = 0x1 << 0 + typePage = 0x3 << 0 + pteValid = 0x1 << 0 + pteTableBit = 0x1 << 1 + pteTypeMask = 0x3 << 0 + present = pteValid | pteTableBit + attrIndxShift = 2 + attrIndxMask = 0x7 + user = 0x1 << 6 /* AP[1] */ + readOnly = 0x1 << 7 /* AP[2] */ + accessed = 0x1 << 10 + dbm = 0x1 << 51 + writable = dbm + cont = 0x1 << 52 + pxn = 0x1 << 53 + xn = 0x1 << 54 + dirty = 0x1 << 55 + nG = 0x1 << 11 + shared = 0x3 << 8 ) const ( @@ -93,6 +90,9 @@ type MapOpts struct { // User indicates the page is a user page. 
User bool + + // MemoryType is the memory type. + MemoryType hostarch.MemoryType } // PTE is a page table entry. @@ -119,15 +119,15 @@ func (p *PTE) Valid() bool { //go:nosplit func (p *PTE) Opts() MapOpts { v := atomic.LoadUintptr((*uintptr)(p)) - return MapOpts{ AccessType: hostarch.AccessType{ Read: true, Write: v&readOnly == 0, Execute: v&xn == 0, }, - Global: v&nG == 0, - User: v&user != 0, + Global: v&nG == 0, + User: v&user != 0, + MemoryType: hostarch.MemoryType((v >> attrIndxShift) & attrIndxMask), } } @@ -191,11 +191,12 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { if opts.User { v |= user - v |= mtNormal } else { v = v &^ user - v |= mtNormal } + + v |= uintptr(opts.MemoryType&attrIndxMask) << attrIndxShift + atomic.StoreUintptr((*uintptr)(p), v) } @@ -209,7 +210,7 @@ func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) { // This should never happen. panic("unaligned physical address!") } - v := addr | typeTable | protDefault | mtNormal + v := addr | typeTable | protDefault | (uintptr(hostarch.MemoryTypeWriteBack) << attrIndxShift) atomic.StoreUintptr((*uintptr)(p), v) } diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go index c27b3b10a9..2c08cf8f4b 100644 --- a/pkg/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/ring0/pagetables/pagetables_amd64_test.go @@ -74,3 +74,13 @@ func TestSplit2MPage(t *testing.T) { {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: hostarch.Read}}, }) } + +func TestNumMemoryTypes(t *testing.T) { + // The PAT accommodates up to 8 entries. However, PTE.Set() currently + // assumes that NumMemoryTypes <= 4, since the location of the most + // significant bit of the PAT index in page table entries varies depending + // on page size (and is never bit 5 == writeThroughShift + 2). 
+ if hostarch.NumMemoryTypes > 4 { + t.Errorf("PTE.Set() and PTE.Opts() must be altered to handle %d MemoryTypes", hostarch.NumMemoryTypes) + } +} diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go index 1c919ec7d8..0c73e0f728 100644 --- a/pkg/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/ring0/pagetables/pagetables_arm64_test.go @@ -79,3 +79,10 @@ func TestSplit2MPage(t *testing.T) { {0x0000ff0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: hostarch.Read, User: true}}, }) } + +func TestNumMemoryTypes(t *testing.T) { + // MAIR accommodates up to 8 entries. + if hostarch.NumMemoryTypes > 8 { + t.Errorf("PTE.Set() and PTE.Opts() must be altered to map %d MemoryTypes to a smaller set of MAIR entries", hostarch.NumMemoryTypes) + } +} diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go index dc98d8452c..2109ccdf33 100644 --- a/pkg/ring0/pagetables/pagetables_x86.go +++ b/pkg/ring0/pagetables/pagetables_x86.go @@ -49,16 +49,17 @@ func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 { // Bits in page table entries. const ( - present = 0x001 - writable = 0x002 - user = 0x004 - writeThrough = 0x008 - cacheDisable = 0x010 - accessed = 0x020 - dirty = 0x040 - super = 0x080 - global = 0x100 - optionMask = executeDisable | 0xfff + present = 0x001 + writable = 0x002 + user = 0x004 + accessed = 0x020 + dirty = 0x040 + super = 0x080 + global = 0x100 + optionMask = executeDisable | 0xfff + + writeThroughShift = 3 + patIndexMask = 0x3 ) // MapOpts are x86 options. @@ -71,6 +72,9 @@ type MapOpts struct { // User indicates the page is a user page. User bool + + // MemoryType is the memory type. + MemoryType hostarch.MemoryType } // PTE is a page table entry. 
@@ -103,8 +107,9 @@ func (p *PTE) Opts() MapOpts { Write: v&writable != 0, Execute: v&executeDisable == 0, }, - Global: v&global != 0, - User: v&user != 0, + Global: v&global != 0, + User: v&user != 0, + MemoryType: hostarch.MemoryType((v >> writeThroughShift) & patIndexMask), } } @@ -154,6 +159,7 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { if opts.AccessType.Write { v |= writable | dirty } + v |= uintptr(opts.MemoryType&patIndexMask) << writeThroughShift if p.IsSuper() { // Note that this is inherited from the previous instance. Set // does not change the value of Super. See above. @@ -172,7 +178,7 @@ func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) { // This should never happen. panic("unaligned physical address!") } - v := addr | present | user | writable | accessed | dirty + v := addr | present | user | writable | accessed | dirty | (uintptr(hostarch.MemoryTypeWriteBack) << writeThroughShift) atomic.StoreUintptr((*uintptr)(p), v) } diff --git a/pkg/sentry/devices/nvproxy/frontend.go b/pkg/sentry/devices/nvproxy/frontend.go index 9e6a2d1804..171b320bdc 100644 --- a/pkg/sentry/devices/nvproxy/frontend.go +++ b/pkg/sentry/devices/nvproxy/frontend.go @@ -46,8 +46,12 @@ type frontendDevice struct { minor uint32 } +func (dev *frontendDevice) isCtlDevice() bool { + return dev.minor == nvgpu.NV_CONTROL_DEVICE_MINOR +} + func (dev *frontendDevice) basename() string { - if dev.minor == nvgpu.NV_CONTROL_DEVICE_MINOR { + if dev.isCtlDevice() { return "nvidiactl" } return fmt.Sprintf("nvidia%d", dev.minor) @@ -134,8 +138,9 @@ type frontendFD struct { // These fields are marked nosave since we do not automatically reinvoke // NV_ESC_RM_MAP_MEMORY after restore, so restored FDs have no // mmap_context. - mmapLength uint64 `state:"nosave"` - mmapInternal uintptr `state:"nosave"` + mmapLength uint64 `state:"nosave"` + mmapInternal uintptr `state:"nosave"` + mmapMemType hostarch.MemoryType `state:"nosave"` // clients are handles of clients owned by this frontendFD. 
clients is // protected by dev.nvp.objsMu. @@ -493,6 +498,7 @@ func rmAllocMemorySystem(fi *frontendIoctlState, ioctlParams *nvgpu.IoctlNVOS02P fi.fd.dev.nvp.objAdd(fi.ctx, ioctlParams.Params.HRoot, ioctlParams.Params.HObjectNew, ioctlParams.Params.HClass, &miscObject{}, ioctlParams.Params.HObjectParent) if createMmapCtx { mapFile.mmapLength = ioctlParams.Params.Limit + 1 + mapFile.mmapMemType = getMemoryType(fi.ctx, mapFile.dev, nvgpu.NVOS33_FLAGS_CACHING_TYPE_DEFAULT) } } fi.fd.dev.nvp.objsUnlock() @@ -1343,6 +1349,15 @@ func rmMapMemory(fi *frontendIoctlState) (uintptr, error) { } if ioctlParams.Params.Status == nvgpu.NV_OK { mapFile.mmapLength = ioctlParams.Params.Length + // src/nvidia/arch/nvalloc/unix/src/escape.c:RmIoctl() forces + // NVOS33_FLAGS_CACHING_TYPE_DEFAULT, but resMap implementations may + // override the "caching type", so in general the memory type depends + // on the mapped object. Conveniently, when this occurs, the caching + // type in pParms->flags must be updated for the call to + // rm_create_mmap_context(), and pParms is subsequently copied back out + // by kernel-open/nvidia/nv.c:nvidia_ioctl(), so we can get the final + // caching type from the updated ioctl params. 
+ mapFile.mmapMemType = getMemoryType(fi.ctx, mapFile.dev, (ioctlParams.Params.Flags>>nvgpu.NVOS33_FLAGS_CACHING_TYPE_SHIFT)&nvgpu.NVOS33_FLAGS_CACHING_TYPE_MASK) } ioctlParams.FD = origFD diff --git a/pkg/sentry/devices/nvproxy/frontend_mmap.go b/pkg/sentry/devices/nvproxy/frontend_mmap.go index 8f15a2c490..7a99a43002 100644 --- a/pkg/sentry/devices/nvproxy/frontend_mmap.go +++ b/pkg/sentry/devices/nvproxy/frontend_mmap.go @@ -15,8 +15,10 @@ package nvproxy import ( + "gvisor.dev/gvisor/pkg/abi/nvgpu" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -75,6 +77,13 @@ func (mf *frontendFDMemmapFile) IncRef(fr memmap.FileRange, memCgID uint32) { func (mf *frontendFDMemmapFile) DecRef(fr memmap.FileRange) { } +// MemoryType implements memmap.File.MemoryType. +func (mf *frontendFDMemmapFile) MemoryType() hostarch.MemoryType { + mf.fd.mmapMu.Lock() + defer mf.fd.mmapMu.Unlock() + return mf.fd.mmapMemType +} + // DataFD implements memmap.File.DataFD. func (mf *frontendFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil @@ -84,3 +93,60 @@ func (mf *frontendFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { func (mf *frontendFDMemmapFile) FD() int { return int(mf.fd.hostFD) } + +func getMemoryType(ctx context.Context, mapDev *frontendDevice, cachingType uint32) hostarch.MemoryType { + // Compare kernel-open/nvidia/nv-mmap.c:nvidia_mmap_helper() => + // nv_encode_caching(). Each NVOS33_FLAGS_CACHING_TYPE_* corresponds + // directly to a NV_MEMORY_*; this is checked by asserts in + // src/nvidia/src/kernel/rmapi/mapping_cpu.c. + if !mapDev.isCtlDevice() { + // In the !NV_IS_CTL_DEVICE() branch of nvidia_mmap_helper(), + // mmap_context->caching is only honored if IS_FB_OFFSET() and + // !IS_UD_OFFSET(). 
We can get the information we need for + // IS_FB_OFFSET() from NV_ESC_CARD_INFO, but there doesn't seem to be + // any way for us to replicate IS_UD_OFFSET(). So we must + // conservatively specify uncacheable, which applies in all other + // cases. (This is unfortunate since it prevents us from using + // write-combining on framebuffer memory.) + if log.IsLogging(log.Debug) { + ctx.Debugf("nvproxy: inferred memory type %v for mapping of %s", hostarch.MemoryTypeUncached, mapDev.basename()) + } + return hostarch.MemoryTypeUncached + } + var memType hostarch.MemoryType + switch cachingType { + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_CACHED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_WRITEBACK: + // Note that nv_encode_caching() doesn't actually handle + // NV_MEMORY_WRITEBACK, so this case should fail during host mmap. + memType = hostarch.MemoryTypeWriteBack + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_DEFAULT: + // NOTE(gvisor.dev/issue/11436): In the NV_IS_CTL_DEVICE() branch of + // nvidia_mmap_helper(), memory_type is never + // NV_MEMORY_TYPE_FRAMEBUFFER, so this corresponds to + // kernel-open/common/inc/nv-pgprot.h:NV_PGPROT_WRITE_COMBINED(). On + // ARM64, NV_PGPROT_WRITE_COMBINED() => NV_PGPROT_UNCACHED() implicitly + // uses MT_NORMAL (equivalent to our MemoryTypeWriteBack) rather than + // MT_NORMAL_NC when nvos_is_chipset_io_coherent() => + // PDB_PROP_CL_IS_CHIPSET_IO_COHERENT is true, which seems to be the + // case on most systems. We should clarify whether this is an + // optimization or required for correctness (cf. Armv8-M Architecture + // Reference Manual Sec. B7.16 "Mismatched memory attributes"), and + // subsequently whether to replicate it. 
+ memType = hostarch.MemoryTypeWriteCombine + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_UNCACHED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_UNCACHED_WEAK: + // NOTE(gvisor.dev/issue/11436): On ARM64, nv_encode_caching() + // distinguishes between NV_PGPROT_UNCACHED() => MT_NORMAL/MT_NORMAL_NC + // and NV_PGPROT_UNCACHED_DEVICE() => MT_DEVICE_nGnRnE; in context, the + // former is used in the !peer_io (NV_MEMORY_TYPE_SYSTEM) case and the + // latter is used in the peer_io (NV_MEMORY_TYPE_DEVICE_MMIO) case. As + // above, we should clarify whether we need to replicate this behavior. + memType = hostarch.MemoryTypeUncached + default: + ctx.Warningf("nvproxy: unknown caching type %d", cachingType) + memType = hostarch.MemoryTypeUncached + } + if log.IsLogging(log.Debug) { + ctx.Debugf("nvproxy: inferred memory type %v for caching type %d", memType, cachingType) + } + return memType +} diff --git a/pkg/sentry/devices/nvproxy/uvm_mmap.go b/pkg/sentry/devices/nvproxy/uvm_mmap.go index f063b6c251..2241d4f37f 100644 --- a/pkg/sentry/devices/nvproxy/uvm_mmap.go +++ b/pkg/sentry/devices/nvproxy/uvm_mmap.go @@ -63,6 +63,8 @@ func (fd *uvmFD) InvalidateUnsavable(ctx context.Context) error { // +stateify savable type uvmFDMemmapFile struct { + memmap.DefaultMemoryType + fd *uvmFD } diff --git a/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go b/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go index ee06484856..b27eab9fbc 100644 --- a/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go @@ -16,10 +16,7 @@ package accel import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -61,7 +58,7 @@ func (fd *accelFD) InvalidateUnsavable(ctx context.Context) error { } type accelFDMemmapFile struct { - memmap.NoBufferedIOFallback + 
memmap.NoMapInternal fd *accelFD } @@ -74,12 +71,6 @@ func (mf *accelFDMemmapFile) IncRef(memmap.FileRange, uint32) { func (mf *accelFDMemmapFile) DecRef(fr memmap.FileRange) { } -// MapInternal implements memmap.File.MapInternal. -func (mf *accelFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("accel: rejecting accelFDMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - // DataFD implements memmap.File.DataFD. func (mf *accelFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/devices/tpuproxy/vfio/BUILD b/pkg/sentry/devices/tpuproxy/vfio/BUILD index d79ef488dd..d63539078b 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/BUILD +++ b/pkg/sentry/devices/tpuproxy/vfio/BUILD @@ -13,7 +13,6 @@ go_library( "pci_device_fd.go", "pci_device_fd_mmap.go", "tpu_fd.go", - "tpu_fd_mmap.go", "vfio.go", "vfio_fd.go", "vfio_fd_mmap.go", diff --git a/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go index 426804806f..8750bd7de0 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go @@ -91,6 +91,13 @@ func (mf *pciDeviceFdMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.Ac return mf.pfm.MapInternal(fr, int(mf.fd.hostFD), at.Write) } +// MemoryType implements memmap.File.MemoryType. +func (mf *pciDeviceFdMemmapFile) MemoryType() hostarch.MemoryType { + // drivers/vfio/pci/vfio_pci_core.c:vfio_pci_core_mmap() uses + // pgprot_noncached(). + return hostarch.MemoryTypeUncached +} + // DataFD implements memmap.File.DataFD. 
func (mf *pciDeviceFdMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd.go b/pkg/sentry/devices/tpuproxy/vfio/tpu_fd.go index 94730f737d..cf78257fcc 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd.go +++ b/pkg/sentry/devices/tpuproxy/vfio/tpu_fd.go @@ -59,10 +59,9 @@ type tpuFD struct { vfs.DentryMetadataFileDescriptionImpl vfs.NoLockFD - hostFD int32 - device *tpuDevice - queue waiter.Queue - memmapFile tpuFDMemmapFile + hostFD int32 + device *tpuDevice + queue waiter.Queue } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go deleted file mode 100644 index 7e98dfa3bf..0000000000 --- a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2024 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfio - -import ( - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. 
-func (fd *tpuFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return vfs.GenericProxyDeviceConfigureMMap(&fd.vfsfd, fd, opts) -} - -// AddMapping implements memmap.Mappable.AddMapping. -func (fd *tpuFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { - return nil -} - -// RemoveMapping implements memmap.Mappable.RemoveMapping. -func (fd *tpuFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { -} - -// CopyMapping implements memmap.Mappable.CopyMapping. -func (fd *tpuFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { - return nil -} - -// Translate implements memmap.Mappable.Translate. -func (fd *tpuFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { - return []memmap.Translation{ - { - Source: optional, - File: &fd.memmapFile, - Offset: optional.Start, - Perms: hostarch.AnyAccess, - }, - }, nil -} - -// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. -func (fd *tpuFD) InvalidateUnsavable(ctx context.Context) error { - return nil -} - -type tpuFDMemmapFile struct { - memmap.NoBufferedIOFallback - - fd *tpuFD -} - -// IncRef implements memmap.File.IncRef. -func (mf *tpuFDMemmapFile) IncRef(memmap.FileRange, uint32) { -} - -// DecRef implements memmap.File.DecRef. -func (mf *tpuFDMemmapFile) DecRef(fr memmap.FileRange) { -} - -// MapInternal implements memmap.File.MapInternal. -func (mf *tpuFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("tpuproxy: rejecting tpuFdMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - -// DataFD implements memmap.File.DataFD. 
-func (mf *tpuFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { - return mf.FD(), nil -} - -// FD implements memmap.File.FD. -func (mf *tpuFDMemmapFile) FD() int { - return int(mf.fd.hostFD) -} diff --git a/pkg/sentry/devices/tpuproxy/vfio/vfio.go b/pkg/sentry/devices/tpuproxy/vfio/vfio.go index 4485063451..097a39209c 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/vfio.go +++ b/pkg/sentry/devices/tpuproxy/vfio/vfio.go @@ -105,7 +105,6 @@ func (dev *tpuDevice) Open(ctx context.Context, mnt *vfs.Mount, d *vfs.Dentry, o unix.Close(hostFD) return nil, err } - fd.memmapFile.fd = fd return &fd.vfsfd, nil } diff --git a/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go index 361a0cc613..0e14b4c598 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go @@ -16,10 +16,7 @@ package vfio import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -61,7 +58,7 @@ func (fd *vfioFD) InvalidateUnsavable(ctx context.Context) error { } type vfioFDMemmapFile struct { - memmap.NoBufferedIOFallback + memmap.NoMapInternal fd *vfioFD } @@ -74,12 +71,6 @@ func (mf *vfioFDMemmapFile) IncRef(memmap.FileRange, uint32) { func (mf *vfioFDMemmapFile) DecRef(fr memmap.FileRange) { } -// MapInternal implements memmap.File.MapInternal. -func (mf *vfioFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("tpuproxy: rejecting vfioFdMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - // DataFD implements memmap.File.DataFD. 
func (mf *vfioFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/fsimpl/erofs/regular_file.go b/pkg/sentry/fsimpl/erofs/regular_file.go index 6d5617153f..0dd37a095a 100644 --- a/pkg/sentry/fsimpl/erofs/regular_file.go +++ b/pkg/sentry/fsimpl/erofs/regular_file.go @@ -200,6 +200,7 @@ func (i *inode) InvalidateUnsavable(ctx context.Context) error { // +stateify savable type imageMemmapFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback image *erofs.Image diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 42836a3761..f6b83f69a0 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -920,6 +920,7 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { // // +stateify savable type dentryPlatformFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback *dentry diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 1a423f9c2e..d974e05f9b 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -43,6 +43,7 @@ import ( type specialFileFD struct { fileDescription specialFDEntry + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // releaseMu synchronizes the closing of fd.handle with fd.sync(). 
It's safe diff --git a/pkg/sentry/fsimpl/kernfs/mmap_util.go b/pkg/sentry/fsimpl/kernfs/mmap_util.go index 85ca66bf09..cb01d194bb 100644 --- a/pkg/sentry/fsimpl/kernfs/mmap_util.go +++ b/pkg/sentry/fsimpl/kernfs/mmap_util.go @@ -28,6 +28,7 @@ import ( // // +stateify savable type inodePlatformFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // hostFD contains the host fd that this file was originally created from, diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 66c9a4731f..d120e52e48 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -54,6 +54,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/safemem", diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index f4f2226b1a..eb5b18bb4c 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -19,7 +19,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" ) @@ -470,15 +472,14 @@ type File interface { // reference is held on the mapped pages. MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) - // DataFD blocks until offsets fr in the file contain valid data, then - // returns the file descriptor represented by the File. - // - // Note that fr.Start and fr.End need not be page-aligned. + // MemoryType returns the memory type that must be used by page table + // entries mapping memory returned by MapInternal. Most implementations of + // File can embed DefaultMemoryType to obtain an appropriate implementation + // of MemoryType. // // Preconditions: - // * fr.Length() > 0. - // * At least one reference must be held on all pages in fr. - DataFD(fr FileRange) (int, error) + // * MapInternal() returned a non-empty BlockSeq. 
+ MemoryType() hostarch.MemoryType // BufferReadAt reads len(dst) bytes from the file into dst, starting at // file offset off. It returns the number of bytes read. Like @@ -506,6 +507,16 @@ type File interface { // * At least one reference must be held on all written pages. BufferWriteAt(off uint64, src []byte) (uint64, error) + // DataFD blocks until offsets fr in the file contain valid data, then + // returns the file descriptor represented by the File. + // + // Note that fr.Start and fr.End need not be page-aligned. + // + // Preconditions: + // * fr.Length() > 0. + // * At least one reference must be held on all pages in fr. + DataFD(fr FileRange) (int, error) + // FD returns the file descriptor represented by the File. The returned // file descriptor should not be used to implement // platform.AddressSpace.MapFile, since the contents of the File may not be @@ -513,6 +524,15 @@ type File interface { FD() int } +// DefaultMemoryType implements File.MemoryType() for implementations of File +// backed by ordinary system memory. +type DefaultMemoryType struct{} + +// MemoryType implements File.MemoryType. +func (DefaultMemoryType) MemoryType() hostarch.MemoryType { + return hostarch.MemoryTypeWriteBack +} + // BufferedIOFallbackErr is returned (by value) by implementations of // File.MapInternal() that cannot succeed, but can still support memory-mapped // I/O by falling back to buffered reads and writes. @@ -538,6 +558,30 @@ func (NoBufferedIOFallback) BufferWriteAt(off uint64, src []byte) (uint64, error panic("unimplemented: memmap.File.MapInternal() should not have returned BufferedIOFallbackErr") } +// NoMapInternal implements File.MapInternal(), File.MemoryType(), +// File.BufferReadAt(), and File.BufferWriteAt() for implementations of File +// that do not support MapInternal. +type NoMapInternal struct { + NoBufferedIOFallback +} + +// MapInternal implements File.MapInternal. 
+func (NoMapInternal) MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { + // There is no equivalent to this situation in Linux, and hence no clear + // errno to return. We choose ENODEV since mmap() returns this in a + // somewhat similar case (mmap() called on a non-mmappable file), and + // ENODEV is relatively uncommon (compared to e.g. EINVAL) so it should be + // somewhat more distinctive if it results in an application-reported + // error. + log.Traceback("no memmap.File.MapInternal implementation available, returning ENODEV") + return safemem.BlockSeq{}, linuxerr.ENODEV +} + +// MemoryType implements File.MemoryType. +func (NoMapInternal) MemoryType() hostarch.MemoryType { + panic("memmap.File.MemoryType called without MapInternal support") +} + // FileRange represents a range of uint64 offsets into a File. // // type FileRange <generated using go_generics> diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index d927b17026..0e7fa82a9f 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -91,6 +91,6 @@ func (pseg pmaIterator) debugStringEntryLocked() []byte { b.WriteByte('s') } - fmt.Fprintf(&b, " %08x %T\n", pma.off, pma.file) + fmt.Fprintf(&b, " %s %08x %T\n", pma.file.MemoryType().ShortString(), pma.off, pma.file) return b.Bytes() } diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index e7d8f8dc6f..281389c97e 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -43,6 +43,7 @@ const pagesPerHugePage = hostarch.HugePageSize / hostarch.PageSize // MemoryFile is a memmap.File whose pages may be allocated to arbitrary // users. type MemoryFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // MemoryFile owns a single backing file.
Each page in the backing file is diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 1b16dcb3c5..41dc9e6a2f 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -98,8 +98,9 @@ func (as *addressSpace) Touch(c *vCPU) bool { } type hostMapEntry struct { - addr uintptr - length uintptr + addr uintptr + length uintptr + memType hostarch.MemoryType } // mapLocked maps the given host entry. @@ -130,6 +131,7 @@ func (as *addressSpace) mapLocked(addr hostarch.Addr, m hostMapEntry, at hostarc inv = as.pageTables.Map(addr, length, pagetables.MapOpts{ AccessType: at, User: true, + MemoryType: m.memType, }, physical) || inv m.addr += length m.length -= length @@ -161,6 +163,7 @@ func (as *addressSpace) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.Fil if err != nil { return err } + mt := f.MemoryType() // See block in mapLocked. as.pageTables.Allocator.(*allocator).cpu = as.machine.Get() @@ -186,8 +189,9 @@ func (as *addressSpace) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.Fil // Perform the mapping. prev := as.mapLocked(addr, hostMapEntry{ - addr: b.Addr(), - length: uintptr(b.Len()), + addr: b.Addr(), + length: uintptr(b.Len()), + memType: mt, }, at) inv = inv || prev addr += hostarch.Addr(b.Len()) diff --git a/pkg/sentry/platform/kvm/kvm_const_arm64.go b/pkg/sentry/platform/kvm/kvm_const_arm64.go index fa51e9180b..de40b68de4 100644 --- a/pkg/sentry/platform/kvm/kvm_const_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_const_arm64.go @@ -119,12 +119,6 @@ const ( // Arm64: Memory Attribute Indirection Register EL1. 
const ( - _MT_DEVICE_nGnRnE = 0 - _MT_DEVICE_nGnRE = 1 - _MT_DEVICE_GRE = 2 - _MT_NORMAL_NC = 3 - _MT_NORMAL = 4 - _MT_NORMAL_WT = 5 _MT_ATTR_DEVICE_nGnRnE = 0x00 _MT_ATTR_DEVICE_nGnRE = 0x04 _MT_ATTR_DEVICE_GRE = 0x0c @@ -132,7 +126,6 @@ const ( _MT_ATTR_NORMAL_WT = 0xbb _MT_ATTR_NORMAL = 0xff _MT_ATTR_MASK = 0xff - _MT_EL1_INIT = (_MT_ATTR_DEVICE_nGnRnE << (_MT_DEVICE_nGnRnE * 8)) | (_MT_ATTR_DEVICE_nGnRE << (_MT_DEVICE_nGnRE * 8)) | (_MT_ATTR_DEVICE_GRE << (_MT_DEVICE_GRE * 8)) | (_MT_ATTR_NORMAL_NC << (_MT_NORMAL_NC * 8)) | (_MT_ATTR_NORMAL << (_MT_NORMAL * 8)) | (_MT_ATTR_NORMAL_WT << (_MT_NORMAL_WT * 8)) ) const ( diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index cb75ddf3c2..76c1179871 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -141,6 +141,11 @@ func (c *vCPU) initArchState() error { return err } + // Set up the PAT as required by ring0/pagetables. + if err := c.setPAT(); err != nil { + return err + } + // Set the entrypoint for the kernel. kernelUserRegs.RIP = uint64(ring0.AddrOfStart()) kernelUserRegs.RAX = uint64(reflect.ValueOf(&c.CPU).Pointer()) diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 0d28780022..8452cc7241 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -23,6 +23,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/hostsyscall" ) @@ -72,6 +73,36 @@ func (c *vCPU) setCPUID() error { return nil } +func (c *vCPU) setPAT() error { + // See Intel SDM Vol. 3, Sec. 13.12.2 "IA32_PAT MSR", or AMD64 APM Vol. 2, + // Sec. 7.8.1 "PAT Register". 
+ const ( + _MSR_IA32_PAT = 0x277 + + _PA_UC = 0x00 + _PA_WC = 0x01 + _PA_WB = 0x06 + ) + registers := modelControlRegisters{ + nmsrs: 1, + } + registers.entries[0].index = _MSR_IA32_PAT + if hostarch.NumMemoryTypes != 3 { + panic("additional memory types must be configured in PAT") + } + registers.entries[0].data |= _PA_WB << (hostarch.MemoryTypeWriteBack * 8) + registers.entries[0].data |= _PA_WC << (hostarch.MemoryTypeWriteCombine * 8) + registers.entries[0].data |= _PA_UC << (hostarch.MemoryTypeUncached * 8) + if errno := hostsyscall.RawSyscallErrno( + unix.SYS_IOCTL, + uintptr(c.fd), + KVM_SET_MSRS, + uintptr(unsafe.Pointer(&registers))); errno != 0 { + return fmt.Errorf("error setting PAT: %v", errno) + } + return nil +} + // getTSCFreq gets the TSC frequency. // // If mustSucceed is true, then this function panics on error. diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 4420e6fb80..53f2924e27 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -91,7 +91,13 @@ func (c *vCPU) initArchState() error { } // mair_el1 - data = _MT_EL1_INIT + if hostarch.NumMemoryTypes != 3 { + panic("additional memory types must be configured in MAIR") + } + data = 0 + data |= _MT_ATTR_NORMAL << (hostarch.MemoryTypeWriteBack * 8) + data |= _MT_ATTR_NORMAL_NC << (hostarch.MemoryTypeWriteCombine * 8) + data |= _MT_ATTR_DEVICE_nGnRnE << (hostarch.MemoryTypeUncached * 8) reg.id = _KVM_ARM64_REGS_MAIR_EL1 if err := c.setOneRegister(&reg); err != nil { return err