Skip to content

Commit

Permalink
packages/nixos: bare-metal Kata GPU support
Browse files Browse the repository at this point in the history
This adds the necessary bits to facilitate GPU support in bare-metal Kata deployments to our NixOS image build.
  • Loading branch information
msanft committed Jan 14, 2025
1 parent a695965 commit 74a8665
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 10 deletions.
12 changes: 4 additions & 8 deletions packages/by-name/kata/kata-runtime/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ buildGoModule rec {

# Correctly type QEMU QMP command options for the `device_add` command.
# See: https://github.com/kata-containers/kata-containers/pull/10719
# TODO(msanft): Remove once upstream PR is released.
# TODO(msanft): Remove once upstream PR is released (v3.13.0).
./0019-runtime-use-actual-booleans-for-QMP-device_add-boole.patch
];
};
Expand Down Expand Up @@ -180,13 +180,9 @@ buildGoModule rec {
# is used when Kata starts a VM.
# For example, this command should do the job:
# `journalctl -t kata -l --no-pager | grep launching | tail -1`
passthru = {
inherit src;

cmdline = {
default = "tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/vda1 rootflags=ro rootfstype=erofs console=hvc0 console=hvc1 quiet systemd.show_status=false panic=1 nr_cpus=1 selinux=0 systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket scsi_mod.scan=none";
debug = "tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/vda1 rootflags=ro rootfstype=erofs console=hvc0 console=hvc1 debug systemd.show_status=true systemd.log_level=debug panic=1 nr_cpus=1 selinux=0 systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket scsi_mod.scan=none agent.log=debug agent.debug_console agent.debug_console_vport=1026";
};
passthru.cmdline = {
default = "tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/vda1 rootflags=ro rootfstype=erofs console=hvc0 console=hvc1 quiet systemd.show_status=false panic=1 nr_cpus=1 selinux=0 systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket scsi_mod.scan=none";
debug = "tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/vda1 rootflags=ro rootfstype=erofs console=hvc0 console=hvc1 debug systemd.show_status=true systemd.log_level=debug panic=1 nr_cpus=1 selinux=0 systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket scsi_mod.scan=none agent.log=debug agent.debug_console agent.debug_console_vport=1026";
};

meta.mainProgram = "containerd-shim-kata-v2";
Expand Down
50 changes: 48 additions & 2 deletions packages/nixos/gpu.nix
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,34 @@ let
});
};
});

# nix-store-mount-hook mounts the VM's nix store into the container.
#
# The script reads the container state JSON from stdin, takes the bundle path
# from its `bundle` field and overlay-mounts /nix/store onto
# `<bundle>/rootfs/nix/store`. The writable upper and work directories live
# under /run/kata-containers/nix-overlays/<id>, where <id> is the basename of
# the bundle path.
# TODO(burgerdev): only do that for containers that actually get a GPU device.
nix-store-mount-hook = pkgs.writeShellApplication {
  name = "nix-store-mount-hook";
  runtimeInputs = with pkgs; [
    coreutils
    util-linux
    jq
  ];
  text = ''
    # Reads from the state JSON supplied on stdin.
    # `-e` makes jq exit non-zero when `.bundle` is missing or null. Without
    # it, jq prints the literal string "null" and we would silently create and
    # mount "null/rootfs/nix/store" relative to the working directory.
    # The non-zero exit aborts the script (writeShellApplication enables errexit).
    bundle="$(jq -er .bundle)"
    if [ -z "$bundle" ]; then
      echo "nix-store-mount-hook: empty bundle path in container state" >&2
      exit 1
    fi

    rootfs="$bundle/rootfs"
    id="$(basename "$bundle")"

    # Mount point inside the container rootfs; it mirrors the path in the VM.
    lower=/nix/store
    target="$rootfs$lower"
    mkdir -p "$target"

    # Per-container scratch directories required by overlayfs.
    overlays="/run/kata-containers/nix-overlays/$id"
    upperdir="$overlays/upperdir"
    workdir="$overlays/workdir"
    mkdir -p "$upperdir" "$workdir"

    # The VM's store is listed first in lowerdir, followed by whatever the
    # container image already has at that path.
    mount -t overlay -o "lowerdir=$lower:$target,upperdir=$upperdir,workdir=$workdir" none "$target"
  '';
};
in

{
Expand All @@ -90,15 +118,33 @@ in
videoAcceleration = false;
};

# Configure nvidia-persistenced for use with CC GPUs (e.g. H100).
# TODO(msanft): This needs to be adjusted for non-CC-GPUs.
# See: https://docs.nvidia.com/cc-deployment-guide-snp.pdf (Page 23 & 24)
systemd.services."nvidia-persistenced" = {
wantedBy = [ "kata-containers.target" ];
serviceConfig.ExecStart = lib.mkForce "${lib.getExe config.hardware.nvidia.package.persistenced} --uvm-persistence-mode --verbose";
};

# kata-containers.target needs to pull this in so that we get a valid
# CDI configuration inside the PodVM. This is not strictly necessary yet, as
# we currently use the legacy mode, but it will be once we switch to CDI.
systemd.services."nvidia-container-toolkit-cdi-generator".wantedBy = [ "kata-containers.target" ];

hardware.nvidia-container-toolkit.enable = true;

# Make NVIDIA the "default" graphics driver to replace Mesa,
# which saves us another Perl dependency.
hardware.graphics.package = nvidiaPackage;
hardware.graphics.package32 = nvidiaPackage;

image.repart.partitions."10-root".contents."/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh".source =
lib.getExe pkgs.nvidia-ctk-oci-hook;
image.repart.partitions."10-root".contents = {
"/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh".source =
lib.getExe pkgs.nvidia-ctk-oci-hook;
"/usr/share/oci/hooks/prestart/nix-store-mount-hook.sh".source = lib.getExe nix-store-mount-hook;
};

environment.systemPackages = [ pkgs.nvidia-ctk-with-config ];

boot.initrd.kernelModules = [
# Extra kernel modules required to talk to the GPU in CC-Mode.
Expand Down

0 comments on commit 74a8665

Please sign in to comment.