Skip to content

Commit

Permalink
Remount /dev when user namespace is used to allow accessing device nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
nbdd0121 committed Oct 8, 2024
1 parent 3bcfdc0 commit c54ec38
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
async-stream = "0.3"
udev = "0.8"
rustix = { version = "0.38", features = ["fs", "stdio", "process", "thread", "pipe"] }
rustix = { version = "0.38", features = ["fs", "stdio", "process", "thread", "pipe", "mount"] }
bitflags = "2"
once_cell = "1"
humantime = "2"
Expand Down
111 changes: 107 additions & 4 deletions src/runc/container.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use std::fs::File;
use std::fs::{File, Permissions};
use std::io::{BufRead, BufReader, Seek};
use std::os::fd::AsFd;
use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt};
use std::path::Path;

use anyhow::{bail, Context, Result};
use rustix::fs::{FileType, Mode};
use rustix::fs::{FileType, Mode, UnmountFlags};
use rustix::mount::{FsMountFlags, FsOpenFlags, MountAttrFlags, MoveMountFlags};
use rustix::process::{Pid, Signal};
use tokio::io::unix::AsyncFd;
use tokio::io::Interest;
Expand Down Expand Up @@ -89,13 +92,113 @@ impl Container {
Box::new(DeviceAccessControllerV2::new(&state.cgroup_paths.unified)?)
};

Ok(Self {
let container = Self {
uid: config.process.user.uid,
gid: config.process.user.gid,
pid: Pid::from_raw(state.init_process_pid.try_into()?).context("Invalid PID")?,
wait: recv,
cgroup_device_filter: Mutex::new(cgroup_device_filter),
})
};

container.remount_dev()?;

Ok(container)
}

/// Remount /dev inside the init namespace.
///
/// When user namespace is used, the /dev created by runc will be mounted inside the user namespace,
/// and will automatically gain SB_I_NODEV flag as a kernel security measure.
///
/// This is doing no favour for us because that flag will cause device node within it to be unopenable.
fn remount_dev(&self) -> Result<()> {
let ns = crate::util::namespace::MntNamespace::of_pid(self.pid)?;
if !ns.in_user_ns() {
return Ok(());
}

log::info!("Remount /dev to allow device node access");

// Create a tmpfs and mount in the init namespace.
// Note that while we have "mounted" it, it is not associated with any mount point yet.
// The actual mounting will happen after we moved into the mount namespace.
let dev_fs = rustix::mount::fsopen("tmpfs", FsOpenFlags::empty())?;
rustix::mount::fsconfig_create(dev_fs.as_fd())?;
let dev_mnt = rustix::mount::fsmount(
dev_fs.as_fd(),
FsMountFlags::FSMOUNT_CLOEXEC,
MountAttrFlags::empty(),
)?;

ns.enter(|| -> Result<_> {
// Don't interfere us setting the desired mode!
rustix::process::umask(Mode::empty());

// Move the existing mount elsewhere.
std::fs::create_dir("/olddev")?;
rustix::mount::mount_move("/dev", "/olddev")?;

// Move to our newly created `/dev` mount.
rustix::mount::move_mount(
dev_mnt.as_fd(),
"",
rustix::fs::CWD,
"/dev",
MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
)?;

// Make sure the /dev is now owned by the container root not host root.
std::os::unix::fs::chown("/dev", Some(ns.uid(0)?), Some(ns.gid(0)?))?;
std::fs::set_permissions("/dev", Permissions::from_mode(0o755))?;

for file in std::fs::read_dir("/olddev")? {
let file = file?;
let metadata = file.metadata()?;
let new_path = Path::new("/dev").join(file.file_name());

if file.file_name() == "console" {
// `console` is special, it's a file but it should be bind-mounted.
drop(
std::fs::OpenOptions::new()
.create(true)
.write(true)
.open(&new_path)?,
);
rustix::mount::mount_move(file.path(), new_path)?;
} else if metadata.file_type().is_dir() {
// This is a mount point, e.g. pts, mqueue, shm.
std::fs::create_dir(&new_path)?;
rustix::mount::mount_move(file.path(), new_path)?;
} else if metadata.file_type().is_symlink() {
// Recreate symlinks
let target = std::fs::read_link(file.path())?;
std::os::unix::fs::symlink(target, new_path)?;
} else if metadata.file_type().is_char_device() {
// Recreate device
let dev = metadata.rdev();
rustix::fs::mknodat(
rustix::fs::CWD,
&new_path,
FileType::CharacterDevice,
Mode::from_raw_mode(metadata.mode()),
dev,
)?;

// The old file might be a bind mount. Try umount it.
let _ = rustix::mount::unmount(file.path(), UnmountFlags::DETACH);
} else {
anyhow::bail!("Unknown file present in /dev");
}
}

// Now we have moved everything to the new /dev, obliterate the old one.
rustix::mount::unmount("/olddev", UnmountFlags::DETACH)?;
std::fs::remove_dir("/olddev")?;

Ok(())
})??;

Ok(())
}

pub async fn kill(&self, signal: Signal) -> Result<()> {
Expand Down
5 changes: 5 additions & 0 deletions src/util/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ impl MntNamespace {
})
}

/// Check whether this namespace belongs to a user namespace.
///
/// Returns `false` only when both the UID map and the GID map consist of exactly one
/// `(0, 0, u32::MAX)` entry (presumably the identity mapping reported for the initial
/// user namespace); any other mapping yields `true`.
pub fn in_user_ns(&self) -> bool {
    let uid_is_identity = self.uid_map.map == &[(0, 0, u32::MAX)];
    let gid_is_identity = self.gid_map.map == &[(0, 0, u32::MAX)];
    !uid_is_identity || !gid_is_identity
}

/// Translate user ID into a UID in the namespace.
pub fn uid(&self, uid: u32) -> Result<u32> {
Ok(self.uid_map.translate(uid).context("UID overflows")?)
Expand Down

0 comments on commit c54ec38

Please sign in to comment.