Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ Importing this module will run `btrfs device scan` and pull btrfs modules.

When enabled, attempts to resume after hibernation if resume= is passed on the kernel command line.

> Please use the following option with **CAUTION** as it can be unstable in certain configurations! Any writes to disks that occur pre-resume run the risk of causing system instability! For more information have a read of the warnings in the [kernel docs](https://www.kernel.org/doc/html/latest/power/swsusp.html).

* `late_resume` (true) When enabled will attempt to resume from hibernation after decryption and device mapping, allowing resume from encrypted or otherwise hidden swap devices.

### Cryptographic modules

Several cryptographic modules are provided, mostly to assist in mounting encrypted volumes and handling keyfiles.
Expand Down
2 changes: 1 addition & 1 deletion src/ugrd/base/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def find_kernel_path(self):
if not (self["_kmod_dir"] / "vmlinuz").exists():
for search_dir in ["/boot", "/efi"]:
for prefix in ["vmlinuz", "kernel", "linux", "bzImage"]:
kernel_path = Path(search_dir) / f'{prefix}-{self["kernel_version"]}'
kernel_path = Path(search_dir) / f"{prefix}-{self['kernel_version']}"
if kernel_path.exists():
break
if kernel_path.exists():
Expand Down
6 changes: 4 additions & 2 deletions src/ugrd/base/test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ test_rootfs_name = 'ugrd-test-rootfs'
test_rootfs_build_dir = 'initramfs_test_rootfs'
test_image_size = 16

test_copy_config = ["_mounts", "mounts", "out_dir", "tmpdir", "clean", "test_image_size", "test_flag", "cryptsetup"]
test_copy_config = ["mounts", "out_dir", "tmpdir", "clean", "test_image_size", "test_flag", "cryptsetup"]
kmod_init = ["ata_piix", "sd_mod"]

test_memory = '256M'
test_cpu = 'host'
test_arch = 'x86_64'
test_timeout = 15
test_cmdline = 'console=ttyS0,115200 panic=1'
qemu_bool_args = ['nographic', 'no-reboot', 'enable-kvm']
#qemu_bool_args = ['nographic', 'no-reboot', 'enable-kvm']
qemu_bool_args = ['nographic', 'enable-kvm' ]

[imports.build_pre]
"ugrd.base.test" = [ "init_test_vars" ]
Expand Down
7 changes: 5 additions & 2 deletions src/ugrd/fs/mdraid.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
__version__ = '0.1.2'
__version__ = '0.2.0'


def md_init(self):
    """Return the shell snippet that assembles MD (software RAID) devices.

    The snippet exports MDADM_NO_UDEV=1 and then runs `mdadm --assemble --scan`,
    reporting the command's combined output through einfo.

    Returns a single multi-line string (not a list of lines).
    """
    # The previous single-line return was left in front of this one, which made the
    # export unreachable; only the current snippet is returned now.
    # NOTE(review): MDADM_NO_UDEV=1 presumably makes mdadm create device nodes itself
    # when no udev is running - confirm against mdadm(8).
    return (
        "\n"
        "export MDADM_NO_UDEV=1\n"
        'einfo "Assembling MD devices: $(mdadm --assemble --scan 2>&1)"\n'
    )
72 changes: 58 additions & 14 deletions src/ugrd/fs/resume.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
__version__ = "0.4.2"

from zenlib.util import contains

def resume(self) -> list[str]:
    """Return the shell lines of the resume function, which resumes from the device passed as $1.

    The generated script:
      * returns 1 when no device argument is given, when /sys/power/resume is not writable,
        when lsblk reports any mounted block device, or when $1 is not a block device;
      * warns when /sys/power/resume holds something other than "0:0",
        meaning a resume has already been attempted;
      * otherwise writes $1 to /sys/power/resume.

    If that write returns, nothing was resumed: the script reports that no image was found and returns 0.
    If the system is freshly booted, it will not be able to resume, as there is no hibernation image.
    Distinguishing between a fresh boot and a missing/borked hibernation image is not possible at run time.
    """
    return [
        # Check resume support
        # Guards use a brace group: `return` inside a ( subshell ) only leaves the subshell,
        # so the function would carry on after a failed check.
        '[ -n "$1" ] || { ewarn "No device?" ; return 1 ; }',
        '[ -w /sys/power/resume ] || { ewarn "Kernel does not support resume!" ; return 1 ; }',
        # Warn only when the value differs from the default "0:0"
        '[ "$(cat /sys/power/resume)" = "0:0" ] || ewarn "/sys/power/resume not empty, resume has already been attempted!"',
        # Safety checks
        'if [ -n "$(lsblk -Q MOUNTPOINT)" ] ; then',
        r' eerror "Cannot safely resume with mounted block devices:\n$(lsblk -Q MOUNTPOINT -no PATH)"',
        " return 1",
        "fi",
        '[ -b "$1" ] || { ewarn "\'$1\' is not a valid block device!" ; return 1 ; }',
        'einfo "Attempting resume from: $1"',
        'echo -n "$1" > /sys/power/resume',
        # Reaching this line means the kernel did not resume; report the device we were given
        'einfo "No image on $1"',
        "return 0",
    ]


def handle_early_resume(self) -> list[str]:
    """Return init lines that resume from the device named by the resume= kernel parameter.

    Nothing is attempted when noresume is set, resume= is empty, or /sys/power/resume is not writable.
    A UUID=, PARTUUID= or LABEL= value is resolved to a device with blkid;
    any other value is used as the device as-is.
    When the resume shell function fails, advice on skipping resume is printed and rd_fail is called.
    When no device could be determined, this is only reported with einfo.
    """
    return [
        "resumeval=$(readvar resume)",  # read the cmdline resume var
        'if ! check_var noresume && [ -n "$resumeval" ] && [ -w /sys/power/resume ]; then',
        ' if echo "$resumeval" | grep -q "UUID=" ||',  # resolve uuid to device
        ' echo "$resumeval" | grep -q "PARTUUID=" ||',  # or resolve partuuid to device
        ' echo "$resumeval" | grep -q "LABEL=" ; then',  # or resolve label to device
        ' resume=$(blkid -t "$resumeval" -o device)',
        " else",
        ' resume="$resumeval"',
        " fi",
        # Quoted so an empty or multi-word blkid result cannot break the test
        ' if [ -n "$resume" ] ; then',
        ' if ! resume "$resume" ; then',
        ' eerror "If you wish to continue booting, remove the resume= kernel parameter."',
        ''' eerror " or run 'setvar noresume 1' from the recovery shell to skip resuming."''',
        ' rd_fail "Failed to resume from $(readvar resume)."',
        " fi",
        " else",
        " einfo \"Resume device '$resumeval' not found\"",
        " fi",
        "fi",
    ]


@contains("late_resume")
def handle_late_resume(self) -> list[str]:
    """Return the resume init lines for the late-resume hook, after logging a data-loss warning.

    The returned lines are exactly those of handle_early_resume; the difference is only
    the point at which the hook that imports this function runs.
    NOTE(review): the contains decorator presumably skips this function unless
    late_resume is set - confirm against zenlib.
    """
    self.logger.warning(
        "[late_resume] enabled, this can result in data loss if filesystems are modified before resuming. Read the docs for more info."
    )
    # At the moment it's the same code but delayed, will change when more features are added
    return handle_early_resume(self)


@contains("test_resume")
def test_init_swap_uuid(self):
    """Generate a swap UUID for the resume test and wire it into the test configuration.

    Does nothing unless "test_cpu" is present in the config.
    Otherwise it sets test_swap_uuid to a fresh random UUID, appends resume=UUID=<uuid>
    to test_cmdline, and adds 256 to test_image_size.
    """
    if "test_cpu" not in self:
        return

    from uuid import uuid4

    # Stored as text rather than a UUID object: the parameter is declared as "str"
    # and the value is used as a command-line argument when the swap partition is created.
    self["test_swap_uuid"] = swap_uuid = str(uuid4())

    # append to test kernel cmdline and adjust planned image size to allow enough space
    self["test_cmdline"] = f"{self.get('test_cmdline')} resume=UUID={swap_uuid}"
    self["test_image_size"] = 256 + self.get("test_image_size")
20 changes: 18 additions & 2 deletions src/ugrd/fs/resume.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
cmdline_strings = [ "resume" ]
binaries = [ 'lsblk' ]
test_copy_config = [ "test_resume", "test_swap_uuid" ]

[imports.init_main]
"ugrd.fs.resume" = [ "handle_resume" ]
[imports.build_pre]
"ugrd.fs.resume" = [ "test_init_swap_uuid" ]

[imports.init_main]
"ugrd.fs.resume" = [ "handle_early_resume" ]

[imports.init_premount]
"ugrd.fs.resume" = [ "handle_late_resume"]

[imports.functions]
"ugrd.fs.resume" = ["resume"]

[custom_parameters]
late_resume = "bool"
test_resume = "bool"
test_swap_uuid = "str"

[import_order.before]
handle_resume = "mount_fstab"
81 changes: 70 additions & 11 deletions src/ugrd/fs/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from tempfile import TemporaryDirectory

from zenlib.util import colorize as c_
from zenlib.util import contains
from zenlib.util import colorize, contains
from time import sleep


@contains("test_flag", "A test flag must be set to create a test image", raise_exception=True)
Expand All @@ -12,18 +12,45 @@ def init_banner(self):
self["banner"] = f"echo {self['test_flag']}"


@contains("test_resume")
def resume_tests(self):
    """Return init lines that exercise the hibernate/resume path inside the test image.

    When /sys/power/resume is not "0:0" the script sets the hibernation mode to reboot,
    writes "disk" to /sys/power/state, and afterwards checks marker files, writing "c" to
    /proc/sysrq-trigger when a check fails. Otherwise it only reports that no resume
    device was found.
    """
    return [
        'if [ "$(</sys/power/resume)" != "0:0" ] ; then',
        ' [ -e "/resumed" ] && (rm /resumed ; echo c > /proc/sysrq-trigger)',
        # Set correct resume parameters
        " echo reboot > /sys/power/disk",
        # trigger resume
        " echo disk > /sys/power/state",
        # NOTE(review): this checks /resume while every other line uses /resumed - confirm which marker is intended
        ' [ -e "/resume" ] || echo c > /proc/sysrq-trigger',
        # if we reach this point, resume was successful
        # reset environment in case resume needs to be rerun
        " rm /resumed",
        # Both echo strings were missing their closing quote, which left the shell string unterminated
        ' echo "Resume completed without error."',
        "else",
        ' echo "No resume device found! Resume test not possible!"',
        "fi",
    ]


def complete_tests(self):
    """Return the init lines that finish a test run by writing "s" then "o" to /proc/sysrq-trigger."""
    # NOTE(review): per the kernel sysrq docs, "s" syncs filesystems and "o" powers the machine off
    sync_request = "echo s > /proc/sysrq-trigger"
    poweroff_request = "echo o > /proc/sysrq-trigger"
    return [sync_request, poweroff_request]


def _allocate_image(self, image_path, padding=0):
    """Allocate the test image file, filled with zeros.

    image_path: path of the image file; its parent directory is created when missing.
    padding: extra MiB added on top of self.test_image_size.

    An existing file is removed when self.clean is set.
    Raises Exception when the file already exists and 'clean' is off.
    """
    self._mkdir(image_path.parent, resolve_build=False)  # Make sure the parent directory exists
    if image_path.exists():
        if not self.clean:
            raise Exception("File already exists and 'clean' is off: %s" % colorize(image_path, "red", bold=True))
        self.logger.warning("Removing existing filesystem image file: %s" % colorize(image_path, "red"))
        image_path.unlink()

    one_mib = b"\0" * 2**20
    with open(image_path, "wb") as f:
        self.logger.info("Allocating test image file: %s" % colorize(f.name, "green"))
        # Write one MiB at a time so the whole image is never held in memory at once
        for _ in range(self.test_image_size + padding):
            f.write(one_mib)


Expand Down Expand Up @@ -61,8 +88,8 @@ def make_test_luks_image(self, image_path):
pass
_allocate_image(self, image_path, padding=32) # First allocate the image file, adding padding for the LUKS header
keyfile_path = _get_luks_keyfile(self)
self.logger.info("Using LUKS keyfile: %s" % c_(keyfile_path, "green"))
self.logger.info("Creating LUKS image: %s" % c_(image_path, "green"))
self.logger.info("Using LUKS keyfile: %s" % colorize(keyfile_path, "green"))
self.logger.info("Creating LUKS image: %s" % colorize(image_path, "green"))
self._run(
[
"cryptsetup",
Expand All @@ -75,14 +102,14 @@ def make_test_luks_image(self, image_path):
keyfile_path,
]
)
self.logger.info("Opening LUKS image: %s" % c_(image_path, "magenta"))
self.logger.info("Opening LUKS image: %s" % colorize(image_path, "magenta"))
self._run(["cryptsetup", "luksOpen", image_path, "test_image", "--key-file", keyfile_path])


def make_test_image(self):
"""Creates a test image from the build dir"""
build_dir = self._get_build_path("/").resolve()
self.logger.info("Creating test image from: %s" % c_(build_dir, "blue", bold=True))
self.logger.info("Creating test image from: %s" % colorize(build_dir, "blue", bold=True))

rootfs_type = self["mounts"]["root"]["type"]
try:
Expand All @@ -99,6 +126,33 @@ def make_test_image(self):
else:
_allocate_image(self, image_path)

loopback = None
if self.get("test_resume"):
try:
self._run(["sgdisk", "-og", image_path])
self._run(["sgdisk", "-n", "1:0:+256", image_path])
self._run(["sgdisk", "-n", "2:0", image_path])
except RuntimeError as e:
raise RuntimeError("Failed to partition test disk: %s", e)

try:
out = self._run(["losetup", "--show", "-fP", image_path])
loopback = out.stdout.decode("utf-8").strip()

image_path = f"{loopback}p2"
except RuntimeError as e:
raise RuntimeError("Failed to allocate loopback device for disk creation: %s", e)

# sleep for 100ms, to give the loopback device time to scan for partitions
# usually fast, but losetup doesn't wait for this to complete before returning.
# TODO: replace with a proper check/wait loop
sleep(0.100)

try:
self._run(["mkswap", "-U", self["test_swap_uuid"], f"{loopback}p1"])
except RuntimeError as e:
raise RuntimeError("Failed to create swap partition on test disk: %s", e)

if rootfs_type == "ext4":
self._run(["mkfs", "-t", rootfs_type, "-d", build_dir, "-U", rootfs_uuid, "-F", image_path])
elif rootfs_type == "btrfs":
Expand All @@ -117,7 +171,7 @@ def make_test_image(self):
squashfs_image = self._get_out_path(f"squash/{self['squashfs_image']}")
if squashfs_image.exists():
if self.clean:
self.logger.warning("Removing existing squashfs image file: %s" % c_(squashfs_image, "red"))
self.logger.warning("Removing existing squashfs image file: %s" % colorize(squashfs_image, "red"))
squashfs_image.unlink()
else:
raise Exception("File already exists and 'clean' is off: %s" % squashfs_image)
Expand All @@ -129,6 +183,11 @@ def make_test_image(self):
else:
raise NotImplementedError("Unsupported test rootfs type: %s" % rootfs_type)

# Clean up loopback device used to access test image partitions
if loopback:
self.logger.info("Closing test image loopback device: %s", colorize(loopback, "magenta"))
self._run(["losetup", "-d", loopback])

if self.get("cryptsetup"): # Leave it open in the event of failure, close it before executing tests
self.logger.info("Closing LUKS image: %s" % c_(image_path, "magenta"))
self.logger.info("Closing LUKS image: %s" % colorize(image_path, "magenta"))
self._run(["cryptsetup", "luksClose", "test_image"])
8 changes: 7 additions & 1 deletion src/ugrd/fs/test_image.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ _cryptsetup_root = "root"
[imports.build_pre]
"ugrd.fs.test_image" = ["init_banner"]

[imports.init_main]
"ugrd.fs.test_image" = ["resume_tests"]

[imports.init_final]
"ugrd.fs.test_image" = ["complete_tests"]

[imports.pack]
"ugrd.fs.test_image" = ["make_test_image"]

Expand All @@ -20,4 +26,4 @@ cryptsetup = "dict" # Same as above
_cryptsetup_root = "str" # Define the root device for cryptsetup
test_image_size = "int" # Define the size of the test image in MiB
test_flag = "str" # Define the success flag used to determine if the test was successful

test_resume = "bool" # Enable code to test the suspend/resume pathways
2 changes: 1 addition & 1 deletion src/ugrd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def main():
logger.critical(e)
exit(1)
except Exception as e:
logger.criical("An unhandled exception occurred while building:")
logger.critical("An unhandled exception occurred while building:")
print(generator.config_dict)
logger.critical(e, exc_info=True)
exit(1)
Expand Down