Extensions required to run memcr as non-root user, and secure the UNIX domain socket

meecash · mkozlowski · commit dc34b70a6ced · 2025-01-09T20:18:24.000+01:00
based communication:

* Add -g -G options to set group ID for created UNIX domain sockets files.
* Modify the code to limit the UNIX domain sockets file ownership and permissions
according to the provided -g -G options.
* Create doc subdirectory and security_considerations.md describing the approach
to secure/limit UNIX domain sockets access, and provide required privileges
to memcr daemon when run as non-root user.
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 
 memcr was written as a PoC to demonstrate that it is possible to temporarily reduce RSS of a target process without killing it. This is achieved by freezing the process, checkpointing its memory to a file and restoring it later when needed.
 
-The idea is based on concepts seen in ptrace-parasite and early CRIU versions. The key difference is that the target process is kept alive and memcr manipulates its memory with `madvise()` `MADV_DONTNEED` syscall to reduce RSS. VM mappings are not changed.
+The idea is based on concepts seen in ptrace-parasite and early [CRIU](https://github.com/checkpoint-restore/criu) versions. The key difference is that the target process is kept alive and memcr manipulates its memory with `madvise()` `MADV_DONTNEED` syscall to reduce RSS. VM mappings are not changed.
 
 #### building
 
@@ -23,7 +23,7 @@ You can enable support for compression and checksumming of memory dump file:
  There is also `ENCRYPT` option for building `libencrypt.so` that provides sample implementation of encryption layer based on libcrypto API. memcr is not linked with libencrypt.so, but it can be preloaded with `LD_PRELOAD`.
  - `ENCRYPT=1` - requires libcrypto and openssl headers
 
-Ubuntu 22.04:
+##### compilation on Ubuntu 22.04:
 ```
 sudo apt-get install liblz4-dev liblz4-1
 sudo apt-get install libssl-dev libssl3
@@ -40,7 +40,7 @@ make CROSS_COMPILE=arm-linux-gnueabihf-
 make CROSS_COMPILE=aarch64-linux-gnu-
 ```
 ##### yocto
-There is a generic `memcr.bb` file provided that you can copy into your yocto layer and build memcr as any other packet with bitbake.
+There is a generic `memcr.bb` recipe provided that you can copy into your yocto layer and build memcr as any other packet with bitbake.
 ```
 bitbake memcr
 ```
@@ -59,11 +59,14 @@ options:
   -d --dir              dir where memory dump is stored (defaults to /tmp)
   -S --parasite-socket-dir      dir where socket to communicate with parasite is created
         (abstract socket will be used if no path specified)
+  -G --parasite-socket-gid     group ID for parasite UNIX domain socket file, valid only for if --parasite-socket-dir provided,
+                               note: the group ID provided need to be common for: the user running memcr daemon and the user running suspended process
   -N --parasite-socket-netns    use network namespace of parasite when connecting to socket
         (useful if parasite is running in a container with netns)
   -l --listen           work as a service waiting for requests on a socket
         -l PORT: TCP port number to listen for requests on
         -l PATH: filesystem path for UNIX domain socket file (will be created)
+  -g --listen-gid       group ID for listen UNIX domain socket file, valid only in service mode for UNIX domain socket
   -n --no-wait          no wait for key press
   -m --proc-mem         get pages from /proc/pid/mem
   -f --rss-file         include file mapped memory
@@ -85,3 +88,4 @@ memcr client:
 memcr-client -l 9000 -p 1234567 --checkpoint
 memcr-client -l 9000 -p 1234567 --restore
 ```
+Due to the high privileges of the memcr daemon, it is recommended to run the memcr daemon process as a non-root user with elevated Linux capabilities and permissions; the details are described in: [doc/security_considerations.md](doc/security_considerations.md)
diff --git a/arch/arm/linux-abi.h b/arch/arm/linux-abi.h
@@ -66,7 +66,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 	return r0;
 }
 
-#if 0
 static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	register long r7 asm("r7") = nr;
@@ -80,9 +79,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return r0;
 }
-#endif
 
-#if 0
 static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)
 {
 	register long r7 asm("r7") = nr;
@@ -97,7 +94,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return r0;
 }
-#endif
 
 #if 0
 static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
diff --git a/arch/arm64/linux-abi.h b/arch/arm64/linux-abi.h
@@ -66,7 +66,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 	return x0;
 }
 
-#if 0
 static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	register long x8 asm("x8") = nr;
@@ -80,9 +79,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return x0;
 }
-#endif
 
-#if 0
 static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)
 {
 	register long x8 asm("x8") = nr;
@@ -97,7 +94,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return x0;
 }
-#endif
 
 #if 0
 static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
diff --git a/arch/riscv64/linux-abi.h b/arch/riscv64/linux-abi.h
@@ -66,7 +66,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 	return a0;
 }
 
-#if 0
 static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	register long a7 asm("a7") = nr;
@@ -80,9 +79,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return a0;
 }
-#endif
 
-#if 0
 static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)
 {
 	register long a7 asm("a7") = nr;
@@ -97,7 +94,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return a0;
 }
-#endif
 
 #if 0
 static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
diff --git a/arch/syscall.c b/arch/syscall.c
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/syscall.h>
+#include <linux/fcntl.h> /* for O_* and AT_* */
 
 #if defined(__x86_64__)
 #include "x86_64/linux-abi.h"
@@ -88,3 +89,35 @@ long sys_gettid(void)
 {
 	return syscall0(__NR_gettid);
 }
+
+int sys_fchmod(int fd, mode_t mode)
+{
+	return syscall2(__NR_fchmod, fd, mode);
+}
+
+int sys_chmod(char* path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+	return syscall4(__NR_fchmodat, AT_FDCWD, (unsigned long)path, mode, 0);
+#elif defined(__NR_chmod)
+	return syscall2(__NR_chmod, (unsigned long)path, mode);
+#else
+	return -ENOSYS;
+#endif
+}
+
+int sys_chown(char* path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+	return syscall5(__NR_fchownat, AT_FDCWD, (unsigned long)path, owner, group, 0);
+#elif defined(__NR_chown)
+	return syscall3(__NR_chown, (unsigned long)path, owner, group);
+#else
+	return -ENOSYS;
+#endif
+}
+
+int sys_getuid(void)
+{
+	return syscall0(__NR_getuid);
+}
diff --git a/arch/syscall.h b/arch/syscall.h
@@ -30,5 +30,9 @@ int sys_bind(int fd, struct sockaddr *addr, socklen_t len);
 int sys_listen(int fd, int n);
 int sys_exit(int error_code);
 long sys_gettid(void);
+int sys_fchmod(int fd, mode_t mode);
+int sys_chmod(char* path, mode_t mode);
+int sys_chown(char* path, uid_t owner, gid_t group);
+int sys_getuid(void);
 
 #endif
diff --git a/arch/x86_64/linux-abi.h b/arch/x86_64/linux-abi.h
@@ -59,7 +59,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 	return ret;
 }
 
-#if 0
 static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	register unsigned long r10 asm("r10") = r10;
@@ -72,9 +71,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return ret;
 }
-#endif
 
-#if 0
 static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)
 {
 	register unsigned long r10 asm("r10") = r10;
@@ -89,7 +86,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo
 		     : "memory");
 	return ret;
 }
-#endif
 
 #if 0
 static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
diff --git a/doc/security_considerations.md b/doc/security_considerations.md
@@ -0,0 +1,143 @@
+# Security considerations
+
+## socket communication
+
+### parasite socket
+
+The memcr uses a UNIX domain socket for communication between the parasite (code injected to the suspended process) and memcr utility/daemon, let's call it parasite_socket.
+The parasite_socket is a UNIX domain socket and, depending on the memcr options (`-S --parasite-socket-dir`), can be a named UNIX domain socket created in the pointed directory, or an abstract UNIX domain socket.
+
+Using an abstract UNIX domain socket is more straightforward, as it does not require any memcr option or any handling of the socket ownership and permissions, but it is less secure, as:
+- the name/ID of the created socket is generated as: ***memcr\<pid of the suspended process\>*** so it is easy to guess,
+- there is no user/permissions-based access control to abstract UNIX domain sockets, any user in the system can list them and connect to them if they know the socket name.
+It is recommended not to use abstract UNIX domain sockets on systems where security measures should be applied.
+
+Access to UNIX domain socket file can be controlled by socket file node permissions and ownership. 
+By default parasite_socket is created as owned by the user running the suspended process, with RW rights only for the owner. It is enough if memcr is run as root (so it can access any file).
+If the system is configured to be more secure `-G --parasite-socket-gid` option may be specified for memcr to provide group ID which will own the parasite_socket with RW access to it. This is useful for the possible solutions where memcr is run as a non-root user with elevated Linux capabilities.
+
+### restore socket
+
+The restore socket is a UNIX domain socket used internally by memcr to communicate between main process and forked process watching the suspended process (one instance created per suspended process).
+Similarly to the parasite_socket, it is created as a named UNIX domain socket (named ***memcrRestore\<pid of the suspended process\>***) or an abstract UNIX domain socket, depending on the `-S --parasite-socket-dir` option.
+The owner of the named UNIX domain socket is memcr effective user, permissions are set to RW for owner only.
+
+### daemon socket
+
+The second socket is created when memcr is run as a daemon (`-l --listen` option) and is used to send commands to the memcr daemon from the memcr-client utility, let's call it daemon_socket.
+The daemon_socket can be a UNIX domain socket created as a file node pointed by `-l` option, or TCP socket listening on port number defined with `-l` option.
+
+For TCP socket, the access to the provided port can be controlled by a network access control mechanism (iptables).
+
+For UNIX domain socket: it is named socket node, by default owned by effective UID and gid of the user running memcr, and having access permissions based on the umask set for the memcr process. (Note, that in most cases it means that running memcr daemon as root will require running memcr-client as root as well.)
+Setting the chosen group ID with `-g --listen-gid` memcr option, the file group ownership is changed to the provided gid, and file node permission is set to RW for the owner and the group.
+This way one can limit access to memcr daemon to the user(s) being part of the selected group. It is recommended to create a separate group for that purpose to strictly limit the access.
+
+### examples
+
+1. memcr daemon running as root with TCP daemon socket (port 9000), abstract UNIX domain sockets used for parasite and restore sockets:
+
+```
+sudo memcr -zc -l 9000
+```
+
+memcr client, run as non-root user, connects to TCP socket:
+
+```
+memcr-client -l 9000 -p <pid> --checkpoint
+memcr-client -l 9000 -p <pid> --restore
+```
+
+> [!NOTE]
+> no memcr daemon access control, no memcr internal sockets protection.
+
+2. memcr daemon running as root with UNIX domain daemon socket, abstract UNIX domain sockets used for parasite and restore sockets:
+
+```
+sudo memcr -zc -l /tmp/memcr/memcr.sock
+
+/tmp/memcr$ ls -l
+total 0
+srwxr-xr-x 1 root root 0 gru 31 19:35 memcr.sock
+```
+
+memcr client, run as root (to be able to connect to daemon), connects to UNIX domain socket:
+
+```
+sudo memcr-client -l /tmp/memcr/memcr.sock -p <pid> --checkpoint
+sudo memcr-client -l /tmp/memcr/memcr.sock -p <pid> --restore
+```
+
+> [!NOTE]
+> memcr daemon access control by /tmp/memcr/memcr.sock owner/permissions, no memcr internal sockets protection.
+
+3. memcr daemon running as root with UNIX domain daemon socket, UNIX domain sockets used for parasite and restore sockets, gid 1000 set for daemon and parasite sockets:
+
+```
+sudo memcr -zc -l /tmp/memcr/memcr.sock -g 1000 -S /tmp/memcr -G 1000
+
+/tmp/memcr$ ls -l
+total 0
+srw-rw---- 1 root user 0 gru 31 19:38 memcr.sock
+```
+
+memcr client, run as non-root user, connects to UNIX domain socket (suspended process pid: 33239)
+
+```
+memcr-client -l /tmp/memcr/memcr.sock -p 33239 --checkpoint
+
+/tmp/memcr$ ls -l
+total 0
+srw-rw---- 1 user user 0 gru 31 19:40 memcr33239
+srw------- 1 root root 0 gru 31 19:40 memcrRestore33239
+srw-rw---- 1 root user 0 gru 31 19:39 memcr.sock
+
+memcr-client -l /tmp/memcr/memcr.sock -p 33239 --restore
+```
+
+> [!NOTE]
+> memcr daemon access control by /tmp/memcr/memcr.sock owner/permissions, memcr parasite and restore sockets (memcr33239, memcrRestore33239) access control by sockets file node owner/permissions
+
+## Linux capabilities/filesystem permissions required by memcr to operate
+
+In order to run memcr as a non-root user it is required to grant to memcr process/user Linux capabilities and filesystem nodes permissions required for memcr operation. Information provided in this section should allow to run memcr (as daemon as well) as non-root, and make your system more secure. Another step recommended for even better security is to run memcr as a daemon in a sandbox, for example using switch root or Linux Container (LXC).
+
+> [!CAUTION]
+> Linux capabilities required to effectively freeze the process and dump its memory are real security threats - granting them to a non-root user running the memcr daemon process should be done carefully with a full understanding of the required changes and their consequences. It is recommended to create a separate user and group for memcr daemon, and selectively grant access to the /proc data of the suspended process by a dedicated group.
+
+### CAP_SYS_PTRACE
+
+The memcr process requires the Linux capability CAP_SYS_PTRACE to be able to call ptrace() in order to attach to and control the suspended process (see ptrace(2) and capabilities(7) for more information).
+
+setcap command line utility can be used to set a memcr executable file capabilities attribute to the specified capability. This way the capability is granted by the OS to the process created by running such a file. (see setcap(8) / getcap(8) for more details).
+
+```
+$ sudo setcap 'cap_sys_ptrace=ep' ./memcr
+
+$ getcap ./memcr
+memcr cap_sys_ptrace=ep
+```
+
+### /proc access
+
+memcr does require access to data in /proc
+
+1. Read access to /proc/kpageflags node.
+
+In most systems, by default only root is allowed to read /proc/kpageflags.
+The recommended solution here would be to add read (only) access for a dedicated group, and add only the user running memcr to this group.
+
+2. Read/Write access to data in /proc/\<suspended process pid\>/:
+
+* /proc/\<suspended process pid\>/maps
+* /proc/\<suspended process pid\>/mem
+* /proc/\<suspended process pid\>/ns/net
+* /proc/\<suspended process pid\>/pagemap
+* /proc/\<suspended process pid\>/status
+* /proc/\<suspended process pid\>/task
+
+This access is granted to the user running the particular process and its default group. The quickest solution would be to add the user running memcr to the suspended process default group. Sometimes such a solution could be too "wide", for example when this group grants access to some other resources owned by this process, which are not supposed to be available to the memcr daemon. The creation of dedicated groups for this purpose could be a better solution. A careful analysis of each case is recommended.
+
+> [!NOTE]
+> To quickly test the memcr daemon working as non-root user memcr can be run as the same user as a process which will be suspended. In such a case it will be enough to grant the capability to the memcr file and access to /proc/kpageflags for the user used in the test.
+
diff --git a/memcr.c b/memcr.c
diff --git a/memcr.h b/memcr.h
diff --git a/parasite.c b/parasite.c

Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`66`	`66`	`return r0;`
`67`	`67`	`}`
`68`	`68`
`69`		`-#if 0`
`70`	`69`	`static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)`
`71`	`70`	`{`
`72`	`71`	`register long r7 asm("r7") = nr;`
`@@ -80,9 +79,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`80`	`79`	`: "memory");`
`81`	`80`	`return r0;`
`82`	`81`	`}`
`83`		`-#endif`
`84`	`82`
`85`		`-#if 0`
`86`	`83`	`static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)`
`87`	`84`	`{`
`88`	`85`	`register long r7 asm("r7") = nr;`
`@@ -97,7 +94,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`97`	`94`	`: "memory");`
`98`	`95`	`return r0;`
`99`	`96`	`}`
`100`		`-#endif`
`101`	`97`
`102`	`98`	`#if 0`
`103`	`99`	`static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)`
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,6 @@ static long syscall3(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`59`	`59`	`return ret;`
`60`	`60`	`}`
`61`	`61`
`62`		`-#if 0`
`63`	`62`	`static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3)`
`64`	`63`	`{`
`65`	`64`	`register unsigned long r10 asm("r10") = r10;`
`@@ -72,9 +71,7 @@ static long syscall4(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`72`	`71`	`: "memory");`
`73`	`72`	`return ret;`
`74`	`73`	`}`
`75`		`-#endif`
`76`	`74`
`77`		`-#if 0`
`78`	`75`	`static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)`
`79`	`76`	`{`
`80`	`77`	`register unsigned long r10 asm("r10") = r10;`
`@@ -89,7 +86,6 @@ static long syscall5(int nr, unsigned long arg0, unsigned long arg1, unsigned lo`
`89`	`86`	`: "memory");`
`90`	`87`	`return ret;`
`91`	`88`	`}`
`92`		`-#endif`
`93`	`89`
`94`	`90`	`#if 0`
`95`	`91`	`static long syscall6(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)`