I'm studying POSIX capabilities and namespaces in Linux, and I wrote a few lines of code, inspired by these impressive articles, to better understand how capabilities are seen from different namespaces. Some pieces of the code are taken from the examples in the articles and are not my own work...
#define _GNU_SOURCE
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sched.h>
#include <sys/capability.h>
#include "caputilities.h"
/* Report `msg` together with the current errno description via perror(),
   then terminate the process with EXIT_FAILURE. Wrapped in do/while(0) so
   it behaves as a single statement at the call site. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Length limit used when formatting /proc/<pid>/... paths; the path buffer
   in this file is declared with MAXLEN + 1 bytes. */
#define MAXLEN 255
/*
 * Replace commas in a mapping string argument with newlines, in place.
 *
 * map: writable, NUL-terminated mapping string. NULL is accepted and
 *      ignored so callers can pass an option that was not supplied.
 *
 * The string is walked with a pointer up to its terminator, which avoids
 * comparing a signed int index against a size_t length and a second pass
 * over the string just to measure it.
 */
static void get_mapstr(char *map){
    if (map == NULL)
        return;

    for (char *p = map; *p != '\0'; p++) {
        if (*p == ',')
            *p = '\n';
    }
}
/*
 * Write the string `map` to the already existing file `map_file` with a
 * single write() call.
 *
 * map:      NUL-terminated data to write (the terminator is not written).
 * map_file: path of the file to open; it is opened O_RDWR and never created.
 *
 * Any failure is reported on stderr and terminates the process with
 * EXIT_FAILURE. A short write is reported as such instead of printing
 * whatever stale value errno happens to hold, and the descriptor is
 * released on every path.
 */
static void save_map(char *map, char *map_file){
    int fd = open(map_file, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    size_t map_len = strlen(map);
    ssize_t written = write(fd, map, map_len);

    if (written == -1) {
        /* Print before close(): close() may overwrite errno. */
        fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
        close(fd);
        exit(EXIT_FAILURE);
    }
    if ((size_t)written != map_len) {
        /* errno is not set for a partial write, so say what happened. */
        fprintf(stderr, "write %s: short write (%zd of %zu bytes)\n",
                map_file, written, map_len);
        close(fd);
        exit(EXIT_FAILURE);
    }

    if (close(fd) == -1) {
        fprintf(stderr, "close %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }
}
/* Start function for cloned child */
static int childFunc(void *arg){
pid_t pid = getpid();
fprintf(stderr, "cloned child pid %ld\n", (long)pid);
fprintf(stderr, "child process capabilities %s\n", cap_to_text(cap_get_proc(), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
if (arg!=NULL){ //user ns enabled
char *uidmap = ((char **)arg)[0];
char *gidmap = ((char **)arg)[1];
if (uidmap!=NULL) fprintf(stderr, "setting uid map %s\n", uidmap);
if (gidmap!=NULL) fprintf(stderr, "setting gid map %s\n", gidmap);
char map_path[MAXLEN + 1];
if (uidmap != NULL){
snprintf(map_path, MAXLEN, "/proc/%ld/uid_map", (long)pid);
save_map(uidmap, map_path);
}
if (gidmap != NULL){
snprintf(map_path, MAXLEN, "/proc/%ld/gid_map", (long)pid);
save_map(gidmap, map_path);
}
fprintf(stderr, "child process capabilities %s\n", cap_to_text(cap_get_proc(), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
}
sleep(200);
exit(0);
}
/* Print the command-line help for program `pname` on stderr and terminate
   the process with EXIT_FAILURE. Does not return. */
static void usage(char *pname){
    /* Fixed help text that follows the synopsis line, one entry per line. */
    static const char *const help_lines[] = {
        " -U use user namespace\n",
        " -M uid mapping\n",
        " -G gid mapping\n",
        " mapstring is a comma separated list of mapping of the form:\n",
        " ID_inside-ns ID-outside-ns length [,ID_inside-ns ID-outside-ns length, ...]\n",
    };
    const size_t line_count = sizeof help_lines / sizeof help_lines[0];

    fprintf(stderr, "Usage: %s -U -M mapstring -G mapstring\n", pname);
    for (size_t i = 0; i < line_count; i++)
        fputs(help_lines[i], stderr);

    exit(EXIT_FAILURE);
}
/* Size in bytes (1 MiB) of the buffer used as the cloned child's stack. */
#define STACK_SIZE (1024 * 1024)
/* main() passes child_stack + STACK_SIZE, i.e. the end of this buffer,
   to clone() as the child's stack pointer. */
static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Receive a UID and/or GID mapping as arguments
Every mapping consists of a list of tuple (separated by new line) of the form:
ID_inside-ns ID-outside-ns length
Requiring the user to supply a string that contains newlines is
of course inconvenient for command-line use. Thus, we permit the
use of commas to delimit records in this string, and replace them
with newlines before writing the string to the file. */
int main(int argc, char *argv[]){
int flags = 0;
char *gid_map = NULL, *uid_map = NULL;
int opt;
while ((opt = getopt(argc, argv, "UM:G:")) != -1) {
switch (opt){
case 'U': flags |= CLONE_NEWUSER;
case 'M': uid_map = optarg; break;
case 'G': gid_map = optarg; break;
default: usage(argv[0]);
}
}
if ((uid_map != NULL || gid_map != NULL) && !(flags & CLONE_NEWUSER)){
fprintf(stderr,"what about give me the user namespace option? what's in your mind today?\n");
usage(argv[0]);
}
char* args[2];
get_mapstr(uid_map); args[0] = uid_map;
get_mapstr(gid_map); args[1] = gid_map;
pid_t child_pid = clone(childFunc, child_stack + STACK_SIZE, flags | SIGCHLD, (flags & CLONE_NEWUSER) ? &args : NULL);
if (child_pid == -1) errExit("clone");
sleep(1);
fprintf(stderr, "child process pid capabilities from parent: %s\n", cap_to_text(cap_get_pid(child_pid), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
exit(0);
}
I verified that, from the child in the new namespace, it is only possible to map the parent process's effective user ID in the outer namespace to any UID in the new namespace (root included); if you try to map a different outer user from the child, you get an error. That's OK.
$ ./testcap3 -U -M"1000 39 1"
cloned child pid 7659
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 1000 39 1
write /proc/7659/uid_map: Operation not permitted
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000
$ ./testcap3 -U -M"0 1000 1"
cloned child pid 7665
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 0 1000 1
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 0, egid 65534
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000
I don't understand why the capabilities of the child process are shown as all enabled when printed from the parent process. I would have expected to see no privileges in the outer namespace; am I wrong? Clearly the testcap3 binary is not privileged (neither the setuid/setgid bits nor file capabilities are set on the file, and the effective user is not an administrator). How are the capabilities stored? How are the data structures related to namespaces?