From df321c9e038bccf6c99f82a1687e11a075635c93 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Thu, 17 Oct 2024 11:14:49 +0200 Subject: [PATCH 1/7] nsenter: add functions to enable/disable namespaces Currently, enabled namespaces are those with an open file descriptor. However, if we support pidfd, this will become unnecessary and we will need an FD-independent enable/disable mechanism. It also makes sense to delay opening --target namespaces files until everything is ready and only handle it in one place. Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 145 +++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 57 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index eb41149e0c7..b72ca345bd8 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ static struct namespace_file { int nstype; const char *name; int fd; + bool enabled; } namespace_files[] = { /* Careful the order is significant in this array. 
* @@ -128,7 +130,7 @@ static int env_fd = -1; static int uid_gid_fd = -1; static int cgroup_procs_fd = -1; -static void set_parent_user_ns_fd(void) +static void open_parent_user_ns_fd(void) { struct namespace_file *nsfile = NULL; struct namespace_file *user_nsfile = NULL; @@ -150,8 +152,10 @@ static void set_parent_user_ns_fd(void) if (parent_ns < 0) errx(EXIT_FAILURE, _("no namespaces to get parent of")); - if (user_nsfile) + if (user_nsfile) { user_nsfile->fd = parent_ns; + user_nsfile->enabled = true; + } } @@ -177,21 +181,77 @@ static void open_target_fd(int *fd, const char *type, const char *path) err(EXIT_FAILURE, _("cannot open %s"), path); } -static void open_namespace_fd(int nstype, const char *path) +static void enable_nsfile(struct namespace_file *n, const char *path) +{ + if (path) + open_target_fd(&n->fd, n->name, path); + n->enabled = true; +} + +static void disable_nsfile(struct namespace_file *n) +{ + if (n->fd >= 0) + close(n->fd); + n->fd = -1; + n->enabled = false; +} + +/* Enable namespace; optionally open @path if not NULL. */ +static void enable_namespace(int nstype, const char *path) { struct namespace_file *nsfile; for (nsfile = namespace_files; nsfile->nstype; nsfile++) { if (nstype != nsfile->nstype) continue; - - open_target_fd(&nsfile->fd, nsfile->name, path); + enable_nsfile(nsfile, path); return; } /* This should never happen */ assert(nsfile->nstype); } +/* Returns mask of all enabled namespaces */ +static int get_namespaces(void) +{ + struct namespace_file *n; + int mask = 0; + + for (n = namespace_files; n->nstype; n++) { + if (n->enabled) + mask |= n->nstype; + } + return mask; +} + +static int get_namespaces_without_fd(void) +{ + struct namespace_file *n; + int mask = 0; + + for (n = namespace_files; n->nstype; n++) { + if (n->enabled && n->fd < 0) + mask |= n->nstype; + } + return mask; +} + +/* Open /proc/#/ns/ files for enabled namespaces specified in @namespaces + * if they have not been opened yet. 
*/ +static void open_namespaces(int namespaces) +{ + struct namespace_file *n; + + for (n = namespace_files; n->nstype; n++) { + if (!n->enabled) + continue; + if (!(n->nstype & namespaces)) + continue; + if (n->fd < 0) + open_target_fd(&n->fd, n->name, NULL); + } +} + static void open_target_sk_netns(int pid, int sock_fd) { struct namespace_file *nsfile; @@ -222,6 +282,7 @@ static void open_target_sk_netns(int pid, int sock_fd) if (nsfile->fd >= 0) close(nsfile->fd); nsfile->fd = nsfd; + nsfile->enabled = true; close(sk); close(pidfd); } @@ -429,56 +490,32 @@ int main(int argc, char *argv[]) strtoul_or_err(optarg, _("failed to parse pid")); break; case 'm': - if (optarg) - open_namespace_fd(CLONE_NEWNS, optarg); - else - namespaces |= CLONE_NEWNS; + enable_namespace(CLONE_NEWNS, optarg); break; case 'u': - if (optarg) - open_namespace_fd(CLONE_NEWUTS, optarg); - else - namespaces |= CLONE_NEWUTS; + enable_namespace(CLONE_NEWUTS, optarg); break; case 'i': - if (optarg) - open_namespace_fd(CLONE_NEWIPC, optarg); - else - namespaces |= CLONE_NEWIPC; + enable_namespace(CLONE_NEWIPC, optarg); break; case 'n': - if (optarg) - open_namespace_fd(CLONE_NEWNET, optarg); - else - namespaces |= CLONE_NEWNET; + enable_namespace(CLONE_NEWNET, optarg); break; case 'N': sock_fd = str2num_or_err(optarg, 10, _("failed to parse file descriptor"), 0, INT_MAX); break; case 'p': - if (optarg) - open_namespace_fd(CLONE_NEWPID, optarg); - else - namespaces |= CLONE_NEWPID; + enable_namespace(CLONE_NEWPID, optarg); break; case 'C': - if (optarg) - open_namespace_fd(CLONE_NEWCGROUP, optarg); - else - namespaces |= CLONE_NEWCGROUP; + enable_namespace(CLONE_NEWCGROUP, optarg); break; case 'U': - if (optarg) - open_namespace_fd(CLONE_NEWUSER, optarg); - else - namespaces |= CLONE_NEWUSER; + enable_namespace(CLONE_NEWUSER, optarg); break; case 'T': - if (optarg) - open_namespace_fd(CLONE_NEWTIME, optarg); - else - namespaces |= CLONE_NEWTIME; + enable_namespace(CLONE_NEWTIME, optarg); break; 
case 'S': if (strcmp(optarg, "follow") == 0) @@ -557,25 +594,27 @@ int main(int argc, char *argv[]) #endif if (do_all) { - if (!namespace_target_pid) - errx(EXIT_FAILURE, _("no target PID specified for --all")); for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nsfile->fd >= 0) + if (nsfile->enabled) continue; /* namespace already specified */ if (!is_usable_namespace(namespace_target_pid, nsfile)) continue; - namespaces |= nsfile->nstype; + enable_nsfile(nsfile, NULL); } } /* * Open remaining namespace and directory descriptors. */ - for (nsfile = namespace_files; nsfile->nstype; nsfile++) - if (nsfile->nstype & namespaces) - open_namespace_fd(nsfile->nstype, NULL); + namespaces = get_namespaces_without_fd(); + if (namespaces) { + if (!namespace_target_pid) + errx(EXIT_FAILURE, _("no target PID specified")); + open_namespaces(namespaces); + } + if (do_rd) open_target_fd(&root_fd, "root", NULL); if (do_wd) @@ -594,24 +633,17 @@ int main(int argc, char *argv[]) * Get parent userns from any available ns. 
*/ if (do_user_parent) - set_parent_user_ns_fd(); - - /* - * Update namespaces variable to contain all requested namespaces - */ - for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nsfile->fd < 0) - continue; - namespaces |= nsfile->nstype; - } + open_parent_user_ns_fd(); if (sock_fd != -1) { if (!namespace_target_pid) errx(EXIT_FAILURE, _("--net-socket needs target PID")); open_target_sk_netns(namespace_target_pid, sock_fd); - namespaces |= CLONE_NEWNET; } + /* All initialized, get final set of namespaces */ + namespaces = get_namespaces(); + /* for user namespaces we always set UID and GID (default is 0) * and clear root's groups if --preserve-credentials is no specified */ if ((namespaces & CLONE_NEWUSER) && !preserve_cred) { @@ -646,8 +678,7 @@ int main(int argc, char *argv[]) continue; } - close(nsfile->fd); - nsfile->fd = -1; + disable_nsfile(nsfile); } } From ed0da317e078bc2c86ccf4d882b3867edd726253 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Thu, 17 Oct 2024 12:17:10 +0200 Subject: [PATCH 2/7] nsenter: use separate function to enter namespaces Remove the complicated for-loop from the main() function and instead use a separate function to enter namespaces based on a given mask.
Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 62 +++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index b72ca345bd8..66f278aadc3 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -252,6 +252,27 @@ static void open_namespaces(int namespaces) } } +static void enter_namespaces(int namespaces, bool ignore_errors) +{ + struct namespace_file *n; + + for (n = namespace_files; n->nstype; n++) { + if (!n->enabled) + continue; + if (!(n->nstype & namespaces)) + continue; + if (n->fd < 0) + continue; + + if (setns(n->fd, n->nstype) == 0) + disable_nsfile(n); /* sucess */ + else if (!ignore_errors) + err(EXIT_FAILURE, + _("reassociate to namespace '%s' failed"), + n->name); + } +} + static void open_target_sk_netns(int pid, int sock_fd) { struct namespace_file *nsfile; @@ -455,7 +476,7 @@ int main(int argc, char *argv[]) int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; struct namespace_file *nsfile; - int c, pass, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0; + int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0; bool do_rd = false, do_wd = false, do_uid = false, force_uid = false, do_gid = false, force_gid = false, do_env = false, do_all = false, do_join_cgroup = false, do_user_parent = false; @@ -643,6 +664,11 @@ int main(int argc, char *argv[]) /* All initialized, get final set of namespaces */ namespaces = get_namespaces(); + if (!namespaces) + errx(EXIT_FAILURE, _("no namespace specified")); + + if ((namespaces & CLONE_NEWPID) && do_fork == -1) + do_fork = 1; /* for user namespaces we always set UID and GID (default is 0) * and clear root's groups if --preserve-credentials is no specified */ @@ -656,31 +682,17 @@ int main(int argc, char *argv[]) } /* - * Now that we know which namespaces we want to enter, enter - * them. Do this in two passes, not entering the user - * namespace on the first pass. 
So if we're deprivileging the - container we'll enter the user namespace last and if we're - privileging it then we enter the user namespace first - (because the initial setns will fail). + * Now that we know which namespaces we want to enter, enter them. Do + * this in two passes, not entering the user namespace on the first + * pass. So if we're deprivileging the container we'll enter the user + * namespace last and if we're privileging it then we enter the user + * namespace first (because the initial setns will fail). */ - for (pass = 0; pass < 2; pass ++) { - for (nsfile = namespace_files + 1 - pass; nsfile->nstype; nsfile++) { - if (nsfile->fd < 0) - continue; - if (nsfile->nstype == CLONE_NEWPID && do_fork == -1) - do_fork = 1; - if (setns(nsfile->fd, nsfile->nstype)) { - if (pass != 0) - err(EXIT_FAILURE, - _("reassociate to namespace '%s' failed"), - nsfile->name); - else - continue; - } - - disable_nsfile(nsfile); - } - } + enter_namespaces(namespaces & ~CLONE_NEWUSER, 1); /* ignore errors */ + + namespaces = get_namespaces(); + if (namespaces) + enter_namespaces(namespaces, 0); /* report errors */ /* Remember the current working directory if I'm not changing it */ if (root_fd >= 0 && wd_fd < 0 && wdns == NULL) { From f18be0ca5aa764404aa626c84cff315256c9ee73 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Fri, 18 Oct 2024 12:16:04 +0200 Subject: [PATCH 3/7] nsenter: use pidfd to enter target namespaces The typical use case is to enter namespaces of the task (--target <pid>). The original nsenter opens /proc/<pid>/ns/* files and uses the file descriptors to enter the namespaces by setns(). The recent kernel allows using the pid file descriptor instead of the files in /proc, making it possible to enter multiple namespaces with one setns call. This solution reduces the number of syscalls (open+setns for each namespace), removes the dependence on /proc, and allows entering nested namespaces.
This commit should be backward compatible, meaning it can be used on systems without pidfd_open(). Explicitly specified namespaces by filenames are still supported, and user namespaces are still entered first/last according to permissions privileging/deprivileging. Addresses: https://github.com/util-linux/util-linux/pull/301 Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 66 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index 66f278aadc3..53c3641fbbd 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -211,6 +211,17 @@ static void enable_namespace(int nstype, const char *path) assert(nsfile->nstype); } +static void disable_namespaces(int namespaces) +{ + struct namespace_file *n; + + for (n = namespace_files; n->nstype; n++) { + if (!(namespaces & n->nstype)) + continue; + disable_nsfile(n); + } +} + /* Returns mask of all enabled namespaces */ static int get_namespaces(void) { @@ -252,24 +263,38 @@ static void open_namespaces(int namespaces) } } -static void enter_namespaces(int namespaces, bool ignore_errors) +static int do_setns(int fd, int ns, const char *name, bool ignore_errors) +{ + int rc = setns(fd, ns); + + if (rc < 0 && !ignore_errors) { + if (name) + err(EXIT_FAILURE, _("reassociate to namespace '%s' failed"), name); + else + err(EXIT_FAILURE, _("reassociate to namespaces failed")); + } + return rc; +} + +static void enter_namespaces(int pid_fd, int namespaces, bool ignore_errors) { struct namespace_file *n; + if (pid_fd) { + int ns = 0; + for (n = namespace_files; n->nstype; n++) { + if (n->enabled && (n->nstype & namespaces) && n->fd < 0) + ns |= n->nstype; + } + if (ns && do_setns(pid_fd, ns, NULL, ignore_errors) == 0) + disable_namespaces(ns); + } + for (n = namespace_files; n->nstype; n++) { - if (!n->enabled) + if (!n->enabled || !(n->nstype & namespaces) || n->fd < 0) continue; - if (!(n->nstype & namespaces)) - continue; - if
(n->fd < 0) - continue; - - if (setns(n->fd, n->nstype) == 0) - disable_nsfile(n); /* sucess */ - else if (!ignore_errors) - err(EXIT_FAILURE, - _("reassociate to namespace '%s' failed"), - n->name); + if (do_setns(n->fd, n->nstype, n->name, ignore_errors) == 0) + disable_nsfile(n); } } @@ -486,6 +511,7 @@ int main(int argc, char *argv[]) gid_t gid = 0; int keepcaps = 0; int sock_fd = -1; + int pid_fd = -1; struct ul_env_list *envls; #ifdef HAVE_LIBSELINUX bool selinux = 0; @@ -633,7 +659,12 @@ int main(int argc, char *argv[]) if (namespaces) { if (!namespace_target_pid) errx(EXIT_FAILURE, _("no target PID specified")); - open_namespaces(namespaces); + + pid_fd = pidfd_open(namespace_target_pid, 0); + if (pid_fd < 0) { + /* fallback to classic way */ + open_namespaces(namespaces); + } } if (do_rd) @@ -688,11 +719,14 @@ int main(int argc, char *argv[]) * namespace last and if we're privileging it then we enter the user * namespace first (because the initial setns will fail). */ - enter_namespaces(namespaces & ~CLONE_NEWUSER, 1); /* ignore errors */ + enter_namespaces(pid_fd, namespaces & ~CLONE_NEWUSER, 1); /* ignore errors */ namespaces = get_namespaces(); if (namespaces) - enter_namespaces(namespaces, 0); /* report errors */ + enter_namespaces(pid_fd, namespaces, 0); /* report errors */ + + if (pid_fd >= 0) + close(pid_fd); /* Remember the current working directory if I'm not changing it */ if (root_fd >= 0 && wd_fd < 0 && wdns == NULL) { From 2f283d0dd32660161728576d107b9f9812ebd742 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Fri, 18 Oct 2024 14:08:35 +0200 Subject: [PATCH 4/7] nsenter: use macros to access the nsfiles array This change implements macros to access the nsfiles array, resulting in a slight reduction in code. 
Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 94 ++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index 53c3641fbbd..e60cc4d12fb 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -130,6 +130,30 @@ static int env_fd = -1; static int uid_gid_fd = -1; static int cgroup_procs_fd = -1; +static inline struct namespace_file *__next_nsfile(struct namespace_file *n, int namespaces, bool enabled) +{ + if (!n) + n = namespace_files; + else if (n->nstype != 0) + n++; + + for ( ; n && n->nstype; n++) { + if (namespaces && !(n->nstype & namespaces)) + continue; + if (enabled && !n->enabled) + continue; + return n; + } + + return NULL; +} + +#define next_nsfile(_n, _ns) __next_nsfile(_n, _ns, 0) +#define next_enabled_nsfile(_n, _ns) __next_nsfile(_n, _ns, 1) + +#define get_nsfile(_ns) __next_nsfile(NULL, _ns, 0) +#define get_enabled_nsfile(_ns) __next_nsfile(NULL, _ns, 1) + static void open_parent_user_ns_fd(void) { struct namespace_file *nsfile = NULL; @@ -199,51 +223,43 @@ static void disable_nsfile(struct namespace_file *n) /* Enable namespace; optionally open @path if not NULL. 
*/ static void enable_namespace(int nstype, const char *path) { - struct namespace_file *nsfile; + struct namespace_file *nsfile = get_nsfile(nstype); - for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nstype != nsfile->nstype) - continue; + if (nsfile) enable_nsfile(nsfile, path); - return; - } - /* This should never happen */ - assert(nsfile->nstype); + else + assert(nsfile); } static void disable_namespaces(int namespaces) { - struct namespace_file *n; + struct namespace_file *n = NULL; - for (n = namespace_files; n->nstype; n++) { - if (!(namespaces & n->nstype)) - continue; + while ((n = next_enabled_nsfile(n, namespaces))) disable_nsfile(n); - } } /* Returns mask of all enabled namespaces */ static int get_namespaces(void) { - struct namespace_file *n; + struct namespace_file *n = NULL; int mask = 0; - for (n = namespace_files; n->nstype; n++) { - if (n->enabled) - mask |= n->nstype; - } + while ((n = next_enabled_nsfile(n, 0))) + mask |= n->nstype; return mask; } static int get_namespaces_without_fd(void) { - struct namespace_file *n; + struct namespace_file *n = NULL; int mask = 0; - for (n = namespace_files; n->nstype; n++) { - if (n->enabled && n->fd < 0) + while ((n = next_enabled_nsfile(n, 0))) { + if (n->fd < 0) mask |= n->nstype; } + return mask; } @@ -251,13 +267,9 @@ static int get_namespaces_without_fd(void) * if they have not been opened yet. 
*/ static void open_namespaces(int namespaces) { - struct namespace_file *n; + struct namespace_file *n = NULL; - for (n = namespace_files; n->nstype; n++) { - if (!n->enabled) - continue; - if (!(n->nstype & namespaces)) - continue; + while ((n = next_enabled_nsfile(n, namespaces))) { if (n->fd < 0) open_target_fd(&n->fd, n->name, NULL); } @@ -278,20 +290,21 @@ static int do_setns(int fd, int ns, const char *name, bool ignore_errors) static void enter_namespaces(int pid_fd, int namespaces, bool ignore_errors) { - struct namespace_file *n; + struct namespace_file *n = NULL; if (pid_fd) { int ns = 0; - for (n = namespace_files; n->nstype; n++) { - if (n->enabled && (n->nstype & namespaces) && n->fd < 0) + while ((n = next_enabled_nsfile(n, namespaces))) { + if (n->fd < 0) ns |= n->nstype; } if (ns && do_setns(pid_fd, ns, NULL, ignore_errors) == 0) disable_namespaces(ns); } - for (n = namespace_files; n->nstype; n++) { - if (!n->enabled || !(n->nstype & namespaces) || n->fd < 0) + n = NULL; + while ((n = next_enabled_nsfile(n, namespaces))) { + if (n->fd < 0) continue; if (do_setns(n->fd, n->nstype, n->name, ignore_errors) == 0) disable_nsfile(n); @@ -304,10 +317,7 @@ static void open_target_sk_netns(int pid, int sock_fd) struct stat sb; int pidfd, sk, nsfd; - for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nsfile->nstype == CLONE_NEWNET) - break; - } + nsfile = get_nsfile(CLONE_NEWNET); assert(nsfile->nstype); pidfd = pidfd_open(pid, 0); @@ -500,7 +510,6 @@ int main(int argc, char *argv[]) }; int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; - struct namespace_file *nsfile; int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0; bool do_rd = false, do_wd = false, do_uid = false, force_uid = false, do_gid = false, force_gid = false, do_env = false, do_all = false, @@ -641,14 +650,11 @@ int main(int argc, char *argv[]) #endif if (do_all) { - for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nsfile->enabled) - continue; /* 
namespace already specified */ - - if (!is_usable_namespace(namespace_target_pid, nsfile)) + struct namespace_file *n = NULL; + while ((n = next_nsfile(n, 0))) { + if (n->enabled || !is_usable_namespace(namespace_target_pid, n)) continue; - - enable_nsfile(nsfile, NULL); + enable_nsfile(n, NULL); } } From 78e4452ba910db2f0175edce7732c1f1a2cb779c Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Fri, 18 Oct 2024 14:50:11 +0200 Subject: [PATCH 5/7] nsenter: reuse pidfd for --net-socket Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index e60cc4d12fb..183d2675f21 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -311,19 +311,15 @@ static void enter_namespaces(int pid_fd, int namespaces, bool ignore_errors) } } -static void open_target_sk_netns(int pid, int sock_fd) +static void open_target_sk_netns(int pidfd, int sock_fd) { struct namespace_file *nsfile; struct stat sb; - int pidfd, sk, nsfd; + int sk, nsfd; nsfile = get_nsfile(CLONE_NEWNET); assert(nsfile->nstype); - pidfd = pidfd_open(pid, 0); - if (pidfd < 0) - err(EXIT_FAILURE, _("failed to pidfd_open() for %d"), pid); - sk = pidfd_getfd(pidfd, sock_fd, 0); if (sk < 0) err(EXIT_FAILURE, _("pidfd_getfd(%d, %u)"), pidfd, sock_fd); @@ -340,7 +336,6 @@ static void open_target_sk_netns(int pid, int sock_fd) nsfile->fd = nsfd; nsfile->enabled = true; close(sk); - close(pidfd); } static int get_ns_ino(const char *path, ino_t *ino) @@ -662,14 +657,16 @@ int main(int argc, char *argv[]) * Open remaining namespace and directory descriptors. 
*/ namespaces = get_namespaces_without_fd(); - if (namespaces) { + if (namespaces || sock_fd >= 0) { if (!namespace_target_pid) errx(EXIT_FAILURE, _("no target PID specified")); pid_fd = pidfd_open(namespace_target_pid, 0); if (pid_fd < 0) { - /* fallback to classic way */ - open_namespaces(namespaces); + if (sock_fd >= 0) + err(EXIT_FAILURE, _("failed to pidfd_open() for %d"), namespace_target_pid); + if (namespaces) + open_namespaces(namespaces); /* fallback */ } } @@ -693,11 +690,8 @@ int main(int argc, char *argv[]) if (do_user_parent) open_parent_user_ns_fd(); - if (sock_fd != -1) { - if (!namespace_target_pid) - errx(EXIT_FAILURE, _("--net-socket needs target PID")); - open_target_sk_netns(namespace_target_pid, sock_fd); - } + if (sock_fd >= 0) + open_target_sk_netns(pid_fd, sock_fd); /* All initialized, get final set of namespaces */ namespaces = get_namespaces(); From 7d2bfa92b463115aafcf0e73b48bf47cdca72f0b Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Mon, 21 Oct 2024 13:08:36 +0200 Subject: [PATCH 6/7] include/pidfd-utils: add namespaces ioctls Signed-off-by: Karel Zak --- include/pidfd-utils.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/include/pidfd-utils.h b/include/pidfd-utils.h index d43bad26fe2..5bd59694a5b 100644 --- a/include/pidfd-utils.h +++ b/include/pidfd-utils.h @@ -8,14 +8,35 @@ #include #include +#ifdef HAVE_SYS_PIDFD_H +# include +#endif + +/* + * pidfd ioctls + * + * All added to kernel 6.11 by commit 5b08bd408534bfb3a7cf5778da5b27d4e4fffe12.
+ */ +#ifndef PIDFS_IOCTL_MAGIC +# define PIDFS_IOCTL_MAGIC 0xFF +# define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +# define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +# define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +# define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +# define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +# define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +# define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +# define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +# define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +# define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#endif + + #ifdef HAVE_SYS_SYSCALL_H # include # include # if defined(SYS_pidfd_send_signal) && defined(SYS_pidfd_open) -# ifdef HAVE_SYS_PIDFD_H -# include -# endif # ifndef HAVE_PIDFD_SEND_SIGNAL static inline int pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags) From 7e60554fc4e8d2dd653b201556aa9e3fd956c968 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Mon, 21 Oct 2024 13:09:43 +0200 Subject: [PATCH 7/7] nsenter: Rewrite --user-parent to use pidfd The latest kernel pidfd supports ioctls to ask for the target's namespaces. It seems we can use it for --user-parent if no user namespace is explicitly specified. The fallback is to use any other namespace or open the target's /proc/<pid>/ns/user file directly.
Signed-off-by: Karel Zak --- sys-utils/nsenter.c | 77 +++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index 183d2675f21..27962a9ecdf 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -154,35 +154,6 @@ static inline struct namespace_file *__next_nsfile(struct namespace_file *n, int #define get_nsfile(_ns) __next_nsfile(NULL, _ns, 0) #define get_enabled_nsfile(_ns) __next_nsfile(NULL, _ns, 1) -static void open_parent_user_ns_fd(void) -{ - struct namespace_file *nsfile = NULL; - struct namespace_file *user_nsfile = NULL; - int parent_ns = -1; - - for (nsfile = namespace_files; nsfile->nstype; nsfile++) { - if (nsfile->nstype == CLONE_NEWUSER) - user_nsfile = nsfile; - - if (nsfile->fd == -1) - continue; - - parent_ns = ioctl(nsfile->fd, NS_GET_USERNS); - if (parent_ns < 0) - err(EXIT_FAILURE, _("failed to open parent ns of %s"), nsfile->name); - - break; - } - - if (parent_ns < 0) - errx(EXIT_FAILURE, _("no namespaces to get parent of")); - if (user_nsfile) { - user_nsfile->fd = parent_ns; - user_nsfile->enabled = true; - } -} - - static void open_target_fd(int *fd, const char *type, const char *path) { char pathbuf[PATH_MAX]; @@ -311,6 +282,50 @@ static void enter_namespaces(int pid_fd, int namespaces, bool ignore_errors) } } +static void open_parent_user_ns_fd(int pid_fd) +{ + struct namespace_file *user = NULL; + int fd = -1, parent_fd = -1; + bool islocal = false; + + /* try user namespace if FD defined */ + user = get_nsfile(CLONE_NEWUSER); + if (user->enabled) + fd = user->fd; + + /* try pidfd to get FD */ + if (fd < 0 && pid_fd >= 0) { + fd = ioctl(pid_fd, PIDFD_GET_USER_NAMESPACE, 0); + if (fd >= 0) + islocal = true; + } + + /* try any enabled namespace */ + if (fd < 0) { + struct namespace_file *n = get_enabled_nsfile(0); + if (n) + fd = n->fd; + } + + /* try directly open the NS */ + if (fd < 0) { + open_target_fd(&fd, "ns/user", NULL); + 
islocal = true; + } + + parent_fd = ioctl(fd, NS_GET_USERNS); + if (parent_fd < 0) + err(EXIT_FAILURE, _("failed to open parent namespace")); + + if (islocal) + close(fd); + if (user->fd > 0) + close(user->fd); + user->fd = parent_fd; + user->enabled = true; +} + + static void open_target_sk_netns(int pidfd, int sock_fd) { struct namespace_file *nsfile; @@ -657,7 +672,7 @@ int main(int argc, char *argv[]) * Open remaining namespace and directory descriptors. */ namespaces = get_namespaces_without_fd(); - if (namespaces || sock_fd >= 0) { + if (namespaces || sock_fd >= 0 || do_user_parent) { if (!namespace_target_pid) errx(EXIT_FAILURE, _("no target PID specified")); @@ -688,7 +703,7 @@ int main(int argc, char *argv[]) * Get parent userns from any available ns. */ if (do_user_parent) - open_parent_user_ns_fd(); + open_parent_user_ns_fd(pid_fd); if (sock_fd >= 0) open_target_sk_netns(pid_fd, sock_fd);