source: MondoRescue/branches/3.3/mindi-busybox/util-linux/unshare.c@ 3625

Last change on this file since 3625 was 3621, checked in by Bruno Cornec, 10 years ago

New 3.3 branch for incorporation of latest busybox 1.25. Changing minor version to handle potential incompatibilities.

  • Property svn:eol-style set to native
File size: 10.7 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * Mini unshare implementation for busybox.
4 *
5 * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10//config:config UNSHARE
11//config: bool "unshare"
12//config: default y
13//config: depends on LONG_OPTS && !NOMMU
14//config: select PLATFORM_LINUX
15//config: help
16//config: Run program with some namespaces unshared from parent.
17
18// depends on LONG_OPTS: it is awkward to exclude code which handles --propagation
19// and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS.
20// depends on !NOMMU: we need fork()
21
22//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
23
24//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
25
26//usage:#define unshare_trivial_usage
27//usage: "[OPTIONS] [PROG [ARGS]]"
28//usage:#define unshare_full_usage "\n"
29//usage: "\n -m, --mount[=FILE] Unshare mount namespace"
30//usage: "\n -u, --uts[=FILE] Unshare UTS namespace (hostname etc.)"
31//usage: "\n -i, --ipc[=FILE] Unshare System V IPC namespace"
32//usage: "\n -n, --net[=FILE] Unshare network namespace"
33//usage: "\n -p, --pid[=FILE] Unshare PID namespace"
34//usage: "\n -U, --user[=FILE] Unshare user namespace"
35//usage: "\n -f, --fork Fork before execing PROG"
36//usage: "\n -r, --map-root-user Map current user to root (implies -U)"
37//usage: "\n --mount-proc[=DIR] Mount /proc filesystem first (implies -m)"
38//usage: "\n --propagation slave|shared|private|unchanged"
39//usage: "\n Modify mount propagation in mount namespace"
40//usage: "\n --setgroups allow|deny Control the setgroups syscall in user namespaces"
41
42#include <sched.h>
43#ifndef CLONE_NEWUTS
44# define CLONE_NEWUTS 0x04000000
45#endif
46#ifndef CLONE_NEWIPC
47# define CLONE_NEWIPC 0x08000000
48#endif
49#ifndef CLONE_NEWUSER
50# define CLONE_NEWUSER 0x10000000
51#endif
52#ifndef CLONE_NEWPID
53# define CLONE_NEWPID 0x20000000
54#endif
55#ifndef CLONE_NEWNET
56# define CLONE_NEWNET 0x40000000
57#endif
58
59#include <sys/mount.h>
60#ifndef MS_REC
61# define MS_REC (1 << 14)
62#endif
63#ifndef MS_PRIVATE
64# define MS_PRIVATE (1 << 18)
65#endif
66#ifndef MS_SLAVE
67# define MS_SLAVE (1 << 19)
68#endif
69#ifndef MS_SHARED
70# define MS_SHARED (1 << 20)
71#endif
72
73#include "libbb.h"
74
75static void mount_or_die(const char *source, const char *target,
76 const char *fstype, unsigned long mountflags)
77{
78 if (mount(source, target, fstype, mountflags, NULL)) {
79 bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)",
80 source, target, mountflags);
81 /* fstype is always either NULL or "proc".
82 * "proc" is only used to mount /proc.
83 * No need to clutter up error message with fstype,
84 * it is easily deductible.
85 */
86 }
87}
88
/* procfs control files of the calling process, written after unshare() */
#define PATH_PROC_SETGROUPS  "/proc/self/setgroups"
#define PATH_PROC_UIDMAP     "/proc/self/uid_map"
#define PATH_PROC_GIDMAP     "/proc/self/gid_map"
92
/* Compile-time description of one namespace kind (see ns_list[]). */
struct namespace_descr {
	/* CLONE_NEWxxx bit which is OR-ed into the unshare() flags */
	int flag;
	/*
	 * File name under /proc/PID/ns/. Exactly four bytes and NOT
	 * necessarily NUL-terminated ("user" fills all of them), so it
	 * must be printed with a "%.4s" style precision.
	 */
	const char nsfile4[4];
};
97
/* Run-time state of one namespace kind, indexed like ns_list[]. */
struct namespace_ctx {
	/* FILE given to the long option, or NULL if no bind mount was requested */
	char *path;
};
101
/*
 * Option bits as returned by getopt32().
 * The low six bits are tested as (1 << i) with i being the namespace index.
 */
enum {
	OPT_mount       = 0x001, /* bit 0 */
	OPT_uts         = 0x002, /* bit 1 */
	OPT_ipc         = 0x004, /* bit 2 */
	OPT_network     = 0x008, /* bit 3 */
	OPT_pid         = 0x010, /* bit 4 */
	OPT_user        = 0x020, /* bit 5: OPT_user, NS_USR_POS, and ns_list[] index must match! */
	OPT_fork        = 0x040, /* bit 6 */
	OPT_map_root    = 0x080, /* bit 7 */
	OPT_mount_proc  = 0x100, /* bit 8 */
	OPT_propagation = 0x200, /* bit 9 */
	OPT_setgroups   = 0x400, /* bit 10 */
};
/* Index of each namespace kind in ns_list[] and in the per-run context array. */
enum {
	NS_MNT_POS = 0,
	NS_UTS_POS = 1,
	NS_IPC_POS = 2,
	NS_NET_POS = 3,
	NS_PID_POS = 4,
	NS_USR_POS = 5, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
	NS_COUNT   = 6, /* number of namespace kinds */
};
124static const struct namespace_descr ns_list[] = {
125 { CLONE_NEWNS, "mnt" },
126 { CLONE_NEWUTS, "uts" },
127 { CLONE_NEWIPC, "ipc" },
128 { CLONE_NEWNET, "net" },
129 { CLONE_NEWPID, "pid" },
130 { CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
131};
132
133/*
134 * Upstream unshare doesn't support short options for --mount-proc,
135 * --propagation, --setgroups.
136 * Optional arguments (namespace mountpoints) exist only for long opts,
137 * we are forced to use "fake" letters for them.
138 * '+': stop at first non-option.
139 */
140static const char opt_str[] ALIGN1 = "+muinpU""fr""\xfd::""\xfe:""\xff:";
141static const char unshare_longopts[] ALIGN1 =
142 "mount\0" Optional_argument "\xf0"
143 "uts\0" Optional_argument "\xf1"
144 "ipc\0" Optional_argument "\xf2"
145 "network\0" Optional_argument "\xf3"
146 "pid\0" Optional_argument "\xf4"
147 "user\0" Optional_argument "\xf5"
148 "fork\0" No_argument "f"
149 "map-root-user\0" No_argument "r"
150 "mount-proc\0" Optional_argument "\xfd"
151 "propagation\0" Required_argument "\xfe"
152 "setgroups\0" Required_argument "\xff"
153;
154
/*
 * String reuse trick: PRIVATE_STR doubles as the plain C string "private"
 * (everything after the first NUL is ignored by string functions) and as
 * the NUL-separated word list searched by index_in_strings().
 */
#define PRIVATE_STR "private\0""unchanged\0""shared\0""slave\0"
#define PRIVATE_UNCHANGED_SHARED_SLAVE PRIVATE_STR

/*
 * Translate the argument of --propagation into mount(2) flags.
 * An unknown word is reported through bb_error_msg_and_die().
 * "unchanged" yields 0.
 */
static unsigned long parse_propagation(const char *prop_str)
{
	int mode = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);

	if (mode < 0)
		bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);

	switch (mode) {
	case 0: /* private */
		return MS_REC | MS_PRIVATE;
	case 1: /* unchanged */
		return 0;
	case 2: /* shared */
		return MS_REC | MS_SHARED;
	default: /* slave */
		return MS_REC | MS_SLAVE;
	}
}
172
173static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
174{
175 const struct namespace_descr *ns;
176 struct namespace_ctx *ns_ctx;
177 int i;
178
179 for (i = 0; i < NS_COUNT; i++) {
180 char nsf[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3];
181
182 ns = &ns_list[i];
183 ns_ctx = &ns_ctx_list[i];
184 if (!ns_ctx->path)
185 continue;
186 sprintf(nsf, "/proc/%u/ns/%.4s", (unsigned)pid, ns->nsfile4);
187 mount_or_die(nsf, ns_ctx->path, NULL, MS_BIND);
188 }
189}
190
191int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
192int unshare_main(int argc UNUSED_PARAM, char **argv)
193{
194 int i;
195 unsigned int opts;
196 int unsflags;
197 uintptr_t need_mount;
198 const char *proc_mnt_target;
199 const char *prop_str;
200 const char *setgrp_str;
201 unsigned long prop_flags;
202 uid_t reuid = geteuid();
203 gid_t regid = getegid();
204 struct fd_pair fdp;
205 pid_t child = child; /* for compiler */
206 struct namespace_ctx ns_ctx_list[NS_COUNT];
207
208 memset(ns_ctx_list, 0, sizeof(ns_ctx_list));
209 proc_mnt_target = "/proc";
210 prop_str = PRIVATE_STR;
211 setgrp_str = NULL;
212
213 opt_complementary =
214 "\xf0""m" /* long opts (via their "fake chars") imply short opts */
215 ":\xf1""u"
216 ":\xf2""i"
217 ":\xf3""n"
218 ":\xf4""p"
219 ":\xf5""U"
220 ":ru" /* --map-root-user or -r implies -u */
221 ":\xfd""m" /* --mount-proc implies -m */
222 ;
223 applet_long_options = unshare_longopts;
224 opts = getopt32(argv, opt_str,
225 &proc_mnt_target, &prop_str, &setgrp_str,
226 &ns_ctx_list[NS_MNT_POS].path,
227 &ns_ctx_list[NS_UTS_POS].path,
228 &ns_ctx_list[NS_IPC_POS].path,
229 &ns_ctx_list[NS_NET_POS].path,
230 &ns_ctx_list[NS_PID_POS].path,
231 &ns_ctx_list[NS_USR_POS].path
232 );
233 argv += optind;
234 //bb_error_msg("opts:0x%x", opts);
235 //bb_error_msg("mount:%s", ns_ctx_list[NS_MNT_POS].path);
236 //bb_error_msg("proc_mnt_target:%s", proc_mnt_target);
237 //bb_error_msg("prop_str:%s", prop_str);
238 //bb_error_msg("setgrp_str:%s", setgrp_str);
239 //exit(1);
240
241 if (setgrp_str) {
242 if (strcmp(setgrp_str, "allow") == 0) {
243 if (opts & OPT_map_root) {
244 bb_error_msg_and_die(
245 "--setgroups=allow and --map-root-user "
246 "are mutually exclusive"
247 );
248 }
249 } else {
250 /* It's not "allow", must be "deny" */
251 if (strcmp(setgrp_str, "deny") != 0)
252 bb_error_msg_and_die("unrecognized: --%s=%s",
253 "setgroups", setgrp_str);
254 }
255 }
256
257 unsflags = 0;
258 need_mount = 0;
259 for (i = 0; i < NS_COUNT; i++) {
260 const struct namespace_descr *ns = &ns_list[i];
261 struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
262
263 if (opts & (1 << i))
264 unsflags |= ns->flag;
265
266 need_mount |= (uintptr_t)(ns_ctx->path);
267 }
268 /* need_mount != 0 if at least one FILE was given */
269
270 prop_flags = MS_REC | MS_PRIVATE;
271 /* Silently ignore --propagation if --mount is not requested. */
272 if (opts & OPT_mount)
273 prop_flags = parse_propagation(prop_str);
274
275 /*
276 * Special case: if we were requested to unshare the mount namespace
277 * AND to make any namespace persistent (by bind mounting it) we need
278 * to spawn a child process which will wait for the parent to call
279 * unshare(), then mount parent's namespaces while still in the
280 * previous namespace.
281 */
282 fdp.wr = -1;
283 if (need_mount && (opts & OPT_mount)) {
284 /*
285 * Can't use getppid() in child, as we can be unsharing the
286 * pid namespace.
287 */
288 pid_t ppid = getpid();
289
290 xpiped_pair(fdp);
291
292 child = xfork();
293 if (child == 0) {
294 /* Child */
295 close(fdp.wr);
296
297 /* Wait until parent calls unshare() */
298 read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */
299 /*close(fdp.rd);*/
300
301 /* Mount parent's unshared namespaces. */
302 mount_namespaces(ppid, ns_ctx_list);
303 return EXIT_SUCCESS;
304 }
305 /* Parent continues */
306 }
307
308 if (unshare(unsflags) != 0)
309 bb_perror_msg_and_die("unshare(0x%x)", unsflags);
310
311 if (fdp.wr >= 0) {
312 close(fdp.wr); /* Release child */
313 close(fdp.rd); /* should close fd, to not confuse exec'ed PROG */
314 }
315
316 if (need_mount) {
317 /* Wait for the child to finish mounting the namespaces. */
318 if (opts & OPT_mount) {
319 int exit_status = wait_for_exitstatus(child);
320 if (WIFEXITED(exit_status) &&
321 WEXITSTATUS(exit_status) != EXIT_SUCCESS)
322 return WEXITSTATUS(exit_status);
323 } else {
324 /*
325 * Regular way - we were requested to mount some other
326 * namespaces: mount them after the call to unshare().
327 */
328 mount_namespaces(getpid(), ns_ctx_list);
329 }
330 }
331
332 /*
333 * When we're unsharing the pid namespace, it's not the process that
334 * calls unshare() that is put into the new namespace, but its first
335 * child. The user may want to use this option to spawn a new process
336 * that'll become PID 1 in this new namespace.
337 */
338 if (opts & OPT_fork) {
339 xvfork_parent_waits_and_exits();
340 /* Child continues */
341 }
342
343 if (opts & OPT_map_root) {
344 char uidmap_buf[sizeof("%u 0 1") + sizeof(int)*3];
345
346 /*
347 * Since Linux 3.19 unprivileged writing of /proc/self/gid_map
348 * has been disabled unless /proc/self/setgroups is written
349 * first to permanently disable the ability to call setgroups
350 * in that user namespace.
351 */
352 xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
353 sprintf(uidmap_buf, "%u 0 1", (unsigned)reuid);
354 xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
355 sprintf(uidmap_buf, "%u 0 1", (unsigned)regid);
356 xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
357 } else
358 if (setgrp_str) {
359 /* Write "allow" or "deny" */
360 xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
361 }
362
363 if (opts & OPT_mount) {
364 mount_or_die("none", "/", NULL, prop_flags);
365 }
366
367 if (opts & OPT_mount_proc) {
368 /*
369 * When creating a new pid namespace, we might want the pid
370 * subdirectories in /proc to remain consistent with the new
371 * process IDs. Without --mount-proc the pids in /proc would
372 * still reflect the old pid namespace. This is why we make
373 * /proc private here and then do a fresh mount.
374 */
375 mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC);
376 mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV);
377 }
378
379 exec_prog_or_SHELL(argv);
380}
Note: See TracBrowser for help on using the repository browser.