1 | /* vi: set sw=4 ts=4: */
|
---|
2 | /* Copyright 2005 Rob Landley <rob@landley.net>
|
---|
3 | *
|
---|
4 | * Switch from rootfs to another filesystem as the root of the mount tree.
|
---|
5 | *
|
---|
6 | * Licensed under GPLv2, see file LICENSE in this source tree.
|
---|
7 | */
|
---|
8 | #include <sys/vfs.h>
|
---|
9 | #include <sys/mount.h>
|
---|
10 | #include "libbb.h"
|
---|
// Fallback definitions, used only when the included headers
// did not already provide these names.

#if !defined(RAMFS_MAGIC)
// Compared against statfs f_type to recognize a ramfs root
# define RAMFS_MAGIC 0x858458f6U
#endif

#if !defined(TMPFS_MAGIC)
// Compared against statfs f_type to recognize a tmpfs root
# define TMPFS_MAGIC 0x01021994U
#endif

#if !defined(MS_MOVE)
// mount() flag used to move the new root over "/"
# define MS_MOVE 8192
#endif
|
---|
21 |
|
---|
// Remove 'directory' and, when it is a directory, everything below it,
// as long as it lives on the device 'rootdev'.
//   directory - path of the file or directory to remove
//   rootdev   - st_dev of the filesystem being emptied; a path whose
//               lstat() fails or reports a different device is left alone
// Return values of unlink()/rmdir()/closedir() are not checked.
static void delete_contents(const char *directory, dev_t rootdev)
{
	struct stat info;
	struct dirent *entry;
	DIR *handle;

	// Stay on the root device: never touch other filesystems
	if (lstat(directory, &info) != 0)
		return;
	if (info.st_dev != rootdev)
		return;

	// Anything that is not a directory is simply unlinked
	if (!S_ISDIR(info.st_mode)) {
		unlink(directory);
		return;
	}

	// A directory we cannot open is left in place
	handle = opendir(directory);
	if (handle == NULL)
		return;

	for (entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
		char *child;

		// "." and ".." are not real children
		if (DOT_OR_DOTDOT(entry->d_name))
			continue;

		// Build the child's full path and recurse into it
		child = concat_path_file(directory, entry->d_name);
		delete_contents(child, rootdev);
		free(child);
	}
	closedir(handle);

	// Every entry has been visited; now try to remove the directory itself
	rmdir(directory);
}
|
---|
59 |
|
---|
60 | int switch_root_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
---|
61 | int switch_root_main(int argc UNUSED_PARAM, char **argv)
|
---|
62 | {
|
---|
63 | char *newroot, *console = NULL;
|
---|
64 | struct stat st;
|
---|
65 | struct statfs stfs;
|
---|
66 | dev_t rootdev;
|
---|
67 |
|
---|
68 | // Parse args (-c console)
|
---|
69 | opt_complementary = "-2"; // minimum 2 params
|
---|
70 | getopt32(argv, "+c:", &console); // '+': stop at first non-option
|
---|
71 | argv += optind;
|
---|
72 | newroot = *argv++;
|
---|
73 |
|
---|
74 | // Change to new root directory and verify it's a different fs
|
---|
75 | xchdir(newroot);
|
---|
76 | xstat("/", &st);
|
---|
77 | rootdev = st.st_dev;
|
---|
78 | xstat(".", &st);
|
---|
79 | if (st.st_dev == rootdev || getpid() != 1) {
|
---|
80 | // Show usage, it says new root must be a mountpoint
|
---|
81 | // and we must be PID 1
|
---|
82 | bb_show_usage();
|
---|
83 | }
|
---|
84 |
|
---|
85 | // Additional sanity checks: we're about to rm -rf /, so be REALLY SURE
|
---|
86 | // we mean it. I could make this a CONFIG option, but I would get email
|
---|
87 | // from all the people who WILL destroy their filesystems.
|
---|
88 | if (stat("/init", &st) != 0 || !S_ISREG(st.st_mode)) {
|
---|
89 | bb_error_msg_and_die("/init is not a regular file");
|
---|
90 | }
|
---|
91 | statfs("/", &stfs); // this never fails
|
---|
92 | if ((unsigned)stfs.f_type != RAMFS_MAGIC
|
---|
93 | && (unsigned)stfs.f_type != TMPFS_MAGIC
|
---|
94 | ) {
|
---|
95 | bb_error_msg_and_die("root filesystem is not ramfs/tmpfs");
|
---|
96 | }
|
---|
97 |
|
---|
98 | // Zap everything out of rootdev
|
---|
99 | delete_contents("/", rootdev);
|
---|
100 |
|
---|
101 | // Overmount / with newdir and chroot into it
|
---|
102 | if (mount(".", "/", NULL, MS_MOVE, NULL)) {
|
---|
103 | // For example, fails when newroot is not a mountpoint
|
---|
104 | bb_perror_msg_and_die("error moving root");
|
---|
105 | }
|
---|
106 | xchroot(".");
|
---|
107 | // The chdir is needed to recalculate "." and ".." links
|
---|
108 | xchdir("/");
|
---|
109 |
|
---|
110 | // If a new console specified, redirect stdin/stdout/stderr to it
|
---|
111 | if (console) {
|
---|
112 | close(0);
|
---|
113 | xopen(console, O_RDWR);
|
---|
114 | xdup2(0, 1);
|
---|
115 | xdup2(0, 2);
|
---|
116 | }
|
---|
117 |
|
---|
118 | // Exec real init
|
---|
119 | execv(argv[0], argv);
|
---|
120 | bb_perror_msg_and_die("can't execute '%s'", argv[0]);
|
---|
121 | }
|
---|
122 |
|
---|
123 | /*
|
---|
124 | From: Rob Landley <rob@landley.net>
|
---|
125 | Date: Tue, Jun 16, 2009 at 7:47 PM
|
---|
126 | Subject: Re: switch_root...
|
---|
127 |
|
---|
128 | ...
|
---|
129 | ...
|
---|
130 | ...
|
---|
131 |
|
---|
132 | If you're _not_ running out of initramfs (if for example you're using initrd
|
---|
133 | instead), you probably shouldn't use switch_root because it's the wrong tool.
|
---|
134 |
|
---|
135 | Basically what the sucker does is something like the following shell script:
|
---|
136 |
|
---|
137 | find / -xdev | xargs rm -rf
|
---|
138 | cd "$1"
|
---|
139 | shift
|
---|
140 | mount --move . /
|
---|
141 | exec chroot . "$@"
|
---|
142 |
|
---|
143 | There are a couple reasons that won't work as a shell script:
|
---|
144 |
|
---|
145 | 1) If you delete the commands out of your $PATH, your shell scripts can't run
|
---|
146 | more commands, but you can't start using dynamically linked _new_ commands
|
---|
147 | until after you do the chroot because the path to the dynamic linker is wrong.
|
---|
148 | So there's a step that needs to be sort of atomic but can't be as a shell
|
---|
149 | script. (You can work around this with static linking or very carefully laid
|
---|
150 | out paths and sequencing, but it's brittle, ugly, and non-obvious.)
|
---|
151 |
|
---|
152 | 2) The "find | rm" bit will actually delete everything because the mount points
|
---|
153 | still show up (even if their contents don't), and rm -rf will then happily zap
|
---|
154 | that. So the first line is an oversimplification of what you need to do _not_
|
---|
155 | to descend into other filesystems and delete their contents.
|
---|
156 |
|
---|
157 | The reason we do this is to free up memory, by the way. Since initramfs is a
|
---|
158 | ramfs, deleting its contents frees up the memory it uses. (We leave it with
|
---|
159 | one remaining dentry for the new mount point, but that's ok.)
|
---|
160 |
|
---|
161 | Note that you cannot ever umount rootfs, for approximately the same reason you
|
---|
162 | can't kill PID 1. The kernel tracks mount points as a doubly linked list, and
|
---|
163 | the pointer to the start/end of that list always points to an entry that's
|
---|
164 | known to be there (rootfs), so it never has to worry about moving that pointer
|
---|
165 | and it never has to worry about the list being empty. (Back around 2.6.13
|
---|
166 | there _was_ a bug that let you umount rootfs, and the system locked hard the
|
---|
167 | instant you did so endlessly looping to find the end of the mount list and
|
---|
168 | never stopping. They fixed it.)
|
---|
169 |
|
---|
170 | Oh, and the reason we mount --move _and_ do the chroot is due to the way "/"
|
---|
171 | works. Each process has two special symlinks, ".", and "/". Each of them
|
---|
172 | points to the dentry of a directory, and gives you a location paths can start
|
---|
173 | from. (Historically ".." was also special, because you could enter a
|
---|
174 | directory via a symlink so backing out to the directory you came from doesn't
|
---|
175 | necessarily mean the one physically above where "." points to. These days I
|
---|
176 | think it's just handed off to the filesystem.)
|
---|
177 |
|
---|
178 | Anyway, path resolution starts with "." or "/" (although the "./" at the start
|
---|
179 | of the path may be implicit), meaning it's relative to one of those two
|
---|
180 | directories. Your current directory, and your current root directory. The
|
---|
181 | chdir() syscall changes where "." points to, and the chroot() syscall changes
|
---|
182 | where "/" points to. (Again, both are per-process which is why chroot only
|
---|
183 | affects your current process and its child processes.)
|
---|
184 |
|
---|
185 | Note that chroot() does _not_ change where "." points to, and back before they
|
---|
186 | put crazy security checks into the kernel your current directory could be
|
---|
187 | somewhere you could no longer access after the chroot. (The command line
|
---|
188 | chroot does a cd as well, the chroot _syscall_ is what I'm talking about.)
|
---|
189 |
|
---|
190 | The reason mounting something new over / has no obvious effect is the same
|
---|
191 | reason mounting something over your current directory has no obvious effect:
|
---|
192 | the . and / links aren't recalculated after a mount, so they still point to
|
---|
193 | the same dentry they did before, even if that dentry is no longer accessible
|
---|
194 | by other means. Note that "cd ." is a NOP, and "chroot /" is a nop; both look
|
---|
195 | up the cached dentry and set it right back. They don't re-parse any paths,
|
---|
196 | because they're what all paths your process uses would be relative to.
|
---|
197 |
|
---|
198 | That's why the careful sequencing above: we cd into the new mount point before
|
---|
199 | we do the mount --move. Moving the mount point would otherwise make it
|
---|
200 | totally inaccessible to us because cd-ing to the old path wouldn't give it to
|
---|
201 | us anymore, and cd "/" just gives us the cached dentry from when the process
|
---|
202 | was created (in this case the old initramfs one). But the "." symlink gives
|
---|
203 | us the dentry of the filesystem we just moved, so we can then "chroot ." to
|
---|
204 | copy that dentry to "/" and get the new filesystem. If we _didn't_ save that
|
---|
205 | dentry in "." we couldn't get it back after the mount --move.
|
---|
206 |
|
---|
207 | (Yes, this is all screwy and I had to email questions to Linus Torvalds to get
|
---|
208 | it straight myself. I keep meaning to write up a "how mount actually works"
|
---|
209 | document someday...)
|
---|
210 | */
|
---|