source: MondoRescue/branches/3.3/mindi-busybox/archival/libarchive/get_header_tar.c@ 3909

Last change on this file since 3909 was 3621, checked in by Bruno Cornec, 10 years ago

New 3.3 branch for incorporation of the latest busybox 1.25. Changing the minor version to handle potential incompatibilities.

  • Property svn:eol-style set to native
File size: 14.6 KB
Line 
1/* vi: set sw=4 ts=4: */
2/* Licensed under GPLv2 or later, see file LICENSE in this source tree.
3 *
4 * FIXME:
5 * In privileged mode if uname and gname map to a uid and gid then use the
6 * mapped value instead of the uid/gid values in tar header
7 *
8 * References:
9 * GNU tar and star man pages,
10 * Opengroup's ustar interchange format,
11 * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
12 */
13
14#include "libbb.h"
15#include "bb_archive.h"
16
17typedef uint32_t aliased_uint32_t FIX_ALIASING;
18typedef off_t aliased_off_t FIX_ALIASING;
19
/*
 * Decode one numeric field of a tar header.
 *
 * str points at the first byte of the field, len is the field width.
 * The field is normally octal text (leading spaces allowed, terminated
 * by NUL or space); fields whose first byte has bit 7 set are decoded
 * as GNU tar "base-256" binary numbers instead.
 *
 * Returns the decoded value. Negative base-256 values come back as their
 * two's complement bit pattern. Dies with an error message if the field
 * is neither valid octal text nor base-256.
 *
 * NB: _DESTROYS_ str[len] character! (it is overwritten with NUL, i.e.
 * the first byte of whatever follows the field)
 */
static unsigned long long getOctal(char *str, int len)
{
	unsigned long long v;
	char *end;

	/* NB: leading spaces are allowed. Using strtoull to handle that.
	 * The downside is that we accept e.g. "-123" too :(
	 */
	str[len] = '\0';
	v = strtoull(str, &end, 8);
	/* std: "Each numeric field is terminated by one or more
	 * <space> or NUL characters". We must support ' '! */
	if (*end != '\0' && *end != ' ') {
		unsigned char first = (unsigned char) str[0];

		if (!(first & 0x80))
			bb_error_msg_and_die("corrupted octal value in tar header");
		/*
		 * GNU tar uses "base-256 encoding" for very large numbers.
		 * Encoding is binary, with highest bit always set as a marker
		 * and sign in next-highest bit:
		 * 80 00 .. 00 - zero
		 * bf ff .. ff - largest positive number
		 * ff ff .. ff - minus 1
		 * c0 00 .. 00 - smallest negative number
		 *
		 * Example of tar file with 8914993153 (0x213600001) byte file.
		 * Field starts at offset 7c:
		 * 00070  30 30 30 00 30 30 30 30  30 30 30 00 80 00 00 00  |000.0000000.....|
		 * 00080  00 00 00 02 13 60 00 01  31 31 31 32 30 33 33 36  |.....`..11120336|
		 *
		 * NB: tarballs with NEGATIVE unix times encoded that way were seen!
		 */
		/* Sign-extend the low 7 bits of 'first' to 64 bits, using bit 6
		 * as the sign. Done with unsigned masks only: shifting a negative
		 * signed value left is undefined behaviour in C. */
		v = first & 0x7f;
		if (first & 0x40)
			v |= ~0x7fULL;
		/* Append the remaining len-1 bytes, most significant first */
		while (--len != 0)
			v = (v << 8) + (uint8_t) *++str;
	}
	return v;
}
/* Decode a char-array header field; its size is taken from the array type */
#define GET_OCTAL(a) getOctal((a), sizeof(a))
62
63#define TAR_EXTD (ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX)
64#if !TAR_EXTD
65#define process_pax_hdr(archive_handle, sz, global) \
66 process_pax_hdr(archive_handle, sz)
67#endif
68/* "global" is 0 or 1 */
69static void process_pax_hdr(archive_handle_t *archive_handle, unsigned sz, int global)
70{
71#if !TAR_EXTD
72 unsigned blk_sz = (sz + 511) & (~511);
73 seek_by_read(archive_handle->src_fd, blk_sz);
74#else
75 unsigned blk_sz = (sz + 511) & (~511);
76 char *buf, *p;
77
78 p = buf = xmalloc(blk_sz + 1);
79 xread(archive_handle->src_fd, buf, blk_sz);
80 archive_handle->offset += blk_sz;
81
82 /* prevent bb_strtou from running off the buffer */
83 buf[sz] = '\0';
84
85 while (sz != 0) {
86 char *end, *value;
87 unsigned len;
88
89 /* Every record has this format: "LEN NAME=VALUE\n" */
90 len = bb_strtou(p, &end, 10);
91 /* expect errno to be EINVAL, because the character
92 * following the digits should be a space
93 */
94 p += len;
95 sz -= len;
96 if (
97 /** (int)sz < 0 - not good enough for huge malicious VALUE of 2^32-1 */
98 (int)(sz|len) < 0 /* this works */
99 || len == 0
100 || errno != EINVAL
101 || *end != ' '
102 ) {
103 bb_error_msg("malformed extended header, skipped");
104 // More verbose version:
105 //bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped",
106 // archive_handle->offset - (sz + len));
107 break;
108 }
109 /* overwrite the terminating newline with NUL
110 * (we do not bother to check that it *was* a newline)
111 */
112 p[-1] = '\0';
113 value = end + 1;
114
115# if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
116 if (!global && is_prefixed_with(value, "path=")) {
117 value += sizeof("path=") - 1;
118 free(archive_handle->tar__longname);
119 archive_handle->tar__longname = xstrdup(value);
120 continue;
121 }
122# endif
123
124# if ENABLE_FEATURE_TAR_SELINUX
125 /* Scan for SELinux contexts, via "RHT.security.selinux" keyword.
126 * This is what Red Hat's patched version of tar uses.
127 */
128# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux"
129 if (is_prefixed_with(value, SELINUX_CONTEXT_KEYWORD"=")) {
130 value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1;
131 free(archive_handle->tar__sctx[global]);
132 archive_handle->tar__sctx[global] = xstrdup(value);
133 continue;
134 }
135# endif
136 }
137
138 free(buf);
139#endif
140}
141
142char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
143{
144 file_header_t *file_header = archive_handle->file_header;
145 struct tar_header_t tar;
146 char *cp;
147 int i, sum_u, sum;
148#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
149 int sum_s;
150#endif
151 int parse_names;
152
153 /* Our "private data" */
154#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
155# define p_longname (archive_handle->tar__longname)
156# define p_linkname (archive_handle->tar__linkname)
157#else
158# define p_longname 0
159# define p_linkname 0
160#endif
161
162#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX
163 again:
164#endif
165 /* Align header */
166 data_align(archive_handle, 512);
167
168 again_after_align:
169
170#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT
171 /* to prevent misdetection of bz2 sig */
172 *(aliased_uint32_t*)&tar = 0;
173 i = full_read(archive_handle->src_fd, &tar, 512);
174 /* If GNU tar sees EOF in above read, it says:
175 * "tar: A lone zero block at N", where N = kilobyte
176 * where EOF was met (not EOF block, actual EOF!),
177 * and exits with EXIT_SUCCESS.
178 * We will mimic exit(EXIT_SUCCESS), although we will not mimic
179 * the message and we don't check whether we indeed
180 * saw zero block directly before this. */
181 if (i == 0) {
182 bb_error_msg("short read");
183 /* this merely signals end of archive, not exit(1): */
184 return EXIT_FAILURE;
185 }
186 if (i != 512) {
187 IF_FEATURE_TAR_AUTODETECT(goto autodetect;)
188 bb_error_msg_and_die("short read");
189 }
190
191#else
192 i = 512;
193 xread(archive_handle->src_fd, &tar, i);
194#endif
195 archive_handle->offset += i;
196
197 /* If there is no filename its an empty header */
198 if (tar.name[0] == 0 && tar.prefix[0] == 0) {
199 if (archive_handle->tar__end) {
200 /* Second consecutive empty header - end of archive.
201 * Read until the end to empty the pipe from gz or bz2
202 */
203 while (full_read(archive_handle->src_fd, &tar, 512) == 512)
204 continue;
205 return EXIT_FAILURE; /* "end of archive" */
206 }
207 archive_handle->tar__end = 1;
208 return EXIT_SUCCESS; /* "decoded one header" */
209 }
210 archive_handle->tar__end = 0;
211
212 /* Check header has valid magic, "ustar" is for the proper tar,
213 * five NULs are for the old tar format */
214 if (!is_prefixed_with(tar.magic, "ustar")
215 && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
216 || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
217 ) {
218#if ENABLE_FEATURE_TAR_AUTODETECT
219 autodetect:
220 /* Two different causes for lseek() != 0:
221 * unseekable fd (would like to support that too, but...),
222 * or not first block (false positive, it's not .gz/.bz2!) */
223 if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0)
224 goto err;
225 if (setup_unzip_on_fd(archive_handle->src_fd, /*fail_if_not_compressed:*/ 0) != 0)
226 err:
227 bb_error_msg_and_die("invalid tar magic");
228 archive_handle->offset = 0;
229 goto again_after_align;
230#endif
231 bb_error_msg_and_die("invalid tar magic");
232 }
233
234 /* Do checksum on headers.
235 * POSIX says that checksum is done on unsigned bytes, but
236 * Sun and HP-UX gets it wrong... more details in
237 * GNU tar source. */
238#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
239 sum_s = ' ' * sizeof(tar.chksum);
240#endif
241 sum_u = ' ' * sizeof(tar.chksum);
242 for (i = 0; i < 148; i++) {
243 sum_u += ((unsigned char*)&tar)[i];
244#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
245 sum_s += ((signed char*)&tar)[i];
246#endif
247 }
248 for (i = 156; i < 512; i++) {
249 sum_u += ((unsigned char*)&tar)[i];
250#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
251 sum_s += ((signed char*)&tar)[i];
252#endif
253 }
254 /* This field does not need special treatment (getOctal) */
255 {
256 char *endp; /* gcc likes temp var for &endp */
257 sum = strtoul(tar.chksum, &endp, 8);
258 if ((*endp != '\0' && *endp != ' ')
259 || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum))
260 ) {
261 bb_error_msg_and_die("invalid tar header checksum");
262 }
263 }
264 /* don't use xstrtoul, tar.chksum may have leading spaces */
265 sum = strtoul(tar.chksum, NULL, 8);
266 if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) {
267 bb_error_msg_and_die("invalid tar header checksum");
268 }
269
270 /* 0 is reserved for high perf file, treat as normal file */
271 if (!tar.typeflag) tar.typeflag = '0';
272 parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7');
273
274 /* getOctal trashes subsequent field, therefore we call it
275 * on fields in reverse order */
276 if (tar.devmajor[0]) {
277 char t = tar.prefix[0];
278 /* we trash prefix[0] here, but we DO need it later! */
279 unsigned minor = GET_OCTAL(tar.devminor);
280 unsigned major = GET_OCTAL(tar.devmajor);
281 file_header->device = makedev(major, minor);
282 tar.prefix[0] = t;
283 }
284 file_header->link_target = NULL;
285 if (!p_linkname && parse_names && tar.linkname[0]) {
286 file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname));
287 /* FIXME: what if we have non-link object with link_target? */
288 /* Will link_target be free()ed? */
289 }
290#if ENABLE_FEATURE_TAR_UNAME_GNAME
291 file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL;
292 file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL;
293#endif
294 file_header->mtime = GET_OCTAL(tar.mtime);
295 file_header->size = GET_OCTAL(tar.size);
296 file_header->gid = GET_OCTAL(tar.gid);
297 file_header->uid = GET_OCTAL(tar.uid);
298 /* Set bits 0-11 of the files mode */
299 file_header->mode = 07777 & GET_OCTAL(tar.mode);
300
301 file_header->name = NULL;
302 if (!p_longname && parse_names) {
303 /* we trash mode[0] here, it's ok */
304 //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain
305 tar.mode[0] = '\0';
306 if (tar.prefix[0]) {
307 /* and padding[0] */
308 //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain
309 tar.padding[0] = '\0';
310 file_header->name = concat_path_file(tar.prefix, tar.name);
311 } else
312 file_header->name = xstrdup(tar.name);
313 }
314
315 /* Set bits 12-15 of the files mode */
316 /* (typeflag was not trashed because chksum does not use getOctal) */
317 switch (tar.typeflag) {
318 case '1': /* hardlink */
319 /* we mark hardlinks as regular files with zero size and a link name */
320 file_header->mode |= S_IFREG;
321 /* on size of link fields from star(4)
322 * ... For tar archives written by pre POSIX.1-1988
323 * implementations, the size field usually contains the size of
324 * the file and needs to be ignored as no data may follow this
325 * header type. For POSIX.1- 1988 compliant archives, the size
326 * field needs to be 0. For POSIX.1-2001 compliant archives,
327 * the size field may be non zero, indicating that file data is
328 * included in the archive.
329 * i.e; always assume this is zero for safety.
330 */
331 goto size0;
332 case '7':
333 /* case 0: */
334 case '0':
335#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
336 if (last_char_is(file_header->name, '/')) {
337 goto set_dir;
338 }
339#endif
340 file_header->mode |= S_IFREG;
341 break;
342 case '2':
343 file_header->mode |= S_IFLNK;
344 /* have seen tarballs with size field containing
345 * the size of the link target's name */
346 size0:
347 file_header->size = 0;
348 break;
349 case '3':
350 file_header->mode |= S_IFCHR;
351 goto size0; /* paranoia */
352 case '4':
353 file_header->mode |= S_IFBLK;
354 goto size0;
355 case '5':
356 IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:)
357 file_header->mode |= S_IFDIR;
358 goto size0;
359 case '6':
360 file_header->mode |= S_IFIFO;
361 goto size0;
362 case 'g': /* pax global header */
363 case 'x': { /* pax extended header */
364 if ((uoff_t)file_header->size > 0xfffff) /* paranoia */
365 goto skip_ext_hdr;
366 process_pax_hdr(archive_handle, file_header->size, (tar.typeflag == 'g'));
367 goto again_after_align;
368#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
369/* See http://www.gnu.org/software/tar/manual/html_node/Extensions.html */
370 case 'L':
371 /* free: paranoia: tar with several consecutive longnames */
372 free(p_longname);
373 /* For paranoia reasons we allocate extra NUL char */
374 p_longname = xzalloc(file_header->size + 1);
375 /* We read ASCIZ string, including NUL */
376 xread(archive_handle->src_fd, p_longname, file_header->size);
377 archive_handle->offset += file_header->size;
378 /* return get_header_tar(archive_handle); */
379 /* gcc 4.1.1 didn't optimize it into jump */
380 /* so we will do it ourself, this also saves stack */
381 goto again;
382 case 'K':
383 free(p_linkname);
384 p_linkname = xzalloc(file_header->size + 1);
385 xread(archive_handle->src_fd, p_linkname, file_header->size);
386 archive_handle->offset += file_header->size;
387 /* return get_header_tar(archive_handle); */
388 goto again;
389/*
390 * case 'S': // Sparse file
391 * Was seen in the wild. Not supported (yet?).
392 * See https://www.gnu.org/software/tar/manual/html_section/tar_92.html
393 * for the format. (An "Old GNU Format" was seen, not PAX formats).
394 */
395// case 'D': /* GNU dump dir */
396// case 'M': /* Continuation of multi volume archive */
397// case 'N': /* Old GNU for names > 100 characters */
398// case 'V': /* Volume header */
399#endif
400 }
401 skip_ext_hdr:
402 {
403 off_t sz;
404 bb_error_msg("warning: skipping header '%c'", tar.typeflag);
405 sz = (file_header->size + 511) & ~(off_t)511;
406 archive_handle->offset += sz;
407 sz >>= 9; /* sz /= 512 but w/o contortions for signed div */
408 while (sz--)
409 xread(archive_handle->src_fd, &tar, 512);
410 /* return get_header_tar(archive_handle); */
411 goto again_after_align;
412 }
413 default:
414 bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag);
415 }
416
417#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
418 if (p_longname) {
419 file_header->name = p_longname;
420 p_longname = NULL;
421 }
422 if (p_linkname) {
423 file_header->link_target = p_linkname;
424 p_linkname = NULL;
425 }
426#endif
427
428 /* Everything up to and including last ".." component is stripped */
429 overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
430//TODO: do the same for file_header->link_target?
431
432 /* Strip trailing '/' in directories */
433 /* Must be done after mode is set as '/' is used to check if it's a directory */
434 cp = last_char_is(file_header->name, '/');
435
436 if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
437 archive_handle->action_header(/*archive_handle->*/ file_header);
438 /* Note that we kill the '/' only after action_header() */
439 /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */
440 if (cp)
441 *cp = '\0';
442 archive_handle->action_data(archive_handle);
443 if (archive_handle->accept || archive_handle->reject
444 || (archive_handle->ah_flags & ARCHIVE_REMEMBER_NAMES)
445 ) {
446 llist_add_to(&archive_handle->passed, file_header->name);
447 } else /* Caller isn't interested in list of unpacked files */
448 free(file_header->name);
449 } else {
450 data_skip(archive_handle);
451 free(file_header->name);
452 }
453 archive_handle->offset += file_header->size;
454
455 free(file_header->link_target);
456 /* Do not free(file_header->name)!
457 * It might be inserted in archive_handle->passed - see above */
458#if ENABLE_FEATURE_TAR_UNAME_GNAME
459 free(file_header->tar__uname);
460 free(file_header->tar__gname);
461#endif
462 return EXIT_SUCCESS; /* "decoded one header" */
463}
Note: See TracBrowser for help on using the repository browser.