source: MondoRescue/branches/3.0/mindi-busybox/coreutils/od_bloaty.c@ 3085

Last change on this file since 3085 was 2725, checked in by Bruno Cornec, 13 years ago
  • Update mindi-busybox to 1.18.3 to avoid problems with the tar command which is now failing on recent versions with busybox 1.7.3
  • Property svn:eol-style set to native
File size: 37.1 KB
Line 
1/* od -- dump files in octal and other formats
2 Copyright (C) 92, 1995-2004 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18/* Written by Jim Meyering. */
19
20/* Busyboxed by Denys Vlasenko
21
22Based on od.c from coreutils-5.2.1
23Top bloat sources:
24
2500000073 t parse_old_offset
260000007b t get_lcm
2700000090 r long_options
2800000092 t print_named_ascii
29000000bf t print_ascii
3000000168 t write_block
3100000366 t decode_format_string
3200000a71 T od_main
33
34Tested for compat with coreutils 6.3
35using this script. Minor differences fixed.
36
37#!/bin/sh
38echo STD
39time /path/to/coreutils/od \
40...params... \
41>std
42echo Exit code $?
43echo BBOX
44time ./busybox od \
45...params... \
46>bbox
47echo Exit code $?
48diff -u -a std bbox >bbox.diff || { echo Different!; sleep 1; }
49
50*/
51
52#include "libbb.h"
53
54#define assert(a) ((void)0)
55
56/* Check for 0x7f is a coreutils 6.3 addition */
57#define ISPRINT(c) (((c)>=' ') && (c) != 0x7f)
58
59typedef long double longdouble_t;
60typedef unsigned long long ulonglong_t;
61typedef long long llong;
62
63#if ENABLE_LFS
64# define xstrtooff_sfx xstrtoull_sfx
65#else
66# define xstrtooff_sfx xstrtoul_sfx
67#endif
68
69/* The default number of input bytes per output line. */
70#define DEFAULT_BYTES_PER_BLOCK 16
71
72/* The number of decimal digits of precision in a float. */
73#ifndef FLT_DIG
74# define FLT_DIG 7
75#endif
76
77/* The number of decimal digits of precision in a double. */
78#ifndef DBL_DIG
79# define DBL_DIG 15
80#endif
81
82/* The number of decimal digits of precision in a long double. */
83#ifndef LDBL_DIG
84# define LDBL_DIG DBL_DIG
85#endif
86
/* Operand sizes a format spec can select.  The enumerator values are
   used as indexes into width_bytes[], and NO_SIZE (zero) doubles as the
   "unsupported" marker in the *_type_size lookup tables. */
enum size_spec {
	NO_SIZE = 0,
	CHAR,
	SHORT,
	INT,
	LONG,
	LONG_LONG,
	FLOAT_SINGLE,
	FLOAT_DOUBLE,
	FLOAT_LONG_DOUBLE,
	N_SIZE_SPECS	/* count of the enumerators above */
};
99
/* How the bytes of one datum are rendered. */
enum output_format {
	SIGNED_DECIMAL = 0,
	UNSIGNED_DECIMAL,
	OCTAL,
	HEXADECIMAL,
	FLOATING_POINT,
	NAMED_CHARACTER,	/* selected by type letter 'a' */
	CHARACTER		/* selected by type letter 'c' */
};
109
110/* Each output format specification (from '-t spec' or from
111 old-style options) is represented by one of these structures. */
112struct tspec {
113 enum output_format fmt;
114 enum size_spec size;
115 void (*print_function) (size_t, const char *, const char *);
116 char *fmt_string;
117 int hexl_mode_trailer;
118 int field_width;
119};
120
121/* Convert the number of 8-bit bytes of a binary representation to
122 the number of characters (digits + sign if the type is signed)
123 required to represent the same quantity in the specified base/type.
124 For example, a 32-bit (4-byte) quantity may require a field width
125 as wide as the following for these types:
126 11 unsigned octal
127 11 signed decimal
128 10 unsigned decimal
129 8 unsigned hexadecimal */
130
131static const uint8_t bytes_to_oct_digits[] ALIGN1 =
132{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
133
134static const uint8_t bytes_to_signed_dec_digits[] ALIGN1 =
135{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
136
137static const uint8_t bytes_to_unsigned_dec_digits[] ALIGN1 =
138{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
139
140static const uint8_t bytes_to_hex_digits[] ALIGN1 =
141{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
142
143/* Convert enum size_spec to the size of the named type. */
144static const signed char width_bytes[] ALIGN1 = {
145 -1,
146 sizeof(char),
147 sizeof(short),
148 sizeof(int),
149 sizeof(long),
150 sizeof(ulonglong_t),
151 sizeof(float),
152 sizeof(double),
153 sizeof(longdouble_t)
154};
155/* Ensure that for each member of 'enum size_spec' there is an
156 initializer in the width_bytes array. */
157struct ERR_width_bytes_has_bad_size {
158 char ERR_width_bytes_has_bad_size[ARRAY_SIZE(width_bytes) == N_SIZE_SPECS ? 1 : -1];
159};
160
161static smallint flag_dump_strings;
162/* Non-zero if an old-style 'pseudo-address' was specified. */
163static smallint flag_pseudo_start;
164static smallint limit_bytes_to_format;
165/* When zero and two or more consecutive blocks are equal, format
166 only the first block and output an asterisk alone on the following
167 line to indicate that identical blocks have been elided. */
168static smallint verbose;
169static smallint ioerror;
170
171static size_t string_min;
172
173/* An array of specs describing how to format each input block. */
174static size_t n_specs;
175static struct tspec *spec;
176
177/* Function that accepts an address and an optional following char,
178 and prints the address and char to stdout. */
179static void (*format_address)(off_t, char);
180/* The difference between the old-style pseudo starting address and
181 the number of bytes to skip. */
182static off_t pseudo_offset;
183/* When zero, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
184 input is formatted. */
185
186/* The number of input bytes formatted per output line. It must be
187 a multiple of the least common multiple of the sizes associated with
188 the specified output types. It should be as large as possible, but
189 no larger than 16 -- unless specified with the -w option. */
190static unsigned bytes_per_block = 32; /* have to use unsigned, not size_t */
191
192/* A NULL-terminated list of the file-arguments from the command line. */
193static const char *const *file_list;
194
195/* The input stream associated with the current file. */
196static FILE *in_stream;
197
198#define MAX_INTEGRAL_TYPE_SIZE sizeof(ulonglong_t)
199static const unsigned char integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1] ALIGN1 = {
200 [sizeof(char)] = CHAR,
201#if USHRT_MAX != UCHAR_MAX
202 [sizeof(short)] = SHORT,
203#endif
204#if UINT_MAX != USHRT_MAX
205 [sizeof(int)] = INT,
206#endif
207#if ULONG_MAX != UINT_MAX
208 [sizeof(long)] = LONG,
209#endif
210#if ULLONG_MAX != ULONG_MAX
211 [sizeof(ulonglong_t)] = LONG_LONG,
212#endif
213};
214
215#define MAX_FP_TYPE_SIZE sizeof(longdouble_t)
216static const unsigned char fp_type_size[MAX_FP_TYPE_SIZE + 1] ALIGN1 = {
217 /* gcc seems to allow repeated indexes. Last one stays */
218 [sizeof(longdouble_t)] = FLOAT_LONG_DOUBLE,
219 [sizeof(double)] = FLOAT_DOUBLE,
220 [sizeof(float)] = FLOAT_SINGLE
221};
222
223
/* Greatest common divisor of U and V (Euclid's algorithm).
   gcd(x, 0) == gcd(0, x) == x, so gcd(0, 0) is 0. */
static unsigned
gcd(unsigned u, unsigned v)
{
	while (v) {
		unsigned rem = u % v;

		u = v;
		v = rem;
	}
	return u;
}
235
/* Compute the least common multiple of U and V.
   Returns 0 when both arguments are 0. */
static unsigned
lcm(unsigned u, unsigned v)
{
	unsigned divisor = gcd(u, v);

	if (divisor == 0)
		return 0;
	/* Divide before multiplying: u is an exact multiple of the gcd,
	 * and the intermediate value cannot overflow unless the final
	 * result itself does not fit (u * v / gcd could wrap earlier). */
	return u / divisor * v;
}
244
/* Print each of the N_BYTES bytes at BLOCK as a signed char,
   using FMT_STRING as the printf format for one value. */
static void
print_s_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const char *stop = block + n_bytes;

	for (; block != stop; block += sizeof(unsigned char)) {
		int value = *(const signed char *) block;

		printf(fmt_string, value);
	}
}
254
/* Print each of the N_BYTES bytes at BLOCK as an unsigned char,
   using FMT_STRING as the printf format for one value. */
static void
print_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const char *stop = block + n_bytes;

	for (; block != stop; block += sizeof(unsigned char)) {
		unsigned value = *(const unsigned char *) block;

		printf(fmt_string, value);
	}
}
264
/* Print the N_BYTES bytes at BLOCK as consecutive signed shorts,
   using FMT_STRING as the printf format for one value.  A trailing
   partial short is ignored. */
static void
print_s_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(signed short); remaining != 0; remaining--) {
		int value = *(const signed short *) block;

		printf(fmt_string, value);
		block += sizeof(unsigned short);
	}
}
275
/* Print the N_BYTES bytes at BLOCK as consecutive unsigned shorts,
   using FMT_STRING as the printf format for one value.  A trailing
   partial short is ignored. */
static void
print_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(unsigned short); remaining != 0; remaining--) {
		unsigned value = *(const unsigned short *) block;

		printf(fmt_string, value);
		block += sizeof(unsigned short);
	}
}
286
/* Print the N_BYTES bytes at BLOCK as consecutive unsigned ints,
   using FMT_STRING as the printf format for one value (signedness of
   the output is decided by the format).  A trailing partial int is
   ignored. */
static void
print_int(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(unsigned); remaining != 0; remaining--) {
		unsigned value = *(const unsigned *) block;

		printf(fmt_string, value);
		block += sizeof(unsigned);
	}
}
297
/* Print a block as consecutive unsigned longs.  When long and int
   have the same range, print_int is reused instead. */
#if UINT_MAX == ULONG_MAX
# define print_long print_int
#else
static void
print_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(unsigned long); remaining != 0; remaining--) {
		unsigned long value = *(const unsigned long *) block;

		printf(fmt_string, value);
		block += sizeof(unsigned long);
	}
}
#endif
312
/* Print a block as consecutive unsigned long longs.  When long long
   and long have the same range, print_long is reused instead. */
#if ULONG_MAX == ULLONG_MAX
# define print_long_long print_long
#else
static void
print_long_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(ulonglong_t); remaining != 0; remaining--) {
		ulonglong_t value = *(const ulonglong_t *) block;

		printf(fmt_string, value);
		block += sizeof(ulonglong_t);
	}
}
#endif
327
/* Print the N_BYTES bytes at BLOCK as consecutive floats,
   using FMT_STRING as the printf format for one value.  A trailing
   partial float is ignored. */
static void
print_float(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(float); remaining != 0; remaining--) {
		float value = *(const float *) block;

		printf(fmt_string, value);
		block += sizeof(float);
	}
}
338
/* Print the N_BYTES bytes at BLOCK as consecutive doubles,
   using FMT_STRING as the printf format for one value.  A trailing
   partial double is ignored. */
static void
print_double(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t remaining;

	for (remaining = n_bytes / sizeof(double); remaining != 0; remaining--) {
		double value = *(const double *) block;

		printf(fmt_string, value);
		block += sizeof(double);
	}
}
349
350static void
351print_long_double(size_t n_bytes, const char *block, const char *fmt_string)
352{
353 n_bytes /= sizeof(longdouble_t);
354 while (n_bytes--) {
355 longdouble_t tmp = *(longdouble_t *) block;
356 printf(fmt_string, tmp);
357 block += sizeof(longdouble_t);
358 }
359}
360
361/* print_[named]_ascii are optimized for speed.
362 * Remember, someday you may want to pump gigabytes through this thing.
363 * Saving a dozen of .text bytes here is counter-productive */
364
365static void
366print_named_ascii(size_t n_bytes, const char *block,
367 const char *unused_fmt_string UNUSED_PARAM)
368{
369 /* Names for some non-printing characters. */
370 static const char charname[33][3] ALIGN1 = {
371 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
372 " bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
373 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
374 "can", " em", "sub", "esc", " fs", " gs", " rs", " us",
375 " sp"
376 };
377 // buf[N] pos: 01234 56789
378 char buf[12] = " x\0 0xx\0";
379 // actually " x\0 xxx\0", but I want to share the string with below.
380 // [12] because we take three 32bit stack slots anyway, and
381 // gcc is too dumb to initialize with constant stores,
382 // it copies initializer from rodata. Oh well.
383
384 while (n_bytes--) {
385 unsigned masked_c = *(unsigned char *) block++;
386
387 masked_c &= 0x7f;
388 if (masked_c == 0x7f) {
389 fputs(" del", stdout);
390 continue;
391 }
392 if (masked_c > ' ') {
393 buf[3] = masked_c;
394 fputs(buf, stdout);
395 continue;
396 }
397 /* Why? Because printf(" %3.3s") is much slower... */
398 buf[6] = charname[masked_c][0];
399 buf[7] = charname[masked_c][1];
400 buf[8] = charname[masked_c][2];
401 fputs(buf+5, stdout);
402 }
403}
404
405static void
406print_ascii(size_t n_bytes, const char *block,
407 const char *unused_fmt_string UNUSED_PARAM)
408{
409 // buf[N] pos: 01234 56789
410 char buf[12] = " x\0 0xx\0";
411
412 while (n_bytes--) {
413 const char *s;
414 unsigned c = *(unsigned char *) block++;
415
416 if (ISPRINT(c)) {
417 buf[3] = c;
418 fputs(buf, stdout);
419 continue;
420 }
421 switch (c) {
422 case '\0':
423 s = " \\0";
424 break;
425 case '\007':
426 s = " \\a";
427 break;
428 case '\b':
429 s = " \\b";
430 break;
431 case '\f':
432 s = " \\f";
433 break;
434 case '\n':
435 s = " \\n";
436 break;
437 case '\r':
438 s = " \\r";
439 break;
440 case '\t':
441 s = " \\t";
442 break;
443 case '\v':
444 s = " \\v";
445 break;
446 case '\x7f':
447 s = " 177";
448 break;
449 default: /* c is never larger than 040 */
450 buf[7] = (c >> 3) + '0';
451 buf[8] = (c & 7) + '0';
452 s = buf + 5;
453 }
454 fputs(s, stdout);
455 }
456}
457
458/* Given a list of one or more input filenames FILE_LIST, set the global
459 file pointer IN_STREAM and the global string INPUT_FILENAME to the
460 first one that can be successfully opened. Modify FILE_LIST to
461 reference the next filename in the list. A file name of "-" is
462 interpreted as standard input. If any file open fails, give an error
463 message and return nonzero. */
464
465static void
466open_next_file(void)
467{
468 while (1) {
469 if (!*file_list)
470 return;
471 in_stream = fopen_or_warn_stdin(*file_list++);
472 if (in_stream) {
473 break;
474 }
475 ioerror = 1;
476 }
477
478 if (limit_bytes_to_format && !flag_dump_strings)
479 setbuf(in_stream, NULL);
480}
481
482/* Test whether there have been errors on in_stream, and close it if
483 it is not standard input. Return nonzero if there has been an error
484 on in_stream or stdout; return zero otherwise. This function will
485 report more than one error only if both a read and a write error
486 have occurred. IN_ERRNO, if nonzero, is the error number
487 corresponding to the most recent action for IN_STREAM. */
488
489static void
490check_and_close(void)
491{
492 if (in_stream) {
493 if (ferror(in_stream)) {
494 bb_error_msg("%s: read error", (in_stream == stdin)
495 ? bb_msg_standard_input
496 : file_list[-1]
497 );
498 ioerror = 1;
499 }
500 fclose_if_not_stdin(in_stream);
501 in_stream = NULL;
502 }
503
504 if (ferror(stdout)) {
505 bb_error_msg(bb_msg_write_error);
506 ioerror = 1;
507 }
508}
509
510/* If S points to a single valid modern od format string, put
511 a description of that format in *TSPEC, return pointer to
512 character following the just-decoded format.
513 For example, if S were "d4afL", we will return a rtp to "afL"
514 and *TSPEC would be
515 {
516 fmt = SIGNED_DECIMAL;
517 size = INT or LONG; (whichever integral_type_size[4] resolves to)
518 print_function = print_int; (assuming size == INT)
519 fmt_string = "%011d%c";
520 }
521 S_ORIG is solely for reporting errors. It should be the full format
522 string argument. */
523
524static NOINLINE const char *
525decode_one_format(const char *s_orig, const char *s, struct tspec *tspec)
526{
527 enum size_spec size_spec;
528 unsigned size;
529 enum output_format fmt;
530 const char *p;
531 char *end;
532 char *fmt_string = NULL;
533 void (*print_function) (size_t, const char *, const char *);
534 unsigned c;
535 unsigned field_width = 0;
536 int pos;
537
538
539 switch (*s) {
540 case 'd':
541 case 'o':
542 case 'u':
543 case 'x': {
544 static const char CSIL[] ALIGN1 = "CSIL";
545
546 c = *s++;
547 p = strchr(CSIL, *s);
548 /* if *s == NUL, p != NULL! Testcase: "od -tx" */
549 if (!p || *p == '\0') {
550 size = sizeof(int);
551 if (isdigit(s[0])) {
552 size = bb_strtou(s, &end, 0);
553 if (errno == ERANGE
554 || MAX_INTEGRAL_TYPE_SIZE < size
555 || integral_type_size[size] == NO_SIZE
556 ) {
557 bb_error_msg_and_die("invalid type string '%s'; "
558 "%u-byte %s type is not supported",
559 s_orig, size, "integral");
560 }
561 s = end;
562 }
563 } else {
564 static const uint8_t CSIL_sizeof[4] = {
565 sizeof(char),
566 sizeof(short),
567 sizeof(int),
568 sizeof(long),
569 };
570 size = CSIL_sizeof[p - CSIL];
571 s++; /* skip C/S/I/L */
572 }
573
574#define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format) \
575 ((Spec) == LONG_LONG ? (Max_format) \
576 : ((Spec) == LONG ? (Long_format) : (Min_format)))
577
578#define FMT_BYTES_ALLOCATED 9
579 size_spec = integral_type_size[size];
580
581 {
582 static const char doux[] ALIGN1 = "doux";
583 static const char doux_fmt_letter[][4] = {
584 "lld", "llo", "llu", "llx"
585 };
586 static const enum output_format doux_fmt[] = {
587 SIGNED_DECIMAL,
588 OCTAL,
589 UNSIGNED_DECIMAL,
590 HEXADECIMAL,
591 };
592 static const uint8_t *const doux_bytes_to_XXX[] = {
593 bytes_to_signed_dec_digits,
594 bytes_to_oct_digits,
595 bytes_to_unsigned_dec_digits,
596 bytes_to_hex_digits,
597 };
598 static const char doux_fmtstring[][sizeof(" %%0%u%s")] = {
599 " %%%u%s",
600 " %%0%u%s",
601 " %%%u%s",
602 " %%0%u%s",
603 };
604
605 pos = strchr(doux, c) - doux;
606 fmt = doux_fmt[pos];
607 field_width = doux_bytes_to_XXX[pos][size];
608 p = doux_fmt_letter[pos] + 2;
609 if (size_spec == LONG) p--;
610 if (size_spec == LONG_LONG) p -= 2;
611 fmt_string = xasprintf(doux_fmtstring[pos], field_width, p);
612 }
613
614 switch (size_spec) {
615 case CHAR:
616 print_function = (fmt == SIGNED_DECIMAL
617 ? print_s_char
618 : print_char);
619 break;
620 case SHORT:
621 print_function = (fmt == SIGNED_DECIMAL
622 ? print_s_short
623 : print_short);
624 break;
625 case INT:
626 print_function = print_int;
627 break;
628 case LONG:
629 print_function = print_long;
630 break;
631 default: /* case LONG_LONG: */
632 print_function = print_long_long;
633 break;
634 }
635 break;
636 }
637
638 case 'f': {
639 static const char FDL[] ALIGN1 = "FDL";
640
641 fmt = FLOATING_POINT;
642 ++s;
643 p = strchr(FDL, *s);
644 if (!p) {
645 size = sizeof(double);
646 if (isdigit(s[0])) {
647 size = bb_strtou(s, &end, 0);
648 if (errno == ERANGE || size > MAX_FP_TYPE_SIZE
649 || fp_type_size[size] == NO_SIZE
650 ) {
651 bb_error_msg_and_die("invalid type string '%s'; "
652 "%u-byte %s type is not supported",
653 s_orig, size, "floating point");
654 }
655 s = end;
656 }
657 } else {
658 static const uint8_t FDL_sizeof[] = {
659 sizeof(float),
660 sizeof(double),
661 sizeof(longdouble_t),
662 };
663
664 size = FDL_sizeof[p - FDL];
665 }
666
667 size_spec = fp_type_size[size];
668
669 switch (size_spec) {
670 case FLOAT_SINGLE:
671 print_function = print_float;
672 field_width = FLT_DIG + 8;
673 /* Don't use %#e; not all systems support it. */
674 fmt_string = xasprintf(" %%%d.%de", field_width, FLT_DIG);
675 break;
676 case FLOAT_DOUBLE:
677 print_function = print_double;
678 field_width = DBL_DIG + 8;
679 fmt_string = xasprintf(" %%%d.%de", field_width, DBL_DIG);
680 break;
681 default: /* case FLOAT_LONG_DOUBLE: */
682 print_function = print_long_double;
683 field_width = LDBL_DIG + 8;
684 fmt_string = xasprintf(" %%%d.%dLe", field_width, LDBL_DIG);
685 break;
686 }
687 break;
688 }
689
690 case 'a':
691 ++s;
692 fmt = NAMED_CHARACTER;
693 size_spec = CHAR;
694 print_function = print_named_ascii;
695 field_width = 3;
696 break;
697 case 'c':
698 ++s;
699 fmt = CHARACTER;
700 size_spec = CHAR;
701 print_function = print_ascii;
702 field_width = 3;
703 break;
704 default:
705 bb_error_msg_and_die("invalid character '%c' "
706 "in type string '%s'", *s, s_orig);
707 }
708
709 tspec->size = size_spec;
710 tspec->fmt = fmt;
711 tspec->print_function = print_function;
712 tspec->fmt_string = fmt_string;
713
714 tspec->field_width = field_width;
715 tspec->hexl_mode_trailer = (*s == 'z');
716 if (tspec->hexl_mode_trailer)
717 s++;
718
719 return s;
720}
721
722/* Decode the modern od format string S. Append the decoded
723 representation to the global array SPEC, reallocating SPEC if
724 necessary. */
725
726static void
727decode_format_string(const char *s)
728{
729 const char *s_orig = s;
730
731 while (*s != '\0') {
732 struct tspec tspec;
733 const char *next;
734
735 next = decode_one_format(s_orig, s, &tspec);
736
737 assert(s != next);
738 s = next;
739 spec = xrealloc_vector(spec, 4, n_specs);
740 memcpy(&spec[n_specs], &tspec, sizeof(spec[0]));
741 n_specs++;
742 }
743}
744
745/* Given a list of one or more input filenames FILE_LIST, set the global
746 file pointer IN_STREAM to position N_SKIP in the concatenation of
747 those files. If any file operation fails or if there are fewer than
748 N_SKIP bytes in the combined input, give an error message and return
749 nonzero. When possible, use seek rather than read operations to
750 advance IN_STREAM. */
751
752static void
753skip(off_t n_skip)
754{
755 if (n_skip == 0)
756 return;
757
758 while (in_stream) { /* !EOF */
759 struct stat file_stats;
760
761 /* First try seeking. For large offsets, this extra work is
762 worthwhile. If the offset is below some threshold it may be
763 more efficient to move the pointer by reading. There are two
764 issues when trying to seek:
765 - the file must be seekable.
766 - before seeking to the specified position, make sure
767 that the new position is in the current file.
768 Try to do that by getting file's size using fstat.
769 But that will work only for regular files. */
770
771 /* The st_size field is valid only for regular files
772 (and for symbolic links, which cannot occur here).
773 If the number of bytes left to skip is at least
774 as large as the size of the current file, we can
775 decrement n_skip and go on to the next file. */
776 if (fstat(fileno(in_stream), &file_stats) == 0
777 && S_ISREG(file_stats.st_mode) && file_stats.st_size > 0
778 ) {
779 if (file_stats.st_size < n_skip) {
780 n_skip -= file_stats.st_size;
781 /* take "check & close / open_next" route */
782 } else {
783 if (fseeko(in_stream, n_skip, SEEK_CUR) != 0)
784 ioerror = 1;
785 return;
786 }
787 } else {
788 /* If it's not a regular file with positive size,
789 position the file pointer by reading. */
790 char buf[1024];
791 size_t n_bytes_to_read = 1024;
792 size_t n_bytes_read;
793
794 while (n_skip > 0) {
795 if (n_skip < n_bytes_to_read)
796 n_bytes_to_read = n_skip;
797 n_bytes_read = fread(buf, 1, n_bytes_to_read, in_stream);
798 n_skip -= n_bytes_read;
799 if (n_bytes_read != n_bytes_to_read)
800 break; /* EOF on this file or error */
801 }
802 }
803 if (n_skip == 0)
804 return;
805
806 check_and_close();
807 open_next_file();
808 }
809
810 if (n_skip)
811 bb_error_msg_and_die("can't skip past end of combined input");
812}
813
814
815typedef void FN_format_address(off_t address, char c);
816
817static void
818format_address_none(off_t address UNUSED_PARAM, char c UNUSED_PARAM)
819{
820}
821
822static char address_fmt[] ALIGN1 = "%0n"OFF_FMT"xc";
823/* Corresponds to 'x' above */
824#define address_base_char address_fmt[sizeof(address_fmt)-3]
825/* Corresponds to 'n' above */
826#define address_pad_len_char address_fmt[2]
827
828static void
829format_address_std(off_t address, char c)
830{
831 /* Corresponds to 'c' */
832 address_fmt[sizeof(address_fmt)-2] = c;
833 printf(address_fmt, address);
834}
835
836#if ENABLE_LONG_OPTS
/* only used with --traditional */
/* Print ADDRESS enclosed in parentheses, then C unless it is NUL. */
static void
format_address_paren(off_t address, char c)
{
	putchar('(');
	format_address_std(address, ')');
	if (c != '\0')
		putchar(c);
}
845
846static void
847format_address_label(off_t address, char c)
848{
849 format_address_std(address, ' ');
850 format_address_paren(address + pseudo_offset, c);
851}
852#endif
853
/* Print the N_BYTES bytes at BLOCK between '>' and '<', replacing
   every byte that ISPRINT() rejects with a dot. */
static void
dump_hexl_mode_trailer(size_t n_bytes, const char *block)
{
	const unsigned char *p = (const unsigned char *) block;

	fputs(" >", stdout);
	for (; n_bytes != 0; n_bytes--, p++) {
		unsigned c = *p;

		if (!ISPRINT(c))
			c = '.';
		putchar(c);
	}
	putchar('<');
}
865
866/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
867 of the N_SPEC format specs. CURRENT_OFFSET is the byte address of
868 CURR_BLOCK in the concatenation of input files, and it is printed
869 (optionally) only before the output line associated with the first
870 format spec. When duplicate blocks are being abbreviated, the output
871 for a sequence of identical input blocks is the output for the first
872 block followed by an asterisk alone on a line. It is valid to compare
873 the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
874 That condition may be false only for the last input block -- and then
875 only when it has not been padded to length BYTES_PER_BLOCK. */
876
877static void
878write_block(off_t current_offset, size_t n_bytes,
879 const char *prev_block, const char *curr_block)
880{
881 static char first = 1;
882 static char prev_pair_equal = 0;
883 size_t i;
884
885 if (!verbose && !first
886 && n_bytes == bytes_per_block
887 && memcmp(prev_block, curr_block, bytes_per_block) == 0
888 ) {
889 if (prev_pair_equal) {
890 /* The two preceding blocks were equal, and the current
891 block is the same as the last one, so print nothing. */
892 } else {
893 puts("*");
894 prev_pair_equal = 1;
895 }
896 } else {
897 first = 0;
898 prev_pair_equal = 0;
899 for (i = 0; i < n_specs; i++) {
900 if (i == 0)
901 format_address(current_offset, '\0');
902 else
903 printf("%*s", address_pad_len_char - '0', "");
904 (*spec[i].print_function) (n_bytes, curr_block, spec[i].fmt_string);
905 if (spec[i].hexl_mode_trailer) {
906 /* space-pad out to full line width, then dump the trailer */
907 int datum_width = width_bytes[spec[i].size];
908 int blank_fields = (bytes_per_block - n_bytes) / datum_width;
909 int field_width = spec[i].field_width + 1;
910 printf("%*s", blank_fields * field_width, "");
911 dump_hexl_mode_trailer(n_bytes, curr_block);
912 }
913 putchar('\n');
914 }
915 }
916}
917
918static void
919read_block(size_t n, char *block, size_t *n_bytes_in_buffer)
920{
921 assert(0 < n && n <= bytes_per_block);
922
923 *n_bytes_in_buffer = 0;
924
925 if (n == 0)
926 return;
927
928 while (in_stream != NULL) { /* EOF. */
929 size_t n_needed;
930 size_t n_read;
931
932 n_needed = n - *n_bytes_in_buffer;
933 n_read = fread(block + *n_bytes_in_buffer, 1, n_needed, in_stream);
934 *n_bytes_in_buffer += n_read;
935 if (n_read == n_needed)
936 break;
937 /* error check is done in check_and_close */
938 check_and_close();
939 open_next_file();
940 }
941}
942
943/* Return the least common multiple of the sizes associated
944 with the format specs. */
945
946static int
947get_lcm(void)
948{
949 size_t i;
950 int l_c_m = 1;
951
952 for (i = 0; i < n_specs; i++)
953 l_c_m = lcm(l_c_m, width_bytes[(int) spec[i].size]);
954 return l_c_m;
955}
956
957#if ENABLE_LONG_OPTS
958/* If S is a valid traditional offset specification with an optional
959 leading '+' return nonzero and set *OFFSET to the offset it denotes. */
960
961static int
962parse_old_offset(const char *s, off_t *offset)
963{
964 static const struct suffix_mult Bb[] = {
965 { "B", 1024 },
966 { "b", 512 },
967 { "", 0 }
968 };
969 char *p;
970 int radix;
971
972 /* Skip over any leading '+'. */
973 if (s[0] == '+') ++s;
974
975 /* Determine the radix we'll use to interpret S. If there is a '.',
976 * it's decimal, otherwise, if the string begins with '0X'or '0x',
977 * it's hexadecimal, else octal. */
978 p = strchr(s, '.');
979 radix = 8;
980 if (p) {
981 p[0] = '\0'; /* cheating */
982 radix = 10;
983 } else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
984 radix = 16;
985
986 *offset = xstrtooff_sfx(s, radix, Bb);
987 if (p) p[0] = '.';
988
989 return (*offset >= 0);
990}
991#endif
992
993/* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
994 formatted block to standard output, and repeat until the specified
995 maximum number of bytes has been read or until all input has been
996 processed. If the last block read is smaller than BYTES_PER_BLOCK
997 and its size is not a multiple of the size associated with a format
998 spec, extend the input block with zero bytes until its length is a
999 multiple of all format spec sizes. Write the final block. Finally,
1000 write on a line by itself the offset of the byte after the last byte
1001 read. */
1002
1003static void
1004dump(off_t current_offset, off_t end_offset)
1005{
1006 char *block[2];
1007 int idx;
1008 size_t n_bytes_read;
1009
1010 block[0] = xmalloc(2*bytes_per_block);
1011 block[1] = block[0] + bytes_per_block;
1012
1013 idx = 0;
1014 if (limit_bytes_to_format) {
1015 while (1) {
1016 size_t n_needed;
1017 if (current_offset >= end_offset) {
1018 n_bytes_read = 0;
1019 break;
1020 }
1021 n_needed = MIN(end_offset - current_offset,
1022 (off_t) bytes_per_block);
1023 read_block(n_needed, block[idx], &n_bytes_read);
1024 if (n_bytes_read < bytes_per_block)
1025 break;
1026 assert(n_bytes_read == bytes_per_block);
1027 write_block(current_offset, n_bytes_read,
1028 block[!idx], block[idx]);
1029 current_offset += n_bytes_read;
1030 idx = !idx;
1031 }
1032 } else {
1033 while (1) {
1034 read_block(bytes_per_block, block[idx], &n_bytes_read);
1035 if (n_bytes_read < bytes_per_block)
1036 break;
1037 assert(n_bytes_read == bytes_per_block);
1038 write_block(current_offset, n_bytes_read,
1039 block[!idx], block[idx]);
1040 current_offset += n_bytes_read;
1041 idx = !idx;
1042 }
1043 }
1044
1045 if (n_bytes_read > 0) {
1046 int l_c_m;
1047 size_t bytes_to_write;
1048
1049 l_c_m = get_lcm();
1050
1051 /* Make bytes_to_write the smallest multiple of l_c_m that
1052 is at least as large as n_bytes_read. */
1053 bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1054
1055 memset(block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1056 write_block(current_offset, bytes_to_write,
1057 block[!idx], block[idx]);
1058 current_offset += n_bytes_read;
1059 }
1060
1061 format_address(current_offset, '\n');
1062
1063 if (limit_bytes_to_format && current_offset >= end_offset)
1064 check_and_close();
1065
1066 free(block[0]);
1067}
1068
1069/* Read a single byte into *C from the concatenation of the input files
1070 named in the global array FILE_LIST. On the first call to this
1071 function, the global variable IN_STREAM is expected to be an open
1072 stream associated with the input file INPUT_FILENAME. If IN_STREAM
1073 is at end-of-file, close it and update the global variables IN_STREAM
1074 and INPUT_FILENAME so they correspond to the next file in the list.
1075 Then try to read a byte from the newly opened file. Repeat if
1076 necessary until EOF is reached for the last file in FILE_LIST, then
1077 set *C to EOF and return. Subsequent calls do likewise. */
1078
1079static void
1080read_char(int *c)
1081{
1082 while (in_stream) { /* !EOF */
1083 *c = fgetc(in_stream);
1084 if (*c != EOF)
1085 return;
1086 check_and_close();
1087 open_next_file();
1088 }
1089 *c = EOF;
1090}
1091
/* (This comment documents read_block(), which is defined earlier in
   this file.)
   Read N bytes into BLOCK from the concatenation of the input files
   named in the global array FILE_LIST. On the first call to this
   function, the global variable IN_STREAM is expected to be an open
   stream associated with the current input file. If all N bytes
   cannot be read from IN_STREAM, close IN_STREAM and open the next
   file in FILE_LIST. Then try to read the remaining bytes from the
   newly opened file. Repeat if necessary until EOF is reached for
   the last file in FILE_LIST. On subsequent calls, don't modify BLOCK.
   Set *N_BYTES_IN_BUFFER to the number of bytes read. If an error
   occurs, it will be detected through ferror when the stream is about
   to be closed: a message is given and IOERROR is set, but reading
   continues as usual. The function itself returns no value. */
1105
1106/* STRINGS mode. Find each "string constant" in the input.
1107 A string constant is a run of at least 'string_min' ASCII
1108 graphic (or formatting) characters terminated by a null.
1109 Based on a function written by Richard Stallman for a
1110 traditional version of od. */
1111
1112static void
1113dump_strings(off_t address, off_t end_offset)
1114{
1115 size_t bufsize = MAX(100, string_min);
1116 char *buf = xmalloc(bufsize);
1117
1118 while (1) {
1119 size_t i;
1120 int c;
1121
1122 /* See if the next 'string_min' chars are all printing chars. */
1123 tryline:
1124 if (limit_bytes_to_format && (end_offset - string_min <= address))
1125 break;
1126 i = 0;
1127 while (!limit_bytes_to_format || address < end_offset) {
1128 if (i == bufsize) {
1129 bufsize += bufsize/8;
1130 buf = xrealloc(buf, bufsize);
1131 }
1132 read_char(&c);
1133 if (c < 0) { /* EOF */
1134 free(buf);
1135 return;
1136 }
1137 address++;
1138 if (!c)
1139 break;
1140 if (!ISPRINT(c))
1141 goto tryline; /* It isn't; give up on this string. */
1142 buf[i++] = c; /* String continues; store it all. */
1143 }
1144
1145 if (i < string_min) /* Too short! */
1146 goto tryline;
1147
1148 /* If we get here, the string is all printable and NUL-terminated,
1149 * so print it. It is all in 'buf' and 'i' is its length. */
1150 buf[i] = 0;
1151 format_address(address - i - 1, ' ');
1152
1153 for (i = 0; (c = buf[i]); i++) {
1154 switch (c) {
1155 case '\007': fputs("\\a", stdout); break;
1156 case '\b': fputs("\\b", stdout); break;
1157 case '\f': fputs("\\f", stdout); break;
1158 case '\n': fputs("\\n", stdout); break;
1159 case '\r': fputs("\\r", stdout); break;
1160 case '\t': fputs("\\t", stdout); break;
1161 case '\v': fputs("\\v", stdout); break;
1162 default: putchar(c);
1163 }
1164 }
1165 putchar('\n');
1166 }
1167
1168 /* We reach this point only if we search through
1169 (max_bytes_to_format - string_min) bytes before reaching EOF. */
1170 free(buf);
1171
1172 check_and_close();
1173}
1174
1175int od_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1176int od_main(int argc, char **argv)
1177{
1178 static const struct suffix_mult bkm[] = {
1179 { "b", 512 },
1180 { "k", 1024 },
1181 { "m", 1024*1024 },
1182 { "", 0 }
1183 };
1184 enum {
1185 OPT_A = 1 << 0,
1186 OPT_N = 1 << 1,
1187 OPT_a = 1 << 2,
1188 OPT_b = 1 << 3,
1189 OPT_c = 1 << 4,
1190 OPT_d = 1 << 5,
1191 OPT_f = 1 << 6,
1192 OPT_h = 1 << 7,
1193 OPT_i = 1 << 8,
1194 OPT_j = 1 << 9,
1195 OPT_l = 1 << 10,
1196 OPT_o = 1 << 11,
1197 OPT_t = 1 << 12,
1198 OPT_v = 1 << 13,
1199 OPT_x = 1 << 14,
1200 OPT_s = 1 << 15,
1201 OPT_S = 1 << 16,
1202 OPT_w = 1 << 17,
1203 OPT_traditional = (1 << 18) * ENABLE_LONG_OPTS,
1204 };
1205#if ENABLE_LONG_OPTS
1206 static const char od_longopts[] ALIGN1 =
1207 "skip-bytes\0" Required_argument "j"
1208 "address-radix\0" Required_argument "A"
1209 "read-bytes\0" Required_argument "N"
1210 "format\0" Required_argument "t"
1211 "output-duplicates\0" No_argument "v"
1212 "strings\0" Optional_argument "S"
1213 "width\0" Optional_argument "w"
1214 "traditional\0" No_argument "\xff"
1215 ;
1216#endif
1217 char *str_A, *str_N, *str_j, *str_S;
1218 llist_t *lst_t = NULL;
1219 unsigned opt;
1220 int l_c_m;
1221 /* The old-style 'pseudo starting address' to be printed in parentheses
1222 after any true address. */
1223 off_t pseudo_start = pseudo_start; // for gcc
1224 /* The number of input bytes to skip before formatting and writing. */
1225 off_t n_bytes_to_skip = 0;
1226 /* The offset of the first byte after the last byte to be formatted. */
1227 off_t end_offset = 0;
1228 /* The maximum number of bytes that will be formatted. */
1229 off_t max_bytes_to_format = 0;
1230
1231 spec = NULL;
1232 format_address = format_address_std;
1233 address_base_char = 'o';
1234 address_pad_len_char = '7';
1235 /* flag_dump_strings = 0; - already is */
1236
1237 /* Parse command line */
1238 opt_complementary = "w+:t::"; /* -w N, -t is a list */
1239#if ENABLE_LONG_OPTS
1240 applet_long_options = od_longopts;
1241#endif
1242 opt = getopt32(argv, "A:N:abcdfhij:lot:vxsS:"
1243 "w::", // -w with optional param
1244 // -S was -s and also had optional parameter
1245 // but in coreutils 6.3 it was renamed and now has
1246 // _mandatory_ parameter
1247 &str_A, &str_N, &str_j, &lst_t, &str_S, &bytes_per_block);
1248 argc -= optind;
1249 argv += optind;
1250 if (opt & OPT_A) {
1251 static const char doxn[] ALIGN1 = "doxn";
1252 static const char doxn_address_base_char[] ALIGN1 = {
1253 'u', 'o', 'x', /* '?' fourth one is not important */
1254 };
1255 static const uint8_t doxn_address_pad_len_char[] ALIGN1 = {
1256 '7', '7', '6', /* '?' */
1257 };
1258 char *p;
1259 int pos;
1260 p = strchr(doxn, str_A[0]);
1261 if (!p)
1262 bb_error_msg_and_die("bad output address radix "
1263 "'%c' (must be [doxn])", str_A[0]);
1264 pos = p - doxn;
1265 if (pos == 3) format_address = format_address_none;
1266 address_base_char = doxn_address_base_char[pos];
1267 address_pad_len_char = doxn_address_pad_len_char[pos];
1268 }
1269 if (opt & OPT_N) {
1270 limit_bytes_to_format = 1;
1271 max_bytes_to_format = xstrtooff_sfx(str_N, 0, bkm);
1272 }
1273 if (opt & OPT_a) decode_format_string("a");
1274 if (opt & OPT_b) decode_format_string("oC");
1275 if (opt & OPT_c) decode_format_string("c");
1276 if (opt & OPT_d) decode_format_string("u2");
1277 if (opt & OPT_f) decode_format_string("fF");
1278 if (opt & OPT_h) decode_format_string("x2");
1279 if (opt & OPT_i) decode_format_string("d2");
1280 if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm);
1281 if (opt & OPT_l) decode_format_string("d4");
1282 if (opt & OPT_o) decode_format_string("o2");
1283 //if (opt & OPT_t)...
1284 while (lst_t) {
1285 decode_format_string(llist_pop(&lst_t));
1286 }
1287 if (opt & OPT_v) verbose = 1;
1288 if (opt & OPT_x) decode_format_string("x2");
1289 if (opt & OPT_s) decode_format_string("d2");
1290 if (opt & OPT_S) {
1291 string_min = 3;
1292 string_min = xstrtou_sfx(str_S, 0, bkm);
1293 flag_dump_strings = 1;
1294 }
1295 //if (opt & OPT_w)...
1296 //if (opt & OPT_traditional)...
1297
1298 if (flag_dump_strings && n_specs > 0)
1299 bb_error_msg_and_die("no type may be specified when dumping strings");
1300
1301 /* If the --traditional option is used, there may be from
1302 * 0 to 3 remaining command line arguments; handle each case
1303 * separately.
1304 * od [file] [[+]offset[.][b] [[+]label[.][b]]]
1305 * The offset and pseudo_start have the same syntax.
1306 *
1307 * FIXME: POSIX 1003.1-2001 with XSI requires support for the
1308 * traditional syntax even if --traditional is not given. */
1309
1310#if ENABLE_LONG_OPTS
1311 if (opt & OPT_traditional) {
1312 off_t o1, o2;
1313
1314 if (argc == 1) {
1315 if (parse_old_offset(argv[0], &o1)) {
1316 n_bytes_to_skip = o1;
1317 --argc;
1318 ++argv;
1319 }
1320 } else if (argc == 2) {
1321 if (parse_old_offset(argv[0], &o1)
1322 && parse_old_offset(argv[1], &o2)
1323 ) {
1324 n_bytes_to_skip = o1;
1325 flag_pseudo_start = 1;
1326 pseudo_start = o2;
1327 argv += 2;
1328 argc -= 2;
1329 } else if (parse_old_offset(argv[1], &o2)) {
1330 n_bytes_to_skip = o2;
1331 --argc;
1332 argv[1] = argv[0];
1333 ++argv;
1334 } else {
1335 bb_error_msg_and_die("invalid second operand "
1336 "in compatibility mode '%s'", argv[1]);
1337 }
1338 } else if (argc == 3) {
1339 if (parse_old_offset(argv[1], &o1)
1340 && parse_old_offset(argv[2], &o2)
1341 ) {
1342 n_bytes_to_skip = o1;
1343 flag_pseudo_start = 1;
1344 pseudo_start = o2;
1345 argv[2] = argv[0];
1346 argv += 2;
1347 argc -= 2;
1348 } else {
1349 bb_error_msg_and_die("in compatibility mode "
1350 "the last two arguments must be offsets");
1351 }
1352 } else if (argc > 3) {
1353 bb_error_msg_and_die("compatibility mode supports "
1354 "at most three arguments");
1355 }
1356
1357 if (flag_pseudo_start) {
1358 if (format_address == format_address_none) {
1359 address_base_char = 'o';
1360 address_pad_len_char = '7';
1361 format_address = format_address_paren;
1362 } else
1363 format_address = format_address_label;
1364 }
1365 }
1366#endif
1367
1368 if (limit_bytes_to_format) {
1369 end_offset = n_bytes_to_skip + max_bytes_to_format;
1370 if (end_offset < n_bytes_to_skip)
1371 bb_error_msg_and_die("skip-bytes + read-bytes is too large");
1372 }
1373
1374 if (n_specs == 0) {
1375 decode_format_string("o2");
1376 n_specs = 1;
1377 }
1378
1379 /* If no files were listed on the command line,
1380 set the global pointer FILE_LIST so that it
1381 references the null-terminated list of one name: "-". */
1382 file_list = bb_argv_dash;
1383 if (argc > 0) {
1384 /* Set the global pointer FILE_LIST so that it
1385 references the first file-argument on the command-line. */
1386 file_list = (char const *const *) argv;
1387 }
1388
1389 /* open the first input file */
1390 open_next_file();
1391 /* skip over any unwanted header bytes */
1392 skip(n_bytes_to_skip);
1393 if (!in_stream)
1394 return EXIT_FAILURE;
1395
1396 pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0);
1397
1398 /* Compute output block length. */
1399 l_c_m = get_lcm();
1400
1401 if (opt & OPT_w) { /* -w: width */
1402 if (!bytes_per_block || bytes_per_block % l_c_m != 0) {
1403 bb_error_msg("warning: invalid width %u; using %d instead",
1404 (unsigned)bytes_per_block, l_c_m);
1405 bytes_per_block = l_c_m;
1406 }
1407 } else {
1408 bytes_per_block = l_c_m;
1409 if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
1410 bytes_per_block *= DEFAULT_BYTES_PER_BLOCK / l_c_m;
1411 }
1412
1413#ifdef DEBUG
1414 for (i = 0; i < n_specs; i++) {
1415 printf("%d: fmt=\"%s\" width=%d\n",
1416 i, spec[i].fmt_string, width_bytes[spec[i].size]);
1417 }
1418#endif
1419
1420 if (flag_dump_strings)
1421 dump_strings(n_bytes_to_skip, end_offset);
1422 else
1423 dump(n_bytes_to_skip, end_offset);
1424
1425 if (fclose(stdin) == EOF)
1426 bb_perror_msg_and_die(bb_msg_standard_input);
1427
1428 return ioerror;
1429}
Note: See TracBrowser for help on using the repository browser.