source: MondoRescue/branches/2.2.5/mindi-busybox/coreutils/od_bloaty.c@ 1765

Last change on this file since 1765 was 1765, checked in by Bruno Cornec, 16 years ago

Update to busybox 1.7.2

  • Property svn:eol-style set to native
File size: 37.8 KB
Line 
1/* od -- dump files in octal and other formats
2 Copyright (C) 92, 1995-2004 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18/* Written by Jim Meyering. */
19
20/* Busyboxed by Denis Vlasenko
21
22Based on od.c from coreutils-5.2.1
23Top bloat sources:
24
2500000073 t parse_old_offset
260000007b t get_lcm
2700000090 r long_options
2800000092 t print_named_ascii
29000000bf t print_ascii
3000000168 t write_block
3100000366 t decode_format_string
3200000a71 T od_main
33
34Tested for compat with coreutils 6.3
35using this script. Minor differences fixed.
36
37#!/bin/sh
38echo STD
39time /path/to/coreutils/od \
40...params... \
41>std
42echo Exit code $?
43echo BBOX
44time ./busybox od \
45...params... \
46>bbox
47echo Exit code $?
48diff -u -a std bbox >bbox.diff || { echo Different!; sleep 1; }
49
50*/
51
52#include "libbb.h"
53#include <getopt.h>
54
/* Assertions are compiled out in this applet. */
#define assert(a) ((void)0)

/* A byte counts as printable when it is not below space and is not DEL.
   (The 0x7f check is a coreutils 6.3 addition.)
   The argument is evaluated twice. */
#define ISPRINT(c) ((c) >= ' ' && (c) != 0x7f)

typedef long double longdouble_t;
typedef unsigned long long ulonglong_t;
typedef long long llong;

/* Offset parser with suffix support: the unsigned long long variant
   is selected when ENABLE_LFS is set, the unsigned long one otherwise. */
#if ENABLE_LFS
# define xstrtooff_sfx xstrtoull_sfx
#else
# define xstrtooff_sfx xstrtoul_sfx
#endif
69
/* Number of input bytes shown on one output line by default. */
#define DEFAULT_BYTES_PER_BLOCK 16

/* Fallbacks for the decimal-precision constants, used only when the
   included headers did not already provide them. */

/* Decimal digits of precision in a float. */
#ifndef FLT_DIG
# define FLT_DIG 7
#endif

/* Decimal digits of precision in a double. */
#ifndef DBL_DIG
# define DBL_DIG 15
#endif

/* Decimal digits of precision in a long double
   (falls back to the double value). */
#ifndef LDBL_DIG
# define LDBL_DIG DBL_DIG
#endif
87
/* Operand sizes a format spec can refer to.  The values index
   width_bytes[]; N_SIZE_SPECS is the number of entries. */
enum size_spec {
	NO_SIZE = 0,
	CHAR,
	SHORT,
	INT,
	LONG,
	LONG_LONG,
	FLOAT_SINGLE,
	FLOAT_DOUBLE,
	FLOAT_LONG_DOUBLE,
	N_SIZE_SPECS
};
100
/* The ways a datum can be rendered. */
enum output_format {
	SIGNED_DECIMAL = 0,
	UNSIGNED_DECIMAL,
	OCTAL,
	HEXADECIMAL,
	FLOATING_POINT,
	NAMED_CHARACTER,
	CHARACTER
};
110
111/* Each output format specification (from '-t spec' or from
112 old-style options) is represented by one of these structures. */
113struct tspec {
114 enum output_format fmt;
115 enum size_spec size;
116 void (*print_function) (size_t, const char *, const char *);
117 char *fmt_string;
118 int hexl_mode_trailer;
119 int field_width;
120};
121
122/* Convert the number of 8-bit bytes of a binary representation to
123 the number of characters (digits + sign if the type is signed)
124 required to represent the same quantity in the specified base/type.
125 For example, a 32-bit (4-byte) quantity may require a field width
126 as wide as the following for these types:
127 11 unsigned octal
128 11 signed decimal
129 10 unsigned decimal
130 8 unsigned hexadecimal */
131
132static const uint8_t bytes_to_oct_digits[] ALIGN1 =
133{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
134
135static const uint8_t bytes_to_signed_dec_digits[] ALIGN1 =
136{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
137
138static const uint8_t bytes_to_unsigned_dec_digits[] ALIGN1 =
139{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
140
141static const uint8_t bytes_to_hex_digits[] ALIGN1 =
142{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
143
144/* Convert enum size_spec to the size of the named type. */
145static const signed char width_bytes[] ALIGN1 = {
146 -1,
147 sizeof(char),
148 sizeof(short),
149 sizeof(int),
150 sizeof(long),
151 sizeof(ulonglong_t),
152 sizeof(float),
153 sizeof(double),
154 sizeof(longdouble_t)
155};
156/* Ensure that for each member of 'enum size_spec' there is an
157 initializer in the width_bytes array. */
158struct ERR_width_bytes_has_bad_size {
159 char ERR_width_bytes_has_bad_size[ARRAY_SIZE(width_bytes) == N_SIZE_SPECS ? 1 : -1];
160};
161
162static smallint flag_dump_strings;
163/* Non-zero if an old-style 'pseudo-address' was specified. */
164static smallint flag_pseudo_start;
165static smallint limit_bytes_to_format;
166/* When zero and two or more consecutive blocks are equal, format
167 only the first block and output an asterisk alone on the following
168 line to indicate that identical blocks have been elided. */
169static smallint verbose;
170static smallint ioerror;
171
172static size_t string_min;
173
174/* An array of specs describing how to format each input block. */
175static size_t n_specs;
176static struct tspec *spec;
177
178/* Function that accepts an address and an optional following char,
179 and prints the address and char to stdout. */
180static void (*format_address)(off_t, char);
181/* The difference between the old-style pseudo starting address and
182 the number of bytes to skip. */
183static off_t pseudo_offset;
184/* The number of input bytes to skip before formatting and writing. */
185static off_t n_bytes_to_skip;
186/* When zero, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
187 input is formatted. */
188/* The maximum number of bytes that will be formatted. */
189static off_t max_bytes_to_format;
190/* The offset of the first byte after the last byte to be formatted. */
191static off_t end_offset;
192
193/* The number of input bytes formatted per output line. It must be
194 a multiple of the least common multiple of the sizes associated with
195 the specified output types. It should be as large as possible, but
196 no larger than 16 -- unless specified with the -w option. */
197static size_t bytes_per_block;
198
199/* Human-readable representation of *file_list (for error messages).
200 It differs from *file_list only when *file_list is "-". */
201static char const *input_filename;
202
203/* A NULL-terminated list of the file-arguments from the command line. */
204static char const *const *file_list;
205
206/* Initializer for file_list if no file-arguments
207 were specified on the command line. */
208static char const *const default_file_list[] = { "-", NULL };
209
210/* The input stream associated with the current file. */
211static FILE *in_stream;
212
213#define MAX_INTEGRAL_TYPE_SIZE sizeof(ulonglong_t)
214static unsigned char integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1] ALIGN1 = {
215 [sizeof(char)] = CHAR,
216#if USHRT_MAX != UCHAR_MAX
217 [sizeof(short)] = SHORT,
218#endif
219#if UINT_MAX != USHRT_MAX
220 [sizeof(int)] = INT,
221#endif
222#if ULONG_MAX != UINT_MAX
223 [sizeof(long)] = LONG,
224#endif
225#if ULLONG_MAX != ULONG_MAX
226 [sizeof(ulonglong_t)] = LONG_LONG,
227#endif
228};
229
230#define MAX_FP_TYPE_SIZE sizeof(longdouble_t)
231static unsigned char fp_type_size[MAX_FP_TYPE_SIZE + 1] ALIGN1 = {
232 /* gcc seems to allow repeated indexes. Last one stays */
233 [sizeof(longdouble_t)] = FLOAT_LONG_DOUBLE,
234 [sizeof(double)] = FLOAT_DOUBLE,
235 [sizeof(float)] = FLOAT_SINGLE
236};
237
238
/* Greatest common divisor of U and V by Euclid's algorithm.
   gcd(u, 0) is u, so gcd(0, 0) is 0. */
static unsigned
gcd(unsigned u, unsigned v)
{
	for (;;) {
		unsigned remainder;

		if (v == 0)
			return u;
		remainder = u % v;
		u = v;
		v = remainder;
	}
}
250
/* Compute the least common multiple of U and V.
   Returns 0 when both arguments are 0. */
static unsigned
lcm(unsigned u, unsigned v)
{
	unsigned divisor = gcd(u, v);

	if (divisor == 0)
		return 0;
	/* Divide before multiplying: divisor always divides u exactly, and
	   this keeps the intermediate value from overflowing when the
	   final result still fits in an unsigned. */
	return u / divisor * v;
}
259
/* Print each of the N_BYTES bytes of BLOCK as a signed char, passing it
   to printf with FMT_STRING as an int. */
static void
print_s_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const signed char *cur = (const signed char *) block;
	const signed char *stop = cur + n_bytes;

	for (; cur != stop; cur++) {
		int value = *cur;
		printf(fmt_string, value);
	}
}
269
/* Print each of the N_BYTES bytes of BLOCK as an unsigned char, passing
   it to printf with FMT_STRING as an unsigned. */
static void
print_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned char *cur = (const unsigned char *) block;
	const unsigned char *stop = cur + n_bytes;

	for (; cur != stop; cur++) {
		unsigned value = *cur;
		printf(fmt_string, value);
	}
}
279
/* Print the signed shorts stored in the first N_BYTES bytes of BLOCK,
   one printf(FMT_STRING, value) call per datum.  A trailing partial
   datum is ignored. */
static void
print_s_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(signed short);

	while (count--) {
		signed short value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, (int) value);
		block += sizeof(value);
	}
}
290
/* Print the unsigned shorts stored in the first N_BYTES bytes of BLOCK,
   one printf(FMT_STRING, value) call per datum.  A trailing partial
   datum is ignored. */
static void
print_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(unsigned short);

	while (count--) {
		unsigned short value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, (unsigned) value);
		block += sizeof(value);
	}
}
301
/* Print the unsigned ints stored in the first N_BYTES bytes of BLOCK,
   one printf(FMT_STRING, value) call per datum.  A trailing partial
   datum is ignored. */
static void
print_int(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(unsigned);

	while (count--) {
		unsigned value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, value);
		block += sizeof(value);
	}
}
312
#if UINT_MAX == ULONG_MAX
/* long and int have the same range: reuse the int printer. */
# define print_long print_int
#else
/* Print the unsigned longs stored in the first N_BYTES bytes of BLOCK,
   one printf(FMT_STRING, value) call per datum.  A trailing partial
   datum is ignored. */
static void
print_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(unsigned long);

	while (count--) {
		unsigned long value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, value);
		block += sizeof(value);
	}
}
#endif
327
#if ULONG_MAX == ULLONG_MAX
/* long long and long have the same range: reuse the long printer. */
# define print_long_long print_long
#else
/* Print the unsigned long longs stored in the first N_BYTES bytes of
   BLOCK, one printf(FMT_STRING, value) call per datum.  A trailing
   partial datum is ignored. */
static void
print_long_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(ulonglong_t);

	while (count--) {
		ulonglong_t value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, value);
		block += sizeof(value);
	}
}
#endif
342
/* Print the floats stored in the first N_BYTES bytes of BLOCK, one
   printf(FMT_STRING, value) call per datum (the value is passed as a
   double, as for any variadic float argument).  A trailing partial
   datum is ignored. */
static void
print_float(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(float);

	while (count--) {
		float value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, (double) value);
		block += sizeof(value);
	}
}
353
/* Print the doubles stored in the first N_BYTES bytes of BLOCK, one
   printf(FMT_STRING, value) call per datum.  A trailing partial datum
   is ignored. */
static void
print_double(size_t n_bytes, const char *block, const char *fmt_string)
{
	size_t count = n_bytes / sizeof(double);

	while (count--) {
		double value;

		/* Copy the bytes out rather than dereferencing a cast pointer:
		   BLOCK is a byte buffer with no alignment guarantee. */
		memcpy(&value, block, sizeof(value));
		printf(fmt_string, value);
		block += sizeof(value);
	}
}
364
365static void
366print_long_double(size_t n_bytes, const char *block, const char *fmt_string)
367{
368 n_bytes /= sizeof(longdouble_t);
369 while (n_bytes--) {
370 longdouble_t tmp = *(longdouble_t *) block;
371 printf(fmt_string, tmp);
372 block += sizeof(longdouble_t);
373 }
374}
375
376/* print_[named]_ascii are optimized for speed.
377 * Remember, someday you may want to pump gigabytes thru this thing.
378 * Saving a dozen of .text bytes here is counter-productive */
379
380static void
381print_named_ascii(size_t n_bytes, const char *block,
382 const char *unused_fmt_string ATTRIBUTE_UNUSED)
383{
384 /* Names for some non-printing characters. */
385 static const char charname[33][3] ALIGN1 = {
386 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
387 " bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
388 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
389 "can", " em", "sub", "esc", " fs", " gs", " rs", " us",
390 " sp"
391 };
392 // buf[N] pos: 01234 56789
393 char buf[12] = " x\0 0xx\0";
394 // actually " x\0 xxx\0", but I want to share the string with below.
395 // [12] because we take three 32bit stack slots anyway, and
396 // gcc is too dumb to initialize with constant stores,
397 // it copies initializer from rodata. Oh well.
398
399 while (n_bytes--) {
400 unsigned masked_c = *(unsigned char *) block++;
401
402 masked_c &= 0x7f;
403 if (masked_c == 0x7f) {
404 fputs(" del", stdout);
405 continue;
406 }
407 if (masked_c > ' ') {
408 buf[3] = masked_c;
409 fputs(buf, stdout);
410 continue;
411 }
412 /* Why? Because printf(" %3.3s") is much slower... */
413 buf[6] = charname[masked_c][0];
414 buf[7] = charname[masked_c][1];
415 buf[8] = charname[masked_c][2];
416 fputs(buf+5, stdout);
417 }
418}
419
420static void
421print_ascii(size_t n_bytes, const char *block,
422 const char *unused_fmt_string ATTRIBUTE_UNUSED)
423{
424 // buf[N] pos: 01234 56789
425 char buf[12] = " x\0 0xx\0";
426
427 while (n_bytes--) {
428 const char *s;
429 unsigned c = *(unsigned char *) block++;
430
431 if (ISPRINT(c)) {
432 buf[3] = c;
433 fputs(buf, stdout);
434 continue;
435 }
436 switch (c) {
437 case '\0':
438 s = " \\0";
439 break;
440 case '\007':
441 s = " \\a";
442 break;
443 case '\b':
444 s = " \\b";
445 break;
446 case '\f':
447 s = " \\f";
448 break;
449 case '\n':
450 s = " \\n";
451 break;
452 case '\r':
453 s = " \\r";
454 break;
455 case '\t':
456 s = " \\t";
457 break;
458 case '\v':
459 s = " \\v";
460 break;
461 case '\x7f':
462 s = " 177";
463 break;
464 default: /* c is never larger than 040 */
465 buf[7] = (c >> 3) + '0';
466 buf[8] = (c & 7) + '0';
467 s = buf + 5;
468 }
469 fputs(s, stdout);
470 }
471}
472
473/* Given a list of one or more input filenames FILE_LIST, set the global
474 file pointer IN_STREAM and the global string INPUT_FILENAME to the
475 first one that can be successfully opened. Modify FILE_LIST to
476 reference the next filename in the list. A file name of "-" is
477 interpreted as standard input. If any file open fails, give an error
478 message and return nonzero. */
479
480static void
481open_next_file(void)
482{
483 while (1) {
484 input_filename = *file_list;
485 if (!input_filename)
486 return;
487 file_list++;
488 in_stream = fopen_or_warn_stdin(input_filename);
489 if (in_stream) {
490 if (in_stream == stdin)
491 input_filename = bb_msg_standard_input;
492 break;
493 }
494 ioerror = 1;
495 }
496
497 if (limit_bytes_to_format && !flag_dump_strings)
498 setbuf(in_stream, NULL);
499}
500
501/* Test whether there have been errors on in_stream, and close it if
502 it is not standard input. Return nonzero if there has been an error
503 on in_stream or stdout; return zero otherwise. This function will
504 report more than one error only if both a read and a write error
505 have occurred. IN_ERRNO, if nonzero, is the error number
506 corresponding to the most recent action for IN_STREAM. */
507
508static void
509check_and_close(void)
510{
511 if (in_stream) {
512 if (ferror(in_stream)) {
513 bb_error_msg("%s: read error", input_filename);
514 ioerror = 1;
515 }
516 fclose_if_not_stdin(in_stream);
517 in_stream = NULL;
518 }
519
520 if (ferror(stdout)) {
521 bb_error_msg("write error");
522 ioerror = 1;
523 }
524}
525
526/* If S points to a single valid modern od format string, put
527 a description of that format in *TSPEC, make *NEXT point at the
528 character following the just-decoded format (if *NEXT is non-NULL),
529 and return zero. For example, if S were "d4afL"
530 *NEXT would be set to "afL" and *TSPEC would be
531 {
532 fmt = SIGNED_DECIMAL;
533 size = INT or LONG; (whichever integral_type_size[4] resolves to)
534 print_function = print_int; (assuming size == INT)
535 fmt_string = "%011d%c";
536 }
537 S_ORIG is solely for reporting errors. It should be the full format
538 string argument. */
539
540static void
541decode_one_format(const char *s_orig, const char *s, const char **next,
542 struct tspec *tspec)
543{
544 enum size_spec size_spec;
545 unsigned size;
546 enum output_format fmt;
547 const char *p;
548 char *end;
549 char *fmt_string = NULL;
550 void (*print_function) (size_t, const char *, const char *);
551 unsigned c;
552 unsigned field_width = 0;
553 int pos;
554
555 assert(tspec != NULL);
556
557 switch (*s) {
558 case 'd':
559 case 'o':
560 case 'u':
561 case 'x': {
562 static const char CSIL[] ALIGN1 = "CSIL";
563
564 c = *s++;
565 p = strchr(CSIL, *s);
566 if (!p) {
567 size = sizeof(int);
568 if (isdigit(s[0])) {
569 size = bb_strtou(s, &end, 0);
570 if (errno == ERANGE
571 || MAX_INTEGRAL_TYPE_SIZE < size
572 || integral_type_size[size] == NO_SIZE
573 ) {
574 bb_error_msg_and_die("invalid type string '%s'; "
575 "%u-byte %s type is not supported",
576 s_orig, size, "integral");
577 }
578 s = end;
579 }
580 } else {
581 static const uint8_t CSIL_sizeof[] = {
582 sizeof(char),
583 sizeof(short),
584 sizeof(int),
585 sizeof(long),
586 };
587 size = CSIL_sizeof[p - CSIL];
588 }
589
590#define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format) \
591 ((Spec) == LONG_LONG ? (Max_format) \
592 : ((Spec) == LONG ? (Long_format) : (Min_format)))
593
594#define FMT_BYTES_ALLOCATED 9
595 size_spec = integral_type_size[size];
596
597 {
598 static const char doux[] ALIGN1 = "doux";
599 static const char doux_fmt_letter[][4] = {
600 "lld", "llo", "llu", "llx"
601 };
602 static const enum output_format doux_fmt[] = {
603 SIGNED_DECIMAL,
604 OCTAL,
605 UNSIGNED_DECIMAL,
606 HEXADECIMAL,
607 };
608 static const uint8_t *const doux_bytes_to_XXX[] = {
609 bytes_to_signed_dec_digits,
610 bytes_to_oct_digits,
611 bytes_to_unsigned_dec_digits,
612 bytes_to_hex_digits,
613 };
614 static const char doux_fmtstring[][sizeof(" %%0%u%s")] = {
615 " %%%u%s",
616 " %%0%u%s",
617 " %%%u%s",
618 " %%0%u%s",
619 };
620
621 pos = strchr(doux, c) - doux;
622 fmt = doux_fmt[pos];
623 field_width = doux_bytes_to_XXX[pos][size];
624 p = doux_fmt_letter[pos] + 2;
625 if (size_spec == LONG) p--;
626 if (size_spec == LONG_LONG) p -= 2;
627 fmt_string = xasprintf(doux_fmtstring[pos], field_width, p);
628 }
629
630 switch (size_spec) {
631 case CHAR:
632 print_function = (fmt == SIGNED_DECIMAL
633 ? print_s_char
634 : print_char);
635 break;
636 case SHORT:
637 print_function = (fmt == SIGNED_DECIMAL
638 ? print_s_short
639 : print_short);
640 break;
641 case INT:
642 print_function = print_int;
643 break;
644 case LONG:
645 print_function = print_long;
646 break;
647 default: /* case LONG_LONG: */
648 print_function = print_long_long;
649 break;
650 }
651 break;
652 }
653
654 case 'f': {
655 static const char FDL[] ALIGN1 = "FDL";
656
657 fmt = FLOATING_POINT;
658 ++s;
659 p = strchr(FDL, *s);
660 if (!p) {
661 size = sizeof(double);
662 if (isdigit(s[0])) {
663 size = bb_strtou(s, &end, 0);
664 if (errno == ERANGE || size > MAX_FP_TYPE_SIZE
665 || fp_type_size[size] == NO_SIZE
666 ) {
667 bb_error_msg_and_die("invalid type string '%s'; "
668 "%u-byte %s type is not supported",
669 s_orig, size, "floating point");
670 }
671 s = end;
672 }
673 } else {
674 static const uint8_t FDL_sizeof[] = {
675 sizeof(float),
676 sizeof(double),
677 sizeof(longdouble_t),
678 };
679
680 size = FDL_sizeof[p - FDL];
681 }
682
683 size_spec = fp_type_size[size];
684
685 switch (size_spec) {
686 case FLOAT_SINGLE:
687 print_function = print_float;
688 field_width = FLT_DIG + 8;
689 /* Don't use %#e; not all systems support it. */
690 fmt_string = xasprintf(" %%%d.%de", field_width, FLT_DIG);
691 break;
692 case FLOAT_DOUBLE:
693 print_function = print_double;
694 field_width = DBL_DIG + 8;
695 fmt_string = xasprintf(" %%%d.%de", field_width, DBL_DIG);
696 break;
697 default: /* case FLOAT_LONG_DOUBLE: */
698 print_function = print_long_double;
699 field_width = LDBL_DIG + 8;
700 fmt_string = xasprintf(" %%%d.%dLe", field_width, LDBL_DIG);
701 break;
702 }
703 break;
704 }
705
706 case 'a':
707 ++s;
708 fmt = NAMED_CHARACTER;
709 size_spec = CHAR;
710 print_function = print_named_ascii;
711 field_width = 3;
712 break;
713 case 'c':
714 ++s;
715 fmt = CHARACTER;
716 size_spec = CHAR;
717 print_function = print_ascii;
718 field_width = 3;
719 break;
720 default:
721 bb_error_msg_and_die("invalid character '%c' "
722 "in type string '%s'", *s, s_orig);
723 }
724
725 tspec->size = size_spec;
726 tspec->fmt = fmt;
727 tspec->print_function = print_function;
728 tspec->fmt_string = fmt_string;
729
730 tspec->field_width = field_width;
731 tspec->hexl_mode_trailer = (*s == 'z');
732 if (tspec->hexl_mode_trailer)
733 s++;
734
735 if (next != NULL)
736 *next = s;
737}
738
739/* Decode the modern od format string S. Append the decoded
740 representation to the global array SPEC, reallocating SPEC if
741 necessary. Return zero if S is valid, nonzero otherwise. */
742
743static void
744decode_format_string(const char *s)
745{
746 const char *s_orig = s;
747
748 while (*s != '\0') {
749 struct tspec tspec;
750 const char *next;
751
752 decode_one_format(s_orig, s, &next, &tspec);
753
754 assert(s != next);
755 s = next;
756 n_specs++;
757 spec = xrealloc(spec, n_specs * sizeof(*spec));
758 memcpy(&spec[n_specs-1], &tspec, sizeof *spec);
759 }
760}
761
762/* Given a list of one or more input filenames FILE_LIST, set the global
763 file pointer IN_STREAM to position N_SKIP in the concatenation of
764 those files. If any file operation fails or if there are fewer than
765 N_SKIP bytes in the combined input, give an error message and return
766 nonzero. When possible, use seek rather than read operations to
767 advance IN_STREAM. */
768
769static void
770skip(off_t n_skip)
771{
772 if (n_skip == 0)
773 return;
774
775 while (in_stream) { /* !EOF */
776 struct stat file_stats;
777
778 /* First try seeking. For large offsets, this extra work is
779 worthwhile. If the offset is below some threshold it may be
780 more efficient to move the pointer by reading. There are two
781 issues when trying to seek:
782 - the file must be seekable.
783 - before seeking to the specified position, make sure
784 that the new position is in the current file.
785 Try to do that by getting file's size using fstat.
786 But that will work only for regular files. */
787
788 /* The st_size field is valid only for regular files
789 (and for symbolic links, which cannot occur here).
790 If the number of bytes left to skip is at least
791 as large as the size of the current file, we can
792 decrement n_skip and go on to the next file. */
793 if (fstat(fileno(in_stream), &file_stats) == 0
794 && S_ISREG(file_stats.st_mode) && file_stats.st_size >= 0
795 ) {
796 if (file_stats.st_size < n_skip) {
797 n_skip -= file_stats.st_size;
798 /* take check&close / open_next route */
799 } else {
800 if (fseeko(in_stream, n_skip, SEEK_CUR) != 0)
801 ioerror = 1;
802 return;
803 }
804 } else {
805 /* If it's not a regular file with nonnegative size,
806 position the file pointer by reading. */
807 char buf[BUFSIZ];
808 size_t n_bytes_read, n_bytes_to_read = BUFSIZ;
809
810 while (n_skip > 0) {
811 if (n_skip < n_bytes_to_read)
812 n_bytes_to_read = n_skip;
813 n_bytes_read = fread(buf, 1, n_bytes_to_read, in_stream);
814 n_skip -= n_bytes_read;
815 if (n_bytes_read != n_bytes_to_read)
816 break; /* EOF on this file or error */
817 }
818 }
819 if (n_skip == 0)
820 return;
821
822 check_and_close();
823 open_next_file();
824 }
825
826 if (n_skip)
827 bb_error_msg_and_die("cannot skip past end of combined input");
828}
829
830
831typedef void FN_format_address(off_t address, char c);
832
833static void
834format_address_none(off_t address ATTRIBUTE_UNUSED, char c ATTRIBUTE_UNUSED)
835{
836}
837
838static char address_fmt[] ALIGN1 = "%0n"OFF_FMT"xc";
839/* Corresponds to 'x' above */
840#define address_base_char address_fmt[sizeof(address_fmt)-3]
841/* Corresponds to 'n' above */
842#define address_pad_len_char address_fmt[2]
843
844static void
845format_address_std(off_t address, char c)
846{
847 /* Corresponds to 'c' */
848 address_fmt[sizeof(address_fmt)-2] = c;
849 printf(address_fmt, address);
850}
851
#if ENABLE_GETOPT_LONG
/* only used with --traditional */

/* Print ADDRESS enclosed in parentheses, followed by C unless C is NUL. */
static void
format_address_paren(off_t address, char c)
{
	putchar('(');
	format_address_std(address, ')');
	if (c != '\0')
		putchar(c);
}

/* Print ADDRESS and a blank, then the pseudo address
   (ADDRESS + pseudo_offset) in parentheses, then C. */
static void
format_address_label(off_t address, char c)
{
	off_t pseudo_address = address + pseudo_offset;

	format_address_std(address, ' ');
	format_address_paren(pseudo_address, c);
}
#endif
869
/* Print the 'z' trailer for one line: the N_BYTES bytes of BLOCK between
   '>' and '<', with every byte rejected by ISPRINT shown as '.'. */
static void
dump_hexl_mode_trailer(size_t n_bytes, const char *block)
{
	const unsigned char *cur = (const unsigned char *) block;
	const unsigned char *stop = cur + n_bytes;

	fputs(" >", stdout);
	for (; cur != stop; cur++) {
		unsigned c = *cur;

		if (!ISPRINT(c))
			c = '.';
		putchar(c);
	}
	putchar('<');
}
881
882/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
883 of the N_SPEC format specs. CURRENT_OFFSET is the byte address of
884 CURR_BLOCK in the concatenation of input files, and it is printed
885 (optionally) only before the output line associated with the first
886 format spec. When duplicate blocks are being abbreviated, the output
887 for a sequence of identical input blocks is the output for the first
888 block followed by an asterisk alone on a line. It is valid to compare
889 the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
890 That condition may be false only for the last input block -- and then
891 only when it has not been padded to length BYTES_PER_BLOCK. */
892
893static void
894write_block(off_t current_offset, size_t n_bytes,
895 const char *prev_block, const char *curr_block)
896{
897 static char first = 1;
898 static char prev_pair_equal = 0;
899 size_t i;
900
901 if (!verbose && !first
902 && n_bytes == bytes_per_block
903 && memcmp(prev_block, curr_block, bytes_per_block) == 0
904 ) {
905 if (prev_pair_equal) {
906 /* The two preceding blocks were equal, and the current
907 block is the same as the last one, so print nothing. */
908 } else {
909 puts("*");
910 prev_pair_equal = 1;
911 }
912 } else {
913 first = 0;
914 prev_pair_equal = 0;
915 for (i = 0; i < n_specs; i++) {
916 if (i == 0)
917 format_address(current_offset, '\0');
918 else
919 printf("%*s", address_pad_len_char - '0', "");
920 (*spec[i].print_function) (n_bytes, curr_block, spec[i].fmt_string);
921 if (spec[i].hexl_mode_trailer) {
922 /* space-pad out to full line width, then dump the trailer */
923 int datum_width = width_bytes[spec[i].size];
924 int blank_fields = (bytes_per_block - n_bytes) / datum_width;
925 int field_width = spec[i].field_width + 1;
926 printf("%*s", blank_fields * field_width, "");
927 dump_hexl_mode_trailer(n_bytes, curr_block);
928 }
929 putchar('\n');
930 }
931 }
932}
933
934static void
935read_block(size_t n, char *block, size_t *n_bytes_in_buffer)
936{
937 assert(0 < n && n <= bytes_per_block);
938
939 *n_bytes_in_buffer = 0;
940
941 if (n == 0)
942 return;
943
944 while (in_stream != NULL) { /* EOF. */
945 size_t n_needed;
946 size_t n_read;
947
948 n_needed = n - *n_bytes_in_buffer;
949 n_read = fread(block + *n_bytes_in_buffer, 1, n_needed, in_stream);
950 *n_bytes_in_buffer += n_read;
951 if (n_read == n_needed)
952 break;
953 /* error check is done in check_and_close */
954 check_and_close();
955 open_next_file();
956 }
957}
958
959/* Return the least common multiple of the sizes associated
960 with the format specs. */
961
962static int
963get_lcm(void)
964{
965 size_t i;
966 int l_c_m = 1;
967
968 for (i = 0; i < n_specs; i++)
969 l_c_m = lcm(l_c_m, width_bytes[(int) spec[i].size]);
970 return l_c_m;
971}
972
#if ENABLE_GETOPT_LONG
/* Parse the traditional offset specification S (with an optional
   leading '+') into *OFFSET.  The radix is decimal if S contains a
   '.', hexadecimal if S starts with "0x" or "0X", octal otherwise.
   The suffixes "b" (x512) and "B" (x1024) are passed on to the number
   parser.  Returns nonzero when the resulting offset is not negative. */

static int
parse_old_offset(const char *s, off_t *offset)
{
	static const struct suffix_mult Bb[] = {
		{ "B", 1024 },
		{ "b", 512 },
		{ }
	};
	char *dot;
	int radix = 8;

	/* Skip over any leading '+'. */
	if (*s == '+')
		s++;

	dot = strchr(s, '.');
	if (dot) {
		/* Cut the string at the '.' while parsing ("cheating"):
		   it is put back below. */
		*dot = '\0';
		radix = 10;
	} else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
		radix = 16;
	}

	*offset = xstrtooff_sfx(s, radix, Bb);
	if (dot)
		*dot = '.';

	return *offset >= 0;
}
#endif
1007#endif
1008
1009/* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
1010 formatted block to standard output, and repeat until the specified
1011 maximum number of bytes has been read or until all input has been
1012 processed. If the last block read is smaller than BYTES_PER_BLOCK
1013 and its size is not a multiple of the size associated with a format
1014 spec, extend the input block with zero bytes until its length is a
1015 multiple of all format spec sizes. Write the final block. Finally,
1016 write on a line by itself the offset of the byte after the last byte
1017 read. Accumulate return values from calls to read_block and
1018 check_and_close, and if any was nonzero, return nonzero.
1019 Otherwise, return zero. */
1020
1021static void
1022dump(void)
1023{
1024 char *block[2];
1025 off_t current_offset;
1026 int idx;
1027 size_t n_bytes_read;
1028
1029 block[0] = xmalloc(2*bytes_per_block);
1030 block[1] = block[0] + bytes_per_block;
1031
1032 current_offset = n_bytes_to_skip;
1033
1034 idx = 0;
1035 if (limit_bytes_to_format) {
1036 while (1) {
1037 size_t n_needed;
1038 if (current_offset >= end_offset) {
1039 n_bytes_read = 0;
1040 break;
1041 }
1042 n_needed = MIN(end_offset - current_offset,
1043 (off_t) bytes_per_block);
1044 read_block(n_needed, block[idx], &n_bytes_read);
1045 if (n_bytes_read < bytes_per_block)
1046 break;
1047 assert(n_bytes_read == bytes_per_block);
1048 write_block(current_offset, n_bytes_read,
1049 block[!idx], block[idx]);
1050 current_offset += n_bytes_read;
1051 idx = !idx;
1052 }
1053 } else {
1054 while (1) {
1055 read_block(bytes_per_block, block[idx], &n_bytes_read);
1056 if (n_bytes_read < bytes_per_block)
1057 break;
1058 assert(n_bytes_read == bytes_per_block);
1059 write_block(current_offset, n_bytes_read,
1060 block[!idx], block[idx]);
1061 current_offset += n_bytes_read;
1062 idx = !idx;
1063 }
1064 }
1065
1066 if (n_bytes_read > 0) {
1067 int l_c_m;
1068 size_t bytes_to_write;
1069
1070 l_c_m = get_lcm();
1071
1072 /* Make bytes_to_write the smallest multiple of l_c_m that
1073 is at least as large as n_bytes_read. */
1074 bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1075
1076 memset(block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1077 write_block(current_offset, bytes_to_write,
1078 block[!idx], block[idx]);
1079 current_offset += n_bytes_read;
1080 }
1081
1082 format_address(current_offset, '\n');
1083
1084 if (limit_bytes_to_format && current_offset >= end_offset)
1085 check_and_close();
1086
1087 free(block[0]);
1088}
1089
1090/* Read a single byte into *C from the concatenation of the input files
1091 named in the global array FILE_LIST. On the first call to this
1092 function, the global variable IN_STREAM is expected to be an open
1093 stream associated with the input file INPUT_FILENAME. If IN_STREAM
1094 is at end-of-file, close it and update the global variables IN_STREAM
1095 and INPUT_FILENAME so they correspond to the next file in the list.
1096 Then try to read a byte from the newly opened file. Repeat if
1097 necessary until EOF is reached for the last file in FILE_LIST, then
1098 set *C to EOF and return. Subsequent calls do likewise. The return
1099 value is nonzero if any errors occured, zero otherwise. */
1100
1101static void
1102read_char(int *c)
1103{
1104 while (in_stream) { /* !EOF */
1105 *c = fgetc(in_stream);
1106 if (*c != EOF)
1107 return;
1108 check_and_close();
1109 open_next_file();
1110 }
1111 *c = EOF;
1112}
1113
/* Note on read_block (defined earlier in this file): it reads N bytes
   into BLOCK from the concatenation of the input files named in the
   global array FILE_LIST.  On the first call, the global variable
   IN_STREAM is expected to be an open stream associated with the input
   file INPUT_FILENAME.  If all N bytes cannot be read from IN_STREAM,
   it closes IN_STREAM and updates the global variables IN_STREAM and
   INPUT_FILENAME, then tries to read the remaining bytes from the newly
   opened file.  This repeats if necessary until EOF is reached for the
   last file in FILE_LIST.  On subsequent calls, BLOCK is not modified.
   *N_BYTES_IN_BUFFER is set to the number of bytes read.  If an error
   occurs, it is detected through ferror when the stream is about to be
   closed; a message is given, IOERROR is set, and reading continues
   as usual.  The function itself returns nothing. */
1127
1128/* STRINGS mode. Find each "string constant" in the input.
1129 A string constant is a run of at least 'string_min' ASCII
1130 graphic (or formatting) characters terminated by a null.
1131 Based on a function written by Richard Stallman for a
1132 traditional version of od. Return nonzero if an error
1133 occurs. Otherwise, return zero. */
1134
1135static void
1136dump_strings(void)
1137{
1138 size_t bufsize = MAX(100, string_min);
1139 char *buf = xmalloc(bufsize);
1140 off_t address = n_bytes_to_skip;
1141
1142 while (1) {
1143 size_t i;
1144 int c;
1145
1146 /* See if the next 'string_min' chars are all printing chars. */
1147 tryline:
1148 if (limit_bytes_to_format && (end_offset - string_min <= address))
1149 break;
1150 i = 0;
1151 while (!limit_bytes_to_format || address < end_offset) {
1152 if (i == bufsize) {
1153 bufsize += bufsize/8;
1154 buf = xrealloc(buf, bufsize);
1155 }
1156 read_char(&c);
1157 if (c < 0) { /* EOF */
1158 free(buf);
1159 return;
1160 }
1161 address++;
1162 if (!c)
1163 break;
1164 if (!ISPRINT(c))
1165 goto tryline; /* It isn't; give up on this string. */
1166 buf[i++] = c; /* String continues; store it all. */
1167 }
1168
1169 if (i < string_min) /* Too short! */
1170 goto tryline;
1171
1172 /* If we get here, the string is all printable and null-terminated,
1173 * so print it. It is all in 'buf' and 'i' is its length. */
1174 buf[i] = 0;
1175 format_address(address - i - 1, ' ');
1176
1177 for (i = 0; (c = buf[i]); i++) {
1178 switch (c) {
1179 case '\007': fputs("\\a", stdout); break;
1180 case '\b': fputs("\\b", stdout); break;
1181 case '\f': fputs("\\f", stdout); break;
1182 case '\n': fputs("\\n", stdout); break;
1183 case '\r': fputs("\\r", stdout); break;
1184 case '\t': fputs("\\t", stdout); break;
1185 case '\v': fputs("\\v", stdout); break;
1186 default: putc(c, stdout);
1187 }
1188 }
1189 putchar('\n');
1190 }
1191
1192 /* We reach this point only if we search through
1193 (max_bytes_to_format - string_min) bytes before reaching EOF. */
1194 free(buf);
1195
1196 check_and_close();
1197}
1198
1199int od_main(int argc, char **argv);
1200int od_main(int argc, char **argv)
1201{
1202 static const struct suffix_mult bkm[] = {
1203 { "b", 512 },
1204 { "k", 1024 },
1205 { "m", 1024*1024 },
1206 { }
1207 };
1208 unsigned opt;
1209 int l_c_m;
1210 /* The old-style 'pseudo starting address' to be printed in parentheses
1211 after any true address. */
1212 off_t pseudo_start = 0; // only for gcc
1213 enum {
1214 OPT_A = 1 << 0,
1215 OPT_N = 1 << 1,
1216 OPT_a = 1 << 2,
1217 OPT_b = 1 << 3,
1218 OPT_c = 1 << 4,
1219 OPT_d = 1 << 5,
1220 OPT_f = 1 << 6,
1221 OPT_h = 1 << 7,
1222 OPT_i = 1 << 8,
1223 OPT_j = 1 << 9,
1224 OPT_l = 1 << 10,
1225 OPT_o = 1 << 11,
1226 OPT_t = 1 << 12,
1227 OPT_v = 1 << 13,
1228 OPT_x = 1 << 14,
1229 OPT_s = 1 << 15,
1230 OPT_S = 1 << 16,
1231 OPT_w = 1 << 17,
1232 OPT_traditional = (1 << 18) * ENABLE_GETOPT_LONG,
1233 };
1234#if ENABLE_GETOPT_LONG
1235 static const char od_longopts[] ALIGN1 =
1236 "skip-bytes\0" Required_argument "j"
1237 "address-radix\0" Required_argument "A"
1238 "read-bytes\0" Required_argument "N"
1239 "format\0" Required_argument "t"
1240 "output-duplicates\0" No_argument "v"
1241 "strings\0" Optional_argument "S"
1242 "width\0" Optional_argument "w"
1243 "traditional\0" No_argument "\xff"
1244 ;
1245#endif
1246 char *str_A, *str_N, *str_j, *str_S;
1247 char *str_w = NULL;
1248 llist_t *lst_t = NULL;
1249
1250 spec = NULL;
1251 format_address = format_address_std;
1252 address_base_char = 'o';
1253 address_pad_len_char = '7';
1254 /* flag_dump_strings = 0; - already is */
1255
1256 /* Parse command line */
1257 opt_complementary = "t::"; // list
1258#if ENABLE_GETOPT_LONG
1259 applet_long_options = od_longopts;
1260#endif
1261 opt = getopt32(argv, "A:N:abcdfhij:lot:vxsS:"
1262 "w::", // -w with optional param
1263 // -S was -s and also had optional parameter
1264 // but in coreutils 6.3 it was renamed and now has
1265 // _mandatory_ parameter
1266 &str_A, &str_N, &str_j, &lst_t, &str_S, &str_w);
1267 argc -= optind;
1268 argv += optind;
1269 if (opt & OPT_A) {
1270 static const char doxn[] ALIGN1 = "doxn";
1271 static const char doxn_address_base_char[] ALIGN1 = {
1272 'u', 'o', 'x', /* '?' fourth one is not important */
1273 };
1274 static const uint8_t doxn_address_pad_len_char[] ALIGN1 = {
1275 '7', '7', '6', /* '?' */
1276 };
1277 char *p;
1278 int pos;
1279 p = strchr(doxn, str_A[0]);
1280 if (!p)
1281 bb_error_msg_and_die("bad output address radix "
1282 "'%c' (must be [doxn])", str_A[0]);
1283 pos = p - doxn;
1284 if (pos == 3) format_address = format_address_none;
1285 address_base_char = doxn_address_base_char[pos];
1286 address_pad_len_char = doxn_address_pad_len_char[pos];
1287 }
1288 if (opt & OPT_N) {
1289 limit_bytes_to_format = 1;
1290 max_bytes_to_format = xstrtooff_sfx(str_N, 0, bkm);
1291 }
1292 if (opt & OPT_a) decode_format_string("a");
1293 if (opt & OPT_b) decode_format_string("oC");
1294 if (opt & OPT_c) decode_format_string("c");
1295 if (opt & OPT_d) decode_format_string("u2");
1296 if (opt & OPT_f) decode_format_string("fF");
1297 if (opt & OPT_h) decode_format_string("x2");
1298 if (opt & OPT_i) decode_format_string("d2");
1299 if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm);
1300 if (opt & OPT_l) decode_format_string("d4");
1301 if (opt & OPT_o) decode_format_string("o2");
1302 //if (opt & OPT_t)...
1303 while (lst_t) {
1304 decode_format_string(lst_t->data);
1305 lst_t = lst_t->link;
1306 }
1307 if (opt & OPT_v) verbose = 1;
1308 if (opt & OPT_x) decode_format_string("x2");
1309 if (opt & OPT_s) decode_format_string("d2");
1310 if (opt & OPT_S) {
1311 string_min = 3;
1312 string_min = xstrtou_sfx(str_S, 0, bkm);
1313 flag_dump_strings = 1;
1314 }
1315 //if (opt & OPT_w)...
1316 //if (opt & OPT_traditional)...
1317
1318 if (flag_dump_strings && n_specs > 0)
1319 bb_error_msg_and_die("no type may be specified when dumping strings");
1320
1321 /* If the --traditional option is used, there may be from
1322 * 0 to 3 remaining command line arguments; handle each case
1323 * separately.
1324 * od [file] [[+]offset[.][b] [[+]label[.][b]]]
1325 * The offset and pseudo_start have the same syntax.
1326 *
1327 * FIXME: POSIX 1003.1-2001 with XSI requires support for the
1328 * traditional syntax even if --traditional is not given. */
1329
1330#if ENABLE_GETOPT_LONG
1331 if (opt & OPT_traditional) {
1332 off_t o1, o2;
1333
1334 if (argc == 1) {
1335 if (parse_old_offset(argv[0], &o1)) {
1336 n_bytes_to_skip = o1;
1337 --argc;
1338 ++argv;
1339 }
1340 } else if (argc == 2) {
1341 if (parse_old_offset(argv[0], &o1)
1342 && parse_old_offset(argv[1], &o2)
1343 ) {
1344 n_bytes_to_skip = o1;
1345 flag_pseudo_start = 1;
1346 pseudo_start = o2;
1347 argv += 2;
1348 argc -= 2;
1349 } else if (parse_old_offset(argv[1], &o2)) {
1350 n_bytes_to_skip = o2;
1351 --argc;
1352 argv[1] = argv[0];
1353 ++argv;
1354 } else {
1355 bb_error_msg_and_die("invalid second operand "
1356 "in compatibility mode '%s'", argv[1]);
1357 }
1358 } else if (argc == 3) {
1359 if (parse_old_offset(argv[1], &o1)
1360 && parse_old_offset(argv[2], &o2)
1361 ) {
1362 n_bytes_to_skip = o1;
1363 flag_pseudo_start = 1;
1364 pseudo_start = o2;
1365 argv[2] = argv[0];
1366 argv += 2;
1367 argc -= 2;
1368 } else {
1369 bb_error_msg_and_die("in compatibility mode "
1370 "the last two arguments must be offsets");
1371 }
1372 } else if (argc > 3) {
1373 bb_error_msg_and_die("compatibility mode supports "
1374 "at most three arguments");
1375 }
1376
1377 if (flag_pseudo_start) {
1378 if (format_address == format_address_none) {
1379 address_base_char = 'o';
1380 address_pad_len_char = '7';
1381 format_address = format_address_paren;
1382 } else
1383 format_address = format_address_label;
1384 }
1385 }
1386#endif
1387
1388 if (limit_bytes_to_format) {
1389 end_offset = n_bytes_to_skip + max_bytes_to_format;
1390 if (end_offset < n_bytes_to_skip)
1391 bb_error_msg_and_die("skip-bytes + read-bytes is too large");
1392 }
1393
1394 if (n_specs == 0) {
1395 decode_format_string("o2");
1396 n_specs = 1;
1397 }
1398
1399 /* If no files were listed on the command line,
1400 set the global pointer FILE_LIST so that it
1401 references the null-terminated list of one name: "-". */
1402 file_list = default_file_list;
1403 if (argc > 0) {
1404 /* Set the global pointer FILE_LIST so that it
1405 references the first file-argument on the command-line. */
1406 file_list = (char const *const *) argv;
1407 }
1408
1409 /* open the first input file */
1410 open_next_file();
1411 /* skip over any unwanted header bytes */
1412 skip(n_bytes_to_skip);
1413 if (!in_stream)
1414 return 1;
1415
1416 pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0);
1417
1418 /* Compute output block length. */
1419 l_c_m = get_lcm();
1420
1421 if (opt & OPT_w) { /* -w: width */
1422 bytes_per_block = 32;
1423 if (str_w)
1424 bytes_per_block = xatou(str_w);
1425 if (!bytes_per_block || bytes_per_block % l_c_m != 0) {
1426 bb_error_msg("warning: invalid width %zu; using %d instead",
1427 bytes_per_block, l_c_m);
1428 bytes_per_block = l_c_m;
1429 }
1430 } else {
1431 bytes_per_block = l_c_m;
1432 if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
1433 bytes_per_block *= DEFAULT_BYTES_PER_BLOCK / l_c_m;
1434 }
1435
1436#ifdef DEBUG
1437 for (i = 0; i < n_specs; i++) {
1438 printf("%d: fmt=\"%s\" width=%d\n",
1439 i, spec[i].fmt_string, width_bytes[spec[i].size]);
1440 }
1441#endif
1442
1443 if (flag_dump_strings)
1444 dump_strings();
1445 else
1446 dump();
1447
1448 if (fclose(stdin) == EOF)
1449 bb_perror_msg_and_die(bb_msg_standard_input);
1450
1451 return ioerror;
1452}
Note: See TracBrowser for help on using the repository browser.