source: branches/3.2/mindi-busybox/coreutils/wc.c @ 3232

Last change on this file since 3232 was 3232, checked in by bruno, 5 years ago
  • Update mindi-busybox to 1.21.1
File size: 5.9 KB
Line 
/* vi: set sw=4 ts=4: */
/*
 * wc implementation for busybox
 *
 * Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */

/* BB_AUDIT SUSv3 compliant. */
/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */

/* Mar 16, 2003      Manuel Novoa III   (mjn3@codepoet.org)
 *
 * Rewritten to fix a number of problems and do some size optimizations.
 * Problems in the previous busybox implementation (besides bloat) included:
 *  1) broken 'wc -c' optimization (read note below)
 *  2) broken handling of '-' args
 *  3) no checking of ferror on EOF returns
 *  4) isprint() wasn't considered when word counting.
 *
 * NOTES:
 *
 * The previous busybox wc attempted an optimization using stat for the
 * case of counting chars only.  I omitted that because it was broken.
 * It didn't take into account the possibility of input coming from a
 * pipe, or input from a file with file pointer not at the beginning.
 *
 * To implement such a speed optimization correctly, not only do you
 * need the size, but also the file position.  Note also that the
 * file position may be past the end of file.  Consider the example
 * (adapted from example in gnu wc.c)
 *
 *      echo hello > /tmp/testfile &&
 *      (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
 *
 * for which 'wc -c' should output '0'.
 */
39#include "libbb.h"
40#include "unicode.h"
41
/*
 * Without locale support, replace any existing isprint()/isspace()
 * definitions with minimal ASCII-only versions:
 *   isprint(c) - true for 0x20..0x7e inclusive; the unsigned cast makes
 *                values below 0x20 (including negative ones) wrap to a
 *                large number and fail the single comparison.
 *   isspace(c) - true only for ' '.  In wc_main() isspace() is applied
 *                only to characters already classified as printable, and
 *                ' ' is the only printable ASCII whitespace.
 */
#if !ENABLE_LOCALE_SUPPORT
# undef isprint
# undef isspace
# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20))
# define isspace(c) ((c) == ' ')
#endif

/*
 * Counter type used for every statistic, and the matching printf
 * conversion (without the leading '%', so a field width can be
 * inserted, e.g. "%9" COUNT_FMT).
 * ENABLE_FEATURE_WC_LARGE selects unsigned long long counters;
 * otherwise plain unsigned is used.
 */
#if ENABLE_FEATURE_WC_LARGE
# define COUNT_T unsigned long long
# define COUNT_FMT "llu"
#else
# define COUNT_T unsigned
# define COUNT_FMT "u"
#endif

/* We support -m even when UNICODE_SUPPORT is off,
 * we just don't advertise it in help text,
 * since it is the same as -c in this case.
 */

//usage:#define wc_trivial_usage
//usage:       "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
//usage:
//usage:#define wc_full_usage "\n\n"
//usage:       "Count lines, words, and bytes for each FILE (or stdin)\n"
//usage:     "\n    -c  Count bytes"
//usage:    IF_UNICODE_SUPPORT(
//usage:     "\n    -m  Count characters"
//usage:    )
//usage:     "\n    -l  Count newlines"
//usage:     "\n    -w  Count words"
//usage:     "\n    -L  Print longest line length"
//usage:
//usage:#define wc_example_usage
//usage:       "$ wc /etc/passwd\n"
//usage:       "     31      46    1365 /etc/passwd\n"

/* Order is important if we want to be compatible with
 * column order in "wc -cmlwL" output.
 *
 * Each value serves two purposes in wc_main():
 *  - index into the counts[] / totals[] arrays (sized by NUM_WCS);
 *  - bit position in the print_type mask (tested as 1 << value).
 * wc_main() relies on these bit positions lining up with the option
 * letters in the "lwmcL" string it passes to getopt32(), so the enum
 * and that string must be changed together.
 */
enum {
    WC_LINES    = 0, /* -l */
    WC_WORDS    = 1, /* -w */
    WC_UNICHARS = 2, /* -m */
    WC_BYTES    = 3, /* -c */
    WC_LENGTH   = 4, /* -L */
    NUM_WCS     = 5,
};

91int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
92int wc_main(int argc UNUSED_PARAM, char **argv)
93{
94    const char *arg;
95    const char *start_fmt = " %9"COUNT_FMT + 1;
96    const char *fname_fmt = " %s\n";
97    COUNT_T *pcounts;
98    COUNT_T counts[NUM_WCS];
99    COUNT_T totals[NUM_WCS];
100    int num_files;
101    smallint status = EXIT_SUCCESS;
102    unsigned print_type;
103
104    init_unicode();
105
106    print_type = getopt32(argv, "lwmcL");
107
108    if (print_type == 0) {
109        print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
110    }
111
112    argv += optind;
113    if (!argv[0]) {
114        *--argv = (char *) bb_msg_standard_input;
115        fname_fmt = "\n";
116    }
117    if (!argv[1]) { /* zero or one filename? */
118        if (!((print_type-1) & print_type)) /* exactly one option? */
119            start_fmt = "%"COUNT_FMT;
120    }
121
122    memset(totals, 0, sizeof(totals));
123
124    pcounts = counts;
125
126    num_files = 0;
127    while ((arg = *argv++) != NULL) {
128        FILE *fp;
129        const char *s;
130        unsigned u;
131        unsigned linepos;
132        smallint in_word;
133
134        ++num_files;
135        fp = fopen_or_warn_stdin(arg);
136        if (!fp) {
137            status = EXIT_FAILURE;
138            continue;
139        }
140
141        memset(counts, 0, sizeof(counts));
142        linepos = 0;
143        in_word = 0;
144
145        while (1) {
146            int c;
147            /* Our -w doesn't match GNU wc exactly... oh well */
148
149            c = getc(fp);
150            if (c == EOF) {
151                if (ferror(fp)) {
152                    bb_simple_perror_msg(arg);
153                    status = EXIT_FAILURE;
154                }
155                goto DO_EOF;  /* Treat an EOF as '\r'. */
156            }
157
158            /* Cater for -c and -m */
159            ++counts[WC_BYTES];
160            if (unicode_status != UNICODE_ON /* every byte is a new char */
161             || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
162            ) {
163                ++counts[WC_UNICHARS];
164            }
165
166            if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
167                ++linepos;
168                if (!isspace(c)) {
169                    in_word = 1;
170                    continue;
171                }
172            } else if ((unsigned)(c - 9) <= 4) {
173                /* \t  9
174                 * \n 10
175                 * \v 11
176                 * \f 12
177                 * \r 13
178                 */
179                if (c == '\t') {
180                    linepos = (linepos | 7) + 1;
181                } else {  /* '\n', '\r', '\f', or '\v' */
182 DO_EOF:
183                    if (linepos > counts[WC_LENGTH]) {
184                        counts[WC_LENGTH] = linepos;
185                    }
186                    if (c == '\n') {
187                        ++counts[WC_LINES];
188                    }
189                    if (c != '\v') {
190                        linepos = 0;
191                    }
192                }
193            } else {
194                continue;
195            }
196
197            counts[WC_WORDS] += in_word;
198            in_word = 0;
199            if (c == EOF) {
200                break;
201            }
202        }
203
204        fclose_if_not_stdin(fp);
205
206        if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
207            totals[WC_LENGTH] = counts[WC_LENGTH];
208        }
209        totals[WC_LENGTH] -= counts[WC_LENGTH];
210
211 OUTPUT:
212        /* coreutils wc tries hard to print pretty columns
213         * (saves results for all files, finds max col len etc...)
214         * we won't try that hard, it will bloat us too much */
215        s = start_fmt;
216        u = 0;
217        do {
218            if (print_type & (1 << u)) {
219                printf(s, pcounts[u]);
220                s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
221            }
222            totals[u] += pcounts[u];
223        } while (++u < NUM_WCS);
224        printf(fname_fmt, arg);
225    }
226
227    /* If more than one file was processed, we want the totals.  To save some
228     * space, we set the pcounts ptr to the totals array.  This has the side
229     * effect of trashing the totals array after outputting it, but that's
230     * irrelavent since we no longer need it. */
231    if (num_files > 1) {
232        num_files = 0;  /* Make sure we don't get here again. */
233        arg = "total";
234        pcounts = totals;
235        --argv;
236        goto OUTPUT;
237    }
238
239    fflush_stdout_and_exit(status);
240}
Note: See TracBrowser for help on using the repository browser.