source: branches/2.2.2/mindi-busybox/coreutils/cut.c @ 1247

Last change on this file since 1247 was 821, checked in by Bruno Cornec, 14 years ago

Addition of busybox 1.2.1 as a mindi-busybox new package
This should avoid delivering binary files in mindi not built there (Fedora and Debian are quite serious about that)

File size: 9.0 KB
Line 
1/* vi: set sw=8 ts=8: */
2/*
3 * cut.c - minimalist version of cut
4 *
5 * Copyright (C) 1999,2000,2001 by Lineo, inc.
6 * Written by Mark Whitley <markw@codepoet.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
27#include <string.h>
28#include <limits.h>
29#include "busybox.h"
30
31
/* Option letters handed to the option parser in cut_main(); a letter
 * followed by ':' takes an argument. */
static const char optstring[] = "b:c:f:d:sn";

/* Bits tested in the option mask in cut_main(); their values follow the
 * order of the first five letters of optstring. */
enum {
    OPT_BYTE_FLGS    = 1 << 0,  /* -b */
    OPT_CHAR_FLGS    = 1 << 1,  /* -c */
    OPT_FIELDS_FLGS  = 1 << 2,  /* -f */
    OPT_DELIM_FLGS   = 1 << 3,  /* -d */
    OPT_SUPRESS_FLGS = 1 << 4   /* -s */
};

/* Which of the byte / char / field bits above was selected. */
static char part;

/* Non-zero when lines that contain no delimiter must not be printed. */
static unsigned int supress_non_delimited_lines;

/* Field delimiter; a tab unless overridden on the command line. */
static char delim = '\t';

/* One parsed list element; both positions are stored zero-based. */
struct cut_list {
    int startpos;
    int endpos;     /* NON_RANGE when the element is a single position */
};

/* Special position values used while parsing and walking the lists. */
enum {
    BOL = 0,            /* open start: beginning of line */
    EOL = INT_MAX,      /* open end: up to the end of the line */
    NON_RANGE = -1      /* endpos marker for a single position */
};

/* Growable array of parsed list elements, and how many it holds.
 * Objects with static storage start out as NULL / 0. */
static struct cut_list *cut_lists;
static unsigned int nlists;
56
57
58static int cmpfunc(const void *a, const void *b)
59{
60    struct cut_list *la = (struct cut_list *)a;
61    struct cut_list *lb = (struct cut_list *)b;
62
63    if (la->startpos > lb->startpos)
64        return 1;
65    if (la->startpos < lb->startpos)
66        return -1;
67    return 0;
68}
69
70
71/*
72 * parse_lists() - parses a list and puts values into startpos and endpos.
73 * valid list formats: N, N-, N-M, -M
74 * more than one list can be separated by commas
75 */
76static void parse_lists(char *lists)
77{
78    char *ltok = NULL;
79    char *ntok = NULL;
80    char *junk;
81    int s = 0, e = 0;
82
83    /* take apart the lists, one by one (they are separated with commas */
84    while ((ltok = strsep(&lists, ",")) != NULL) {
85
86        /* it's actually legal to pass an empty list */
87        if (strlen(ltok) == 0)
88            continue;
89
90        /* get the start pos */
91        ntok = strsep(&ltok, "-");
92        if (ntok == NULL) {
93            fprintf(stderr, "Help ntok is null for starting position! What do I do?\n");
94        } else if (strlen(ntok) == 0) {
95            s = BOL;
96        } else {
97            s = strtoul(ntok, &junk, 10);
98            if(*junk != '\0' || s < 0)
99                bb_error_msg_and_die("invalid byte or field list");
100
101            /* account for the fact that arrays are zero based, while the user
102             * expects the first char on the line to be char # 1 */
103            if (s != 0)
104                s--;
105        }
106
107        /* get the end pos */
108        ntok = strsep(&ltok, "-");
109        if (ntok == NULL) {
110            e = NON_RANGE;
111        } else if (strlen(ntok) == 0) {
112            e = EOL;
113        } else {
114            e = strtoul(ntok, &junk, 10);
115            if(*junk != '\0' || e < 0)
116                bb_error_msg_and_die("invalid byte or field list");
117            /* if the user specified and end position of 0, that means "til the
118             * end of the line */
119            if (e == 0)
120                e = INT_MAX;
121            e--; /* again, arrays are zero based, lines are 1 based */
122            if (e == s)
123                e = NON_RANGE;
124        }
125
126        /* if there's something left to tokenize, the user past an invalid list */
127        if (ltok)
128            bb_error_msg_and_die("invalid byte or field list");
129
130        /* add the new list */
131        cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
132        cut_lists[nlists-1].startpos = s;
133        cut_lists[nlists-1].endpos = e;
134    }
135
136    /* make sure we got some cut positions out of all that */
137    if (nlists == 0)
138        bb_error_msg_and_die("missing list of positions");
139
140    /* now that the lists are parsed, we need to sort them to make life easier
141     * on us when it comes time to print the chars / fields / lines */
142    qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
143
144}
145
146
147static void cut_line_by_chars(const char *line)
148{
149    int c, l;
150    /* set up a list so we can keep track of what's been printed */
151    char *printed = xcalloc(strlen(line), sizeof(char));
152
153    /* print the chars specified in each cut list */
154    for (c = 0; c < nlists; c++) {
155        l = cut_lists[c].startpos;
156        while (l < strlen(line)) {
157            if (!printed[l]) {
158                putchar(line[l]);
159                printed[l] = 'X';
160            }
161            l++;
162            if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos)
163                break;
164        }
165    }
166    putchar('\n'); /* cuz we were handed a chomped line */
167    free(printed);
168}
169
170
171static void cut_line_by_fields(char *line)
172{
173    int c, f;
174    int ndelim = -1; /* zero-based / one-based problem */
175    int nfields_printed = 0;
176    char *field = NULL;
177    char d[2] = { delim, 0 };
178    char *printed;
179
180    /* test the easy case first: does this line contain any delimiters? */
181    if (strchr(line, delim) == NULL) {
182        if (!supress_non_delimited_lines)
183            puts(line);
184        return;
185    }
186
187    /* set up a list so we can keep track of what's been printed */
188    printed = xcalloc(strlen(line), sizeof(char));
189
190    /* process each list on this line, for as long as we've got a line to process */
191    for (c = 0; c < nlists && line; c++) {
192        f = cut_lists[c].startpos;
193        do {
194
195            /* find the field we're looking for */
196            while (line && ndelim < f) {
197                field = strsep(&line, d);
198                ndelim++;
199            }
200
201            /* we found it, and it hasn't been printed yet */
202            if (field && ndelim == f && !printed[ndelim]) {
203                /* if this isn't our first time through, we need to print the
204                 * delimiter after the last field that was printed */
205                if (nfields_printed > 0)
206                    putchar(delim);
207                fputs(field, stdout);
208                printed[ndelim] = 'X';
209                nfields_printed++;
210            }
211
212            f++;
213
214            /* keep going as long as we have a line to work with, this is a
215             * list, and we're not at the end of that list */
216        } while (line && cut_lists[c].endpos != NON_RANGE && f <= cut_lists[c].endpos);
217    }
218
219    /* if we printed anything at all, we need to finish it with a newline cuz
220     * we were handed a chomped line */
221    putchar('\n');
222
223    free(printed);
224}
225
226
227static void cut_file_by_lines(const char *line, unsigned int linenum)
228{
229    static int c = 0;
230    static int l = -1;
231
232    /* I can't initialize this above cuz the "initializer isn't
233     * constant" *sigh* */
234    if (l == -1)
235        l = cut_lists[c].startpos;
236
237    /* get out if we have no more lists to process or if the lines are lower
238     * than what we're interested in */
239    if (c >= nlists || linenum < l)
240        return;
241
242    /* if the line we're looking for is lower than the one we were passed, it
243     * means we displayed it already, so move on */
244    while (l < linenum) {
245        l++;
246        /* move on to the next list if we're at the end of this one */
247        if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos) {
248            c++;
249            /* get out if there's no more lists to process */
250            if (c >= nlists)
251                return;
252            l = cut_lists[c].startpos;
253            /* get out if the current line is lower than the one we just became
254             * interested in */
255            if (linenum < l)
256                return;
257        }
258    }
259
260    /* If we made it here, it means we've found the line we're looking for, so print it */
261    puts(line);
262}
263
264
265/*
266 * snippy-snip
267 */
268static void cut_file(FILE *file)
269{
270    char *line = NULL;
271    unsigned int linenum = 0; /* keep these zero-based to be consistent */
272
273    /* go through every line in the file */
274    while ((line = bb_get_chomped_line_from_file(file)) != NULL) {
275
276        /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
277        if ((part & (OPT_CHAR_FLGS | OPT_BYTE_FLGS)))
278            cut_line_by_chars(line);
279
280        /* cut based on fields */
281        else {
282            if (delim == '\n')
283                cut_file_by_lines(line, linenum);
284            else
285                cut_line_by_fields(line);
286        }
287
288        linenum++;
289        free(line);
290    }
291}
292
293
294int cut_main(int argc, char **argv)
295{
296    unsigned long opt;
297    char *sopt, *sdopt;
298
299    bb_opt_complementally = "b--bcf:c--bcf:f--bcf";
300    opt = bb_getopt_ulflags(argc, argv, optstring, &sopt, &sopt, &sopt, &sdopt);
301    part = opt & (OPT_BYTE_FLGS|OPT_CHAR_FLGS|OPT_FIELDS_FLGS);
302    if(part == 0)
303        bb_error_msg_and_die("you must specify a list of bytes, characters, or fields");
304    if(opt & BB_GETOPT_ERROR)
305        bb_error_msg_and_die("only one type of list may be specified");
306    parse_lists(sopt);
307    if((opt & (OPT_DELIM_FLGS))) {
308        if (strlen(sdopt) > 1) {
309            bb_error_msg_and_die("the delimiter must be a single character");
310        }
311        delim = sdopt[0];
312    }
313    supress_non_delimited_lines = opt & OPT_SUPRESS_FLGS;
314
315    /*  non-field (char or byte) cutting has some special handling */
316    if (part != OPT_FIELDS_FLGS) {
317        if (supress_non_delimited_lines) {
318            bb_error_msg_and_die("suppressing non-delimited lines makes sense"
319                    " only when operating on fields");
320        }
321        if (delim != '\t') {
322            bb_error_msg_and_die("a delimiter may be specified only when operating on fields");
323        }
324    }
325
326    /* argv[(optind)..(argc-1)] should be names of file to process. If no
327     * files were specified or '-' was specified, take input from stdin.
328     * Otherwise, we process all the files specified. */
329    if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
330        cut_file(stdin);
331    }
332    else {
333        int i;
334        FILE *file;
335        for (i = optind; i < argc; i++) {
336            file = bb_wfopen(argv[i], "r");
337            if(file) {
338                cut_file(file);
339                fclose(file);
340            }
341        }
342    }
343
344    return EXIT_SUCCESS;
345}
Note: See TracBrowser for help on using the repository browser.