source: MondoRescue/branches/3.3/mindi-busybox/coreutils/tr.c@ 3906

Last change on this file since 3906 was 3621, checked in by Bruno Cornec, 10 years ago

New 3.3 branch for incorporation of the latest busybox 1.25. Changing the minor version to handle potential incompatibilities.

File size: 9.5 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * Mini tr implementation for busybox
4 *
5 ** Copyright (c) 1987,1997, Prentice Hall All rights reserved.
6 *
7 * The name of Prentice Hall may not be used to endorse or promote
8 * products derived from this software without specific prior
9 * written permission.
10 *
11 * Copyright (c) Michiel Huisjes
12 *
13 * This version of tr is adapted from Minix tr and was modified
14 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
15 *
16 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
17 */
18/* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19 * TODO: graph, print
20 */
21
22//kbuild:lib-$(CONFIG_TR) += tr.o
23
24//config:config TR
25//config: bool "tr"
26//config: default y
27//config: help
28//config: tr is used to squeeze, and/or delete characters from standard
29//config: input, writing to standard output.
30//config:
31//config:config FEATURE_TR_CLASSES
32//config: bool "Enable character classes (such as [:upper:])"
33//config: default y
34//config: depends on TR
35//config: help
36//config: Enable character classes, enabling commands such as:
37//config: tr [:upper:] [:lower:] to convert input into lowercase.
38//config:
39//config:config FEATURE_TR_EQUIV
40//config: bool "Enable equivalence classes"
41//config: default y
42//config: depends on TR
43//config: help
44//config: Enable equivalence classes, which essentially add the enclosed
45//config: character to the current set. For instance, tr [=a=] xyz would
46//config: replace all instances of 'a' with 'xyz'. This option is mainly
47//config: useful for cases when no other way of expressing a character
48//config: is possible.
49
50//usage:#define tr_trivial_usage
51//usage: "[-cds] STRING1 [STRING2]"
52//usage:#define tr_full_usage "\n\n"
53//usage: "Translate, squeeze, or delete characters from stdin, writing to stdout\n"
54//usage: "\n -c Take complement of STRING1"
55//usage: "\n -d Delete input characters coded STRING1"
56//usage: "\n -s Squeeze multiple output characters of STRING2 into one character"
57//usage:
58//usage:#define tr_example_usage
59//usage: "$ echo \"gdkkn vnqkc\" | tr [a-y] [b-z]\n"
60//usage: "hello world\n"
61
62#include "libbb.h"
63
enum {
	/* Number of distinct byte values */
	ASCII = 256,
	/* Initial size of the string buffers. They have to hold at least
	 * the whole "alphabet". A size of exactly ASCII would be enough,
	 * but then expand would realloc even for the smallest patterns,
	 * so never go below twice that:
	 */
	TR_BUFSIZ = (ASCII*2 < BUFSIZ) ? BUFSIZ : ASCII*2,
};
72
/* Fill the translation table pvector (indexed by unsigned char value):
 * the k-th character of string1 is mapped to the k-th character of
 * string2. Once string2 is exhausted, the remaining string1 characters
 * are mapped to the last string2 character that was used
 * (or to '0' if string2 is empty).
 */
static void map(char *pvector,
		char *string1, unsigned string1_len,
		char *string2, unsigned string2_len)
{
	char fill = '0';
	unsigned src = 0;
	unsigned dst = 0;

	while (src < string1_len) {
		unsigned char from = (unsigned char) string1[src++];

		/* Take the next replacement while there is one;
		 * otherwise keep repeating the previous one */
		if (dst < string2_len)
			fill = string2[dst++];
		pvector[from] = fill;
	}
}
87
88/* supported constructs:
89 * Ranges, e.g., 0-9 ==> 0123456789
90 * Escapes, e.g., \a ==> Control-G
91 * Character classes, e.g. [:upper:] ==> A...Z
92 * Equiv classess, e.g. [=A=] ==> A (hmmmmmmm?)
93 * not supported:
94 * [x*N] - repeat char x N times
95 * [x*] - repeat char x until it fills STRING2:
96 * # echo qwe123 | /usr/bin/tr 123456789 '[d]'
97 * qwe[d]
98 * # echo qwe123 | /usr/bin/tr 123456789 '[d*]'
99 * qweddd
100 */
101static unsigned expand(char *arg, char **buffer_p)
102{
103 char *buffer = *buffer_p;
104 unsigned pos = 0;
105 unsigned size = TR_BUFSIZ;
106 unsigned i; /* can't be unsigned char: must be able to hold 256 */
107 unsigned char ac;
108
109 while (*arg) {
110 if (pos + ASCII > size) {
111 size += ASCII;
112 *buffer_p = buffer = xrealloc(buffer, size);
113 }
114 if (*arg == '\\') {
115 const char *z;
116 arg++;
117 z = arg;
118 ac = bb_process_escape_sequence(&z);
119 arg = (char *)z;
120 arg--;
121 *arg = ac;
122 /*
123 * fall through, there may be a range.
124 * If not, current char will be treated anyway.
125 */
126 }
127 if (arg[1] == '-') { /* "0-9..." */
128 ac = arg[2];
129 if (ac == '\0') { /* "0-": copy verbatim */
130 buffer[pos++] = *arg++; /* copy '0' */
131 continue; /* next iter will copy '-' and stop */
132 }
133 i = (unsigned char) *arg;
134 arg += 3; /* skip 0-9 or 0-\ */
135 if (ac == '\\') {
136 const char *z;
137 z = arg;
138 ac = bb_process_escape_sequence(&z);
139 arg = (char *)z;
140 }
141 while (i <= ac) /* ok: i is unsigned _int_ */
142 buffer[pos++] = i++;
143 continue;
144 }
145 if ((ENABLE_FEATURE_TR_CLASSES || ENABLE_FEATURE_TR_EQUIV)
146 && *arg == '['
147 ) {
148 arg++;
149 i = (unsigned char) *arg++;
150 /* "[xyz...". i=x, arg points to y */
151 if (ENABLE_FEATURE_TR_CLASSES && i == ':') { /* [:class:] */
152#define CLO ":]\0"
153 static const char classes[] ALIGN1 =
154 "alpha"CLO "alnum"CLO "digit"CLO
155 "lower"CLO "upper"CLO "space"CLO
156 "blank"CLO "punct"CLO "cntrl"CLO
157 "xdigit"CLO;
158 enum {
159 CLASS_invalid = 0, /* we increment the retval */
160 CLASS_alpha = 1,
161 CLASS_alnum = 2,
162 CLASS_digit = 3,
163 CLASS_lower = 4,
164 CLASS_upper = 5,
165 CLASS_space = 6,
166 CLASS_blank = 7,
167 CLASS_punct = 8,
168 CLASS_cntrl = 9,
169 CLASS_xdigit = 10,
170 //CLASS_graph = 11,
171 //CLASS_print = 12,
172 };
173 smalluint j;
174 char *tmp;
175
176 /* xdigit needs 8, not 7 */
177 i = 7 + (arg[0] == 'x');
178 tmp = xstrndup(arg, i);
179 j = index_in_strings(classes, tmp) + 1;
180 free(tmp);
181
182 if (j == CLASS_invalid)
183 goto skip_bracket;
184
185 arg += i;
186 if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) {
187 for (i = '0'; i <= '9'; i++)
188 buffer[pos++] = i;
189 }
190 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
191 for (i = 'A'; i <= 'Z'; i++)
192 buffer[pos++] = i;
193 }
194 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
195 for (i = 'a'; i <= 'z'; i++)
196 buffer[pos++] = i;
197 }
198 if (j == CLASS_space || j == CLASS_blank) {
199 buffer[pos++] = '\t';
200 if (j == CLASS_space) {
201 buffer[pos++] = '\n';
202 buffer[pos++] = '\v';
203 buffer[pos++] = '\f';
204 buffer[pos++] = '\r';
205 }
206 buffer[pos++] = ' ';
207 }
208 if (j == CLASS_punct || j == CLASS_cntrl) {
209 for (i = '\0'; i < ASCII; i++) {
210 if ((j == CLASS_punct && isprint_asciionly(i) && !isalnum(i) && !isspace(i))
211 || (j == CLASS_cntrl && iscntrl(i))
212 ) {
213 buffer[pos++] = i;
214 }
215 }
216 }
217 if (j == CLASS_xdigit) {
218 for (i = 'A'; i <= 'F'; i++) {
219 buffer[pos + 6] = i | 0x20;
220 buffer[pos++] = i;
221 }
222 pos += 6;
223 }
224 continue;
225 }
226 /* "[xyz...", i=x, arg points to y */
227 if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */
228 buffer[pos++] = *arg; /* copy CHAR */
229 if (!arg[0] || arg[1] != '=' || arg[2] != ']')
230 bb_show_usage();
231 arg += 3; /* skip CHAR=] */
232 continue;
233 }
234 /* The rest of "[xyz..." cases is treated as normal
235 * string, "[" has no special meaning here:
236 * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z",
237 * also try tr "[a-z]" "_A-Z+" and you'll see that
238 * [] is not special here.
239 */
240 skip_bracket:
241 arg -= 2; /* points to "[" in "[xyz..." */
242 }
243 buffer[pos++] = *arg++;
244 }
245 return pos;
246}
247
248/* NB: buffer is guaranteed to be at least TR_BUFSIZE
249 * (which is >= ASCII) big.
250 */
251static int complement(char *buffer, int buffer_len)
252{
253 int len;
254 char conv[ASCII];
255 unsigned char ch;
256
257 len = 0;
258 ch = '\0';
259 while (1) {
260 if (memchr(buffer, ch, buffer_len) == NULL)
261 conv[len++] = ch;
262 if (++ch == '\0')
263 break;
264 }
265 memcpy(buffer, conv, len);
266 return len;
267}
268
269int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
270int tr_main(int argc UNUSED_PARAM, char **argv)
271{
272 int i;
273 smalluint opts;
274 ssize_t read_chars;
275 size_t in_index, out_index;
276 unsigned last = UCHAR_MAX + 1; /* not equal to any char */
277 unsigned char coded, c;
278 char *str1 = xmalloc(TR_BUFSIZ);
279 char *str2 = xmalloc(TR_BUFSIZ);
280 int str2_length;
281 int str1_length;
282 char *vector = xzalloc(ASCII * 3);
283 char *invec = vector + ASCII;
284 char *outvec = vector + ASCII * 2;
285
286#define TR_OPT_complement (3 << 0)
287#define TR_OPT_delete (1 << 2)
288#define TR_OPT_squeeze_reps (1 << 3)
289
290 for (i = 0; i < ASCII; i++) {
291 vector[i] = i;
292 /*invec[i] = outvec[i] = FALSE; - done by xzalloc */
293 }
294
295 /* -C/-c difference is that -C complements "characters",
296 * and -c complements "values" (binary bytes I guess).
297 * In POSIX locale, these are the same.
298 */
299
300 opt_complementary = "-1";
301 opts = getopt32(argv, "+Ccds"); /* '+': stop at first non-option */
302 argv += optind;
303
304 str1_length = expand(*argv++, &str1);
305 str2_length = 0;
306 if (opts & TR_OPT_complement)
307 str1_length = complement(str1, str1_length);
308 if (*argv) {
309 if (argv[0][0] == '\0')
310 bb_error_msg_and_die("STRING2 cannot be empty");
311 str2_length = expand(*argv, &str2);
312 map(vector, str1, str1_length,
313 str2, str2_length);
314 }
315 for (i = 0; i < str1_length; i++)
316 invec[(unsigned char)(str1[i])] = TRUE;
317 for (i = 0; i < str2_length; i++)
318 outvec[(unsigned char)(str2[i])] = TRUE;
319
320 goto start_from;
321
322 /* In this loop, str1 space is reused as input buffer,
323 * str2 - as output one. */
324 for (;;) {
325 /* If we're out of input, flush output and read more input. */
326 if ((ssize_t)in_index == read_chars) {
327 if (out_index) {
328 xwrite(STDOUT_FILENO, str2, out_index);
329 start_from:
330 out_index = 0;
331 }
332 read_chars = safe_read(STDIN_FILENO, str1, TR_BUFSIZ);
333 if (read_chars <= 0) {
334 if (read_chars < 0)
335 bb_perror_msg_and_die(bb_msg_read_error);
336 break;
337 }
338 in_index = 0;
339 }
340 c = str1[in_index++];
341 if ((opts & TR_OPT_delete) && invec[c])
342 continue;
343 coded = vector[c];
344 if ((opts & TR_OPT_squeeze_reps) && last == coded
345 && (invec[c] || outvec[coded])
346 ) {
347 continue;
348 }
349 str2[out_index++] = last = coded;
350 }
351
352 if (ENABLE_FEATURE_CLEAN_UP) {
353 free(vector);
354 free(str2);
355 free(str1);
356 }
357
358 return EXIT_SUCCESS;
359}
Note: See TracBrowser for help on using the repository browser.