/*
 * Source: MondoRescue/branches/2.2.2/mindi-busybox/editors/awk.c @ 1247
 *
 * Last change on this file since 1247 was 821, checked in by Bruno Cornec,
 * 18 years ago:
 *
 *   Addition of busybox 1.2.1 as a new mindi-busybox package.
 *   This should avoid delivering binary files in mindi that were not built
 *   there (Fedora and Debian are quite strict about that).
 *
 * File size: 57.4 KB
 */
/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <getopt.h>

#include "xregex.h"
#include "busybox.h"


/* size limit handed to fmt_num() when a number is formatted into buf */
#define MAXVARFMT       240
/* smallest number of vars allocated per temporary-variable block (nvalloc) */
#define MINNVBLOCK      64

/* variable flags */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
/* (clrvar() and copyvar() mask the type word with this set) */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)

/*
 * Variable.
 * Holds both a numeric and a string representation; the VF_* bits in
 * 'type' say which one is primary and whether the other one is cached.
 */
typedef struct var_s {
	unsigned short type;            /* flags (VF_*) */
	double number;
	char *string;
	union {
		int aidx;               /* func arg idx (for compilation stage) */
		struct xhash_s *array;  /* array ptr */
		struct var_s *parent;   /* for func args, ptr to actual parameter */
		char **walker;          /* list of array elements (for..in) */
	} x;
} var;

/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;   /* head of the chain; allocated lazily by chain_node() */
	struct node_s *last;    /* trailing placeholder node that the next chain_node() fills in */
	char *programname;      /* source name last recorded for this chain (see chain_node) */
} chain;

63/* Function */
64typedef struct func_s {
65 unsigned short nargs;
66 struct chain_s body;
67} func;
68
/*
 * I/O stream.
 * NOTE(review): the users of these fields are outside this part of the file;
 * the per-field notes below are inferred from the names - confirm.
 */
typedef struct rstream_s {
	FILE *F;                        /* underlying stdio stream */
	char *buffer;                   /* presumably the read buffer */
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;         /* presumably non-zero when F is a pipe */
} rstream;

79typedef struct hash_item_s {
80 union {
81 struct var_s v; /* variable/array hash */
82 struct rstream_s rs; /* redirect streams hash */
83 struct func_s f; /* functions hash */
84 } data;
85 struct hash_item_s *next; /* next in chain */
86 char name[1]; /* really it's longer */
87} hash_item;
88
/* Chained hash table keyed by string (variables, arrays, streams, functions) */
typedef struct xhash_s {
	unsigned int nel;               /* num of elements */
	unsigned int csize;             /* current hash size */
	unsigned int nprime;            /* next hash size in PRIMES[] */
	unsigned int glen;              /* summary length of item names */
	struct hash_item_s **items;     /* csize bucket heads */
} xhash;

97/* Tree node */
98typedef struct node_s {
99 uint32_t info;
100 unsigned short lineno;
101 union {
102 struct node_s *n;
103 var *v;
104 int i;
105 char *s;
106 regex_t *re;
107 } l;
108 union {
109 struct node_s *n;
110 regex_t *ire;
111 func *f;
112 int argno;
113 } r;
114 union {
115 struct node_s *n;
116 } a;
117} node;
118
119/* Block of temporary variables */
120typedef struct nvblock_s {
121 int size;
122 var *pos;
123 struct nvblock_s *prev;
124 struct nvblock_s *next;
125 var nv[0];
126} nvblock;
127
128typedef struct tsplitter_s {
129 node n;
130 regex_t re[2];
131} tsplitter;
132
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
/* (next_token() starts at class 1 and shifts left at every NTC marker in
 * tokenlist, so bit N here must match the Nth group of that string) */
#define TC_SEQSTART      1              /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)        /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
                         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
                         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
                         TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
                         TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)

/*
 * Operand flags stored in node.info above the operation class.
 * NOTE(review): the evaluator is outside this part of the file; from the
 * names and the builtin comment below, RESn presumably means "resolve the
 * nth operand", STRn/NUMn "fetch it as string/number" - confirm.
 */
#define OF_RES1         0x010000
#define OF_RES2         0x020000
#define OF_STR1         0x040000
#define OF_STR2         0x080000
#define OF_NUM1         0x100000
#define OF_CHECKED      0x200000

/* combined operator flags */
/* (first letter describes the left operand, second the right one) */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks extracting the operation class and the low opcode bits from info */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The argument is converted to uint32_t before shifting: builtin entries
 * such as P(0x83) set bit 31, which is undefined behaviour for a signed int
 * shift. All node.info fields are uint32_t, so the resulting values are the
 * intended bit patterns.
 */
#define P(x)            ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000

/* Operation classes */
/* (the class lives in the OPCLSMASK byte of node.info / tokeninfo[]) */

#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE=0x0100,   OC_EXEC=0x0200,     OC_NEWSOURCE=0x0300,
	OC_PRINT=0x0400,    OC_PRINTF=0x0500,   OC_WALKINIT=0x0600,

	OC_BR=0x0700,       OC_BREAK=0x0800,    OC_CONTINUE=0x0900,
	OC_EXIT=0x0a00,     OC_NEXT=0x0b00,     OC_NEXTFILE=0x0c00,
	OC_TEST=0x0d00,     OC_WALKNEXT=0x0e00,

	OC_BINARY=0x1000,   OC_BUILTIN=0x1100,  OC_COLON=0x1200,
	OC_COMMA=0x1300,    OC_COMPARE=0x1400,  OC_CONCAT=0x1500,
	OC_FBLTIN=0x1600,   OC_FIELD=0x1700,    OC_FNARG=0x1800,
	OC_FUNC=0x1900,     OC_GETLINE=0x1a00,  OC_IN=0x1b00,
	OC_LAND=0x1c00,     OC_LOR=0x1d00,      OC_MATCH=0x1e00,
	OC_MOVE=0x1f00,     OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
	OC_REPLACE=0x2200,  OC_RETURN=0x2300,   OC_SPRINTF=0x2400,
	OC_TERNARY=0x2500,  OC_UNARY=0x2600,    OC_VAR=0x2700,
	OC_DONE=0x2800,

	/* statement keywords; only seen by the parser (chain_group) */
	ST_IF=0x3000,       ST_DO=0x3100,       ST_FOR=0x3200,
	ST_WHILE=0x3300
};

/* simple builtins */
/* (opcodes combined with OC_FBLTIN in tokeninfo[]) */
enum {
	F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
	F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins */
/* (opcodes combined with OC_BUILTIN in tokeninfo[]) */
enum {
	B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
	B_ge,   B_gs,   B_su
};

/* tokens and their corresponding info values */
/*
 * tokenlist is a packed sequence of entries, each one a length byte
 * followed by that many characters of token text. An NTC byte closes the
 * current token class; next_token() then shifts its class bit left by one,
 * so the groups must appear in the same order as the TC_* bits. The list
 * ends with a zero length byte. tokeninfo[] is walked in lockstep and holds
 * one value per token.
 */

#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B    OC_BUILTIN

static char * const tokenlist =
	"\1("       NTC
	"\1)"       NTC
	"\1/"       NTC                                 /* REGEXP */
	"\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
	"\2++"      "\2--"      NTC                     /* UOPPOST */
	"\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
	"\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
	"\2*="      "\2/="      "\2%="      "\2^="
	"\1+"       "\1-"       "\3**="     "\2**"
	"\1/"       "\1%"       "\1^"       "\1*"
	"\2!="      "\2>="      "\2<="      "\1>"
	"\1<"       "\2!~"      "\1~"       "\2&&"
	"\2||"      "\1?"       "\1:"       NTC
	"\2in"      NTC
	"\1,"       NTC
	"\1|"       NTC
	"\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
	"\1]"       NTC
	"\1{"       NTC
	"\1}"       NTC
	"\1;"       NTC
	"\1\n"      NTC
	"\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
	"\10continue"           "\6delete"  "\5print"
	"\6printf"  "\4next"    "\10nextfile"
	"\6return"  "\4exit"    NTC
	"\5while"   NTC
	"\4else"    NTC

	"\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
	"\3cos"     "\3exp"     "\3int"     "\3log"
	"\4rand"    "\3sin"     "\4sqrt"    "\5srand"
	"\6gensub"  "\4gsub"    "\5index"   "\6length"
	"\5match"   "\5split"   "\7sprintf" "\3sub"
	"\6substr"  "\7systime" "\10strftime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC
	"\4func"    "\10function"   NTC
	"\5BEGIN"   NTC
	"\3END"     "\0"
	;

315static const uint32_t tokeninfo[] = {
316
317 0,
318 0,
319 OC_REGEXP,
320 xS|'a', xS|'w', xS|'|',
321 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
322 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
323 OC_FIELD|xV|P(5),
324 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
325 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
326 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
327 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
328 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
329 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
330 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
331 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
332 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
333 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
334 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
335 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
336 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
337 OC_COLON|xx|P(67)|':',
338 OC_IN|SV|P(49),
339 OC_COMMA|SS|P(80),
340 OC_PGETLINE|SV|P(37),
341 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
342 OC_UNARY|xV|P(19)|'!',
343 0,
344 0,
345 0,
346 0,
347 0,
348 ST_IF, ST_DO, ST_FOR, OC_BREAK,
349 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
350 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
351 OC_RETURN|Vx, OC_EXIT|Nx,
352 ST_WHILE,
353 0,
354
355 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
356 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
357 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
358 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
359 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
360 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
361 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
362 OC_GETLINE|SV|P(0),
363 0, 0,
364 0,
365 0
366};
367
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum below indexes V[]; it must list the variables in the same order
 * as vNames. */
enum {
	CONVFMT=0,  OFMT,       FS,         OFS,
	ORS,        RS,         RT,         FILENAME,
	SUBSEP,     ARGIND,     ARGC,       ARGV,
	ERRNO,      FNR,
	NR,         NF,         IGNORECASE,
	ENVIRON,    F0,         _intvarcount_
};

/* NUL-separated names; a '*' in front of the following name marks the
 * preceding variable as special; an empty string ends the list */
static char * vNames =
	"CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
	"ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
	"SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
	"ERRNO\0"   "FNR\0"
	"NR\0"      "NF\0*"     "IGNORECASE\0*"
	"ENVIRON\0" "$\0*"      "\0";

/* NUL-separated initial string values for the leading variables of vNames.
 * NOTE(review): "\377" presumably tells the initialisation code (not in this
 * part of the file) that the remaining variables start out numeric - confirm. */
static char * vValues =
	"%.6g\0"    "%.6g\0"    " \0"       " \0"
	"\n\0"      "\n\0"      "\0"        "\0"
	"\034\0"
	"\377";

/* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] and stops growing after the last entry. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };

398/* globals */
399
400extern char **environ;
401
402static var * V[_intvarcount_];
403static chain beginseq, mainseq, endseq, *seq;
404static int nextrec, nextfile;
405static node *break_ptr, *continue_ptr;
406static rstream *iF;
407static xhash *vhash, *ahash, *fdhash, *fnhash;
408static char *programname;
409static short lineno;
410static int is_f0_split;
411static int nfields;
412static var *Fields;
413static tsplitter fsplitter, rsplitter;
414static nvblock *cb;
415static char *pos;
416static char *buf;
417static int icase;
418static int exiting;
419
420static struct {
421 uint32_t tclass;
422 uint32_t info;
423 char *string;
424 double number;
425 short lineno;
426 int rollback;
427} t;
428
429/* function prototypes */
430static void handle_special(var *);
431static node *parse_expr(uint32_t);
432static void chain_group(void);
433static var *evaluate(node *, var *);
434static rstream *next_input_file(void);
435static int fmt_num(char *, int, const char *, double, int);
436static int awk_exit(int) ATTRIBUTE_NORETURN;
437
438/* ---- error handling ---- */
439
440static const char EMSG_INTERNAL_ERROR[] = "Internal error";
441static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
442static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
443static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
444static const char EMSG_INV_FMT[] = "Invalid format specifier";
445static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
446static const char EMSG_NOT_ARRAY[] = "Not an array";
447static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
448static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
449#ifndef CONFIG_FEATURE_AWK_MATH
450static const char EMSG_NO_MATH[] = "Math support is not compiled in";
451#endif
452
453static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
454static void syntax_error(const char * const message)
455{
456 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
457}
458
459#define runtime_error(x) syntax_error(x)
460
461
/* ---- hash stuff ---- */

/*
 * Hash a NUL-terminated name.
 * Every character is folded in as idx = c + idx * 63, using unsigned
 * arithmetic that wraps on overflow. Returns 0 for the empty string.
 * Callers reduce the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;
	return idx;
}

472/* create new hash */
473static xhash *hash_init(void)
474{
475 xhash *newhash;
476
477 newhash = (xhash *)xzalloc(sizeof(xhash));
478 newhash->csize = FIRST_PRIME;
479 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
480
481 return newhash;
482}
483
484/* find item in hash, return ptr to data, NULL if not found */
485static void *hash_search(xhash *hash, const char *name)
486{
487 hash_item *hi;
488
489 hi = hash->items [ hashidx(name) % hash->csize ];
490 while (hi) {
491 if (strcmp(hi->name, name) == 0)
492 return &(hi->data);
493 hi = hi->next;
494 }
495 return NULL;
496}
497
498/* grow hash if it becomes too big */
499static void hash_rebuild(xhash *hash)
500{
501 unsigned int newsize, i, idx;
502 hash_item **newitems, *hi, *thi;
503
504 if (hash->nprime == NPRIMES)
505 return;
506
507 newsize = PRIMES[hash->nprime++];
508 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
509
510 for (i=0; i<hash->csize; i++) {
511 hi = hash->items[i];
512 while (hi) {
513 thi = hi;
514 hi = thi->next;
515 idx = hashidx(thi->name) % newsize;
516 thi->next = newitems[idx];
517 newitems[idx] = thi;
518 }
519 }
520
521 free(hash->items);
522 hash->csize = newsize;
523 hash->items = newitems;
524}
525
526/* find item in hash, add it if necessary. Return ptr to data */
527static void *hash_find(xhash *hash, const char *name)
528{
529 hash_item *hi;
530 unsigned int idx;
531 int l;
532
533 hi = hash_search(hash, name);
534 if (! hi) {
535 if (++hash->nel / hash->csize > 10)
536 hash_rebuild(hash);
537
538 l = strlen(name) + 1;
539 hi = xzalloc(sizeof(hash_item) + l);
540 memcpy(hi->name, name, l);
541
542 idx = hashidx(name) % hash->csize;
543 hi->next = hash->items[idx];
544 hash->items[idx] = hi;
545 hash->glen += l;
546 }
547 return &(hi->data);
548}
549
/* typed wrappers around hash_find(); each expansion is fully parenthesized
 * so the cast applies to the call result even inside a larger expression */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name)        ((var *) hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *) hash_find(fnhash, (name)))

555static void hash_remove(xhash *hash, const char *name)
556{
557 hash_item *hi, **phi;
558
559 phi = &(hash->items[ hashidx(name) % hash->csize ]);
560 while (*phi) {
561 hi = *phi;
562 if (strcmp(hi->name, name) == 0) {
563 hash->glen -= (strlen(name) + 1);
564 hash->nel--;
565 *phi = hi->next;
566 free(hi);
567 break;
568 }
569 phi = &(hi->next);
570 }
571}
572
573/* ------ some useful functions ------ */
574
575static void skip_spaces(char **s)
576{
577 register char *p = *s;
578
579 while(*p == ' ' || *p == '\t' ||
580 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
581 p++;
582 }
583 *s = p;
584}
585
/*
 * Return the string that starts at *s and move *s just past its
 * terminating NUL, i.e. to the start of the next string of a NUL-separated
 * list.
 */
static char *nextword(char **s)
{
	char *p = *s;

	while (*(*s)++)
		;

	return p;
}

/*
 * Fetch the next character from *s and advance *s, decoding a backslash
 * escape through bb_process_escape_sequence(). If that helper hands the
 * backslash back without consuming anything, the character after the
 * backslash is returned literally.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *((*s)++);
	pps = *s;       /* position right after the first character */
	if (c == '\\') c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) c = *((*s)++);
	return c;
}

/*
 * Return non-zero if c may be part of an identifier (alphanumeric or '_').
 * Callers pass plain char values, which may be negative; the value is
 * converted to unsigned char because isalnum() is only defined for
 * unsigned char values and EOF.
 */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}

/*
 * Open 'path' with bb_xfopen(); the special path "-" yields stdin instead
 * (whatever 'mode' is).
 */
static FILE *afopen(const char *path, const char *mode)
{
	return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
}

616/* -------- working with variables (set/get/copy/etc) -------- */
617
618static xhash *iamarray(var *v)
619{
620 var *a = v;
621
622 while (a->type & VF_CHILD)
623 a = a->x.parent;
624
625 if (! (a->type & VF_ARRAY)) {
626 a->type |= VF_ARRAY;
627 a->x.array = hash_init();
628 }
629 return a->x.array;
630}
631
632static void clear_array(xhash *array)
633{
634 unsigned int i;
635 hash_item *hi, *thi;
636
637 for (i=0; i<array->csize; i++) {
638 hi = array->items[i];
639 while (hi) {
640 thi = hi;
641 hi = hi->next;
642 free(thi->data.v.string);
643 free(thi);
644 }
645 array->items[i] = NULL;
646 }
647 array->glen = array->nel = 0;
648}
649
650/* clear a variable */
651static var *clrvar(var *v)
652{
653 if (!(v->type & VF_FSTR))
654 free(v->string);
655
656 v->type &= VF_DONTTOUCH;
657 v->type |= VF_DIRTY;
658 v->string = NULL;
659 return v;
660}
661
662/* assign string value to variable */
663static var *setvar_p(var *v, char *value)
664{
665 clrvar(v);
666 v->string = value;
667 handle_special(v);
668
669 return v;
670}
671
672/* same as setvar_p but make a copy of string */
673static var *setvar_s(var *v, const char *value)
674{
675 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
676}
677
678/* same as setvar_s but set USER flag */
679static var *setvar_u(var *v, const char *value)
680{
681 setvar_s(v, value);
682 v->type |= VF_USER;
683 return v;
684}
685
686/* set array element to user string */
687static void setari_u(var *a, int idx, const char *s)
688{
689 register var *v;
690 static char sidx[12];
691
692 sprintf(sidx, "%d", idx);
693 v = findvar(iamarray(a), sidx);
694 setvar_u(v, s);
695}
696
697/* assign numeric value to variable */
698static var *setvar_i(var *v, double value)
699{
700 clrvar(v);
701 v->type |= VF_NUMBER;
702 v->number = value;
703 handle_special(v);
704 return v;
705}
706
707static char *getvar_s(var *v)
708{
709 /* if v is numeric and has no cached string, convert it to string */
710 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
711 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
712 v->string = bb_xstrdup(buf);
713 v->type |= VF_CACHED;
714 }
715 return (v->string == NULL) ? "" : v->string;
716}
717
718static double getvar_i(var *v)
719{
720 char *s;
721
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
723 v->number = 0;
724 s = v->string;
725 if (s && *s) {
726 v->number = strtod(s, &s);
727 if (v->type & VF_USER) {
728 skip_spaces(&s);
729 if (*s != '\0')
730 v->type &= ~VF_USER;
731 }
732 } else {
733 v->type &= ~VF_USER;
734 }
735 v->type |= VF_CACHED;
736 }
737 return v->number;
738}
739
740static var *copyvar(var *dest, const var *src)
741{
742 if (dest != src) {
743 clrvar(dest);
744 dest->type |= (src->type & ~VF_DONTTOUCH);
745 dest->number = src->number;
746 if (src->string)
747 dest->string = bb_xstrdup(src->string);
748 }
749 handle_special(dest);
750 return dest;
751}
752
753static var *incvar(var *v)
754{
755 return setvar_i(v, getvar_i(v)+1.);
756}
757
758/* return true if v is number or numeric string */
759static int is_numeric(var *v)
760{
761 getvar_i(v);
762 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
763}
764
765/* return 1 when value of v corresponds to true, 0 otherwise */
766static int istrue(var *v)
767{
768 if (is_numeric(v))
769 return (v->number == 0) ? 0 : 1;
770 else
771 return (v->string && *(v->string)) ? 1 : 0;
772}
773
774/* temporary variables allocator. Last allocated should be first freed */
775static var *nvalloc(int n)
776{
777 nvblock *pb = NULL;
778 var *v, *r;
779 int size;
780
781 while (cb) {
782 pb = cb;
783 if ((cb->pos - cb->nv) + n <= cb->size) break;
784 cb = cb->next;
785 }
786
787 if (! cb) {
788 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
789 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
790 cb->size = size;
791 cb->pos = cb->nv;
792 cb->prev = pb;
793 cb->next = NULL;
794 if (pb) pb->next = cb;
795 }
796
797 v = r = cb->pos;
798 cb->pos += n;
799
800 while (v < cb->pos) {
801 v->type = 0;
802 v->string = NULL;
803 v++;
804 }
805
806 return r;
807}
808
809static void nvfree(var *v)
810{
811 var *p;
812
813 if (v < cb->nv || v >= cb->pos)
814 runtime_error(EMSG_INTERNAL_ERROR);
815
816 for (p=v; p<cb->pos; p++) {
817 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
818 clear_array(iamarray(p));
819 free(p->x.array->items);
820 free(p->x.array);
821 }
822 if (p->type & VF_WALK)
823 free(p->x.walker);
824
825 clrvar(p);
826 }
827
828 cb->pos = v;
829 while (cb->prev && cb->pos == cb->nv) {
830 cb = cb->prev;
831 }
832}
833
834/* ------- awk program text parsing ------- */
835
836/* Parse next token pointed by global pos, place results into global t.
837 * If token isn't expected, give away. Return token class
838 */
839static uint32_t next_token(uint32_t expected)
840{
841 char *p, *pp, *s;
842 char *tl;
843 uint32_t tc;
844 const uint32_t *ti;
845 int l;
846 static int concat_inserted;
847 static uint32_t save_tclass, save_info;
848 static uint32_t ltclass = TC_OPTERM;
849
850 if (t.rollback) {
851
852 t.rollback = FALSE;
853
854 } else if (concat_inserted) {
855
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
858 t.info = save_info;
859
860 } else {
861
862 p = pos;
863
864 readnext:
865 skip_spaces(&p);
866 lineno = t.lineno;
867 if (*p == '#')
868 while (*p != '\n' && *p != '\0') p++;
869
870 if (*p == '\n')
871 t.lineno++;
872
873 if (*p == '\0') {
874 tc = TC_EOF;
875
876 } else if (*p == '\"') {
877 /* it's a string */
878 t.string = s = ++p;
879 while (*p != '\"') {
880 if (*p == '\0' || *p == '\n')
881 syntax_error(EMSG_UNEXP_EOS);
882 *(s++) = nextchar(&p);
883 }
884 p++;
885 *s = '\0';
886 tc = TC_STRING;
887
888 } else if ((expected & TC_REGEXP) && *p == '/') {
889 /* it's regexp */
890 t.string = s = ++p;
891 while (*p != '/') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 if ((*s++ = *p++) == '\\') {
895 pp = p;
896 *(s-1) = bb_process_escape_sequence((const char **)&p);
897 if (*pp == '\\') *s++ = '\\';
898 if (p == pp) *s++ = *p++;
899 }
900 }
901 p++;
902 *s = '\0';
903 tc = TC_REGEXP;
904
905 } else if (*p == '.' || isdigit(*p)) {
906 /* it's a number */
907 t.number = strtod(p, &p);
908 if (*p == '.')
909 syntax_error(EMSG_UNEXP_TOKEN);
910 tc = TC_NUMBER;
911
912 } else {
913 /* search for something known */
914 tl = tokenlist;
915 tc = 0x00000001;
916 ti = tokeninfo;
917 while (*tl) {
918 l = *(tl++);
919 if (l == NTCC) {
920 tc <<= 1;
921 continue;
922 }
923 /* if token class is expected, token
924 * matches and it's not a longer word,
925 * then this is what we are looking for
926 */
927 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928 *tl == *p && strncmp(p, tl, l) == 0 &&
929 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
930 t.info = *ti;
931 p += l;
932 break;
933 }
934 ti++;
935 tl += l;
936 }
937
938 if (! *tl) {
939 /* it's a name (var/array/function),
940 * otherwise it's something wrong
941 */
942 if (! isalnum_(*p))
943 syntax_error(EMSG_UNEXP_TOKEN);
944
945 t.string = --p;
946 while(isalnum_(*(++p))) {
947 *(p-1) = *p;
948 }
949 *(p-1) = '\0';
950 tc = TC_VARIABLE;
951 /* also consume whitespace between functionname and bracket */
952 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
953 if (*p == '(') {
954 tc = TC_FUNCTION;
955 } else {
956 if (*p == '[') {
957 p++;
958 tc = TC_ARRAY;
959 }
960 }
961 }
962 }
963 pos = p;
964
965 /* skipping newlines in some cases */
966 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967 goto readnext;
968
969 /* insert concatenation operator when needed */
970 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971 concat_inserted = TRUE;
972 save_tclass = tc;
973 save_info = t.info;
974 tc = TC_BINOP;
975 t.info = OC_CONCAT | SS | P(35);
976 }
977
978 t.tclass = tc;
979 }
980 ltclass = t.tclass;
981
982 /* Are we ready for this? */
983 if (! (ltclass & expected))
984 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
986
987 return ltclass;
988}
989
990static void rollback_token(void) { t.rollback = TRUE; }
991
992static node *new_node(uint32_t info)
993{
994 register node *n;
995
996 n = (node *)xzalloc(sizeof(node));
997 n->info = info;
998 n->lineno = lineno;
999 return n;
1000}
1001
1002static node *mk_re_node(char *s, node *n, regex_t *re)
1003{
1004 n->info = OC_REGEXP;
1005 n->l.re = re;
1006 n->r.ire = re + 1;
1007 xregcomp(re, s, REG_EXTENDED);
1008 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1009
1010 return n;
1011}
1012
1013static node *condition(void)
1014{
1015 next_token(TC_SEQSTART);
1016 return parse_expr(TC_SEQTERM);
1017}
1018
1019/* parse expression terminated by given argument, return ptr
1020 * to built subtree. Terminator is eaten by parse_expr */
1021static node *parse_expr(uint32_t iexp)
1022{
1023 node sn;
1024 node *cn = &sn;
1025 node *vn, *glptr;
1026 uint32_t tc, xtc;
1027 var *v;
1028
1029 sn.info = PRIMASK;
1030 sn.r.n = glptr = NULL;
1031 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1032
1033 while (! ((tc = next_token(xtc)) & iexp)) {
1034 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035 /* input redirection (<) attached to glptr node */
1036 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1037 cn->a.n = glptr;
1038 xtc = TC_OPERAND | TC_UOPPRE;
1039 glptr = NULL;
1040
1041 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042 /* for binary and postfix-unary operators, jump back over
1043 * previous operators with higher priority */
1044 vn = cn;
1045 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1046 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1047 vn = vn->a.n;
1048 if ((t.info & OPCLSMASK) == OC_TERNARY)
1049 t.info += P(6);
1050 cn = vn->a.n->r.n = new_node(t.info);
1051 cn->a.n = vn->a.n;
1052 if (tc & TC_BINOP) {
1053 cn->l.n = vn;
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1056 /* it's a pipe */
1057 next_token(TC_GETLINE);
1058 /* give maximum priority to this pipe */
1059 cn->info &= ~PRIMASK;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061 }
1062 } else {
1063 cn->r.n = vn;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1065 }
1066 vn->a.n = cn;
1067
1068 } else {
1069 /* for operands and prefix-unary operators, attach them
1070 * to last node */
1071 vn = cn;
1072 cn = vn->r.n = new_node(t.info);
1073 cn->a.n = vn;
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075 if (tc & (TC_OPERAND | TC_REGEXP)) {
1076 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1077 /* one should be very careful with switch on tclass -
1078 * only simple tclasses should be used! */
1079 switch (tc) {
1080 case TC_VARIABLE:
1081 case TC_ARRAY:
1082 cn->info = OC_VAR;
1083 if ((v = hash_search(ahash, t.string)) != NULL) {
1084 cn->info = OC_FNARG;
1085 cn->l.i = v->x.aidx;
1086 } else {
1087 cn->l.v = newvar(t.string);
1088 }
1089 if (tc & TC_ARRAY) {
1090 cn->info |= xS;
1091 cn->r.n = parse_expr(TC_ARRTERM);
1092 }
1093 break;
1094
1095 case TC_NUMBER:
1096 case TC_STRING:
1097 cn->info = OC_VAR;
1098 v = cn->l.v = xzalloc(sizeof(var));
1099 if (tc & TC_NUMBER)
1100 setvar_i(v, t.number);
1101 else
1102 setvar_s(v, t.string);
1103 break;
1104
1105 case TC_REGEXP:
1106 mk_re_node(t.string, cn,
1107 (regex_t *)xzalloc(sizeof(regex_t)*2));
1108 break;
1109
1110 case TC_FUNCTION:
1111 cn->info = OC_FUNC;
1112 cn->r.f = newfunc(t.string);
1113 cn->l.n = condition();
1114 break;
1115
1116 case TC_SEQSTART:
1117 cn = vn->r.n = parse_expr(TC_SEQTERM);
1118 cn->a.n = vn;
1119 break;
1120
1121 case TC_GETLINE:
1122 glptr = cn;
1123 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1124 break;
1125
1126 case TC_BUILTIN:
1127 cn->l.n = condition();
1128 break;
1129 }
1130 }
1131 }
1132 }
1133 return sn.r.n;
1134}
1135
1136/* add node to chain. Return ptr to alloc'd node */
1137static node *chain_node(uint32_t info)
1138{
1139 register node *n;
1140
1141 if (! seq->first)
1142 seq->first = seq->last = new_node(0);
1143
1144 if (seq->programname != programname) {
1145 seq->programname = programname;
1146 n = chain_node(OC_NEWSOURCE);
1147 n->l.s = bb_xstrdup(programname);
1148 }
1149
1150 n = seq->last;
1151 n->info = info;
1152 seq->last = n->a.n = new_node(OC_DONE);
1153
1154 return n;
1155}
1156
1157static void chain_expr(uint32_t info)
1158{
1159 node *n;
1160
1161 n = chain_node(info);
1162 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1163 if (t.tclass & TC_GRPTERM)
1164 rollback_token();
1165}
1166
1167static node *chain_loop(node *nn)
1168{
1169 node *n, *n2, *save_brk, *save_cont;
1170
1171 save_brk = break_ptr;
1172 save_cont = continue_ptr;
1173
1174 n = chain_node(OC_BR | Vx);
1175 continue_ptr = new_node(OC_EXEC);
1176 break_ptr = new_node(OC_EXEC);
1177 chain_group();
1178 n2 = chain_node(OC_EXEC | Vx);
1179 n2->l.n = nn;
1180 n2->a.n = n;
1181 continue_ptr->a.n = n2;
1182 break_ptr->a.n = n->r.n = seq->last;
1183
1184 continue_ptr = save_cont;
1185 break_ptr = save_brk;
1186
1187 return n;
1188}
1189
1190/* parse group and attach it to chain */
1191static void chain_group(void)
1192{
1193 uint32_t c;
1194 node *n, *n2, *n3;
1195
1196 do {
1197 c = next_token(TC_GRPSEQ);
1198 } while (c & TC_NEWLINE);
1199
1200 if (c & TC_GRPSTART) {
1201 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1202 if (t.tclass & TC_NEWLINE) continue;
1203 rollback_token();
1204 chain_group();
1205 }
1206 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1207 rollback_token();
1208 chain_expr(OC_EXEC | Vx);
1209 } else { /* TC_STATEMNT */
1210 switch (t.info & OPCLSMASK) {
1211 case ST_IF:
1212 n = chain_node(OC_BR | Vx);
1213 n->l.n = condition();
1214 chain_group();
1215 n2 = chain_node(OC_EXEC);
1216 n->r.n = seq->last;
1217 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1218 chain_group();
1219 n2->a.n = seq->last;
1220 } else {
1221 rollback_token();
1222 }
1223 break;
1224
1225 case ST_WHILE:
1226 n2 = condition();
1227 n = chain_loop(NULL);
1228 n->l.n = n2;
1229 break;
1230
1231 case ST_DO:
1232 n2 = chain_node(OC_EXEC);
1233 n = chain_loop(NULL);
1234 n2->a.n = n->a.n;
1235 next_token(TC_WHILE);
1236 n->l.n = condition();
1237 break;
1238
1239 case ST_FOR:
1240 next_token(TC_SEQSTART);
1241 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1242 if (t.tclass & TC_SEQTERM) { /* for-in */
1243 if ((n2->info & OPCLSMASK) != OC_IN)
1244 syntax_error(EMSG_UNEXP_TOKEN);
1245 n = chain_node(OC_WALKINIT | VV);
1246 n->l.n = n2->l.n;
1247 n->r.n = n2->r.n;
1248 n = chain_loop(NULL);
1249 n->info = OC_WALKNEXT | Vx;
1250 n->l.n = n2->l.n;
1251 } else { /* for(;;) */
1252 n = chain_node(OC_EXEC | Vx);
1253 n->l.n = n2;
1254 n2 = parse_expr(TC_SEMICOL);
1255 n3 = parse_expr(TC_SEQTERM);
1256 n = chain_loop(n3);
1257 n->l.n = n2;
1258 if (! n2)
1259 n->info = OC_EXEC;
1260 }
1261 break;
1262
1263 case OC_PRINT:
1264 case OC_PRINTF:
1265 n = chain_node(t.info);
1266 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1267 if (t.tclass & TC_OUTRDR) {
1268 n->info |= t.info;
1269 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270 }
1271 if (t.tclass & TC_GRPTERM)
1272 rollback_token();
1273 break;
1274
1275 case OC_BREAK:
1276 n = chain_node(OC_EXEC);
1277 n->a.n = break_ptr;
1278 break;
1279
1280 case OC_CONTINUE:
1281 n = chain_node(OC_EXEC);
1282 n->a.n = continue_ptr;
1283 break;
1284
1285 /* delete, next, nextfile, return, exit */
1286 default:
1287 chain_expr(t.info);
1288
1289 }
1290 }
1291}
1292
1293static void parse_program(char *p)
1294{
1295 uint32_t tclass;
1296 node *cn;
1297 func *f;
1298 var *v;
1299
1300 pos = p;
1301 t.lineno = 1;
1302 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1303 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1304
1305 if (tclass & TC_OPTERM)
1306 continue;
1307
1308 seq = &mainseq;
1309 if (tclass & TC_BEGIN) {
1310 seq = &beginseq;
1311 chain_group();
1312
1313 } else if (tclass & TC_END) {
1314 seq = &endseq;
1315 chain_group();
1316
1317 } else if (tclass & TC_FUNCDECL) {
1318 next_token(TC_FUNCTION);
1319 pos++;
1320 f = newfunc(t.string);
1321 f->body.first = NULL;
1322 f->nargs = 0;
1323 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1324 v = findvar(ahash, t.string);
1325 v->x.aidx = (f->nargs)++;
1326
1327 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1328 break;
1329 }
1330 seq = &(f->body);
1331 chain_group();
1332 clear_array(ahash);
1333
1334 } else if (tclass & TC_OPSEQ) {
1335 rollback_token();
1336 cn = chain_node(OC_TEST);
1337 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1338 if (t.tclass & TC_GRPSTART) {
1339 rollback_token();
1340 chain_group();
1341 } else {
1342 chain_node(OC_PRINT);
1343 }
1344 cn->r.n = mainseq.last;
1345
1346 } else /* if (tclass & TC_GRPSTART) */ {
1347 rollback_token();
1348 chain_group();
1349 }
1350 }
1351}
1352
1353
1354/* -------- program execution part -------- */
1355
1356static node *mk_splitter(char *s, tsplitter *spl)
1357{
1358 register regex_t *re, *ire;
1359 node *n;
1360
1361 re = &spl->re[0];
1362 ire = &spl->re[1];
1363 n = &spl->n;
1364 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1365 regfree(re);
1366 regfree(ire);
1367 }
1368 if (strlen(s) > 1) {
1369 mk_re_node(s, n, re);
1370 } else {
1371 n->info = (uint32_t) *s;
1372 }
1373
1374 return n;
1375}
1376
1377/* use node as a regular expression. Supplied with node ptr and regex_t
1378 * storage space. Return ptr to regex (if result points to preg, it should
1379 * be later regfree'd manually
1380 */
1381static regex_t *as_regex(node *op, regex_t *preg)
1382{
1383 var *v;
1384 char *s;
1385
1386 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1387 return icase ? op->r.ire : op->l.re;
1388 } else {
1389 v = nvalloc(1);
1390 s = getvar_s(evaluate(op, v));
1391 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1392 nvfree(v);
1393 return preg;
1394 }
1395}
1396
/*
 * Gradually growing buffer: make sure *b is allocated and holds more
 * than n bytes. When *b is NULL or n has reached *size, the buffer is
 * reallocated to n + n/2 + 80 bytes and *size is updated; otherwise
 * nothing happens.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1403
1404/* resize field storage space */
1405static void fsrealloc(int size)
1406{
1407 static int maxfields = 0;
1408 int i;
1409
1410 if (size >= maxfields) {
1411 i = maxfields;
1412 maxfields = size + 16;
1413 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1414 for (; i<maxfields; i++) {
1415 Fields[i].type = VF_SPECIAL;
1416 Fields[i].string = NULL;
1417 }
1418 }
1419
1420 if (size < nfields) {
1421 for (i=size; i<nfields; i++) {
1422 clrvar(Fields+i);
1423 }
1424 }
1425 nfields = size;
1426}
1427
1428static int awk_split(char *s, node *spl, char **slist)
1429{
1430 int l, n=0;
1431 char c[4];
1432 char *s1;
1433 regmatch_t pmatch[2];
1434
1435 /* in worst case, each char would be a separate field */
1436 *slist = s1 = bb_xstrndup(s, strlen(s) * 2 + 3);
1437
1438 c[0] = c[1] = (char)spl->info;
1439 c[2] = c[3] = '\0';
1440 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1441
1442 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1443 while (*s) {
1444 l = strcspn(s, c+2);
1445 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1446 pmatch[0].rm_so <= l) {
1447 l = pmatch[0].rm_so;
1448 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1449 } else {
1450 pmatch[0].rm_eo = l;
1451 if (*(s+l)) pmatch[0].rm_eo++;
1452 }
1453
1454 memcpy(s1, s, l);
1455 *(s1+l) = '\0';
1456 nextword(&s1);
1457 s += pmatch[0].rm_eo;
1458 n++;
1459 }
1460 } else if (c[0] == '\0') { /* null split */
1461 while(*s) {
1462 *(s1++) = *(s++);
1463 *(s1++) = '\0';
1464 n++;
1465 }
1466 } else if (c[0] != ' ') { /* single-character split */
1467 if (icase) {
1468 c[0] = toupper(c[0]);
1469 c[1] = tolower(c[1]);
1470 }
1471 if (*s1) n++;
1472 while ((s1 = strpbrk(s1, c))) {
1473 *(s1++) = '\0';
1474 n++;
1475 }
1476 } else { /* space split */
1477 while (*s) {
1478 while (isspace(*s)) s++;
1479 if (! *s) break;
1480 n++;
1481 while (*s && !isspace(*s))
1482 *(s1++) = *(s++);
1483 *(s1++) = '\0';
1484 }
1485 }
1486 return n;
1487}
1488
1489static void split_f0(void)
1490{
1491 static char *fstrings = NULL;
1492 int i, n;
1493 char *s;
1494
1495 if (is_f0_split)
1496 return;
1497
1498 is_f0_split = TRUE;
1499 free(fstrings);
1500 fsrealloc(0);
1501 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502 fsrealloc(n);
1503 s = fstrings;
1504 for (i=0; i<n; i++) {
1505 Fields[i].string = nextword(&s);
1506 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507 }
1508
1509 /* set NF manually to avoid side effects */
1510 clrvar(V[NF]);
1511 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1512 V[NF]->number = nfields;
1513}
1514
1515/* perform additional actions when some internal variables changed */
1516static void handle_special(var *v)
1517{
1518 int n;
1519 char *b, *sep, *s;
1520 int sl, l, len, i, bsize;
1521
1522 if (! (v->type & VF_SPECIAL))
1523 return;
1524
1525 if (v == V[NF]) {
1526 n = (int)getvar_i(v);
1527 fsrealloc(n);
1528
1529 /* recalculate $0 */
1530 sep = getvar_s(V[OFS]);
1531 sl = strlen(sep);
1532 b = NULL;
1533 len = 0;
1534 for (i=0; i<n; i++) {
1535 s = getvar_s(&Fields[i]);
1536 l = strlen(s);
1537 if (b) {
1538 memcpy(b+len, sep, sl);
1539 len += sl;
1540 }
1541 qrealloc(&b, len+l+sl, &bsize);
1542 memcpy(b+len, s, l);
1543 len += l;
1544 }
1545 if (b) b[len] = '\0';
1546 setvar_p(V[F0], b);
1547 is_f0_split = TRUE;
1548
1549 } else if (v == V[F0]) {
1550 is_f0_split = FALSE;
1551
1552 } else if (v == V[FS]) {
1553 mk_splitter(getvar_s(v), &fsplitter);
1554
1555 } else if (v == V[RS]) {
1556 mk_splitter(getvar_s(v), &rsplitter);
1557
1558 } else if (v == V[IGNORECASE]) {
1559 icase = istrue(v);
1560
1561 } else { /* $n */
1562 n = getvar_i(V[NF]);
1563 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564 /* right here v is invalid. Just to note... */
1565 }
1566}
1567
1568/* step through func/builtin/etc arguments */
1569static node *nextarg(node **pn)
1570{
1571 node *n;
1572
1573 n = *pn;
1574 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1575 *pn = n->r.n;
1576 n = n->l.n;
1577 } else {
1578 *pn = NULL;
1579 }
1580 return n;
1581}
1582
1583static void hashwalk_init(var *v, xhash *array)
1584{
1585 char **w;
1586 hash_item *hi;
1587 int i;
1588
1589 if (v->type & VF_WALK)
1590 free(v->x.walker);
1591
1592 v->type |= VF_WALK;
1593 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1594 *w = *(w+1) = (char *)(w + 2);
1595 for (i=0; i<array->csize; i++) {
1596 hi = array->items[i];
1597 while(hi) {
1598 strcpy(*w, hi->name);
1599 nextword(w);
1600 hi = hi->next;
1601 }
1602 }
1603}
1604
1605static int hashwalk_next(var *v)
1606{
1607 char **w;
1608
1609 w = v->x.walker;
1610 if (*(w+1) == *w)
1611 return FALSE;
1612
1613 setvar_s(v, nextword(w+1));
1614 return TRUE;
1615}
1616
1617/* evaluate node, return 1 when result is true, 0 otherwise */
1618static int ptest(node *pattern)
1619{
1620 static var v;
1621 return istrue(evaluate(pattern, &v));
1622}
1623
1624/* read next record from stream rsm into a variable v */
1625static int awk_getline(rstream *rsm, var *v)
1626{
1627 char *b;
1628 regmatch_t pmatch[2];
1629 int a, p, pp=0, size;
1630 int fd, so, eo, r, rp;
1631 char c, *m, *s;
1632
1633 /* we're using our own buffer since we need access to accumulating
1634 * characters
1635 */
1636 fd = fileno(rsm->F);
1637 m = rsm->buffer;
1638 a = rsm->adv;
1639 p = rsm->pos;
1640 size = rsm->size;
1641 c = (char) rsplitter.n.info;
1642 rp = 0;
1643
1644 if (! m) qrealloc(&m, 256, &size);
1645 do {
1646 b = m + a;
1647 so = eo = p;
1648 r = 1;
1649 if (p > 0) {
1650 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1651 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1652 b, 1, pmatch, 0) == 0) {
1653 so = pmatch[0].rm_so;
1654 eo = pmatch[0].rm_eo;
1655 if (b[eo] != '\0')
1656 break;
1657 }
1658 } else if (c != '\0') {
1659 s = strchr(b+pp, c);
1660 if (! s) s = memchr(b+pp, '\0', p - pp);
1661 if (s) {
1662 so = eo = s-b;
1663 eo++;
1664 break;
1665 }
1666 } else {
1667 while (b[rp] == '\n')
1668 rp++;
1669 s = strstr(b+rp, "\n\n");
1670 if (s) {
1671 so = eo = s-b;
1672 while (b[eo] == '\n') eo++;
1673 if (b[eo] != '\0')
1674 break;
1675 }
1676 }
1677 }
1678
1679 if (a > 0) {
1680 memmove(m, (const void *)(m+a), p+1);
1681 b = m;
1682 a = 0;
1683 }
1684
1685 qrealloc(&m, a+p+128, &size);
1686 b = m + a;
1687 pp = p;
1688 p += safe_read(fd, b+p, size-p-1);
1689 if (p < pp) {
1690 p = 0;
1691 r = 0;
1692 setvar_i(V[ERRNO], errno);
1693 }
1694 b[p] = '\0';
1695
1696 } while (p > pp);
1697
1698 if (p == 0) {
1699 r--;
1700 } else {
1701 c = b[so]; b[so] = '\0';
1702 setvar_s(v, b+rp);
1703 v->type |= VF_USER;
1704 b[so] = c;
1705 c = b[eo]; b[eo] = '\0';
1706 setvar_s(V[RT], b+so);
1707 b[eo] = c;
1708 }
1709
1710 rsm->buffer = m;
1711 rsm->adv = a + eo;
1712 rsm->pos = p - eo;
1713 rsm->size = size;
1714
1715 return r;
1716}
1717
1718static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1719{
1720 int r=0;
1721 char c;
1722 const char *s=format;
1723
1724 if (int_as_int && n == (int)n) {
1725 r = snprintf(b, size, "%d", (int)n);
1726 } else {
1727 do { c = *s; } while (*s && *++s);
1728 if (strchr("diouxX", c)) {
1729 r = snprintf(b, size, format, (int)n);
1730 } else if (strchr("eEfgG", c)) {
1731 r = snprintf(b, size, format, n);
1732 } else {
1733 runtime_error(EMSG_INV_FMT);
1734 }
1735 }
1736 return r;
1737}
1738
1739
1740/* formatted output into an allocated buffer, return ptr to buffer */
1741static char *awk_printf(node *n)
1742{
1743 char *b = NULL;
1744 char *fmt, *s, *s1, *f;
1745 int i, j, incr, bsize;
1746 char c, c1;
1747 var *v, *arg;
1748
1749 v = nvalloc(1);
1750 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1751
1752 i = 0;
1753 while (*f) {
1754 s = f;
1755 while (*f && (*f != '%' || *(++f) == '%'))
1756 f++;
1757 while (*f && !isalpha(*f))
1758 f++;
1759
1760 incr = (f - s) + MAXVARFMT;
1761 qrealloc(&b, incr+i, &bsize);
1762 c = *f; if (c != '\0') f++;
1763 c1 = *f ; *f = '\0';
1764 arg = evaluate(nextarg(&n), v);
1765
1766 j = i;
1767 if (c == 'c' || !c) {
1768 i += sprintf(b+i, s,
1769 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1770
1771 } else if (c == 's') {
1772 s1 = getvar_s(arg);
1773 qrealloc(&b, incr+i+strlen(s1), &bsize);
1774 i += sprintf(b+i, s, s1);
1775
1776 } else {
1777 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1778 }
1779 *f = c1;
1780
1781 /* if there was an error while sprintf, return value is negative */
1782 if (i < j) i = j;
1783
1784 }
1785
1786 b = xrealloc(b, i+1);
1787 free(fmt);
1788 nvfree(v);
1789 b[i] = '\0';
1790 return b;
1791}
1792
1793/* common substitution routine
1794 * replace (nm) substring of (src) that match (n) with (repl), store
1795 * result into (dest), return number of substitutions. If nm=0, replace
1796 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1797 * subexpression matching (\1-\9)
1798 */
1799static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1800{
1801 char *ds = NULL;
1802 char *sp, *s;
1803 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1804 regmatch_t pmatch[10];
1805 regex_t sreg, *re;
1806
1807 re = as_regex(rn, &sreg);
1808 if (! src) src = V[F0];
1809 if (! dest) dest = V[F0];
1810
1811 i = di = 0;
1812 sp = getvar_s(src);
1813 rl = strlen(repl);
1814 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1815 so = pmatch[0].rm_so;
1816 eo = pmatch[0].rm_eo;
1817
1818 qrealloc(&ds, di + eo + rl, &dssize);
1819 memcpy(ds + di, sp, eo);
1820 di += eo;
1821 if (++i >= nm) {
1822 /* replace */
1823 di -= (eo - so);
1824 nbs = 0;
1825 for (s = repl; *s; s++) {
1826 ds[di++] = c = *s;
1827 if (c == '\\') {
1828 nbs++;
1829 continue;
1830 }
1831 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1832 di -= ((nbs + 3) >> 1);
1833 j = 0;
1834 if (c != '&') {
1835 j = c - '0';
1836 nbs++;
1837 }
1838 if (nbs % 2) {
1839 ds[di++] = c;
1840 } else {
1841 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1842 qrealloc(&ds, di + rl + n, &dssize);
1843 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1844 di += n;
1845 }
1846 }
1847 nbs = 0;
1848 }
1849 }
1850
1851 sp += eo;
1852 if (i == nm) break;
1853 if (eo == so) {
1854 if (! (ds[di++] = *sp++)) break;
1855 }
1856 }
1857
1858 qrealloc(&ds, di + strlen(sp), &dssize);
1859 strcpy(ds + di, sp);
1860 setvar_p(dest, ds);
1861 if (re == &sreg) regfree(re);
1862 return i;
1863}
1864
1865static var *exec_builtin(node *op, var *res)
1866{
1867 int (*to_xxx)(int);
1868 var *tv;
1869 node *an[4];
1870 var *av[4];
1871 char *as[4];
1872 regmatch_t pmatch[2];
1873 regex_t sreg, *re;
1874 static tsplitter tspl;
1875 node *spl;
1876 uint32_t isr, info;
1877 int nargs;
1878 time_t tt;
1879 char *s, *s1;
1880 int i, l, ll, n;
1881
1882 tv = nvalloc(4);
1883 isr = info = op->info;
1884 op = op->l.n;
1885
1886 av[2] = av[3] = NULL;
1887 for (i=0 ; i<4 && op ; i++) {
1888 an[i] = nextarg(&op);
1889 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1890 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1891 isr >>= 1;
1892 }
1893
1894 nargs = i;
1895 if (nargs < (info >> 30))
1896 runtime_error(EMSG_TOO_FEW_ARGS);
1897
1898 switch (info & OPNMASK) {
1899
1900 case B_a2:
1901#ifdef CONFIG_FEATURE_AWK_MATH
1902 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1903#else
1904 runtime_error(EMSG_NO_MATH);
1905#endif
1906 break;
1907
1908 case B_sp:
1909 if (nargs > 2) {
1910 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1911 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1912 } else {
1913 spl = &fsplitter.n;
1914 }
1915
1916 n = awk_split(as[0], spl, &s);
1917 s1 = s;
1918 clear_array(iamarray(av[1]));
1919 for (i=1; i<=n; i++)
1920 setari_u(av[1], i, nextword(&s1));
1921 free(s);
1922 setvar_i(res, n);
1923 break;
1924
1925 case B_ss:
1926 l = strlen(as[0]);
1927 i = getvar_i(av[1]) - 1;
1928 if (i>l) i=l; if (i<0) i=0;
1929 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1930 if (n<0) n=0;
1931 s = xmalloc(n+1);
1932 strncpy(s, as[0]+i, n);
1933 s[n] = '\0';
1934 setvar_p(res, s);
1935 break;
1936
1937 case B_lo:
1938 to_xxx = tolower;
1939 goto lo_cont;
1940
1941 case B_up:
1942 to_xxx = toupper;
1943lo_cont:
1944 s1 = s = bb_xstrdup(as[0]);
1945 while (*s1) {
1946 *s1 = (*to_xxx)(*s1);
1947 s1++;
1948 }
1949 setvar_p(res, s);
1950 break;
1951
1952 case B_ix:
1953 n = 0;
1954 ll = strlen(as[1]);
1955 l = strlen(as[0]) - ll;
1956 if (ll > 0 && l >= 0) {
1957 if (! icase) {
1958 s = strstr(as[0], as[1]);
1959 if (s) n = (s - as[0]) + 1;
1960 } else {
1961 /* this piece of code is terribly slow and
1962 * really should be rewritten
1963 */
1964 for (i=0; i<=l; i++) {
1965 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1966 n = i+1;
1967 break;
1968 }
1969 }
1970 }
1971 }
1972 setvar_i(res, n);
1973 break;
1974
1975 case B_ti:
1976 if (nargs > 1)
1977 tt = getvar_i(av[1]);
1978 else
1979 time(&tt);
1980 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1981 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1982 buf[i] = '\0';
1983 setvar_s(res, buf);
1984 break;
1985
1986 case B_ma:
1987 re = as_regex(an[1], &sreg);
1988 n = regexec(re, as[0], 1, pmatch, 0);
1989 if (n == 0) {
1990 pmatch[0].rm_so++;
1991 pmatch[0].rm_eo++;
1992 } else {
1993 pmatch[0].rm_so = 0;
1994 pmatch[0].rm_eo = -1;
1995 }
1996 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1997 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1998 setvar_i(res, pmatch[0].rm_so);
1999 if (re == &sreg) regfree(re);
2000 break;
2001
2002 case B_ge:
2003 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2004 break;
2005
2006 case B_gs:
2007 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2008 break;
2009
2010 case B_su:
2011 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2012 break;
2013 }
2014
2015 nvfree(tv);
2016 return res;
2017}
2018
2019/*
2020 * Evaluate node - the heart of the program. Supplied with subtree
2021 * and place where to store result. returns ptr to result.
2022 */
2023#define XC(n) ((n) >> 8)
2024
2025static var *evaluate(node *op, var *res)
2026{
2027 /* This procedure is recursive so we should count every byte */
2028 static var *fnargs = NULL;
2029 static unsigned int seed = 1;
2030 static regex_t sreg;
2031 node *op1;
2032 var *v1;
2033 union {
2034 var *v;
2035 char *s;
2036 double d;
2037 int i;
2038 } L, R;
2039 uint32_t opinfo;
2040 short opn;
2041 union {
2042 char *s;
2043 rstream *rsm;
2044 FILE *F;
2045 var *v;
2046 regex_t *re;
2047 uint32_t info;
2048 } X;
2049
2050 if (! op)
2051 return setvar_s(res, NULL);
2052
2053 v1 = nvalloc(2);
2054
2055 while (op) {
2056
2057 opinfo = op->info;
2058 opn = (short)(opinfo & OPNMASK);
2059 lineno = op->lineno;
2060
2061 /* execute inevitable things */
2062 op1 = op->l.n;
2063 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2064 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2065 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2066 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2067 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2068
2069 switch (XC(opinfo & OPCLSMASK)) {
2070
2071 /* -- iterative node type -- */
2072
2073 /* test pattern */
2074 case XC( OC_TEST ):
2075 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2076 /* it's range pattern */
2077 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2078 op->info |= OF_CHECKED;
2079 if (ptest(op1->r.n))
2080 op->info &= ~OF_CHECKED;
2081
2082 op = op->a.n;
2083 } else {
2084 op = op->r.n;
2085 }
2086 } else {
2087 op = (ptest(op1)) ? op->a.n : op->r.n;
2088 }
2089 break;
2090
2091 /* just evaluate an expression, also used as unconditional jump */
2092 case XC( OC_EXEC ):
2093 break;
2094
2095 /* branch, used in if-else and various loops */
2096 case XC( OC_BR ):
2097 op = istrue(L.v) ? op->a.n : op->r.n;
2098 break;
2099
2100 /* initialize for-in loop */
2101 case XC( OC_WALKINIT ):
2102 hashwalk_init(L.v, iamarray(R.v));
2103 break;
2104
2105 /* get next array item */
2106 case XC( OC_WALKNEXT ):
2107 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2108 break;
2109
2110 case XC( OC_PRINT ):
2111 case XC( OC_PRINTF ):
2112 X.F = stdout;
2113 if (op->r.n) {
2114 X.rsm = newfile(R.s);
2115 if (! X.rsm->F) {
2116 if (opn == '|') {
2117 if((X.rsm->F = popen(R.s, "w")) == NULL)
2118 bb_perror_msg_and_die("popen");
2119 X.rsm->is_pipe = 1;
2120 } else {
2121 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2122 }
2123 }
2124 X.F = X.rsm->F;
2125 }
2126
2127 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2128 if (! op1) {
2129 fputs(getvar_s(V[F0]), X.F);
2130 } else {
2131 while (op1) {
2132 L.v = evaluate(nextarg(&op1), v1);
2133 if (L.v->type & VF_NUMBER) {
2134 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2135 getvar_i(L.v), TRUE);
2136 fputs(buf, X.F);
2137 } else {
2138 fputs(getvar_s(L.v), X.F);
2139 }
2140
2141 if (op1) fputs(getvar_s(V[OFS]), X.F);
2142 }
2143 }
2144 fputs(getvar_s(V[ORS]), X.F);
2145
2146 } else { /* OC_PRINTF */
2147 L.s = awk_printf(op1);
2148 fputs(L.s, X.F);
2149 free(L.s);
2150 }
2151 fflush(X.F);
2152 break;
2153
2154 case XC( OC_DELETE ):
2155 X.info = op1->info & OPCLSMASK;
2156 if (X.info == OC_VAR) {
2157 R.v = op1->l.v;
2158 } else if (X.info == OC_FNARG) {
2159 R.v = &fnargs[op1->l.i];
2160 } else {
2161 runtime_error(EMSG_NOT_ARRAY);
2162 }
2163
2164 if (op1->r.n) {
2165 clrvar(L.v);
2166 L.s = getvar_s(evaluate(op1->r.n, v1));
2167 hash_remove(iamarray(R.v), L.s);
2168 } else {
2169 clear_array(iamarray(R.v));
2170 }
2171 break;
2172
2173 case XC( OC_NEWSOURCE ):
2174 programname = op->l.s;
2175 break;
2176
2177 case XC( OC_RETURN ):
2178 copyvar(res, L.v);
2179 break;
2180
2181 case XC( OC_NEXTFILE ):
2182 nextfile = TRUE;
2183 case XC( OC_NEXT ):
2184 nextrec = TRUE;
2185 case XC( OC_DONE ):
2186 clrvar(res);
2187 break;
2188
2189 case XC( OC_EXIT ):
2190 awk_exit(L.d);
2191
2192 /* -- recursive node type -- */
2193
2194 case XC( OC_VAR ):
2195 L.v = op->l.v;
2196 if (L.v == V[NF])
2197 split_f0();
2198 goto v_cont;
2199
2200 case XC( OC_FNARG ):
2201 L.v = &fnargs[op->l.i];
2202
2203v_cont:
2204 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2205 break;
2206
2207 case XC( OC_IN ):
2208 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2209 break;
2210
2211 case XC( OC_REGEXP ):
2212 op1 = op;
2213 L.s = getvar_s(V[F0]);
2214 goto re_cont;
2215
2216 case XC( OC_MATCH ):
2217 op1 = op->r.n;
2218re_cont:
2219 X.re = as_regex(op1, &sreg);
2220 R.i = regexec(X.re, L.s, 0, NULL, 0);
2221 if (X.re == &sreg) regfree(X.re);
2222 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2223 break;
2224
2225 case XC( OC_MOVE ):
2226 /* if source is a temporary string, jusk relink it to dest */
2227 if (R.v == v1+1 && R.v->string) {
2228 res = setvar_p(L.v, R.v->string);
2229 R.v->string = NULL;
2230 } else {
2231 res = copyvar(L.v, R.v);
2232 }
2233 break;
2234
2235 case XC( OC_TERNARY ):
2236 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2237 runtime_error(EMSG_POSSIBLE_ERROR);
2238 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2239 break;
2240
2241 case XC( OC_FUNC ):
2242 if (! op->r.f->body.first)
2243 runtime_error(EMSG_UNDEF_FUNC);
2244
2245 X.v = R.v = nvalloc(op->r.f->nargs+1);
2246 while (op1) {
2247 L.v = evaluate(nextarg(&op1), v1);
2248 copyvar(R.v, L.v);
2249 R.v->type |= VF_CHILD;
2250 R.v->x.parent = L.v;
2251 if (++R.v - X.v >= op->r.f->nargs)
2252 break;
2253 }
2254
2255 R.v = fnargs;
2256 fnargs = X.v;
2257
2258 L.s = programname;
2259 res = evaluate(op->r.f->body.first, res);
2260 programname = L.s;
2261
2262 nvfree(fnargs);
2263 fnargs = R.v;
2264 break;
2265
2266 case XC( OC_GETLINE ):
2267 case XC( OC_PGETLINE ):
2268 if (op1) {
2269 X.rsm = newfile(L.s);
2270 if (! X.rsm->F) {
2271 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2272 X.rsm->F = popen(L.s, "r");
2273 X.rsm->is_pipe = TRUE;
2274 } else {
2275 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
2276 }
2277 }
2278 } else {
2279 if (! iF) iF = next_input_file();
2280 X.rsm = iF;
2281 }
2282
2283 if (! X.rsm->F) {
2284 setvar_i(V[ERRNO], errno);
2285 setvar_i(res, -1);
2286 break;
2287 }
2288
2289 if (! op->r.n)
2290 R.v = V[F0];
2291
2292 L.i = awk_getline(X.rsm, R.v);
2293 if (L.i > 0) {
2294 if (! op1) {
2295 incvar(V[FNR]);
2296 incvar(V[NR]);
2297 }
2298 }
2299 setvar_i(res, L.i);
2300 break;
2301
2302 /* simple builtins */
2303 case XC( OC_FBLTIN ):
2304 switch (opn) {
2305
2306 case F_in:
2307 R.d = (int)L.d;
2308 break;
2309
2310 case F_rn:
2311 R.d = (double)rand() / (double)RAND_MAX;
2312 break;
2313
2314#ifdef CONFIG_FEATURE_AWK_MATH
2315 case F_co:
2316 R.d = cos(L.d);
2317 break;
2318
2319 case F_ex:
2320 R.d = exp(L.d);
2321 break;
2322
2323 case F_lg:
2324 R.d = log(L.d);
2325 break;
2326
2327 case F_si:
2328 R.d = sin(L.d);
2329 break;
2330
2331 case F_sq:
2332 R.d = sqrt(L.d);
2333 break;
2334#else
2335 case F_co:
2336 case F_ex:
2337 case F_lg:
2338 case F_si:
2339 case F_sq:
2340 runtime_error(EMSG_NO_MATH);
2341 break;
2342#endif
2343
2344 case F_sr:
2345 R.d = (double)seed;
2346 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2347 srand(seed);
2348 break;
2349
2350 case F_ti:
2351 R.d = time(NULL);
2352 break;
2353
2354 case F_le:
2355 if (! op1)
2356 L.s = getvar_s(V[F0]);
2357 R.d = strlen(L.s);
2358 break;
2359
2360 case F_sy:
2361 fflush(NULL);
2362 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2363 break;
2364
2365 case F_ff:
2366 if (! op1)
2367 fflush(stdout);
2368 else {
2369 if (L.s && *L.s) {
2370 X.rsm = newfile(L.s);
2371 fflush(X.rsm->F);
2372 } else {
2373 fflush(NULL);
2374 }
2375 }
2376 break;
2377
2378 case F_cl:
2379 X.rsm = (rstream *)hash_search(fdhash, L.s);
2380 if (X.rsm) {
2381 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2382 free(X.rsm->buffer);
2383 hash_remove(fdhash, L.s);
2384 }
2385 if (R.i != 0)
2386 setvar_i(V[ERRNO], errno);
2387 R.d = (double)R.i;
2388 break;
2389 }
2390 setvar_i(res, R.d);
2391 break;
2392
2393 case XC( OC_BUILTIN ):
2394 res = exec_builtin(op, res);
2395 break;
2396
2397 case XC( OC_SPRINTF ):
2398 setvar_p(res, awk_printf(op1));
2399 break;
2400
2401 case XC( OC_UNARY ):
2402 X.v = R.v;
2403 L.d = R.d = getvar_i(R.v);
2404 switch (opn) {
2405 case 'P':
2406 L.d = ++R.d;
2407 goto r_op_change;
2408 case 'p':
2409 R.d++;
2410 goto r_op_change;
2411 case 'M':
2412 L.d = --R.d;
2413 goto r_op_change;
2414 case 'm':
2415 R.d--;
2416 goto r_op_change;
2417 case '!':
2418 L.d = istrue(X.v) ? 0 : 1;
2419 break;
2420 case '-':
2421 L.d = -R.d;
2422 break;
2423 r_op_change:
2424 setvar_i(X.v, R.d);
2425 }
2426 setvar_i(res, L.d);
2427 break;
2428
2429 case XC( OC_FIELD ):
2430 R.i = (int)getvar_i(R.v);
2431 if (R.i == 0) {
2432 res = V[F0];
2433 } else {
2434 split_f0();
2435 if (R.i > nfields)
2436 fsrealloc(R.i);
2437
2438 res = &Fields[R.i-1];
2439 }
2440 break;
2441
2442 /* concatenation (" ") and index joining (",") */
2443 case XC( OC_CONCAT ):
2444 case XC( OC_COMMA ):
2445 opn = strlen(L.s) + strlen(R.s) + 2;
2446 X.s = (char *)xmalloc(opn);
2447 strcpy(X.s, L.s);
2448 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2449 L.s = getvar_s(V[SUBSEP]);
2450 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2451 strcat(X.s, L.s);
2452 }
2453 strcat(X.s, R.s);
2454 setvar_p(res, X.s);
2455 break;
2456
2457 case XC( OC_LAND ):
2458 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2459 break;
2460
2461 case XC( OC_LOR ):
2462 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2463 break;
2464
2465 case XC( OC_BINARY ):
2466 case XC( OC_REPLACE ):
2467 R.d = getvar_i(R.v);
2468 switch (opn) {
2469 case '+':
2470 L.d += R.d;
2471 break;
2472 case '-':
2473 L.d -= R.d;
2474 break;
2475 case '*':
2476 L.d *= R.d;
2477 break;
2478 case '/':
2479 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2480 L.d /= R.d;
2481 break;
2482 case '&':
2483#ifdef CONFIG_FEATURE_AWK_MATH
2484 L.d = pow(L.d, R.d);
2485#else
2486 runtime_error(EMSG_NO_MATH);
2487#endif
2488 break;
2489 case '%':
2490 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491 L.d -= (int)(L.d / R.d) * R.d;
2492 break;
2493 }
2494 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2495 break;
2496
2497 case XC( OC_COMPARE ):
2498 if (is_numeric(L.v) && is_numeric(R.v)) {
2499 L.d = getvar_i(L.v) - getvar_i(R.v);
2500 } else {
2501 L.s = getvar_s(L.v);
2502 R.s = getvar_s(R.v);
2503 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2504 }
2505 switch (opn & 0xfe) {
2506 case 0:
2507 R.i = (L.d > 0);
2508 break;
2509 case 2:
2510 R.i = (L.d >= 0);
2511 break;
2512 case 4:
2513 R.i = (L.d == 0);
2514 break;
2515 }
2516 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2517 break;
2518
2519 default:
2520 runtime_error(EMSG_POSSIBLE_ERROR);
2521 }
2522 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2523 op = op->a.n;
2524 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2525 break;
2526 if (nextrec)
2527 break;
2528 }
2529 nvfree(v1);
2530 return res;
2531}
2532
2533
2534/* -------- main & co. -------- */
2535
2536static int awk_exit(int r)
2537{
2538 unsigned int i;
2539 hash_item *hi;
2540 static var tv;
2541
2542 if (! exiting) {
2543 exiting = TRUE;
2544 nextrec = FALSE;
2545 evaluate(endseq.first, &tv);
2546 }
2547
2548 /* waiting for children */
2549 for (i=0; i<fdhash->csize; i++) {
2550 hi = fdhash->items[i];
2551 while(hi) {
2552 if (hi->data.rs.F && hi->data.rs.is_pipe)
2553 pclose(hi->data.rs.F);
2554 hi = hi->next;
2555 }
2556 }
2557
2558 exit(r);
2559}
2560
2561/* if expr looks like "var=value", perform assignment and return 1,
2562 * otherwise return 0 */
2563static int is_assignment(const char *expr)
2564{
2565 char *exprc, *s, *s0, *s1;
2566
2567 exprc = bb_xstrdup(expr);
2568 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2569 free(exprc);
2570 return FALSE;
2571 }
2572
2573 *(s++) = '\0';
2574 s0 = s1 = s;
2575 while (*s)
2576 *(s1++) = nextchar(&s);
2577
2578 *s1 = '\0';
2579 setvar_u(newvar(exprc), s0);
2580 free(exprc);
2581 return TRUE;
2582}
2583
2584/* switch to next input file */
2585static rstream *next_input_file(void)
2586{
2587 static rstream rsm;
2588 FILE *F = NULL;
2589 char *fname, *ind;
2590 static int files_happen = FALSE;
2591
2592 if (rsm.F) fclose(rsm.F);
2593 rsm.F = NULL;
2594 rsm.pos = rsm.adv = 0;
2595
2596 do {
2597 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2598 if (files_happen)
2599 return NULL;
2600 fname = "-";
2601 F = stdin;
2602 } else {
2603 ind = getvar_s(incvar(V[ARGIND]));
2604 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2605 if (fname && *fname && !is_assignment(fname))
2606 F = afopen(fname, "r");
2607 }
2608 } while (!F);
2609
2610 files_happen = TRUE;
2611 setvar_s(V[FILENAME], fname);
2612 rsm.F = F;
2613 return &rsm;
2614}
2615
2616int awk_main(int argc, char **argv)
2617{
2618 char *s, *s1;
2619 int i, j, c, flen;
2620 var *v;
2621 static var tv;
2622 char **envp;
2623 static int from_file = FALSE;
2624 rstream *rsm;
2625 FILE *F, *stdfiles[3];
2626 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2627
2628 /* allocate global buffer */
2629 buf = xmalloc(MAXVARFMT+1);
2630
2631 vhash = hash_init();
2632 ahash = hash_init();
2633 fdhash = hash_init();
2634 fnhash = hash_init();
2635
2636 /* initialize variables */
2637 for (i=0; *vNames; i++) {
2638 V[i] = v = newvar(nextword(&vNames));
2639 if (*vValues != '\377')
2640 setvar_s(v, nextword(&vValues));
2641 else
2642 setvar_i(v, 0);
2643
2644 if (*vNames == '*') {
2645 v->type |= VF_SPECIAL;
2646 vNames++;
2647 }
2648 }
2649
2650 handle_special(V[FS]);
2651 handle_special(V[RS]);
2652
2653 stdfiles[0] = stdin;
2654 stdfiles[1] = stdout;
2655 stdfiles[2] = stderr;
2656 for (i=0; i<3; i++) {
2657 rsm = newfile(nextword(&stdnames));
2658 rsm->F = stdfiles[i];
2659 }
2660
2661 for (envp=environ; *envp; envp++) {
2662 s = bb_xstrdup(*envp);
2663 s1 = strchr(s, '=');
2664 if (!s1) {
2665 goto keep_going;
2666 }
2667 *(s1++) = '\0';
2668 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2669keep_going:
2670 free(s);
2671 }
2672
2673 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2674 switch (c) {
2675 case 'F':
2676 setvar_s(V[FS], optarg);
2677 break;
2678 case 'v':
2679 if (! is_assignment(optarg))
2680 bb_show_usage();
2681 break;
2682 case 'f':
2683 from_file = TRUE;
2684 F = afopen(programname = optarg, "r");
2685 s = NULL;
2686 /* one byte is reserved for some trick in next_token */
2687 if (fseek(F, 0, SEEK_END) == 0) {
2688 flen = ftell(F);
2689 s = (char *)xmalloc(flen+4);
2690 fseek(F, 0, SEEK_SET);
2691 i = 1 + fread(s+1, 1, flen, F);
2692 } else {
2693 for (i=j=1; j>0; i+=j) {
2694 s = (char *)xrealloc(s, i+4096);
2695 j = fread(s+i, 1, 4094, F);
2696 }
2697 }
2698 s[i] = '\0';
2699 fclose(F);
2700 parse_program(s+1);
2701 free(s);
2702 break;
2703 case 'W':
2704 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2705 break;
2706
2707 default:
2708 bb_show_usage();
2709 }
2710 }
2711
2712 if (!from_file) {
2713 if (argc == optind)
2714 bb_show_usage();
2715 programname="cmd. line";
2716 parse_program(argv[optind++]);
2717
2718 }
2719
2720 /* fill in ARGV array */
2721 setvar_i(V[ARGC], argc - optind + 1);
2722 setari_u(V[ARGV], 0, "awk");
2723 for(i=optind; i < argc; i++)
2724 setari_u(V[ARGV], i+1-optind, argv[i]);
2725
2726 evaluate(beginseq.first, &tv);
2727 if (! mainseq.first && ! endseq.first)
2728 awk_exit(EXIT_SUCCESS);
2729
2730 /* input file could already be opened in BEGIN block */
2731 if (! iF) iF = next_input_file();
2732
2733 /* passing through input files */
2734 while (iF) {
2735
2736 nextfile = FALSE;
2737 setvar_i(V[FNR], 0);
2738
2739 while ((c = awk_getline(iF, V[F0])) > 0) {
2740
2741 nextrec = FALSE;
2742 incvar(V[NR]);
2743 incvar(V[FNR]);
2744 evaluate(mainseq.first, &tv);
2745
2746 if (nextfile)
2747 break;
2748 }
2749
2750 if (c < 0)
2751 runtime_error(strerror(errno));
2752
2753 iF = next_input_file();
2754
2755 }
2756
2757 awk_exit(EXIT_SUCCESS);
2758
2759 return 0;
2760}
2761
Note: See TracBrowser for help on using the repository browser.