source: MondoRescue/branches/3.0/mindi-busybox/editors/awk.c@ 2899

Last change on this file since 2899 was 2725, checked in by Bruno Cornec, 13 years ago
  • Update mindi-busybox to 1.18.3 to avoid problems with the tar command which is now failing on recent versions with busybox 1.7.3
File size: 69.1 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10#include "libbb.h"
11#include "xregex.h"
12#include <math.h>
13
14/* This is a NOEXEC applet. Be very careful! */
15
16
17/* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19#define debug_printf_walker(...) do {} while (0)
20#define debug_printf_eval(...) do {} while (0)
21
22#ifndef debug_printf_walker
23# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
24#endif
25#ifndef debug_printf_eval
26# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
27#endif
28
29
30
31#define MAXVARFMT 240
32#define MINNVBLOCK 64
33
34/* variable flags */
35#define VF_NUMBER 0x0001 /* 1 = primary type is number */
36#define VF_ARRAY 0x0002 /* 1 = it's an array */
37
38#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
39#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
40#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
41#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
42#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
43#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
44#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
45
46/* these flags are static, don't change them when value is changed */
47#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
49typedef struct walker_list {
50 char *end;
51 char *cur;
52 struct walker_list *prev;
53 char wbuf[1];
54} walker_list;
55
56/* Variable */
57typedef struct var_s {
58 unsigned type; /* flags */
59 double number;
60 char *string;
61 union {
62 int aidx; /* func arg idx (for compilation stage) */
63 struct xhash_s *array; /* array ptr */
64 struct var_s *parent; /* for func args, ptr to actual parameter */
65 walker_list *walker; /* list of array elements (for..in) */
66 } x;
67} var;
68
69/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70typedef struct chain_s {
71 struct node_s *first;
72 struct node_s *last;
73 const char *programname;
74} chain;
75
76/* Function */
77typedef struct func_s {
78 unsigned nargs;
79 struct chain_s body;
80} func;
81
82/* I/O stream */
83typedef struct rstream_s {
84 FILE *F;
85 char *buffer;
86 int adv;
87 int size;
88 int pos;
89 smallint is_pipe;
90} rstream;
91
/* One entry of an xhash bucket chain.
 * 'data' must remain the first member: hash_find() stores the pointer
 * returned by hash_search() (the address of 'data') in a hash_item
 * pointer and uses it as the item itself.
 * 'name' holds the NUL-terminated key; hash_find() allocates
 * sizeof(hash_item) plus the key length (including NUL) for each item.
 */
92typedef struct hash_item_s {
93 union {
94 struct var_s v; /* variable/array hash */
95 struct rstream_s rs; /* redirect streams hash */
96 struct func_s f; /* functions hash */
97 } data;
98 struct hash_item_s *next; /* next in chain */
99 char name[1]; /* really it's longer */
100} hash_item;
101
102typedef struct xhash_s {
103 unsigned nel; /* num of elements */
104 unsigned csize; /* current hash size */
105 unsigned nprime; /* next hash size in PRIMES[] */
106 unsigned glen; /* summary length of item names */
107 struct hash_item_s **items;
108} xhash;
109
110/* Tree node */
111typedef struct node_s {
112 uint32_t info;
113 unsigned lineno;
114 union {
115 struct node_s *n;
116 var *v;
117 int aidx;
118 char *new_progname;
119 regex_t *re;
120 } l;
121 union {
122 struct node_s *n;
123 regex_t *ire;
124 func *f;
125 } r;
126 union {
127 struct node_s *n;
128 } a;
129} node;
130
131/* Block of temporary variables, handed out by nvalloc() and released
 * by nvfree(). Blocks are linked in both directions. */
132typedef struct nvblock_s {
133 int size; /* number of var slots in nv[] */
134 var *pos; /* first unused slot in nv[] */
135 struct nvblock_s *prev; /* block that was current when this one was created */
136 struct nvblock_s *next; /* block created after this one, if any */
137 var nv[]; /* the slots themselves */
138} nvblock;
139
140typedef struct tsplitter_s {
141 node n;
142 regex_t re[2];
143} tsplitter;
144
145/* simple token classes */
146/* Order and hex values are very important!!! See next_token() */
147#define TC_SEQSTART 1 /* ( */
148#define TC_SEQTERM (1 << 1) /* ) */
149#define TC_REGEXP (1 << 2) /* /.../ */
150#define TC_OUTRDR (1 << 3) /* | > >> */
151#define TC_UOPPOST (1 << 4) /* unary postfix operator */
152#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153#define TC_BINOPX (1 << 6) /* two-opnd operator */
154#define TC_IN (1 << 7)
155#define TC_COMMA (1 << 8)
156#define TC_PIPE (1 << 9) /* input redirection pipe */
157#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158#define TC_ARRTERM (1 << 11) /* ] */
159#define TC_GRPSTART (1 << 12) /* { */
160#define TC_GRPTERM (1 << 13) /* } */
161#define TC_SEMICOL (1 << 14)
162#define TC_NEWLINE (1 << 15)
163#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164#define TC_WHILE (1 << 17)
165#define TC_ELSE (1 << 18)
166#define TC_BUILTIN (1 << 19)
167#define TC_GETLINE (1 << 20)
168#define TC_FUNCDECL (1 << 21) /* `function' `func' */
169#define TC_BEGIN (1 << 22)
170#define TC_END (1 << 23)
171#define TC_EOF (1 << 24)
172#define TC_VARIABLE (1 << 25)
173#define TC_ARRAY (1 << 26)
174#define TC_FUNCTION (1 << 27)
175#define TC_STRING (1 << 28)
176#define TC_NUMBER (1 << 29)
177
178#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
179
180/* combined token classes */
181#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186#define TC_STATEMNT (TC_STATX | TC_WHILE)
187#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
188
189/* word tokens, cannot mean something else if not expected */
190#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193/* discard newlines after these */
194#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195 | TC_BINOP | TC_OPTERM)
196
197/* what can expression begin with */
198#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199/* what can group begin with */
200#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203/* operator is inserted between them */
204#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205 | TC_STRING | TC_NUMBER | TC_UOPPOST)
206#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
208#define OF_RES1 0x010000
209#define OF_RES2 0x020000
210#define OF_STR1 0x040000
211#define OF_STR2 0x080000
212#define OF_NUM1 0x100000
213#define OF_CHECKED 0x200000
214
215/* combined operator flags */
216#define xx 0
217#define xV OF_RES2
218#define xS (OF_RES2 | OF_STR2)
219#define Vx OF_RES1
220#define VV (OF_RES1 | OF_RES2)
221#define Nx (OF_RES1 | OF_NUM1)
222#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223#define Sx (OF_RES1 | OF_STR1)
224#define SV (OF_RES1 | OF_STR1 | OF_RES2)
225#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227#define OPCLSMASK 0xFF00
228#define OPNMASK 0x007F
229
/* Operator priority is the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The shift is done in uint32_t: builtin descriptors such as P(0x83) set
 * bit 7 of the byte, and shifting that into the sign bit of a plain int
 * is undefined behavior. The argument is parenthesized so that compound
 * expressions are shifted as a whole.
 */
#define P(x) ((uint32_t)(x) << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
237
238/* Operation classes */
239
240#define SHIFT_TIL_THIS 0x0600
241#define RECUR_FROM_THIS 0x1000
242
243enum {
244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
246
247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
250
251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
259 OC_DONE = 0x2800,
260
261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
262 ST_WHILE = 0x3300
263};
264
265/* simple builtins */
266enum {
267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
269};
270
271/* builtins */
272enum {
273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
274 B_ge, B_gs, B_su,
275 B_an, B_co, B_ls, B_or, B_rs, B_xo,
276};
277
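/* Tokens and their corresponding info values.
 * tokenlist stores each token as a length byte followed by its text; NTC
 * closes a token class, so the classes appear in TC_xxx bit order starting
 * with TC_SEQSTART. tokeninfo[] has one entry per token, in the same order.
 * See next_token(). */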
279
280#define NTC "\377" /* switch to next token class (tc<<1) */
281#define NTCC '\377'
282
283#define OC_B OC_BUILTIN
284
285static const char tokenlist[] ALIGN1 =
286 "\1(" NTC
287 "\1)" NTC
288 "\1/" NTC /* REGEXP */
289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
290 "\2++" "\2--" NTC /* UOPPOST */
291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
293 "\2*=" "\2/=" "\2%=" "\2^="
294 "\1+" "\1-" "\3**=" "\2**"
295 "\1/" "\1%" "\1^" "\1*"
296 "\2!=" "\2>=" "\2<=" "\1>"
297 "\1<" "\2!~" "\1~" "\2&&"
298 "\2||" "\1?" "\1:" NTC
299 "\2in" NTC
300 "\1," NTC
301 "\1|" NTC
302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
303 "\1]" NTC
304 "\1{" NTC
305 "\1}" NTC
306 "\1;" NTC
307 "\1\n" NTC
308 "\2if" "\2do" "\3for" "\5break" /* STATX */
309 "\10continue" "\6delete" "\5print"
310 "\6printf" "\4next" "\10nextfile"
311 "\6return" "\4exit" NTC
312 "\5while" NTC
313 "\4else" NTC
314
315 "\3and" "\5compl" "\6lshift" "\2or"
316 "\6rshift" "\3xor"
317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
318 "\3cos" "\3exp" "\3int" "\3log"
319 "\4rand" "\3sin" "\4sqrt" "\5srand"
320 "\6gensub" "\4gsub" "\5index" "\6length"
321 "\5match" "\5split" "\7sprintf" "\3sub"
322 "\6substr" "\7systime" "\10strftime" "\6mktime"
323 "\7tolower" "\7toupper" NTC
324 "\7getline" NTC
325 "\4func" "\10function" NTC
326 "\5BEGIN" NTC
327 "\3END"
328 /* compiler adds trailing "\0" */
329 ;
330
331static const uint32_t tokeninfo[] = {
332 0,
333 0,
334 OC_REGEXP,
335 xS|'a', xS|'w', xS|'|',
336 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
337 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
339 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
343 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
344 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
345 OC_IN|SV|P(49), /* in */
346 OC_COMMA|SS|P(80),
347 OC_PGETLINE|SV|P(37),
348 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
349 0, /* ] */
350 0,
351 0,
352 0,
353 0, /* \n */
354 ST_IF, ST_DO, ST_FOR, OC_BREAK,
355 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
356 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
357 OC_RETURN|Vx, OC_EXIT|Nx,
358 ST_WHILE,
359 0, /* else */
360
361 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
362 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
363 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
364 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
365 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
366 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
367 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
368 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
369 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
370 OC_GETLINE|SV|P(0),
371 0, 0,
372 0,
373 0 /* END */
374};
375
376/* internal variable names and their initial values */
377/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
378enum {
379 CONVFMT, OFMT, FS, OFS,
380 ORS, RS, RT, FILENAME,
381 SUBSEP, F0, ARGIND, ARGC,
382 ARGV, ERRNO, FNR, NR,
383 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
384};
385
386static const char vNames[] ALIGN1 =
387 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
388 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
389 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
390 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
391 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
392
393static const char vValues[] ALIGN1 =
394 "%.6g\0" "%.6g\0" " \0" " \0"
395 "\n\0" "\n\0" "\0" "\0"
396 "\034\0" "\0" "\377";
397
398/* hash size may grow to these values */
399#define FIRST_PRIME 61
400static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
401
402
403/* Globals. Split in two parts so that first one is addressed
404 * with (mostly short) negative offsets.
405 * NB: it's unsafe to put members of type "double"
406 * into globals2 (gcc may fail to align them).
407 */
408struct globals {
409 double t_double;
410 chain beginseq, mainseq, endseq;
411 chain *seq;
412 node *break_ptr, *continue_ptr;
413 rstream *iF;
414 xhash *vhash, *ahash, *fdhash, *fnhash;
415 const char *g_progname;
416 int g_lineno;
417 int nfields;
418 int maxfields; /* used in fsrealloc() only */
419 var *Fields;
420 nvblock *g_cb;
421 char *g_pos;
422 char *g_buf;
423 smallint icase;
424 smallint exiting;
425 smallint nextrec;
426 smallint nextfile;
427 smallint is_f0_split;
428};
429struct globals2 {
430 uint32_t t_info; /* often used */
431 uint32_t t_tclass;
432 char *t_string;
433 int t_lineno;
434 int t_rollback;
435
436 var *intvar[NUM_INTERNAL_VARS]; /* often used */
437
438 /* former statics from various functions */
439 char *split_f0__fstrings;
440
441 uint32_t next_token__save_tclass;
442 uint32_t next_token__save_info;
443 uint32_t next_token__ltclass;
444 smallint next_token__concat_inserted;
445
446 smallint next_input_file__files_happen;
447 rstream next_input_file__rsm;
448
449 var *evaluate__fnargs;
450 unsigned evaluate__seed;
451 regex_t evaluate__sreg;
452
453 var ptest__v;
454
455 tsplitter exec_builtin__tspl;
456
457 /* biggest and least used members go last */
458 tsplitter fsplitter, rsplitter;
459};
460#define G1 (ptr_to_globals[-1])
461#define G (*(struct globals2 *)ptr_to_globals)
462/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
463/*char G1size[sizeof(G1)]; - 0x74 */
464/*char Gsize[sizeof(G)]; - 0x1c4 */
465/* Trying to keep most of members accessible with short offsets: */
466/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
467#define t_double (G1.t_double )
468#define beginseq (G1.beginseq )
469#define mainseq (G1.mainseq )
470#define endseq (G1.endseq )
471#define seq (G1.seq )
472#define break_ptr (G1.break_ptr )
473#define continue_ptr (G1.continue_ptr)
474#define iF (G1.iF )
475#define vhash (G1.vhash )
476#define ahash (G1.ahash )
477#define fdhash (G1.fdhash )
478#define fnhash (G1.fnhash )
479#define g_progname (G1.g_progname )
480#define g_lineno (G1.g_lineno )
481#define nfields (G1.nfields )
482#define maxfields (G1.maxfields )
483#define Fields (G1.Fields )
484#define g_cb (G1.g_cb )
485#define g_pos (G1.g_pos )
486#define g_buf (G1.g_buf )
487#define icase (G1.icase )
488#define exiting (G1.exiting )
489#define nextrec (G1.nextrec )
490#define nextfile (G1.nextfile )
491#define is_f0_split (G1.is_f0_split )
492#define t_info (G.t_info )
493#define t_tclass (G.t_tclass )
494#define t_string (G.t_string )
495#define t_lineno (G.t_lineno )
496#define t_rollback (G.t_rollback )
497#define intvar (G.intvar )
498#define fsplitter (G.fsplitter )
499#define rsplitter (G.rsplitter )
500#define INIT_G() do { \
501 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
502 G.next_token__ltclass = TC_OPTERM; \
503 G.evaluate__seed = 1; \
504} while (0)
505
506
507/* function prototypes */
508static void handle_special(var *);
509static node *parse_expr(uint32_t);
510static void chain_group(void);
511static var *evaluate(node *, var *);
512static rstream *next_input_file(void);
513static int fmt_num(char *, int, const char *, double, int);
514static int awk_exit(int) NORETURN;
515
516/* ---- error handling ---- */
517
518static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
519static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
520static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
521static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
522static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
523static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
524static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
525static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
526static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
527static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
528
529static void zero_out_var(var *vp)
530{
531 memset(vp, 0, sizeof(*vp));
532}
533
534static void syntax_error(const char *message) NORETURN;
535static void syntax_error(const char *message)
536{
537 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
538}
539
540/* ---- hash stuff ---- */
541
/* Hash a NUL-terminated name: each character is added to 63 times the
 * running value (unsigned arithmetic, wraps on overflow). The caller
 * reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *c;

	for (c = name; *c != '\0'; c++)
		h = h * 63 + *c;
	return h;
}
550
551/* create new hash */
552static xhash *hash_init(void)
553{
554 xhash *newhash;
555
556 newhash = xzalloc(sizeof(*newhash));
557 newhash->csize = FIRST_PRIME;
558 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
559
560 return newhash;
561}
562
563/* find item in hash, return ptr to data, NULL if not found */
564static void *hash_search(xhash *hash, const char *name)
565{
566 hash_item *hi;
567
568 hi = hash->items[hashidx(name) % hash->csize];
569 while (hi) {
570 if (strcmp(hi->name, name) == 0)
571 return &hi->data;
572 hi = hi->next;
573 }
574 return NULL;
575}
576
577/* grow hash if it becomes too big */
578static void hash_rebuild(xhash *hash)
579{
580 unsigned newsize, i, idx;
581 hash_item **newitems, *hi, *thi;
582
583 if (hash->nprime == ARRAY_SIZE(PRIMES))
584 return;
585
586 newsize = PRIMES[hash->nprime++];
587 newitems = xzalloc(newsize * sizeof(newitems[0]));
588
589 for (i = 0; i < hash->csize; i++) {
590 hi = hash->items[i];
591 while (hi) {
592 thi = hi;
593 hi = thi->next;
594 idx = hashidx(thi->name) % newsize;
595 thi->next = newitems[idx];
596 newitems[idx] = thi;
597 }
598 }
599
600 free(hash->items);
601 hash->csize = newsize;
602 hash->items = newitems;
603}
604
605/* find item in hash, add it if necessary. Return ptr to data */
606static void *hash_find(xhash *hash, const char *name)
607{
608 hash_item *hi;
609 unsigned idx;
610 int l;
611
612 hi = hash_search(hash, name);
613 if (!hi) {
614 if (++hash->nel / hash->csize > 10)
615 hash_rebuild(hash);
616
617 l = strlen(name) + 1;
618 hi = xzalloc(sizeof(*hi) + l);
619 strcpy(hi->name, name);
620
621 idx = hashidx(name) % hash->csize;
622 hi->next = hash->items[idx];
623 hash->items[idx] = hi;
624 hash->glen += l;
625 }
626 return &hi->data;
627}
628
629#define findvar(hash, name) ((var*) hash_find((hash), (name)))
630#define newvar(name) ((var*) hash_find(vhash, (name)))
631#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
632#define newfunc(name) ((func*) hash_find(fnhash, (name)))
633
634static void hash_remove(xhash *hash, const char *name)
635{
636 hash_item *hi, **phi;
637
638 phi = &hash->items[hashidx(name) % hash->csize];
639 while (*phi) {
640 hi = *phi;
641 if (strcmp(hi->name, name) == 0) {
642 hash->glen -= (strlen(name) + 1);
643 hash->nel--;
644 *phi = hi->next;
645 free(hi);
646 break;
647 }
648 phi = &hi->next;
649 }
650}
651
652/* ------ some useful functions ------ */
653
654static char *skip_spaces(char *p)
655{
656 while (1) {
657 if (*p == '\\' && p[1] == '\n') {
658 p++;
659 t_lineno++;
660 } else if (*p != ' ' && *p != '\t') {
661 break;
662 }
663 p++;
664 }
665 return p;
666}
667
/* Return the word *s currently points at and move *s just past that
 * word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
676
/* Fetch one character from *s, decoding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 * For an unrecognized escape \z the result is z itself; a backslash
 * followed by NUL yields NUL and leaves *s on the NUL. */
static char nextchar(char **s)
{
	char ch = *(*s)++;
	char *after = *s;

	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		/* nothing was consumed: take the following char literally */
		ch = **s;
		if (ch != '\0')
			(*s)++;
	}
	return ch;
}
692
693static ALWAYS_INLINE int isalnum_(int c)
694{
695 return (isalnum(c) || c == '_');
696}
697
698static double my_strtod(char **pp)
699{
700 char *cp = *pp;
701 if (ENABLE_DESKTOP && cp[0] == '0') {
702 /* Might be hex or octal integer: 0x123abc or 07777 */
703 char c = (cp[1] | 0x20);
704 if (c == 'x' || isdigit(cp[1])) {
705 unsigned long long ull = strtoull(cp, pp, 0);
706 if (c == 'x')
707 return ull;
708 c = **pp;
709 if (!isdigit(c) && c != '.')
710 return ull;
711 /* else: it may be a floating number. Examples:
712 * 009.123 (*pp points to '9')
713 * 000.123 (*pp points to '.')
714 * fall through to strtod.
715 */
716 }
717 }
718 return strtod(cp, pp);
719}
720
721/* -------- working with variables (set/get/copy/etc) -------- */
722
723static xhash *iamarray(var *v)
724{
725 var *a = v;
726
727 while (a->type & VF_CHILD)
728 a = a->x.parent;
729
730 if (!(a->type & VF_ARRAY)) {
731 a->type |= VF_ARRAY;
732 a->x.array = hash_init();
733 }
734 return a->x.array;
735}
736
737static void clear_array(xhash *array)
738{
739 unsigned i;
740 hash_item *hi, *thi;
741
742 for (i = 0; i < array->csize; i++) {
743 hi = array->items[i];
744 while (hi) {
745 thi = hi;
746 hi = hi->next;
747 free(thi->data.v.string);
748 free(thi);
749 }
750 array->items[i] = NULL;
751 }
752 array->glen = array->nel = 0;
753}
754
755/* clear a variable */
756static var *clrvar(var *v)
757{
758 if (!(v->type & VF_FSTR))
759 free(v->string);
760
761 v->type &= VF_DONTTOUCH;
762 v->type |= VF_DIRTY;
763 v->string = NULL;
764 return v;
765}
766
767/* assign string value to variable */
768static var *setvar_p(var *v, char *value)
769{
770 clrvar(v);
771 v->string = value;
772 handle_special(v);
773 return v;
774}
775
776/* same as setvar_p but make a copy of string */
777static var *setvar_s(var *v, const char *value)
778{
779 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
780}
781
782/* same as setvar_s but sets USER flag */
783static var *setvar_u(var *v, const char *value)
784{
785 v = setvar_s(v, value);
786 v->type |= VF_USER;
787 return v;
788}
789
790/* set array element to user string */
791static void setari_u(var *a, int idx, const char *s)
792{
793 var *v;
794
795 v = findvar(iamarray(a), itoa(idx));
796 setvar_u(v, s);
797}
798
799/* assign numeric value to variable */
800static var *setvar_i(var *v, double value)
801{
802 clrvar(v);
803 v->type |= VF_NUMBER;
804 v->number = value;
805 handle_special(v);
806 return v;
807}
808
809static const char *getvar_s(var *v)
810{
811 /* if v is numeric and has no cached string, convert it to string */
812 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
813 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
814 v->string = xstrdup(g_buf);
815 v->type |= VF_CACHED;
816 }
817 return (v->string == NULL) ? "" : v->string;
818}
819
820static double getvar_i(var *v)
821{
822 char *s;
823
824 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
825 v->number = 0;
826 s = v->string;
827 if (s && *s) {
828 debug_printf_eval("getvar_i: '%s'->", s);
829 v->number = my_strtod(&s);
830 debug_printf_eval("%f (s:'%s')\n", v->number, s);
831 if (v->type & VF_USER) {
832 s = skip_spaces(s);
833 if (*s != '\0')
834 v->type &= ~VF_USER;
835 }
836 } else {
837 debug_printf_eval("getvar_i: '%s'->zero\n", s);
838 v->type &= ~VF_USER;
839 }
840 v->type |= VF_CACHED;
841 }
842 debug_printf_eval("getvar_i: %f\n", v->number);
843 return v->number;
844}
845
846/* Used for operands of bitwise ops */
847static unsigned long getvar_i_int(var *v)
848{
849 double d = getvar_i(v);
850
851 /* Casting doubles to longs is undefined for values outside
852 * of target type range. Try to widen it as much as possible */
853 if (d >= 0)
854 return (unsigned long)d;
855 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
856 return - (long) (unsigned long) (-d);
857}
858
859static var *copyvar(var *dest, const var *src)
860{
861 if (dest != src) {
862 clrvar(dest);
863 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
864 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
865 dest->number = src->number;
866 if (src->string)
867 dest->string = xstrdup(src->string);
868 }
869 handle_special(dest);
870 return dest;
871}
872
873static var *incvar(var *v)
874{
875 return setvar_i(v, getvar_i(v) + 1.0);
876}
877
878/* return true if v is number or numeric string */
879static int is_numeric(var *v)
880{
881 getvar_i(v);
882 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
883}
884
885/* return 1 when value of v corresponds to true, 0 otherwise */
886static int istrue(var *v)
887{
888 if (is_numeric(v))
889 return (v->number != 0);
890 return (v->string && v->string[0]);
891}
892
893/* temporary variables allocator. Last allocated should be first freed */
894static var *nvalloc(int n)
895{
896 nvblock *pb = NULL;
897 var *v, *r;
898 int size;
899
900 while (g_cb) {
901 pb = g_cb;
902 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
903 break;
904 g_cb = g_cb->next;
905 }
906
907 if (!g_cb) {
908 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
909 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
910 g_cb->size = size;
911 g_cb->pos = g_cb->nv;
912 g_cb->prev = pb;
913 /*g_cb->next = NULL; - xzalloc did it */
914 if (pb)
915 pb->next = g_cb;
916 }
917
918 v = r = g_cb->pos;
919 g_cb->pos += n;
920
921 while (v < g_cb->pos) {
922 v->type = 0;
923 v->string = NULL;
924 v++;
925 }
926
927 return r;
928}
929
930static void nvfree(var *v)
931{
932 var *p;
933
934 if (v < g_cb->nv || v >= g_cb->pos)
935 syntax_error(EMSG_INTERNAL_ERROR);
936
937 for (p = v; p < g_cb->pos; p++) {
938 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
939 clear_array(iamarray(p));
940 free(p->x.array->items);
941 free(p->x.array);
942 }
943 if (p->type & VF_WALK) {
944 walker_list *n;
945 walker_list *w = p->x.walker;
946 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
947 p->x.walker = NULL;
948 while (w) {
949 n = w->prev;
950 debug_printf_walker(" free(%p)\n", w);
951 free(w);
952 w = n;
953 }
954 }
955 clrvar(p);
956 }
957
958 g_cb->pos = v;
959 while (g_cb->prev && g_cb->pos == g_cb->nv) {
960 g_cb = g_cb->prev;
961 }
962}
963
964/* ------- awk program text parsing ------- */
965
966/* Parse next token pointed by global pos, place results into global ttt.
967 * If token isn't expected, give away. Return token class
968 */
969static uint32_t next_token(uint32_t expected)
970{
971#define concat_inserted (G.next_token__concat_inserted)
972#define save_tclass (G.next_token__save_tclass)
973#define save_info (G.next_token__save_info)
974/* Initialized to TC_OPTERM: */
975#define ltclass (G.next_token__ltclass)
976
977 char *p, *s;
978 const char *tl;
979 uint32_t tc;
980 const uint32_t *ti;
981
982 if (t_rollback) {
983 t_rollback = FALSE;
984
985 } else if (concat_inserted) {
986 concat_inserted = FALSE;
987 t_tclass = save_tclass;
988 t_info = save_info;
989
990 } else {
991 p = g_pos;
992 readnext:
993 p = skip_spaces(p);
994 g_lineno = t_lineno;
995 if (*p == '#')
996 while (*p != '\n' && *p != '\0')
997 p++;
998
999 if (*p == '\n')
1000 t_lineno++;
1001
1002 if (*p == '\0') {
1003 tc = TC_EOF;
1004
1005 } else if (*p == '\"') {
1006 /* it's a string */
1007 t_string = s = ++p;
1008 while (*p != '\"') {
1009 char *pp;
1010 if (*p == '\0' || *p == '\n')
1011 syntax_error(EMSG_UNEXP_EOS);
1012 pp = p;
1013 *s++ = nextchar(&pp);
1014 p = pp;
1015 }
1016 p++;
1017 *s = '\0';
1018 tc = TC_STRING;
1019
1020 } else if ((expected & TC_REGEXP) && *p == '/') {
1021 /* it's regexp */
1022 t_string = s = ++p;
1023 while (*p != '/') {
1024 if (*p == '\0' || *p == '\n')
1025 syntax_error(EMSG_UNEXP_EOS);
1026 *s = *p++;
1027 if (*s++ == '\\') {
1028 char *pp = p;
1029 s[-1] = bb_process_escape_sequence((const char **)&pp);
1030 if (*p == '\\')
1031 *s++ = '\\';
1032 if (pp == p)
1033 *s++ = *p++;
1034 else
1035 p = pp;
1036 }
1037 }
1038 p++;
1039 *s = '\0';
1040 tc = TC_REGEXP;
1041
1042 } else if (*p == '.' || isdigit(*p)) {
1043 /* it's a number */
1044 char *pp = p;
1045 t_double = my_strtod(&pp);
1046 p = pp;
1047 if (*p == '.')
1048 syntax_error(EMSG_UNEXP_TOKEN);
1049 tc = TC_NUMBER;
1050
1051 } else {
1052 /* search for something known */
1053 tl = tokenlist;
1054 tc = 0x00000001;
1055 ti = tokeninfo;
1056 while (*tl) {
1057 int l = (unsigned char) *tl++;
1058 if (l == (unsigned char) NTCC) {
1059 tc <<= 1;
1060 continue;
1061 }
1062 /* if token class is expected,
1063 * token matches,
1064 * and it's not a longer word,
1065 */
1066 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1067 && strncmp(p, tl, l) == 0
1068 && !((tc & TC_WORD) && isalnum_(p[l]))
1069 ) {
1070 /* then this is what we are looking for */
1071 t_info = *ti;
1072 p += l;
1073 goto token_found;
1074 }
1075 ti++;
1076 tl += l;
1077 }
1078 /* not a known token */
1079
1080 /* is it a name? (var/array/function) */
1081 if (!isalnum_(*p))
1082 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1083 /* yes */
1084 t_string = --p;
1085 while (isalnum_(*++p)) {
1086 p[-1] = *p;
1087 }
1088 p[-1] = '\0';
1089 tc = TC_VARIABLE;
1090 /* also consume whitespace between functionname and bracket */
1091 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1092 p = skip_spaces(p);
1093 if (*p == '(') {
1094 tc = TC_FUNCTION;
1095 } else {
1096 if (*p == '[') {
1097 p++;
1098 tc = TC_ARRAY;
1099 }
1100 }
1101 token_found: ;
1102 }
1103 g_pos = p;
1104
1105 /* skipping newlines in some cases */
1106 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1107 goto readnext;
1108
1109 /* insert concatenation operator when needed */
1110 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1111 concat_inserted = TRUE;
1112 save_tclass = tc;
1113 save_info = t_info;
1114 tc = TC_BINOP;
1115 t_info = OC_CONCAT | SS | P(35);
1116 }
1117
1118 t_tclass = tc;
1119 }
1120 ltclass = t_tclass;
1121
1122 /* Are we ready for this? */
1123 if (!(ltclass & expected))
1124 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1125 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1126
1127 return ltclass;
1128#undef concat_inserted
1129#undef save_tclass
1130#undef save_info
1131#undef ltclass
1132}
1133
1134static void rollback_token(void)
1135{
1136 t_rollback = TRUE;
1137}
1138
1139static node *new_node(uint32_t info)
1140{
1141 node *n;
1142
1143 n = xzalloc(sizeof(node));
1144 n->info = info;
1145 n->lineno = g_lineno;
1146 return n;
1147}
1148
1149static void mk_re_node(const char *s, node *n, regex_t *re)
1150{
1151 n->info = OC_REGEXP;
1152 n->l.re = re;
1153 n->r.ire = re + 1;
1154 xregcomp(re, s, REG_EXTENDED);
1155 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1156}
1157
1158static node *condition(void)
1159{
1160 next_token(TC_SEQSTART);
1161 return parse_expr(TC_SEQTERM);
1162}
1163
/* parse_expr(iexp): build an expression tree from the token stream.
 *
 * iexp is the set of token classes that terminate the expression; tokens
 * are read until one of them shows up (that token is consumed).
 * Returns the root of the built subtree (sn.r.n), which stays NULL when
 * the terminator is the very first token.
 *
 * Working state:
 *   sn    - dummy root living on the stack; the real tree hangs off sn.r.n
 *   cn    - node added most recently; its a.n field links to its parent
 *   xtc   - token classes acceptable for the next next_token() call
 *   glptr - set when a getline operand was just added, so that a following
 *           '<' can be attached to it as input redirection
 */
1164 /* parse expression terminated by given argument, return ptr
1165 * to built subtree. Terminator is eaten by parse_expr */
1166 static node *parse_expr(uint32_t iexp)
1167 {
1168 node sn;
1169 node *cn = &sn;
1170 node *vn, *glptr;
1171 uint32_t tc, xtc;
1172 var *v;
1173
/* the dummy root carries PRIMASK in its info word; the climb loop below
 * compares operator priorities against ancestors' info words */
1174 sn.info = PRIMASK;
1175 sn.r.n = glptr = NULL;
1176 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1177
1178 while (!((tc = next_token(xtc)) & iexp)) {
1179
1180 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1181 /* input redirection (<) attached to glptr node */
1182 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1183 cn->a.n = glptr;
1184 xtc = TC_OPERAND | TC_UOPPRE;
1185 glptr = NULL;
1186
1187 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1188 /* for binary and postfix-unary operators, jump back over
1189 * previous operators with higher priority */
1190 vn = cn;
1191 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1192 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1193 ) {
1194 vn = vn->a.n;
1195 }
/* the priority bits of a ternary operator are adjusted before its node
 * is created */
1196 if ((t_info & OPCLSMASK) == OC_TERNARY)
1197 t_info += P(6);
/* splice the new operator node in place of vn under vn's parent */
1198 cn = vn->a.n->r.n = new_node(t_info);
1199 cn->a.n = vn->a.n;
1200 if (tc & TC_BINOP) {
/* binary: the displaced subtree becomes the left operand */
1201 cn->l.n = vn;
1202 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1203 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1204 /* it's a pipe */
1205 next_token(TC_GETLINE);
1206 /* give maximum priority to this pipe */
1207 cn->info &= ~PRIMASK;
1208 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1209 }
1210 } else {
/* postfix unary: the displaced subtree becomes the (right) operand */
1211 cn->r.n = vn;
1212 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1213 }
1214 vn->a.n = cn;
1215
1216 } else {
1217 /* for operands and prefix-unary operators, attach them
1218 * to last node */
1219 vn = cn;
1220 cn = vn->r.n = new_node(t_info);
1221 cn->a.n = vn;
1222 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1223 if (tc & (TC_OPERAND | TC_REGEXP)) {
1224 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1225 /* one should be very careful with switch on tclass -
1226 * only simple tclasses should be used! */
1227 switch (tc) {
1228 case TC_VARIABLE:
1229 case TC_ARRAY:
/* a name found in ahash becomes OC_FNARG (referenced by its aidx index);
 * any other name goes through newvar() */
1230 cn->info = OC_VAR;
1231 v = hash_search(ahash, t_string);
1232 if (v != NULL) {
1233 cn->info = OC_FNARG;
1234 cn->l.aidx = v->x.aidx;
1235 } else {
1236 cn->l.v = newvar(t_string);
1237 }
1238 if (tc & TC_ARRAY) {
/* the subscript expression is parsed recursively up to TC_ARRTERM */
1239 cn->info |= xS;
1240 cn->r.n = parse_expr(TC_ARRTERM);
1241 }
1242 break;
1243
1244 case TC_NUMBER:
1245 case TC_STRING:
/* literals get a private var holding the constant value */
1246 cn->info = OC_VAR;
1247 v = cn->l.v = xzalloc(sizeof(var));
1248 if (tc & TC_NUMBER)
1249 setvar_i(v, t_double);
1250 else
1251 setvar_s(v, t_string);
1252 break;
1253
1254 case TC_REGEXP:
/* storage for two regex_t: mk_re_node compiles a normal and an
 * ignore-case variant */
1255 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1256 break;
1257
1258 case TC_FUNCTION:
1259 cn->info = OC_FUNC;
1260 cn->r.f = newfunc(t_string);
1261 cn->l.n = condition();
1262 break;
1263
1264 case TC_SEQSTART:
/* parenthesized subexpression: the node allocated above is replaced
 * by the recursively parsed subtree */
1265 cn = vn->r.n = parse_expr(TC_SEQTERM);
1266 cn->a.n = vn;
1267 break;
1268
1269 case TC_GETLINE:
/* remember the node so a following '<' can be attached to it */
1270 glptr = cn;
1271 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1272 break;
1273
1274 case TC_BUILTIN:
1275 cn->l.n = condition();
1276 break;
1277 }
1278 }
1279 }
1280 }
1281 return sn.r.n;
1282 }
1283
1284/* add node to chain. Return ptr to alloc'd node */
1285static node *chain_node(uint32_t info)
1286{
1287 node *n;
1288
1289 if (!seq->first)
1290 seq->first = seq->last = new_node(0);
1291
1292 if (seq->programname != g_progname) {
1293 seq->programname = g_progname;
1294 n = chain_node(OC_NEWSOURCE);
1295 n->l.new_progname = xstrdup(g_progname);
1296 }
1297
1298 n = seq->last;
1299 n->info = info;
1300 seq->last = n->a.n = new_node(OC_DONE);
1301
1302 return n;
1303}
1304
1305static void chain_expr(uint32_t info)
1306{
1307 node *n;
1308
1309 n = chain_node(info);
1310 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1311 if (t_tclass & TC_GRPTERM)
1312 rollback_token();
1313}
1314
1315static node *chain_loop(node *nn)
1316{
1317 node *n, *n2, *save_brk, *save_cont;
1318
1319 save_brk = break_ptr;
1320 save_cont = continue_ptr;
1321
1322 n = chain_node(OC_BR | Vx);
1323 continue_ptr = new_node(OC_EXEC);
1324 break_ptr = new_node(OC_EXEC);
1325 chain_group();
1326 n2 = chain_node(OC_EXEC | Vx);
1327 n2->l.n = nn;
1328 n2->a.n = n;
1329 continue_ptr->a.n = n2;
1330 break_ptr->a.n = n->r.n = seq->last;
1331
1332 continue_ptr = save_cont;
1333 break_ptr = save_brk;
1334
1335 return n;
1336}
1337
1338/* parse group and attach it to chain */
1339static void chain_group(void)
1340{
1341 uint32_t c;
1342 node *n, *n2, *n3;
1343
1344 do {
1345 c = next_token(TC_GRPSEQ);
1346 } while (c & TC_NEWLINE);
1347
1348 if (c & TC_GRPSTART) {
1349 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1350 if (t_tclass & TC_NEWLINE)
1351 continue;
1352 rollback_token();
1353 chain_group();
1354 }
1355 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1356 rollback_token();
1357 chain_expr(OC_EXEC | Vx);
1358 } else { /* TC_STATEMNT */
1359 switch (t_info & OPCLSMASK) {
1360 case ST_IF:
1361 n = chain_node(OC_BR | Vx);
1362 n->l.n = condition();
1363 chain_group();
1364 n2 = chain_node(OC_EXEC);
1365 n->r.n = seq->last;
1366 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1367 chain_group();
1368 n2->a.n = seq->last;
1369 } else {
1370 rollback_token();
1371 }
1372 break;
1373
1374 case ST_WHILE:
1375 n2 = condition();
1376 n = chain_loop(NULL);
1377 n->l.n = n2;
1378 break;
1379
1380 case ST_DO:
1381 n2 = chain_node(OC_EXEC);
1382 n = chain_loop(NULL);
1383 n2->a.n = n->a.n;
1384 next_token(TC_WHILE);
1385 n->l.n = condition();
1386 break;
1387
1388 case ST_FOR:
1389 next_token(TC_SEQSTART);
1390 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1391 if (t_tclass & TC_SEQTERM) { /* for-in */
1392 if ((n2->info & OPCLSMASK) != OC_IN)
1393 syntax_error(EMSG_UNEXP_TOKEN);
1394 n = chain_node(OC_WALKINIT | VV);
1395 n->l.n = n2->l.n;
1396 n->r.n = n2->r.n;
1397 n = chain_loop(NULL);
1398 n->info = OC_WALKNEXT | Vx;
1399 n->l.n = n2->l.n;
1400 } else { /* for (;;) */
1401 n = chain_node(OC_EXEC | Vx);
1402 n->l.n = n2;
1403 n2 = parse_expr(TC_SEMICOL);
1404 n3 = parse_expr(TC_SEQTERM);
1405 n = chain_loop(n3);
1406 n->l.n = n2;
1407 if (!n2)
1408 n->info = OC_EXEC;
1409 }
1410 break;
1411
1412 case OC_PRINT:
1413 case OC_PRINTF:
1414 n = chain_node(t_info);
1415 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1416 if (t_tclass & TC_OUTRDR) {
1417 n->info |= t_info;
1418 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1419 }
1420 if (t_tclass & TC_GRPTERM)
1421 rollback_token();
1422 break;
1423
1424 case OC_BREAK:
1425 n = chain_node(OC_EXEC);
1426 n->a.n = break_ptr;
1427 break;
1428
1429 case OC_CONTINUE:
1430 n = chain_node(OC_EXEC);
1431 n->a.n = continue_ptr;
1432 break;
1433
1434 /* delete, next, nextfile, return, exit */
1435 default:
1436 chain_expr(t_info);
1437 }
1438 }
1439}
1440
1441static void parse_program(char *p)
1442{
1443 uint32_t tclass;
1444 node *cn;
1445 func *f;
1446 var *v;
1447
1448 g_pos = p;
1449 t_lineno = 1;
1450 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1451 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1452
1453 if (tclass & TC_OPTERM)
1454 continue;
1455
1456 seq = &mainseq;
1457 if (tclass & TC_BEGIN) {
1458 seq = &beginseq;
1459 chain_group();
1460
1461 } else if (tclass & TC_END) {
1462 seq = &endseq;
1463 chain_group();
1464
1465 } else if (tclass & TC_FUNCDECL) {
1466 next_token(TC_FUNCTION);
1467 g_pos++;
1468 f = newfunc(t_string);
1469 f->body.first = NULL;
1470 f->nargs = 0;
1471 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1472 v = findvar(ahash, t_string);
1473 v->x.aidx = f->nargs++;
1474
1475 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1476 break;
1477 }
1478 seq = &f->body;
1479 chain_group();
1480 clear_array(ahash);
1481
1482 } else if (tclass & TC_OPSEQ) {
1483 rollback_token();
1484 cn = chain_node(OC_TEST);
1485 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1486 if (t_tclass & TC_GRPSTART) {
1487 rollback_token();
1488 chain_group();
1489 } else {
1490 chain_node(OC_PRINT);
1491 }
1492 cn->r.n = mainseq.last;
1493
1494 } else /* if (tclass & TC_GRPSTART) */ {
1495 rollback_token();
1496 chain_group();
1497 }
1498 }
1499}
1500
1501
1502/* -------- program execution part -------- */
1503
1504static node *mk_splitter(const char *s, tsplitter *spl)
1505{
1506 regex_t *re, *ire;
1507 node *n;
1508
1509 re = &spl->re[0];
1510 ire = &spl->re[1];
1511 n = &spl->n;
1512 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1513 regfree(re);
1514 regfree(ire); // TODO: nuke ire, use re+1?
1515 }
1516 if (s[0] && s[1]) { /* strlen(s) > 1 */
1517 mk_re_node(s, n, re);
1518 } else {
1519 n->info = (uint32_t) s[0];
1520 }
1521
1522 return n;
1523}
1524
1525/* use node as a regular expression. Supplied with node ptr and regex_t
1526 * storage space. Return ptr to regex (if result points to preg, it should
1527 * be later regfree'd manually
1528 */
1529static regex_t *as_regex(node *op, regex_t *preg)
1530{
1531 int cflags;
1532 var *v;
1533 const char *s;
1534
1535 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1536 return icase ? op->r.ire : op->l.re;
1537 }
1538 v = nvalloc(1);
1539 s = getvar_s(evaluate(op, v));
1540
1541 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1542 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1543 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1544 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1545 * (maybe gsub is not supposed to use REG_EXTENDED?).
1546 */
1547 if (regcomp(preg, s, cflags)) {
1548 cflags &= ~REG_EXTENDED;
1549 xregcomp(preg, s, cflags);
1550 }
1551 nvfree(v);
1552 return preg;
1553}
1554
1555/* gradually increasing buffer.
1556 * note that we reallocate even if n == old_size,
1557 * and thus there is at least one extra allocated byte.
1558 */
1559static char* qrealloc(char *b, int n, int *size)
1560{
1561 if (!b || n >= *size) {
1562 *size = n + (n>>1) + 80;
1563 b = xrealloc(b, *size);
1564 }
1565 return b;
1566}
1567
1568/* resize field storage space */
1569static void fsrealloc(int size)
1570{
1571 int i;
1572
1573 if (size >= maxfields) {
1574 i = maxfields;
1575 maxfields = size + 16;
1576 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1577 for (; i < maxfields; i++) {
1578 Fields[i].type = VF_SPECIAL;
1579 Fields[i].string = NULL;
1580 }
1581 }
1582 /* if size < nfields, clear extra field variables */
1583 for (i = size; i < nfields; i++) {
1584 clrvar(Fields + i);
1585 }
1586 nfields = size;
1587}
1588
/* Split string s into fields according to splitter node spl.
 *
 * *slist receives a freshly allocated buffer in which the fields are
 * stored one after another, each NUL-terminated. The callers visible in
 * this file free that buffer themselves.
 * Returns the number of fields.
 *
 * Four modes, chosen from spl->info:
 *   - OC_REGEXP node: separators are regex matches
 *   - low byte is NUL: every character becomes its own field
 *   - low byte is ' ': fields are runs of non-whitespace
 *   - any other char: that single character separates fields
 */
1589 static int awk_split(const char *s, node *spl, char **slist)
1590 {
1591 int l, n;
1592 char c[4];
1593 char *s1;
1594 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1595
1596 /* in worst case, each char would be a separate field */
1597 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1598 strcpy(s1, s);
1599
/* c[] doubles as a strpbrk/strcspn set: c[0],c[1] hold the separator
 * char, c[2] becomes '\n' when RS is the empty string */
1600 c[0] = c[1] = (char)spl->info;
1601 c[2] = c[3] = '\0';
1602 if (*getvar_s(intvar[RS]) == '\0')
1603 c[2] = '\n';
1604
1605 n = 0;
1606 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1607 if (!*s)
1608 return n; /* "": zero fields */
1609 n++; /* at least one field will be there */
1610 do {
1611 l = strcspn(s, c+2); /* len till next NUL or \n */
1612 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1613 && pmatch[0].rm_so <= l
1614 ) {
1615 l = pmatch[0].rm_so;
/* a match ending at offset 0 is empty: take one char so the loop advances */
1616 if (pmatch[0].rm_eo == 0) {
1617 l++;
1618 pmatch[0].rm_eo++;
1619 }
1620 n++; /* we saw yet another delimiter */
1621 } else {
/* no usable match: the field runs to the NUL/newline found above,
 * and that newline (if any) is consumed as well */
1622 pmatch[0].rm_eo = l;
1623 if (s[l])
1624 pmatch[0].rm_eo++;
1625 }
1626 memcpy(s1, s, l);
1627 /* make sure we remove *all* of the separator chars */
1628 do {
1629 s1[l] = '\0';
1630 } while (++l < pmatch[0].rm_eo);
/* presumably steps s1 past the field just written - confirm in nextword() */
1631 nextword(&s1);
1632 s += pmatch[0].rm_eo;
1633 } while (*s);
1634 return n;
1635 }
1636 if (c[0] == '\0') { /* null split */
1637 while (*s) {
1638 *s1++ = *s++;
1639 *s1++ = '\0';
1640 n++;
1641 }
1642 return n;
1643 }
1644 if (c[0] != ' ') { /* single-character split */
/* with icase set, both the upper- and lower-case form are separators */
1645 if (icase) {
1646 c[0] = toupper(c[0]);
1647 c[1] = tolower(c[1]);
1648 }
/* works in place on the copy made above: separators are overwritten by NUL */
1649 if (*s1)
1650 n++;
1651 while ((s1 = strpbrk(s1, c)) != NULL) {
1652 *s1++ = '\0';
1653 n++;
1654 }
1655 return n;
1656 }
1657 /* space split */
1658 while (*s) {
1659 s = skip_whitespace(s);
1660 if (!*s)
1661 break;
1662 n++;
1663 while (*s && !isspace(*s))
1664 *s1++ = *s++;
1665 *s1++ = '\0';
1666 }
1667 return n;
1668 }
1669
1670static void split_f0(void)
1671{
1672/* static char *fstrings; */
1673#define fstrings (G.split_f0__fstrings)
1674
1675 int i, n;
1676 char *s;
1677
1678 if (is_f0_split)
1679 return;
1680
1681 is_f0_split = TRUE;
1682 free(fstrings);
1683 fsrealloc(0);
1684 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1685 fsrealloc(n);
1686 s = fstrings;
1687 for (i = 0; i < n; i++) {
1688 Fields[i].string = nextword(&s);
1689 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1690 }
1691
1692 /* set NF manually to avoid side effects */
1693 clrvar(intvar[NF]);
1694 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1695 intvar[NF]->number = nfields;
1696#undef fstrings
1697}
1698
1699/* perform additional actions when some internal variables changed */
1700static void handle_special(var *v)
1701{
1702 int n;
1703 char *b;
1704 const char *sep, *s;
1705 int sl, l, len, i, bsize;
1706
1707 if (!(v->type & VF_SPECIAL))
1708 return;
1709
1710 if (v == intvar[NF]) {
1711 n = (int)getvar_i(v);
1712 fsrealloc(n);
1713
1714 /* recalculate $0 */
1715 sep = getvar_s(intvar[OFS]);
1716 sl = strlen(sep);
1717 b = NULL;
1718 len = 0;
1719 for (i = 0; i < n; i++) {
1720 s = getvar_s(&Fields[i]);
1721 l = strlen(s);
1722 if (b) {
1723 memcpy(b+len, sep, sl);
1724 len += sl;
1725 }
1726 b = qrealloc(b, len+l+sl, &bsize);
1727 memcpy(b+len, s, l);
1728 len += l;
1729 }
1730 if (b)
1731 b[len] = '\0';
1732 setvar_p(intvar[F0], b);
1733 is_f0_split = TRUE;
1734
1735 } else if (v == intvar[F0]) {
1736 is_f0_split = FALSE;
1737
1738 } else if (v == intvar[FS]) {
1739 mk_splitter(getvar_s(v), &fsplitter);
1740
1741 } else if (v == intvar[RS]) {
1742 mk_splitter(getvar_s(v), &rsplitter);
1743
1744 } else if (v == intvar[IGNORECASE]) {
1745 icase = istrue(v);
1746
1747 } else { /* $n */
1748 n = getvar_i(intvar[NF]);
1749 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1750 /* right here v is invalid. Just to note... */
1751 }
1752}
1753
1754/* step through func/builtin/etc arguments */
1755static node *nextarg(node **pn)
1756{
1757 node *n;
1758
1759 n = *pn;
1760 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1761 *pn = n->r.n;
1762 n = n->l.n;
1763 } else {
1764 *pn = NULL;
1765 }
1766 return n;
1767}
1768
1769static void hashwalk_init(var *v, xhash *array)
1770{
1771 hash_item *hi;
1772 unsigned i;
1773 walker_list *w;
1774 walker_list *prev_walker;
1775
1776 if (v->type & VF_WALK) {
1777 prev_walker = v->x.walker;
1778 } else {
1779 v->type |= VF_WALK;
1780 prev_walker = NULL;
1781 }
1782 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1783
1784 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1785 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1786 w->cur = w->end = w->wbuf;
1787 w->prev = prev_walker;
1788 for (i = 0; i < array->csize; i++) {
1789 hi = array->items[i];
1790 while (hi) {
1791 strcpy(w->end, hi->name);
1792 nextword(&w->end);
1793 hi = hi->next;
1794 }
1795 }
1796}
1797
1798static int hashwalk_next(var *v)
1799{
1800 walker_list *w = v->x.walker;
1801
1802 if (w->cur >= w->end) {
1803 walker_list *prev_walker = w->prev;
1804
1805 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1806 free(w);
1807 v->x.walker = prev_walker;
1808 return FALSE;
1809 }
1810
1811 setvar_s(v, nextword(&w->cur));
1812 return TRUE;
1813}
1814
1815/* evaluate node, return 1 when result is true, 0 otherwise */
1816static int ptest(node *pattern)
1817{
1818 /* ptest__v is "static": to save stack space? */
1819 return istrue(evaluate(pattern, &G.ptest__v));
1820}
1821
/* awk_getline(rsm, v): fetch one record from stream rsm.
 *
 * The stream keeps its own read buffer in rsm: buffer/size are the
 * allocation, adv is the offset of the unread data inside it and pos is
 * the amount of unread data. All four are written back before returning.
 *
 * The record separator comes from rsplitter.n.info:
 *   - an OC_REGEXP node: the separator is a regex match
 *   - otherwise its low byte c, if not NUL: that single character
 *   - c == NUL: records are separated by runs of newlines ("\n\n"...)
 *
 * On success v receives the record text (flagged VF_USER) and RT
 * receives the separator text that ended it.
 * Returns 1 when a record was stored, 0 when no data is left, and -1
 * when reading failed (ERRNO is set from errno in that case).
 */
1822 /* read next record from stream rsm into a variable v */
1823 static int awk_getline(rstream *rsm, var *v)
1824 {
1825 char *b;
1826 regmatch_t pmatch[2];
1827 int size, a, p, pp = 0;
1828 int fd, so, eo, r, rp;
1829 char c, *m, *s;
1830
1831 debug_printf_eval("entered %s()\n", __func__);
1832
1833 /* we're using our own buffer since we need access to accumulating
1834 * characters
1835 */
1836 fd = fileno(rsm->F);
1837 m = rsm->buffer;
1838 a = rsm->adv;
1839 p = rsm->pos;
1840 size = rsm->size;
1841 c = (char) rsplitter.n.info;
1842 rp = 0;
1843
1844 if (!m)
1845 m = qrealloc(m, 256, &size);
1846
/* each pass looks for a separator in the p buffered bytes at b; when
 * none is found, more input is read and the search is repeated.
 * so/eo are the start/end offsets of the separator within b. */
1847 do {
1848 b = m + a;
1849 so = eo = p;
1850 r = 1;
1851 if (p > 0) {
1852 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1853 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1854 b, 1, pmatch, 0) == 0) {
1855 so = pmatch[0].rm_so;
1856 eo = pmatch[0].rm_eo;
/* a match reaching the end of the buffered data is not accepted yet;
 * more input is read first */
1857 if (b[eo] != '\0')
1858 break;
1859 }
1860 } else if (c != '\0') {
/* pp is the amount of data already scanned on the previous pass */
1861 s = strchr(b+pp, c);
1862 if (!s)
1863 s = memchr(b+pp, '\0', p - pp);
1864 if (s) {
1865 so = eo = s-b;
1866 eo++;
1867 break;
1868 }
1869 } else {
/* blank-line separated records: rp skips leading newlines */
1870 while (b[rp] == '\n')
1871 rp++;
1872 s = strstr(b+rp, "\n\n");
1873 if (s) {
1874 so = eo = s-b;
1875 while (b[eo] == '\n')
1876 eo++;
1877 if (b[eo] != '\0')
1878 break;
1879 }
1880 }
1881 }
1882
/* move the unread data (and its trailing NUL) to the buffer start */
1883 if (a > 0) {
1884 memmove(m, m+a, p+1);
1885 b = m;
1886 a = 0;
1887 }
1888
1889 m = qrealloc(m, a+p+128, &size);
1890 b = m + a;
1891 pp = p;
1892 p += safe_read(fd, b+p, size-p-1);
/* p shrank: safe_read returned a negative value, i.e. the read failed */
1893 if (p < pp) {
1894 p = 0;
1895 r = 0;
1896 setvar_i(intvar[ERRNO], errno);
1897 }
1898 b[p] = '\0';
1899
/* loop ends once a read adds no new data */
1900 } while (p > pp);
1901
1902 if (p == 0) {
/* nothing buffered: r becomes 0 (end of input) or -1 (after a read error) */
1903 r--;
1904 } else {
/* temporarily NUL-terminate the record, then the separator, for setvar_s */
1905 c = b[so]; b[so] = '\0';
1906 setvar_s(v, b+rp);
1907 v->type |= VF_USER;
1908 b[so] = c;
1909 c = b[eo]; b[eo] = '\0';
1910 setvar_s(intvar[RT], b+so);
1911 b[eo] = c;
1912 }
1913
1914 rsm->buffer = m;
1915 rsm->adv = a + eo;
1916 rsm->pos = p - eo;
1917 rsm->size = size;
1918
1919 debug_printf_eval("returning from %s(): %d\n", __func__, r);
1920
1921 return r;
1922 }
1923
1924static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1925{
1926 int r = 0;
1927 char c;
1928 const char *s = format;
1929
1930 if (int_as_int && n == (int)n) {
1931 r = snprintf(b, size, "%d", (int)n);
1932 } else {
1933 do { c = *s; } while (c && *++s);
1934 if (strchr("diouxX", c)) {
1935 r = snprintf(b, size, format, (int)n);
1936 } else if (strchr("eEfgG", c)) {
1937 r = snprintf(b, size, format, n);
1938 } else {
1939 syntax_error(EMSG_INV_FMT);
1940 }
1941 }
1942 return r;
1943}
1944
/* awk_printf(n): printf-style formatting of an argument list.
 *
 * n is the argument list as understood by nextarg(); its first element
 * is evaluated to obtain the format string, the following ones supply
 * the values. The format is processed one conversion at a time: each
 * piece (literal text up to and including one conversion specifier) is
 * temporarily NUL-terminated and handed to sprintf/fmt_num together
 * with the next argument.
 *
 * Returns an allocated, NUL-terminated buffer with the result. The
 * caller visible in this file frees it after printing.
 */
1945 /* formatted output into an allocated buffer, return ptr to buffer */
1946 static char *awk_printf(node *n)
1947 {
1948 char *b = NULL;
1949 char *fmt, *s, *f;
1950 const char *s1;
1951 int i, j, incr, bsize;
1952 char c, c1;
1953 var *v, *arg;
1954
1955 v = nvalloc(1);
/* private copy of the format: it gets modified in place below */
1956 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1957
/* i is the number of bytes produced so far */
1958 i = 0;
1959 while (*f) {
1960 s = f;
/* advance f past literal text and "%%" up to the next real '%' ... */
1961 while (*f && (*f != '%' || *++f == '%'))
1962 f++;
/* ... then past flags/width/precision up to the conversion letter */
1963 while (*f && !isalpha(*f)) {
1964 if (*f == '*')
1965 syntax_error("%*x formats are not supported");
1966 f++;
1967 }
1968
/* reserve room for this piece plus MAXVARFMT bytes for the converted value */
1969 incr = (f - s) + MAXVARFMT;
1970 b = qrealloc(b, incr + i, &bsize);
1971 c = *f;
1972 if (c != '\0')
1973 f++;
/* cut the format right after the conversion letter; restored below */
1974 c1 = *f;
1975 *f = '\0';
1976 arg = evaluate(nextarg(&n), v);
1977
1978 j = i;
1979 if (c == 'c' || !c) {
/* numeric argument: used as a character code; otherwise the first
 * character of the string value */
/* NOTE(review): sprintf is unbounded here; a large field width in the
 * format looks able to exceed the MAXVARFMT headroom - confirm */
1980 i += sprintf(b+i, s, is_numeric(arg) ?
1981 (char)getvar_i(arg) : *getvar_s(arg));
1982 } else if (c == 's') {
1983 s1 = getvar_s(arg);
1984 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1985 i += sprintf(b+i, s, s1);
1986 } else {
/* NOTE(review): fmt_num returns the snprintf result, which may be larger
 * than incr if the output was truncated - confirm i cannot overshoot */
1987 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1988 }
1989 *f = c1;
1990
1991 /* if there was an error while sprintf, return value is negative */
1992 if (i < j)
1993 i = j;
1994 }
1995
1996 free(fmt);
1997 nvfree(v);
/* trim the buffer to the final length and terminate it */
1998 b = xrealloc(b, i + 1);
1999 b[i] = '\0';
2000 return b;
2001 }
2002
/* awk_sub: shared implementation behind the substitution builtins.
 *
 *   rn     - node giving the regex (see as_regex)
 *   repl   - replacement text; '&' inserts the matched text and, when
 *            subexp is set, a backslash followed by a digit inserts that
 *            subexpression. Backslashes escape '&' and each other.
 *   nm     - which match to replace; 0 replaces every match
 *   src    - variable holding the input, NULL means $0
 *   dest   - variable receiving the result, NULL means $0
 *   subexp - non-zero enables the digit references described above
 *
 * Returns the number of matches counted (match_no).
 */
2003 /* Common substitution routine.
2004 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2005 * store result into (dest), return number of substitutions.
2006 * If nm = 0, replace all matches.
2007 * If src or dst is NULL, use $0.
2008 * If subexp != 0, enable subexpression matching (\1-\9).
2009 */
2010 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2011 {
2012 char *resbuf;
2013 const char *sp;
2014 int match_no, residx, replen, resbufsize;
2015 int regexec_flags;
2016 regmatch_t pmatch[10];
2017 regex_t sreg, *regex;
2018
2019 resbuf = NULL;
2020 residx = 0;
2021 match_no = 0;
2022 regexec_flags = 0;
2023 regex = as_regex(rn, &sreg);
2024 sp = getvar_s(src ? src : intvar[F0]);
2025 replen = strlen(repl);
/* sp walks through the input; residx is the length of the result so far */
2026 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2027 int so = pmatch[0].rm_so;
2028 int eo = pmatch[0].rm_eo;
2029
2030 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
/* copy everything up to the end of the match; if this match is to be
 * replaced, the matched part is dropped again just below */
2031 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2032 memcpy(resbuf + residx, sp, eo);
2033 residx += eo;
2034 if (++match_no >= nm) {
2035 const char *s;
2036 int nbs;
2037
2038 /* replace */
2039 residx -= (eo - so);
/* nbs counts the backslashes immediately preceding the current char */
2040 nbs = 0;
2041 for (s = repl; *s; s++) {
2042 char c = resbuf[residx++] = *s;
2043 if (c == '\\') {
2044 nbs++;
2045 continue;
2046 }
2047 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2048 int j;
/* take back the special char and half of the backslashes before it */
2049 residx -= ((nbs + 3) >> 1);
2050 j = 0;
2051 if (c != '&') {
2052 j = c - '0';
2053 nbs++;
2054 }
/* odd count: the char is escaped and stays literal;
 * even count: insert the text of match/subexpression j */
2055 if (nbs % 2) {
2056 resbuf[residx++] = c;
2057 } else {
2058 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2059 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2060 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2061 residx += n;
2062 }
2063 }
2064 nbs = 0;
2065 }
2066 }
2067
/* later searches do not start at the beginning of the string */
2068 regexec_flags = REG_NOTBOL;
2069 sp += eo;
2070 if (match_no == nm)
2071 break;
2072 if (eo == so) {
2073 /* Empty match (e.g. "b*" will match anywhere).
2074 * Advance by one char. */
2075 //BUG (bug 1333):
2076 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2077 //... and will erroneously match "b" even though it is NOT at the word start.
2078 //we need REG_NOTBOW but it does not exist...
2079 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2080 //it should be able to do it correctly.
2081 /* Subtle: this is safe only because
2082 * qrealloc allocated at least one extra byte */
2083 resbuf[residx] = *sp;
2084 if (*sp == '\0')
2085 goto ret;
2086 sp++;
2087 residx++;
2088 }
2089 }
2090
/* append the unmatched rest of the input */
2091 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2092 strcpy(resbuf + residx, sp);
2093 ret:
2094 //bb_error_msg("end sp:'%s'%p", sp,sp);
2095 setvar_p(dest ? dest : intvar[F0], resbuf);
/* the regex was compiled into sreg only if as_regex returned &sreg */
2096 if (regex == &sreg)
2097 regfree(regex);
2098 return match_no;
2099 }
2100
2101static NOINLINE int do_mktime(const char *ds)
2102{
2103 struct tm then;
2104 int count;
2105
2106 /*memset(&then, 0, sizeof(then)); - not needed */
2107 then.tm_isdst = -1; /* default is unknown */
2108
2109 /* manpage of mktime says these fields are ints,
2110 * so we can sscanf stuff directly into them */
2111 count = sscanf(ds, "%u %u %u %u %u %u %d",
2112 &then.tm_year, &then.tm_mon, &then.tm_mday,
2113 &then.tm_hour, &then.tm_min, &then.tm_sec,
2114 &then.tm_isdst);
2115
2116 if (count < 6
2117 || (unsigned)then.tm_mon < 1
2118 || (unsigned)then.tm_year < 1900
2119 ) {
2120 return -1;
2121 }
2122
2123 then.tm_mon -= 1;
2124 then.tm_year -= 1900;
2125
2126 return mktime(&then);
2127}
2128
/* exec_builtin(op, res): execute the builtin function call in node op.
 *
 * Up to four arguments are taken from op->l.n. For each of them, bits in
 * op->info (tested with masks 0x09000000 / 0x08000000 and shifted right
 * by one per argument) decide whether the argument is evaluated into
 * av[i] and whether its string value is fetched into as[i]; an[i] always
 * holds the raw argument node. The top two bits of op->info give the
 * minimum number of arguments.
 *
 * The builtin is selected by op->info & OPNMASK. The result is stored
 * in res, which is also the return value.
 */
2129 static NOINLINE var *exec_builtin(node *op, var *res)
2130 {
2131 #define tspl (G.exec_builtin__tspl)
2132
2133 var *tv;
2134 node *an[4];
2135 var *av[4];
2136 const char *as[4];
2137 regmatch_t pmatch[2];
2138 regex_t sreg, *re;
2139 node *spl;
2140 uint32_t isr, info;
2141 int nargs;
2142 time_t tt;
2143 int i, l, ll, n;
2144
/* tv: temporaries that receive the evaluated arguments */
2145 tv = nvalloc(4);
2146 isr = info = op->info;
2147 op = op->l.n;
2148
2149 av[2] = av[3] = NULL;
2150 for (i = 0; i < 4 && op; i++) {
2151 an[i] = nextarg(&op);
2152 if (isr & 0x09000000)
2153 av[i] = evaluate(an[i], &tv[i]);
2154 if (isr & 0x08000000)
2155 as[i] = getvar_s(av[i]);
2156 isr >>= 1;
2157 }
2158
2159 nargs = i;
2160 if ((uint32_t)nargs < (info >> 30))
2161 syntax_error(EMSG_TOO_FEW_ARGS);
2162
2163 info &= OPNMASK;
2164 switch (info) {
2165
/* atan2 of the first two arguments; needs the libm feature */
2166 case B_a2:
2167 if (ENABLE_FEATURE_AWK_LIBM)
2168 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2169 else
2170 syntax_error(EMSG_NO_MATH);
2171 break;
2172
/* split arg 0 into array arg 1 (indices 1..n), result is n. The
 * separator is arg 2 if given (regex node, or a string turned into a
 * splitter), otherwise the field splitter */
2173 case B_sp: {
2174 char *s, *s1;
2175
2176 if (nargs > 2) {
2177 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2178 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2179 } else {
2180 spl = &fsplitter.n;
2181 }
2182
2183 n = awk_split(as[0], spl, &s);
2184 s1 = s;
2185 clear_array(iamarray(av[1]));
2186 for (i = 1; i <= n; i++)
2187 setari_u(av[1], i, nextword(&s));
2188 free(s1);
2189 setvar_i(res, n);
2190 break;
2191 }
2192
/* substring of arg 0: start is arg 1 (1-based, clamped to the string),
 * length is arg 2 if given, else the rest of the string */
2193 case B_ss: {
2194 char *s;
2195
2196 l = strlen(as[0]);
2197 i = getvar_i(av[1]) - 1;
2198 if (i > l)
2199 i = l;
2200 if (i < 0)
2201 i = 0;
2202 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2203 if (n < 0)
2204 n = 0;
2205 s = xstrndup(as[0]+i, n);
2206 setvar_p(res, s);
2207 break;
2208 }
2209
2210 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2211 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2212 case B_an:
2213 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2214 break;
2215
2216 case B_co:
2217 setvar_i(res, ~getvar_i_int(av[0]));
2218 break;
2219
2220 case B_ls:
2221 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2222 break;
2223
2224 case B_or:
2225 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2226 break;
2227
2228 case B_rs:
2229 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2230 break;
2231
2232 case B_xo:
2233 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2234 break;
2235
/* case conversion of a copy of arg 0; only ASCII letters are changed */
2236 case B_lo:
2237 case B_up: {
2238 char *s, *s1;
2239 s1 = s = xstrdup(as[0]);
2240 while (*s1) {
2241 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2242 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2243 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2244 s1++;
2245 }
2246 setvar_p(res, s);
2247 break;
2248 }
2249
/* 1-based position of arg 1 within arg 0, or 0 when it does not occur
 * or is empty; case-insensitive when icase is set */
2250 case B_ix:
2251 n = 0;
2252 ll = strlen(as[1]);
2253 l = strlen(as[0]) - ll;
2254 if (ll > 0 && l >= 0) {
2255 if (!icase) {
2256 char *s = strstr(as[0], as[1]);
2257 if (s)
2258 n = (s - as[0]) + 1;
2259 } else {
2260 /* this piece of code is terribly slow and
2261 * really should be rewritten
2262 */
2263 for (i = 0; i <= l; i++) {
2264 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2265 n = i+1;
2266 break;
2267 }
2268 }
2269 }
2270 }
2271 setvar_i(res, n);
2272 break;
2273
/* strftime: format is arg 0 (or a default), time is arg 1 (or now) */
2274 case B_ti:
2275 if (nargs > 1)
2276 tt = getvar_i(av[1]);
2277 else
2278 time(&tt);
2279 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2280 i = strftime(g_buf, MAXVARFMT,
2281 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2282 localtime(&tt));
2283 g_buf[i] = '\0';
2284 setvar_s(res, g_buf);
2285 break;
2286
2287 case B_mt:
2288 setvar_i(res, do_mktime(as[0]));
2289 break;
2290
/* regex match of arg 1 against arg 0: result and RSTART are the 1-based
 * match position, RLENGTH the match length; 0 and -1 when nothing matches */
2291 case B_ma:
2292 re = as_regex(an[1], &sreg);
2293 n = regexec(re, as[0], 1, pmatch, 0);
2294 if (n == 0) {
2295 pmatch[0].rm_so++;
2296 pmatch[0].rm_eo++;
2297 } else {
2298 pmatch[0].rm_so = 0;
2299 pmatch[0].rm_eo = -1;
2300 }
2301 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2302 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2303 setvar_i(res, pmatch[0].rm_so);
2304 if (re == &sreg)
2305 regfree(re);
2306 break;
2307
/* the three substitution builtins differ only in the awk_sub arguments:
 * B_ge puts the new string in res and enables subexpression references;
 * B_gs (all matches) and B_su (first match) modify arg 2 in place and
 * yield the match count */
2308 case B_ge:
2309 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2310 break;
2311
2312 case B_gs:
2313 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2314 break;
2315
2316 case B_su:
2317 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2318 break;
2319 }
2320
2321 nvfree(tv);
2322 return res;
2323 #undef tspl
2324 }
2325
2326/*
2327 * Evaluate node - the heart of the program. Supplied with subtree
2328 * and place where to store result. returns ptr to result.
2329 */
2330#define XC(n) ((n) >> 8)
2331
2332static var *evaluate(node *op, var *res)
2333{
2334/* This procedure is recursive so we should count every byte */
2335#define fnargs (G.evaluate__fnargs)
2336/* seed is initialized to 1 */
2337#define seed (G.evaluate__seed)
2338#define sreg (G.evaluate__sreg)
2339
2340 var *v1;
2341
2342 if (!op)
2343 return setvar_s(res, NULL);
2344
2345 debug_printf_eval("entered %s()\n", __func__);
2346
2347 v1 = nvalloc(2);
2348
2349 while (op) {
2350 struct {
2351 var *v;
2352 const char *s;
2353 } L = L; /* for compiler */
2354 struct {
2355 var *v;
2356 const char *s;
2357 } R = R;
2358 double L_d = L_d;
2359 uint32_t opinfo;
2360 int opn;
2361 node *op1;
2362
2363 opinfo = op->info;
2364 opn = (opinfo & OPNMASK);
2365 g_lineno = op->lineno;
2366 op1 = op->l.n;
2367 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2368
2369 /* execute inevitable things */
2370 if (opinfo & OF_RES1)
2371 L.v = evaluate(op1, v1);
2372 if (opinfo & OF_RES2)
2373 R.v = evaluate(op->r.n, v1+1);
2374 if (opinfo & OF_STR1) {
2375 L.s = getvar_s(L.v);
2376 debug_printf_eval("L.s:'%s'\n", L.s);
2377 }
2378 if (opinfo & OF_STR2) {
2379 R.s = getvar_s(R.v);
2380 debug_printf_eval("R.s:'%s'\n", R.s);
2381 }
2382 if (opinfo & OF_NUM1) {
2383 L_d = getvar_i(L.v);
2384 debug_printf_eval("L_d:%f\n", L_d);
2385 }
2386
2387 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2388 switch (XC(opinfo & OPCLSMASK)) {
2389
2390 /* -- iterative node type -- */
2391
2392 /* test pattern */
2393 case XC( OC_TEST ):
2394 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2395 /* it's range pattern */
2396 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2397 op->info |= OF_CHECKED;
2398 if (ptest(op1->r.n))
2399 op->info &= ~OF_CHECKED;
2400 op = op->a.n;
2401 } else {
2402 op = op->r.n;
2403 }
2404 } else {
2405 op = ptest(op1) ? op->a.n : op->r.n;
2406 }
2407 break;
2408
2409 /* just evaluate an expression, also used as unconditional jump */
2410 case XC( OC_EXEC ):
2411 break;
2412
2413 /* branch, used in if-else and various loops */
2414 case XC( OC_BR ):
2415 op = istrue(L.v) ? op->a.n : op->r.n;
2416 break;
2417
2418 /* initialize for-in loop */
2419 case XC( OC_WALKINIT ):
2420 hashwalk_init(L.v, iamarray(R.v));
2421 break;
2422
2423 /* get next array item */
2424 case XC( OC_WALKNEXT ):
2425 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2426 break;
2427
2428 case XC( OC_PRINT ):
2429 case XC( OC_PRINTF ): {
2430 FILE *F = stdout;
2431
2432 if (op->r.n) {
2433 rstream *rsm = newfile(R.s);
2434 if (!rsm->F) {
2435 if (opn == '|') {
2436 rsm->F = popen(R.s, "w");
2437 if (rsm->F == NULL)
2438 bb_perror_msg_and_die("popen");
2439 rsm->is_pipe = 1;
2440 } else {
2441 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2442 }
2443 }
2444 F = rsm->F;
2445 }
2446
2447 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2448 if (!op1) {
2449 fputs(getvar_s(intvar[F0]), F);
2450 } else {
2451 while (op1) {
2452 var *v = evaluate(nextarg(&op1), v1);
2453 if (v->type & VF_NUMBER) {
2454 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2455 getvar_i(v), TRUE);
2456 fputs(g_buf, F);
2457 } else {
2458 fputs(getvar_s(v), F);
2459 }
2460
2461 if (op1)
2462 fputs(getvar_s(intvar[OFS]), F);
2463 }
2464 }
2465 fputs(getvar_s(intvar[ORS]), F);
2466
2467 } else { /* OC_PRINTF */
2468 char *s = awk_printf(op1);
2469 fputs(s, F);
2470 free(s);
2471 }
2472 fflush(F);
2473 break;
2474 }
2475
2476 case XC( OC_DELETE ): {
2477 uint32_t info = op1->info & OPCLSMASK;
2478 var *v;
2479
2480 if (info == OC_VAR) {
2481 v = op1->l.v;
2482 } else if (info == OC_FNARG) {
2483 v = &fnargs[op1->l.aidx];
2484 } else {
2485 syntax_error(EMSG_NOT_ARRAY);
2486 }
2487
2488 if (op1->r.n) {
2489 const char *s;
2490 clrvar(L.v);
2491 s = getvar_s(evaluate(op1->r.n, v1));
2492 hash_remove(iamarray(v), s);
2493 } else {
2494 clear_array(iamarray(v));
2495 }
2496 break;
2497 }
2498
2499 case XC( OC_NEWSOURCE ):
2500 g_progname = op->l.new_progname;
2501 break;
2502
2503 case XC( OC_RETURN ):
2504 copyvar(res, L.v);
2505 break;
2506
2507 case XC( OC_NEXTFILE ):
2508 nextfile = TRUE;
2509 case XC( OC_NEXT ):
2510 nextrec = TRUE;
2511 case XC( OC_DONE ):
2512 clrvar(res);
2513 break;
2514
2515 case XC( OC_EXIT ):
2516 awk_exit(L_d);
2517
2518 /* -- recursive node type -- */
2519
2520 case XC( OC_VAR ):
2521 L.v = op->l.v;
2522 if (L.v == intvar[NF])
2523 split_f0();
2524 goto v_cont;
2525
2526 case XC( OC_FNARG ):
2527 L.v = &fnargs[op->l.aidx];
2528 v_cont:
2529 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2530 break;
2531
2532 case XC( OC_IN ):
2533 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2534 break;
2535
2536 case XC( OC_REGEXP ):
2537 op1 = op;
2538 L.s = getvar_s(intvar[F0]);
2539 goto re_cont;
2540
2541 case XC( OC_MATCH ):
2542 op1 = op->r.n;
2543 re_cont:
2544 {
2545 regex_t *re = as_regex(op1, &sreg);
2546 int i = regexec(re, L.s, 0, NULL, 0);
2547 if (re == &sreg)
2548 regfree(re);
2549 setvar_i(res, (i == 0) ^ (opn == '!'));
2550 }
2551 break;
2552
2553 case XC( OC_MOVE ):
2554 debug_printf_eval("MOVE\n");
2555 /* if source is a temporary string, just relink it to dest */
2556//Disabled: if R.v is numeric but happens to have cached R.v->string,
2557//then L.v ends up being a string, which is wrong
2558// if (R.v == v1+1 && R.v->string) {
2559// res = setvar_p(L.v, R.v->string);
2560// R.v->string = NULL;
2561// } else {
2562 res = copyvar(L.v, R.v);
2563// }
2564 break;
2565
2566 case XC( OC_TERNARY ):
2567 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2568 syntax_error(EMSG_POSSIBLE_ERROR);
2569 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2570 break;
2571
2572 case XC( OC_FUNC ): {
2573 var *vbeg, *v;
2574 const char *sv_progname;
2575
2576 if (!op->r.f->body.first)
2577 syntax_error(EMSG_UNDEF_FUNC);
2578
2579 vbeg = v = nvalloc(op->r.f->nargs + 1);
2580 while (op1) {
2581 var *arg = evaluate(nextarg(&op1), v1);
2582 copyvar(v, arg);
2583 v->type |= VF_CHILD;
2584 v->x.parent = arg;
2585 if (++v - vbeg >= op->r.f->nargs)
2586 break;
2587 }
2588
2589 v = fnargs;
2590 fnargs = vbeg;
2591 sv_progname = g_progname;
2592
2593 res = evaluate(op->r.f->body.first, res);
2594
2595 g_progname = sv_progname;
2596 nvfree(fnargs);
2597 fnargs = v;
2598
2599 break;
2600 }
2601
2602 case XC( OC_GETLINE ):
2603 case XC( OC_PGETLINE ): {
2604 rstream *rsm;
2605 int i;
2606
2607 if (op1) {
2608 rsm = newfile(L.s);
2609 if (!rsm->F) {
2610 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2611 rsm->F = popen(L.s, "r");
2612 rsm->is_pipe = TRUE;
2613 } else {
2614 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2615 }
2616 }
2617 } else {
2618 if (!iF)
2619 iF = next_input_file();
2620 rsm = iF;
2621 }
2622
2623 if (!rsm->F) {
2624 setvar_i(intvar[ERRNO], errno);
2625 setvar_i(res, -1);
2626 break;
2627 }
2628
2629 if (!op->r.n)
2630 R.v = intvar[F0];
2631
2632 i = awk_getline(rsm, R.v);
2633 if (i > 0 && !op1) {
2634 incvar(intvar[FNR]);
2635 incvar(intvar[NR]);
2636 }
2637 setvar_i(res, i);
2638 break;
2639 }
2640
2641 /* simple builtins */
2642 case XC( OC_FBLTIN ): {
2643 double R_d = R_d; /* for compiler */
2644
2645 switch (opn) {
2646 case F_in:
2647 R_d = (int)L_d;
2648 break;
2649
2650 case F_rn:
2651 R_d = (double)rand() / (double)RAND_MAX;
2652 break;
2653
2654 case F_co:
2655 if (ENABLE_FEATURE_AWK_LIBM) {
2656 R_d = cos(L_d);
2657 break;
2658 }
2659
2660 case F_ex:
2661 if (ENABLE_FEATURE_AWK_LIBM) {
2662 R_d = exp(L_d);
2663 break;
2664 }
2665
2666 case F_lg:
2667 if (ENABLE_FEATURE_AWK_LIBM) {
2668 R_d = log(L_d);
2669 break;
2670 }
2671
2672 case F_si:
2673 if (ENABLE_FEATURE_AWK_LIBM) {
2674 R_d = sin(L_d);
2675 break;
2676 }
2677
2678 case F_sq:
2679 if (ENABLE_FEATURE_AWK_LIBM) {
2680 R_d = sqrt(L_d);
2681 break;
2682 }
2683
2684 syntax_error(EMSG_NO_MATH);
2685 break;
2686
2687 case F_sr:
2688 R_d = (double)seed;
2689 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2690 srand(seed);
2691 break;
2692
2693 case F_ti:
2694 R_d = time(NULL);
2695 break;
2696
2697 case F_le:
2698 if (!op1)
2699 L.s = getvar_s(intvar[F0]);
2700 R_d = strlen(L.s);
2701 break;
2702
2703 case F_sy:
2704 fflush_all();
2705 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2706 ? (system(L.s) >> 8) : 0;
2707 break;
2708
2709 case F_ff:
2710 if (!op1) {
2711 fflush(stdout);
2712 } else if (L.s && *L.s) {
2713 rstream *rsm = newfile(L.s);
2714 fflush(rsm->F);
2715 } else {
2716 fflush_all();
2717 }
2718 break;
2719
2720 case F_cl: {
2721 rstream *rsm;
2722 int err = 0;
2723 rsm = (rstream *)hash_search(fdhash, L.s);
2724 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2725 if (rsm) {
2726 debug_printf_eval("OC_FBLTIN F_cl "
2727 "rsm->is_pipe:%d, ->F:%p\n",
2728 rsm->is_pipe, rsm->F);
2729 /* Can be NULL if open failed. Example:
2730 * getline line <"doesnt_exist";
2731 * close("doesnt_exist"); <--- here rsm->F is NULL
2732 */
2733 if (rsm->F)
2734 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2735 free(rsm->buffer);
2736 hash_remove(fdhash, L.s);
2737 }
2738 if (err)
2739 setvar_i(intvar[ERRNO], errno);
2740 R_d = (double)err;
2741 break;
2742 }
2743 } /* switch */
2744 setvar_i(res, R_d);
2745 break;
2746 }
2747
2748 case XC( OC_BUILTIN ):
2749 res = exec_builtin(op, res);
2750 break;
2751
2752 case XC( OC_SPRINTF ):
2753 setvar_p(res, awk_printf(op1));
2754 break;
2755
2756 case XC( OC_UNARY ): {
2757 double Ld, R_d;
2758
2759 Ld = R_d = getvar_i(R.v);
2760 switch (opn) {
2761 case 'P':
2762 Ld = ++R_d;
2763 goto r_op_change;
2764 case 'p':
2765 R_d++;
2766 goto r_op_change;
2767 case 'M':
2768 Ld = --R_d;
2769 goto r_op_change;
2770 case 'm':
2771 R_d--;
2772 r_op_change:
2773 setvar_i(R.v, R_d);
2774 break;
2775 case '!':
2776 Ld = !istrue(R.v);
2777 break;
2778 case '-':
2779 Ld = -R_d;
2780 break;
2781 }
2782 setvar_i(res, Ld);
2783 break;
2784 }
2785
2786 case XC( OC_FIELD ): {
2787 int i = (int)getvar_i(R.v);
2788 if (i == 0) {
2789 res = intvar[F0];
2790 } else {
2791 split_f0();
2792 if (i > nfields)
2793 fsrealloc(i);
2794 res = &Fields[i - 1];
2795 }
2796 break;
2797 }
2798
2799 /* concatenation (" ") and index joining (",") */
2800 case XC( OC_CONCAT ):
2801 case XC( OC_COMMA ): {
2802 const char *sep = "";
2803 if ((opinfo & OPCLSMASK) == OC_COMMA)
2804 sep = getvar_s(intvar[SUBSEP]);
2805 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2806 break;
2807 }
2808
2809 case XC( OC_LAND ):
2810 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2811 break;
2812
2813 case XC( OC_LOR ):
2814 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2815 break;
2816
2817 case XC( OC_BINARY ):
2818 case XC( OC_REPLACE ): {
2819 double R_d = getvar_i(R.v);
2820 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2821 switch (opn) {
2822 case '+':
2823 L_d += R_d;
2824 break;
2825 case '-':
2826 L_d -= R_d;
2827 break;
2828 case '*':
2829 L_d *= R_d;
2830 break;
2831 case '/':
2832 if (R_d == 0)
2833 syntax_error(EMSG_DIV_BY_ZERO);
2834 L_d /= R_d;
2835 break;
2836 case '&':
2837 if (ENABLE_FEATURE_AWK_LIBM)
2838 L_d = pow(L_d, R_d);
2839 else
2840 syntax_error(EMSG_NO_MATH);
2841 break;
2842 case '%':
2843 if (R_d == 0)
2844 syntax_error(EMSG_DIV_BY_ZERO);
2845 L_d -= (int)(L_d / R_d) * R_d;
2846 break;
2847 }
2848 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2849 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2850 break;
2851 }
2852
2853 case XC( OC_COMPARE ): {
2854 int i = i; /* for compiler */
2855 double Ld;
2856
2857 if (is_numeric(L.v) && is_numeric(R.v)) {
2858 Ld = getvar_i(L.v) - getvar_i(R.v);
2859 } else {
2860 const char *l = getvar_s(L.v);
2861 const char *r = getvar_s(R.v);
2862 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2863 }
2864 switch (opn & 0xfe) {
2865 case 0:
2866 i = (Ld > 0);
2867 break;
2868 case 2:
2869 i = (Ld >= 0);
2870 break;
2871 case 4:
2872 i = (Ld == 0);
2873 break;
2874 }
2875 setvar_i(res, (i == 0) ^ (opn & 1));
2876 break;
2877 }
2878
2879 default:
2880 syntax_error(EMSG_POSSIBLE_ERROR);
2881 }
2882 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2883 op = op->a.n;
2884 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2885 break;
2886 if (nextrec)
2887 break;
2888 } /* while (op) */
2889
2890 nvfree(v1);
2891 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2892 return res;
2893#undef fnargs
2894#undef seed
2895#undef sreg
2896}
2897
2898
2899/* -------- main & co. -------- */
2900
2901static int awk_exit(int r)
2902{
2903 var tv;
2904 unsigned i;
2905 hash_item *hi;
2906
2907 zero_out_var(&tv);
2908
2909 if (!exiting) {
2910 exiting = TRUE;
2911 nextrec = FALSE;
2912 evaluate(endseq.first, &tv);
2913 }
2914
2915 /* waiting for children */
2916 for (i = 0; i < fdhash->csize; i++) {
2917 hi = fdhash->items[i];
2918 while (hi) {
2919 if (hi->data.rs.F && hi->data.rs.is_pipe)
2920 pclose(hi->data.rs.F);
2921 hi = hi->next;
2922 }
2923 }
2924
2925 exit(r);
2926}
2927
2928/* if expr looks like "var=value", perform assignment and return 1,
2929 * otherwise return 0 */
2930static int is_assignment(const char *expr)
2931{
2932 char *exprc, *val, *s, *s1;
2933
2934 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2935 return FALSE;
2936 }
2937
2938 exprc = xstrdup(expr);
2939 val = exprc + (val - expr);
2940 *val++ = '\0';
2941
2942 s = s1 = val;
2943 while ((*s1 = nextchar(&s)) != '\0')
2944 s1++;
2945
2946 setvar_u(newvar(exprc), val);
2947 free(exprc);
2948 return TRUE;
2949}
2950
2951/* switch to next input file */
2952static rstream *next_input_file(void)
2953{
2954#define rsm (G.next_input_file__rsm)
2955#define files_happen (G.next_input_file__files_happen)
2956
2957 FILE *F = NULL;
2958 const char *fname, *ind;
2959
2960 if (rsm.F)
2961 fclose(rsm.F);
2962 rsm.F = NULL;
2963 rsm.pos = rsm.adv = 0;
2964
2965 do {
2966 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2967 if (files_happen)
2968 return NULL;
2969 fname = "-";
2970 F = stdin;
2971 } else {
2972 ind = getvar_s(incvar(intvar[ARGIND]));
2973 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2974 if (fname && *fname && !is_assignment(fname))
2975 F = xfopen_stdin(fname);
2976 }
2977 } while (!F);
2978
2979 files_happen = TRUE;
2980 setvar_s(intvar[FILENAME], fname);
2981 rsm.F = F;
2982 return &rsm;
2983#undef rsm
2984#undef files_happen
2985}
2986
2987int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2988int awk_main(int argc, char **argv)
2989{
2990 unsigned opt;
2991 char *opt_F, *opt_W;
2992 llist_t *list_v = NULL;
2993 llist_t *list_f = NULL;
2994 int i, j;
2995 var *v;
2996 var tv;
2997 char **envp;
2998 char *vnames = (char *)vNames; /* cheat */
2999 char *vvalues = (char *)vValues;
3000
3001 INIT_G();
3002
3003 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3004 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3005 if (ENABLE_LOCALE_SUPPORT)
3006 setlocale(LC_NUMERIC, "C");
3007
3008 zero_out_var(&tv);
3009
3010 /* allocate global buffer */
3011 g_buf = xmalloc(MAXVARFMT + 1);
3012
3013 vhash = hash_init();
3014 ahash = hash_init();
3015 fdhash = hash_init();
3016 fnhash = hash_init();
3017
3018 /* initialize variables */
3019 for (i = 0; *vnames; i++) {
3020 intvar[i] = v = newvar(nextword(&vnames));
3021 if (*vvalues != '\377')
3022 setvar_s(v, nextword(&vvalues));
3023 else
3024 setvar_i(v, 0);
3025
3026 if (*vnames == '*') {
3027 v->type |= VF_SPECIAL;
3028 vnames++;
3029 }
3030 }
3031
3032 handle_special(intvar[FS]);
3033 handle_special(intvar[RS]);
3034
3035 newfile("/dev/stdin")->F = stdin;
3036 newfile("/dev/stdout")->F = stdout;
3037 newfile("/dev/stderr")->F = stderr;
3038
3039 /* Huh, people report that sometimes environ is NULL. Oh well. */
3040 if (environ) for (envp = environ; *envp; envp++) {
3041 /* environ is writable, thus we don't strdup it needlessly */
3042 char *s = *envp;
3043 char *s1 = strchr(s, '=');
3044 if (s1) {
3045 *s1 = '\0';
3046 /* Both findvar and setvar_u take const char*
3047 * as 2nd arg -> environment is not trashed */
3048 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3049 *s1 = '=';
3050 }
3051 }
3052 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3053 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3054 argv += optind;
3055 argc -= optind;
3056 if (opt & 0x1)
3057 setvar_s(intvar[FS], opt_F); // -F
3058 while (list_v) { /* -v */
3059 if (!is_assignment(llist_pop(&list_v)))
3060 bb_show_usage();
3061 }
3062 if (list_f) { /* -f */
3063 do {
3064 char *s = NULL;
3065 FILE *from_file;
3066
3067 g_progname = llist_pop(&list_f);
3068 from_file = xfopen_stdin(g_progname);
3069 /* one byte is reserved for some trick in next_token */
3070 for (i = j = 1; j > 0; i += j) {
3071 s = xrealloc(s, i + 4096);
3072 j = fread(s + i, 1, 4094, from_file);
3073 }
3074 s[i] = '\0';
3075 fclose(from_file);
3076 parse_program(s + 1);
3077 free(s);
3078 } while (list_f);
3079 argc++;
3080 } else { // no -f: take program from 1st parameter
3081 if (!argc)
3082 bb_show_usage();
3083 g_progname = "cmd. line";
3084 parse_program(*argv++);
3085 }
3086 if (opt & 0x8) // -W
3087 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3088
3089 /* fill in ARGV array */
3090 setvar_i(intvar[ARGC], argc);
3091 setari_u(intvar[ARGV], 0, "awk");
3092 i = 0;
3093 while (*argv)
3094 setari_u(intvar[ARGV], ++i, *argv++);
3095
3096 evaluate(beginseq.first, &tv);
3097 if (!mainseq.first && !endseq.first)
3098 awk_exit(EXIT_SUCCESS);
3099
3100 /* input file could already be opened in BEGIN block */
3101 if (!iF)
3102 iF = next_input_file();
3103
3104 /* passing through input files */
3105 while (iF) {
3106 nextfile = FALSE;
3107 setvar_i(intvar[FNR], 0);
3108
3109 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3110 nextrec = FALSE;
3111 incvar(intvar[NR]);
3112 incvar(intvar[FNR]);
3113 evaluate(mainseq.first, &tv);
3114
3115 if (nextfile)
3116 break;
3117 }
3118
3119 if (i < 0)
3120 syntax_error(strerror(errno));
3121
3122 iF = next_input_file();
3123 }
3124
3125 awk_exit(EXIT_SUCCESS);
3126 /*return 0;*/
3127}
Note: See TracBrowser for help on using the repository browser.