source: MondoRescue/branches/3.2/mindi-busybox/editors/awk.c

Last change on this file was 3232, checked in by Bruno Cornec, 10 years ago
  • Update mindi-busybox to 1.21.1
File size: 72.8 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10//usage:#define awk_trivial_usage
11//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12//usage:#define awk_full_usage "\n\n"
13//usage: " -v VAR=VAL Set variable"
14//usage: "\n -F SEP Use SEP as field separator"
15//usage: "\n -f FILE Read program from FILE"
16
17#include "libbb.h"
18#include "xregex.h"
19#include <math.h>
20
21/* This is a NOEXEC applet. Be very careful! */
22
23
24/* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
/* All three debug hooks are defined as no-ops here, so none of the
 * #ifndef fallbacks below is used unless one of these lines is removed. */
26#define debug_printf_walker(...) do {} while (0)
27#define debug_printf_eval(...) do {} while (0)
28#define debug_printf_parse(...) do {} while (0)
29
/* Fallbacks: print the debug message to stderr */
30#ifndef debug_printf_walker
31# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32#endif
33#ifndef debug_printf_eval
34# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35#endif
36#ifndef debug_printf_parse
37# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38#endif
39
40
41
/* MAXVARFMT: buffer size passed to fmt_num() by getvar_s().
 * MINNVBLOCK: smallest number of vars nvalloc() puts into a new nvblock. */
42#define MAXVARFMT 240
43#define MINNVBLOCK 64
44
45/* variable flags */
46#define VF_NUMBER 0x0001 /* 1 = primary type is number */
47#define VF_ARRAY 0x0002 /* 1 = it's an array */
48
49#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
56
57/* these flags are static, don't change them when value is changed */
/* (clrvar() masks var.type with this set, then adds VF_DIRTY) */
58#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
59
/* One element of the walker chain hanging off var.x.walker (for..in).
 * Elements are linked through prev; nvfree() follows prev and frees each one.
 * NOTE(review): wbuf is presumably allocated larger than one byte by the
 * code that builds the list (not visible here) - confirm. */
60typedef struct walker_list {
61 char *end;
62 char *cur;
63 struct walker_list *prev;
64 char wbuf[1];
65} walker_list;
66
67/* Variable */
/* An awk value. type holds VF_* flags; number and string hold the numeric
 * and string forms (getvar_s()/getvar_i() fill in the missing one and set
 * VF_CACHED). Which member of x is meaningful depends on the flags:
 * VF_ARRAY -> array, VF_CHILD -> parent, VF_WALK -> walker. */
68typedef struct var_s {
69 unsigned type; /* flags */
70 double number;
71 char *string;
72 union {
73 int aidx; /* func arg idx (for compilation stage) */
74 struct xhash_s *array; /* array ptr */
75 struct var_s *parent; /* for func args, ptr to actual parameter */
76 walker_list *walker; /* list of array elements (for..in) */
77 } x;
78} var;
79
80/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* first/last: the two ends of the node list.
 * NOTE(review): programname is presumably the source name used in
 * diagnostics for this chain - confirm against the parser. */
81typedef struct chain_s {
82 struct node_s *first;
83 struct node_s *last;
84 const char *programname;
85} chain;
86
87/* Function */
/* Entry stored in the functions hash (see hash_item.data.f).
 * NOTE(review): nargs is presumably the declared parameter count - confirm. */
88typedef struct func_s {
89 unsigned nargs;
90 struct chain_s body; /* node chain of the function body */
91} func;
92
93/* I/O stream */
/* Entry stored in the redirect streams hash (see hash_item.data.rs).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping and
 * is_pipe like a popen-vs-fopen marker; their users are not in view - confirm. */
94typedef struct rstream_s {
95 FILE *F;
96 char *buffer;
97 int adv;
98 int size;
99 int pos;
100 smallint is_pipe;
101} rstream;
102
/* One hash bucket entry. data must remain the FIRST member: hash_find()
 * stores the &hi->data pointer returned by hash_search() into a hash_item
 * pointer, which only works while both addresses coincide.
 * name is over-allocated: hash_find() allocates sizeof(hash_item) plus
 * strlen(name) + 1 bytes and copies the key into it. */
103typedef struct hash_item_s {
104 union {
105 struct var_s v; /* variable/array hash */
106 struct rstream_s rs; /* redirect streams hash */
107 struct func_s f; /* functions hash */
108 } data;
109 struct hash_item_s *next; /* next in chain */
110 char name[1]; /* really it's longer */
111} hash_item;
112
/* Chained hash table keyed by string.
 * csize starts at FIRST_PRIME (hash_init()); hash_rebuild() replaces it with
 * PRIMES[nprime] and increments nprime. glen is increased/decreased by
 * strlen(name) + 1 in hash_find()/hash_remove(). */
113typedef struct xhash_s {
114 unsigned nel; /* num of elements */
115 unsigned csize; /* current hash size */
116 unsigned nprime; /* next hash size in PRIMES[] */
117 unsigned glen; /* summary length of item names */
118 struct hash_item_s **items;
119} xhash;
120
121/* Tree node */
/* Parse tree node. info and lineno are filled in by new_node() (lineno is
 * taken from g_lineno). l, r and a are three operand slots; for a regexp
 * node mk_re_node() sets l.re to the case-sensitive regex and r.ire to the
 * REG_ICASE variant. */
122typedef struct node_s {
123 uint32_t info;
124 unsigned lineno;
125 union {
126 struct node_s *n;
127 var *v;
128 int aidx;
129 char *new_progname;
130 regex_t *re;
131 } l;
132 union {
133 struct node_s *n;
134 regex_t *ire;
135 func *f;
136 } r;
137 union {
138 struct node_s *n;
139 } a;
140} node;
141
142/* Block of temporary variables */
/* Used by nvalloc()/nvfree(): size is the capacity of nv[] in vars, pos
 * points at the first unused var inside nv[], prev/next link the blocks
 * into a doubly linked list. */
143typedef struct nvblock_s {
144 int size;
145 var *pos;
146 struct nvblock_s *prev;
147 struct nvblock_s *next;
148 var nv[];
149} nvblock;
150
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably re is what gets passed to mk_re_node(), which
 * compiles re[0] case-sensitively and re[1] with REG_ICASE - confirm at the
 * call sites. */
151typedef struct tsplitter_s {
152 node n;
153 regex_t re[2];
154} tsplitter;
155
156/* simple token classes */
157/* Order and hex values are very important!!! See next_token() */
/* next_token() starts with class bit 1 and shifts it left at every NTC
 * marker in tokenlist[], so bit N here corresponds to the Nth group there. */
158#define TC_SEQSTART 1 /* ( */
159#define TC_SEQTERM (1 << 1) /* ) */
160#define TC_REGEXP (1 << 2) /* /.../ */
161#define TC_OUTRDR (1 << 3) /* | > >> */
162#define TC_UOPPOST (1 << 4) /* unary postfix operator */
163#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164#define TC_BINOPX (1 << 6) /* two-opnd operator */
165#define TC_IN (1 << 7)
166#define TC_COMMA (1 << 8)
167#define TC_PIPE (1 << 9) /* input redirection pipe */
168#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169#define TC_ARRTERM (1 << 11) /* ] */
170#define TC_GRPSTART (1 << 12) /* { */
171#define TC_GRPTERM (1 << 13) /* } */
172#define TC_SEMICOL (1 << 14)
173#define TC_NEWLINE (1 << 15)
174#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175#define TC_WHILE (1 << 17)
176#define TC_ELSE (1 << 18)
177#define TC_BUILTIN (1 << 19)
178#define TC_GETLINE (1 << 20)
179#define TC_FUNCDECL (1 << 21) /* `function' `func' */
180#define TC_BEGIN (1 << 22)
181#define TC_END (1 << 23)
/* The classes below have no tokenlist[] group; next_token() assigns them directly */
182#define TC_EOF (1 << 24)
183#define TC_VARIABLE (1 << 25)
184#define TC_ARRAY (1 << 26)
185#define TC_FUNCTION (1 << 27)
186#define TC_STRING (1 << 28)
187#define TC_NUMBER (1 << 29)
188
189#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
190
191/* combined token classes */
192#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
196
197#define TC_STATEMNT (TC_STATX | TC_WHILE)
198#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
199
200/* word tokens, cannot mean something else if not expected */
/* (next_token() matches these even when the caller did not ask for them,
 * and rejects a match that is followed by another identifier character) */
201#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
203
204/* discard newlines after these */
205#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
207
208/* what can expression begin with */
209#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210/* what can group begin with */
211#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
212
213/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214/* operator is inserted between them */
215#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
218
/* Per-operator flags stored in node.info / tokeninfo[] entries.
 * NOTE(review): judging by the names, RESn/STRn/NUMn select how operand n is
 * pre-evaluated; the consumer (evaluate()) is not in view - confirm. */
219#define OF_RES1 0x010000
220#define OF_RES2 0x020000
221#define OF_STR1 0x040000
222#define OF_STR2 0x080000
223#define OF_NUM1 0x100000
224#define OF_CHECKED 0x200000
225
226/* combined operator flags */
227#define xx 0
228#define xV OF_RES2
229#define xS (OF_RES2 | OF_STR2)
230#define Vx OF_RES1
231#define VV (OF_RES1 | OF_RES2)
232#define Nx (OF_RES1 | OF_NUM1)
233#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234#define Sx (OF_RES1 | OF_STR1)
235#define SV (OF_RES1 | OF_STR1 | OF_RES2)
236#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
237
/* OPCLSMASK covers the byte holding the OC_xxx / ST_xxx operation class;
 * OPNMASK covers the low seven bits (operator character or builtin id) */
238#define OPCLSMASK 0xFF00
239#define OPNMASK 0x007F
240
241/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
242 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
243 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
244 */
245#undef P
246#undef PRIMASK
247#undef PRIMASK2
/* NB: the argument of P() is not parenthesized in the expansion; all uses
 * in this chunk pass a plain constant */
248#define P(x) (x << 24)
249#define PRIMASK 0x7F000000
250#define PRIMASK2 0x7E000000
251
252/* Operation classes */
253
/* Operation class codes. Every value sits in the byte selected by
 * OPCLSMASK (0xFF00).
 * NOTE(review): SHIFT_TIL_THIS / RECUR_FROM_THIS are numeric thresholds
 * within that range; their users are outside this chunk - confirm meaning. */
254#define SHIFT_TIL_THIS 0x0600
255#define RECUR_FROM_THIS 0x1000
256
257enum {
258 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
259 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
260
261 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
262 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
263 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
264
265 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
266 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
267 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
268 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
269 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
270 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
271 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
272 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
273 OC_DONE = 0x2800,
274
275 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
276 ST_WHILE = 0x3300
277};
278
279/* simple builtins */
/* Ids OR-ed into tokeninfo[] entries of class OC_FBLTIN (F_xx) */
280enum {
281 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
282 F_ti, F_le, F_sy, F_ff, F_cl
283};
284
285/* builtins */
/* Ids OR-ed into tokeninfo[] entries of class OC_BUILTIN (B_xx) */
286enum {
287 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
288 B_ge, B_gs, B_su,
289 B_an, B_co, B_ls, B_or, B_rs, B_xo,
290};
291
292/* tokens and their corresponding info values */
293
294#define NTC "\377" /* switch to next token class (tc<<1) */
295#define NTCC '\377'
296
297#define OC_B OC_BUILTIN
298
/* Token table scanned by next_token(). Each entry is one length byte
 * followed by that many characters of token text. An NTC byte in place of
 * a length byte ends the current token class and moves on to the next
 * TC_xxx bit. Entries are tried in order and the first match wins, so a
 * longer token has to precede any token that is its prefix within the same
 * scan (e.g. ">>" before ">"). Each non-NTC entry has exactly one
 * corresponding tokeninfo[] element, in the same order. */
299static const char tokenlist[] ALIGN1 =
300 "\1(" NTC
301 "\1)" NTC
302 "\1/" NTC /* REGEXP */
303 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
304 "\2++" "\2--" NTC /* UOPPOST */
305 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
313 "\2in" NTC
314 "\1," NTC
315 "\1|" NTC
316 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
317 "\1]" NTC
318 "\1{" NTC
319 "\1}" NTC
320 "\1;" NTC
321 "\1\n" NTC
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
326 "\5while" NTC
327 "\4else" NTC
328
329 "\3and" "\5compl" "\6lshift" "\2or"
330 "\6rshift" "\3xor"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
338 "\7getline" NTC
339 "\4func" "\10function" NTC
340 "\5BEGIN" NTC
341 "\3END"
342 /* compiler adds trailing "\0" */
343 ;
344
/* Parallel to tokenlist[]: one element per token entry, in the same order
 * (NTC markers have no element here - next_token() advances its tokeninfo
 * pointer only when it steps over a real token). The matching element is
 * copied into t_info. Each value combines an operation class (OC_xxx or
 * ST_xxx), operand flags (xV, NV, ...), a P() priority or builtin argument
 * descriptor, and a low-byte operator character or builtin id. */
345static const uint32_t tokeninfo[] = {
346 0,
347 0,
348 OC_REGEXP,
349 xS|'a', xS|'w', xS|'|',
350 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
351 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
352 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
356 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
357 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
358 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359 OC_IN|SV|P(49), /* in */
360 OC_COMMA|SS|P(80),
361 OC_PGETLINE|SV|P(37),
362 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
363 0, /* ] */
364 0,
365 0,
366 0,
367 0, /* \n */
368 ST_IF, ST_DO, ST_FOR, OC_BREAK,
369 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
370 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
371 OC_RETURN|Vx, OC_EXIT|Nx,
372 ST_WHILE,
373 0, /* else */
374
375 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
382 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384 OC_GETLINE|SV|P(0),
385 0, 0,
386 0,
387 0 /* END */
388};
389
390/* internal variable names and their initial values */
391/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order matches the names in vNames below */
392enum {
393 CONVFMT, OFMT, FS, OFS,
394 ORS, RS, RT, FILENAME,
395 SUBSEP, F0, ARGIND, ARGC,
396 ARGV, ERRNO, FNR, NR,
397 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
398};
399
/* NUL-separated names; a '*' directly after a separator marks the NEXT name
 * as SPECIAL (FS, RS, $, NF, IGNORECASE). The list ends with an empty name. */
400static const char vNames[] ALIGN1 =
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
406
/* NUL-separated initial string values for the first ten names above.
 * NOTE(review): the closing "\377" presumably tells the initialization code
 * (outside this chunk) that the remaining variables have no string value -
 * confirm. */
407static const char vValues[] ALIGN1 =
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
411
412/* hash size may grow to these values */
/* hash_init() creates tables with FIRST_PRIME buckets; hash_rebuild() then
 * steps through PRIMES[] and stops growing once the array is exhausted */
413#define FIRST_PRIME 61
414static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
415
416
417/* Globals. Split in two parts so that first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
421 */
/* First part of the global state; reached through the G1 macro, i.e. it is
 * placed immediately before the address stored in ptr_to_globals.
 * Allocated zero-filled by INIT_G(). */
422struct globals {
423 double t_double; /* value of the last TC_NUMBER token (next_token) */
424 chain beginseq, mainseq, endseq;
425 chain *seq;
426 node *break_ptr, *continue_ptr;
427 rstream *iF;
428 xhash *vhash, *ahash, *fdhash, *fnhash;
429 const char *g_progname; /* printed by syntax_error() */
430 int g_lineno; /* printed by syntax_error(); copied into new nodes */
431 int nfields;
432 int maxfields; /* used in fsrealloc() only */
433 var *Fields;
434 nvblock *g_cb; /* current block of the nvalloc()/nvfree() allocator */
435 char *g_pos; /* read position of next_token() in the program text */
436 char *g_buf; /* scratch buffer used by getvar_s() for number formatting */
437 smallint icase;
438 smallint exiting;
439 smallint nextrec;
440 smallint nextfile;
441 smallint is_f0_split;
442 smallint t_rollback; /* set by rollback_token(), consumed by next_token() */
443};
/* Second part of the global state; reached through the G macro, i.e. it
 * starts exactly at the address stored in ptr_to_globals.
 * The xxx__yyy members are former function-local statics of function xxx. */
444struct globals2 {
445 uint32_t t_info; /* often used */
446 uint32_t t_tclass;
447 char *t_string;
448 int t_lineno;
449
450 var *intvar[NUM_INTERNAL_VARS]; /* often used */
451
452 /* former statics from various functions */
453 char *split_f0__fstrings;
454
455 uint32_t next_token__save_tclass;
456 uint32_t next_token__save_info;
457 uint32_t next_token__ltclass;
458 smallint next_token__concat_inserted;
459
460 smallint next_input_file__files_happen;
461 rstream next_input_file__rsm;
462
463 var *evaluate__fnargs;
464 unsigned evaluate__seed;
465 regex_t evaluate__sreg;
466
467 var ptest__v;
468
469 tsplitter exec_builtin__tspl;
470
471 /* biggest and least used members go last */
472 tsplitter fsplitter, rsplitter;
473};
/* G1: the struct globals located just before ptr_to_globals;
 * G: the struct globals2 starting at ptr_to_globals. */
474#define G1 (ptr_to_globals[-1])
475#define G (*(struct globals2 *)ptr_to_globals)
476/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477/*char G1size[sizeof(G1)]; - 0x74 */
478/*char Gsize[sizeof(G)]; - 0x1c4 */
479/* Trying to keep most of members accessible with short offsets: */
480/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand names for the most used members. NB: these are plain macros, so
 * none of these identifiers can be reused as a local variable name. */
481#define t_double (G1.t_double )
482#define beginseq (G1.beginseq )
483#define mainseq (G1.mainseq )
484#define endseq (G1.endseq )
485#define seq (G1.seq )
486#define break_ptr (G1.break_ptr )
487#define continue_ptr (G1.continue_ptr)
488#define iF (G1.iF )
489#define vhash (G1.vhash )
490#define ahash (G1.ahash )
491#define fdhash (G1.fdhash )
492#define fnhash (G1.fnhash )
493#define g_progname (G1.g_progname )
494#define g_lineno (G1.g_lineno )
495#define nfields (G1.nfields )
496#define maxfields (G1.maxfields )
497#define Fields (G1.Fields )
498#define g_cb (G1.g_cb )
499#define g_pos (G1.g_pos )
500#define g_buf (G1.g_buf )
501#define icase (G1.icase )
502#define exiting (G1.exiting )
503#define nextrec (G1.nextrec )
504#define nextfile (G1.nextfile )
505#define is_f0_split (G1.is_f0_split )
506#define t_rollback (G1.t_rollback )
507#define t_info (G.t_info )
508#define t_tclass (G.t_tclass )
509#define t_string (G.t_string )
510#define t_lineno (G.t_lineno )
511#define intvar (G.intvar )
512#define fsplitter (G.fsplitter )
513#define rsplitter (G.rsplitter )
/* Allocate both structs as one zero-filled chunk and point ptr_to_globals at
 * the globals2 part; then set the two members whose initial value is not
 * zero (previous token class = TC_OPTERM, seed = 1). */
514#define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
518} while (0)
519
520
521/* function prototypes */
/* Forward declarations for functions that are called before their
 * definition appears in this file */
522static void handle_special(var *);
523static node *parse_expr(uint32_t);
524static void chain_group(void);
525static var *evaluate(node *, var *);
526static rstream *next_input_file(void);
527static int fmt_num(char *, int, const char *, double, int);
528static int awk_exit(int) NORETURN;
529
530/* ---- error handling ---- */
531
/* Message texts passed to syntax_error(), which prefixes them with the
 * program name and line number */
532static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
542
543static void zero_out_var(var *vp)
544{
545 memset(vp, 0, sizeof(*vp));
546}
547
548static void syntax_error(const char *message) NORETURN;
549static void syntax_error(const char *message)
550{
551 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
552}
553
554/* ---- hash stuff ---- */
555
/* Compute the raw (unreduced) hash of a NUL-terminated key:
 * h = h * 63 + c for every character, in unsigned arithmetic.
 * Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = (h << 6) - h + *name;
	return h;
}
564
565/* create new hash */
566static xhash *hash_init(void)
567{
568 xhash *newhash;
569
570 newhash = xzalloc(sizeof(*newhash));
571 newhash->csize = FIRST_PRIME;
572 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573
574 return newhash;
575}
576
577/* find item in hash, return ptr to data, NULL if not found */
578static void *hash_search(xhash *hash, const char *name)
579{
580 hash_item *hi;
581
582 hi = hash->items[hashidx(name) % hash->csize];
583 while (hi) {
584 if (strcmp(hi->name, name) == 0)
585 return &hi->data;
586 hi = hi->next;
587 }
588 return NULL;
589}
590
591/* grow hash if it becomes too big */
592static void hash_rebuild(xhash *hash)
593{
594 unsigned newsize, i, idx;
595 hash_item **newitems, *hi, *thi;
596
597 if (hash->nprime == ARRAY_SIZE(PRIMES))
598 return;
599
600 newsize = PRIMES[hash->nprime++];
601 newitems = xzalloc(newsize * sizeof(newitems[0]));
602
603 for (i = 0; i < hash->csize; i++) {
604 hi = hash->items[i];
605 while (hi) {
606 thi = hi;
607 hi = thi->next;
608 idx = hashidx(thi->name) % newsize;
609 thi->next = newitems[idx];
610 newitems[idx] = thi;
611 }
612 }
613
614 free(hash->items);
615 hash->csize = newsize;
616 hash->items = newitems;
617}
618
619/* find item in hash, add it if necessary. Return ptr to data */
620static void *hash_find(xhash *hash, const char *name)
621{
622 hash_item *hi;
623 unsigned idx;
624 int l;
625
626 hi = hash_search(hash, name);
627 if (!hi) {
628 if (++hash->nel / hash->csize > 10)
629 hash_rebuild(hash);
630
631 l = strlen(name) + 1;
632 hi = xzalloc(sizeof(*hi) + l);
633 strcpy(hi->name, name);
634
635 idx = hashidx(name) % hash->csize;
636 hi->next = hash->items[idx];
637 hash->items[idx] = hi;
638 hash->glen += l;
639 }
640 return &hi->data;
641}
642
/* Typed wrappers around hash_find(): each returns the entry called name from
 * the given (or the implied global) hash, creating it if it does not exist */
643#define findvar(hash, name) ((var*) hash_find((hash), (name)))
644#define newvar(name) ((var*) hash_find(vhash, (name)))
645#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646#define newfunc(name) ((func*) hash_find(fnhash, (name)))
647
648static void hash_remove(xhash *hash, const char *name)
649{
650 hash_item *hi, **phi;
651
652 phi = &hash->items[hashidx(name) % hash->csize];
653 while (*phi) {
654 hi = *phi;
655 if (strcmp(hi->name, name) == 0) {
656 hash->glen -= (strlen(name) + 1);
657 hash->nel--;
658 *phi = hi->next;
659 free(hi);
660 break;
661 }
662 phi = &hi->next;
663 }
664}
665
666/* ------ some useful functions ------ */
667
668static char *skip_spaces(char *p)
669{
670 while (1) {
671 if (*p == '\\' && p[1] == '\n') {
672 p++;
673 t_lineno++;
674 } else if (*p != ' ' && *p != '\t') {
675 break;
676 }
677 p++;
678 }
679 return p;
680}
681
/* Return the word *s currently points to and advance *s to the byte that
 * follows the word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return word;
}
690
/* Fetch one (possibly escaped) character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns a backslash without consuming anything (unrecognized escape such
 * as \" or \z), the character after the backslash is returned literally;
 * *s is not advanced over a terminating NUL in that case. */
static char nextchar(char **s)
{
	char *after;
	char c = **s;

	(*s)++;
	after = *s;
	if (c != '\\')
		return c;

	c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == after) {
		/* unknown escape: take the escaped character as is */
		c = **s;
		if (c != '\0')
			(*s)++;
	}
	return c;
}
710
/* Rewrite the string at s1 with every escape sequence replaced by the
 * character nextchar() yields for it. Reading and writing share one buffer.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char c;

	do {
		c = nextchar(&src);
		*s1++ = c;
	} while (c != '\0');
}
719
720static ALWAYS_INLINE int isalnum_(int c)
721{
722 return (isalnum(c) || c == '_');
723}
724
725static double my_strtod(char **pp)
726{
727 char *cp = *pp;
728 if (ENABLE_DESKTOP && cp[0] == '0') {
729 /* Might be hex or octal integer: 0x123abc or 07777 */
730 char c = (cp[1] | 0x20);
731 if (c == 'x' || isdigit(cp[1])) {
732 unsigned long long ull = strtoull(cp, pp, 0);
733 if (c == 'x')
734 return ull;
735 c = **pp;
736 if (!isdigit(c) && c != '.')
737 return ull;
738 /* else: it may be a floating number. Examples:
739 * 009.123 (*pp points to '9')
740 * 000.123 (*pp points to '.')
741 * fall through to strtod.
742 */
743 }
744 }
745 return strtod(cp, pp);
746}
747
748/* -------- working with variables (set/get/copy/etc) -------- */
749
750static xhash *iamarray(var *v)
751{
752 var *a = v;
753
754 while (a->type & VF_CHILD)
755 a = a->x.parent;
756
757 if (!(a->type & VF_ARRAY)) {
758 a->type |= VF_ARRAY;
759 a->x.array = hash_init();
760 }
761 return a->x.array;
762}
763
764static void clear_array(xhash *array)
765{
766 unsigned i;
767 hash_item *hi, *thi;
768
769 for (i = 0; i < array->csize; i++) {
770 hi = array->items[i];
771 while (hi) {
772 thi = hi;
773 hi = hi->next;
774 free(thi->data.v.string);
775 free(thi);
776 }
777 array->items[i] = NULL;
778 }
779 array->glen = array->nel = 0;
780}
781
782/* clear a variable */
783static var *clrvar(var *v)
784{
785 if (!(v->type & VF_FSTR))
786 free(v->string);
787
788 v->type &= VF_DONTTOUCH;
789 v->type |= VF_DIRTY;
790 v->string = NULL;
791 return v;
792}
793
794/* assign string value to variable */
795static var *setvar_p(var *v, char *value)
796{
797 clrvar(v);
798 v->string = value;
799 handle_special(v);
800 return v;
801}
802
803/* same as setvar_p but make a copy of string */
804static var *setvar_s(var *v, const char *value)
805{
806 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
807}
808
809/* same as setvar_s but sets USER flag */
810static var *setvar_u(var *v, const char *value)
811{
812 v = setvar_s(v, value);
813 v->type |= VF_USER;
814 return v;
815}
816
817/* set array element to user string */
818static void setari_u(var *a, int idx, const char *s)
819{
820 var *v;
821
822 v = findvar(iamarray(a), itoa(idx));
823 setvar_u(v, s);
824}
825
826/* assign numeric value to variable */
827static var *setvar_i(var *v, double value)
828{
829 clrvar(v);
830 v->type |= VF_NUMBER;
831 v->number = value;
832 handle_special(v);
833 return v;
834}
835
836static const char *getvar_s(var *v)
837{
838 /* if v is numeric and has no cached string, convert it to string */
839 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
840 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
841 v->string = xstrdup(g_buf);
842 v->type |= VF_CACHED;
843 }
844 return (v->string == NULL) ? "" : v->string;
845}
846
847static double getvar_i(var *v)
848{
849 char *s;
850
851 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
852 v->number = 0;
853 s = v->string;
854 if (s && *s) {
855 debug_printf_eval("getvar_i: '%s'->", s);
856 v->number = my_strtod(&s);
857 debug_printf_eval("%f (s:'%s')\n", v->number, s);
858 if (v->type & VF_USER) {
859 s = skip_spaces(s);
860 if (*s != '\0')
861 v->type &= ~VF_USER;
862 }
863 } else {
864 debug_printf_eval("getvar_i: '%s'->zero\n", s);
865 v->type &= ~VF_USER;
866 }
867 v->type |= VF_CACHED;
868 }
869 debug_printf_eval("getvar_i: %f\n", v->number);
870 return v->number;
871}
872
873/* Used for operands of bitwise ops */
874static unsigned long getvar_i_int(var *v)
875{
876 double d = getvar_i(v);
877
878 /* Casting doubles to longs is undefined for values outside
879 * of target type range. Try to widen it as much as possible */
880 if (d >= 0)
881 return (unsigned long)d;
882 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883 return - (long) (unsigned long) (-d);
884}
885
886static var *copyvar(var *dest, const var *src)
887{
888 if (dest != src) {
889 clrvar(dest);
890 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
891 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
892 dest->number = src->number;
893 if (src->string)
894 dest->string = xstrdup(src->string);
895 }
896 handle_special(dest);
897 return dest;
898}
899
900static var *incvar(var *v)
901{
902 return setvar_i(v, getvar_i(v) + 1.0);
903}
904
905/* return true if v is number or numeric string */
906static int is_numeric(var *v)
907{
908 getvar_i(v);
909 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
910}
911
912/* return 1 when value of v corresponds to true, 0 otherwise */
913static int istrue(var *v)
914{
915 if (is_numeric(v))
916 return (v->number != 0);
917 return (v->string && v->string[0]);
918}
919
920/* temporary variables allocator. Last allocated should be first freed */
921static var *nvalloc(int n)
922{
923 nvblock *pb = NULL;
924 var *v, *r;
925 int size;
926
927 while (g_cb) {
928 pb = g_cb;
929 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
930 break;
931 g_cb = g_cb->next;
932 }
933
934 if (!g_cb) {
935 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
936 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
937 g_cb->size = size;
938 g_cb->pos = g_cb->nv;
939 g_cb->prev = pb;
940 /*g_cb->next = NULL; - xzalloc did it */
941 if (pb)
942 pb->next = g_cb;
943 }
944
945 v = r = g_cb->pos;
946 g_cb->pos += n;
947
948 while (v < g_cb->pos) {
949 v->type = 0;
950 v->string = NULL;
951 v++;
952 }
953
954 return r;
955}
956
957static void nvfree(var *v)
958{
959 var *p;
960
961 if (v < g_cb->nv || v >= g_cb->pos)
962 syntax_error(EMSG_INTERNAL_ERROR);
963
964 for (p = v; p < g_cb->pos; p++) {
965 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
966 clear_array(iamarray(p));
967 free(p->x.array->items);
968 free(p->x.array);
969 }
970 if (p->type & VF_WALK) {
971 walker_list *n;
972 walker_list *w = p->x.walker;
973 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
974 p->x.walker = NULL;
975 while (w) {
976 n = w->prev;
977 debug_printf_walker(" free(%p)\n", w);
978 free(w);
979 w = n;
980 }
981 }
982 clrvar(p);
983 }
984
985 g_cb->pos = v;
986 while (g_cb->prev && g_cb->pos == g_cb->nv) {
987 g_cb = g_cb->prev;
988 }
989}
990
991/* ------- awk program text parsing ------- */
992
993/* Parse the next token at g_pos; results go to t_tclass plus, depending on
994 * the token, t_info (table tokens), t_string (strings, regexps, names) or
995 * t_double (numbers). Dies with a syntax error if the token's class is not
 * in `expected'. Returns the token class.
 * NB: string, regexp and name tokens are rewritten in place inside the
 * program text buffer.
 */
996static uint32_t next_token(uint32_t expected)
997{
998#define concat_inserted (G.next_token__concat_inserted)
999#define save_tclass (G.next_token__save_tclass)
1000#define save_info (G.next_token__save_info)
1001/* Initialized to TC_OPTERM: */
1002#define ltclass (G.next_token__ltclass)
1003
1004 char *p, *s;
1005 const char *tl;
1006 uint32_t tc;
1007 const uint32_t *ti;
1008
1009 if (t_rollback) {
 /* rollback_token() was called: deliver the previous token again,
  * t_tclass and t_info are still in place */
1010 t_rollback = FALSE;
1011
1012 } else if (concat_inserted) {
 /* the previous call returned a synthesized concatenation operator;
  * now deliver the real token that was saved at that time */
1013 concat_inserted = FALSE;
1014 t_tclass = save_tclass;
1015 t_info = save_info;
1016
1017 } else {
1018 p = g_pos;
1019 readnext:
1020 p = skip_spaces(p);
1021 g_lineno = t_lineno;
 /* skip a '#' comment up to (not including) the newline */
1022 if (*p == '#')
1023 while (*p != '\n' && *p != '\0')
1024 p++;
1025
1026 if (*p == '\n')
1027 t_lineno++;
1028
1029 if (*p == '\0') {
1030 tc = TC_EOF;
1031 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1032
1033 } else if (*p == '\"') {
1034 /* it's a string */
 /* unescaped in place: p reads, s writes (s never gets ahead of p) */
1035 t_string = s = ++p;
1036 while (*p != '\"') {
1037 char *pp;
1038 if (*p == '\0' || *p == '\n')
1039 syntax_error(EMSG_UNEXP_EOS);
1040 pp = p;
1041 *s++ = nextchar(&pp);
1042 p = pp;
1043 }
1044 p++;
1045 *s = '\0';
1046 tc = TC_STRING;
1047 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1048
1049 } else if ((expected & TC_REGEXP) && *p == '/') {
1050 /* it's regexp */
 /* (only when the caller accepts a regexp here; otherwise '/' is
  * looked up in tokenlist[] below) */
1051 t_string = s = ++p;
1052 while (*p != '/') {
1053 if (*p == '\0' || *p == '\n')
1054 syntax_error(EMSG_UNEXP_EOS);
1055 *s = *p++;
1056 if (*s++ == '\\') {
 /* try to decode the escape; pp == p afterwards means
  * nothing was consumed, then the next char is copied as is */
1057 char *pp = p;
1058 s[-1] = bb_process_escape_sequence((const char **)&pp);
1059 if (*p == '\\')
1060 *s++ = '\\';
1061 if (pp == p)
1062 *s++ = *p++;
1063 else
1064 p = pp;
1065 }
1066 }
1067 p++;
1068 *s = '\0';
1069 tc = TC_REGEXP;
1070 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1071
1072 } else if (*p == '.' || isdigit(*p)) {
1073 /* it's a number */
1074 char *pp = p;
1075 t_double = my_strtod(&pp);
1076 p = pp;
 /* a '.' right after the number (e.g. "1.2.3") is an error */
1077 if (*p == '.')
1078 syntax_error(EMSG_UNEXP_TOKEN);
1079 tc = TC_NUMBER;
1080 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1081
1082 } else {
1083 /* search for something known */
 /* walk tokenlist[] and tokeninfo[] in step; tc is the class bit of
  * the group being scanned */
1084 tl = tokenlist;
1085 tc = 0x00000001;
1086 ti = tokeninfo;
1087 while (*tl) {
1088 int l = (unsigned char) *tl++;
1089 if (l == (unsigned char) NTCC) {
1090 tc <<= 1;
1091 continue;
1092 }
1093 /* if token class is expected,
1094 * token matches,
1095 * and it's not a longer word,
1096 */
1097 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1098 && strncmp(p, tl, l) == 0
1099 && !((tc & TC_WORD) && isalnum_(p[l]))
1100 ) {
1101 /* then this is what we are looking for */
1102 t_info = *ti;
1103 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1104 p += l;
1105 goto token_found;
1106 }
1107 ti++;
1108 tl += l;
1109 }
1110 /* not a known token */
1111
1112 /* is it a name? (var/array/function) */
1113 if (!isalnum_(*p))
1114 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1115 /* yes */
 /* shift the name one byte to the left (overwriting the character
  * in front of it) to make room for the terminating NUL */
1116 t_string = --p;
1117 while (isalnum_(*++p)) {
1118 p[-1] = *p;
1119 }
1120 p[-1] = '\0';
1121 tc = TC_VARIABLE;
1122 /* also consume whitespace between functionname and bracket */
1123 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1124 p = skip_spaces(p);
 /* '(' is left in the input for the caller, '[' is consumed here */
1125 if (*p == '(') {
1126 tc = TC_FUNCTION;
1127 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1128 } else {
1129 if (*p == '[') {
1130 p++;
1131 tc = TC_ARRAY;
1132 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1133 } else
1134 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1135 }
1136 }
1137 token_found:
1138 g_pos = p;
1139
1140 /* skipping newlines in some cases */
 /* (ltclass still holds the class of the previously returned token) */
1141 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1142 goto readnext;
1143
1144 /* insert concatenation operator when needed */
 /* the real token is stashed and returned by the next call */
1145 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1146 concat_inserted = TRUE;
1147 save_tclass = tc;
1148 save_info = t_info;
1149 tc = TC_BINOP;
1150 t_info = OC_CONCAT | SS | P(35);
1151 }
1152
1153 t_tclass = tc;
1154 }
1155 ltclass = t_tclass;
1156
1157 /* Are we ready for this? */
1158 if (!(ltclass & expected))
1159 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1160 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1161
1162 return ltclass;
1163#undef concat_inserted
1164#undef save_tclass
1165#undef save_info
1166#undef ltclass
1167}
1168
/* Flag the current token as "not consumed" by setting t_rollback.
 * NOTE(review): presumably next_token() checks this flag and hands the
 * same token out again - confirm in next_token(). */
static void rollback_token(void)
{
	t_rollback = TRUE;
}
1173
1174static node *new_node(uint32_t info)
1175{
1176 node *n;
1177
1178 n = xzalloc(sizeof(node));
1179 n->info = info;
1180 n->lineno = g_lineno;
1181 return n;
1182}
1183
1184static void mk_re_node(const char *s, node *n, regex_t *re)
1185{
1186 n->info = OC_REGEXP;
1187 n->l.re = re;
1188 n->r.ire = re + 1;
1189 xregcomp(re, s, REG_EXTENDED);
1190 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1191}
1192
1193static node *condition(void)
1194{
1195 next_token(TC_SEQSTART);
1196 return parse_expr(TC_SEQTERM);
1197}
1198
/* Parse an expression terminated by any token class in iexp and return
 * a pointer to the built subtree (NULL if the expression is empty).
 * The terminator token is consumed by parse_expr.
 *
 * The tree is built incrementally: "sn" is a dummy root living on the
 * stack, "cn" is the most recently created node, and every node's a.n
 * link points to the node it was attached under. "xtc" is the set of
 * token classes handed to next_token() for the following token.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, xtc;
	var *v;

	debug_printf_parse("%s(%x)\n", __func__, iexp);

	/* dummy root: the finished tree ends up in sn.r.n */
	sn.info = PRIMASK;
	sn.r.n = glptr = NULL;
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	/* loop until a terminator from iexp is returned */
	while (!((tc = next_token(xtc)) & iexp)) {

		/* glptr is non-NULL only right after a TC_GETLINE token (see below) */
		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* input redirection (<) attached to glptr node */
			debug_printf_parse("%s: input redir\n", __func__);
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
			/* for binary and postfix-unary operators, jump back over
			 * previous operators with higher priority */
			vn = cn;
			/* climb the a.n links while the new operator's PRIMASK bits
			 * exceed the parent's PRIMASK2 bits, or while the new token
			 * repeats an identical OC_COLON node */
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
			) {
				vn = vn->a.n;
			}
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* splice the new operator node in between vn and its parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				/* binary: what was parsed so far becomes the left operand */
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* it's a pipe */
					next_token(TC_GETLINE);
					/* give maximum priority to this pipe */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix unary: the operand hangs off the right link */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			debug_printf_parse("%s: other\n", __func__);
			/* for operands and prefix-unary operators, attach them
			 * to last node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple tclasses should be used! */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
					cn->info = OC_VAR;
					/* a name found in ahash is referenced by its stored
					 * index (OC_FNARG); any other name gets a var via newvar() */
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.aidx = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* subscript: recursively parse up to TC_ARRTERM */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
					/* literals are stored in a freshly allocated var */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					debug_printf_parse("%s: TC_REGEXP\n", __func__);
					/* room for two regex_t: mk_re_node() compiles both a
					 * case-sensitive and a REG_ICASE variant */
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					debug_printf_parse("%s: TC_FUNCTION\n", __func__);
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					/* argument list is parsed as a parenthesized expression */
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					debug_printf_parse("%s: TC_SEQSTART\n", __func__);
					/* the node created above is replaced by the parsed
					 * parenthesized subtree */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					if (!cn)
						syntax_error("Empty sequence");
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					debug_printf_parse("%s: TC_GETLINE\n", __func__);
					/* remember the node so that a following '<' can be
					 * recognized as input redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					debug_printf_parse("%s: TC_BUILTIN\n", __func__);
					cn->l.n = condition();
					break;
				}
			}
		}
	}

	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
	return sn.r.n;
}
1335
1336/* add node to chain. Return ptr to alloc'd node */
1337static node *chain_node(uint32_t info)
1338{
1339 node *n;
1340
1341 if (!seq->first)
1342 seq->first = seq->last = new_node(0);
1343
1344 if (seq->programname != g_progname) {
1345 seq->programname = g_progname;
1346 n = chain_node(OC_NEWSOURCE);
1347 n->l.new_progname = xstrdup(g_progname);
1348 }
1349
1350 n = seq->last;
1351 n->info = info;
1352 seq->last = n->a.n = new_node(OC_DONE);
1353
1354 return n;
1355}
1356
1357static void chain_expr(uint32_t info)
1358{
1359 node *n;
1360
1361 n = chain_node(info);
1362 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1363 if (t_tclass & TC_GRPTERM)
1364 rollback_token();
1365}
1366
1367static node *chain_loop(node *nn)
1368{
1369 node *n, *n2, *save_brk, *save_cont;
1370
1371 save_brk = break_ptr;
1372 save_cont = continue_ptr;
1373
1374 n = chain_node(OC_BR | Vx);
1375 continue_ptr = new_node(OC_EXEC);
1376 break_ptr = new_node(OC_EXEC);
1377 chain_group();
1378 n2 = chain_node(OC_EXEC | Vx);
1379 n2->l.n = nn;
1380 n2->a.n = n;
1381 continue_ptr->a.n = n2;
1382 break_ptr->a.n = n->r.n = seq->last;
1383
1384 continue_ptr = save_cont;
1385 break_ptr = save_brk;
1386
1387 return n;
1388}
1389
1390/* parse group and attach it to chain */
1391static void chain_group(void)
1392{
1393 uint32_t c;
1394 node *n, *n2, *n3;
1395
1396 do {
1397 c = next_token(TC_GRPSEQ);
1398 } while (c & TC_NEWLINE);
1399
1400 if (c & TC_GRPSTART) {
1401 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1402 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1403 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1404 if (t_tclass & TC_NEWLINE)
1405 continue;
1406 rollback_token();
1407 chain_group();
1408 }
1409 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1410 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1411 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1412 rollback_token();
1413 chain_expr(OC_EXEC | Vx);
1414 } else {
1415 /* TC_STATEMNT */
1416 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1417 switch (t_info & OPCLSMASK) {
1418 case ST_IF:
1419 debug_printf_parse("%s: ST_IF\n", __func__);
1420 n = chain_node(OC_BR | Vx);
1421 n->l.n = condition();
1422 chain_group();
1423 n2 = chain_node(OC_EXEC);
1424 n->r.n = seq->last;
1425 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1426 chain_group();
1427 n2->a.n = seq->last;
1428 } else {
1429 rollback_token();
1430 }
1431 break;
1432
1433 case ST_WHILE:
1434 debug_printf_parse("%s: ST_WHILE\n", __func__);
1435 n2 = condition();
1436 n = chain_loop(NULL);
1437 n->l.n = n2;
1438 break;
1439
1440 case ST_DO:
1441 debug_printf_parse("%s: ST_DO\n", __func__);
1442 n2 = chain_node(OC_EXEC);
1443 n = chain_loop(NULL);
1444 n2->a.n = n->a.n;
1445 next_token(TC_WHILE);
1446 n->l.n = condition();
1447 break;
1448
1449 case ST_FOR:
1450 debug_printf_parse("%s: ST_FOR\n", __func__);
1451 next_token(TC_SEQSTART);
1452 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1453 if (t_tclass & TC_SEQTERM) { /* for-in */
1454 if ((n2->info & OPCLSMASK) != OC_IN)
1455 syntax_error(EMSG_UNEXP_TOKEN);
1456 n = chain_node(OC_WALKINIT | VV);
1457 n->l.n = n2->l.n;
1458 n->r.n = n2->r.n;
1459 n = chain_loop(NULL);
1460 n->info = OC_WALKNEXT | Vx;
1461 n->l.n = n2->l.n;
1462 } else { /* for (;;) */
1463 n = chain_node(OC_EXEC | Vx);
1464 n->l.n = n2;
1465 n2 = parse_expr(TC_SEMICOL);
1466 n3 = parse_expr(TC_SEQTERM);
1467 n = chain_loop(n3);
1468 n->l.n = n2;
1469 if (!n2)
1470 n->info = OC_EXEC;
1471 }
1472 break;
1473
1474 case OC_PRINT:
1475 case OC_PRINTF:
1476 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1477 n = chain_node(t_info);
1478 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1479 if (t_tclass & TC_OUTRDR) {
1480 n->info |= t_info;
1481 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1482 }
1483 if (t_tclass & TC_GRPTERM)
1484 rollback_token();
1485 break;
1486
1487 case OC_BREAK:
1488 debug_printf_parse("%s: OC_BREAK\n", __func__);
1489 n = chain_node(OC_EXEC);
1490 n->a.n = break_ptr;
1491 break;
1492
1493 case OC_CONTINUE:
1494 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1495 n = chain_node(OC_EXEC);
1496 n->a.n = continue_ptr;
1497 break;
1498
1499 /* delete, next, nextfile, return, exit */
1500 default:
1501 debug_printf_parse("%s: default\n", __func__);
1502 chain_expr(t_info);
1503 }
1504 }
1505}
1506
1507static void parse_program(char *p)
1508{
1509 uint32_t tclass;
1510 node *cn;
1511 func *f;
1512 var *v;
1513
1514 g_pos = p;
1515 t_lineno = 1;
1516 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1517 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1518
1519 if (tclass & TC_OPTERM) {
1520 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1521 continue;
1522 }
1523
1524 seq = &mainseq;
1525 if (tclass & TC_BEGIN) {
1526 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1527 seq = &beginseq;
1528 chain_group();
1529
1530 } else if (tclass & TC_END) {
1531 debug_printf_parse("%s: TC_END\n", __func__);
1532 seq = &endseq;
1533 chain_group();
1534
1535 } else if (tclass & TC_FUNCDECL) {
1536 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1537 next_token(TC_FUNCTION);
1538 g_pos++;
1539 f = newfunc(t_string);
1540 f->body.first = NULL;
1541 f->nargs = 0;
1542 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1543 v = findvar(ahash, t_string);
1544 v->x.aidx = f->nargs++;
1545
1546 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1547 break;
1548 }
1549 seq = &f->body;
1550 chain_group();
1551 clear_array(ahash);
1552
1553 } else if (tclass & TC_OPSEQ) {
1554 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1555 rollback_token();
1556 cn = chain_node(OC_TEST);
1557 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1558 if (t_tclass & TC_GRPSTART) {
1559 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1560 rollback_token();
1561 chain_group();
1562 } else {
1563 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1564 chain_node(OC_PRINT);
1565 }
1566 cn->r.n = mainseq.last;
1567
1568 } else /* if (tclass & TC_GRPSTART) */ {
1569 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1570 rollback_token();
1571 chain_group();
1572 }
1573 }
1574 debug_printf_parse("%s: TC_EOF\n", __func__);
1575}
1576
1577
1578/* -------- program execution part -------- */
1579
1580static node *mk_splitter(const char *s, tsplitter *spl)
1581{
1582 regex_t *re, *ire;
1583 node *n;
1584
1585 re = &spl->re[0];
1586 ire = &spl->re[1];
1587 n = &spl->n;
1588 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1589 regfree(re);
1590 regfree(ire); // TODO: nuke ire, use re+1?
1591 }
1592 if (s[0] && s[1]) { /* strlen(s) > 1 */
1593 mk_re_node(s, n, re);
1594 } else {
1595 n->info = (uint32_t) s[0];
1596 }
1597
1598 return n;
1599}
1600
1601/* use node as a regular expression. Supplied with node ptr and regex_t
1602 * storage space. Return ptr to regex (if result points to preg, it should
1603 * be later regfree'd manually
1604 */
1605static regex_t *as_regex(node *op, regex_t *preg)
1606{
1607 int cflags;
1608 var *v;
1609 const char *s;
1610
1611 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1612 return icase ? op->r.ire : op->l.re;
1613 }
1614 v = nvalloc(1);
1615 s = getvar_s(evaluate(op, v));
1616
1617 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1618 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621 * (maybe gsub is not supposed to use REG_EXTENDED?).
1622 */
1623 if (regcomp(preg, s, cflags)) {
1624 cflags &= ~REG_EXTENDED;
1625 xregcomp(preg, s, cflags);
1626 }
1627 nvfree(v);
1628 return preg;
1629}
1630
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * The buffer is reallocated even when n equals the old size, so there
 * is always at least one spare byte beyond n. *size tracks the
 * allocated size and is not read while b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1643
1644/* resize field storage space */
1645static void fsrealloc(int size)
1646{
1647 int i;
1648
1649 if (size >= maxfields) {
1650 i = maxfields;
1651 maxfields = size + 16;
1652 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1653 for (; i < maxfields; i++) {
1654 Fields[i].type = VF_SPECIAL;
1655 Fields[i].string = NULL;
1656 }
1657 }
1658 /* if size < nfields, clear extra field variables */
1659 for (i = size; i < nfields; i++) {
1660 clrvar(Fields + i);
1661 }
1662 nfields = size;
1663}
1664
/* Split string s into fields according to splitter node spl.
 * A newly allocated buffer is stored in *slist; it receives the fields
 * as consecutive NUL-terminated strings. Returns the number of fields.
 * NOTE(review): the caller presumably owns and frees *slist - the
 * callers visible in this file do so.
 *
 * Depending on spl->info the split is done by regex (OC_REGEXP node),
 * per character (separator byte is NUL), on a single separator
 * character, or on runs of whitespace (separator is ' ').
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int l, n;
	char c[4];
	char *s1;
	regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

	/* in worst case, each char would be a separate field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[0..1]: separator char (twice, for the icase variants below);
	 * c[2]: '\n' if RS is empty, else NUL; c[3]: terminator */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	n = 0;
	if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
		if (!*s)
			return n; /* "": zero fields */
		n++; /* at least one field will be there */
		do {
			l = strcspn(s, c+2); /* len till next NUL or \n */
			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
			 && pmatch[0].rm_so <= l
			) {
				l = pmatch[0].rm_so;
				/* empty match at the very start: consume one char
				 * so that the loop makes progress */
				if (pmatch[0].rm_eo == 0) {
					l++;
					pmatch[0].rm_eo++;
				}
				n++; /* we saw yet another delimiter */
			} else {
				/* no usable match: the field runs up to l; also
				 * step over the char at s[l] unless it is the NUL */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			memcpy(s1, s, l);
			/* make sure we remove *all* of the separator chars */
			do {
				s1[l] = '\0';
			} while (++l < pmatch[0].rm_eo);
			nextword(&s1);
			s += pmatch[0].rm_eo;
		} while (*s);
		return n;
	}
	if (c[0] == '\0') {  /* null split */
		/* every character becomes its own field */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {  /* single-character split */
		if (icase) {
			/* accept both cases of the separator */
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* c holds the separator(s), plus '\n' when RS is empty */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
1745
1746static void split_f0(void)
1747{
1748/* static char *fstrings; */
1749#define fstrings (G.split_f0__fstrings)
1750
1751 int i, n;
1752 char *s;
1753
1754 if (is_f0_split)
1755 return;
1756
1757 is_f0_split = TRUE;
1758 free(fstrings);
1759 fsrealloc(0);
1760 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1761 fsrealloc(n);
1762 s = fstrings;
1763 for (i = 0; i < n; i++) {
1764 Fields[i].string = nextword(&s);
1765 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1766 }
1767
1768 /* set NF manually to avoid side effects */
1769 clrvar(intvar[NF]);
1770 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1771 intvar[NF]->number = nfields;
1772#undef fstrings
1773}
1774
1775/* perform additional actions when some internal variables changed */
1776static void handle_special(var *v)
1777{
1778 int n;
1779 char *b;
1780 const char *sep, *s;
1781 int sl, l, len, i, bsize;
1782
1783 if (!(v->type & VF_SPECIAL))
1784 return;
1785
1786 if (v == intvar[NF]) {
1787 n = (int)getvar_i(v);
1788 fsrealloc(n);
1789
1790 /* recalculate $0 */
1791 sep = getvar_s(intvar[OFS]);
1792 sl = strlen(sep);
1793 b = NULL;
1794 len = 0;
1795 for (i = 0; i < n; i++) {
1796 s = getvar_s(&Fields[i]);
1797 l = strlen(s);
1798 if (b) {
1799 memcpy(b+len, sep, sl);
1800 len += sl;
1801 }
1802 b = qrealloc(b, len+l+sl, &bsize);
1803 memcpy(b+len, s, l);
1804 len += l;
1805 }
1806 if (b)
1807 b[len] = '\0';
1808 setvar_p(intvar[F0], b);
1809 is_f0_split = TRUE;
1810
1811 } else if (v == intvar[F0]) {
1812 is_f0_split = FALSE;
1813
1814 } else if (v == intvar[FS]) {
1815 /*
1816 * The POSIX-2008 standard says that changing FS should have no effect on the
1817 * current input line, but only on the next one. The language is:
1818 *
1819 * > Before the first reference to a field in the record is evaluated, the record
1820 * > shall be split into fields, according to the rules in Regular Expressions,
1821 * > using the value of FS that was current at the time the record was read.
1822 *
1823 * So, split up current line before assignment to FS:
1824 */
1825 split_f0();
1826
1827 mk_splitter(getvar_s(v), &fsplitter);
1828
1829 } else if (v == intvar[RS]) {
1830 mk_splitter(getvar_s(v), &rsplitter);
1831
1832 } else if (v == intvar[IGNORECASE]) {
1833 icase = istrue(v);
1834
1835 } else { /* $n */
1836 n = getvar_i(intvar[NF]);
1837 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1838 /* right here v is invalid. Just to note... */
1839 }
1840}
1841
1842/* step through func/builtin/etc arguments */
1843static node *nextarg(node **pn)
1844{
1845 node *n;
1846
1847 n = *pn;
1848 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1849 *pn = n->r.n;
1850 n = n->l.n;
1851 } else {
1852 *pn = NULL;
1853 }
1854 return n;
1855}
1856
1857static void hashwalk_init(var *v, xhash *array)
1858{
1859 hash_item *hi;
1860 unsigned i;
1861 walker_list *w;
1862 walker_list *prev_walker;
1863
1864 if (v->type & VF_WALK) {
1865 prev_walker = v->x.walker;
1866 } else {
1867 v->type |= VF_WALK;
1868 prev_walker = NULL;
1869 }
1870 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1871
1872 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1873 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1874 w->cur = w->end = w->wbuf;
1875 w->prev = prev_walker;
1876 for (i = 0; i < array->csize; i++) {
1877 hi = array->items[i];
1878 while (hi) {
1879 strcpy(w->end, hi->name);
1880 nextword(&w->end);
1881 hi = hi->next;
1882 }
1883 }
1884}
1885
1886static int hashwalk_next(var *v)
1887{
1888 walker_list *w = v->x.walker;
1889
1890 if (w->cur >= w->end) {
1891 walker_list *prev_walker = w->prev;
1892
1893 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1894 free(w);
1895 v->x.walker = prev_walker;
1896 return FALSE;
1897 }
1898
1899 setvar_s(v, nextword(&w->cur));
1900 return TRUE;
1901}
1902
1903/* evaluate node, return 1 when result is true, 0 otherwise */
1904static int ptest(node *pattern)
1905{
1906 /* ptest__v is "static": to save stack space? */
1907 return istrue(evaluate(pattern, &G.ptest__v));
1908}
1909
/* Read the next record from stream rsm into variable v; the text of
 * the matched record separator is stored in RT.
 * Returns 1 if a record was read, 0 at end of input, -1 on read error
 * (in which case ERRNO is set from errno).
 *
 * Buffer bookkeeping, kept in rsm between calls:
 *   m    - start of the allocated buffer (rsm->buffer)
 *   a    - offset of the unconsumed data within m (rsm->adv)
 *   p    - number of unconsumed bytes (rsm->pos)
 *   size - allocated size of m (rsm->size)
 * Within one call: b = m + a; so/eo are the offsets (relative to b) of
 * the start/end of the record separator; rp is the offset at which the
 * record text starts; pp is p before the most recent read.
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[2];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	debug_printf_eval("entered %s()\n", __func__);

	/* we're using our own buffer since we need access to accumulating
	 * characters
	 */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info;
	rp = 0;

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		so = eo = p;
		r = 1;
		/* look for a record separator in the data we already have */
		if (p > 0) {
			if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match reaching the end of the buffered data
					 * is not accepted yet: read more first */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single-character separator; an embedded NUL
				 * also ends the record */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* separator char is NUL: skip leading newlines, then
				 * the record ends at a run of two or more newlines */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move the pending data to the start
		 * of the buffer, make room and read more */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* safe_read() returned a negative value */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp);

	if (p == 0) {
		/* nothing buffered: r becomes 0 (end of input) or -1 (error) */
		r--;
	} else {
		/* temporarily NUL-terminate the record, then the separator */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* remember what is left over for the next call */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	debug_printf_eval("returning from %s(): %d\n", __func__, r);

	return r;
}
2011
2012static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2013{
2014 int r = 0;
2015 char c;
2016 const char *s = format;
2017
2018 if (int_as_int && n == (int)n) {
2019 r = snprintf(b, size, "%d", (int)n);
2020 } else {
2021 do { c = *s; } while (c && *++s);
2022 if (strchr("diouxX", c)) {
2023 r = snprintf(b, size, format, (int)n);
2024 } else if (strchr("eEfgG", c)) {
2025 r = snprintf(b, size, format, n);
2026 } else {
2027 syntax_error(EMSG_INV_FMT);
2028 }
2029 }
2030 return r;
2031}
2032
2033/* formatted output into an allocated buffer, return ptr to buffer */
2034static char *awk_printf(node *n)
2035{
2036 char *b = NULL;
2037 char *fmt, *s, *f;
2038 const char *s1;
2039 int i, j, incr, bsize;
2040 char c, c1;
2041 var *v, *arg;
2042
2043 v = nvalloc(1);
2044 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2045
2046 i = 0;
2047 while (*f) {
2048 s = f;
2049 while (*f && (*f != '%' || *++f == '%'))
2050 f++;
2051 while (*f && !isalpha(*f)) {
2052 if (*f == '*')
2053 syntax_error("%*x formats are not supported");
2054 f++;
2055 }
2056
2057 incr = (f - s) + MAXVARFMT;
2058 b = qrealloc(b, incr + i, &bsize);
2059 c = *f;
2060 if (c != '\0')
2061 f++;
2062 c1 = *f;
2063 *f = '\0';
2064 arg = evaluate(nextarg(&n), v);
2065
2066 j = i;
2067 if (c == 'c' || !c) {
2068 i += sprintf(b+i, s, is_numeric(arg) ?
2069 (char)getvar_i(arg) : *getvar_s(arg));
2070 } else if (c == 's') {
2071 s1 = getvar_s(arg);
2072 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2073 i += sprintf(b+i, s, s1);
2074 } else {
2075 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2076 }
2077 *f = c1;
2078
2079 /* if there was an error while sprintf, return value is negative */
2080 if (i < j)
2081 i = j;
2082 }
2083
2084 free(fmt);
2085 nvfree(v);
2086 b = xrealloc(b, i + 1);
2087 b[i] = '\0';
2088 return b;
2089}
2090
/* Common substitution routine.
 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
 * store result into (dest), return number of substitutions.
 * If nm = 0, replace all matches.
 * If src or dst is NULL, use $0.
 * If subexp != 0, enable subexpression matching (\1-\9).
 *
 * In (repl), an unescaped '&' stands for the whole match; with subexp
 * set, a backslash followed by a digit stands for that subexpression.
 * Note that the return value is the number of matches seen, which for
 * nm > 1 includes the matches that were skipped.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);
	replen = strlen(repl);
	/* sp: unprocessed rest of the source; residx: length of the result so far */
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
		/* copy everything up to the end of the match verbatim... */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;

			/* replace */
			/* ...then drop the matched text again and append the replacement */
			residx -= (eo - so);
			nbs = 0; /* number of consecutive backslashes just seen */
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* take back the char just stored and about half
					 * of the backslashes preceding it */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						nbs++;
					}
					if (nbs % 2) {
						/* escaped: keep the character itself */
						resbuf[residx++] = c;
					} else {
						/* insert the text of match/subexpression j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match (e.g. "b*" will match anywhere).
			 * Advance by one char. */
//BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
//it should be able to do it correctly.
			/* Subtle: this is safe only because
			 * qrealloc allocated at least one extra byte */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append the unmatched rest of the source */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	//bb_error_msg("end sp:'%s'%p", sp,sp);
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* as_regex() compiled into our storage only if it returned it */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2188
2189static NOINLINE int do_mktime(const char *ds)
2190{
2191 struct tm then;
2192 int count;
2193
2194 /*memset(&then, 0, sizeof(then)); - not needed */
2195 then.tm_isdst = -1; /* default is unknown */
2196
2197 /* manpage of mktime says these fields are ints,
2198 * so we can sscanf stuff directly into them */
2199 count = sscanf(ds, "%u %u %u %u %u %u %d",
2200 &then.tm_year, &then.tm_mon, &then.tm_mday,
2201 &then.tm_hour, &then.tm_min, &then.tm_sec,
2202 &then.tm_isdst);
2203
2204 if (count < 6
2205 || (unsigned)then.tm_mon < 1
2206 || (unsigned)then.tm_year < 1900
2207 ) {
2208 return -1;
2209 }
2210
2211 then.tm_mon -= 1;
2212 then.tm_year -= 1900;
2213
2214 return mktime(&then);
2215}
2216
/* Execute the builtin function described by node op and store its
 * result in res; returns res.
 * Up to four arguments are collected from op->l.n. Bits of op->info
 * select, per argument, whether it is evaluated (av[i]) and whether its
 * string value is fetched as well (as[i]); the top two bits of
 * op->info hold the minimum number of arguments. The function to run is
 * selected by (op->info & OPNMASK).
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tv;
	node *an[4];
	var *av[4];
	const char *as[4];
	regmatch_t pmatch[2];
	regex_t sreg, *re;
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	tv = nvalloc(4);
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	/* isr is shifted right by one per argument, so every argument
	 * tests its own pair of flag bits */
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		if (isr & 0x09000000)
			av[i] = evaluate(an[i], &tv[i]);
		if (isr & 0x08000000)
			as[i] = getvar_s(av[i]);
		isr >>= 1;
	}

	nargs = i;
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	case B_a2:
		/* atan2(); only available with libm support */
		if (ENABLE_FEATURE_AWK_LIBM)
			setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
		else
			syntax_error(EMSG_NO_MATH);
		break;

	case B_sp: {
		/* split as[0] into array av[1]; result is the field count */
		char *s, *s1;

		if (nargs > 2) {
			/* explicit separator: a regex node is used as is,
			 * anything else is turned into a temporary splitter */
			spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
				an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;
		clear_array(iamarray(av[1]));
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	case B_ss: {
		/* substring of as[0]: 1-based start av[1], optional length av[2] */
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp the start offset to [0, l] */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
	 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	case B_lo:
	case B_up: {
		/* case conversion of a copy of as[0]; only ASCII letters change */
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			//*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	case B_ix:
		/* 1-based position of as[1] within as[0], 0 if not found
		 * (or if as[1] is empty) */
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* this piece of code is terribly slow and
				 * really should be rewritten
				 */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	case B_ti:
		/* format a timestamp (av[1], default: now) with strftime(),
		 * using as[0] or a default format */
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		//s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	case B_ma:
		/* regex match of an[1] against as[0]: sets RSTART and RLENGTH
		 * (0 and -1 if there is no match), result is RSTART */
		re = as_regex(an[1], &sreg);
		n = regexec(re, as[0], 1, pmatch, 0);
		if (n == 0) {
			/* convert 0-based offsets to 1-based positions */
			pmatch[0].rm_so++;
			pmatch[0].rm_eo++;
		} else {
			pmatch[0].rm_so = 0;
			pmatch[0].rm_eo = -1;
		}
		setvar_i(newvar("RSTART"), pmatch[0].rm_so);
		setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
		setvar_i(res, pmatch[0].rm_so);
		if (re == &sreg)
			regfree(re);
		break;

	case B_ge:
		/* substitution with subexpression support; the new string
		 * itself is the result */
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	case B_gs:
		/* replace all matches in place; result is the count */
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	case B_su:
		/* replace the first match in place; result is the count */
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tv);
	return res;
#undef tspl
}
2413
2414/*
2415 * Evaluate node - the heart of the program. Supplied with subtree
2416 * and place where to store result. returns ptr to result.
2417 */
2418#define XC(n) ((n) >> 8)
2419
2420static var *evaluate(node *op, var *res)
2421{
2422/* This procedure is recursive so we should count every byte */
2423#define fnargs (G.evaluate__fnargs)
2424/* seed is initialized to 1 */
2425#define seed (G.evaluate__seed)
2426#define sreg (G.evaluate__sreg)
2427
2428 var *v1;
2429
2430 if (!op)
2431 return setvar_s(res, NULL);
2432
2433 debug_printf_eval("entered %s()\n", __func__);
2434
2435 v1 = nvalloc(2);
2436
2437 while (op) {
2438 struct {
2439 var *v;
2440 const char *s;
2441 } L = L; /* for compiler */
2442 struct {
2443 var *v;
2444 const char *s;
2445 } R = R;
2446 double L_d = L_d;
2447 uint32_t opinfo;
2448 int opn;
2449 node *op1;
2450
2451 opinfo = op->info;
2452 opn = (opinfo & OPNMASK);
2453 g_lineno = op->lineno;
2454 op1 = op->l.n;
2455 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2456
2457 /* execute inevitable things */
2458 if (opinfo & OF_RES1)
2459 L.v = evaluate(op1, v1);
2460 if (opinfo & OF_RES2)
2461 R.v = evaluate(op->r.n, v1+1);
2462 if (opinfo & OF_STR1) {
2463 L.s = getvar_s(L.v);
2464 debug_printf_eval("L.s:'%s'\n", L.s);
2465 }
2466 if (opinfo & OF_STR2) {
2467 R.s = getvar_s(R.v);
2468 debug_printf_eval("R.s:'%s'\n", R.s);
2469 }
2470 if (opinfo & OF_NUM1) {
2471 L_d = getvar_i(L.v);
2472 debug_printf_eval("L_d:%f\n", L_d);
2473 }
2474
2475 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2476 switch (XC(opinfo & OPCLSMASK)) {
2477
2478 /* -- iterative node type -- */
2479
2480 /* test pattern */
2481 case XC( OC_TEST ):
2482 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2483 /* it's range pattern */
2484 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2485 op->info |= OF_CHECKED;
2486 if (ptest(op1->r.n))
2487 op->info &= ~OF_CHECKED;
2488 op = op->a.n;
2489 } else {
2490 op = op->r.n;
2491 }
2492 } else {
2493 op = ptest(op1) ? op->a.n : op->r.n;
2494 }
2495 break;
2496
2497 /* just evaluate an expression, also used as unconditional jump */
2498 case XC( OC_EXEC ):
2499 break;
2500
2501 /* branch, used in if-else and various loops */
2502 case XC( OC_BR ):
2503 op = istrue(L.v) ? op->a.n : op->r.n;
2504 break;
2505
2506 /* initialize for-in loop */
2507 case XC( OC_WALKINIT ):
2508 hashwalk_init(L.v, iamarray(R.v));
2509 break;
2510
2511 /* get next array item */
2512 case XC( OC_WALKNEXT ):
2513 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2514 break;
2515
2516 case XC( OC_PRINT ):
2517 case XC( OC_PRINTF ): {
2518 FILE *F = stdout;
2519
2520 if (op->r.n) {
2521 rstream *rsm = newfile(R.s);
2522 if (!rsm->F) {
2523 if (opn == '|') {
2524 rsm->F = popen(R.s, "w");
2525 if (rsm->F == NULL)
2526 bb_perror_msg_and_die("popen");
2527 rsm->is_pipe = 1;
2528 } else {
2529 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2530 }
2531 }
2532 F = rsm->F;
2533 }
2534
2535 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2536 if (!op1) {
2537 fputs(getvar_s(intvar[F0]), F);
2538 } else {
2539 while (op1) {
2540 var *v = evaluate(nextarg(&op1), v1);
2541 if (v->type & VF_NUMBER) {
2542 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2543 getvar_i(v), TRUE);
2544 fputs(g_buf, F);
2545 } else {
2546 fputs(getvar_s(v), F);
2547 }
2548
2549 if (op1)
2550 fputs(getvar_s(intvar[OFS]), F);
2551 }
2552 }
2553 fputs(getvar_s(intvar[ORS]), F);
2554
2555 } else { /* OC_PRINTF */
2556 char *s = awk_printf(op1);
2557 fputs(s, F);
2558 free(s);
2559 }
2560 fflush(F);
2561 break;
2562 }
2563
2564 case XC( OC_DELETE ): {
2565 uint32_t info = op1->info & OPCLSMASK;
2566 var *v;
2567
2568 if (info == OC_VAR) {
2569 v = op1->l.v;
2570 } else if (info == OC_FNARG) {
2571 v = &fnargs[op1->l.aidx];
2572 } else {
2573 syntax_error(EMSG_NOT_ARRAY);
2574 }
2575
2576 if (op1->r.n) {
2577 const char *s;
2578 clrvar(L.v);
2579 s = getvar_s(evaluate(op1->r.n, v1));
2580 hash_remove(iamarray(v), s);
2581 } else {
2582 clear_array(iamarray(v));
2583 }
2584 break;
2585 }
2586
2587 case XC( OC_NEWSOURCE ):
2588 g_progname = op->l.new_progname;
2589 break;
2590
2591 case XC( OC_RETURN ):
2592 copyvar(res, L.v);
2593 break;
2594
2595 case XC( OC_NEXTFILE ):
2596 nextfile = TRUE;
2597 case XC( OC_NEXT ):
2598 nextrec = TRUE;
2599 case XC( OC_DONE ):
2600 clrvar(res);
2601 break;
2602
2603 case XC( OC_EXIT ):
2604 awk_exit(L_d);
2605
2606 /* -- recursive node type -- */
2607
2608 case XC( OC_VAR ):
2609 L.v = op->l.v;
2610 if (L.v == intvar[NF])
2611 split_f0();
2612 goto v_cont;
2613
2614 case XC( OC_FNARG ):
2615 L.v = &fnargs[op->l.aidx];
2616 v_cont:
2617 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2618 break;
2619
2620 case XC( OC_IN ):
2621 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2622 break;
2623
2624 case XC( OC_REGEXP ):
2625 op1 = op;
2626 L.s = getvar_s(intvar[F0]);
2627 goto re_cont;
2628
2629 case XC( OC_MATCH ):
2630 op1 = op->r.n;
2631 re_cont:
2632 {
2633 regex_t *re = as_regex(op1, &sreg);
2634 int i = regexec(re, L.s, 0, NULL, 0);
2635 if (re == &sreg)
2636 regfree(re);
2637 setvar_i(res, (i == 0) ^ (opn == '!'));
2638 }
2639 break;
2640
2641 case XC( OC_MOVE ):
2642 debug_printf_eval("MOVE\n");
2643 /* if source is a temporary string, jusk relink it to dest */
2644//Disabled: if R.v is numeric but happens to have cached R.v->string,
2645//then L.v ends up being a string, which is wrong
2646// if (R.v == v1+1 && R.v->string) {
2647// res = setvar_p(L.v, R.v->string);
2648// R.v->string = NULL;
2649// } else {
2650 res = copyvar(L.v, R.v);
2651// }
2652 break;
2653
2654 case XC( OC_TERNARY ):
2655 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2656 syntax_error(EMSG_POSSIBLE_ERROR);
2657 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2658 break;
2659
2660 case XC( OC_FUNC ): {
2661 var *vbeg, *v;
2662 const char *sv_progname;
2663
2664 if (!op->r.f->body.first)
2665 syntax_error(EMSG_UNDEF_FUNC);
2666
2667 vbeg = v = nvalloc(op->r.f->nargs + 1);
2668 while (op1) {
2669 var *arg = evaluate(nextarg(&op1), v1);
2670 copyvar(v, arg);
2671 v->type |= VF_CHILD;
2672 v->x.parent = arg;
2673 if (++v - vbeg >= op->r.f->nargs)
2674 break;
2675 }
2676
2677 v = fnargs;
2678 fnargs = vbeg;
2679 sv_progname = g_progname;
2680
2681 res = evaluate(op->r.f->body.first, res);
2682
2683 g_progname = sv_progname;
2684 nvfree(fnargs);
2685 fnargs = v;
2686
2687 break;
2688 }
2689
2690 case XC( OC_GETLINE ):
2691 case XC( OC_PGETLINE ): {
2692 rstream *rsm;
2693 int i;
2694
2695 if (op1) {
2696 rsm = newfile(L.s);
2697 if (!rsm->F) {
2698 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2699 rsm->F = popen(L.s, "r");
2700 rsm->is_pipe = TRUE;
2701 } else {
2702 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2703 }
2704 }
2705 } else {
2706 if (!iF)
2707 iF = next_input_file();
2708 rsm = iF;
2709 }
2710
2711 if (!rsm || !rsm->F) {
2712 setvar_i(intvar[ERRNO], errno);
2713 setvar_i(res, -1);
2714 break;
2715 }
2716
2717 if (!op->r.n)
2718 R.v = intvar[F0];
2719
2720 i = awk_getline(rsm, R.v);
2721 if (i > 0 && !op1) {
2722 incvar(intvar[FNR]);
2723 incvar(intvar[NR]);
2724 }
2725 setvar_i(res, i);
2726 break;
2727 }
2728
2729 /* simple builtins */
2730 case XC( OC_FBLTIN ): {
2731 double R_d = R_d; /* for compiler */
2732
2733 switch (opn) {
2734 case F_in:
2735 R_d = (int)L_d;
2736 break;
2737
2738 case F_rn:
2739 R_d = (double)rand() / (double)RAND_MAX;
2740 break;
2741
2742 case F_co:
2743 if (ENABLE_FEATURE_AWK_LIBM) {
2744 R_d = cos(L_d);
2745 break;
2746 }
2747
2748 case F_ex:
2749 if (ENABLE_FEATURE_AWK_LIBM) {
2750 R_d = exp(L_d);
2751 break;
2752 }
2753
2754 case F_lg:
2755 if (ENABLE_FEATURE_AWK_LIBM) {
2756 R_d = log(L_d);
2757 break;
2758 }
2759
2760 case F_si:
2761 if (ENABLE_FEATURE_AWK_LIBM) {
2762 R_d = sin(L_d);
2763 break;
2764 }
2765
2766 case F_sq:
2767 if (ENABLE_FEATURE_AWK_LIBM) {
2768 R_d = sqrt(L_d);
2769 break;
2770 }
2771
2772 syntax_error(EMSG_NO_MATH);
2773 break;
2774
2775 case F_sr:
2776 R_d = (double)seed;
2777 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2778 srand(seed);
2779 break;
2780
2781 case F_ti:
2782 R_d = time(NULL);
2783 break;
2784
2785 case F_le:
2786 if (!op1)
2787 L.s = getvar_s(intvar[F0]);
2788 R_d = strlen(L.s);
2789 break;
2790
2791 case F_sy:
2792 fflush_all();
2793 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2794 ? (system(L.s) >> 8) : 0;
2795 break;
2796
2797 case F_ff:
2798 if (!op1) {
2799 fflush(stdout);
2800 } else if (L.s && *L.s) {
2801 rstream *rsm = newfile(L.s);
2802 fflush(rsm->F);
2803 } else {
2804 fflush_all();
2805 }
2806 break;
2807
2808 case F_cl: {
2809 rstream *rsm;
2810 int err = 0;
2811 rsm = (rstream *)hash_search(fdhash, L.s);
2812 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2813 if (rsm) {
2814 debug_printf_eval("OC_FBLTIN F_cl "
2815 "rsm->is_pipe:%d, ->F:%p\n",
2816 rsm->is_pipe, rsm->F);
2817 /* Can be NULL if open failed. Example:
2818 * getline line <"doesnt_exist";
2819 * close("doesnt_exist"); <--- here rsm->F is NULL
2820 */
2821 if (rsm->F)
2822 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2823 free(rsm->buffer);
2824 hash_remove(fdhash, L.s);
2825 }
2826 if (err)
2827 setvar_i(intvar[ERRNO], errno);
2828 R_d = (double)err;
2829 break;
2830 }
2831 } /* switch */
2832 setvar_i(res, R_d);
2833 break;
2834 }
2835
2836 case XC( OC_BUILTIN ):
2837 res = exec_builtin(op, res);
2838 break;
2839
2840 case XC( OC_SPRINTF ):
2841 setvar_p(res, awk_printf(op1));
2842 break;
2843
2844 case XC( OC_UNARY ): {
2845 double Ld, R_d;
2846
2847 Ld = R_d = getvar_i(R.v);
2848 switch (opn) {
2849 case 'P':
2850 Ld = ++R_d;
2851 goto r_op_change;
2852 case 'p':
2853 R_d++;
2854 goto r_op_change;
2855 case 'M':
2856 Ld = --R_d;
2857 goto r_op_change;
2858 case 'm':
2859 R_d--;
2860 r_op_change:
2861 setvar_i(R.v, R_d);
2862 break;
2863 case '!':
2864 Ld = !istrue(R.v);
2865 break;
2866 case '-':
2867 Ld = -R_d;
2868 break;
2869 }
2870 setvar_i(res, Ld);
2871 break;
2872 }
2873
2874 case XC( OC_FIELD ): {
2875 int i = (int)getvar_i(R.v);
2876 if (i == 0) {
2877 res = intvar[F0];
2878 } else {
2879 split_f0();
2880 if (i > nfields)
2881 fsrealloc(i);
2882 res = &Fields[i - 1];
2883 }
2884 break;
2885 }
2886
2887 /* concatenation (" ") and index joining (",") */
2888 case XC( OC_CONCAT ):
2889 case XC( OC_COMMA ): {
2890 const char *sep = "";
2891 if ((opinfo & OPCLSMASK) == OC_COMMA)
2892 sep = getvar_s(intvar[SUBSEP]);
2893 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2894 break;
2895 }
2896
2897 case XC( OC_LAND ):
2898 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2899 break;
2900
2901 case XC( OC_LOR ):
2902 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2903 break;
2904
2905 case XC( OC_BINARY ):
2906 case XC( OC_REPLACE ): {
2907 double R_d = getvar_i(R.v);
2908 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2909 switch (opn) {
2910 case '+':
2911 L_d += R_d;
2912 break;
2913 case '-':
2914 L_d -= R_d;
2915 break;
2916 case '*':
2917 L_d *= R_d;
2918 break;
2919 case '/':
2920 if (R_d == 0)
2921 syntax_error(EMSG_DIV_BY_ZERO);
2922 L_d /= R_d;
2923 break;
2924 case '&':
2925 if (ENABLE_FEATURE_AWK_LIBM)
2926 L_d = pow(L_d, R_d);
2927 else
2928 syntax_error(EMSG_NO_MATH);
2929 break;
2930 case '%':
2931 if (R_d == 0)
2932 syntax_error(EMSG_DIV_BY_ZERO);
2933 L_d -= (int)(L_d / R_d) * R_d;
2934 break;
2935 }
2936 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2937 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2938 break;
2939 }
2940
2941 case XC( OC_COMPARE ): {
2942 int i = i; /* for compiler */
2943 double Ld;
2944
2945 if (is_numeric(L.v) && is_numeric(R.v)) {
2946 Ld = getvar_i(L.v) - getvar_i(R.v);
2947 } else {
2948 const char *l = getvar_s(L.v);
2949 const char *r = getvar_s(R.v);
2950 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2951 }
2952 switch (opn & 0xfe) {
2953 case 0:
2954 i = (Ld > 0);
2955 break;
2956 case 2:
2957 i = (Ld >= 0);
2958 break;
2959 case 4:
2960 i = (Ld == 0);
2961 break;
2962 }
2963 setvar_i(res, (i == 0) ^ (opn & 1));
2964 break;
2965 }
2966
2967 default:
2968 syntax_error(EMSG_POSSIBLE_ERROR);
2969 }
2970 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2971 op = op->a.n;
2972 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2973 break;
2974 if (nextrec)
2975 break;
2976 } /* while (op) */
2977
2978 nvfree(v1);
2979 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2980 return res;
2981#undef fnargs
2982#undef seed
2983#undef sreg
2984}
2985
2986
2987/* -------- main & co. -------- */
2988
2989static int awk_exit(int r)
2990{
2991 var tv;
2992 unsigned i;
2993 hash_item *hi;
2994
2995 zero_out_var(&tv);
2996
2997 if (!exiting) {
2998 exiting = TRUE;
2999 nextrec = FALSE;
3000 evaluate(endseq.first, &tv);
3001 }
3002
3003 /* waiting for children */
3004 for (i = 0; i < fdhash->csize; i++) {
3005 hi = fdhash->items[i];
3006 while (hi) {
3007 if (hi->data.rs.F && hi->data.rs.is_pipe)
3008 pclose(hi->data.rs.F);
3009 hi = hi->next;
3010 }
3011 }
3012
3013 exit(r);
3014}
3015
3016/* if expr looks like "var=value", perform assignment and return 1,
3017 * otherwise return 0 */
3018static int is_assignment(const char *expr)
3019{
3020 char *exprc, *val;
3021
3022 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3023 return FALSE;
3024 }
3025
3026 exprc = xstrdup(expr);
3027 val = exprc + (val - expr);
3028 *val++ = '\0';
3029
3030 unescape_string_in_place(val);
3031 setvar_u(newvar(exprc), val);
3032 free(exprc);
3033 return TRUE;
3034}
3035
3036/* switch to next input file */
3037static rstream *next_input_file(void)
3038{
3039#define rsm (G.next_input_file__rsm)
3040#define files_happen (G.next_input_file__files_happen)
3041
3042 FILE *F;
3043 const char *fname, *ind;
3044
3045 if (rsm.F)
3046 fclose(rsm.F);
3047 rsm.F = NULL;
3048 rsm.pos = rsm.adv = 0;
3049
3050 for (;;) {
3051 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3052 if (files_happen)
3053 return NULL;
3054 fname = "-";
3055 F = stdin;
3056 break;
3057 }
3058 ind = getvar_s(incvar(intvar[ARGIND]));
3059 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3060 if (fname && *fname && !is_assignment(fname)) {
3061 F = xfopen_stdin(fname);
3062 break;
3063 }
3064 }
3065
3066 files_happen = TRUE;
3067 setvar_s(intvar[FILENAME], fname);
3068 rsm.F = F;
3069 return &rsm;
3070#undef rsm
3071#undef files_happen
3072}
3073
3074int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3075int awk_main(int argc, char **argv)
3076{
3077 unsigned opt;
3078 char *opt_F;
3079 llist_t *list_v = NULL;
3080 llist_t *list_f = NULL;
3081 int i, j;
3082 var *v;
3083 var tv;
3084 char **envp;
3085 char *vnames = (char *)vNames; /* cheat */
3086 char *vvalues = (char *)vValues;
3087
3088 INIT_G();
3089
3090 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3091 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3092 if (ENABLE_LOCALE_SUPPORT)
3093 setlocale(LC_NUMERIC, "C");
3094
3095 zero_out_var(&tv);
3096
3097 /* allocate global buffer */
3098 g_buf = xmalloc(MAXVARFMT + 1);
3099
3100 vhash = hash_init();
3101 ahash = hash_init();
3102 fdhash = hash_init();
3103 fnhash = hash_init();
3104
3105 /* initialize variables */
3106 for (i = 0; *vnames; i++) {
3107 intvar[i] = v = newvar(nextword(&vnames));
3108 if (*vvalues != '\377')
3109 setvar_s(v, nextword(&vvalues));
3110 else
3111 setvar_i(v, 0);
3112
3113 if (*vnames == '*') {
3114 v->type |= VF_SPECIAL;
3115 vnames++;
3116 }
3117 }
3118
3119 handle_special(intvar[FS]);
3120 handle_special(intvar[RS]);
3121
3122 newfile("/dev/stdin")->F = stdin;
3123 newfile("/dev/stdout")->F = stdout;
3124 newfile("/dev/stderr")->F = stderr;
3125
3126 /* Huh, people report that sometimes environ is NULL. Oh well. */
3127 if (environ) for (envp = environ; *envp; envp++) {
3128 /* environ is writable, thus we don't strdup it needlessly */
3129 char *s = *envp;
3130 char *s1 = strchr(s, '=');
3131 if (s1) {
3132 *s1 = '\0';
3133 /* Both findvar and setvar_u take const char*
3134 * as 2nd arg -> environment is not trashed */
3135 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3136 *s1 = '=';
3137 }
3138 }
3139 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3140 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3141 argv += optind;
3142 argc -= optind;
3143 if (opt & 0x1) { /* -F */
3144 unescape_string_in_place(opt_F);
3145 setvar_s(intvar[FS], opt_F);
3146 }
3147 while (list_v) { /* -v */
3148 if (!is_assignment(llist_pop(&list_v)))
3149 bb_show_usage();
3150 }
3151 if (list_f) { /* -f */
3152 do {
3153 char *s = NULL;
3154 FILE *from_file;
3155
3156 g_progname = llist_pop(&list_f);
3157 from_file = xfopen_stdin(g_progname);
3158 /* one byte is reserved for some trick in next_token */
3159 for (i = j = 1; j > 0; i += j) {
3160 s = xrealloc(s, i + 4096);
3161 j = fread(s + i, 1, 4094, from_file);
3162 }
3163 s[i] = '\0';
3164 fclose(from_file);
3165 parse_program(s + 1);
3166 free(s);
3167 } while (list_f);
3168 argc++;
3169 } else { // no -f: take program from 1st parameter
3170 if (!argc)
3171 bb_show_usage();
3172 g_progname = "cmd. line";
3173 parse_program(*argv++);
3174 }
3175 if (opt & 0x8) // -W
3176 bb_error_msg("warning: option -W is ignored");
3177
3178 /* fill in ARGV array */
3179 setvar_i(intvar[ARGC], argc);
3180 setari_u(intvar[ARGV], 0, "awk");
3181 i = 0;
3182 while (*argv)
3183 setari_u(intvar[ARGV], ++i, *argv++);
3184
3185 evaluate(beginseq.first, &tv);
3186 if (!mainseq.first && !endseq.first)
3187 awk_exit(EXIT_SUCCESS);
3188
3189 /* input file could already be opened in BEGIN block */
3190 if (!iF)
3191 iF = next_input_file();
3192
3193 /* passing through input files */
3194 while (iF) {
3195 nextfile = FALSE;
3196 setvar_i(intvar[FNR], 0);
3197
3198 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3199 nextrec = FALSE;
3200 incvar(intvar[NR]);
3201 incvar(intvar[FNR]);
3202 evaluate(mainseq.first, &tv);
3203
3204 if (nextfile)
3205 break;
3206 }
3207
3208 if (i < 0)
3209 syntax_error(strerror(errno));
3210
3211 iF = next_input_file();
3212 }
3213
3214 awk_exit(EXIT_SUCCESS);
3215 /*return 0;*/
3216}
Note: See TracBrowser for help on using the repository browser.