source: MondoRescue/branches/3.3/mindi-busybox/editors/awk.c@ 3625

Last change on this file since 3625 was 3621, checked in by Bruno Cornec, 10 years ago

New 3.3 branch for incorporation of the latest busybox 1.25. Changing the minor version to handle potential incompatibilities.

File size: 76.5 KB
RevLine 
[821]1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
[2725]7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
[821]8 */
9
[3621]10//config:config AWK
11//config: bool "awk"
12//config: default y
13//config: help
14//config: Awk is used as a pattern scanning and processing language. This is
15//config: the BusyBox implementation of that programming language.
16//config:
17//config:config FEATURE_AWK_LIBM
18//config: bool "Enable math functions (requires libm)"
19//config: default y
20//config: depends on AWK
21//config: help
22//config: Enable math functions of the Awk programming language.
23//config: NOTE: This will require libm to be present for linking.
24//config:
25//config:config FEATURE_AWK_GNU_EXTENSIONS
26//config: bool "Enable a few GNU extensions"
27//config: default y
28//config: depends on AWK
29//config: help
30//config: Enable a few features from gawk:
31//config: * command line option -e AWK_PROGRAM
32//config: * simultaneous use of -f and -e on the command line.
33//config: This enables the use of awk library files.
34//config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
35
36//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
37
38//kbuild:lib-$(CONFIG_AWK) += awk.o
39
[3232]40//usage:#define awk_trivial_usage
41//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42//usage:#define awk_full_usage "\n\n"
43//usage: " -v VAR=VAL Set variable"
44//usage: "\n -F SEP Use SEP as field separator"
45//usage: "\n -f FILE Read program from FILE"
[3621]46//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47//usage: "\n -e AWK_PROGRAM"
48//usage: )
[3232]49
[1765]50#include "libbb.h"
51#include "xregex.h"
[821]52#include <math.h>
53
[1765]54/* This is a NOEXEC applet. Be very careful! */
[821]55
56
[2725]57/* If you comment out one of these below, it will be #defined later
58 * to perform debug printfs to stderr: */
59#define debug_printf_walker(...) do {} while (0)
60#define debug_printf_eval(...) do {} while (0)
[3232]61#define debug_printf_parse(...) do {} while (0)
[2725]62
63#ifndef debug_printf_walker
64# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
65#endif
66#ifndef debug_printf_eval
67# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
68#endif
[3232]69#ifndef debug_printf_parse
70# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
71#endif
[2725]72
73
[3621]74#define OPTSTR_AWK \
75 "F:v:f:" \
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
77 "W:"
78#define OPTCOMPLSTR_AWK \
79 "v::f::" \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
81enum {
82 OPTBIT_F, /* define field separator */
83 OPTBIT_v, /* define variable */
84 OPTBIT_f, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86 OPTBIT_W, /* -W ignored */
87 OPT_F = 1 << OPTBIT_F,
88 OPT_v = 1 << OPTBIT_v,
89 OPT_f = 1 << OPTBIT_f,
90 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
91 OPT_W = 1 << OPTBIT_W
92};
[2725]93
[1765]94#define MAXVARFMT 240
95#define MINNVBLOCK 64
[821]96
97/* variable flags */
[1765]98#define VF_NUMBER 0x0001 /* 1 = primary type is number */
99#define VF_ARRAY 0x0002 /* 1 = it's an array */
[821]100
[1765]101#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
102#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
103#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
104#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
105#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
106#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
107#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
[821]108
109/* these flags are static, don't change them when value is changed */
[1765]110#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
[821]111
/* Node of the list hung off var.x.walker (array elements for for..in).
 * Nodes are chained through 'prev'; wbuf is a trailing buffer declared
 * with the classic one-element-array idiom.
 */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev;
	char wbuf[1];
} walker_list;
118
[821]119/* Variable */
120typedef struct var_s {
[1765]121 unsigned type; /* flags */
[821]122 double number;
123 char *string;
124 union {
[1765]125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
[2725]128 walker_list *walker; /* list of array elements (for..in) */
[821]129 } x;
130} var;
131
/* A chain of nodes: used for the pattern-action sequence, BEGIN, END
 * and function bodies. Tracks both ends plus the program name.
 */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;
	const char *programname;
} chain;
138
139/* Function */
140typedef struct func_s {
[1765]141 unsigned nargs;
[821]142 struct chain_s body;
143} func;
144
145/* I/O stream */
146typedef struct rstream_s {
147 FILE *F;
148 char *buffer;
149 int adv;
150 int size;
151 int pos;
[1765]152 smallint is_pipe;
[821]153} rstream;
154
155typedef struct hash_item_s {
156 union {
[1765]157 struct var_s v; /* variable/array hash */
158 struct rstream_s rs; /* redirect streams hash */
159 struct func_s f; /* functions hash */
[821]160 } data;
[1765]161 struct hash_item_s *next; /* next in chain */
162 char name[1]; /* really it's longer */
[821]163} hash_item;
164
/* Hash table header */
typedef struct xhash_s {
	unsigned nel;     /* number of elements */
	unsigned csize;   /* current number of buckets */
	unsigned nprime;  /* index of the next size in PRIMES[] */
	unsigned glen;    /* total length of all item names */
	struct hash_item_s **items;
} xhash;
172
173/* Tree node */
174typedef struct node_s {
175 uint32_t info;
[1765]176 unsigned lineno;
[821]177 union {
178 struct node_s *n;
179 var *v;
[2725]180 int aidx;
181 char *new_progname;
[821]182 regex_t *re;
183 } l;
184 union {
185 struct node_s *n;
186 regex_t *ire;
187 func *f;
188 } r;
189 union {
190 struct node_s *n;
191 } a;
192} node;
193
194/* Block of temporary variables */
195typedef struct nvblock_s {
196 int size;
197 var *pos;
198 struct nvblock_s *prev;
199 struct nvblock_s *next;
[2725]200 var nv[];
[821]201} nvblock;
202
203typedef struct tsplitter_s {
204 node n;
205 regex_t re[2];
206} tsplitter;
207
208/* simple token classes */
209/* Order and hex values are very important!!! See next_token() */
[3621]210#define TC_SEQSTART (1 << 0) /* ( */
[821]211#define TC_SEQTERM (1 << 1) /* ) */
212#define TC_REGEXP (1 << 2) /* /.../ */
213#define TC_OUTRDR (1 << 3) /* | > >> */
214#define TC_UOPPOST (1 << 4) /* unary postfix operator */
215#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
216#define TC_BINOPX (1 << 6) /* two-opnd operator */
217#define TC_IN (1 << 7)
218#define TC_COMMA (1 << 8)
219#define TC_PIPE (1 << 9) /* input redirection pipe */
220#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
221#define TC_ARRTERM (1 << 11) /* ] */
222#define TC_GRPSTART (1 << 12) /* { */
223#define TC_GRPTERM (1 << 13) /* } */
224#define TC_SEMICOL (1 << 14)
225#define TC_NEWLINE (1 << 15)
226#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
227#define TC_WHILE (1 << 17)
228#define TC_ELSE (1 << 18)
229#define TC_BUILTIN (1 << 19)
[3621]230/* This costs ~50 bytes of code.
231 * A separate class to support deprecated "length" form. If we don't need that
232 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
233 * can be merged with TC_BUILTIN:
234 */
235#define TC_LENGTH (1 << 20)
236#define TC_GETLINE (1 << 21)
237#define TC_FUNCDECL (1 << 22) /* `function' `func' */
238#define TC_BEGIN (1 << 23)
239#define TC_END (1 << 24)
240#define TC_EOF (1 << 25)
241#define TC_VARIABLE (1 << 26)
242#define TC_ARRAY (1 << 27)
243#define TC_FUNCTION (1 << 28)
244#define TC_STRING (1 << 29)
245#define TC_NUMBER (1 << 30)
[821]246
[1765]247#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
[821]248
249/* combined token classes */
[1765]250#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
[3621]251//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
[1765]252#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
[3621]253 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
254 | TC_SEQSTART | TC_STRING | TC_NUMBER)
[821]255
[1765]256#define TC_STATEMNT (TC_STATX | TC_WHILE)
257#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
[821]258
259/* word tokens, cannot mean something else if not expected */
[3621]260#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
261 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
262 | TC_FUNCDECL | TC_BEGIN | TC_END)
[821]263
264/* discard newlines after these */
[1765]265#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
266 | TC_BINOP | TC_OPTERM)
[821]267
268/* what can expression begin with */
[1765]269#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
[821]270/* what can group begin with */
[1765]271#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
[821]272
273/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
274/* operator is inserted between them */
[1765]275#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
276 | TC_STRING | TC_NUMBER | TC_UOPPOST)
277#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
[821]278
[1765]279#define OF_RES1 0x010000
280#define OF_RES2 0x020000
281#define OF_STR1 0x040000
282#define OF_STR2 0x080000
283#define OF_NUM1 0x100000
284#define OF_CHECKED 0x200000
[821]285
286/* combined operator flags */
287#define xx 0
288#define xV OF_RES2
289#define xS (OF_RES2 | OF_STR2)
290#define Vx OF_RES1
291#define VV (OF_RES1 | OF_RES2)
292#define Nx (OF_RES1 | OF_NUM1)
293#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
294#define Sx (OF_RES1 | OF_STR1)
295#define SV (OF_RES1 | OF_STR1 | OF_RES2)
296#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
297
[1765]298#define OPCLSMASK 0xFF00
299#define OPNMASK 0x007F
[821]300
301/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
302 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
303 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
304 */
[3232]305#undef P
306#undef PRIMASK
307#undef PRIMASK2
[1765]308#define P(x) (x << 24)
309#define PRIMASK 0x7F000000
310#define PRIMASK2 0x7E000000
[821]311
312/* Operation classes */
313
314#define SHIFT_TIL_THIS 0x0600
315#define RECUR_FROM_THIS 0x1000
316
317enum {
[1765]318 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
319 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
[821]320
[1765]321 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
322 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
323 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
[821]324
[1765]325 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
326 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
327 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
328 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
329 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
330 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
331 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
332 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
333 OC_DONE = 0x2800,
[821]334
[1765]335 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
336 ST_WHILE = 0x3300
[821]337};
338
339/* simple builtins */
340enum {
[1765]341 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
[821]342 F_ti, F_le, F_sy, F_ff, F_cl
343};
344
345/* builtins */
346enum {
[2725]347 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
[1765]348 B_ge, B_gs, B_su,
349 B_an, B_co, B_ls, B_or, B_rs, B_xo,
[821]350};
351
352/* tokens and their corresponding info values */
353
[2725]354#define NTC "\377" /* switch to next token class (tc<<1) */
355#define NTCC '\377'
[821]356
[1765]357static const char tokenlist[] ALIGN1 =
[3621]358 "\1(" NTC /* TC_SEQSTART */
359 "\1)" NTC /* TC_SEQTERM */
360 "\1/" NTC /* TC_REGEXP */
361 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
362 "\2++" "\2--" NTC /* TC_UOPPOST */
363 "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
364 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
[2725]365 "\2*=" "\2/=" "\2%=" "\2^="
366 "\1+" "\1-" "\3**=" "\2**"
367 "\1/" "\1%" "\1^" "\1*"
368 "\2!=" "\2>=" "\2<=" "\1>"
369 "\1<" "\2!~" "\1~" "\2&&"
370 "\2||" "\1?" "\1:" NTC
[3621]371 "\2in" NTC /* TC_IN */
372 "\1," NTC /* TC_COMMA */
373 "\1|" NTC /* TC_PIPE */
374 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
375 "\1]" NTC /* TC_ARRTERM */
376 "\1{" NTC /* TC_GRPSTART */
377 "\1}" NTC /* TC_GRPTERM */
378 "\1;" NTC /* TC_SEMICOL */
379 "\1\n" NTC /* TC_NEWLINE */
380 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
[2725]381 "\10continue" "\6delete" "\5print"
382 "\6printf" "\4next" "\10nextfile"
383 "\6return" "\4exit" NTC
[3621]384 "\5while" NTC /* TC_WHILE */
385 "\4else" NTC /* TC_ELSE */
386 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
[2725]387 "\6rshift" "\3xor"
[3621]388 "\5close" "\6system" "\6fflush" "\5atan2"
[2725]389 "\3cos" "\3exp" "\3int" "\3log"
390 "\4rand" "\3sin" "\4sqrt" "\5srand"
[3621]391 "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
[2725]392 "\5match" "\5split" "\7sprintf" "\3sub"
393 "\6substr" "\7systime" "\10strftime" "\6mktime"
394 "\7tolower" "\7toupper" NTC
[3621]395 "\6length" NTC /* TC_LENGTH */
396 "\7getline" NTC /* TC_GETLINE */
397 "\4func" "\10function" NTC /* TC_FUNCDECL */
398 "\5BEGIN" NTC /* TC_BEGIN */
399 "\3END" /* TC_END */
[2725]400 /* compiler adds trailing "\0" */
[821]401 ;
402
[3621]403#define OC_B OC_BUILTIN
404
[821]405static const uint32_t tokeninfo[] = {
406 0,
407 0,
408 OC_REGEXP,
[2725]409 xS|'a', xS|'w', xS|'|',
410 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
411 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
412 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
413 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
414 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
415 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
416 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
417 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
418 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
[3621]419 OC_IN|SV|P(49), /* TC_IN */
[821]420 OC_COMMA|SS|P(80),
421 OC_PGETLINE|SV|P(37),
[2725]422 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
423 0, /* ] */
[821]424 0,
425 0,
426 0,
[2725]427 0, /* \n */
428 ST_IF, ST_DO, ST_FOR, OC_BREAK,
429 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
430 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
431 OC_RETURN|Vx, OC_EXIT|Nx,
[821]432 ST_WHILE,
[2725]433 0, /* else */
[1765]434 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
435 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
[821]436 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
437 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
438 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
[3621]439 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
[821]440 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
[2725]441 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
[821]442 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
[3621]443 OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
[821]444 OC_GETLINE|SV|P(0),
[2725]445 0, 0,
[821]446 0,
[3621]447 0 /* TC_END */
[821]448};
449
450/* internal variable names and their initial values */
451/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
452enum {
[1765]453 CONVFMT, OFMT, FS, OFS,
454 ORS, RS, RT, FILENAME,
[2725]455 SUBSEP, F0, ARGIND, ARGC,
456 ARGV, ERRNO, FNR, NR,
457 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
[821]458};
459
[1765]460static const char vNames[] ALIGN1 =
461 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
462 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
[2725]463 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
464 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
465 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
[821]466
[1765]467static const char vValues[] ALIGN1 =
468 "%.6g\0" "%.6g\0" " \0" " \0"
469 "\n\0" "\n\0" "\0" "\0"
[2725]470 "\034\0" "\0" "\377";
[821]471
472/* hash size may grow to these values */
[1765]473#define FIRST_PRIME 61
474static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
[821]475
476
[1765]477/* Globals. Split in two parts so that first one is addressed
[2725]478 * with (mostly short) negative offsets.
479 * NB: it's unsafe to put members of type "double"
480 * into globals2 (gcc may fail to align them).
481 */
[1765]482struct globals {
[2725]483 double t_double;
484 chain beginseq, mainseq, endseq;
485 chain *seq;
[1765]486 node *break_ptr, *continue_ptr;
487 rstream *iF;
488 xhash *vhash, *ahash, *fdhash, *fnhash;
489 const char *g_progname;
490 int g_lineno;
491 int nfields;
492 int maxfields; /* used in fsrealloc() only */
493 var *Fields;
494 nvblock *g_cb;
495 char *g_pos;
496 char *g_buf;
497 smallint icase;
498 smallint exiting;
499 smallint nextrec;
500 smallint nextfile;
501 smallint is_f0_split;
[3232]502 smallint t_rollback;
[1765]503};
504struct globals2 {
505 uint32_t t_info; /* often used */
506 uint32_t t_tclass;
507 char *t_string;
508 int t_lineno;
[821]509
[1765]510 var *intvar[NUM_INTERNAL_VARS]; /* often used */
[821]511
[1765]512 /* former statics from various functions */
513 char *split_f0__fstrings;
[821]514
[1765]515 uint32_t next_token__save_tclass;
516 uint32_t next_token__save_info;
517 uint32_t next_token__ltclass;
518 smallint next_token__concat_inserted;
519
520 smallint next_input_file__files_happen;
521 rstream next_input_file__rsm;
522
523 var *evaluate__fnargs;
524 unsigned evaluate__seed;
525 regex_t evaluate__sreg;
526
527 var ptest__v;
528
529 tsplitter exec_builtin__tspl;
530
531 /* biggest and least used members go last */
532 tsplitter fsplitter, rsplitter;
533};
534#define G1 (ptr_to_globals[-1])
[2725]535#define G (*(struct globals2 *)ptr_to_globals)
[1765]536/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
[2725]537/*char G1size[sizeof(G1)]; - 0x74 */
538/*char Gsize[sizeof(G)]; - 0x1c4 */
[1765]539/* Trying to keep most of members accessible with short offsets: */
[2725]540/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
541#define t_double (G1.t_double )
[1765]542#define beginseq (G1.beginseq )
543#define mainseq (G1.mainseq )
544#define endseq (G1.endseq )
545#define seq (G1.seq )
546#define break_ptr (G1.break_ptr )
547#define continue_ptr (G1.continue_ptr)
548#define iF (G1.iF )
549#define vhash (G1.vhash )
550#define ahash (G1.ahash )
551#define fdhash (G1.fdhash )
552#define fnhash (G1.fnhash )
553#define g_progname (G1.g_progname )
554#define g_lineno (G1.g_lineno )
555#define nfields (G1.nfields )
556#define maxfields (G1.maxfields )
557#define Fields (G1.Fields )
558#define g_cb (G1.g_cb )
559#define g_pos (G1.g_pos )
560#define g_buf (G1.g_buf )
561#define icase (G1.icase )
562#define exiting (G1.exiting )
563#define nextrec (G1.nextrec )
564#define nextfile (G1.nextfile )
565#define is_f0_split (G1.is_f0_split )
[3232]566#define t_rollback (G1.t_rollback )
[1765]567#define t_info (G.t_info )
568#define t_tclass (G.t_tclass )
569#define t_string (G.t_string )
570#define t_lineno (G.t_lineno )
571#define intvar (G.intvar )
572#define fsplitter (G.fsplitter )
573#define rsplitter (G.rsplitter )
574#define INIT_G() do { \
[2725]575 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
[1765]576 G.next_token__ltclass = TC_OPTERM; \
577 G.evaluate__seed = 1; \
578} while (0)
579
580
[821]581/* function prototypes */
582static void handle_special(var *);
583static node *parse_expr(uint32_t);
584static void chain_group(void);
585static var *evaluate(node *, var *);
586static rstream *next_input_file(void);
587static int fmt_num(char *, int, const char *, double, int);
[2725]588static int awk_exit(int) NORETURN;
[821]589
590/* ---- error handling ---- */
591
[1765]592static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
593static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
594static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
595static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
596static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
597static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
598static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
599static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
600static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
601static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
[821]602
[2725]603static void zero_out_var(var *vp)
[821]604{
[1765]605 memset(vp, 0, sizeof(*vp));
[821]606}
607
[2725]608static void syntax_error(const char *message) NORETURN;
609static void syntax_error(const char *message)
[1765]610{
611 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
612}
[821]613
614/* ---- hash stuff ---- */
615
/* Hash a NUL-terminated name: for each character, idx = idx * 63 + ch
 * (unsigned arithmetic, wraps on overflow). Callers reduce the result
 * modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned idx;

	for (idx = 0; *name != '\0'; name++)
		idx = idx * 63 + *name;
	return idx;
}
624
625/* create new hash */
626static xhash *hash_init(void)
627{
628 xhash *newhash;
629
[2725]630 newhash = xzalloc(sizeof(*newhash));
[821]631 newhash->csize = FIRST_PRIME;
[2725]632 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
[821]633
634 return newhash;
635}
636
637/* find item in hash, return ptr to data, NULL if not found */
638static void *hash_search(xhash *hash, const char *name)
639{
640 hash_item *hi;
641
[2725]642 hi = hash->items[hashidx(name) % hash->csize];
[821]643 while (hi) {
644 if (strcmp(hi->name, name) == 0)
[2725]645 return &hi->data;
[821]646 hi = hi->next;
647 }
648 return NULL;
649}
650
651/* grow hash if it becomes too big */
652static void hash_rebuild(xhash *hash)
653{
[1765]654 unsigned newsize, i, idx;
[821]655 hash_item **newitems, *hi, *thi;
656
[1765]657 if (hash->nprime == ARRAY_SIZE(PRIMES))
[821]658 return;
659
660 newsize = PRIMES[hash->nprime++];
[2725]661 newitems = xzalloc(newsize * sizeof(newitems[0]));
[821]662
[1765]663 for (i = 0; i < hash->csize; i++) {
[821]664 hi = hash->items[i];
665 while (hi) {
666 thi = hi;
667 hi = thi->next;
668 idx = hashidx(thi->name) % newsize;
669 thi->next = newitems[idx];
670 newitems[idx] = thi;
671 }
672 }
673
674 free(hash->items);
675 hash->csize = newsize;
676 hash->items = newitems;
677}
678
679/* find item in hash, add it if necessary. Return ptr to data */
680static void *hash_find(xhash *hash, const char *name)
681{
682 hash_item *hi;
[1765]683 unsigned idx;
[821]684 int l;
685
686 hi = hash_search(hash, name);
[1765]687 if (!hi) {
[821]688 if (++hash->nel / hash->csize > 10)
689 hash_rebuild(hash);
690
691 l = strlen(name) + 1;
[2725]692 hi = xzalloc(sizeof(*hi) + l);
693 strcpy(hi->name, name);
[821]694
695 idx = hashidx(name) % hash->csize;
696 hi->next = hash->items[idx];
697 hash->items[idx] = hi;
698 hash->glen += l;
699 }
[2725]700 return &hi->data;
[821]701}
702
[1765]703#define findvar(hash, name) ((var*) hash_find((hash), (name)))
704#define newvar(name) ((var*) hash_find(vhash, (name)))
705#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
706#define newfunc(name) ((func*) hash_find(fnhash, (name)))
[821]707
708static void hash_remove(xhash *hash, const char *name)
709{
710 hash_item *hi, **phi;
711
[2725]712 phi = &hash->items[hashidx(name) % hash->csize];
[821]713 while (*phi) {
714 hi = *phi;
715 if (strcmp(hi->name, name) == 0) {
716 hash->glen -= (strlen(name) + 1);
717 hash->nel--;
718 *phi = hi->next;
719 free(hi);
720 break;
721 }
[2725]722 phi = &hi->next;
[821]723 }
724}
725
726/* ------ some useful functions ------ */
727
[2725]728static char *skip_spaces(char *p)
[821]729{
[1765]730 while (1) {
731 if (*p == '\\' && p[1] == '\n') {
732 p++;
733 t_lineno++;
734 } else if (*p != ' ' && *p != '\t') {
735 break;
736 }
[821]737 p++;
738 }
[2725]739 return p;
[821]740}
741
/* Return the word *s currently points at, and move *s just past
 * that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
750
/* Fetch one (possibly escaped) character from *s and advance *s past it.
 * A backslash is passed to bb_process_escape_sequence(). If that
 * returns a backslash without consuming any input, the character after
 * the backslash is returned literally.
 */
static char nextchar(char **s)
{
	char *after_backslash;
	char c = *(*s)++;

	if (c != '\\')
		return c;

	after_backslash = *s;
	c = bb_process_escape_sequence((const char**)s);
	/* Example awk statement:
	 * s = "abc\"def"
	 * \" has to be treated as "
	 */
	if (c == '\\' && *s == after_backslash) { /* unrecognized \z? */
		c = **s;        /* yes, take z itself */
		if (c != '\0')
			(*s)++;     /* advance unless z is NUL */
	}
	return c;
}
770
/* Rewrite s1 in place, replacing escape sequences with the characters
 * produced by nextchar().
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char c;

	do {
		c = nextchar(&src);
		*s1++ = c;
	} while (c != '\0');
}
779
[2725]780static ALWAYS_INLINE int isalnum_(int c)
[821]781{
782 return (isalnum(c) || c == '_');
783}
784
[2725]785static double my_strtod(char **pp)
[821]786{
[2725]787 char *cp = *pp;
788 if (ENABLE_DESKTOP && cp[0] == '0') {
789 /* Might be hex or octal integer: 0x123abc or 07777 */
790 char c = (cp[1] | 0x20);
791 if (c == 'x' || isdigit(cp[1])) {
792 unsigned long long ull = strtoull(cp, pp, 0);
793 if (c == 'x')
794 return ull;
795 c = **pp;
796 if (!isdigit(c) && c != '.')
797 return ull;
798 /* else: it may be a floating number. Examples:
799 * 009.123 (*pp points to '9')
800 * 000.123 (*pp points to '.')
801 * fall through to strtod.
802 */
803 }
804 }
805 return strtod(cp, pp);
[821]806}
807
808/* -------- working with variables (set/get/copy/etc) -------- */
809
810static xhash *iamarray(var *v)
811{
812 var *a = v;
813
814 while (a->type & VF_CHILD)
815 a = a->x.parent;
816
[1765]817 if (!(a->type & VF_ARRAY)) {
[821]818 a->type |= VF_ARRAY;
819 a->x.array = hash_init();
820 }
821 return a->x.array;
822}
823
824static void clear_array(xhash *array)
825{
[1765]826 unsigned i;
[821]827 hash_item *hi, *thi;
828
[1765]829 for (i = 0; i < array->csize; i++) {
[821]830 hi = array->items[i];
831 while (hi) {
832 thi = hi;
833 hi = hi->next;
834 free(thi->data.v.string);
835 free(thi);
836 }
837 array->items[i] = NULL;
838 }
839 array->glen = array->nel = 0;
840}
841
842/* clear a variable */
843static var *clrvar(var *v)
844{
845 if (!(v->type & VF_FSTR))
846 free(v->string);
847
848 v->type &= VF_DONTTOUCH;
849 v->type |= VF_DIRTY;
850 v->string = NULL;
851 return v;
852}
853
854/* assign string value to variable */
855static var *setvar_p(var *v, char *value)
856{
857 clrvar(v);
858 v->string = value;
859 handle_special(v);
860 return v;
861}
862
863/* same as setvar_p but make a copy of string */
864static var *setvar_s(var *v, const char *value)
865{
[1765]866 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
[821]867}
868
[2725]869/* same as setvar_s but sets USER flag */
[821]870static var *setvar_u(var *v, const char *value)
871{
[2725]872 v = setvar_s(v, value);
[821]873 v->type |= VF_USER;
874 return v;
875}
876
877/* set array element to user string */
878static void setari_u(var *a, int idx, const char *s)
879{
[1765]880 var *v;
[821]881
[2725]882 v = findvar(iamarray(a), itoa(idx));
[821]883 setvar_u(v, s);
884}
885
886/* assign numeric value to variable */
887static var *setvar_i(var *v, double value)
888{
889 clrvar(v);
890 v->type |= VF_NUMBER;
891 v->number = value;
892 handle_special(v);
893 return v;
894}
895
[1765]896static const char *getvar_s(var *v)
[821]897{
898 /* if v is numeric and has no cached string, convert it to string */
899 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
[1765]900 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
901 v->string = xstrdup(g_buf);
[821]902 v->type |= VF_CACHED;
903 }
904 return (v->string == NULL) ? "" : v->string;
905}
906
907static double getvar_i(var *v)
908{
909 char *s;
910
911 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
912 v->number = 0;
913 s = v->string;
914 if (s && *s) {
[2725]915 debug_printf_eval("getvar_i: '%s'->", s);
916 v->number = my_strtod(&s);
917 debug_printf_eval("%f (s:'%s')\n", v->number, s);
[821]918 if (v->type & VF_USER) {
[2725]919 s = skip_spaces(s);
[821]920 if (*s != '\0')
921 v->type &= ~VF_USER;
922 }
923 } else {
[2725]924 debug_printf_eval("getvar_i: '%s'->zero\n", s);
[821]925 v->type &= ~VF_USER;
926 }
927 v->type |= VF_CACHED;
928 }
[2725]929 debug_printf_eval("getvar_i: %f\n", v->number);
[821]930 return v->number;
931}
932
[2725]933/* Used for operands of bitwise ops */
934static unsigned long getvar_i_int(var *v)
935{
936 double d = getvar_i(v);
937
938 /* Casting doubles to longs is undefined for values outside
939 * of target type range. Try to widen it as much as possible */
940 if (d >= 0)
941 return (unsigned long)d;
942 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
943 return - (long) (unsigned long) (-d);
944}
945
[821]946static var *copyvar(var *dest, const var *src)
947{
948 if (dest != src) {
949 clrvar(dest);
[1765]950 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
[2725]951 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
[821]952 dest->number = src->number;
953 if (src->string)
[1765]954 dest->string = xstrdup(src->string);
[821]955 }
956 handle_special(dest);
957 return dest;
958}
959
960static var *incvar(var *v)
961{
[2725]962 return setvar_i(v, getvar_i(v) + 1.0);
[821]963}
964
965/* return true if v is number or numeric string */
966static int is_numeric(var *v)
967{
968 getvar_i(v);
969 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
970}
971
972/* return 1 when value of v corresponds to true, 0 otherwise */
973static int istrue(var *v)
974{
975 if (is_numeric(v))
[2725]976 return (v->number != 0);
977 return (v->string && v->string[0]);
[821]978}
979
980/* temporary variables allocator. Last allocated should be first freed */
981static var *nvalloc(int n)
982{
983 nvblock *pb = NULL;
984 var *v, *r;
985 int size;
986
[1765]987 while (g_cb) {
988 pb = g_cb;
[2725]989 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
990 break;
[1765]991 g_cb = g_cb->next;
[821]992 }
993
[1765]994 if (!g_cb) {
[821]995 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
[2725]996 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
[1765]997 g_cb->size = size;
998 g_cb->pos = g_cb->nv;
999 g_cb->prev = pb;
[2725]1000 /*g_cb->next = NULL; - xzalloc did it */
1001 if (pb)
1002 pb->next = g_cb;
[821]1003 }
1004
[1765]1005 v = r = g_cb->pos;
1006 g_cb->pos += n;
[821]1007
[1765]1008 while (v < g_cb->pos) {
[821]1009 v->type = 0;
1010 v->string = NULL;
1011 v++;
1012 }
1013
1014 return r;
1015}
1016
1017static void nvfree(var *v)
1018{
1019 var *p;
1020
[1765]1021 if (v < g_cb->nv || v >= g_cb->pos)
1022 syntax_error(EMSG_INTERNAL_ERROR);
[821]1023
[1765]1024 for (p = v; p < g_cb->pos; p++) {
1025 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
[821]1026 clear_array(iamarray(p));
1027 free(p->x.array->items);
1028 free(p->x.array);
1029 }
[2725]1030 if (p->type & VF_WALK) {
1031 walker_list *n;
1032 walker_list *w = p->x.walker;
1033 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1034 p->x.walker = NULL;
1035 while (w) {
1036 n = w->prev;
1037 debug_printf_walker(" free(%p)\n", w);
1038 free(w);
1039 w = n;
1040 }
1041 }
[821]1042 clrvar(p);
1043 }
1044
[1765]1045 g_cb->pos = v;
1046 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1047 g_cb = g_cb->prev;
[821]1048 }
1049}
1050
1051/* ------- awk program text parsing ------- */
1052
[1765]1053/* Parse next token pointed by global pos, place results into global ttt.
[821]1054 * If token isn't expected, give away. Return token class
1055 */
1056static uint32_t next_token(uint32_t expected)
1057{
[1765]1058#define concat_inserted (G.next_token__concat_inserted)
1059#define save_tclass (G.next_token__save_tclass)
1060#define save_info (G.next_token__save_info)
1061/* Initialized to TC_OPTERM: */
1062#define ltclass (G.next_token__ltclass)
1063
[2725]1064 char *p, *s;
[1765]1065 const char *tl;
[821]1066 uint32_t tc;
1067 const uint32_t *ti;
1068
[1765]1069 if (t_rollback) {
1070 t_rollback = FALSE;
[821]1071 } else if (concat_inserted) {
1072 concat_inserted = FALSE;
[1765]1073 t_tclass = save_tclass;
1074 t_info = save_info;
[821]1075 } else {
[1765]1076 p = g_pos;
1077 readnext:
[2725]1078 p = skip_spaces(p);
[1765]1079 g_lineno = t_lineno;
[821]1080 if (*p == '#')
[1765]1081 while (*p != '\n' && *p != '\0')
1082 p++;
[821]1083
1084 if (*p == '\n')
[1765]1085 t_lineno++;
[821]1086
1087 if (*p == '\0') {
1088 tc = TC_EOF;
[3232]1089 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
[821]1090 } else if (*p == '\"') {
1091 /* it's a string */
[1765]1092 t_string = s = ++p;
[821]1093 while (*p != '\"') {
[2725]1094 char *pp;
[821]1095 if (*p == '\0' || *p == '\n')
1096 syntax_error(EMSG_UNEXP_EOS);
[2725]1097 pp = p;
1098 *s++ = nextchar(&pp);
1099 p = pp;
[821]1100 }
1101 p++;
1102 *s = '\0';
1103 tc = TC_STRING;
[3232]1104 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
[821]1105 } else if ((expected & TC_REGEXP) && *p == '/') {
1106 /* it's regexp */
[1765]1107 t_string = s = ++p;
[821]1108 while (*p != '/') {
1109 if (*p == '\0' || *p == '\n')
1110 syntax_error(EMSG_UNEXP_EOS);
[1765]1111 *s = *p++;
1112 if (*s++ == '\\') {
[2725]1113 char *pp = p;
1114 s[-1] = bb_process_escape_sequence((const char **)&pp);
1115 if (*p == '\\')
[1765]1116 *s++ = '\\';
[2725]1117 if (pp == p)
[1765]1118 *s++ = *p++;
[2725]1119 else
1120 p = pp;
[821]1121 }
1122 }
1123 p++;
1124 *s = '\0';
1125 tc = TC_REGEXP;
[3232]1126 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
[821]1127
1128 } else if (*p == '.' || isdigit(*p)) {
1129 /* it's a number */
[2725]1130 char *pp = p;
1131 t_double = my_strtod(&pp);
1132 p = pp;
[821]1133 if (*p == '.')
1134 syntax_error(EMSG_UNEXP_TOKEN);
1135 tc = TC_NUMBER;
[3232]1136 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
[821]1137 } else {
1138 /* search for something known */
1139 tl = tokenlist;
1140 tc = 0x00000001;
1141 ti = tokeninfo;
1142 while (*tl) {
[2725]1143 int l = (unsigned char) *tl++;
1144 if (l == (unsigned char) NTCC) {
[821]1145 tc <<= 1;
1146 continue;
1147 }
[2725]1148 /* if token class is expected,
1149 * token matches,
1150 * and it's not a longer word,
[821]1151 */
[1765]1152 if ((tc & (expected | TC_WORD | TC_NEWLINE))
[2725]1153 && strncmp(p, tl, l) == 0
[1765]1154 && !((tc & TC_WORD) && isalnum_(p[l]))
1155 ) {
[2725]1156 /* then this is what we are looking for */
[1765]1157 t_info = *ti;
[3232]1158 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
[821]1159 p += l;
[2725]1160 goto token_found;
[821]1161 }
1162 ti++;
1163 tl += l;
1164 }
[2725]1165 /* not a known token */
[821]1166
[2725]1167 /* is it a name? (var/array/function) */
1168 if (!isalnum_(*p))
1169 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1170 /* yes */
1171 t_string = --p;
1172 while (isalnum_(*++p)) {
1173 p[-1] = *p;
1174 }
1175 p[-1] = '\0';
1176 tc = TC_VARIABLE;
1177 /* also consume whitespace between functionname and bracket */
1178 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1179 p = skip_spaces(p);
1180 if (*p == '(') {
1181 tc = TC_FUNCTION;
[3232]1182 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
[2725]1183 } else {
1184 if (*p == '[') {
1185 p++;
1186 tc = TC_ARRAY;
[3232]1187 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1188 } else
1189 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
[821]1190 }
1191 }
[3232]1192 token_found:
[1765]1193 g_pos = p;
[821]1194
1195 /* skipping newlines in some cases */
1196 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1197 goto readnext;
1198
1199 /* insert concatenation operator when needed */
[1765]1200 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
[821]1201 concat_inserted = TRUE;
1202 save_tclass = tc;
[1765]1203 save_info = t_info;
[821]1204 tc = TC_BINOP;
[1765]1205 t_info = OC_CONCAT | SS | P(35);
[821]1206 }
1207
[1765]1208 t_tclass = tc;
[821]1209 }
[1765]1210 ltclass = t_tclass;
[821]1211
1212 /* Are we ready for this? */
[3621]1213 if (!(ltclass & expected)) {
[821]1214 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
[1765]1215 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
[3621]1216 }
[821]1217
1218 return ltclass;
[1765]1219#undef concat_inserted
1220#undef save_tclass
1221#undef save_info
1222#undef ltclass
[821]1223}
1224
[1765]1225static void rollback_token(void)
1226{
1227 t_rollback = TRUE;
1228}
[821]1229
1230static node *new_node(uint32_t info)
1231{
[1765]1232 node *n;
[821]1233
[1765]1234 n = xzalloc(sizeof(node));
[821]1235 n->info = info;
[1765]1236 n->lineno = g_lineno;
[821]1237 return n;
1238}
1239
[2725]1240static void mk_re_node(const char *s, node *n, regex_t *re)
[821]1241{
1242 n->info = OC_REGEXP;
1243 n->l.re = re;
1244 n->r.ire = re + 1;
1245 xregcomp(re, s, REG_EXTENDED);
[1765]1246 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
[821]1247}
1248
1249static node *condition(void)
1250{
1251 next_token(TC_SEQSTART);
1252 return parse_expr(TC_SEQTERM);
1253}
1254
1255/* parse expression terminated by given argument, return ptr
1256 * to built subtree. Terminator is eaten by parse_expr */
1257static node *parse_expr(uint32_t iexp)
1258{
1259 node sn;
1260 node *cn = &sn;
1261 node *vn, *glptr;
1262 uint32_t tc, xtc;
1263 var *v;
1264
[3232]1265 debug_printf_parse("%s(%x)\n", __func__, iexp);
1266
[821]1267 sn.info = PRIMASK;
1268 sn.r.n = glptr = NULL;
1269 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1270
[1765]1271 while (!((tc = next_token(xtc)) & iexp)) {
[2725]1272
[1765]1273 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
[821]1274 /* input redirection (<) attached to glptr node */
[3232]1275 debug_printf_parse("%s: input redir\n", __func__);
[1765]1276 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
[821]1277 cn->a.n = glptr;
1278 xtc = TC_OPERAND | TC_UOPPRE;
1279 glptr = NULL;
1280
1281 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
[3232]1282 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
[821]1283 /* for binary and postfix-unary operators, jump back over
1284 * previous operators with higher priority */
1285 vn = cn;
[2725]1286 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1287 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1288 ) {
[821]1289 vn = vn->a.n;
[2725]1290 }
[1765]1291 if ((t_info & OPCLSMASK) == OC_TERNARY)
1292 t_info += P(6);
1293 cn = vn->a.n->r.n = new_node(t_info);
[821]1294 cn->a.n = vn->a.n;
1295 if (tc & TC_BINOP) {
1296 cn->l.n = vn;
1297 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
[1765]1298 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
[821]1299 /* it's a pipe */
1300 next_token(TC_GETLINE);
1301 /* give maximum priority to this pipe */
1302 cn->info &= ~PRIMASK;
1303 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304 }
1305 } else {
1306 cn->r.n = vn;
1307 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308 }
1309 vn->a.n = cn;
1310
1311 } else {
[3232]1312 debug_printf_parse("%s: other\n", __func__);
[821]1313 /* for operands and prefix-unary operators, attach them
1314 * to last node */
1315 vn = cn;
[1765]1316 cn = vn->r.n = new_node(t_info);
[821]1317 cn->a.n = vn;
1318 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1319 if (tc & (TC_OPERAND | TC_REGEXP)) {
[3232]1320 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
[821]1321 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1322 /* one should be very careful with switch on tclass -
1323 * only simple tclasses should be used! */
1324 switch (tc) {
[1765]1325 case TC_VARIABLE:
1326 case TC_ARRAY:
[3232]1327 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
[821]1328 cn->info = OC_VAR;
[1765]1329 v = hash_search(ahash, t_string);
1330 if (v != NULL) {
[821]1331 cn->info = OC_FNARG;
[2725]1332 cn->l.aidx = v->x.aidx;
[821]1333 } else {
[1765]1334 cn->l.v = newvar(t_string);
[821]1335 }
1336 if (tc & TC_ARRAY) {
1337 cn->info |= xS;
1338 cn->r.n = parse_expr(TC_ARRTERM);
1339 }
1340 break;
1341
[1765]1342 case TC_NUMBER:
1343 case TC_STRING:
[3232]1344 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
[821]1345 cn->info = OC_VAR;
1346 v = cn->l.v = xzalloc(sizeof(var));
1347 if (tc & TC_NUMBER)
[1765]1348 setvar_i(v, t_double);
[821]1349 else
[1765]1350 setvar_s(v, t_string);
[821]1351 break;
1352
[1765]1353 case TC_REGEXP:
[3232]1354 debug_printf_parse("%s: TC_REGEXP\n", __func__);
[1765]1355 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
[821]1356 break;
1357
[1765]1358 case TC_FUNCTION:
[3232]1359 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
[821]1360 cn->info = OC_FUNC;
[1765]1361 cn->r.f = newfunc(t_string);
[821]1362 cn->l.n = condition();
1363 break;
1364
[1765]1365 case TC_SEQSTART:
[3232]1366 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
[821]1367 cn = vn->r.n = parse_expr(TC_SEQTERM);
[3232]1368 if (!cn)
1369 syntax_error("Empty sequence");
[821]1370 cn->a.n = vn;
1371 break;
1372
[1765]1373 case TC_GETLINE:
[3232]1374 debug_printf_parse("%s: TC_GETLINE\n", __func__);
[821]1375 glptr = cn;
1376 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1377 break;
1378
[1765]1379 case TC_BUILTIN:
[3232]1380 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
[821]1381 cn->l.n = condition();
1382 break;
[3621]1383
1384 case TC_LENGTH:
1385 debug_printf_parse("%s: TC_LENGTH\n", __func__);
1386 next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
1387 rollback_token();
1388 if (t_tclass & TC_SEQSTART) {
1389 /* It was a "(" token. Handle just like TC_BUILTIN */
1390 cn->l.n = condition();
1391 }
1392 break;
[821]1393 }
1394 }
1395 }
1396 }
[3232]1397
1398 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
[821]1399 return sn.r.n;
1400}
1401
1402/* add node to chain. Return ptr to alloc'd node */
1403static node *chain_node(uint32_t info)
1404{
[1765]1405 node *n;
[821]1406
[1765]1407 if (!seq->first)
[821]1408 seq->first = seq->last = new_node(0);
1409
[1765]1410 if (seq->programname != g_progname) {
1411 seq->programname = g_progname;
[821]1412 n = chain_node(OC_NEWSOURCE);
[2725]1413 n->l.new_progname = xstrdup(g_progname);
[821]1414 }
1415
1416 n = seq->last;
1417 n->info = info;
1418 seq->last = n->a.n = new_node(OC_DONE);
1419
1420 return n;
1421}
1422
1423static void chain_expr(uint32_t info)
1424{
1425 node *n;
1426
1427 n = chain_node(info);
1428 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
[1765]1429 if (t_tclass & TC_GRPTERM)
[821]1430 rollback_token();
1431}
1432
1433static node *chain_loop(node *nn)
1434{
1435 node *n, *n2, *save_brk, *save_cont;
1436
1437 save_brk = break_ptr;
1438 save_cont = continue_ptr;
1439
1440 n = chain_node(OC_BR | Vx);
1441 continue_ptr = new_node(OC_EXEC);
1442 break_ptr = new_node(OC_EXEC);
1443 chain_group();
1444 n2 = chain_node(OC_EXEC | Vx);
1445 n2->l.n = nn;
1446 n2->a.n = n;
1447 continue_ptr->a.n = n2;
1448 break_ptr->a.n = n->r.n = seq->last;
1449
1450 continue_ptr = save_cont;
1451 break_ptr = save_brk;
1452
1453 return n;
1454}
1455
1456/* parse group and attach it to chain */
1457static void chain_group(void)
1458{
1459 uint32_t c;
1460 node *n, *n2, *n3;
1461
1462 do {
1463 c = next_token(TC_GRPSEQ);
1464 } while (c & TC_NEWLINE);
1465
1466 if (c & TC_GRPSTART) {
[3232]1467 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
[1765]1468 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
[3232]1469 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
[2725]1470 if (t_tclass & TC_NEWLINE)
1471 continue;
[821]1472 rollback_token();
1473 chain_group();
1474 }
[3232]1475 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
[821]1476 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
[3232]1477 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
[821]1478 rollback_token();
1479 chain_expr(OC_EXEC | Vx);
[3232]1480 } else {
1481 /* TC_STATEMNT */
1482 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
[1765]1483 switch (t_info & OPCLSMASK) {
1484 case ST_IF:
[3232]1485 debug_printf_parse("%s: ST_IF\n", __func__);
[1765]1486 n = chain_node(OC_BR | Vx);
1487 n->l.n = condition();
1488 chain_group();
1489 n2 = chain_node(OC_EXEC);
1490 n->r.n = seq->last;
1491 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
[821]1492 chain_group();
[1765]1493 n2->a.n = seq->last;
1494 } else {
1495 rollback_token();
1496 }
1497 break;
[821]1498
[1765]1499 case ST_WHILE:
[3232]1500 debug_printf_parse("%s: ST_WHILE\n", __func__);
[1765]1501 n2 = condition();
1502 n = chain_loop(NULL);
1503 n->l.n = n2;
1504 break;
1505
1506 case ST_DO:
[3232]1507 debug_printf_parse("%s: ST_DO\n", __func__);
[1765]1508 n2 = chain_node(OC_EXEC);
1509 n = chain_loop(NULL);
1510 n2->a.n = n->a.n;
1511 next_token(TC_WHILE);
1512 n->l.n = condition();
1513 break;
1514
1515 case ST_FOR:
[3232]1516 debug_printf_parse("%s: ST_FOR\n", __func__);
[1765]1517 next_token(TC_SEQSTART);
1518 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1519 if (t_tclass & TC_SEQTERM) { /* for-in */
1520 if ((n2->info & OPCLSMASK) != OC_IN)
1521 syntax_error(EMSG_UNEXP_TOKEN);
1522 n = chain_node(OC_WALKINIT | VV);
1523 n->l.n = n2->l.n;
1524 n->r.n = n2->r.n;
[821]1525 n = chain_loop(NULL);
[1765]1526 n->info = OC_WALKNEXT | Vx;
1527 n->l.n = n2->l.n;
1528 } else { /* for (;;) */
1529 n = chain_node(OC_EXEC | Vx);
[821]1530 n->l.n = n2;
[1765]1531 n2 = parse_expr(TC_SEMICOL);
1532 n3 = parse_expr(TC_SEQTERM);
1533 n = chain_loop(n3);
1534 n->l.n = n2;
1535 if (!n2)
1536 n->info = OC_EXEC;
1537 }
1538 break;
[821]1539
[1765]1540 case OC_PRINT:
1541 case OC_PRINTF:
[3232]1542 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
[1765]1543 n = chain_node(t_info);
1544 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1545 if (t_tclass & TC_OUTRDR) {
1546 n->info |= t_info;
1547 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1548 }
1549 if (t_tclass & TC_GRPTERM)
1550 rollback_token();
1551 break;
[821]1552
[1765]1553 case OC_BREAK:
[3232]1554 debug_printf_parse("%s: OC_BREAK\n", __func__);
[1765]1555 n = chain_node(OC_EXEC);
1556 n->a.n = break_ptr;
[3621]1557 chain_expr(t_info);
[1765]1558 break;
[821]1559
[1765]1560 case OC_CONTINUE:
[3232]1561 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
[1765]1562 n = chain_node(OC_EXEC);
1563 n->a.n = continue_ptr;
[3621]1564 chain_expr(t_info);
[1765]1565 break;
[821]1566
[1765]1567 /* delete, next, nextfile, return, exit */
1568 default:
[3232]1569 debug_printf_parse("%s: default\n", __func__);
[1765]1570 chain_expr(t_info);
[821]1571 }
1572 }
1573}
1574
1575static void parse_program(char *p)
1576{
1577 uint32_t tclass;
1578 node *cn;
1579 func *f;
1580 var *v;
1581
[1765]1582 g_pos = p;
1583 t_lineno = 1;
1584 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1585 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
[821]1586
[3232]1587 if (tclass & TC_OPTERM) {
1588 debug_printf_parse("%s: TC_OPTERM\n", __func__);
[821]1589 continue;
[3232]1590 }
[821]1591
1592 seq = &mainseq;
1593 if (tclass & TC_BEGIN) {
[3232]1594 debug_printf_parse("%s: TC_BEGIN\n", __func__);
[821]1595 seq = &beginseq;
1596 chain_group();
1597 } else if (tclass & TC_END) {
[3232]1598 debug_printf_parse("%s: TC_END\n", __func__);
[821]1599 seq = &endseq;
1600 chain_group();
1601 } else if (tclass & TC_FUNCDECL) {
[3232]1602 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
[821]1603 next_token(TC_FUNCTION);
[1765]1604 g_pos++;
1605 f = newfunc(t_string);
[821]1606 f->body.first = NULL;
1607 f->nargs = 0;
[1765]1608 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1609 v = findvar(ahash, t_string);
[2725]1610 v->x.aidx = f->nargs++;
[821]1611
1612 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1613 break;
1614 }
[2725]1615 seq = &f->body;
[821]1616 chain_group();
1617 clear_array(ahash);
1618 } else if (tclass & TC_OPSEQ) {
[3232]1619 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
[821]1620 rollback_token();
1621 cn = chain_node(OC_TEST);
1622 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
[1765]1623 if (t_tclass & TC_GRPSTART) {
[3232]1624 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
[821]1625 rollback_token();
1626 chain_group();
1627 } else {
[3232]1628 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
[821]1629 chain_node(OC_PRINT);
1630 }
1631 cn->r.n = mainseq.last;
1632 } else /* if (tclass & TC_GRPSTART) */ {
[3232]1633 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
[821]1634 rollback_token();
1635 chain_group();
1636 }
1637 }
[3232]1638 debug_printf_parse("%s: TC_EOF\n", __func__);
[821]1639}
1640
1641
1642/* -------- program execution part -------- */
1643
[1765]1644static node *mk_splitter(const char *s, tsplitter *spl)
[821]1645{
[1765]1646 regex_t *re, *ire;
[821]1647 node *n;
1648
1649 re = &spl->re[0];
1650 ire = &spl->re[1];
1651 n = &spl->n;
[1765]1652 if ((n->info & OPCLSMASK) == OC_REGEXP) {
[821]1653 regfree(re);
[1765]1654 regfree(ire); // TODO: nuke ire, use re+1?
[821]1655 }
[2725]1656 if (s[0] && s[1]) { /* strlen(s) > 1 */
[821]1657 mk_re_node(s, n, re);
1658 } else {
[2725]1659 n->info = (uint32_t) s[0];
[821]1660 }
1661
1662 return n;
1663}
1664
1665/* use node as a regular expression. Supplied with node ptr and regex_t
1666 * storage space. Return ptr to regex (if result points to preg, it should
1667 * be later regfree'd manually
1668 */
1669static regex_t *as_regex(node *op, regex_t *preg)
1670{
[2725]1671 int cflags;
[821]1672 var *v;
[1765]1673 const char *s;
[821]1674
1675 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1676 return icase ? op->r.ire : op->l.re;
1677 }
[1765]1678 v = nvalloc(1);
1679 s = getvar_s(evaluate(op, v));
[2725]1680
1681 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1682 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1683 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1684 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1685 * (maybe gsub is not supposed to use REG_EXTENDED?).
1686 */
1687 if (regcomp(preg, s, cflags)) {
1688 cflags &= ~REG_EXTENDED;
1689 xregcomp(preg, s, cflags);
1690 }
[1765]1691 nvfree(v);
1692 return preg;
[821]1693}
1694
/* Gradually growing buffer.
 * Returns b unchanged when it is non-NULL and n < *size; otherwise
 * reallocates to n + n/2 + 80 bytes and updates *size.
 * Because reallocation also happens when n == *size, the buffer always
 * has at least one byte beyond n.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n >> 1) + 80;
	return xrealloc(b, *size);
}
1707
1708/* resize field storage space */
1709static void fsrealloc(int size)
1710{
1711 int i;
1712
1713 if (size >= maxfields) {
1714 i = maxfields;
1715 maxfields = size + 16;
[2725]1716 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
[1765]1717 for (; i < maxfields; i++) {
[821]1718 Fields[i].type = VF_SPECIAL;
1719 Fields[i].string = NULL;
1720 }
1721 }
[2725]1722 /* if size < nfields, clear extra field variables */
1723 for (i = size; i < nfields; i++) {
1724 clrvar(Fields + i);
[821]1725 }
1726 nfields = size;
1727}
1728
[1765]1729static int awk_split(const char *s, node *spl, char **slist)
[821]1730{
[2725]1731 int l, n;
[821]1732 char c[4];
1733 char *s1;
[1765]1734 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
[821]1735
1736 /* in worst case, each char would be a separate field */
[1765]1737 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1738 strcpy(s1, s);
[821]1739
1740 c[0] = c[1] = (char)spl->info;
1741 c[2] = c[3] = '\0';
[1765]1742 if (*getvar_s(intvar[RS]) == '\0')
1743 c[2] = '\n';
[821]1744
[2725]1745 n = 0;
[1765]1746 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1747 if (!*s)
1748 return n; /* "": zero fields */
1749 n++; /* at least one field will be there */
1750 do {
1751 l = strcspn(s, c+2); /* len till next NUL or \n */
1752 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1753 && pmatch[0].rm_so <= l
1754 ) {
[821]1755 l = pmatch[0].rm_so;
[1765]1756 if (pmatch[0].rm_eo == 0) {
1757 l++;
1758 pmatch[0].rm_eo++;
1759 }
1760 n++; /* we saw yet another delimiter */
[821]1761 } else {
1762 pmatch[0].rm_eo = l;
[2725]1763 if (s[l])
1764 pmatch[0].rm_eo++;
[821]1765 }
1766 memcpy(s1, s, l);
[2725]1767 /* make sure we remove *all* of the separator chars */
1768 do {
1769 s1[l] = '\0';
1770 } while (++l < pmatch[0].rm_eo);
[821]1771 nextword(&s1);
1772 s += pmatch[0].rm_eo;
[1765]1773 } while (*s);
1774 return n;
1775 }
1776 if (c[0] == '\0') { /* null split */
1777 while (*s) {
1778 *s1++ = *s++;
1779 *s1++ = '\0';
[821]1780 n++;
1781 }
[1765]1782 return n;
1783 }
1784 if (c[0] != ' ') { /* single-character split */
[821]1785 if (icase) {
1786 c[0] = toupper(c[0]);
1787 c[1] = tolower(c[1]);
1788 }
[2725]1789 if (*s1)
1790 n++;
1791 while ((s1 = strpbrk(s1, c)) != NULL) {
[1765]1792 *s1++ = '\0';
[821]1793 n++;
1794 }
[1765]1795 return n;
[821]1796 }
[1765]1797 /* space split */
1798 while (*s) {
1799 s = skip_whitespace(s);
[2725]1800 if (!*s)
1801 break;
[1765]1802 n++;
1803 while (*s && !isspace(*s))
1804 *s1++ = *s++;
1805 *s1++ = '\0';
1806 }
[821]1807 return n;
1808}
1809
1810static void split_f0(void)
1811{
[1765]1812/* static char *fstrings; */
1813#define fstrings (G.split_f0__fstrings)
1814
[821]1815 int i, n;
1816 char *s;
1817
1818 if (is_f0_split)
1819 return;
1820
1821 is_f0_split = TRUE;
1822 free(fstrings);
1823 fsrealloc(0);
[1765]1824 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
[821]1825 fsrealloc(n);
1826 s = fstrings;
[1765]1827 for (i = 0; i < n; i++) {
[821]1828 Fields[i].string = nextword(&s);
1829 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1830 }
1831
1832 /* set NF manually to avoid side effects */
[1765]1833 clrvar(intvar[NF]);
1834 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1835 intvar[NF]->number = nfields;
1836#undef fstrings
[821]1837}
1838
1839/* perform additional actions when some internal variables changed */
1840static void handle_special(var *v)
1841{
1842 int n;
[1765]1843 char *b;
1844 const char *sep, *s;
[821]1845 int sl, l, len, i, bsize;
1846
[1765]1847 if (!(v->type & VF_SPECIAL))
[821]1848 return;
1849
[1765]1850 if (v == intvar[NF]) {
[821]1851 n = (int)getvar_i(v);
1852 fsrealloc(n);
1853
1854 /* recalculate $0 */
[1765]1855 sep = getvar_s(intvar[OFS]);
[821]1856 sl = strlen(sep);
1857 b = NULL;
1858 len = 0;
[1765]1859 for (i = 0; i < n; i++) {
[821]1860 s = getvar_s(&Fields[i]);
1861 l = strlen(s);
1862 if (b) {
1863 memcpy(b+len, sep, sl);
1864 len += sl;
1865 }
[2725]1866 b = qrealloc(b, len+l+sl, &bsize);
[821]1867 memcpy(b+len, s, l);
1868 len += l;
1869 }
[1765]1870 if (b)
1871 b[len] = '\0';
1872 setvar_p(intvar[F0], b);
[821]1873 is_f0_split = TRUE;
1874
[1765]1875 } else if (v == intvar[F0]) {
[821]1876 is_f0_split = FALSE;
1877
[1765]1878 } else if (v == intvar[FS]) {
[3232]1879 /*
1880 * The POSIX-2008 standard says that changing FS should have no effect on the
1881 * current input line, but only on the next one. The language is:
1882 *
1883 * > Before the first reference to a field in the record is evaluated, the record
1884 * > shall be split into fields, according to the rules in Regular Expressions,
1885 * > using the value of FS that was current at the time the record was read.
1886 *
1887 * So, split up current line before assignment to FS:
1888 */
1889 split_f0();
1890
[821]1891 mk_splitter(getvar_s(v), &fsplitter);
[1765]1892 } else if (v == intvar[RS]) {
[821]1893 mk_splitter(getvar_s(v), &rsplitter);
[1765]1894 } else if (v == intvar[IGNORECASE]) {
[821]1895 icase = istrue(v);
[1765]1896 } else { /* $n */
1897 n = getvar_i(intvar[NF]);
1898 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
[821]1899 /* right here v is invalid. Just to note... */
1900 }
1901}
1902
1903/* step through func/builtin/etc arguments */
1904static node *nextarg(node **pn)
1905{
1906 node *n;
1907
1908 n = *pn;
1909 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1910 *pn = n->r.n;
1911 n = n->l.n;
1912 } else {
1913 *pn = NULL;
1914 }
1915 return n;
1916}
1917
1918static void hashwalk_init(var *v, xhash *array)
1919{
1920 hash_item *hi;
[2725]1921 unsigned i;
1922 walker_list *w;
1923 walker_list *prev_walker;
[821]1924
[2725]1925 if (v->type & VF_WALK) {
1926 prev_walker = v->x.walker;
1927 } else {
1928 v->type |= VF_WALK;
1929 prev_walker = NULL;
1930 }
1931 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
[821]1932
[2725]1933 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1934 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1935 w->cur = w->end = w->wbuf;
1936 w->prev = prev_walker;
[1765]1937 for (i = 0; i < array->csize; i++) {
[821]1938 hi = array->items[i];
[1765]1939 while (hi) {
[2725]1940 strcpy(w->end, hi->name);
1941 nextword(&w->end);
[821]1942 hi = hi->next;
1943 }
1944 }
1945}
1946
1947static int hashwalk_next(var *v)
1948{
[2725]1949 walker_list *w = v->x.walker;
[821]1950
[2725]1951 if (w->cur >= w->end) {
1952 walker_list *prev_walker = w->prev;
1953
1954 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1955 free(w);
1956 v->x.walker = prev_walker;
[821]1957 return FALSE;
[2725]1958 }
[821]1959
[2725]1960 setvar_s(v, nextword(&w->cur));
[821]1961 return TRUE;
1962}
1963
1964/* evaluate node, return 1 when result is true, 0 otherwise */
1965static int ptest(node *pattern)
1966{
[1765]1967 /* ptest__v is "static": to save stack space? */
1968 return istrue(evaluate(pattern, &G.ptest__v));
[821]1969}
1970
1971/* read next record from stream rsm into a variable v */
1972static int awk_getline(rstream *rsm, var *v)
1973{
1974 char *b;
1975 regmatch_t pmatch[2];
[2725]1976 int size, a, p, pp = 0;
[821]1977 int fd, so, eo, r, rp;
1978 char c, *m, *s;
1979
[2725]1980 debug_printf_eval("entered %s()\n", __func__);
1981
[821]1982 /* we're using our own buffer since we need access to accumulating
1983 * characters
1984 */
1985 fd = fileno(rsm->F);
1986 m = rsm->buffer;
1987 a = rsm->adv;
1988 p = rsm->pos;
1989 size = rsm->size;
1990 c = (char) rsplitter.n.info;
1991 rp = 0;
1992
[2725]1993 if (!m)
1994 m = qrealloc(m, 256, &size);
1995
[821]1996 do {
1997 b = m + a;
1998 so = eo = p;
1999 r = 1;
2000 if (p > 0) {
2001 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
2002 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
[1765]2003 b, 1, pmatch, 0) == 0) {
[821]2004 so = pmatch[0].rm_so;
2005 eo = pmatch[0].rm_eo;
2006 if (b[eo] != '\0')
2007 break;
2008 }
2009 } else if (c != '\0') {
2010 s = strchr(b+pp, c);
[2725]2011 if (!s)
2012 s = memchr(b+pp, '\0', p - pp);
[821]2013 if (s) {
2014 so = eo = s-b;
2015 eo++;
2016 break;
2017 }
2018 } else {
2019 while (b[rp] == '\n')
2020 rp++;
2021 s = strstr(b+rp, "\n\n");
2022 if (s) {
2023 so = eo = s-b;
[2725]2024 while (b[eo] == '\n')
2025 eo++;
[821]2026 if (b[eo] != '\0')
2027 break;
2028 }
2029 }
2030 }
2031
2032 if (a > 0) {
[2725]2033 memmove(m, m+a, p+1);
[821]2034 b = m;
2035 a = 0;
2036 }
2037
[2725]2038 m = qrealloc(m, a+p+128, &size);
[821]2039 b = m + a;
2040 pp = p;
2041 p += safe_read(fd, b+p, size-p-1);
2042 if (p < pp) {
2043 p = 0;
2044 r = 0;
[1765]2045 setvar_i(intvar[ERRNO], errno);
[821]2046 }
2047 b[p] = '\0';
2048
2049 } while (p > pp);
2050
2051 if (p == 0) {
2052 r--;
2053 } else {
2054 c = b[so]; b[so] = '\0';
2055 setvar_s(v, b+rp);
2056 v->type |= VF_USER;
2057 b[so] = c;
2058 c = b[eo]; b[eo] = '\0';
[1765]2059 setvar_s(intvar[RT], b+so);
[821]2060 b[eo] = c;
2061 }
2062
2063 rsm->buffer = m;
2064 rsm->adv = a + eo;
2065 rsm->pos = p - eo;
2066 rsm->size = size;
2067
[2725]2068 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2069
[821]2070 return r;
2071}
2072
2073static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2074{
[1765]2075 int r = 0;
[821]2076 char c;
[1765]2077 const char *s = format;
[821]2078
[3621]2079 if (int_as_int && n == (long long)n) {
2080 r = snprintf(b, size, "%lld", (long long)n);
[821]2081 } else {
[1765]2082 do { c = *s; } while (c && *++s);
[821]2083 if (strchr("diouxX", c)) {
2084 r = snprintf(b, size, format, (int)n);
2085 } else if (strchr("eEfgG", c)) {
2086 r = snprintf(b, size, format, n);
2087 } else {
[1765]2088 syntax_error(EMSG_INV_FMT);
[821]2089 }
2090 }
2091 return r;
2092}
2093
2094/* formatted output into an allocated buffer, return ptr to buffer */
2095static char *awk_printf(node *n)
2096{
2097 char *b = NULL;
[1765]2098 char *fmt, *s, *f;
2099 const char *s1;
[821]2100 int i, j, incr, bsize;
2101 char c, c1;
2102 var *v, *arg;
2103
2104 v = nvalloc(1);
[1765]2105 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
[821]2106
2107 i = 0;
2108 while (*f) {
2109 s = f;
[2725]2110 while (*f && (*f != '%' || *++f == '%'))
[821]2111 f++;
[1765]2112 while (*f && !isalpha(*f)) {
2113 if (*f == '*')
2114 syntax_error("%*x formats are not supported");
[821]2115 f++;
[1765]2116 }
[821]2117
2118 incr = (f - s) + MAXVARFMT;
[2725]2119 b = qrealloc(b, incr + i, &bsize);
[1765]2120 c = *f;
[2725]2121 if (c != '\0')
2122 f++;
[1765]2123 c1 = *f;
2124 *f = '\0';
[821]2125 arg = evaluate(nextarg(&n), v);
2126
2127 j = i;
2128 if (c == 'c' || !c) {
[1765]2129 i += sprintf(b+i, s, is_numeric(arg) ?
2130 (char)getvar_i(arg) : *getvar_s(arg));
[821]2131 } else if (c == 's') {
[1765]2132 s1 = getvar_s(arg);
[2725]2133 b = qrealloc(b, incr+i+strlen(s1), &bsize);
[821]2134 i += sprintf(b+i, s, s1);
2135 } else {
2136 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2137 }
2138 *f = c1;
2139
2140 /* if there was an error while sprintf, return value is negative */
[2725]2141 if (i < j)
2142 i = j;
[821]2143 }
2144
2145 free(fmt);
2146 nvfree(v);
[2725]2147 b = xrealloc(b, i + 1);
[821]2148 b[i] = '\0';
2149 return b;
2150}
2151
[2725]2152/* Common substitution routine.
2153 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2154 * store result into (dest), return number of substitutions.
2155 * If nm = 0, replace all matches.
2156 * If src or dst is NULL, use $0.
2157 * If subexp != 0, enable subexpression matching (\1-\9).
[821]2158 */
[2725]2159static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
[821]2160{
[2725]2161 char *resbuf;
[1765]2162 const char *sp;
[2725]2163 int match_no, residx, replen, resbufsize;
2164 int regexec_flags;
[821]2165 regmatch_t pmatch[10];
[2725]2166 regex_t sreg, *regex;
[821]2167
[2725]2168 resbuf = NULL;
2169 residx = 0;
2170 match_no = 0;
2171 regexec_flags = 0;
2172 regex = as_regex(rn, &sreg);
2173 sp = getvar_s(src ? src : intvar[F0]);
2174 replen = strlen(repl);
2175 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2176 int so = pmatch[0].rm_so;
2177 int eo = pmatch[0].rm_eo;
[821]2178
[2725]2179 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2180 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2181 memcpy(resbuf + residx, sp, eo);
2182 residx += eo;
2183 if (++match_no >= nm) {
2184 const char *s;
2185 int nbs;
[821]2186
2187 /* replace */
[2725]2188 residx -= (eo - so);
[821]2189 nbs = 0;
2190 for (s = repl; *s; s++) {
[2725]2191 char c = resbuf[residx++] = *s;
[821]2192 if (c == '\\') {
2193 nbs++;
2194 continue;
2195 }
[2725]2196 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2197 int j;
2198 residx -= ((nbs + 3) >> 1);
[821]2199 j = 0;
2200 if (c != '&') {
2201 j = c - '0';
2202 nbs++;
2203 }
2204 if (nbs % 2) {
[2725]2205 resbuf[residx++] = c;
[821]2206 } else {
[2725]2207 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2208 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2209 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2210 residx += n;
[821]2211 }
2212 }
2213 nbs = 0;
2214 }
2215 }
2216
[2725]2217 regexec_flags = REG_NOTBOL;
[821]2218 sp += eo;
[2725]2219 if (match_no == nm)
2220 break;
[821]2221 if (eo == so) {
[2725]2222 /* Empty match (e.g. "b*" will match anywhere).
2223 * Advance by one char. */
2224//BUG (bug 1333):
2225//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2226//... and will erroneously match "b" even though it is NOT at the word start.
2227//we need REG_NOTBOW but it does not exist...
2228//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2229//it should be able to do it correctly.
2230 /* Subtle: this is safe only because
2231 * qrealloc allocated at least one extra byte */
2232 resbuf[residx] = *sp;
2233 if (*sp == '\0')
2234 goto ret;
2235 sp++;
2236 residx++;
[821]2237 }
2238 }
2239
[2725]2240 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2241 strcpy(resbuf + residx, sp);
2242 ret:
2243 //bb_error_msg("end sp:'%s'%p", sp,sp);
2244 setvar_p(dest ? dest : intvar[F0], resbuf);
2245 if (regex == &sreg)
2246 regfree(regex);
2247 return match_no;
[821]2248}
2249
[2725]2250static NOINLINE int do_mktime(const char *ds)
[821]2251{
[2725]2252 struct tm then;
2253 int count;
2254
2255 /*memset(&then, 0, sizeof(then)); - not needed */
2256 then.tm_isdst = -1; /* default is unknown */
2257
2258 /* manpage of mktime says these fields are ints,
2259 * so we can sscanf stuff directly into them */
2260 count = sscanf(ds, "%u %u %u %u %u %u %d",
2261 &then.tm_year, &then.tm_mon, &then.tm_mday,
2262 &then.tm_hour, &then.tm_min, &then.tm_sec,
2263 &then.tm_isdst);
2264
2265 if (count < 6
2266 || (unsigned)then.tm_mon < 1
2267 || (unsigned)then.tm_year < 1900
2268 ) {
2269 return -1;
2270 }
2271
2272 then.tm_mon -= 1;
2273 then.tm_year -= 1900;
2274
2275 return mktime(&then);
2276}
2277
2278static NOINLINE var *exec_builtin(node *op, var *res)
2279{
[1765]2280#define tspl (G.exec_builtin__tspl)
2281
[821]2282 var *tv;
2283 node *an[4];
[1765]2284 var *av[4];
2285 const char *as[4];
[821]2286 regmatch_t pmatch[2];
2287 regex_t sreg, *re;
2288 node *spl;
2289 uint32_t isr, info;
2290 int nargs;
2291 time_t tt;
2292 int i, l, ll, n;
2293
2294 tv = nvalloc(4);
2295 isr = info = op->info;
2296 op = op->l.n;
2297
2298 av[2] = av[3] = NULL;
[1765]2299 for (i = 0; i < 4 && op; i++) {
[821]2300 an[i] = nextarg(&op);
[2725]2301 if (isr & 0x09000000)
2302 av[i] = evaluate(an[i], &tv[i]);
2303 if (isr & 0x08000000)
2304 as[i] = getvar_s(av[i]);
[821]2305 isr >>= 1;
2306 }
2307
2308 nargs = i;
[2725]2309 if ((uint32_t)nargs < (info >> 30))
[1765]2310 syntax_error(EMSG_TOO_FEW_ARGS);
[821]2311
[2725]2312 info &= OPNMASK;
2313 switch (info) {
[821]2314
[1765]2315 case B_a2:
[2725]2316 if (ENABLE_FEATURE_AWK_LIBM)
2317 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2318 else
2319 syntax_error(EMSG_NO_MATH);
[821]2320 break;
2321
[2725]2322 case B_sp: {
2323 char *s, *s1;
2324
[821]2325 if (nargs > 2) {
2326 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2327 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2328 } else {
2329 spl = &fsplitter.n;
2330 }
2331
2332 n = awk_split(as[0], spl, &s);
2333 s1 = s;
2334 clear_array(iamarray(av[1]));
[2725]2335 for (i = 1; i <= n; i++)
2336 setari_u(av[1], i, nextword(&s));
2337 free(s1);
[821]2338 setvar_i(res, n);
2339 break;
[2725]2340 }
[821]2341
[2725]2342 case B_ss: {
2343 char *s;
2344
[821]2345 l = strlen(as[0]);
2346 i = getvar_i(av[1]) - 1;
[2725]2347 if (i > l)
2348 i = l;
2349 if (i < 0)
2350 i = 0;
[821]2351 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
[2725]2352 if (n < 0)
2353 n = 0;
2354 s = xstrndup(as[0]+i, n);
[821]2355 setvar_p(res, s);
2356 break;
[2725]2357 }
[821]2358
[2725]2359 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2360 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
[1765]2361 case B_an:
[2725]2362 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
[1765]2363 break;
2364
2365 case B_co:
[2725]2366 setvar_i(res, ~getvar_i_int(av[0]));
[1765]2367 break;
2368
2369 case B_ls:
[2725]2370 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
[1765]2371 break;
2372
2373 case B_or:
[2725]2374 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
[1765]2375 break;
2376
2377 case B_rs:
[2725]2378 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
[1765]2379 break;
2380
2381 case B_xo:
[2725]2382 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
[1765]2383 break;
2384
2385 case B_lo:
[2725]2386 case B_up: {
2387 char *s, *s1;
[1765]2388 s1 = s = xstrdup(as[0]);
[821]2389 while (*s1) {
[2725]2390 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2391 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2392 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
[821]2393 s1++;
2394 }
2395 setvar_p(res, s);
2396 break;
[2725]2397 }
[821]2398
[1765]2399 case B_ix:
[821]2400 n = 0;
2401 ll = strlen(as[1]);
2402 l = strlen(as[0]) - ll;
2403 if (ll > 0 && l >= 0) {
[1765]2404 if (!icase) {
[2725]2405 char *s = strstr(as[0], as[1]);
2406 if (s)
2407 n = (s - as[0]) + 1;
[821]2408 } else {
2409 /* this piece of code is terribly slow and
2410 * really should be rewritten
2411 */
[2725]2412 for (i = 0; i <= l; i++) {
[821]2413 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2414 n = i+1;
2415 break;
2416 }
2417 }
2418 }
2419 }
2420 setvar_i(res, n);
2421 break;
2422
[1765]2423 case B_ti:
[821]2424 if (nargs > 1)
2425 tt = getvar_i(av[1]);
2426 else
2427 time(&tt);
[1765]2428 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2429 i = strftime(g_buf, MAXVARFMT,
2430 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2431 localtime(&tt));
2432 g_buf[i] = '\0';
2433 setvar_s(res, g_buf);
[821]2434 break;
2435
[2725]2436 case B_mt:
2437 setvar_i(res, do_mktime(as[0]));
2438 break;
2439
[1765]2440 case B_ma:
[821]2441 re = as_regex(an[1], &sreg);
2442 n = regexec(re, as[0], 1, pmatch, 0);
2443 if (n == 0) {
2444 pmatch[0].rm_so++;
2445 pmatch[0].rm_eo++;
2446 } else {
2447 pmatch[0].rm_so = 0;
2448 pmatch[0].rm_eo = -1;
2449 }
2450 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2451 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2452 setvar_i(res, pmatch[0].rm_so);
[2725]2453 if (re == &sreg)
2454 regfree(re);
[821]2455 break;
2456
[1765]2457 case B_ge:
[821]2458 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2459 break;
2460
[1765]2461 case B_gs:
[821]2462 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2463 break;
2464
[1765]2465 case B_su:
[821]2466 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2467 break;
2468 }
2469
2470 nvfree(tv);
2471 return res;
[1765]2472#undef tspl
[821]2473}
2474
2475/*
2476 * Evaluate node - the heart of the program. Supplied with subtree
2477 * and place where to store result. returns ptr to result.
2478 */
2479#define XC(n) ((n) >> 8)
2480
2481static var *evaluate(node *op, var *res)
2482{
[1765]2483/* This procedure is recursive so we should count every byte */
2484#define fnargs (G.evaluate__fnargs)
2485/* seed is initialized to 1 */
2486#define seed (G.evaluate__seed)
[2725]2487#define sreg (G.evaluate__sreg)
[1765]2488
[821]2489 var *v1;
2490
[1765]2491 if (!op)
[821]2492 return setvar_s(res, NULL);
2493
[2725]2494 debug_printf_eval("entered %s()\n", __func__);
2495
[821]2496 v1 = nvalloc(2);
2497
2498 while (op) {
[2725]2499 struct {
2500 var *v;
2501 const char *s;
2502 } L = L; /* for compiler */
2503 struct {
2504 var *v;
2505 const char *s;
2506 } R = R;
2507 double L_d = L_d;
2508 uint32_t opinfo;
2509 int opn;
2510 node *op1;
2511
[821]2512 opinfo = op->info;
[1765]2513 opn = (opinfo & OPNMASK);
2514 g_lineno = op->lineno;
[2725]2515 op1 = op->l.n;
2516 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
[821]2517
2518 /* execute inevitable things */
[2725]2519 if (opinfo & OF_RES1)
2520 L.v = evaluate(op1, v1);
2521 if (opinfo & OF_RES2)
2522 R.v = evaluate(op->r.n, v1+1);
2523 if (opinfo & OF_STR1) {
2524 L.s = getvar_s(L.v);
2525 debug_printf_eval("L.s:'%s'\n", L.s);
2526 }
2527 if (opinfo & OF_STR2) {
2528 R.s = getvar_s(R.v);
2529 debug_printf_eval("R.s:'%s'\n", R.s);
2530 }
2531 if (opinfo & OF_NUM1) {
2532 L_d = getvar_i(L.v);
2533 debug_printf_eval("L_d:%f\n", L_d);
2534 }
[821]2535
[2725]2536 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
[821]2537 switch (XC(opinfo & OPCLSMASK)) {
2538
[1765]2539 /* -- iterative node type -- */
[821]2540
[1765]2541 /* test pattern */
2542 case XC( OC_TEST ):
[821]2543 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2544 /* it's range pattern */
2545 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2546 op->info |= OF_CHECKED;
2547 if (ptest(op1->r.n))
2548 op->info &= ~OF_CHECKED;
2549 op = op->a.n;
2550 } else {
2551 op = op->r.n;
2552 }
2553 } else {
[2725]2554 op = ptest(op1) ? op->a.n : op->r.n;
[821]2555 }
2556 break;
2557
[1765]2558 /* just evaluate an expression, also used as unconditional jump */
2559 case XC( OC_EXEC ):
[821]2560 break;
2561
[1765]2562 /* branch, used in if-else and various loops */
2563 case XC( OC_BR ):
[821]2564 op = istrue(L.v) ? op->a.n : op->r.n;
2565 break;
2566
[1765]2567 /* initialize for-in loop */
2568 case XC( OC_WALKINIT ):
[821]2569 hashwalk_init(L.v, iamarray(R.v));
2570 break;
2571
[1765]2572 /* get next array item */
2573 case XC( OC_WALKNEXT ):
[821]2574 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2575 break;
2576
[1765]2577 case XC( OC_PRINT ):
[2725]2578 case XC( OC_PRINTF ): {
2579 FILE *F = stdout;
2580
[821]2581 if (op->r.n) {
[2725]2582 rstream *rsm = newfile(R.s);
2583 if (!rsm->F) {
[821]2584 if (opn == '|') {
[2725]2585 rsm->F = popen(R.s, "w");
2586 if (rsm->F == NULL)
[821]2587 bb_perror_msg_and_die("popen");
[2725]2588 rsm->is_pipe = 1;
[821]2589 } else {
[2725]2590 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
[821]2591 }
2592 }
[2725]2593 F = rsm->F;
[821]2594 }
2595
2596 if ((opinfo & OPCLSMASK) == OC_PRINT) {
[1765]2597 if (!op1) {
[2725]2598 fputs(getvar_s(intvar[F0]), F);
[821]2599 } else {
2600 while (op1) {
[2725]2601 var *v = evaluate(nextarg(&op1), v1);
2602 if (v->type & VF_NUMBER) {
[1765]2603 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
[2725]2604 getvar_i(v), TRUE);
2605 fputs(g_buf, F);
[821]2606 } else {
[2725]2607 fputs(getvar_s(v), F);
[821]2608 }
2609
[2725]2610 if (op1)
2611 fputs(getvar_s(intvar[OFS]), F);
[821]2612 }
2613 }
[2725]2614 fputs(getvar_s(intvar[ORS]), F);
[821]2615
2616 } else { /* OC_PRINTF */
[2725]2617 char *s = awk_printf(op1);
2618 fputs(s, F);
2619 free(s);
[821]2620 }
[2725]2621 fflush(F);
[821]2622 break;
[2725]2623 }
[821]2624
[2725]2625 case XC( OC_DELETE ): {
2626 uint32_t info = op1->info & OPCLSMASK;
2627 var *v;
2628
2629 if (info == OC_VAR) {
2630 v = op1->l.v;
2631 } else if (info == OC_FNARG) {
2632 v = &fnargs[op1->l.aidx];
[821]2633 } else {
[1765]2634 syntax_error(EMSG_NOT_ARRAY);
[821]2635 }
2636
2637 if (op1->r.n) {
[2725]2638 const char *s;
[821]2639 clrvar(L.v);
[2725]2640 s = getvar_s(evaluate(op1->r.n, v1));
2641 hash_remove(iamarray(v), s);
[821]2642 } else {
[2725]2643 clear_array(iamarray(v));
[821]2644 }
2645 break;
[2725]2646 }
[821]2647
[1765]2648 case XC( OC_NEWSOURCE ):
[2725]2649 g_progname = op->l.new_progname;
[821]2650 break;
2651
[1765]2652 case XC( OC_RETURN ):
[821]2653 copyvar(res, L.v);
2654 break;
2655
[1765]2656 case XC( OC_NEXTFILE ):
[821]2657 nextfile = TRUE;
[1765]2658 case XC( OC_NEXT ):
[821]2659 nextrec = TRUE;
[1765]2660 case XC( OC_DONE ):
[821]2661 clrvar(res);
2662 break;
2663
[1765]2664 case XC( OC_EXIT ):
[2725]2665 awk_exit(L_d);
[821]2666
[1765]2667 /* -- recursive node type -- */
[821]2668
[1765]2669 case XC( OC_VAR ):
[821]2670 L.v = op->l.v;
[1765]2671 if (L.v == intvar[NF])
[821]2672 split_f0();
2673 goto v_cont;
2674
[1765]2675 case XC( OC_FNARG ):
[2725]2676 L.v = &fnargs[op->l.aidx];
[1765]2677 v_cont:
2678 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
[821]2679 break;
2680
[1765]2681 case XC( OC_IN ):
[821]2682 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2683 break;
2684
[1765]2685 case XC( OC_REGEXP ):
[821]2686 op1 = op;
[1765]2687 L.s = getvar_s(intvar[F0]);
[821]2688 goto re_cont;
2689
[1765]2690 case XC( OC_MATCH ):
[821]2691 op1 = op->r.n;
[1765]2692 re_cont:
[2725]2693 {
2694 regex_t *re = as_regex(op1, &sreg);
2695 int i = regexec(re, L.s, 0, NULL, 0);
2696 if (re == &sreg)
2697 regfree(re);
2698 setvar_i(res, (i == 0) ^ (opn == '!'));
2699 }
[821]2700 break;
2701
[1765]2702 case XC( OC_MOVE ):
[2725]2703 debug_printf_eval("MOVE\n");
[821]2704 /* if source is a temporary string, jusk relink it to dest */
[2725]2705//Disabled: if R.v is numeric but happens to have cached R.v->string,
2706//then L.v ends up being a string, which is wrong
2707// if (R.v == v1+1 && R.v->string) {
2708// res = setvar_p(L.v, R.v->string);
2709// R.v->string = NULL;
2710// } else {
[821]2711 res = copyvar(L.v, R.v);
[2725]2712// }
[821]2713 break;
2714
[1765]2715 case XC( OC_TERNARY ):
[821]2716 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
[1765]2717 syntax_error(EMSG_POSSIBLE_ERROR);
[821]2718 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2719 break;
2720
[2725]2721 case XC( OC_FUNC ): {
2722 var *vbeg, *v;
2723 const char *sv_progname;
2724
[3621]2725 /* The body might be empty, still has to eval the args */
2726 if (!op->r.n->info && !op->r.f->body.first)
[1765]2727 syntax_error(EMSG_UNDEF_FUNC);
[821]2728
[2725]2729 vbeg = v = nvalloc(op->r.f->nargs + 1);
[821]2730 while (op1) {
[2725]2731 var *arg = evaluate(nextarg(&op1), v1);
2732 copyvar(v, arg);
2733 v->type |= VF_CHILD;
2734 v->x.parent = arg;
2735 if (++v - vbeg >= op->r.f->nargs)
[821]2736 break;
2737 }
2738
[2725]2739 v = fnargs;
2740 fnargs = vbeg;
2741 sv_progname = g_progname;
[821]2742
2743 res = evaluate(op->r.f->body.first, res);
2744
[2725]2745 g_progname = sv_progname;
[821]2746 nvfree(fnargs);
[2725]2747 fnargs = v;
2748
[821]2749 break;
[2725]2750 }
[821]2751
[1765]2752 case XC( OC_GETLINE ):
[2725]2753 case XC( OC_PGETLINE ): {
2754 rstream *rsm;
2755 int i;
2756
[821]2757 if (op1) {
[2725]2758 rsm = newfile(L.s);
2759 if (!rsm->F) {
[821]2760 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
[2725]2761 rsm->F = popen(L.s, "r");
2762 rsm->is_pipe = TRUE;
[821]2763 } else {
[2725]2764 rsm->F = fopen_for_read(L.s); /* not xfopen! */
[821]2765 }
2766 }
2767 } else {
[2725]2768 if (!iF)
2769 iF = next_input_file();
2770 rsm = iF;
[821]2771 }
2772
[3232]2773 if (!rsm || !rsm->F) {
[1765]2774 setvar_i(intvar[ERRNO], errno);
[821]2775 setvar_i(res, -1);
2776 break;
2777 }
2778
[1765]2779 if (!op->r.n)
2780 R.v = intvar[F0];
[821]2781
[2725]2782 i = awk_getline(rsm, R.v);
2783 if (i > 0 && !op1) {
2784 incvar(intvar[FNR]);
2785 incvar(intvar[NR]);
[821]2786 }
[2725]2787 setvar_i(res, i);
[821]2788 break;
[2725]2789 }
[821]2790
[1765]2791 /* simple builtins */
[2725]2792 case XC( OC_FBLTIN ): {
2793 double R_d = R_d; /* for compiler */
2794
[821]2795 switch (opn) {
[1765]2796 case F_in:
[3621]2797 R_d = (long long)L_d;
[821]2798 break;
2799
[1765]2800 case F_rn:
[2725]2801 R_d = (double)rand() / (double)RAND_MAX;
[821]2802 break;
[2725]2803
[1765]2804 case F_co:
[2725]2805 if (ENABLE_FEATURE_AWK_LIBM) {
2806 R_d = cos(L_d);
2807 break;
2808 }
[821]2809
[1765]2810 case F_ex:
[2725]2811 if (ENABLE_FEATURE_AWK_LIBM) {
2812 R_d = exp(L_d);
2813 break;
2814 }
[821]2815
[1765]2816 case F_lg:
[2725]2817 if (ENABLE_FEATURE_AWK_LIBM) {
2818 R_d = log(L_d);
2819 break;
2820 }
[821]2821
[1765]2822 case F_si:
[2725]2823 if (ENABLE_FEATURE_AWK_LIBM) {
2824 R_d = sin(L_d);
2825 break;
2826 }
[821]2827
[1765]2828 case F_sq:
[2725]2829 if (ENABLE_FEATURE_AWK_LIBM) {
2830 R_d = sqrt(L_d);
2831 break;
2832 }
2833
[1765]2834 syntax_error(EMSG_NO_MATH);
[821]2835 break;
[2725]2836
[1765]2837 case F_sr:
[2725]2838 R_d = (double)seed;
2839 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
[821]2840 srand(seed);
2841 break;
2842
[1765]2843 case F_ti:
[2725]2844 R_d = time(NULL);
[821]2845 break;
2846
[1765]2847 case F_le:
[3621]2848 debug_printf_eval("length: L.s:'%s'\n", L.s);
2849 if (!op1) {
[1765]2850 L.s = getvar_s(intvar[F0]);
[3621]2851 debug_printf_eval("length: L.s='%s'\n", L.s);
2852 }
2853 else if (L.v->type & VF_ARRAY) {
2854 R_d = L.v->x.array->nel;
2855 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2856 break;
2857 }
[2725]2858 R_d = strlen(L.s);
[821]2859 break;
2860
[1765]2861 case F_sy:
[2725]2862 fflush_all();
2863 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
[1765]2864 ? (system(L.s) >> 8) : 0;
[821]2865 break;
2866
[1765]2867 case F_ff:
[2725]2868 if (!op1) {
[821]2869 fflush(stdout);
[2725]2870 } else if (L.s && *L.s) {
2871 rstream *rsm = newfile(L.s);
2872 fflush(rsm->F);
2873 } else {
2874 fflush_all();
[821]2875 }
2876 break;
2877
[2725]2878 case F_cl: {
2879 rstream *rsm;
2880 int err = 0;
2881 rsm = (rstream *)hash_search(fdhash, L.s);
2882 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2883 if (rsm) {
2884 debug_printf_eval("OC_FBLTIN F_cl "
2885 "rsm->is_pipe:%d, ->F:%p\n",
2886 rsm->is_pipe, rsm->F);
2887 /* Can be NULL if open failed. Example:
2888 * getline line <"doesnt_exist";
2889 * close("doesnt_exist"); <--- here rsm->F is NULL
2890 */
2891 if (rsm->F)
2892 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2893 free(rsm->buffer);
[821]2894 hash_remove(fdhash, L.s);
2895 }
[2725]2896 if (err)
[1765]2897 setvar_i(intvar[ERRNO], errno);
[2725]2898 R_d = (double)err;
[821]2899 break;
2900 }
[2725]2901 } /* switch */
2902 setvar_i(res, R_d);
[821]2903 break;
[2725]2904 }
[821]2905
[1765]2906 case XC( OC_BUILTIN ):
[821]2907 res = exec_builtin(op, res);
2908 break;
2909
[1765]2910 case XC( OC_SPRINTF ):
[821]2911 setvar_p(res, awk_printf(op1));
2912 break;
2913
[2725]2914 case XC( OC_UNARY ): {
2915 double Ld, R_d;
2916
2917 Ld = R_d = getvar_i(R.v);
[821]2918 switch (opn) {
[1765]2919 case 'P':
[2725]2920 Ld = ++R_d;
[821]2921 goto r_op_change;
[1765]2922 case 'p':
[2725]2923 R_d++;
[821]2924 goto r_op_change;
[1765]2925 case 'M':
[2725]2926 Ld = --R_d;
[821]2927 goto r_op_change;
[1765]2928 case 'm':
[2725]2929 R_d--;
2930 r_op_change:
2931 setvar_i(R.v, R_d);
2932 break;
[1765]2933 case '!':
[2725]2934 Ld = !istrue(R.v);
[821]2935 break;
[1765]2936 case '-':
[2725]2937 Ld = -R_d;
[821]2938 break;
2939 }
[2725]2940 setvar_i(res, Ld);
[821]2941 break;
[2725]2942 }
[821]2943
[2725]2944 case XC( OC_FIELD ): {
2945 int i = (int)getvar_i(R.v);
2946 if (i == 0) {
[1765]2947 res = intvar[F0];
[821]2948 } else {
2949 split_f0();
[2725]2950 if (i > nfields)
2951 fsrealloc(i);
2952 res = &Fields[i - 1];
[821]2953 }
2954 break;
[2725]2955 }
[821]2956
[1765]2957 /* concatenation (" ") and index joining (",") */
2958 case XC( OC_CONCAT ):
[2725]2959 case XC( OC_COMMA ): {
2960 const char *sep = "";
2961 if ((opinfo & OPCLSMASK) == OC_COMMA)
2962 sep = getvar_s(intvar[SUBSEP]);
2963 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
[821]2964 break;
[2725]2965 }
[821]2966
[1765]2967 case XC( OC_LAND ):
[821]2968 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2969 break;
2970
[1765]2971 case XC( OC_LOR ):
[821]2972 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2973 break;
2974
[1765]2975 case XC( OC_BINARY ):
[2725]2976 case XC( OC_REPLACE ): {
2977 double R_d = getvar_i(R.v);
2978 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
[821]2979 switch (opn) {
[1765]2980 case '+':
[2725]2981 L_d += R_d;
[821]2982 break;
[1765]2983 case '-':
[2725]2984 L_d -= R_d;
[821]2985 break;
[1765]2986 case '*':
[2725]2987 L_d *= R_d;
[821]2988 break;
[1765]2989 case '/':
[2725]2990 if (R_d == 0)
2991 syntax_error(EMSG_DIV_BY_ZERO);
2992 L_d /= R_d;
[821]2993 break;
[1765]2994 case '&':
[2725]2995 if (ENABLE_FEATURE_AWK_LIBM)
2996 L_d = pow(L_d, R_d);
2997 else
2998 syntax_error(EMSG_NO_MATH);
[821]2999 break;
[1765]3000 case '%':
[2725]3001 if (R_d == 0)
3002 syntax_error(EMSG_DIV_BY_ZERO);
[3621]3003 L_d -= (long long)(L_d / R_d) * R_d;
[821]3004 break;
3005 }
[2725]3006 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3007 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
[821]3008 break;
[2725]3009 }
[821]3010
[2725]3011 case XC( OC_COMPARE ): {
3012 int i = i; /* for compiler */
3013 double Ld;
3014
[821]3015 if (is_numeric(L.v) && is_numeric(R.v)) {
[2725]3016 Ld = getvar_i(L.v) - getvar_i(R.v);
[821]3017 } else {
[2725]3018 const char *l = getvar_s(L.v);
3019 const char *r = getvar_s(R.v);
3020 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
[821]3021 }
3022 switch (opn & 0xfe) {
[1765]3023 case 0:
[2725]3024 i = (Ld > 0);
[821]3025 break;
[1765]3026 case 2:
[2725]3027 i = (Ld >= 0);
[821]3028 break;
[1765]3029 case 4:
[2725]3030 i = (Ld == 0);
[821]3031 break;
3032 }
[2725]3033 setvar_i(res, (i == 0) ^ (opn & 1));
[821]3034 break;
[2725]3035 }
[821]3036
[1765]3037 default:
3038 syntax_error(EMSG_POSSIBLE_ERROR);
[821]3039 }
3040 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3041 op = op->a.n;
3042 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3043 break;
3044 if (nextrec)
3045 break;
[2725]3046 } /* while (op) */
3047
[821]3048 nvfree(v1);
[2725]3049 debug_printf_eval("returning from %s(): %p\n", __func__, res);
[821]3050 return res;
[1765]3051#undef fnargs
3052#undef seed
3053#undef sreg
[821]3054}
3055
3056
3057/* -------- main & co. -------- */
3058
3059static int awk_exit(int r)
3060{
[1765]3061 var tv;
3062 unsigned i;
[821]3063 hash_item *hi;
3064
[1765]3065 zero_out_var(&tv);
3066
3067 if (!exiting) {
[821]3068 exiting = TRUE;
3069 nextrec = FALSE;
3070 evaluate(endseq.first, &tv);
3071 }
3072
3073 /* waiting for children */
[1765]3074 for (i = 0; i < fdhash->csize; i++) {
[821]3075 hi = fdhash->items[i];
[1765]3076 while (hi) {
[821]3077 if (hi->data.rs.F && hi->data.rs.is_pipe)
3078 pclose(hi->data.rs.F);
3079 hi = hi->next;
3080 }
3081 }
3082
3083 exit(r);
3084}
3085
3086/* if expr looks like "var=value", perform assignment and return 1,
3087 * otherwise return 0 */
3088static int is_assignment(const char *expr)
3089{
[3232]3090 char *exprc, *val;
[821]3091
[2725]3092 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
[821]3093 return FALSE;
3094 }
3095
[2725]3096 exprc = xstrdup(expr);
3097 val = exprc + (val - expr);
3098 *val++ = '\0';
[821]3099
[3232]3100 unescape_string_in_place(val);
[2725]3101 setvar_u(newvar(exprc), val);
[821]3102 free(exprc);
3103 return TRUE;
3104}
3105
3106/* switch to next input file */
3107static rstream *next_input_file(void)
3108{
[1765]3109#define rsm (G.next_input_file__rsm)
3110#define files_happen (G.next_input_file__files_happen)
3111
[3232]3112 FILE *F;
[1765]3113 const char *fname, *ind;
[821]3114
[2725]3115 if (rsm.F)
3116 fclose(rsm.F);
[821]3117 rsm.F = NULL;
3118 rsm.pos = rsm.adv = 0;
3119
[3232]3120 for (;;) {
[1765]3121 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
[821]3122 if (files_happen)
3123 return NULL;
3124 fname = "-";
3125 F = stdin;
[3232]3126 break;
[821]3127 }
[3232]3128 ind = getvar_s(incvar(intvar[ARGIND]));
3129 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3130 if (fname && *fname && !is_assignment(fname)) {
3131 F = xfopen_stdin(fname);
3132 break;
3133 }
3134 }
[821]3135
3136 files_happen = TRUE;
[1765]3137 setvar_s(intvar[FILENAME], fname);
[821]3138 rsm.F = F;
3139 return &rsm;
[1765]3140#undef rsm
3141#undef files_happen
[821]3142}
3143
[2725]3144int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
[821]3145int awk_main(int argc, char **argv)
3146{
[1765]3147 unsigned opt;
[3232]3148 char *opt_F;
[2725]3149 llist_t *list_v = NULL;
3150 llist_t *list_f = NULL;
[3621]3151#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3152 llist_t *list_e = NULL;
3153#endif
[2725]3154 int i, j;
[821]3155 var *v;
[1765]3156 var tv;
[821]3157 char **envp;
[1765]3158 char *vnames = (char *)vNames; /* cheat */
3159 char *vvalues = (char *)vValues;
[821]3160
[1765]3161 INIT_G();
3162
3163 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3164 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3165 if (ENABLE_LOCALE_SUPPORT)
3166 setlocale(LC_NUMERIC, "C");
3167
3168 zero_out_var(&tv);
3169
[821]3170 /* allocate global buffer */
[1765]3171 g_buf = xmalloc(MAXVARFMT + 1);
[821]3172
3173 vhash = hash_init();
3174 ahash = hash_init();
3175 fdhash = hash_init();
3176 fnhash = hash_init();
3177
3178 /* initialize variables */
[1765]3179 for (i = 0; *vnames; i++) {
3180 intvar[i] = v = newvar(nextword(&vnames));
3181 if (*vvalues != '\377')
3182 setvar_s(v, nextword(&vvalues));
[821]3183 else
3184 setvar_i(v, 0);
3185
[1765]3186 if (*vnames == '*') {
[821]3187 v->type |= VF_SPECIAL;
[1765]3188 vnames++;
[821]3189 }
3190 }
3191
[1765]3192 handle_special(intvar[FS]);
3193 handle_special(intvar[RS]);
[821]3194
[1765]3195 newfile("/dev/stdin")->F = stdin;
3196 newfile("/dev/stdout")->F = stdout;
3197 newfile("/dev/stderr")->F = stderr;
[821]3198
[1765]3199 /* Huh, people report that sometimes environ is NULL. Oh well. */
3200 if (environ) for (envp = environ; *envp; envp++) {
3201 /* environ is writable, thus we don't strdup it needlessly */
3202 char *s = *envp;
3203 char *s1 = strchr(s, '=');
3204 if (s1) {
3205 *s1 = '\0';
3206 /* Both findvar and setvar_u take const char*
3207 * as 2nd arg -> environment is not trashed */
3208 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3209 *s1 = '=';
[821]3210 }
3211 }
[3621]3212 opt_complementary = OPTCOMPLSTR_AWK;
3213 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
[1765]3214 argv += optind;
3215 argc -= optind;
[3621]3216 if (opt & OPT_W)
3217 bb_error_msg("warning: option -W is ignored");
3218 if (opt & OPT_F) {
[3232]3219 unescape_string_in_place(opt_F);
3220 setvar_s(intvar[FS], opt_F);
3221 }
[3621]3222 while (list_v) {
[2725]3223 if (!is_assignment(llist_pop(&list_v)))
[1765]3224 bb_show_usage();
3225 }
[3621]3226 while (list_f) {
3227 char *s = NULL;
3228 FILE *from_file;
[2725]3229
[3621]3230 g_progname = llist_pop(&list_f);
3231 from_file = xfopen_stdin(g_progname);
3232 /* one byte is reserved for some trick in next_token */
3233 for (i = j = 1; j > 0; i += j) {
3234 s = xrealloc(s, i + 4096);
3235 j = fread(s + i, 1, 4094, from_file);
3236 }
3237 s[i] = '\0';
3238 fclose(from_file);
3239 parse_program(s + 1);
3240 free(s);
3241 }
3242 g_progname = "cmd. line";
3243#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3244 while (list_e) {
3245 parse_program(llist_pop(&list_e));
3246 }
3247#endif
3248 if (!(opt & (OPT_f | OPT_e))) {
3249 if (!*argv)
[821]3250 bb_show_usage();
[1765]3251 parse_program(*argv++);
[3621]3252 argc--;
[821]3253 }
3254
3255 /* fill in ARGV array */
[3621]3256 setvar_i(intvar[ARGC], argc + 1);
[1765]3257 setari_u(intvar[ARGV], 0, "awk");
3258 i = 0;
3259 while (*argv)
3260 setari_u(intvar[ARGV], ++i, *argv++);
[821]3261
3262 evaluate(beginseq.first, &tv);
[1765]3263 if (!mainseq.first && !endseq.first)
[821]3264 awk_exit(EXIT_SUCCESS);
3265
3266 /* input file could already be opened in BEGIN block */
[2725]3267 if (!iF)
3268 iF = next_input_file();
[821]3269
3270 /* passing through input files */
3271 while (iF) {
3272 nextfile = FALSE;
[1765]3273 setvar_i(intvar[FNR], 0);
[821]3274
[1765]3275 while ((i = awk_getline(iF, intvar[F0])) > 0) {
[821]3276 nextrec = FALSE;
[1765]3277 incvar(intvar[NR]);
3278 incvar(intvar[FNR]);
[821]3279 evaluate(mainseq.first, &tv);
3280
3281 if (nextfile)
3282 break;
3283 }
3284
[1765]3285 if (i < 0)
3286 syntax_error(strerror(errno));
[821]3287
3288 iF = next_input_file();
3289 }
3290
3291 awk_exit(EXIT_SUCCESS);
[1765]3292 /*return 0;*/
[821]3293}
Note: See TracBrowser for help on using the repository browser.