source: MondoRescue/branches/stable/mindi-busybox/editors/awk.c@ 1770

Last change on this file since 1770 was 1770, checked in by Bruno Cornec, 16 years ago
  • Better output for mindi-busybox revision
  • Remove dummy file created on NFS - report from Arnaud Tiger <arnaud.tiger_at_hp.com>
  • strace useful for debug
  • fix new versions for pb (2.0.0 for mindi and 1.7.2 for mindi-busybox)
  • fix build process for mindi-busybox + options used in that version (dd for label-partitions-as-necessary)
  • fix typo in label-partitions-as-necessary which doesn't seem to work
  • Update to busybox 1.7.2
  • perl is now required at restore time to support uuid swap partitions (and will be used for many other things

in the future for sure)

  • next mindi version will be 2.0.0 due to all the changes made in it (udev may break working distros)
  • small optimization in mindi on keyboard handling (one single find instead of multiple)
  • better interaction for USB device when launching mindi manually
  • attempt to automatically guess block disk size for ramdisk
  • fix typos in bkphw
  • Fix the remaining problem with UUID support for swap partitions
  • Updates mondoarchive man page for USB support
  • Adds preliminary Hardware support to mindi (Proliant SSSTK)
  • Tries to add udev support also for rhel4
  • Fix UUID support which was still broken.
  • Be conservative in test for the start-nfs script
  • Update config file for mindi-busybox for 1.7.2 migration
  • Try to run around a busybox bug (1.2.2 pb on inexistant links)
  • Add build content for mindi-busybox in pb
  • Remove distributions content for mindi-busybox
  • Fix a warning on inexistant raidtab
  • Solve problem on tmpfs in restore init (Problem of inexistant symlink and busybox)
  • Create MONDO_CACHE and use it everywhere + creation at start
  • Really never try to eject a USB device
  • Fix a issue with &> usage (replaced with 1> and 2>)
  • Adds magic file to depllist in order to have file working + ldd which helps for debugging issues
  • tty modes correct to avoid sh error messages
  • Use ext3 normally and not ext2 instead
  • USB device should be corrected after reading (take 1st part)
  • Adds a mount_USB_here function derived from mount_CDROM_here
  • usb detection place before /dev detection in device name at restore time
  • Fix when restoring from USB: media is asked in interactive mode
  • Adds USB support for mondorestore
  • mount_cdrom => mount_media
  • elilo.efi is now searched throughout /boot/efi and not in a fixed place as there is no standard
  • untar-and-softlink => untar (+ interface change)
  • suppress useless softlinks creation/removal in boot process
  • avoids udevd messages on groups
  • Increase # of disks to 99 as in mindi at restore time (should be a conf file parameter)
  • skip existing big file creation
  • seems to work correctly for USB mindi boot
  • Adds group and tty link to udev conf
  • Always load usb-storage (even 2.6) to initiate USB bus discovery
  • Better printing of messages
  • Attempt to fix a bug in supporting OpenSusE 10.3 kernel for initramfs (mindi may now use multiple regex for kernel initrd detection)
  • Links were not correctly done as non relative for modules in mindi
  • exclusion of modules denied now works
  • Also create modules in their ordinary place, so that classical modprobe works + copy modules.dep
  • Fix bugs for DENY_MODS handling
  • Add device /dev/console for udev
  • ide-generic should now really be excluded
  • Fix a bug in major number for tty
  • If udev then adds modprobe/insmod to rootfs
  • tty0 is also created with udev
  • ide-generic put rather in DENY_MODS
  • udevd remove from deplist s handled in mindi directly
  • better default for mindi when using --usb
  • Handles dynamically linked busybox (in case we want to use it soon ;-)
  • Adds fixed devices to create for udev
  • ide-generic should not be part of the initrd when using libata v2
  • support a dynamically linked udev (case on Ubuntu 7.10 and Mandriva 2008.0 so should be quite generic). This will give an incentive to move to dynamically linked binaries in the initrd, which will help for other tasks (ia64)
  • Improvement in udev support (do not use cl options not available in busybox)
  • Udev in mindi
    • auto creation of the right links at boot time with udev-links.conf(from Mandriva 2008.0)
    • rework startup of udev as current makes kernel crash (from Mandriva 2008.0)
    • add support for 64 bits udev
  • Try to render MyInsmod silent at boot time
  • Adds udev support (mandatory for newest distributions to avoid remapping of devices in a different way as on the original system)
  • We also need vfat format support for USB boot
  • Adds libusual support (Ubuntu 7.10 needs it for USB)
  • Improve Ubuntu/Debian keyboard detection and support
  • pbinit adapted to new pb (0.8.10). Filtering of docs done in it
  • Suppress some mondo warnings and errors on USB again
  • Tries to fix lack of files in deb mindi package
  • Verify should now work for USB devices
  • More log/messages improvement for USB support
  • - Suppress g_erase_tmpdir_and_scratchdir
  • Improve some log messages for USB support
  • Try to improve install in mindi to avoid issues with isolinux.cfg not installed even if in the pkg :-(
  • Improve mindi-busybox build
  • In conformity with pb 0.8.9
  • Add support for Ubuntu 7.10 in build process
  • Add USB Key button to Menu UI (CD streamer removed)
  • Attempt to fix error messages on tmp/scratch files at the end by removing those dir at the latest possible.
  • Fix a bug linked to the size of the -E param which could be used (Arnaud Tiger/René Ribaud).
  • Integrate ~/.pbrc content into mondorescue.pb (required project-builder >= 0.8.7)
  • Put mondorescue in conformity with new pb filtering rules
  • Add USB support at restore time (no test done yet). New start-usb script PB variable added where useful
  • Unmounting USB device before removal of temporary scratchdir
  • Still refining USB copy back to mondo (one command was not executed)
  • No need to have the image subdir in the scratchdir when USB.
  • umount the USB partition before attempting to use it
  • Remove useless copy from mindi to mondo at end of USB handling

(risky merge, we are raising the limits of 2 diverging branches. The status of stable is not completely sure as such. Will need lots of tests, but it's not yet done :-()
(merge -r1692:1769 $SVN_M/branches/2.2.5)

File size: 62.8 KB
RevLine 
[821]1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
9
[1770]10#include "libbb.h"
11#include "xregex.h"
[821]12#include <math.h>
[1770]13extern char **environ;
[821]14
[1770]15/* This is a NOEXEC applet. Be very careful! */
[821]16
17
[1770]18#define MAXVARFMT 240
19#define MINNVBLOCK 64
[821]20
21/* variable flags */
[1770]22#define VF_NUMBER 0x0001 /* 1 = primary type is number */
23#define VF_ARRAY 0x0002 /* 1 = it's an array */
[821]24
[1770]25#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
26#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
27#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
28#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
29#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
30#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
31#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
[821]32
33/* these flags are static, don't change them when value is changed */
[1770]34#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
[821]35
36/* Variable */
37typedef struct var_s {
[1770]38 unsigned type; /* flags */
[821]39 double number;
40 char *string;
41 union {
[1770]42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
[821]46 } x;
47} var;
48
49/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50typedef struct chain_s {
51 struct node_s *first;
52 struct node_s *last;
[1770]53 const char *programname;
[821]54} chain;
55
56/* Function */
57typedef struct func_s {
[1770]58 unsigned nargs;
[821]59 struct chain_s body;
60} func;
61
62/* I/O stream */
63typedef struct rstream_s {
64 FILE *F;
65 char *buffer;
66 int adv;
67 int size;
68 int pos;
[1770]69 smallint is_pipe;
[821]70} rstream;
71
72typedef struct hash_item_s {
73 union {
[1770]74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
[821]77 } data;
[1770]78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
[821]80} hash_item;
81
82typedef struct xhash_s {
[1770]83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
[821]87 struct hash_item_s **items;
88} xhash;
89
90/* Tree node */
91typedef struct node_s {
92 uint32_t info;
[1770]93 unsigned lineno;
[821]94 union {
95 struct node_s *n;
96 var *v;
97 int i;
98 char *s;
99 regex_t *re;
100 } l;
101 union {
102 struct node_s *n;
103 regex_t *ire;
104 func *f;
105 int argno;
106 } r;
107 union {
108 struct node_s *n;
109 } a;
110} node;
111
112/* Block of temporary variables */
113typedef struct nvblock_s {
114 int size;
115 var *pos;
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
118 var nv[0];
119} nvblock;
120
121typedef struct tsplitter_s {
122 node n;
123 regex_t re[2];
124} tsplitter;
125
126/* simple token classes */
127/* Order and hex values are very important!!! See next_token() */
128#define TC_SEQSTART 1 /* ( */
129#define TC_SEQTERM (1 << 1) /* ) */
130#define TC_REGEXP (1 << 2) /* /.../ */
131#define TC_OUTRDR (1 << 3) /* | > >> */
132#define TC_UOPPOST (1 << 4) /* unary postfix operator */
133#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
134#define TC_BINOPX (1 << 6) /* two-opnd operator */
135#define TC_IN (1 << 7)
136#define TC_COMMA (1 << 8)
137#define TC_PIPE (1 << 9) /* input redirection pipe */
138#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
139#define TC_ARRTERM (1 << 11) /* ] */
140#define TC_GRPSTART (1 << 12) /* { */
141#define TC_GRPTERM (1 << 13) /* } */
142#define TC_SEMICOL (1 << 14)
143#define TC_NEWLINE (1 << 15)
144#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145#define TC_WHILE (1 << 17)
146#define TC_ELSE (1 << 18)
147#define TC_BUILTIN (1 << 19)
148#define TC_GETLINE (1 << 20)
149#define TC_FUNCDECL (1 << 21) /* `function' `func' */
150#define TC_BEGIN (1 << 22)
151#define TC_END (1 << 23)
152#define TC_EOF (1 << 24)
153#define TC_VARIABLE (1 << 25)
154#define TC_ARRAY (1 << 26)
155#define TC_FUNCTION (1 << 27)
156#define TC_STRING (1 << 28)
157#define TC_NUMBER (1 << 29)
158
[1770]159#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
[821]160
161/* combined token classes */
[1770]162#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
[821]166
[1770]167#define TC_STATEMNT (TC_STATX | TC_WHILE)
168#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
[821]169
170/* word tokens, cannot mean something else if not expected */
[1770]171#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
[821]173
174/* discard newlines after these */
[1770]175#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176 | TC_BINOP | TC_OPTERM)
[821]177
178/* what can expression begin with */
[1770]179#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
[821]180/* what can group begin with */
[1770]181#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
[821]182
183/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184/* operator is inserted between them */
[1770]185#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186 | TC_STRING | TC_NUMBER | TC_UOPPOST)
187#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
[821]188
[1770]189#define OF_RES1 0x010000
190#define OF_RES2 0x020000
191#define OF_STR1 0x040000
192#define OF_STR2 0x080000
193#define OF_NUM1 0x100000
194#define OF_CHECKED 0x200000
[821]195
196/* combined operator flags */
197#define xx 0
198#define xV OF_RES2
199#define xS (OF_RES2 | OF_STR2)
200#define Vx OF_RES1
201#define VV (OF_RES1 | OF_RES2)
202#define Nx (OF_RES1 | OF_NUM1)
203#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
204#define Sx (OF_RES1 | OF_STR1)
205#define SV (OF_RES1 | OF_STR1 | OF_RES2)
206#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207
[1770]208#define OPCLSMASK 0xFF00
209#define OPNMASK 0x007F
[821]210
211/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
212 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
214 */
[1770]215#define P(x) (x << 24)
216#define PRIMASK 0x7F000000
217#define PRIMASK2 0x7E000000
[821]218
219/* Operation classes */
220
221#define SHIFT_TIL_THIS 0x0600
222#define RECUR_FROM_THIS 0x1000
223
224enum {
[1770]225 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
226 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
[821]227
[1770]228 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
229 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
230 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
[821]231
[1770]232 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
233 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
234 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
235 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
236 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
237 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
238 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
239 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
240 OC_DONE = 0x2800,
[821]241
[1770]242 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
243 ST_WHILE = 0x3300
[821]244};
245
246/* simple builtins */
247enum {
[1770]248 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
[821]249 F_ti, F_le, F_sy, F_ff, F_cl
250};
251
252/* builtins */
253enum {
[1770]254 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_ge, B_gs, B_su,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
[821]257};
258
259/* tokens and their corresponding info values */
260
[1770]261#define NTC "\377" /* switch to next token class (tc<<1) */
262#define NTCC '\377'
[821]263
264#define OC_B OC_BUILTIN
265
[1770]266static const char tokenlist[] ALIGN1 =
267 "\1(" NTC
268 "\1)" NTC
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
280 "\2in" NTC
281 "\1," NTC
282 "\1|" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
284 "\1]" NTC
285 "\1{" NTC
286 "\1}" NTC
287 "\1;" NTC
288 "\1\n" NTC
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
293 "\5while" NTC
294 "\4else" NTC
[821]295
[1770]296 "\3and" "\5compl" "\6lshift" "\2or"
297 "\6rshift" "\3xor"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
305 "\7getline" NTC
306 "\4func" "\10function" NTC
307 "\5BEGIN" NTC
308 "\3END" "\0"
[821]309 ;
310
311static const uint32_t tokeninfo[] = {
312 0,
313 0,
314 OC_REGEXP,
[1770]315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_FIELD|xV|P(5),
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
[821]333 OC_IN|SV|P(49),
334 OC_COMMA|SS|P(80),
335 OC_PGETLINE|SV|P(37),
[1770]336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
[821]338 0,
339 0,
340 0,
341 0,
342 0,
[1770]343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
[821]347 ST_WHILE,
348 0,
349
[1770]350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
[821]352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
359 OC_GETLINE|SV|P(0),
360 0, 0,
361 0,
362 0
363};
364
365/* internal variable names and their initial values */
366/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367enum {
[1770]368 CONVFMT, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
371 ERRNO, FNR,
372 NR, NF, IGNORECASE,
373 ENVIRON, F0, NUM_INTERNAL_VARS
[821]374};
375
[1770]376static const char vNames[] ALIGN1 =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "ERRNO\0" "FNR\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
[821]383
[1770]384static const char vValues[] ALIGN1 =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
[821]387 "\034\0"
388 "\377";
389
390/* hash size may grow to these values */
[1770]391#define FIRST_PRIME 61
392static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
[821]393
394
[1770]395/* Globals. Split in two parts so that first one is addressed
396 * with (mostly short) negative offsets */
397struct globals {
398 chain beginseq, mainseq, endseq, *seq;
399 node *break_ptr, *continue_ptr;
400 rstream *iF;
401 xhash *vhash, *ahash, *fdhash, *fnhash;
402 const char *g_progname;
403 int g_lineno;
404 int nfields;
405 int maxfields; /* used in fsrealloc() only */
406 var *Fields;
407 nvblock *g_cb;
408 char *g_pos;
409 char *g_buf;
410 smallint icase;
411 smallint exiting;
412 smallint nextrec;
413 smallint nextfile;
414 smallint is_f0_split;
415};
416struct globals2 {
417 uint32_t t_info; /* often used */
418 uint32_t t_tclass;
419 char *t_string;
420 int t_lineno;
421 int t_rollback;
[821]422
[1770]423 var *intvar[NUM_INTERNAL_VARS]; /* often used */
[821]424
[1770]425 /* former statics from various functions */
426 char *split_f0__fstrings;
[821]427
[1770]428 uint32_t next_token__save_tclass;
429 uint32_t next_token__save_info;
430 uint32_t next_token__ltclass;
431 smallint next_token__concat_inserted;
432
433 smallint next_input_file__files_happen;
434 rstream next_input_file__rsm;
435
436 var *evaluate__fnargs;
437 unsigned evaluate__seed;
438 regex_t evaluate__sreg;
439
440 var ptest__v;
441
442 tsplitter exec_builtin__tspl;
443
444 /* biggest and least used members go last */
445 double t_double;
446 tsplitter fsplitter, rsplitter;
447};
448#define G1 (ptr_to_globals[-1])
449#define G (*(struct globals2 *const)ptr_to_globals)
450/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451/* char G1size[sizeof(G1)]; - 0x6c */
452/* char Gsize[sizeof(G)]; - 0x1cc */
453/* Trying to keep most of members accessible with short offsets: */
454/* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455#define beginseq (G1.beginseq )
456#define mainseq (G1.mainseq )
457#define endseq (G1.endseq )
458#define seq (G1.seq )
459#define break_ptr (G1.break_ptr )
460#define continue_ptr (G1.continue_ptr)
461#define iF (G1.iF )
462#define vhash (G1.vhash )
463#define ahash (G1.ahash )
464#define fdhash (G1.fdhash )
465#define fnhash (G1.fnhash )
466#define g_progname (G1.g_progname )
467#define g_lineno (G1.g_lineno )
468#define nfields (G1.nfields )
469#define maxfields (G1.maxfields )
470#define Fields (G1.Fields )
471#define g_cb (G1.g_cb )
472#define g_pos (G1.g_pos )
473#define g_buf (G1.g_buf )
474#define icase (G1.icase )
475#define exiting (G1.exiting )
476#define nextrec (G1.nextrec )
477#define nextfile (G1.nextfile )
478#define is_f0_split (G1.is_f0_split )
479#define t_info (G.t_info )
480#define t_tclass (G.t_tclass )
481#define t_string (G.t_string )
482#define t_double (G.t_double )
483#define t_lineno (G.t_lineno )
484#define t_rollback (G.t_rollback )
485#define intvar (G.intvar )
486#define fsplitter (G.fsplitter )
487#define rsplitter (G.rsplitter )
488#define INIT_G() do { \
489 PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
492} while (0)
493
494
[821]495/* function prototypes */
496static void handle_special(var *);
497static node *parse_expr(uint32_t);
498static void chain_group(void);
499static var *evaluate(node *, var *);
500static rstream *next_input_file(void);
501static int fmt_num(char *, int, const char *, double, int);
502static int awk_exit(int) ATTRIBUTE_NORETURN;
503
504/* ---- error handling ---- */
505
[1770]506static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515#if !ENABLE_FEATURE_AWK_MATH
516static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
[821]517#endif
518
[1770]519static void zero_out_var(var * vp)
[821]520{
[1770]521 memset(vp, 0, sizeof(*vp));
[821]522}
523
[1770]524static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
525static void syntax_error(const char *const message)
526{
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528}
[821]529
530/* ---- hash stuff ---- */
531
/* Compute the (unreduced) hash of a NUL-terminated name.
 * Each step is h = ch + h*63, written as a shift and a subtract;
 * callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++)
		h = *cp + (h << 6) - h;

	return h;
}
539
540/* create new hash */
541static xhash *hash_init(void)
542{
543 xhash *newhash;
544
[1770]545 newhash = xzalloc(sizeof(xhash));
[821]546 newhash->csize = FIRST_PRIME;
[1770]547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
[821]548
549 return newhash;
550}
551
552/* find item in hash, return ptr to data, NULL if not found */
553static void *hash_search(xhash *hash, const char *name)
554{
555 hash_item *hi;
556
557 hi = hash->items [ hashidx(name) % hash->csize ];
558 while (hi) {
559 if (strcmp(hi->name, name) == 0)
560 return &(hi->data);
561 hi = hi->next;
562 }
563 return NULL;
564}
565
566/* grow hash if it becomes too big */
567static void hash_rebuild(xhash *hash)
568{
[1770]569 unsigned newsize, i, idx;
[821]570 hash_item **newitems, *hi, *thi;
571
[1770]572 if (hash->nprime == ARRAY_SIZE(PRIMES))
[821]573 return;
574
575 newsize = PRIMES[hash->nprime++];
[1770]576 newitems = xzalloc(newsize * sizeof(hash_item *));
[821]577
[1770]578 for (i = 0; i < hash->csize; i++) {
[821]579 hi = hash->items[i];
580 while (hi) {
581 thi = hi;
582 hi = thi->next;
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
585 newitems[idx] = thi;
586 }
587 }
588
589 free(hash->items);
590 hash->csize = newsize;
591 hash->items = newitems;
592}
593
594/* find item in hash, add it if necessary. Return ptr to data */
595static void *hash_find(xhash *hash, const char *name)
596{
597 hash_item *hi;
[1770]598 unsigned idx;
[821]599 int l;
600
601 hi = hash_search(hash, name);
[1770]602 if (!hi) {
[821]603 if (++hash->nel / hash->csize > 10)
604 hash_rebuild(hash);
605
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(hash_item) + l);
608 memcpy(hi->name, name, l);
609
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
613 hash->glen += l;
614 }
615 return &(hi->data);
616}
617
[1770]618#define findvar(hash, name) ((var*) hash_find((hash), (name)))
619#define newvar(name) ((var*) hash_find(vhash, (name)))
620#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621#define newfunc(name) ((func*) hash_find(fnhash, (name)))
[821]622
623static void hash_remove(xhash *hash, const char *name)
624{
625 hash_item *hi, **phi;
626
[1770]627 phi = &(hash->items[hashidx(name) % hash->csize]);
[821]628 while (*phi) {
629 hi = *phi;
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
632 hash->nel--;
633 *phi = hi->next;
634 free(hi);
635 break;
636 }
637 phi = &(hi->next);
638 }
639}
640
641/* ------ some useful functions ------ */
642
643static void skip_spaces(char **s)
644{
[1770]645 char *p = *s;
[821]646
[1770]647 while (1) {
648 if (*p == '\\' && p[1] == '\n') {
649 p++;
650 t_lineno++;
651 } else if (*p != ' ' && *p != '\t') {
652 break;
653 }
[821]654 p++;
655 }
656 *s = p;
657}
658
/* Return the NUL-terminated word at *s and move *s to the byte that
 * follows the word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
667
/* Fetch one (possibly escaped) character from *s and advance *s past
 * whatever was consumed.
 *
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns a backslash without moving *s (presumably meaning the escape
 * was not recognized - confirm against libbb), the character after the
 * backslash is returned literally, so that e.g. \" yields ".
 *
 * When the backslash is the last character before the terminating NUL,
 * NUL is returned and *s is left pointing AT the NUL rather than past it,
 * so callers that test *p == '\0' still see the end of the string.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *((*s)++);
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) {
		/* take the escaped character as is... */
		c = **s;
		/* ...but never step over the end of the string */
		if (c != '\0')
			(*s)++;
	}
	return c;
}
678
/* Return non-zero if c is alphanumeric or an underscore.
 * The argument is converted to unsigned char before it reaches isalnum():
 * a negative value other than EOF (as produced by a plain `char' holding
 * a byte >= 0x80 on signed-char platforms) is undefined behaviour there. */
static int ALWAYS_INLINE isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
683
/* Open `path' with xfopen() in the given mode, except that the path "-"
 * (a lone dash) yields stdin without opening anything. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
688
689/* -------- working with variables (set/get/copy/etc) -------- */
690
691static xhash *iamarray(var *v)
692{
693 var *a = v;
694
695 while (a->type & VF_CHILD)
696 a = a->x.parent;
697
[1770]698 if (!(a->type & VF_ARRAY)) {
[821]699 a->type |= VF_ARRAY;
700 a->x.array = hash_init();
701 }
702 return a->x.array;
703}
704
705static void clear_array(xhash *array)
706{
[1770]707 unsigned i;
[821]708 hash_item *hi, *thi;
709
[1770]710 for (i = 0; i < array->csize; i++) {
[821]711 hi = array->items[i];
712 while (hi) {
713 thi = hi;
714 hi = hi->next;
715 free(thi->data.v.string);
716 free(thi);
717 }
718 array->items[i] = NULL;
719 }
720 array->glen = array->nel = 0;
721}
722
723/* clear a variable */
724static var *clrvar(var *v)
725{
726 if (!(v->type & VF_FSTR))
727 free(v->string);
728
729 v->type &= VF_DONTTOUCH;
730 v->type |= VF_DIRTY;
731 v->string = NULL;
732 return v;
733}
734
735/* assign string value to variable */
736static var *setvar_p(var *v, char *value)
737{
738 clrvar(v);
739 v->string = value;
740 handle_special(v);
741 return v;
742}
743
744/* same as setvar_p but make a copy of string */
745static var *setvar_s(var *v, const char *value)
746{
[1770]747 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
[821]748}
749
750/* same as setvar_s but set USER flag */
751static var *setvar_u(var *v, const char *value)
752{
753 setvar_s(v, value);
754 v->type |= VF_USER;
755 return v;
756}
757
758/* set array element to user string */
759static void setari_u(var *a, int idx, const char *s)
760{
[1770]761 char sidx[sizeof(int)*3 + 1];
762 var *v;
[821]763
764 sprintf(sidx, "%d", idx);
765 v = findvar(iamarray(a), sidx);
766 setvar_u(v, s);
767}
768
769/* assign numeric value to variable */
770static var *setvar_i(var *v, double value)
771{
772 clrvar(v);
773 v->type |= VF_NUMBER;
774 v->number = value;
775 handle_special(v);
776 return v;
777}
778
[1770]779static const char *getvar_s(var *v)
[821]780{
781 /* if v is numeric and has no cached string, convert it to string */
782 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
[1770]783 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
784 v->string = xstrdup(g_buf);
[821]785 v->type |= VF_CACHED;
786 }
787 return (v->string == NULL) ? "" : v->string;
788}
789
790static double getvar_i(var *v)
791{
792 char *s;
793
794 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
795 v->number = 0;
796 s = v->string;
797 if (s && *s) {
798 v->number = strtod(s, &s);
799 if (v->type & VF_USER) {
800 skip_spaces(&s);
801 if (*s != '\0')
802 v->type &= ~VF_USER;
803 }
804 } else {
805 v->type &= ~VF_USER;
806 }
807 v->type |= VF_CACHED;
808 }
809 return v->number;
810}
811
812static var *copyvar(var *dest, const var *src)
813{
814 if (dest != src) {
815 clrvar(dest);
[1770]816 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
[821]817 dest->number = src->number;
818 if (src->string)
[1770]819 dest->string = xstrdup(src->string);
[821]820 }
821 handle_special(dest);
822 return dest;
823}
824
825static var *incvar(var *v)
826{
[1770]827 return setvar_i(v, getvar_i(v) + 1.);
[821]828}
829
830/* return true if v is number or numeric string */
831static int is_numeric(var *v)
832{
833 getvar_i(v);
834 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
835}
836
837/* return 1 when value of v corresponds to true, 0 otherwise */
838static int istrue(var *v)
839{
840 if (is_numeric(v))
841 return (v->number == 0) ? 0 : 1;
[1770]842 return (v->string && *(v->string)) ? 1 : 0;
[821]843}
844
845/* temporary variables allocator. Last allocated should be first freed */
846static var *nvalloc(int n)
847{
848 nvblock *pb = NULL;
849 var *v, *r;
850 int size;
851
[1770]852 while (g_cb) {
853 pb = g_cb;
854 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
855 g_cb = g_cb->next;
[821]856 }
857
[1770]858 if (!g_cb) {
[821]859 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
[1770]860 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
861 g_cb->size = size;
862 g_cb->pos = g_cb->nv;
863 g_cb->prev = pb;
864 g_cb->next = NULL;
865 if (pb) pb->next = g_cb;
[821]866 }
867
[1770]868 v = r = g_cb->pos;
869 g_cb->pos += n;
[821]870
[1770]871 while (v < g_cb->pos) {
[821]872 v->type = 0;
873 v->string = NULL;
874 v++;
875 }
876
877 return r;
878}
879
880static void nvfree(var *v)
881{
882 var *p;
883
[1770]884 if (v < g_cb->nv || v >= g_cb->pos)
885 syntax_error(EMSG_INTERNAL_ERROR);
[821]886
[1770]887 for (p = v; p < g_cb->pos; p++) {
888 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
[821]889 clear_array(iamarray(p));
890 free(p->x.array->items);
891 free(p->x.array);
892 }
893 if (p->type & VF_WALK)
894 free(p->x.walker);
895
896 clrvar(p);
897 }
898
[1770]899 g_cb->pos = v;
900 while (g_cb->prev && g_cb->pos == g_cb->nv) {
901 g_cb = g_cb->prev;
[821]902 }
903}
904
905/* ------- awk program text parsing ------- */
906
[1770]907/* Parse the next token at the global scan position (g_pos). The token class
 * is stored in t_tclass and the payload in t_info, t_string or t_double.
 * 'expected' is a mask of acceptable token classes; if the token obtained is
 * not in that mask, syntax_error() is raised. Returns the token class.
 *
 * Two cases bypass scanning: a pending rollback (t_rollback) hands out the
 * previous token again, and a pending concat_inserted hands out the token
 * that was saved when a synthetic concatenation operator was emitted.
 * Note that string, regexp and name tokens are rewritten in place inside the
 * program text buffer.
 */
910static uint32_t next_token(uint32_t expected)
911{
[1770]912#define concat_inserted (G.next_token__concat_inserted)
913#define save_tclass (G.next_token__save_tclass)
914#define save_info (G.next_token__save_info)
915/* Initialized to TC_OPTERM: */
916#define ltclass (G.next_token__ltclass)
917
[821]918 char *p, *pp, *s;
[1770]919 const char *tl;
[821]920 uint32_t tc;
921 const uint32_t *ti;
922 int l;
923
[1770]924 if (t_rollback) {
925 t_rollback = FALSE;
[821]926
927 } else if (concat_inserted) {
928 concat_inserted = FALSE;
[1770]929 t_tclass = save_tclass;
930 t_info = save_info;
[821]931
932 } else {
[1770]933 p = g_pos;
934 readnext:
[821]935 skip_spaces(&p);
[1770]936 g_lineno = t_lineno;
/* a comment runs up to (not including) the end of line */
[821]937 if (*p == '#')
[1770]938 while (*p != '\n' && *p != '\0')
939 p++;
[821]940
941 if (*p == '\n')
[1770]942 t_lineno++;
[821]943
944 if (*p == '\0') {
945 tc = TC_EOF;
946
947 } else if (*p == '\"') {
948 /* it's a string */
/* decoded characters are written back over the source text via s */
[1770]949 t_string = s = ++p;
[821]950 while (*p != '\"') {
951 if (*p == '\0' || *p == '\n')
952 syntax_error(EMSG_UNEXP_EOS);
953 *(s++) = nextchar(&p);
954 }
955 p++;
956 *s = '\0';
957 tc = TC_STRING;
958
959 } else if ((expected & TC_REGEXP) && *p == '/') {
960 /* it's regexp */
/* '/' starts a regexp only when the caller allows TC_REGEXP here */
[1770]961 t_string = s = ++p;
[821]962 while (*p != '/') {
963 if (*p == '\0' || *p == '\n')
964 syntax_error(EMSG_UNEXP_EOS);
[1770]965 *s = *p++;
966 if (*s++ == '\\') {
[821]967 pp = p;
968 *(s-1) = bb_process_escape_sequence((const char **)&p);
/* a doubled backslash is kept as two backslashes */
[1770]969 if (*pp == '\\')
970 *s++ = '\\';
/* p did not move: the char after the backslash is copied as is */
971 if (p == pp)
972 *s++ = *p++;
[821]973 }
974 }
975 p++;
976 *s = '\0';
977 tc = TC_REGEXP;
978
979 } else if (*p == '.' || isdigit(*p)) {
980 /* it's a number */
[1770]981 t_double = strtod(p, &p);
[821]982 if (*p == '.')
983 syntax_error(EMSG_UNEXP_TOKEN);
984 tc = TC_NUMBER;
985
986 } else {
987 /* search for something known */
/* tokenlist is walked as length-prefixed entries; an NTCC length byte
 * moves on to the next token class bit, ti advances in step with entries */
988 tl = tokenlist;
989 tc = 0x00000001;
990 ti = tokeninfo;
991 while (*tl) {
992 l = *(tl++);
993 if (l == NTCC) {
994 tc <<= 1;
995 continue;
996 }
997 /* if token class is expected, token
 * matches and it's not a longer word,
 * then this is what we are looking for
 */
[1770]1001 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1002 && *tl == *p && strncmp(p, tl, l) == 0
1003 && !((tc & TC_WORD) && isalnum_(p[l]))
1004 ) {
1005 t_info = *ti;
[821]1006 p += l;
1007 break;
1008 }
1009 ti++;
1010 tl += l;
1011 }
1012
[1770]1013 if (!*tl) {
[821]1014 /* it's a name (var/array/function),
 * otherwise it's something wrong
 */
[1770]1017 if (!isalnum_(*p))
[821]1018 syntax_error(EMSG_UNEXP_TOKEN);
1019
/* the name is shifted one byte to the left so that a terminating NUL
 * fits without overwriting the character that follows the name */
[1770]1020 t_string = --p;
1021 while (isalnum_(*(++p))) {
[821]1022 *(p-1) = *p;
1023 }
1024 *(p-1) = '\0';
1025 tc = TC_VARIABLE;
1026 /* also consume whitespace between functionname and bracket */
[1770]1027 if (!(expected & TC_VARIABLE))
1028 skip_spaces(&p);
[821]1029 if (*p == '(') {
1030 tc = TC_FUNCTION;
1031 } else {
1032 if (*p == '[') {
1033 p++;
1034 tc = TC_ARRAY;
1035 }
1036 }
1037 }
1038 }
[1770]1039 g_pos = p;
[821]1040
1041 /* skipping newlines in some cases */
1042 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1043 goto readnext;
1044
1045 /* insert concatenation operator when needed */
/* the real token is parked in save_tclass/save_info and is returned by
 * the next call through the concat_inserted path above */
[1770]1046 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
[821]1047 concat_inserted = TRUE;
1048 save_tclass = tc;
[1770]1049 save_info = t_info;
[821]1050 tc = TC_BINOP;
[1770]1051 t_info = OC_CONCAT | SS | P(35);
[821]1052 }
1053
[1770]1054 t_tclass = tc;
[821]1055 }
[1770]1056 ltclass = t_tclass;
[821]1057
1058 /* Are we ready for this? */
[1770]1059 if (!(ltclass & expected))
[821]1060 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
[1770]1061 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
[821]1062
1063 return ltclass;
[1770]1064#undef concat_inserted
1065#undef save_tclass
1066#undef save_info
1067#undef ltclass
[821]1068}
1069
[1770]1070static void rollback_token(void)
1071{
1072 t_rollback = TRUE;
1073}
[821]1074
1075static node *new_node(uint32_t info)
1076{
[1770]1077 node *n;
[821]1078
[1770]1079 n = xzalloc(sizeof(node));
[821]1080 n->info = info;
[1770]1081 n->lineno = g_lineno;
[821]1082 return n;
1083}
1084
[1770]1085static node *mk_re_node(const char *s, node *n, regex_t *re)
[821]1086{
1087 n->info = OC_REGEXP;
1088 n->l.re = re;
1089 n->r.ire = re + 1;
1090 xregcomp(re, s, REG_EXTENDED);
[1770]1091 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
[821]1092
1093 return n;
1094}
1095
1096static node *condition(void)
1097{
1098 next_token(TC_SEQSTART);
1099 return parse_expr(TC_SEQTERM);
1100}
1101
1102/* Parse an expression terminated by any token class in iexp and return a
 * pointer to the built subtree (NULL for an empty expression). The
 * terminator token is consumed by parse_expr.
 *
 * The tree is grown under a dummy root 'sn'; 'cn' is the most recently
 * added node and each node's a.n links back to its parent while parsing.
 * 'xtc' is the set of token classes acceptable next; 'glptr' remembers a
 * getline node so that a following '<' is treated as input redirection.
 */
1104static node *parse_expr(uint32_t iexp)
1105{
1106 node sn;
1107 node *cn = &sn;
1108 node *vn, *glptr;
1109 uint32_t tc, xtc;
1110 var *v;
1111
1112 sn.info = PRIMASK;
1113 sn.r.n = glptr = NULL;
1114 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1115
[1770]1116 while (!((tc = next_token(xtc)) & iexp)) {
1117 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
[821]1118 /* input redirection (<) attached to glptr node */
[1770]1119 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
[821]1120 cn->a.n = glptr;
1121 xtc = TC_OPERAND | TC_UOPPRE;
1122 glptr = NULL;
1123
1124 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1125 /* for binary and postfix-unary operators, jump back over
 * previous operators with higher priority */
1127 vn = cn;
[1770]1128 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1129 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
[821]1130 vn = vn->a.n;
[1770]1131 if ((t_info & OPCLSMASK) == OC_TERNARY)
1132 t_info += P(6);
/* splice the new operator node in between vn and vn's parent */
1133 cn = vn->a.n->r.n = new_node(t_info);
[821]1134 cn->a.n = vn->a.n;
1135 if (tc & TC_BINOP) {
1136 cn->l.n = vn;
1137 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
[1770]1138 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
[821]1139 /* it's a pipe */
1140 next_token(TC_GETLINE);
1141 /* give maximum priority to this pipe */
1142 cn->info &= ~PRIMASK;
1143 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1144 }
1145 } else {
1146 cn->r.n = vn;
1147 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1148 }
1149 vn->a.n = cn;
1150
1151 } else {
1152 /* for operands and prefix-unary operators, attach them
 * to last node */
1154 vn = cn;
[1770]1155 cn = vn->r.n = new_node(t_info);
[821]1156 cn->a.n = vn;
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if (tc & (TC_OPERAND | TC_REGEXP)) {
1159 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1160 /* one should be very careful with switch on tclass -
 * only simple tclasses should be used! */
1162 switch (tc) {
[1770]1163 case TC_VARIABLE:
1164 case TC_ARRAY:
[821]1165 cn->info = OC_VAR;
/* a name found in ahash is a function argument, referenced by index */
[1770]1166 v = hash_search(ahash, t_string);
1167 if (v != NULL) {
[821]1168 cn->info = OC_FNARG;
1169 cn->l.i = v->x.aidx;
1170 } else {
[1770]1171 cn->l.v = newvar(t_string);
[821]1172 }
1173 if (tc & TC_ARRAY) {
1174 cn->info |= xS;
1175 cn->r.n = parse_expr(TC_ARRTERM);
1176 }
1177 break;
1178
[1770]1179 case TC_NUMBER:
1180 case TC_STRING:
/* literals become OC_VAR nodes pointing at a freshly allocated var */
[821]1181 cn->info = OC_VAR;
1182 v = cn->l.v = xzalloc(sizeof(var));
1183 if (tc & TC_NUMBER)
[1770]1184 setvar_i(v, t_double);
[821]1185 else
[1770]1186 setvar_s(v, t_string);
[821]1187 break;
1188
[1770]1189 case TC_REGEXP:
1190 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
[821]1191 break;
1192
[1770]1193 case TC_FUNCTION:
[821]1194 cn->info = OC_FUNC;
[1770]1195 cn->r.f = newfunc(t_string);
[821]1196 cn->l.n = condition();
1197 break;
1198
[1770]1199 case TC_SEQSTART:
/* parenthesised subexpression replaces the node created above */
[821]1200 cn = vn->r.n = parse_expr(TC_SEQTERM);
1201 cn->a.n = vn;
1202 break;
1203
[1770]1204 case TC_GETLINE:
[821]1205 glptr = cn;
1206 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1207 break;
1208
[1770]1209 case TC_BUILTIN:
[821]1210 cn->l.n = condition();
1211 break;
1212 }
1213 }
1214 }
1215 }
1216 return sn.r.n;
1217}
1218
1219/* add node to chain. Return ptr to alloc'd node */
1220static node *chain_node(uint32_t info)
1221{
[1770]1222 node *n;
[821]1223
[1770]1224 if (!seq->first)
[821]1225 seq->first = seq->last = new_node(0);
1226
[1770]1227 if (seq->programname != g_progname) {
1228 seq->programname = g_progname;
[821]1229 n = chain_node(OC_NEWSOURCE);
[1770]1230 n->l.s = xstrdup(g_progname);
[821]1231 }
1232
1233 n = seq->last;
1234 n->info = info;
1235 seq->last = n->a.n = new_node(OC_DONE);
1236
1237 return n;
1238}
1239
1240static void chain_expr(uint32_t info)
1241{
1242 node *n;
1243
1244 n = chain_node(info);
1245 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
[1770]1246 if (t_tclass & TC_GRPTERM)
[821]1247 rollback_token();
1248}
1249
/* Chain the skeleton of a loop and parse its body.
 * Layout produced: a branch node n (OC_BR), the body parsed by
 * chain_group(), then an OC_EXEC node n2 that carries nn as its expression
 * and links back to n. While the body is parsed, continue_ptr and break_ptr
 * point to fresh OC_EXEC nodes; afterwards 'continue' is wired to n2 and
 * 'break' to the node following the loop, which is also n's r.n target.
 * The previous break/continue targets are restored before returning.
 * Returns n; the caller fills in n->l.n.
 */
1250static node *chain_loop(node *nn)
1251{
1252 node *n, *n2, *save_brk, *save_cont;
1253
1254 save_brk = break_ptr;
1255 save_cont = continue_ptr;
1256
1257 n = chain_node(OC_BR | Vx);
1258 continue_ptr = new_node(OC_EXEC);
1259 break_ptr = new_node(OC_EXEC);
1260 chain_group();
1261 n2 = chain_node(OC_EXEC | Vx);
1262 n2->l.n = nn;
1263 n2->a.n = n;
1264 continue_ptr->a.n = n2;
1265 break_ptr->a.n = n->r.n = seq->last;
1266
1267 continue_ptr = save_cont;
1268 break_ptr = save_brk;
1269
1270 return n;
1271}
1272
1273/* Parse one statement or a braced group of statements and attach the
 * resulting nodes to the current sequence (seq). Leading newlines are
 * skipped. Handles braced groups (recursively), plain expressions, and the
 * statement keywords if/while/do/for/print/printf/break/continue; any other
 * statement is chained as an expression node with the token's info.
 */
1274static void chain_group(void)
1275{
1276 uint32_t c;
1277 node *n, *n2, *n3;
1278
1279 do {
1280 c = next_token(TC_GRPSEQ);
1281 } while (c & TC_NEWLINE);
1282
1283 if (c & TC_GRPSTART) {
[1770]1284 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1285 if (t_tclass & TC_NEWLINE) continue;
[821]1286 rollback_token();
1287 chain_group();
1288 }
1289 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1290 rollback_token();
1291 chain_expr(OC_EXEC | Vx);
1292 } else { /* TC_STATEMNT */
[1770]1293 switch (t_info & OPCLSMASK) {
1294 case ST_IF:
/* n branches on the condition; n2 sits after the 'then' part and is
 * redirected past the 'else' part when one is present */
1295 n = chain_node(OC_BR | Vx);
1296 n->l.n = condition();
1297 chain_group();
1298 n2 = chain_node(OC_EXEC);
1299 n->r.n = seq->last;
1300 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
[821]1301 chain_group();
[1770]1302 n2->a.n = seq->last;
1303 } else {
1304 rollback_token();
1305 }
1306 break;
[821]1307
[1770]1308 case ST_WHILE:
1309 n2 = condition();
1310 n = chain_loop(NULL);
1311 n->l.n = n2;
1312 break;
1313
1314 case ST_DO:
/* n2 jumps over the loop's branch node so the body runs before the
 * condition, which is parsed after the body */
1315 n2 = chain_node(OC_EXEC);
1316 n = chain_loop(NULL);
1317 n2->a.n = n->a.n;
1318 next_token(TC_WHILE);
1319 n->l.n = condition();
1320 break;
1321
1322 case ST_FOR:
1323 next_token(TC_SEQSTART);
1324 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1325 if (t_tclass & TC_SEQTERM) { /* for-in */
1326 if ((n2->info & OPCLSMASK) != OC_IN)
1327 syntax_error(EMSG_UNEXP_TOKEN);
1328 n = chain_node(OC_WALKINIT | VV);
1329 n->l.n = n2->l.n;
1330 n->r.n = n2->r.n;
[821]1331 n = chain_loop(NULL);
[1770]1332 n->info = OC_WALKNEXT | Vx;
1333 n->l.n = n2->l.n;
1334 } else { /* for (;;) */
1335 n = chain_node(OC_EXEC | Vx);
[821]1336 n->l.n = n2;
[1770]1337 n2 = parse_expr(TC_SEMICOL);
1338 n3 = parse_expr(TC_SEQTERM);
1339 n = chain_loop(n3);
1340 n->l.n = n2;
/* an empty condition turns the branch node into a plain OC_EXEC */
1341 if (!n2)
1342 n->info = OC_EXEC;
1343 }
1344 break;
[821]1345
[1770]1346 case OC_PRINT:
1347 case OC_PRINTF:
1348 n = chain_node(t_info);
1349 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
/* an output redirection token adds its info bits and a target expression */
1350 if (t_tclass & TC_OUTRDR) {
1351 n->info |= t_info;
1352 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1353 }
1354 if (t_tclass & TC_GRPTERM)
1355 rollback_token();
1356 break;
[821]1357
[1770]1358 case OC_BREAK:
1359 n = chain_node(OC_EXEC);
1360 n->a.n = break_ptr;
1361 break;
[821]1362
[1770]1363 case OC_CONTINUE:
1364 n = chain_node(OC_EXEC);
1365 n->a.n = continue_ptr;
1366 break;
[821]1367
[1770]1368 /* delete, next, nextfile, return, exit */
1369 default:
1370 chain_expr(t_info);
[821]1371 }
1372 }
1373}
1374
/* Parse the awk program text starting at p (the text is modified in place
 * by the tokenizer). Top-level items are dispatched by their first token:
 * BEGIN and END groups go to beginseq and endseq, function declarations to
 * the function's own body sequence, and pattern/action rules to mainseq.
 * Parsing stops at TC_EOF.
 */
1375static void parse_program(char *p)
1376{
1377 uint32_t tclass;
1378 node *cn;
1379 func *f;
1380 var *v;
1381
[1770]1382 g_pos = p;
1383 t_lineno = 1;
1384 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1385 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
[821]1386
1387 if (tclass & TC_OPTERM)
1388 continue;
1389
1390 seq = &mainseq;
1391 if (tclass & TC_BEGIN) {
1392 seq = &beginseq;
1393 chain_group();
1394
1395 } else if (tclass & TC_END) {
1396 seq = &endseq;
1397 chain_group();
1398
1399 } else if (tclass & TC_FUNCDECL) {
1400 next_token(TC_FUNCTION);
/* step over the '(' that next_token left unconsumed after the name */
[1770]1401 g_pos++;
1402 f = newfunc(t_string);
[821]1403 f->body.first = NULL;
1404 f->nargs = 0;
/* each parameter name is entered into ahash with its positional index */
[1770]1405 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1406 v = findvar(ahash, t_string);
[821]1407 v->x.aidx = (f->nargs)++;
1408
1409 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1410 break;
1411 }
1412 seq = &(f->body);
1413 chain_group();
1414 clear_array(ahash);
1415
1416 } else if (tclass & TC_OPSEQ) {
/* pattern rule: OC_TEST node, followed by its action group or, when no
 * group follows, an implicit print */
1417 rollback_token();
1418 cn = chain_node(OC_TEST);
1419 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
[1770]1420 if (t_tclass & TC_GRPSTART) {
[821]1421 rollback_token();
1422 chain_group();
1423 } else {
1424 chain_node(OC_PRINT);
1425 }
1426 cn->r.n = mainseq.last;
1427
1428 } else /* if (tclass & TC_GRPSTART) */ {
1429 rollback_token();
1430 chain_group();
1431 }
1432 }
1433}
1434
1435
1436/* -------- program execution part -------- */
1437
[1770]1438static node *mk_splitter(const char *s, tsplitter *spl)
[821]1439{
[1770]1440 regex_t *re, *ire;
[821]1441 node *n;
1442
1443 re = &spl->re[0];
1444 ire = &spl->re[1];
1445 n = &spl->n;
[1770]1446 if ((n->info & OPCLSMASK) == OC_REGEXP) {
[821]1447 regfree(re);
[1770]1448 regfree(ire); // TODO: nuke ire, use re+1?
[821]1449 }
1450 if (strlen(s) > 1) {
1451 mk_re_node(s, n, re);
1452 } else {
1453 n->info = (uint32_t) *s;
1454 }
1455
1456 return n;
1457}
1458
1459/* use node as a regular expression. Supplied with node ptr and regex_t
1460 * storage space. Return ptr to regex (if result points to preg, it should
1461 * be later regfree'd manually
1462 */
1463static regex_t *as_regex(node *op, regex_t *preg)
1464{
1465 var *v;
[1770]1466 const char *s;
[821]1467
1468 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1469 return icase ? op->r.ire : op->l.re;
1470 }
[1770]1471 v = nvalloc(1);
1472 s = getvar_s(evaluate(op, v));
1473 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1474 nvfree(v);
1475 return preg;
[821]1476}
1477
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * When *b is NULL or n has reached *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated; otherwise nothing happens.
 * *size is not read while *b is NULL, so it may start uninitialised.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1484
1485/* resize field storage space */
1486static void fsrealloc(int size)
1487{
1488 int i;
1489
1490 if (size >= maxfields) {
1491 i = maxfields;
1492 maxfields = size + 16;
[1770]1493 Fields = xrealloc(Fields, maxfields * sizeof(var));
1494 for (; i < maxfields; i++) {
[821]1495 Fields[i].type = VF_SPECIAL;
1496 Fields[i].string = NULL;
1497 }
1498 }
1499
1500 if (size < nfields) {
[1770]1501 for (i = size; i < nfields; i++) {
1502 clrvar(Fields + i);
[821]1503 }
1504 }
1505 nfields = size;
1506}
1507
/* Split string s according to splitter node spl.
 * A buffer is allocated and stored in *slist; it receives the fields as a
 * run of consecutive NUL-terminated words (to be read back with nextword).
 * Returns the number of fields. The caller owns *slist.
 * Four modes, chosen from spl->info: regexp split, null split (empty
 * separator: every character is a field), single-character split, and
 * whitespace split (separator is a space).
 */
[1770]1508static int awk_split(const char *s, node *spl, char **slist)
[821]1509{
[1770]1510 int l, n = 0;
[821]1511 char c[4];
1512 char *s1;
[1770]1513 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
[821]1514
1515 /* in worst case, each char would be a separate field */
[1770]1516 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1517 strcpy(s1, s);
[821]1518
/* c+0 is the separator set for strpbrk (two copies, for both letter
 * cases); c+2 is the line-break set: "\n" when RS is empty, else "" */
1519 c[0] = c[1] = (char)spl->info;
1520 c[2] = c[3] = '\0';
[1770]1521 if (*getvar_s(intvar[RS]) == '\0')
1522 c[2] = '\n';
[821]1523
[1770]1524 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1525 if (!*s)
1526 return n; /* "": zero fields */
1527 n++; /* at least one field will be there */
1528 do {
1529 l = strcspn(s, c+2); /* len till next NUL or \n */
1530 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1531 && pmatch[0].rm_so <= l
1532 ) {
[821]1533 l = pmatch[0].rm_so;
/* an empty match at the very start still consumes one character */
[1770]1534 if (pmatch[0].rm_eo == 0) {
1535 l++;
1536 pmatch[0].rm_eo++;
1537 }
1538 n++; /* we saw yet another delimiter */
[821]1539 } else {
1540 pmatch[0].rm_eo = l;
[1770]1541 if (s[l]) pmatch[0].rm_eo++;
[821]1542 }
1543 memcpy(s1, s, l);
[1770]1544 s1[l] = '\0';
[821]1545 nextword(&s1);
1546 s += pmatch[0].rm_eo;
[1770]1547 } while (*s);
1548 return n;
1549 }
1550 if (c[0] == '\0') { /* null split */
1551 while (*s) {
1552 *s1++ = *s++;
1553 *s1++ = '\0';
[821]1554 n++;
1555 }
[1770]1556 return n;
1557 }
1558 if (c[0] != ' ') { /* single-character split */
[821]1559 if (icase) {
1560 c[0] = toupper(c[0]);
1561 c[1] = tolower(c[1]);
1562 }
/* operates on the copy made above: separators are overwritten with NUL */
1563 if (*s1) n++;
1564 while ((s1 = strpbrk(s1, c))) {
[1770]1565 *s1++ = '\0';
[821]1566 n++;
1567 }
[1770]1568 return n;
[821]1569 }
[1770]1570 /* space split */
1571 while (*s) {
1572 s = skip_whitespace(s);
1573 if (!*s) break;
1574 n++;
1575 while (*s && !isspace(*s))
1576 *s1++ = *s++;
1577 *s1++ = '\0';
1578 }
[821]1579 return n;
1580}
1581
1582static void split_f0(void)
1583{
[1770]1584/* static char *fstrings; */
1585#define fstrings (G.split_f0__fstrings)
1586
[821]1587 int i, n;
1588 char *s;
1589
1590 if (is_f0_split)
1591 return;
1592
1593 is_f0_split = TRUE;
1594 free(fstrings);
1595 fsrealloc(0);
[1770]1596 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
[821]1597 fsrealloc(n);
1598 s = fstrings;
[1770]1599 for (i = 0; i < n; i++) {
[821]1600 Fields[i].string = nextword(&s);
1601 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1602 }
1603
1604 /* set NF manually to avoid side effects */
[1770]1605 clrvar(intvar[NF]);
1606 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1607 intvar[NF]->number = nfields;
1608#undef fstrings
[821]1609}
1610
1611/* perform additional actions when some internal variables changed */
1612static void handle_special(var *v)
1613{
1614 int n;
[1770]1615 char *b;
1616 const char *sep, *s;
[821]1617 int sl, l, len, i, bsize;
1618
[1770]1619 if (!(v->type & VF_SPECIAL))
[821]1620 return;
1621
[1770]1622 if (v == intvar[NF]) {
[821]1623 n = (int)getvar_i(v);
1624 fsrealloc(n);
1625
1626 /* recalculate $0 */
[1770]1627 sep = getvar_s(intvar[OFS]);
[821]1628 sl = strlen(sep);
1629 b = NULL;
1630 len = 0;
[1770]1631 for (i = 0; i < n; i++) {
[821]1632 s = getvar_s(&Fields[i]);
1633 l = strlen(s);
1634 if (b) {
1635 memcpy(b+len, sep, sl);
1636 len += sl;
1637 }
1638 qrealloc(&b, len+l+sl, &bsize);
1639 memcpy(b+len, s, l);
1640 len += l;
1641 }
[1770]1642 if (b)
1643 b[len] = '\0';
1644 setvar_p(intvar[F0], b);
[821]1645 is_f0_split = TRUE;
1646
[1770]1647 } else if (v == intvar[F0]) {
[821]1648 is_f0_split = FALSE;
1649
[1770]1650 } else if (v == intvar[FS]) {
[821]1651 mk_splitter(getvar_s(v), &fsplitter);
1652
[1770]1653 } else if (v == intvar[RS]) {
[821]1654 mk_splitter(getvar_s(v), &rsplitter);
1655
[1770]1656 } else if (v == intvar[IGNORECASE]) {
[821]1657 icase = istrue(v);
1658
[1770]1659 } else { /* $n */
1660 n = getvar_i(intvar[NF]);
1661 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
[821]1662 /* right here v is invalid. Just to note... */
1663 }
1664}
1665
1666/* step through func/builtin/etc arguments */
1667static node *nextarg(node **pn)
1668{
1669 node *n;
1670
1671 n = *pn;
1672 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1673 *pn = n->r.n;
1674 n = n->l.n;
1675 } else {
1676 *pn = NULL;
1677 }
1678 return n;
1679}
1680
1681static void hashwalk_init(var *v, xhash *array)
1682{
1683 char **w;
1684 hash_item *hi;
1685 int i;
1686
1687 if (v->type & VF_WALK)
1688 free(v->x.walker);
1689
1690 v->type |= VF_WALK;
[1770]1691 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1692 w[0] = w[1] = (char *)(w + 2);
1693 for (i = 0; i < array->csize; i++) {
[821]1694 hi = array->items[i];
[1770]1695 while (hi) {
[821]1696 strcpy(*w, hi->name);
1697 nextword(w);
1698 hi = hi->next;
1699 }
1700 }
1701}
1702
1703static int hashwalk_next(var *v)
1704{
1705 char **w;
1706
1707 w = v->x.walker;
[1770]1708 if (w[1] == w[0])
[821]1709 return FALSE;
1710
1711 setvar_s(v, nextword(w+1));
1712 return TRUE;
1713}
1714
1715/* evaluate node, return 1 when result is true, 0 otherwise */
1716static int ptest(node *pattern)
1717{
[1770]1718 /* ptest__v is "static": to save stack space? */
1719 return istrue(evaluate(pattern, &G.ptest__v));
[821]1720}
1721
1722/* Read the next record from stream rsm into variable v.
 * Returns 1 when a record was read, 0 when no data is left, and -1 when
 * the read failed (ERRNO is set from errno in that case). The matched
 * record terminator is stored in RT.
 *
 * The stream keeps its own buffer (rsm->buffer, rsm->size): rsm->adv is
 * the offset of the unconsumed data and rsm->pos the number of unconsumed
 * bytes. The record separator comes from rsplitter: a regexp, a single
 * character, or (when that character is NUL) runs of blank lines.
 */
1723static int awk_getline(rstream *rsm, var *v)
1724{
1725 char *b;
1726 regmatch_t pmatch[2];
1727 int a, p, pp=0, size;
1728 int fd, so, eo, r, rp;
1729 char c, *m, *s;
1730
1731 /* we're using our own buffer since we need access to accumulating
 * characters
 */
1734 fd = fileno(rsm->F);
1735 m = rsm->buffer;
1736 a = rsm->adv;
1737 p = rsm->pos;
1738 size = rsm->size;
1739 c = (char) rsplitter.n.info;
1740 rp = 0;
1741
[1770]1742 if (!m) qrealloc(&m, 256, &size);
[821]1743 do {
1744 b = m + a;
/* so/eo: start and end of the separator, relative to b; default to
 * "everything buffered is the record" */
1745 so = eo = p;
1746 r = 1;
1747 if (p > 0) {
1748 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1749 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
[1770]1750 b, 1, pmatch, 0) == 0) {
[821]1751 so = pmatch[0].rm_so;
1752 eo = pmatch[0].rm_eo;
/* a match that touches the end of the buffer might still grow */
1753 if (b[eo] != '\0')
1754 break;
1755 }
1756 } else if (c != '\0') {
/* only the bytes added by the last read (from pp on) are searched */
1757 s = strchr(b+pp, c);
[1770]1758 if (!s) s = memchr(b+pp, '\0', p - pp);
[821]1759 if (s) {
1760 so = eo = s-b;
1761 eo++;
1762 break;
1763 }
1764 } else {
/* paragraph mode: skip leading newlines, split at a run of newlines */
1765 while (b[rp] == '\n')
1766 rp++;
1767 s = strstr(b+rp, "\n\n");
1768 if (s) {
1769 so = eo = s-b;
1770 while (b[eo] == '\n') eo++;
1771 if (b[eo] != '\0')
1772 break;
1773 }
1774 }
1775 }
1776
/* no complete record yet: move pending data to the buffer start and
 * read more */
1777 if (a > 0) {
1778 memmove(m, (const void *)(m+a), p+1);
1779 b = m;
1780 a = 0;
1781 }
1782
1783 qrealloc(&m, a+p+128, &size);
1784 b = m + a;
1785 pp = p;
1786 p += safe_read(fd, b+p, size-p-1);
/* p shrinks only when safe_read returned a negative value */
1787 if (p < pp) {
1788 p = 0;
1789 r = 0;
[1770]1790 setvar_i(intvar[ERRNO], errno);
[821]1791 }
1792 b[p] = '\0';
1793
1794 } while (p > pp);
1795
1796 if (p == 0) {
1797 r--;
1798 } else {
/* temporarily NUL-terminate the record, then the separator, in place */
1799 c = b[so]; b[so] = '\0';
1800 setvar_s(v, b+rp);
1801 v->type |= VF_USER;
1802 b[so] = c;
1803 c = b[eo]; b[eo] = '\0';
[1770]1804 setvar_s(intvar[RT], b+so);
[821]1805 b[eo] = c;
1806 }
1807
1808 rsm->buffer = m;
1809 rsm->adv = a + eo;
1810 rsm->pos = p - eo;
1811 rsm->size = size;
1812
1813 return r;
1814}
1815
1816static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1817{
[1770]1818 int r = 0;
[821]1819 char c;
[1770]1820 const char *s = format;
[821]1821
1822 if (int_as_int && n == (int)n) {
1823 r = snprintf(b, size, "%d", (int)n);
1824 } else {
[1770]1825 do { c = *s; } while (c && *++s);
[821]1826 if (strchr("diouxX", c)) {
1827 r = snprintf(b, size, format, (int)n);
1828 } else if (strchr("eEfgG", c)) {
1829 r = snprintf(b, size, format, n);
1830 } else {
[1770]1831 syntax_error(EMSG_INV_FMT);
[821]1832 }
1833 }
1834 return r;
1835}
1836
1837
1838/* Formatted output into an allocated buffer; returns a pointer to the
 * buffer, which the caller must free.
 * The first argument in list n is the format string; it is copied and cut
 * into segments, each holding literal text plus at most one conversion.
 * Every segment is formatted separately with the next argument:
 * %c (and a trailing segment without conversion) takes a char, %s a
 * string, anything else goes through fmt_num(). "*" width/precision is
 * rejected.
 */
1839static char *awk_printf(node *n)
1840{
1841 char *b = NULL;
[1770]1842 char *fmt, *s, *f;
1843 const char *s1;
[821]1844 int i, j, incr, bsize;
1845 char c, c1;
1846 var *v, *arg;
1847
1848 v = nvalloc(1);
[1770]1849 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
[821]1850
1851 i = 0;
1852 while (*f) {
1853 s = f;
/* advance to the next single '%' ("%%" is skipped over) */
1854 while (*f && (*f != '%' || *(++f) == '%'))
1855 f++;
/* then skip flags/width/precision up to the conversion letter */
[1770]1856 while (*f && !isalpha(*f)) {
1857 if (*f == '*')
1858 syntax_error("%*x formats are not supported");
[821]1859 f++;
[1770]1860 }
[821]1861
1862 incr = (f - s) + MAXVARFMT;
[1770]1863 qrealloc(&b, incr + i, &bsize);
/* c is the conversion letter; the segment is NUL-terminated right
 * after it, the overwritten char being kept in c1 and restored below */
1864 c = *f;
1865 if (c != '\0') f++;
1866 c1 = *f;
1867 *f = '\0';
[821]1868 arg = evaluate(nextarg(&n), v);
1869
1870 j = i;
1871 if (c == 'c' || !c) {
[1770]1872 i += sprintf(b+i, s, is_numeric(arg) ?
1873 (char)getvar_i(arg) : *getvar_s(arg));
[821]1874 } else if (c == 's') {
[1770]1875 s1 = getvar_s(arg);
[821]1876 qrealloc(&b, incr+i+strlen(s1), &bsize);
1877 i += sprintf(b+i, s, s1);
1878 } else {
1879 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1880 }
1881 *f = c1;
1882
1883 /* if there was an error while sprintf, return value is negative */
1884 if (i < j) i = j;
1885 }
1886
[1770]1887 b = xrealloc(b, i + 1);
[821]1888 free(fmt);
1889 nvfree(v);
1890 b[i] = '\0';
1891 return b;
1892}
1893
1894/* common substitution routine
1895 * replace (nm) substring of (src) that match (n) with (repl), store
1896 * result into (dest), return number of substitutions. If nm=0, replace
1897 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1898 * subexpression matching (\1-\9)
1899 */
[1770]1900static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
[821]1901{
1902 char *ds = NULL;
[1770]1903 const char *s;
1904 const char *sp;
[821]1905 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1906 regmatch_t pmatch[10];
1907 regex_t sreg, *re;
1908
1909 re = as_regex(rn, &sreg);
[1770]1910 if (!src) src = intvar[F0];
1911 if (!dest) dest = intvar[F0];
[821]1912
1913 i = di = 0;
1914 sp = getvar_s(src);
1915 rl = strlen(repl);
[1770]1916 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
[821]1917 so = pmatch[0].rm_so;
1918 eo = pmatch[0].rm_eo;
1919
1920 qrealloc(&ds, di + eo + rl, &dssize);
1921 memcpy(ds + di, sp, eo);
1922 di += eo;
1923 if (++i >= nm) {
1924 /* replace */
1925 di -= (eo - so);
1926 nbs = 0;
1927 for (s = repl; *s; s++) {
1928 ds[di++] = c = *s;
1929 if (c == '\\') {
1930 nbs++;
1931 continue;
1932 }
1933 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1934 di -= ((nbs + 3) >> 1);
1935 j = 0;
1936 if (c != '&') {
1937 j = c - '0';
1938 nbs++;
1939 }
1940 if (nbs % 2) {
1941 ds[di++] = c;
1942 } else {
1943 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1944 qrealloc(&ds, di + rl + n, &dssize);
1945 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1946 di += n;
1947 }
1948 }
1949 nbs = 0;
1950 }
1951 }
1952
1953 sp += eo;
1954 if (i == nm) break;
1955 if (eo == so) {
[1770]1956 ds[di] = *sp++;
1957 if (!ds[di++]) break;
[821]1958 }
1959 }
1960
1961 qrealloc(&ds, di + strlen(sp), &dssize);
1962 strcpy(ds + di, sp);
1963 setvar_p(dest, ds);
1964 if (re == &sreg) regfree(re);
1965 return i;
1966}
1967
1968static var *exec_builtin(node *op, var *res)
1969{
[1770]1970#define tspl (G.exec_builtin__tspl)
1971
[821]1972 int (*to_xxx)(int);
1973 var *tv;
1974 node *an[4];
[1770]1975 var *av[4];
1976 const char *as[4];
[821]1977 regmatch_t pmatch[2];
1978 regex_t sreg, *re;
1979 node *spl;
1980 uint32_t isr, info;
1981 int nargs;
1982 time_t tt;
1983 char *s, *s1;
1984 int i, l, ll, n;
1985
1986 tv = nvalloc(4);
1987 isr = info = op->info;
1988 op = op->l.n;
1989
1990 av[2] = av[3] = NULL;
[1770]1991 for (i = 0; i < 4 && op; i++) {
[821]1992 an[i] = nextarg(&op);
1993 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1994 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1995 isr >>= 1;
1996 }
1997
1998 nargs = i;
1999 if (nargs < (info >> 30))
[1770]2000 syntax_error(EMSG_TOO_FEW_ARGS);
[821]2001
2002 switch (info & OPNMASK) {
2003
[1770]2004 case B_a2:
2005#if ENABLE_FEATURE_AWK_MATH
[821]2006 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2007#else
[1770]2008 syntax_error(EMSG_NO_MATH);
[821]2009#endif
2010 break;
2011
[1770]2012 case B_sp:
[821]2013 if (nargs > 2) {
2014 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2015 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2016 } else {
2017 spl = &fsplitter.n;
2018 }
2019
2020 n = awk_split(as[0], spl, &s);
2021 s1 = s;
2022 clear_array(iamarray(av[1]));
2023 for (i=1; i<=n; i++)
2024 setari_u(av[1], i, nextword(&s1));
2025 free(s);
2026 setvar_i(res, n);
2027 break;
2028
[1770]2029 case B_ss:
[821]2030 l = strlen(as[0]);
2031 i = getvar_i(av[1]) - 1;
[1770]2032 if (i > l) i = l;
2033 if (i < 0) i = 0;
[821]2034 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
[1770]2035 if (n < 0) n = 0;
[821]2036 s = xmalloc(n+1);
2037 strncpy(s, as[0]+i, n);
2038 s[n] = '\0';
2039 setvar_p(res, s);
2040 break;
2041
[1770]2042 case B_an:
2043 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2044 break;
2045
2046 case B_co:
2047 setvar_i(res, ~(long)getvar_i(av[0]));
2048 break;
2049
2050 case B_ls:
2051 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2052 break;
2053
2054 case B_or:
2055 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2056 break;
2057
2058 case B_rs:
2059 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2060 break;
2061
2062 case B_xo:
2063 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2064 break;
2065
2066 case B_lo:
[821]2067 to_xxx = tolower;
2068 goto lo_cont;
2069
[1770]2070 case B_up:
[821]2071 to_xxx = toupper;
[1770]2072 lo_cont:
2073 s1 = s = xstrdup(as[0]);
[821]2074 while (*s1) {
2075 *s1 = (*to_xxx)(*s1);
2076 s1++;
2077 }
2078 setvar_p(res, s);
2079 break;
2080
[1770]2081 case B_ix:
[821]2082 n = 0;
2083 ll = strlen(as[1]);
2084 l = strlen(as[0]) - ll;
2085 if (ll > 0 && l >= 0) {
[1770]2086 if (!icase) {
[821]2087 s = strstr(as[0], as[1]);
2088 if (s) n = (s - as[0]) + 1;
2089 } else {
2090 /* this piece of code is terribly slow and
2091 * really should be rewritten
2092 */
2093 for (i=0; i<=l; i++) {
2094 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2095 n = i+1;
2096 break;
2097 }
2098 }
2099 }
2100 }
2101 setvar_i(res, n);
2102 break;
2103
[1770]2104 case B_ti:
[821]2105 if (nargs > 1)
2106 tt = getvar_i(av[1]);
2107 else
2108 time(&tt);
[1770]2109 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2110 i = strftime(g_buf, MAXVARFMT,
2111 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2112 localtime(&tt));
2113 g_buf[i] = '\0';
2114 setvar_s(res, g_buf);
[821]2115 break;
2116
[1770]2117 case B_ma:
[821]2118 re = as_regex(an[1], &sreg);
2119 n = regexec(re, as[0], 1, pmatch, 0);
2120 if (n == 0) {
2121 pmatch[0].rm_so++;
2122 pmatch[0].rm_eo++;
2123 } else {
2124 pmatch[0].rm_so = 0;
2125 pmatch[0].rm_eo = -1;
2126 }
2127 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2128 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2129 setvar_i(res, pmatch[0].rm_so);
2130 if (re == &sreg) regfree(re);
2131 break;
2132
[1770]2133 case B_ge:
[821]2134 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2135 break;
2136
[1770]2137 case B_gs:
[821]2138 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2139 break;
2140
[1770]2141 case B_su:
[821]2142 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2143 break;
2144 }
2145
2146 nvfree(tv);
2147 return res;
[1770]2148#undef tspl
[821]2149}
2150
2151/*
2152 * Evaluate node - the heart of the program. Supplied with subtree
2153 * and place where to store result. returns ptr to result.
2154 */
2155#define XC(n) ((n) >> 8)
2156
2157static var *evaluate(node *op, var *res)
2158{
[1770]2159/* This procedure is recursive so we should count every byte */
2160#define fnargs (G.evaluate__fnargs)
2161/* seed is initialized to 1 */
2162#define seed (G.evaluate__seed)
2163#define sreg (G.evaluate__sreg)
2164
[821]2165 node *op1;
2166 var *v1;
2167 union {
2168 var *v;
[1770]2169 const char *s;
[821]2170 double d;
2171 int i;
2172 } L, R;
2173 uint32_t opinfo;
[1770]2174 int opn;
[821]2175 union {
2176 char *s;
2177 rstream *rsm;
2178 FILE *F;
2179 var *v;
2180 regex_t *re;
2181 uint32_t info;
2182 } X;
2183
[1770]2184 if (!op)
[821]2185 return setvar_s(res, NULL);
2186
2187 v1 = nvalloc(2);
2188
2189 while (op) {
2190 opinfo = op->info;
[1770]2191 opn = (opinfo & OPNMASK);
2192 g_lineno = op->lineno;
[821]2193
2194 /* execute inevitable things */
2195 op1 = op->l.n;
2196 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2197 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2198 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2199 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2200 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2201
2202 switch (XC(opinfo & OPCLSMASK)) {
2203
[1770]2204 /* -- iterative node type -- */
[821]2205
[1770]2206 /* test pattern */
2207 case XC( OC_TEST ):
[821]2208 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2209 /* it's range pattern */
2210 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2211 op->info |= OF_CHECKED;
2212 if (ptest(op1->r.n))
2213 op->info &= ~OF_CHECKED;
2214
2215 op = op->a.n;
2216 } else {
2217 op = op->r.n;
2218 }
2219 } else {
2220 op = (ptest(op1)) ? op->a.n : op->r.n;
2221 }
2222 break;
2223
[1770]2224 /* just evaluate an expression, also used as unconditional jump */
2225 case XC( OC_EXEC ):
[821]2226 break;
2227
[1770]2228 /* branch, used in if-else and various loops */
2229 case XC( OC_BR ):
[821]2230 op = istrue(L.v) ? op->a.n : op->r.n;
2231 break;
2232
[1770]2233 /* initialize for-in loop */
2234 case XC( OC_WALKINIT ):
[821]2235 hashwalk_init(L.v, iamarray(R.v));
2236 break;
2237
[1770]2238 /* get next array item */
2239 case XC( OC_WALKNEXT ):
[821]2240 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2241 break;
2242
[1770]2243 case XC( OC_PRINT ):
2244 case XC( OC_PRINTF ):
[821]2245 X.F = stdout;
2246 if (op->r.n) {
2247 X.rsm = newfile(R.s);
[1770]2248 if (!X.rsm->F) {
[821]2249 if (opn == '|') {
[1770]2250 X.rsm->F = popen(R.s, "w");
2251 if (X.rsm->F == NULL)
[821]2252 bb_perror_msg_and_die("popen");
2253 X.rsm->is_pipe = 1;
2254 } else {
[1770]2255 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
[821]2256 }
2257 }
2258 X.F = X.rsm->F;
2259 }
2260
2261 if ((opinfo & OPCLSMASK) == OC_PRINT) {
[1770]2262 if (!op1) {
2263 fputs(getvar_s(intvar[F0]), X.F);
[821]2264 } else {
2265 while (op1) {
2266 L.v = evaluate(nextarg(&op1), v1);
2267 if (L.v->type & VF_NUMBER) {
[1770]2268 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2269 getvar_i(L.v), TRUE);
2270 fputs(g_buf, X.F);
[821]2271 } else {
2272 fputs(getvar_s(L.v), X.F);
2273 }
2274
[1770]2275 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
[821]2276 }
2277 }
[1770]2278 fputs(getvar_s(intvar[ORS]), X.F);
[821]2279
2280 } else { /* OC_PRINTF */
2281 L.s = awk_printf(op1);
2282 fputs(L.s, X.F);
[1770]2283 free((char*)L.s);
[821]2284 }
2285 fflush(X.F);
2286 break;
2287
[1770]2288 case XC( OC_DELETE ):
[821]2289 X.info = op1->info & OPCLSMASK;
2290 if (X.info == OC_VAR) {
2291 R.v = op1->l.v;
2292 } else if (X.info == OC_FNARG) {
2293 R.v = &fnargs[op1->l.i];
2294 } else {
[1770]2295 syntax_error(EMSG_NOT_ARRAY);
[821]2296 }
2297
2298 if (op1->r.n) {
2299 clrvar(L.v);
2300 L.s = getvar_s(evaluate(op1->r.n, v1));
2301 hash_remove(iamarray(R.v), L.s);
2302 } else {
2303 clear_array(iamarray(R.v));
2304 }
2305 break;
2306
[1770]2307 case XC( OC_NEWSOURCE ):
2308 g_progname = op->l.s;
[821]2309 break;
2310
[1770]2311 case XC( OC_RETURN ):
[821]2312 copyvar(res, L.v);
2313 break;
2314
[1770]2315 case XC( OC_NEXTFILE ):
[821]2316 nextfile = TRUE;
[1770]2317 case XC( OC_NEXT ):
[821]2318 nextrec = TRUE;
[1770]2319 case XC( OC_DONE ):
[821]2320 clrvar(res);
2321 break;
2322
[1770]2323 case XC( OC_EXIT ):
[821]2324 awk_exit(L.d);
2325
[1770]2326 /* -- recursive node type -- */
[821]2327
[1770]2328 case XC( OC_VAR ):
[821]2329 L.v = op->l.v;
[1770]2330 if (L.v == intvar[NF])
[821]2331 split_f0();
2332 goto v_cont;
2333
[1770]2334 case XC( OC_FNARG ):
[821]2335 L.v = &fnargs[op->l.i];
[1770]2336 v_cont:
2337 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
[821]2338 break;
2339
[1770]2340 case XC( OC_IN ):
[821]2341 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2342 break;
2343
[1770]2344 case XC( OC_REGEXP ):
[821]2345 op1 = op;
[1770]2346 L.s = getvar_s(intvar[F0]);
[821]2347 goto re_cont;
2348
[1770]2349 case XC( OC_MATCH ):
[821]2350 op1 = op->r.n;
[1770]2351 re_cont:
[821]2352 X.re = as_regex(op1, &sreg);
2353 R.i = regexec(X.re, L.s, 0, NULL, 0);
2354 if (X.re == &sreg) regfree(X.re);
2355 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2356 break;
2357
[1770]2358 case XC( OC_MOVE ):
[821]2359 /* if source is a temporary string, jusk relink it to dest */
2360 if (R.v == v1+1 && R.v->string) {
2361 res = setvar_p(L.v, R.v->string);
2362 R.v->string = NULL;
2363 } else {
2364 res = copyvar(L.v, R.v);
2365 }
2366 break;
2367
[1770]2368 case XC( OC_TERNARY ):
[821]2369 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
[1770]2370 syntax_error(EMSG_POSSIBLE_ERROR);
[821]2371 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2372 break;
2373
[1770]2374 case XC( OC_FUNC ):
2375 if (!op->r.f->body.first)
2376 syntax_error(EMSG_UNDEF_FUNC);
[821]2377
2378 X.v = R.v = nvalloc(op->r.f->nargs+1);
2379 while (op1) {
2380 L.v = evaluate(nextarg(&op1), v1);
2381 copyvar(R.v, L.v);
2382 R.v->type |= VF_CHILD;
2383 R.v->x.parent = L.v;
2384 if (++R.v - X.v >= op->r.f->nargs)
2385 break;
2386 }
2387
2388 R.v = fnargs;
2389 fnargs = X.v;
2390
[1770]2391 L.s = g_progname;
[821]2392 res = evaluate(op->r.f->body.first, res);
[1770]2393 g_progname = L.s;
[821]2394
2395 nvfree(fnargs);
2396 fnargs = R.v;
2397 break;
2398
[1770]2399 case XC( OC_GETLINE ):
2400 case XC( OC_PGETLINE ):
[821]2401 if (op1) {
2402 X.rsm = newfile(L.s);
[1770]2403 if (!X.rsm->F) {
[821]2404 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2405 X.rsm->F = popen(L.s, "r");
2406 X.rsm->is_pipe = TRUE;
2407 } else {
[1770]2408 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
[821]2409 }
2410 }
2411 } else {
[1770]2412 if (!iF) iF = next_input_file();
[821]2413 X.rsm = iF;
2414 }
2415
[1770]2416 if (!X.rsm->F) {
2417 setvar_i(intvar[ERRNO], errno);
[821]2418 setvar_i(res, -1);
2419 break;
2420 }
2421
[1770]2422 if (!op->r.n)
2423 R.v = intvar[F0];
[821]2424
2425 L.i = awk_getline(X.rsm, R.v);
2426 if (L.i > 0) {
[1770]2427 if (!op1) {
2428 incvar(intvar[FNR]);
2429 incvar(intvar[NR]);
[821]2430 }
2431 }
2432 setvar_i(res, L.i);
2433 break;
2434
[1770]2435 /* simple builtins */
2436 case XC( OC_FBLTIN ):
[821]2437 switch (opn) {
2438
[1770]2439 case F_in:
[821]2440 R.d = (int)L.d;
2441 break;
2442
[1770]2443 case F_rn:
2444 R.d = (double)rand() / (double)RAND_MAX;
[821]2445 break;
[1770]2446#if ENABLE_FEATURE_AWK_MATH
2447 case F_co:
[821]2448 R.d = cos(L.d);
2449 break;
2450
[1770]2451 case F_ex:
[821]2452 R.d = exp(L.d);
2453 break;
2454
[1770]2455 case F_lg:
[821]2456 R.d = log(L.d);
2457 break;
2458
[1770]2459 case F_si:
[821]2460 R.d = sin(L.d);
2461 break;
2462
[1770]2463 case F_sq:
[821]2464 R.d = sqrt(L.d);
2465 break;
2466#else
[1770]2467 case F_co:
2468 case F_ex:
2469 case F_lg:
2470 case F_si:
2471 case F_sq:
2472 syntax_error(EMSG_NO_MATH);
[821]2473 break;
2474#endif
[1770]2475 case F_sr:
[821]2476 R.d = (double)seed;
[1770]2477 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
[821]2478 srand(seed);
2479 break;
2480
[1770]2481 case F_ti:
[821]2482 R.d = time(NULL);
2483 break;
2484
[1770]2485 case F_le:
2486 if (!op1)
2487 L.s = getvar_s(intvar[F0]);
[821]2488 R.d = strlen(L.s);
2489 break;
2490
[1770]2491 case F_sy:
[821]2492 fflush(NULL);
[1770]2493 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2494 ? (system(L.s) >> 8) : 0;
[821]2495 break;
2496
[1770]2497 case F_ff:
2498 if (!op1)
[821]2499 fflush(stdout);
2500 else {
2501 if (L.s && *L.s) {
2502 X.rsm = newfile(L.s);
2503 fflush(X.rsm->F);
2504 } else {
2505 fflush(NULL);
2506 }
2507 }
2508 break;
2509
[1770]2510 case F_cl:
[821]2511 X.rsm = (rstream *)hash_search(fdhash, L.s);
2512 if (X.rsm) {
2513 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2514 free(X.rsm->buffer);
2515 hash_remove(fdhash, L.s);
2516 }
2517 if (R.i != 0)
[1770]2518 setvar_i(intvar[ERRNO], errno);
[821]2519 R.d = (double)R.i;
2520 break;
2521 }
2522 setvar_i(res, R.d);
2523 break;
2524
[1770]2525 case XC( OC_BUILTIN ):
[821]2526 res = exec_builtin(op, res);
2527 break;
2528
[1770]2529 case XC( OC_SPRINTF ):
[821]2530 setvar_p(res, awk_printf(op1));
2531 break;
2532
[1770]2533 case XC( OC_UNARY ):
[821]2534 X.v = R.v;
2535 L.d = R.d = getvar_i(R.v);
2536 switch (opn) {
[1770]2537 case 'P':
[821]2538 L.d = ++R.d;
2539 goto r_op_change;
[1770]2540 case 'p':
[821]2541 R.d++;
2542 goto r_op_change;
[1770]2543 case 'M':
[821]2544 L.d = --R.d;
2545 goto r_op_change;
[1770]2546 case 'm':
[821]2547 R.d--;
2548 goto r_op_change;
[1770]2549 case '!':
2550 L.d = istrue(X.v) ? 0 : 1;
[821]2551 break;
[1770]2552 case '-':
[821]2553 L.d = -R.d;
2554 break;
[1770]2555 r_op_change:
[821]2556 setvar_i(X.v, R.d);
2557 }
2558 setvar_i(res, L.d);
2559 break;
2560
[1770]2561 case XC( OC_FIELD ):
[821]2562 R.i = (int)getvar_i(R.v);
2563 if (R.i == 0) {
[1770]2564 res = intvar[F0];
[821]2565 } else {
2566 split_f0();
2567 if (R.i > nfields)
2568 fsrealloc(R.i);
[1770]2569 res = &Fields[R.i - 1];
[821]2570 }
2571 break;
2572
[1770]2573 /* concatenation (" ") and index joining (",") */
2574 case XC( OC_CONCAT ):
2575 case XC( OC_COMMA ):
[821]2576 opn = strlen(L.s) + strlen(R.s) + 2;
[1770]2577 X.s = xmalloc(opn);
[821]2578 strcpy(X.s, L.s);
2579 if ((opinfo & OPCLSMASK) == OC_COMMA) {
[1770]2580 L.s = getvar_s(intvar[SUBSEP]);
2581 X.s = xrealloc(X.s, opn + strlen(L.s));
[821]2582 strcat(X.s, L.s);
2583 }
2584 strcat(X.s, R.s);
2585 setvar_p(res, X.s);
2586 break;
2587
[1770]2588 case XC( OC_LAND ):
[821]2589 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2590 break;
2591
[1770]2592 case XC( OC_LOR ):
[821]2593 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2594 break;
2595
[1770]2596 case XC( OC_BINARY ):
2597 case XC( OC_REPLACE ):
[821]2598 R.d = getvar_i(R.v);
2599 switch (opn) {
[1770]2600 case '+':
[821]2601 L.d += R.d;
2602 break;
[1770]2603 case '-':
[821]2604 L.d -= R.d;
2605 break;
[1770]2606 case '*':
[821]2607 L.d *= R.d;
2608 break;
[1770]2609 case '/':
2610 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
[821]2611 L.d /= R.d;
2612 break;
[1770]2613 case '&':
2614#if ENABLE_FEATURE_AWK_MATH
[821]2615 L.d = pow(L.d, R.d);
2616#else
[1770]2617 syntax_error(EMSG_NO_MATH);
[821]2618#endif
2619 break;
[1770]2620 case '%':
2621 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
[821]2622 L.d -= (int)(L.d / R.d) * R.d;
2623 break;
2624 }
[1770]2625 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
[821]2626 break;
2627
[1770]2628 case XC( OC_COMPARE ):
[821]2629 if (is_numeric(L.v) && is_numeric(R.v)) {
2630 L.d = getvar_i(L.v) - getvar_i(R.v);
2631 } else {
2632 L.s = getvar_s(L.v);
2633 R.s = getvar_s(R.v);
2634 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2635 }
2636 switch (opn & 0xfe) {
[1770]2637 case 0:
[821]2638 R.i = (L.d > 0);
2639 break;
[1770]2640 case 2:
[821]2641 R.i = (L.d >= 0);
2642 break;
[1770]2643 case 4:
[821]2644 R.i = (L.d == 0);
2645 break;
2646 }
2647 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2648 break;
2649
[1770]2650 default:
2651 syntax_error(EMSG_POSSIBLE_ERROR);
[821]2652 }
2653 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2654 op = op->a.n;
2655 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2656 break;
2657 if (nextrec)
2658 break;
2659 }
2660 nvfree(v1);
2661 return res;
[1770]2662#undef fnargs
2663#undef seed
2664#undef sreg
[821]2665}
2666
2667
2668/* -------- main & co. -------- */
2669
2670static int awk_exit(int r)
2671{
[1770]2672 var tv;
2673 unsigned i;
[821]2674 hash_item *hi;
2675
[1770]2676 zero_out_var(&tv);
2677
2678 if (!exiting) {
[821]2679 exiting = TRUE;
2680 nextrec = FALSE;
2681 evaluate(endseq.first, &tv);
2682 }
2683
2684 /* waiting for children */
[1770]2685 for (i = 0; i < fdhash->csize; i++) {
[821]2686 hi = fdhash->items[i];
[1770]2687 while (hi) {
[821]2688 if (hi->data.rs.F && hi->data.rs.is_pipe)
2689 pclose(hi->data.rs.F);
2690 hi = hi->next;
2691 }
2692 }
2693
2694 exit(r);
2695}
2696
2697/* if expr looks like "var=value", perform assignment and return 1,
2698 * otherwise return 0 */
2699static int is_assignment(const char *expr)
2700{
2701 char *exprc, *s, *s0, *s1;
2702
[1770]2703 exprc = xstrdup(expr);
[821]2704 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2705 free(exprc);
2706 return FALSE;
2707 }
2708
2709 *(s++) = '\0';
2710 s0 = s1 = s;
2711 while (*s)
2712 *(s1++) = nextchar(&s);
2713
2714 *s1 = '\0';
2715 setvar_u(newvar(exprc), s0);
2716 free(exprc);
2717 return TRUE;
2718}
2719
2720/* switch to next input file */
2721static rstream *next_input_file(void)
2722{
[1770]2723#define rsm (G.next_input_file__rsm)
2724#define files_happen (G.next_input_file__files_happen)
2725
[821]2726 FILE *F = NULL;
[1770]2727 const char *fname, *ind;
[821]2728
2729 if (rsm.F) fclose(rsm.F);
2730 rsm.F = NULL;
2731 rsm.pos = rsm.adv = 0;
2732
2733 do {
[1770]2734 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
[821]2735 if (files_happen)
2736 return NULL;
2737 fname = "-";
2738 F = stdin;
2739 } else {
[1770]2740 ind = getvar_s(incvar(intvar[ARGIND]));
2741 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
[821]2742 if (fname && *fname && !is_assignment(fname))
2743 F = afopen(fname, "r");
2744 }
2745 } while (!F);
2746
2747 files_happen = TRUE;
[1770]2748 setvar_s(intvar[FILENAME], fname);
[821]2749 rsm.F = F;
2750 return &rsm;
[1770]2751#undef rsm
2752#undef files_happen
[821]2753}
2754
[1770]2755int awk_main(int argc, char **argv);
[821]2756int awk_main(int argc, char **argv)
2757{
[1770]2758 unsigned opt;
2759 char *opt_F, *opt_W;
2760 llist_t *opt_v = NULL;
2761 int i, j, flen;
[821]2762 var *v;
[1770]2763 var tv;
[821]2764 char **envp;
[1770]2765 char *vnames = (char *)vNames; /* cheat */
2766 char *vvalues = (char *)vValues;
[821]2767
[1770]2768 INIT_G();
2769
2770 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2771 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2772 if (ENABLE_LOCALE_SUPPORT)
2773 setlocale(LC_NUMERIC, "C");
2774
2775 zero_out_var(&tv);
2776
[821]2777 /* allocate global buffer */
[1770]2778 g_buf = xmalloc(MAXVARFMT + 1);
[821]2779
2780 vhash = hash_init();
2781 ahash = hash_init();
2782 fdhash = hash_init();
2783 fnhash = hash_init();
2784
2785 /* initialize variables */
[1770]2786 for (i = 0; *vnames; i++) {
2787 intvar[i] = v = newvar(nextword(&vnames));
2788 if (*vvalues != '\377')
2789 setvar_s(v, nextword(&vvalues));
[821]2790 else
2791 setvar_i(v, 0);
2792
[1770]2793 if (*vnames == '*') {
[821]2794 v->type |= VF_SPECIAL;
[1770]2795 vnames++;
[821]2796 }
2797 }
2798
[1770]2799 handle_special(intvar[FS]);
2800 handle_special(intvar[RS]);
[821]2801
[1770]2802 newfile("/dev/stdin")->F = stdin;
2803 newfile("/dev/stdout")->F = stdout;
2804 newfile("/dev/stderr")->F = stderr;
[821]2805
[1770]2806 /* Huh, people report that sometimes environ is NULL. Oh well. */
2807 if (environ) for (envp = environ; *envp; envp++) {
2808 /* environ is writable, thus we don't strdup it needlessly */
2809 char *s = *envp;
2810 char *s1 = strchr(s, '=');
2811 if (s1) {
2812 *s1 = '\0';
2813 /* Both findvar and setvar_u take const char*
2814 * as 2nd arg -> environment is not trashed */
2815 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2816 *s1 = '=';
[821]2817 }
2818 }
[1770]2819 opt_complementary = "v::";
2820 opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2821 argv += optind;
2822 argc -= optind;
2823 if (opt & 0x1)
2824 setvar_s(intvar[FS], opt_F); // -F
2825 while (opt_v) { /* -v */
2826 if (!is_assignment(llist_pop(&opt_v)))
2827 bb_show_usage();
2828 }
2829 if (opt & 0x4) { // -f
2830 char *s = s; /* die, gcc, die */
2831 FILE *from_file = afopen(g_progname, "r");
2832 /* one byte is reserved for some trick in next_token */
2833 if (fseek(from_file, 0, SEEK_END) == 0) {
2834 flen = ftell(from_file);
2835 s = xmalloc(flen + 4);
2836 fseek(from_file, 0, SEEK_SET);
2837 i = 1 + fread(s + 1, 1, flen, from_file);
2838 } else {
2839 for (i = j = 1; j > 0; i += j) {
2840 s = xrealloc(s, i + 4096);
2841 j = fread(s + i, 1, 4094, from_file);
2842 }
[821]2843 }
[1770]2844 s[i] = '\0';
2845 fclose(from_file);
2846 parse_program(s + 1);
2847 free(s);
2848 } else { // no -f: take program from 1st parameter
2849 if (!argc)
[821]2850 bb_show_usage();
[1770]2851 g_progname = "cmd. line";
2852 parse_program(*argv++);
2853 argc--;
[821]2854 }
[1770]2855 if (opt & 0x8) // -W
2856 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
[821]2857
2858 /* fill in ARGV array */
[1770]2859 setvar_i(intvar[ARGC], argc + 1);
2860 setari_u(intvar[ARGV], 0, "awk");
2861 i = 0;
2862 while (*argv)
2863 setari_u(intvar[ARGV], ++i, *argv++);
[821]2864
2865 evaluate(beginseq.first, &tv);
[1770]2866 if (!mainseq.first && !endseq.first)
[821]2867 awk_exit(EXIT_SUCCESS);
2868
2869 /* input file could already be opened in BEGIN block */
[1770]2870 if (!iF) iF = next_input_file();
[821]2871
2872 /* passing through input files */
2873 while (iF) {
2874 nextfile = FALSE;
[1770]2875 setvar_i(intvar[FNR], 0);
[821]2876
[1770]2877 while ((i = awk_getline(iF, intvar[F0])) > 0) {
[821]2878 nextrec = FALSE;
[1770]2879 incvar(intvar[NR]);
2880 incvar(intvar[FNR]);
[821]2881 evaluate(mainseq.first, &tv);
2882
2883 if (nextfile)
2884 break;
2885 }
2886
[1770]2887 if (i < 0)
2888 syntax_error(strerror(errno));
[821]2889
2890 iF = next_input_file();
2891 }
2892
2893 awk_exit(EXIT_SUCCESS);
[1770]2894 /*return 0;*/
[821]2895}
Note: See TracBrowser for help on using the repository browser.