Changeset 3232 in MondoRescue for branches/3.2/mindi-busybox/editors/awk.c
- Timestamp: Jan 1, 2014, 12:47:38 AM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3.2/mindi-busybox/editors/awk.c
r2725 r3232 8 8 */ 9 9 10 //usage:#define awk_trivial_usage 11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..." 12 //usage:#define awk_full_usage "\n\n" 13 //usage: " -v VAR=VAL Set variable" 14 //usage: "\n -F SEP Use SEP as field separator" 15 //usage: "\n -f FILE Read program from FILE" 16 10 17 #include "libbb.h" 11 18 #include "xregex.h" … … 19 26 #define debug_printf_walker(...) do {} while (0) 20 27 #define debug_printf_eval(...) do {} while (0) 28 #define debug_printf_parse(...) do {} while (0) 21 29 22 30 #ifndef debug_printf_walker … … 25 33 #ifndef debug_printf_eval 26 34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__)) 35 #endif 36 #ifndef debug_printf_parse 37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__)) 27 38 #endif 28 39 … … 145 156 /* simple token classes */ 146 157 /* Order and hex values are very important!!! See next_token() */ 147 #define TC_SEQSTART 1/* ( */158 #define TC_SEQSTART 1 /* ( */ 148 159 #define TC_SEQTERM (1 << 1) /* ) */ 149 160 #define TC_REGEXP (1 << 2) /* /.../ */ … … 232 243 * n - min. 
number of args, vN - resolve Nth arg to var, sN - resolve to string 233 244 */ 245 #undef P 246 #undef PRIMASK 247 #undef PRIMASK2 234 248 #define P(x) (x << 24) 235 249 #define PRIMASK 0x7F000000 … … 426 440 smallint nextfile; 427 441 smallint is_f0_split; 442 smallint t_rollback; 428 443 }; 429 444 struct globals2 { … … 432 447 char *t_string; 433 448 int t_lineno; 434 int t_rollback;435 449 436 450 var *intvar[NUM_INTERNAL_VARS]; /* often used */ … … 490 504 #define nextfile (G1.nextfile ) 491 505 #define is_f0_split (G1.is_f0_split ) 506 #define t_rollback (G1.t_rollback ) 492 507 #define t_info (G.t_info ) 493 508 #define t_tclass (G.t_tclass ) 494 509 #define t_string (G.t_string ) 495 510 #define t_lineno (G.t_lineno ) 496 #define t_rollback (G.t_rollback )497 511 #define intvar (G.intvar ) 498 512 #define fsplitter (G.fsplitter ) … … 683 697 if (c == '\\') 684 698 c = bb_process_escape_sequence((const char**)s); 699 /* Example awk statement: 700 * s = "abc\"def" 701 * we must treat \" as " 702 */ 685 703 if (c == '\\' && *s == pps) { /* unrecognized \z? */ 686 704 c = *(*s); /* yes, fetch z */ … … 689 707 } 690 708 return c; 709 } 710 711 /* TODO: merge with strcpy_and_process_escape_sequences()? 712 */ 713 static void unescape_string_in_place(char *s1) 714 { 715 char *s = s1; 716 while ((*s1 = nextchar(&s)) != '\0') 717 s1++; 691 718 } 692 719 … … 1002 1029 if (*p == '\0') { 1003 1030 tc = TC_EOF; 1031 debug_printf_parse("%s: token found: TC_EOF\n", __func__); 1004 1032 1005 1033 } else if (*p == '\"') { … … 1017 1045 *s = '\0'; 1018 1046 tc = TC_STRING; 1047 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); 1019 1048 1020 1049 } else if ((expected & TC_REGEXP) && *p == '/') { … … 1039 1068 *s = '\0'; 1040 1069 tc = TC_REGEXP; 1070 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string); 1041 1071 1042 1072 } else if (*p == '.' 
|| isdigit(*p)) { … … 1048 1078 syntax_error(EMSG_UNEXP_TOKEN); 1049 1079 tc = TC_NUMBER; 1080 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); 1050 1081 1051 1082 } else { … … 1070 1101 /* then this is what we are looking for */ 1071 1102 t_info = *ti; 1103 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info); 1072 1104 p += l; 1073 1105 goto token_found; … … 1093 1125 if (*p == '(') { 1094 1126 tc = TC_FUNCTION; 1127 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); 1095 1128 } else { 1096 1129 if (*p == '[') { 1097 1130 p++; 1098 1131 tc = TC_ARRAY; 1099 } 1100 } 1101 token_found: ; 1102 } 1132 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); 1133 } else 1134 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); 1135 } 1136 } 1137 token_found: 1103 1138 g_pos = p; 1104 1139 … … 1172 1207 var *v; 1173 1208 1209 debug_printf_parse("%s(%x)\n", __func__, iexp); 1210 1174 1211 sn.info = PRIMASK; 1175 1212 sn.r.n = glptr = NULL; … … 1180 1217 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { 1181 1218 /* input redirection (<) attached to glptr node */ 1219 debug_printf_parse("%s: input redir\n", __func__); 1182 1220 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); 1183 1221 cn->a.n = glptr; … … 1186 1224 1187 1225 } else if (tc & (TC_BINOP | TC_UOPPOST)) { 1226 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__); 1188 1227 /* for binary and postfix-unary operators, jump back over 1189 1228 * previous operators with higher priority */ … … 1215 1254 1216 1255 } else { 1256 debug_printf_parse("%s: other\n", __func__); 1217 1257 /* for operands and prefix-unary operators, attach them 1218 1258 * to last node */ … … 1222 1262 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; 1223 1263 if (tc & (TC_OPERAND | TC_REGEXP)) { 1264 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); 1224 1265 xtc = TC_UOPPRE | TC_UOPPOST | 
TC_BINOP | TC_OPERAND | iexp; 1225 1266 /* one should be very careful with switch on tclass - … … 1228 1269 case TC_VARIABLE: 1229 1270 case TC_ARRAY: 1271 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); 1230 1272 cn->info = OC_VAR; 1231 1273 v = hash_search(ahash, t_string); … … 1244 1286 case TC_NUMBER: 1245 1287 case TC_STRING: 1288 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); 1246 1289 cn->info = OC_VAR; 1247 1290 v = cn->l.v = xzalloc(sizeof(var)); … … 1253 1296 1254 1297 case TC_REGEXP: 1298 debug_printf_parse("%s: TC_REGEXP\n", __func__); 1255 1299 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); 1256 1300 break; 1257 1301 1258 1302 case TC_FUNCTION: 1303 debug_printf_parse("%s: TC_FUNCTION\n", __func__); 1259 1304 cn->info = OC_FUNC; 1260 1305 cn->r.f = newfunc(t_string); … … 1263 1308 1264 1309 case TC_SEQSTART: 1310 debug_printf_parse("%s: TC_SEQSTART\n", __func__); 1265 1311 cn = vn->r.n = parse_expr(TC_SEQTERM); 1312 if (!cn) 1313 syntax_error("Empty sequence"); 1266 1314 cn->a.n = vn; 1267 1315 break; 1268 1316 1269 1317 case TC_GETLINE: 1318 debug_printf_parse("%s: TC_GETLINE\n", __func__); 1270 1319 glptr = cn; 1271 1320 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; … … 1273 1322 1274 1323 case TC_BUILTIN: 1324 debug_printf_parse("%s: TC_BUILTIN\n", __func__); 1275 1325 cn->l.n = condition(); 1276 1326 break; … … 1279 1329 } 1280 1330 } 1331 1332 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); 1281 1333 return sn.r.n; 1282 1334 } … … 1347 1399 1348 1400 if (c & TC_GRPSTART) { 1401 debug_printf_parse("%s: TC_GRPSTART\n", __func__); 1349 1402 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { 1403 debug_printf_parse("%s: !TC_GRPTERM\n", __func__); 1350 1404 if (t_tclass & TC_NEWLINE) 1351 1405 continue; … … 1353 1407 chain_group(); 1354 1408 } 1409 debug_printf_parse("%s: TC_GRPTERM\n", __func__); 1355 1410 } else if (c & (TC_OPSEQ | TC_OPTERM)) { 1411 debug_printf_parse("%s: TC_OPSEQ | 
TC_OPTERM\n", __func__); 1356 1412 rollback_token(); 1357 1413 chain_expr(OC_EXEC | Vx); 1358 } else { /* TC_STATEMNT */ 1414 } else { 1415 /* TC_STATEMNT */ 1416 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); 1359 1417 switch (t_info & OPCLSMASK) { 1360 1418 case ST_IF: 1419 debug_printf_parse("%s: ST_IF\n", __func__); 1361 1420 n = chain_node(OC_BR | Vx); 1362 1421 n->l.n = condition(); … … 1373 1432 1374 1433 case ST_WHILE: 1434 debug_printf_parse("%s: ST_WHILE\n", __func__); 1375 1435 n2 = condition(); 1376 1436 n = chain_loop(NULL); … … 1379 1439 1380 1440 case ST_DO: 1441 debug_printf_parse("%s: ST_DO\n", __func__); 1381 1442 n2 = chain_node(OC_EXEC); 1382 1443 n = chain_loop(NULL); … … 1387 1448 1388 1449 case ST_FOR: 1450 debug_printf_parse("%s: ST_FOR\n", __func__); 1389 1451 next_token(TC_SEQSTART); 1390 1452 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); … … 1412 1474 case OC_PRINT: 1413 1475 case OC_PRINTF: 1476 debug_printf_parse("%s: OC_PRINT[F]\n", __func__); 1414 1477 n = chain_node(t_info); 1415 1478 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); … … 1423 1486 1424 1487 case OC_BREAK: 1488 debug_printf_parse("%s: OC_BREAK\n", __func__); 1425 1489 n = chain_node(OC_EXEC); 1426 1490 n->a.n = break_ptr; … … 1428 1492 1429 1493 case OC_CONTINUE: 1494 debug_printf_parse("%s: OC_CONTINUE\n", __func__); 1430 1495 n = chain_node(OC_EXEC); 1431 1496 n->a.n = continue_ptr; … … 1434 1499 /* delete, next, nextfile, return, exit */ 1435 1500 default: 1501 debug_printf_parse("%s: default\n", __func__); 1436 1502 chain_expr(t_info); 1437 1503 } … … 1451 1517 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { 1452 1518 1453 if (tclass & TC_OPTERM) 1519 if (tclass & TC_OPTERM) { 1520 debug_printf_parse("%s: TC_OPTERM\n", __func__); 1454 1521 continue; 1522 } 1455 1523 1456 1524 seq = &mainseq; 1457 1525 if (tclass & TC_BEGIN) { 1526 debug_printf_parse("%s: TC_BEGIN\n", __func__); 1458 1527 seq = &beginseq; 1459 1528 chain_group(); 1460 1529 1461 
1530 } else if (tclass & TC_END) { 1531 debug_printf_parse("%s: TC_END\n", __func__); 1462 1532 seq = &endseq; 1463 1533 chain_group(); 1464 1534 1465 1535 } else if (tclass & TC_FUNCDECL) { 1536 debug_printf_parse("%s: TC_FUNCDECL\n", __func__); 1466 1537 next_token(TC_FUNCTION); 1467 1538 g_pos++; … … 1481 1552 1482 1553 } else if (tclass & TC_OPSEQ) { 1554 debug_printf_parse("%s: TC_OPSEQ\n", __func__); 1483 1555 rollback_token(); 1484 1556 cn = chain_node(OC_TEST); 1485 1557 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); 1486 1558 if (t_tclass & TC_GRPSTART) { 1559 debug_printf_parse("%s: TC_GRPSTART\n", __func__); 1487 1560 rollback_token(); 1488 1561 chain_group(); 1489 1562 } else { 1563 debug_printf_parse("%s: !TC_GRPSTART\n", __func__); 1490 1564 chain_node(OC_PRINT); 1491 1565 } … … 1493 1567 1494 1568 } else /* if (tclass & TC_GRPSTART) */ { 1569 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); 1495 1570 rollback_token(); 1496 1571 chain_group(); 1497 1572 } 1498 1573 } 1574 debug_printf_parse("%s: TC_EOF\n", __func__); 1499 1575 } 1500 1576 … … 1737 1813 1738 1814 } else if (v == intvar[FS]) { 1815 /* 1816 * The POSIX-2008 standard says that changing FS should have no effect on the 1817 * current input line, but only on the next one. The language is: 1818 * 1819 * > Before the first reference to a field in the record is evaluated, the record 1820 * > shall be split into fields, according to the rules in Regular Expressions, 1821 * > using the value of FS that was current at the time the record was read. 
1822 * 1823 * So, split up current line before assignment to FS: 1824 */ 1825 split_f0(); 1826 1739 1827 mk_splitter(getvar_s(v), &fsplitter); 1740 1828 … … 2621 2709 } 2622 2710 2623 if (!rsm ->F) {2711 if (!rsm || !rsm->F) { 2624 2712 setvar_i(intvar[ERRNO], errno); 2625 2713 setvar_i(res, -1); … … 2930 3018 static int is_assignment(const char *expr) 2931 3019 { 2932 char *exprc, *val , *s, *s1;3020 char *exprc, *val; 2933 3021 2934 3022 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { … … 2940 3028 *val++ = '\0'; 2941 3029 2942 s = s1 = val; 2943 while ((*s1 = nextchar(&s)) != '\0') 2944 s1++; 2945 3030 unescape_string_in_place(val); 2946 3031 setvar_u(newvar(exprc), val); 2947 3032 free(exprc); … … 2955 3040 #define files_happen (G.next_input_file__files_happen) 2956 3041 2957 FILE *F = NULL;3042 FILE *F; 2958 3043 const char *fname, *ind; 2959 3044 … … 2963 3048 rsm.pos = rsm.adv = 0; 2964 3049 2965 do{3050 for (;;) { 2966 3051 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { 2967 3052 if (files_happen) … … 2969 3054 fname = "-"; 2970 3055 F = stdin; 2971 } else { 2972 ind = getvar_s(incvar(intvar[ARGIND])); 2973 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); 2974 if (fname && *fname && !is_assignment(fname)) 2975 F = xfopen_stdin(fname); 2976 } 2977 } while (!F); 3056 break; 3057 } 3058 ind = getvar_s(incvar(intvar[ARGIND])); 3059 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); 3060 if (fname && *fname && !is_assignment(fname)) { 3061 F = xfopen_stdin(fname); 3062 break; 3063 } 3064 } 2978 3065 2979 3066 files_happen = TRUE; … … 2989 3076 { 2990 3077 unsigned opt; 2991 char *opt_F , *opt_W;3078 char *opt_F; 2992 3079 llist_t *list_v = NULL; 2993 3080 llist_t *list_f = NULL; … … 3051 3138 } 3052 3139 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */ 3053 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);3140 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL); 3054 3141 
argv += optind; 3055 3142 argc -= optind; 3056 if (opt & 0x1) 3057 setvar_s(intvar[FS], opt_F); // -F 3143 if (opt & 0x1) { /* -F */ 3144 unescape_string_in_place(opt_F); 3145 setvar_s(intvar[FS], opt_F); 3146 } 3058 3147 while (list_v) { /* -v */ 3059 3148 if (!is_assignment(llist_pop(&list_v))) … … 3085 3174 } 3086 3175 if (opt & 0x8) // -W 3087 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);3176 bb_error_msg("warning: option -W is ignored"); 3088 3177 3089 3178 /* fill in ARGV array */
Note: See TracChangeset for help on using the changeset viewer.