Changeset 2725 in MondoRescue for branches/2.2.9/mindi-busybox/editors/diff.c
- Timestamp:
- Feb 25, 2011, 9:26:54 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2.2.9/mindi-busybox/editors/diff.c
r1765 r2725 3 3 * Mini diff implementation for busybox, adapted from OpenBSD diff. 4 4 * 5 * Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com> 5 6 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> 6 7 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> … … 10 11 * Materiel Command, USAF, under agreement number F39502-99-1-0512. 11 12 * 12 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.13 * Licensed under GPLv2 or later, see file LICENSE in this source tree. 13 14 */ 14 15 #include "libbb.h"16 17 #define FSIZE_MAX 3276818 19 /*20 * Output flags21 */22 #define D_HEADER 1 /* Print a header/footer between files */23 #define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */24 #define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */25 26 /*27 * Status values for print_status() and diffreg() return values28 * Guide:29 * D_SAME - files are the same30 * D_DIFFER - files differ31 * D_BINARY - binary files differ32 * D_COMMON - subdirectory common to both dirs33 * D_ONLY - file only exists in one dir34 * D_MISMATCH1 - path1 a dir, path2 a file35 * D_MISMATCH2 - path1 a file, path2 a dir36 * D_ERROR - error occurred37 * D_SKIPPED1 - skipped path1 as it is a special file38 * D_SKIPPED2 - skipped path2 as it is a special file39 */40 41 #define D_SAME 042 #define D_DIFFER (1<<0)43 #define D_BINARY (1<<1)44 #define D_COMMON (1<<2)45 #define D_ONLY (1<<3)46 #define D_MISMATCH1 (1<<4)47 #define D_MISMATCH2 (1<<5)48 #define D_ERROR (1<<6)49 #define D_SKIPPED1 (1<<7)50 #define D_SKIPPED2 (1<<8)51 52 /* Command line options */53 #define FLAG_a (1<<0)54 #define FLAG_b (1<<1)55 #define FLAG_d (1<<2)56 #define FLAG_i (1<<3)57 #define FLAG_L (1<<4)58 #define FLAG_N (1<<5)59 #define FLAG_q (1<<6)60 #define FLAG_r (1<<7)61 #define FLAG_s (1<<8)62 #define FLAG_S (1<<9)63 #define FLAG_t (1<<10)64 #define FLAG_T (1<<11)65 #define FLAG_U (1<<12)66 #define FLAG_w (1<<13)67 68 #define g_read_buf bb_common_bufsiz169 70 struct cand {71 int x;72 int y;73 int pred;74 };75 76 struct line {77 int serial;78 int value;79 };80 81 /*82 * The following struct is used to record change information83 * doing a "context" or "unified" diff. (see routine "change" to84 * understand the highly mnemonic field names)85 */86 struct context_vec {87 int a; /* start line in old file */88 int b; /* end line in old file */89 int c; /* start line in new file */90 int d; /* end line in new file */91 };92 93 struct globals {94 USE_FEATURE_DIFF_DIR(char **dl;)95 USE_FEATURE_DIFF_DIR(int dl_count;)96 /* This is the default number of lines of context. */97 int context;98 size_t max_context;99 int status;100 char *start;101 const char *label1;102 const char *label2;103 struct line *file[2];104 int *J; /* will be overlaid on class */105 int *class; /* will be overlaid on file[0] */106 int *klist; /* will be overlaid on file[0] after class */107 int *member; /* will be overlaid on file[1] */108 int clen;109 int len[2];110 int pref, suff; /* length of prefix and suffix */111 int slen[2];112 bool anychange;113 long *ixnew; /* will be overlaid on file[1] */114 long *ixold; /* will be overlaid on klist */115 struct cand *clist; /* merely a free storage pot for candidates */116 int clistlen; /* the length of clist */117 struct line *sfile[2]; /* shortened by pruning common prefix/suffix */118 struct context_vec *context_vec_start;119 struct context_vec *context_vec_end;120 struct context_vec *context_vec_ptr;121 struct stat stb1, stb2;122 };123 #define G (*ptr_to_globals)124 #define dl (G.dl )125 #define dl_count (G.dl_count )126 #define context (G.context )127 #define max_context (G.max_context )128 #define status (G.status )129 #define start (G.start )130 #define label1 (G.label1 )131 #define label2 (G.label2 )132 #define file (G.file )133 #define J (G.J )134 #define class (G.class )135 #define klist (G.klist )136 #define member (G.member )137 #define clen (G.clen )138 #define len (G.len )139 #define pref (G.pref )140 #define suff (G.suff )141 #define slen (G.slen )142 #define anychange (G.anychange )143 #define ixnew (G.ixnew )144 #define ixold (G.ixold )145 #define clist (G.clist )146 #define clistlen (G.clistlen )147 #define sfile (G.sfile )148 #define context_vec_start (G.context_vec_start )149 #define context_vec_end (G.context_vec_end )150 #define context_vec_ptr (G.context_vec_ptr )151 #define stb1 (G.stb1 )152 #define stb2 (G.stb2 )153 #define INIT_G() do { \154 PTR_TO_GLOBALS = xzalloc(sizeof(G)); \155 context = 3; \156 max_context = 64; \157 } while (0)158 159 160 static void print_only(const char *path, size_t dirlen, const char *entry)161 {162 if (dirlen > 1)163 dirlen--;164 printf("Only in %.*s: %s\n", (int) dirlen, path, entry);165 }166 167 static void print_status(int val, char *path1, char *path2, char *entry)168 {169 const char *const _entry = entry ? entry : "";170 char * const _path1 = entry ? concat_path_file(path1, _entry) : path1;171 char * const _path2 = entry ? concat_path_file(path2, _entry) : path2;172 173 switch (val) {174 case D_ONLY:175 print_only(path1, strlen(path1), entry);176 break;177 case D_COMMON:178 printf("Common subdirectories: %s and %s\n", _path1, _path2);179 break;180 case D_BINARY:181 printf("Binary files %s and %s differ\n", _path1, _path2);182 break;183 case D_DIFFER:184 if (option_mask32 & FLAG_q)185 printf("Files %s and %s differ\n", _path1, _path2);186 break;187 case D_SAME:188 if (option_mask32 & FLAG_s)189 printf("Files %s and %s are identical\n", _path1, _path2);190 break;191 case D_MISMATCH1:192 printf("File %s is a %s while file %s is a %s\n",193 _path1, "directory", _path2, "regular file");194 break;195 case D_MISMATCH2:196 printf("File %s is a %s while file %s is a %s\n",197 _path1, "regular file", _path2, "directory");198 break;199 case D_SKIPPED1:200 printf("File %s is not a regular file or directory and was skipped\n",201 _path1);202 break;203 case D_SKIPPED2:204 printf("File %s is not a regular file or directory and was skipped\n",205 _path2);206 break;207 }208 if (entry) {209 free(_path1);210 free(_path2);211 }212 }213 static ALWAYS_INLINE int fiddle_sum(int sum, int t)214 {215 return sum * 127 + t;216 }217 /*218 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.219 */220 static int readhash(FILE *fp)221 {222 int i, t, space;223 int sum;224 225 sum = 1;226 space = 0;227 if (!(option_mask32 & (FLAG_b | FLAG_w))) {228 for (i = 0; (t = getc(fp)) != '\n'; i++) {229 if (t == EOF) {230 if (i == 0)231 return 0;232 break;233 }234 sum = fiddle_sum(sum, t);235 }236 } else {237 for (i = 0;;) {238 switch (t = getc(fp)) {239 case '\t':240 case '\r':241 case '\v':242 case '\f':243 case ' ':244 space++;245 continue;246 default:247 if (space && !(option_mask32 & FLAG_w)) {248 i++;249 space = 0;250 }251 sum = fiddle_sum(sum, t);252 i++;253 continue;254 case EOF:255 if (i == 0)256 return 0;257 /* FALLTHROUGH */258 case '\n':259 break;260 }261 break;262 }263 }264 /*265 * There is a remote possibility that we end up with a zero sum.266 * Zero is used as an EOF marker, so return 1 instead.267 */268 return (sum == 0 ? 1 : sum);269 }270 271 272 /*273 * Check to see if the given files differ.274 * Returns 0 if they are the same, 1 if different, and -1 on error.275 */276 static int files_differ(FILE *f1, FILE *f2, int flags)277 {278 size_t i, j;279 280 if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size281 || (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)282 ) {283 return 1;284 }285 while (1) {286 i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1);287 j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2);288 if (i != j)289 return 1;290 if (i == 0)291 return (ferror(f1) || ferror(f2));292 if (memcmp(g_read_buf,293 g_read_buf + COMMON_BUFSIZE/2, i) != 0)294 return 1;295 }296 }297 298 299 static void prepare(int i, FILE *fp /*, off_t filesize*/)300 {301 struct line *p;302 int h;303 size_t j, sz;304 305 rewind(fp);306 307 /*sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;*/308 /*if (sz < 100)*/309 sz = 100;310 311 p = xmalloc((sz + 3) * sizeof(p[0]));312 j = 0;313 while ((h = readhash(fp))) {314 if (j == sz) {315 sz = sz * 3 / 2;316 p = xrealloc(p, (sz + 3) * sizeof(p[0]));317 }318 p[++j].value = h;319 }320 len[i] = j;321 file[i] = p;322 }323 324 325 static void prune(void)326 {327 int i, j;328 329 for (pref = 0; pref < len[0] && pref < len[1] &&330 file[0][pref + 1].value == file[1][pref + 1].value; pref++)331 continue;332 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&333 file[0][len[0] - suff].value == file[1][len[1] - suff].value;334 suff++)335 continue;336 for (j = 0; j < 2; j++) {337 sfile[j] = file[j] + pref;338 slen[j] = len[j] - pref - suff;339 for (i = 0; i <= slen[j]; i++)340 sfile[j][i].serial = i;341 }342 }343 344 345 static void equiv(struct line *a, int n, struct line *b, int m, int *c)346 {347 int i, j;348 349 i = j = 1;350 while (i <= n && j <= m) {351 if (a[i].value < b[j].value)352 a[i++].value = 0;353 else if (a[i].value == b[j].value)354 a[i++].value = j;355 else356 j++;357 }358 while (i <= n)359 a[i++].value = 0;360 b[m + 1].value = 0;361 j = 0;362 while (++j <= m) {363 c[j] = -b[j].serial;364 while (b[j + 1].value == b[j].value) {365 j++;366 c[j] = b[j].serial;367 }368 }369 c[j] = -1;370 }371 372 373 static int isqrt(int n)374 {375 int y, x;376 377 if (n == 0)378 return 0;379 x = 1;380 do {381 y = x;382 x = n / x;383 x += y;384 x /= 2;385 } while ((x - y) > 1 || (x - y) < -1);386 387 return x;388 }389 390 391 static int newcand(int x, int y, int pred)392 {393 struct cand *q;394 395 if (clen == clistlen) {396 clistlen = clistlen * 11 / 10;397 clist = xrealloc(clist, clistlen * sizeof(struct cand));398 }399 q = clist + clen;400 q->x = x;401 q->y = y;402 q->pred = pred;403 return clen++;404 }405 406 407 static int search(int *c, int k, int y)408 {409 int i, j, l, t;410 411 if (clist[c[k]].y < y) /* quick look for typical case */412 return k + 1;413 i = 0;414 j = k + 1;415 while (1) {416 l = i + j;417 if ((l >>= 1) <= i)418 break;419 t = clist[c[l]].y;420 if (t > y)421 j = l;422 else if (t < y)423 i = l;424 else425 return l;426 }427 return l + 1;428 }429 430 431 static int stone(int *a, int n, int *b, int *c)432 {433 int i, k, y, j, l;434 int oldc, tc, oldl;435 unsigned int numtries;436 437 #if ENABLE_FEATURE_DIFF_MINIMAL438 const unsigned int bound =439 (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));440 #else441 const unsigned int bound = MAX(256, isqrt(n));442 #endif443 k = 0;444 c[0] = newcand(0, 0, 0);445 for (i = 1; i <= n; i++) {446 j = a[i];447 if (j == 0)448 continue;449 y = -b[j];450 oldl = 0;451 oldc = c[0];452 numtries = 0;453 do {454 if (y <= clist[oldc].y)455 continue;456 l = search(c, k, y);457 if (l != oldl + 1)458 oldc = c[l - 1];459 if (l <= k) {460 if (clist[c[l]].y <= y)461 continue;462 tc = c[l];463 c[l] = newcand(i, y, oldc);464 oldc = tc;465 oldl = l;466 numtries++;467 } else {468 c[l] = newcand(i, y, oldc);469 k++;470 break;471 }472 } while ((y = b[++j]) > 0 && numtries < bound);473 }474 return k;475 }476 477 478 static void unravel(int p)479 {480 struct cand *q;481 int i;482 483 for (i = 0; i <= len[0]; i++)484 J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;485 for (q = clist + p; q->y != 0; q = clist + q->pred)486 J[q->x + pref] = q->y + pref;487 }488 489 490 static void unsort(struct line *f, int l, int *b)491 {492 int *a, i;493 494 a = xmalloc((l + 1) * sizeof(int));495 for (i = 1; i <= l; i++)496 a[f[i].serial] = f[i].value;497 for (i = 1; i <= l; i++)498 b[i] = a[i];499 free(a);500 }501 502 503 static int skipline(FILE * f)504 {505 int i, c;506 507 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)508 continue;509 return i;510 }511 512 513 /*514 * Check does double duty:515 * 1. ferret out any fortuitous correspondences due516 * to confounding by hashing (which result in "jackpot")517 * 2. collect random access indexes to the two files518 */519 static void check(FILE * f1, FILE * f2)520 {521 int i, j, jackpot, c, d;522 long ctold, ctnew;523 524 rewind(f1);525 rewind(f2);526 j = 1;527 ixold[0] = ixnew[0] = 0;528 jackpot = 0;529 ctold = ctnew = 0;530 for (i = 1; i <= len[0]; i++) {531 if (J[i] == 0) {532 ixold[i] = ctold += skipline(f1);533 continue;534 }535 while (j < J[i]) {536 ixnew[j] = ctnew += skipline(f2);537 j++;538 }539 if ((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)540 || (option_mask32 & FLAG_i)) {541 while (1) {542 c = getc(f1);543 d = getc(f2);544 /*545 * GNU diff ignores a missing newline546 * in one file if bflag || wflag.547 */548 if (((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)) &&549 ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) {550 break;551 }552 ctold++;553 ctnew++;554 if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) {555 do {556 if (c == '\n')557 break;558 ctold++;559 } while (isspace(c = getc(f1)));560 do {561 if (d == '\n')562 break;563 ctnew++;564 } while (isspace(d = getc(f2)));565 } else if (option_mask32 & FLAG_w) {566 while (isspace(c) && c != '\n') {567 c = getc(f1);568 ctold++;569 }570 while (isspace(d) && d != '\n') {571 d = getc(f2);572 ctnew++;573 }574 }575 if (c != d) {576 jackpot++;577 J[i] = 0;578 if (c != '\n' && c != EOF)579 ctold += skipline(f1);580 if (d != '\n' && c != EOF)581 ctnew += skipline(f2);582 break;583 }584 if (c == '\n' || c == EOF)585 break;586 }587 } else {588 while (1) {589 ctold++;590 ctnew++;591 if ((c = getc(f1)) != (d = getc(f2))) {592 J[i] = 0;593 if (c != '\n' && c != EOF)594 ctold += skipline(f1);595 if (d != '\n' && c != EOF)596 ctnew += skipline(f2);597 break;598 }599 if (c == '\n' || c == EOF)600 break;601 }602 }603 ixold[i] = ctold;604 ixnew[j] = ctnew;605 j++;606 }607 for (; j <= len[1]; j++)608 ixnew[j] = ctnew += skipline(f2);609 }610 611 612 /* shellsort CACM #201 */613 static void sort(struct line *a, int n)614 {615 struct line *ai, *aim, w;616 int j, m = 0, k;617 618 if (n == 0)619 return;620 for (j = 1; j <= n; j *= 2)621 m = 2 * j - 1;622 for (m /= 2; m != 0; m /= 2) {623 k = n - m;624 for (j = 1; j <= k; j++) {625 for (ai = &a[j]; ai > a; ai -= m) {626 aim = &ai[m];627 if (aim < ai)628 break; /* wraparound */629 if (aim->value > ai[0].value ||630 (aim->value == ai[0].value && aim->serial > ai[0].serial))631 break;632 w.value = ai[0].value;633 ai[0].value = aim->value;634 aim->value = w.value;635 w.serial = ai[0].serial;636 ai[0].serial = aim->serial;637 aim->serial = w.serial;638 }639 }640 }641 }642 643 644 static void uni_range(int a, int b)645 {646 if (a < b)647 printf("%d,%d", a, b - a + 1);648 else if (a == b)649 printf("%d", b);650 else651 printf("%d,0", b);652 }653 654 655 static void fetch(long *f, int a, int b, FILE * lb, int ch)656 {657 int i, j, c, lastc, col, nc;658 659 if (a > b)660 return;661 for (i = a; i <= b; i++) {662 fseek(lb, f[i - 1], SEEK_SET);663 nc = f[i] - f[i - 1];664 if (ch != '\0') {665 putchar(ch);666 if (option_mask32 & FLAG_T)667 putchar('\t');668 }669 col = 0;670 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {671 if ((c = getc(lb)) == EOF) {672 printf("\n\\ No newline at end of file\n");673 return;674 }675 if (c == '\t' && (option_mask32 & FLAG_t)) {676 do {677 putchar(' ');678 } while (++col & 7);679 } else {680 putchar(c);681 col++;682 }683 }684 }685 }686 687 688 static int asciifile(FILE * f)689 {690 #if ENABLE_FEATURE_DIFF_BINARY691 int i, cnt;692 #endif693 694 if ((option_mask32 & FLAG_a) || f == NULL)695 return 1;696 697 #if ENABLE_FEATURE_DIFF_BINARY698 rewind(f);699 cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f);700 for (i = 0; i < cnt; i++) {701 if (!isprint(g_read_buf[i])702 && !isspace(g_read_buf[i])) {703 return 0;704 }705 }706 #endif707 return 1;708 }709 710 711 /* dump accumulated "unified" diff changes */712 static void dump_unified_vec(FILE * f1, FILE * f2)713 {714 struct context_vec *cvp = context_vec_start;715 int lowa, upb, lowc, upd;716 int a, b, c, d;717 char ch;718 719 if (context_vec_start > context_vec_ptr)720 return;721 722 b = d = 0; /* gcc */723 lowa = MAX(1, cvp->a - context);724 upb = MIN(len[0], context_vec_ptr->b + context);725 lowc = MAX(1, cvp->c - context);726 upd = MIN(len[1], context_vec_ptr->d + context);727 728 printf("@@ -");729 uni_range(lowa, upb);730 printf(" +");731 uni_range(lowc, upd);732 printf(" @@\n");733 734 /*735 * Output changes in "unified" diff format--the old and new lines736 * are printed together.737 */738 for (; cvp <= context_vec_ptr; cvp++) {739 a = cvp->a;740 b = cvp->b;741 c = cvp->c;742 d = cvp->d;743 744 /*745 * c: both new and old changes746 * d: only changes in the old file747 * a: only changes in the new file748 */749 if (a <= b && c <= d)750 ch = 'c';751 else752 ch = (a <= b) ? 'd' : 'a';753 #if 0754 switch (ch) {755 case 'c':756 fetch(ixold, lowa, a - 1, f1, ' ');757 fetch(ixold, a, b, f1, '-');758 fetch(ixnew, c, d, f2, '+');759 break;760 case 'd':761 fetch(ixold, lowa, a - 1, f1, ' ');762 fetch(ixold, a, b, f1, '-');763 break;764 case 'a':765 fetch(ixnew, lowc, c - 1, f2, ' ');766 fetch(ixnew, c, d, f2, '+');767 break;768 }769 #else770 if (ch == 'c' || ch == 'd') {771 fetch(ixold, lowa, a - 1, f1, ' ');772 fetch(ixold, a, b, f1, '-');773 }774 if (ch == 'a')775 fetch(ixnew, lowc, c - 1, f2, ' ');776 if (ch == 'c' || ch == 'a')777 fetch(ixnew, c, d, f2, '+');778 #endif779 lowa = b + 1;780 lowc = d + 1;781 }782 fetch(ixnew, d + 1, upd, f2, ' ');783 784 context_vec_ptr = context_vec_start - 1;785 }786 787 788 static void print_header(const char *file1, const char *file2)789 {790 if (label1)791 printf("--- %s\n", label1);792 else793 printf("--- %s\t%s", file1, ctime(&stb1.st_mtime));794 if (label2)795 printf("+++ %s\n", label2);796 else797 printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime));798 }799 800 801 /*802 * Indicate that there is a difference between lines a and b of the from file803 * to get to lines c to d of the to file. If a is greater than b then there804 * are no lines in the from file involved and this means that there were805 * lines appended (beginning at b). If c is greater than d then there are806 * lines missing from the to file.807 */808 static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a,809 int b, int c, int d)810 {811 if ((a > b && c > d) || (option_mask32 & FLAG_q)) {812 anychange = 1;813 return;814 }815 816 /*817 * Allocate change records as needed.818 */819 if (context_vec_ptr == context_vec_end - 1) {820 ptrdiff_t offset = context_vec_ptr - context_vec_start;821 822 max_context <<= 1;823 context_vec_start = xrealloc(context_vec_start,824 max_context * sizeof(struct context_vec));825 context_vec_end = context_vec_start + max_context;826 context_vec_ptr = context_vec_start + offset;827 }828 if (anychange == 0) {829 /*830 * Print the context/unidiff header first time through.831 */832 print_header(file1, file2);833 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&834 c > context_vec_ptr->d + (2 * context) + 1) {835 /*836 * If this change is more than 'context' lines from the837 * previous change, dump the record and reset it.838 */839 dump_unified_vec(f1, f2);840 }841 context_vec_ptr++;842 context_vec_ptr->a = a;843 context_vec_ptr->b = b;844 context_vec_ptr->c = c;845 context_vec_ptr->d = d;846 anychange = 1;847 }848 849 850 static void output(char *file1, FILE * f1, char *file2, FILE * f2)851 {852 /* Note that j0 and j1 can't be used as they are defined in math.h.853 * This also allows the rather amusing variable 'j00'... */854 int m, i0, i1, j00, j01;855 856 rewind(f1);857 rewind(f2);858 m = len[0];859 J[0] = 0;860 J[m + 1] = len[1] + 1;861 for (i0 = 1; i0 <= m; i0 = i1 + 1) {862 while (i0 <= m && J[i0] == J[i0 - 1] + 1)863 i0++;864 j00 = J[i0 - 1] + 1;865 i1 = i0 - 1;866 while (i1 < m && J[i1 + 1] == 0)867 i1++;868 j01 = J[i1 + 1] - 1;869 J[i1] = j01;870 change(file1, f1, file2, f2, i0, i1, j00, j01);871 }872 if (m == 0) {873 change(file1, f1, file2, f2, 1, 0, 1, len[1]);874 }875 if (anychange != 0 && !(option_mask32 & FLAG_q)) {876 dump_unified_vec(f1, f2);877 }878 }879 15 880 16 /* … … 885 21 * The major goal is to generate the match vector J. 886 22 * J[i] is the index of the line in file1 corresponding 887 * to line i file0. J[i] = 0 if there is no23 * to line i in file0. J[i] = 0 if there is no 888 24 * such line in file1. 889 25 * 890 26 * Lines are hashed so as to work in core. All potential 891 27 * matches are located by sorting the lines of each file 892 * on the hash (called ``value''). In particular, this28 * on the hash (called "value"). In particular, this 893 29 * collects the equivalence classes in file1 together. 894 30 * Subroutine equiv replaces the value of each line in … … 906 42 * through the lines of file0, developing a vector klist 907 43 * of "k-candidates". At step i a k-candidate is a matched 908 * pair of lines x,y (x in file0 y in file1) such that44 * pair of lines x,y (x in file0, y in file1) such that 909 45 * there is a common subsequence of length k 910 46 * between the first i lines of file0 and the first y … … 937 73 * The core requirements for problems larger than somewhat 938 74 * are (in words) 2*length(file0) + length(file1) + 939 * 3*(number of k-candidates installed), 75 * 3*(number of k-candidates installed), typically about 940 76 * 6n words for files of length n. 941 77 */ 942 static unsigned diffreg(char *ofile1, char *ofile2, int flags) 943 { 944 char *file1 = ofile1; 945 char *file2 = ofile2; 946 FILE *f1 = stdin, *f2 = stdin; 947 unsigned rval; 78 79 #include "libbb.h" 80 81 #if 0 82 //#define dbg_error_msg(...) bb_error_msg(__VA_ARGS__) 83 #else 84 #define dbg_error_msg(...) ((void)0) 85 #endif 86 87 enum { /* print_status() and diffreg() return values */ 88 STATUS_SAME, /* files are the same */ 89 STATUS_DIFFER, /* files differ */ 90 STATUS_BINARY, /* binary files differ */ 91 }; 92 93 enum { /* Commandline flags */ 94 FLAG_a, 95 FLAG_b, 96 FLAG_d, 97 FLAG_i, 98 FLAG_L, /* never used, handled by getopt32 */ 99 FLAG_N, 100 FLAG_q, 101 FLAG_r, 102 FLAG_s, 103 FLAG_S, /* never used, handled by getopt32 */ 104 FLAG_t, 105 FLAG_T, 106 FLAG_U, /* never used, handled by getopt32 */ 107 FLAG_w, 108 FLAG_u, /* ignored, this is the default */ 109 FLAG_p, /* not implemented */ 110 FLAG_B, 111 FLAG_E, /* not implemented */ 112 }; 113 #define FLAG(x) (1 << FLAG_##x) 114 115 /* We cache file position to avoid excessive seeking */ 116 typedef struct FILE_and_pos_t { 117 FILE *ft_fp; 118 off_t ft_pos; 119 } FILE_and_pos_t; 120 121 struct globals { 122 smallint exit_status; 123 int opt_U_context; 124 const char *other_dir; 125 char *label[2]; 126 struct stat stb[2]; 127 }; 128 #define G (*ptr_to_globals) 129 #define exit_status (G.exit_status ) 130 #define opt_U_context (G.opt_U_context ) 131 #define label (G.label ) 132 #define stb (G.stb ) 133 #define INIT_G() do { \ 134 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ 135 opt_U_context = 3; \ 136 } while (0) 137 138 typedef int token_t; 139 140 enum { 141 /* Public */ 142 TOK_EMPTY = 1 << 9, /* Line fully processed, you can proceed to the next */ 143 TOK_EOF = 1 << 10, /* File ended */ 144 /* Private (Only to be used by read_token() */ 145 TOK_EOL = 1 << 11, /* we saw EOL (sticky) */ 146 TOK_SPACE = 1 << 12, /* used -b code, means we are skipping spaces */ 147 SHIFT_EOF = (sizeof(token_t)*8 - 8) - 1, 148 CHAR_MASK = 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */ 149 }; 150 151 /* Restores full EOF from one 8th bit: */ 152 //#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF) 153 /* We don't really need the above, we only need to have EOF != any_real_char: */ 154 #define TOK2CHAR(t) ((t) & CHAR_MASK) 155 156 static void seek_ft(FILE_and_pos_t *ft, off_t pos) 157 { 158 if (ft->ft_pos != pos) { 159 ft->ft_pos = pos; 160 fseeko(ft->ft_fp, pos, SEEK_SET); 161 } 162 } 163 164 /* Reads tokens from given fp, handling -b and -w flags 165 * The user must reset tok every line start 166 */ 167 static int read_token(FILE_and_pos_t *ft, token_t tok) 168 { 169 tok |= TOK_EMPTY; 170 while (!(tok & TOK_EOL)) { 171 bool is_space; 172 int t; 173 174 t = fgetc(ft->ft_fp); 175 if (t != EOF) 176 ft->ft_pos++; 177 is_space = (t == EOF || isspace(t)); 178 179 /* If t == EOF (-1), set both TOK_EOF and TOK_EOL */ 180 tok |= (t & (TOK_EOF + TOK_EOL)); 181 /* Only EOL? */ 182 if (t == '\n') 183 tok |= TOK_EOL; 184 185 if (option_mask32 & FLAG(i)) /* Handcoded tolower() */ 186 t = (t >= 'A' && t <= 'Z') ? t - ('A' - 'a') : t; 187 188 if ((option_mask32 & FLAG(w)) && is_space) 189 continue; 190 191 /* Trim char value to low 9 bits */ 192 t &= CHAR_MASK; 193 194 if (option_mask32 & FLAG(b)) { 195 /* Was prev char whitespace? */ 196 if (tok & TOK_SPACE) { /* yes */ 197 if (is_space) /* this one too, ignore it */ 198 continue; 199 tok &= ~TOK_SPACE; 200 } else if (is_space) { 201 /* 1st whitespace char. 202 * Set TOK_SPACE and replace char by ' ' */ 203 t = TOK_SPACE + ' '; 204 } 205 } 206 /* Clear EMPTY */ 207 tok &= ~(TOK_EMPTY + CHAR_MASK); 208 /* Assign char value (low 9 bits) and maybe set TOK_SPACE */ 209 tok |= t; 210 break; 211 } 212 #if 0 213 bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp, tok, tok & 0xff 214 , tok & TOK_EOF ? " EOF" : "" 215 , tok & TOK_EOL ? " EOL" : "" 216 , tok & TOK_EMPTY ? " EMPTY" : "" 217 , tok & TOK_SPACE ? " SPACE" : "" 218 ); 219 #endif 220 return tok; 221 } 222 223 struct cand { 224 int x; 225 int y; 226 int pred; 227 }; 228 229 static int search(const int *c, int k, int y, const struct cand *list) 230 { 231 int i, j; 232 233 if (list[c[k]].y < y) /* quick look for typical case */ 234 return k + 1; 235 236 for (i = 0, j = k + 1;;) { 237 const int l = (i + j) >> 1; 238 if (l > i) { 239 const int t = list[c[l]].y; 240 if (t > y) 241 j = l; 242 else if (t < y) 243 i = l; 244 else 245 return l; 246 } else 247 return l + 1; 248 } 249 } 250 251 static unsigned isqrt(unsigned n) 252 { 253 unsigned x = 1; 254 while (1) { 255 const unsigned y = x; 256 x = ((n / x) + x) >> 1; 257 if (x <= (y + 1) && x >= (y - 1)) 258 return x; 259 } 260 } 261 262 static void stone(const int *a, int n, const int *b, int *J, int pref) 263 { 264 const unsigned isq = isqrt(n); 265 const unsigned bound = 266 (option_mask32 & FLAG(d)) ? UINT_MAX : MAX(256, isq); 267 int clen = 1; 268 int clistlen = 100; 269 int k = 0; 270 struct cand *clist = xzalloc(clistlen * sizeof(clist[0])); 271 struct cand cand; 272 struct cand *q; 273 int *klist = xzalloc((n + 2) * sizeof(klist[0])); 274 /*clist[0] = (struct cand){0}; - xzalloc did it */ 275 /*klist[0] = 0; */ 276 277 for (cand.x = 1; cand.x <= n; cand.x++) { 278 int j = a[cand.x], oldl = 0; 279 unsigned numtries = 0; 280 if (j == 0) 281 continue; 282 cand.y = -b[j]; 283 cand.pred = klist[0]; 284 do { 285 int l, tc; 286 if (cand.y <= clist[cand.pred].y) 287 continue; 288 l = search(klist, k, cand.y, clist); 289 if (l != oldl + 1) 290 cand.pred = klist[l - 1]; 291 if (l <= k && clist[klist[l]].y <= cand.y) 292 continue; 293 if (clen == clistlen) { 294 clistlen = clistlen * 11 / 10; 295 clist = xrealloc(clist, clistlen * sizeof(clist[0])); 296 } 297 clist[clen] = cand; 298 tc = klist[l]; 299 klist[l] = clen++; 300 if (l <= k) { 301 cand.pred = tc; 302 oldl = l; 303 numtries++; 304 } else { 305 k++; 306 break; 307 } 308 } while ((cand.y = b[++j]) > 0 && numtries < bound); 309 } 310 /* Unravel */ 311 for (q = clist + klist[k]; q->y; q = clist + q->pred) 312 J[q->x + pref] = q->y + pref; 313 free(klist); 314 free(clist); 315 } 316 317 struct line { 318 /* 'serial' is not used in the begining, so we reuse it 319 * to store line offsets, thus reducing memory pressure 320 */ 321 union { 322 unsigned serial; 323 off_t offset; 324 }; 325 unsigned value; 326 }; 327 328 static void equiv(struct line *a, int n, struct line *b, int m, int *c) 329 { 330 int i = 1, j = 1; 331 332 while (i <= n && j <= m) { 333 if (a[i].value < b[j].value) 334 a[i++].value = 0; 335 else if (a[i].value == b[j].value) 336 a[i++].value = j; 337 else 338 j++; 339 } 340 while (i <= n) 341 a[i++].value = 0; 342 b[m + 1].value = 0; 343 j = 0; 344 while (++j <= m) { 345 c[j] = -b[j].serial; 346 while (b[j + 1].value == b[j].value) { 347 j++; 348 c[j] = b[j].serial; 349 } 350 } 351 c[j] = -1; 352 } 353 354 static void unsort(const struct line *f, int l, int *b) 355 { 948 356 int i; 949 950 anychange = 0; 951 context_vec_ptr = context_vec_start - 1; 952 953 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 954 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 955 956 rval = D_SAME; 957 958 if (LONE_DASH(file1) && LONE_DASH(file2)) 959 goto closem; 960 961 if (flags & D_EMPTY1) 962 f1 = xfopen(bb_dev_null, "r"); 963 else if (NOT_LONE_DASH(file1)) 964 f1 = xfopen(file1, "r"); 965 if (flags & D_EMPTY2) 966 f2 = xfopen(bb_dev_null, "r"); 967 else if (NOT_LONE_DASH(file2)) 968 f2 = xfopen(file2, "r"); 969 970 /* We can't diff non-seekable stream - we use rewind(), fseek(). 971 * This can be fixed (volunteers?). 972 * Meanwhile we should check it here by stat'ing input fds, 973 * but I am lazy and check that in main() instead. 974 * Check in main won't catch "diffing fifos buried in subdirectories" 975 * failure scenario - not very likely in real life... */ 976 977 i = files_differ(f1, f2, flags); 978 if (i == 0) 979 goto closem; 980 else if (i != 1) { /* 1 == ok */ 981 /* error */ 982 status |= 2; 983 goto closem; 984 } 985 986 if (!asciifile(f1) || !asciifile(f2)) { 987 rval = D_BINARY; 988 status |= 1; 989 goto closem; 990 } 991 992 prepare(0, f1 /*, stb1.st_size*/); 993 prepare(1, f2 /*, stb2.st_size*/); 994 prune(); 995 sort(sfile[0], slen[0]); 996 sort(sfile[1], slen[1]); 997 998 member = (int *) file[1]; 357 int *a = xmalloc((l + 1) * sizeof(a[0])); 358 for (i = 1; i <= l; i++) 359 a[f[i].serial] = f[i].value; 360 for (i = 1; i <= l; i++) 361 b[i] = a[i]; 362 free(a); 363 } 364 365 static int line_compar(const void *a, const void *b) 366 { 367 #define l0 ((const struct line*)a) 368 #define l1 ((const struct line*)b) 369 int r = l0->value - l1->value; 370 if (r) 371 return r; 372 return l0->serial - l1->serial; 373 #undef l0 374 #undef l1 375 } 376 377 static void fetch(FILE_and_pos_t *ft, const off_t *ix, int a, int b, int ch) 378 { 379 int i, j, col; 380 for (i = a; i <= b; i++) { 381 seek_ft(ft, ix[i - 1]); 382 putchar(ch); 383 if (option_mask32 & FLAG(T)) 384 putchar('\t'); 385 for (j = 0, col = 0; j < ix[i] - ix[i - 1]; j++) { 386 int c = fgetc(ft->ft_fp); 387 if (c == EOF) { 388 printf("\n\\ No newline at end of file\n"); 389 return; 390 } 391 ft->ft_pos++; 392 if (c == '\t' && (option_mask32 & FLAG(t))) 393 do putchar(' '); while (++col & 7); 394 else { 395 putchar(c); 396 col++; 397 } 398 } 399 } 400 } 401 402 /* Creates the match vector J, where J[i] is the index 403 * of the line in the new file corresponding to the line i 404 * in the old file. Lines start at 1 instead of 0, that value 405 * being used instead to denote no corresponding line. 406 * This vector is dynamically allocated and must be freed by the caller. 407 * 408 * * fp is an input parameter, where fp[0] and fp[1] are the open 409 * old file and new file respectively. 410 * * nlen is an output variable, where nlen[0] and nlen[1] 411 * gets the number of lines in the old and new file respectively. 412 * * ix is an output variable, where ix[0] and ix[1] gets 413 * assigned dynamically allocated vectors of the offsets of the lines 414 * of the old and new file respectively. These must be freed by the caller. 415 */ 416 static NOINLINE int *create_J(FILE_and_pos_t ft[2], int nlen[2], off_t *ix[2]) 417 { 418 int *J, slen[2], *class, *member; 419 struct line *nfile[2], *sfile[2]; 420 int pref = 0, suff = 0, i, j, delta; 421 422 /* Lines of both files are hashed, and in the process 423 * their offsets are stored in the array ix[fileno] 424 * where fileno == 0 points to the old file, and 425 * fileno == 1 points to the new one. 426 */ 427 for (i = 0; i < 2; i++) { 428 unsigned hash; 429 token_t tok; 430 size_t sz = 100; 431 nfile[i] = xmalloc((sz + 3) * sizeof(nfile[i][0])); 432 /* ft gets here without the correct position, cant use seek_ft */ 433 ft[i].ft_pos = 0; 434 fseeko(ft[i].ft_fp, 0, SEEK_SET); 435 436 nlen[i] = 0; 437 /* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */ 438 nfile[i][0].offset = 0; 439 goto start; /* saves code */ 440 while (1) { 441 tok = read_token(&ft[i], tok); 442 if (!(tok & TOK_EMPTY)) { 443 /* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */ 444 /*hash = hash * 128 - hash + TOK2CHAR(tok); 445 * gcc insists on optimizing above to "hash * 127 + ...", thus... */ 446 unsigned o = hash - TOK2CHAR(tok); 447 hash = hash * 128 - o; /* we want SPEED here */ 448 continue; 449 } 450 if (nlen[i]++ == sz) { 451 sz = sz * 3 / 2; 452 nfile[i] = xrealloc(nfile[i], (sz + 3) * sizeof(nfile[i][0])); 453 } 454 /* line_compar needs hashes fit into positive int */ 455 nfile[i][nlen[i]].value = hash & INT_MAX; 456 /* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */ 457 nfile[i][nlen[i]].offset = ft[i].ft_pos; 458 if (tok & TOK_EOF) { 459 /* EOF counts as a token, so we have to adjust it here */ 460 nfile[i][nlen[i]].offset++; 461 break; 462 } 463 start: 464 hash = tok = 0; 465 } 466 /* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */ 467 if (nfile[i][nlen[i]].offset - nfile[i][nlen[i] - 1].offset == 1) 468 nlen[i]--; 469 /* Now we copy the line offsets into ix */ 470 ix[i] = xmalloc((nlen[i] + 2) * sizeof(ix[i][0])); 471 for (j = 0; j < nlen[i] + 1; j++) 472 ix[i][j] = nfile[i][j].offset; 473 } 474 475 /* length of prefix and suffix is calculated */ 476 for (; pref < nlen[0] && pref < nlen[1] && 477 nfile[0][pref + 1].value == nfile[1][pref + 1].value; 478 pref++); 479 for (; suff < nlen[0] - pref && suff < nlen[1] - pref && 480 nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value; 481 suff++); 482 /* Arrays are pruned by the suffix and prefix length, 483 * the result being sorted and stored in sfile[fileno], 484 * and their sizes are stored in slen[fileno] 485 */ 486 for (j = 0; j < 2; j++) { 487 sfile[j] = nfile[j] + pref; 488 slen[j] = nlen[j] - pref - suff; 489 for (i = 0; i <= slen[j]; i++) 490 sfile[j][i].serial = i; 491 qsort(sfile[j] + 1, slen[j], sizeof(*sfile[j]), line_compar); 492 } 493 /* nfile arrays are reused to reduce memory pressure 494 * The #if zeroed out section performs the same task as the 495 * one in the #else section. 496 * Peak memory usage is higher, but one array copy is avoided 497 * by not using unsort() 498 */ 499 #if 0 500 member = xmalloc((slen[1] + 2) * sizeof(member[0])); 999 501 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 1000 member = xrealloc(member, (slen[1] + 2) * sizeof(int)); 1001 1002 class = (int *) file[0]; 1003 unsort(sfile[0], slen[0], class); 1004 class = xrealloc(class, (slen[0] + 2) * sizeof(int)); 1005 1006 klist = xmalloc((slen[0] + 2) * sizeof(int)); 1007 clen = 0; 1008 clistlen = 100; 1009 clist = xmalloc(clistlen * sizeof(struct cand)); 1010 i = stone(class, slen[0], member, klist); 502 free(nfile[1]); 503 504 class = xmalloc((slen[0] + 1) * sizeof(class[0])); 505 for (i = 1; i <= slen[0]; i++) /* Unsorting */ 506 class[sfile[0][i].serial] = sfile[0][i].value; 507 free(nfile[0]); 508 #else 509 member = (int *)nfile[1]; 510 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 511 member = xrealloc(member, (slen[1] + 2) * sizeof(member[0])); 512 513 class = (int *)nfile[0]; 514 unsort(sfile[0], slen[0], (int *)nfile[0]); 515 class = xrealloc(class, (slen[0] + 2) * sizeof(class[0])); 516 #endif 517 J = xmalloc((nlen[0] + 2) * sizeof(J[0])); 518 /* The elements of J which fall inside the prefix and suffix regions 519 * are marked as unchanged, while the ones which fall outside 520 * are initialized with 0 (no matches), so that function stone can 521 * then assign them their right values 522 */ 523 for (i = 0, delta = nlen[1] - nlen[0]; i <= nlen[0]; i++) 524 J[i] = i <= pref ? i : 525 i > (nlen[0] - suff) ? (i + delta) : 0; 526 /* Here the magic is performed */ 527 stone(class, slen[0], member, J, pref); 528 J[nlen[0] + 1] = nlen[1] + 1; 529 530 free(class); 1011 531 free(member); 1012 free(class); 1013 1014 J = xrealloc(J, (len[0] + 2) * sizeof(int)); 1015 unravel(klist[i]); 1016 free(clist); 1017 free(klist); 1018 1019 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); 1020 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); 1021 check(f1, f2); 1022 output(file1, f1, file2, f2); 1023 1024 closem: 1025 if (anychange) { 1026 status |= 1; 1027 if (rval == D_SAME) 1028 rval = D_DIFFER; 1029 } 1030 fclose_if_not_stdin(f1); 1031 fclose_if_not_stdin(f2); 1032 if (file1 != ofile1) 1033 free(file1); 1034 if (file2 != ofile2) 1035 free(file2); 1036 return rval; 1037 } 1038 532 533 /* Both files are rescanned, in an effort to find any lines 534 * which, due to limitations intrinsic to any hashing algorithm, 535 * are different but ended up confounded as the same 536 */ 537 for (i = 1; i <= nlen[0]; i++) { 538 if (!J[i]) 539 continue; 540 541 seek_ft(&ft[0], ix[0][i - 1]); 542 seek_ft(&ft[1], ix[1][J[i] - 1]); 543 544 for (j = J[i]; i <= nlen[0] && J[i] == j; i++, j++) { 545 token_t tok0 = 0, tok1 = 0; 546 do { 547 tok0 = read_token(&ft[0], tok0); 548 tok1 = read_token(&ft[1], tok1); 549 550 if (((tok0 ^ tok1) & TOK_EMPTY) != 0 /* one is empty (not both) */ 551 || (!(tok0 & TOK_EMPTY) && TOK2CHAR(tok0) != TOK2CHAR(tok1)) 552 ) { 553 J[i] = 0; /* Break the correspondence */ 554 } 555 } while (!(tok0 & tok1 & TOK_EMPTY)); 556 } 557 } 558 559 return J; 560 } 561 562 static bool diff(FILE* fp[2], char *file[2]) 563 { 564 int nlen[2]; 565 off_t *ix[2]; 566 FILE_and_pos_t ft[2]; 567 typedef struct { int a, b; } vec_t[2]; 568 vec_t *vec = NULL; 569 int i = 1, j, k, idx = -1; 570 bool anychange = false; 571 int *J; 572 573 ft[0].ft_fp = fp[0]; 574 ft[1].ft_fp = fp[1]; 575 /* note that ft[i].ft_pos is unintitalized, create_J() 576 * must not assume otherwise */ 577 J = create_J(ft, nlen, ix); 578 579 do { 580 bool nonempty = false; 581 582 while (1) { 583 vec_t v; 584 585 for (v[0].a = i; v[0].a <= nlen[0] && J[v[0].a] == J[v[0].a - 1] + 1; v[0].a++) 586 continue; 587 v[1].a = J[v[0].a - 1] + 1; 588 589 for (v[0].b = v[0].a - 1; v[0].b < nlen[0] && !J[v[0].b + 1]; v[0].b++) 590 continue; 591 v[1].b = J[v[0].b + 1] - 1; 592 /* 593 * Indicate that there is a difference between lines a and b of the 'from' file 594 * to get to lines c to d of the 'to' file. If a is greater than b then there 595 * are no lines in the 'from' file involved and this means that there were 596 * lines appended (beginning at b). If c is greater than d then there are 597 * lines missing from the 'to' file. 598 */ 599 if (v[0].a <= v[0].b || v[1].a <= v[1].b) { 600 /* 601 * If this change is more than 'context' lines from the 602 * previous change, dump the record and reset it. 603 */ 604 int ct = (2 * opt_U_context) + 1; 605 if (idx >= 0 606 && v[0].a > vec[idx][0].b + ct 607 && v[1].a > vec[idx][1].b + ct 608 ) { 609 break; 610 } 611 612 for (j = 0; j < 2; j++) 613 for (k = v[j].a; k < v[j].b; k++) 614 nonempty |= (ix[j][k+1] - ix[j][k] != 1); 615 616 vec = xrealloc_vector(vec, 6, ++idx); 617 memcpy(vec[idx], v, sizeof(v)); 618 } 619 620 i = v[0].b + 1; 621 if (i > nlen[0]) 622 break; 623 J[v[0].b] = v[1].b; 624 } 625 if (idx < 0 || ((option_mask32 & FLAG(B)) && !nonempty)) 626 goto cont; 627 if (!(option_mask32 & FLAG(q))) { 628 int lowa; 629 vec_t span, *cvp = vec; 630 631 if (!anychange) { 632 /* Print the context/unidiff header first time through */ 633 printf("--- %s\n", label[0] ? label[0] : file[0]); 634 printf("+++ %s\n", label[1] ? label[1] : file[1]); 635 } 636 637 printf("@@"); 638 for (j = 0; j < 2; j++) { 639 int a = span[j].a = MAX(1, (*cvp)[j].a - opt_U_context); 640 int b = span[j].b = MIN(nlen[j], vec[idx][j].b + opt_U_context); 641 642 printf(" %c%d", j ? '+' : '-', MIN(a, b)); 643 if (a == b) 644 continue; 645 printf(",%d", (a < b) ? b - a + 1 : 0); 646 } 647 printf(" @@\n"); 648 /* 649 * Output changes in "unified" diff format--the old and new lines 650 * are printed together. 651 */ 652 for (lowa = span[0].a; ; lowa = (*cvp++)[0].b + 1) { 653 bool end = cvp > &vec[idx]; 654 fetch(&ft[0], ix[0], lowa, end ? span[0].b : (*cvp)[0].a - 1, ' '); 655 if (end) 656 break; 657 for (j = 0; j < 2; j++) 658 fetch(&ft[j], ix[j], (*cvp)[j].a, (*cvp)[j].b, j ? '+' : '-'); 659 } 660 } 661 anychange = true; 662 cont: 663 idx = -1; 664 } while (i <= nlen[0]); 665 666 free(vec); 667 free(ix[0]); 668 free(ix[1]); 669 free(J); 670 return anychange; 671 } 672 673 static int diffreg(char *file[2]) 674 { 675 FILE *fp[2] = { stdin, stdin }; 676 bool binary = false, differ = false; 677 int status = STATUS_SAME, i; 678 679 for (i = 0; i < 2; i++) { 680 int fd = open_or_warn_stdin(file[i]); 681 if (fd == -1) 682 goto out; 683 /* Our diff implementation is using seek. 684 * When we meet non-seekable file, we must make a temp copy. 685 */ 686 if (lseek(fd, 0, SEEK_SET) == -1 && errno == ESPIPE) { 687 char name[] = "/tmp/difXXXXXX"; 688 int fd_tmp = xmkstemp(name); 689 690 unlink(name); 691 if (bb_copyfd_eof(fd, fd_tmp) < 0) 692 xfunc_die(); 693 if (fd) /* Prevents closing of stdin */ 694 close(fd); 695 fd = fd_tmp; 696 } 697 fp[i] = fdopen(fd, "r"); 698 } 699 700 while (1) { 701 const size_t sz = COMMON_BUFSIZE / 2; 702 char *const buf0 = bb_common_bufsiz1; 703 char *const buf1 = buf0 + sz; 704 int j, k; 705 i = fread(buf0, 1, sz, fp[0]); 706 j = fread(buf1, 1, sz, fp[1]); 707 if (i != j) { 708 differ = true; 709 i = MIN(i, j); 710 } 711 if (i == 0) 712 break; 713 for (k = 0; k < i; k++) { 714 if (!buf0[k] || !buf1[k]) 715 binary = true; 716 if (buf0[k] != buf1[k]) 717 differ = true; 718 } 719 } 720 if (differ) { 721 if (binary && !(option_mask32 & FLAG(a))) 722 status = STATUS_BINARY; 723 else if (diff(fp, file)) 724 status = STATUS_DIFFER; 725 } 726 if (status != STATUS_SAME) 727 exit_status |= 1; 728 out: 729 fclose_if_not_stdin(fp[0]); 730 fclose_if_not_stdin(fp[1]); 731 732 return status; 733 } 734 735 static void print_status(int status, char *path[2]) 736 { 737 switch (status) { 738 case STATUS_BINARY: 739 case STATUS_DIFFER: 740 if ((option_mask32 & FLAG(q)) || status == STATUS_BINARY) 741 printf("Files %s and %s differ\n", path[0], path[1]); 742 break; 743 case STATUS_SAME: 744 if (option_mask32 & FLAG(s)) 745 printf("Files %s and %s are identical\n", path[0], path[1]); 746 break; 747 } 748 } 1039 749 1040 750 #if ENABLE_FEATURE_DIFF_DIR 1041 static void do_diff(char *dir1, char *path1, char *dir2, char *path2) 1042 { 1043 int flags = D_HEADER; 1044 int val; 1045 char *fullpath1 = NULL; /* if -N */ 1046 char *fullpath2 = NULL; 1047 1048 if (path1) 1049 fullpath1 = concat_path_file(dir1, path1); 1050 if (path2) 1051 fullpath2 = concat_path_file(dir2, path2); 1052 1053 if (!fullpath1 || stat(fullpath1, &stb1) != 0) { 1054 flags |= D_EMPTY1; 1055 memset(&stb1, 0, sizeof(stb1)); 1056 if (path2) { 1057 free(fullpath1); 1058 fullpath1 = concat_path_file(dir1, path2); 1059 } 1060 } 1061 if (!fullpath2 || stat(fullpath2, &stb2) != 0) { 1062 flags |= D_EMPTY2; 1063 memset(&stb2, 0, sizeof(stb2)); 1064 stb2.st_mode = stb1.st_mode; 1065 if (path1) { 1066 free(fullpath2); 1067 fullpath2 = concat_path_file(dir2, path1); 1068 } 1069 } 1070 1071 if (stb1.st_mode == 0) 1072 stb1.st_mode = stb2.st_mode; 1073 1074 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { 1075 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); 1076 goto ret; 1077 } 1078 1079 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) 1080 val = D_SKIPPED1; 1081 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) 1082 val = D_SKIPPED2; 1083 else 1084 val = diffreg(fullpath1, fullpath2, flags); 1085 1086 print_status(val, fullpath1, fullpath2, NULL); 1087 ret: 1088 free(fullpath1); 1089 free(fullpath2); 1090 } 1091 #endif 1092 1093 1094 #if ENABLE_FEATURE_DIFF_DIR 1095 static int dir_strcmp(const void *p1, const void *p2) 1096 { 1097 return strcmp(*(char *const *) p1, *(char *const *) p2); 1098 } 1099 751 struct dlist { 752 size_t len; 753 int s, e; 754 char **dl; 755 }; 1100 756 1101 757 /* This function adds a filename to dl, the directory listing. */ 1102 static int add_to_dirlist(const char *filename, 1103 struct stat ATTRIBUTE_UNUSED * sb, void *userdata, 1104 int depth ATTRIBUTE_UNUSED) 1105 { 1106 /* +2: with space for eventual trailing NULL */ 1107 dl = xrealloc(dl, (dl_count+2) * sizeof(dl[0])); 1108 dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata); 1109 dl_count++; 758 static int FAST_FUNC add_to_dirlist(const char *filename, 759 struct stat *sb UNUSED_PARAM, 760 void *userdata, int depth UNUSED_PARAM) 761 { 762 struct dlist *const l = userdata; 763 const char *file = filename + l->len; 764 while (*file == '/') 765 file++; 766 l->dl = xrealloc_vector(l->dl, 6, l->e); 767 l->dl[l->e] = xstrdup(file); 768 l->e++; 1110 769 return TRUE; 1111 770 } 1112 771 1113 1114 /* This returns a sorted directory listing. */ 1115 static char **get_dir(char *path) 1116 { 1117 dl_count = 0; 1118 dl = xzalloc(sizeof(dl[0])); 1119 1120 /* If -r has been set, then the recursive_action function will be 1121 * used. Unfortunately, this outputs the root directory along with 1122 * the recursed paths, so use void *userdata to specify the string 1123 * length of the root directory - '(void*)(strlen(path)+)'. 1124 * add_to_dirlist then removes root dir prefix. */ 1125 1126 if (option_mask32 & FLAG_r) { 1127 recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS, 1128 add_to_dirlist, NULL, 1129 (void*)(strlen(path)+1), 0); 1130 } else { 1131 DIR *dp; 1132 struct dirent *ep; 1133 1134 dp = warn_opendir(path); 1135 while ((ep = readdir(dp))) { 1136 if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.')) 1137 continue; 1138 add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0); 1139 } 1140 closedir(dp); 1141 } 1142 1143 /* Sort dl alphabetically. */ 1144 qsort(dl, dl_count, sizeof(char *), dir_strcmp); 1145 1146 dl[dl_count] = NULL; 1147 return dl; 1148 } 1149 1150 1151 static void diffdir(char *p1, char *p2) 1152 { 1153 char **dirlist1, **dirlist2; 1154 char *dp1, *dp2; 1155 int pos; 1156 1157 /* Check for trailing slashes. */ 1158 dp1 = last_char_is(p1, '/'); 1159 if (dp1 != NULL) 1160 *dp1 = '\0'; 1161 dp2 = last_char_is(p2, '/'); 1162 if (dp2 != NULL) 1163 *dp2 = '\0'; 1164 1165 /* Get directory listings for p1 and p2. */ 1166 1167 dirlist1 = get_dir(p1); 1168 dirlist2 = get_dir(p2); 1169 1170 /* If -S was set, find the starting point. */ 1171 if (start) { 1172 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) 1173 dirlist1++; 1174 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) 1175 dirlist2++; 1176 if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) 1177 bb_error_msg(bb_msg_invalid_arg, "NULL", "-S"); 1178 } 1179 772 /* If recursion is not set, this function adds the directory 773 * to the list and prevents recursive_action from recursing into it. 774 */ 775 static int FAST_FUNC skip_dir(const char *filename, 776 struct stat *sb, void *userdata, 777 int depth) 778 { 779 if (!(option_mask32 & FLAG(r)) && depth) { 780 add_to_dirlist(filename, sb, userdata, depth); 781 return SKIP; 782 } 783 if (!(option_mask32 & FLAG(N))) { 784 /* -r without -N: no need to recurse into dirs 785 * which do not exist on the "other side". 786 * Testcase: diff -r /tmp / 787 * (it would recurse deep into /proc without this code) */ 788 struct dlist *const l = userdata; 789 filename += l->len; 790 if (filename[0]) { 791 struct stat osb; 792 char *othername = concat_path_file(G.other_dir, filename); 793 int r = stat(othername, &osb); 794 free(othername); 795 if (r != 0 || !S_ISDIR(osb.st_mode)) { 796 /* other dir doesn't have similarly named 797 * directory, don't recurse */ 798 return SKIP; 799 } 800 } 801 } 802 return TRUE; 803 } 804 805 static void diffdir(char *p[2], const char *s_start) 806 { 807 struct dlist list[2]; 808 int i; 809 810 memset(&list, 0, sizeof(list)); 811 for (i = 0; i < 2; i++) { 812 /*list[i].s = list[i].e = 0; - memset did it */ 813 /*list[i].dl = NULL; */ 814 815 G.other_dir = p[1 - i]; 816 /* We need to trim root directory prefix. 817 * Using list.len to specify its length, 818 * add_to_dirlist will remove it. */ 819 list[i].len = strlen(p[i]); 820 recursive_action(p[i], ACTION_RECURSE | ACTION_FOLLOWLINKS, 821 add_to_dirlist, skip_dir, &list[i], 0); 822 /* Sort dl alphabetically. 823 * GNU diff does this ignoring any number of trailing dots. 824 * We don't, so for us dotted files almost always are 825 * first on the list. 826 */ 827 qsort_string_vector(list[i].dl, list[i].e); 828 /* If -S was set, find the starting point. */ 829 if (!s_start) 830 continue; 831 while (list[i].s < list[i].e && strcmp(list[i].dl[list[i].s], s_start) < 0) 832 list[i].s++; 833 } 1180 834 /* Now that both dirlist1 and dirlist2 contain sorted directory 1181 835 * listings, we can start to go through dirlist1. If both listings 1182 836 * contain the same file, then do a normal diff. Otherwise, behaviour 1183 837 * is determined by whether the -N flag is set. */ 1184 while (*dirlist1 != NULL || *dirlist2 != NULL) { 1185 dp1 = *dirlist1; 1186 dp2 = *dirlist2; 1187 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); 838 while (1) { 839 char *dp[2]; 840 int pos; 841 int k; 842 843 dp[0] = list[0].s < list[0].e ? list[0].dl[list[0].s] : NULL; 844 dp[1] = list[1].s < list[1].e ? list[1].dl[list[1].s] : NULL; 845 if (!dp[0] && !dp[1]) 846 break; 847 pos = !dp[0] ? 1 : (!dp[1] ? -1 : strcmp(dp[0], dp[1])); 848 k = pos > 0; 849 if (pos && !(option_mask32 & FLAG(N))) 850 printf("Only in %s: %s\n", p[k], dp[k]); 851 else { 852 char *fullpath[2], *path[2]; /* if -N */ 853 854 for (i = 0; i < 2; i++) { 855 if (pos == 0 || i == k) { 856 path[i] = fullpath[i] = concat_path_file(p[i], dp[i]); 857 stat(fullpath[i], &stb[i]); 858 } else { 859 fullpath[i] = concat_path_file(p[i], dp[1 - i]); 860 path[i] = (char *)bb_dev_null; 861 } 862 } 863 if (pos) 864 stat(fullpath[k], &stb[1 - k]); 865 866 if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) 867 printf("Common subdirectories: %s and %s\n", fullpath[0], fullpath[1]); 868 else if (!S_ISREG(stb[0].st_mode) && !S_ISDIR(stb[0].st_mode)) 869 printf("File %s is not a regular file or directory and was skipped\n", fullpath[0]); 870 else if (!S_ISREG(stb[1].st_mode) && !S_ISDIR(stb[1].st_mode)) 871 printf("File %s is not a regular file or directory and was skipped\n", fullpath[1]); 872 else if (S_ISDIR(stb[0].st_mode) != S_ISDIR(stb[1].st_mode)) { 873 if (S_ISDIR(stb[0].st_mode)) 874 printf("File %s is a %s while file %s is a %s\n", fullpath[0], "directory", fullpath[1], "regular file"); 875 else 876 printf("File %s is a %s while file %s is a %s\n", fullpath[0], "regular file", fullpath[1], "directory"); 877 } else 878 print_status(diffreg(path), fullpath); 879 880 free(fullpath[0]); 881 free(fullpath[1]); 882 } 883 free(dp[k]); 884 list[k].s++; 1188 885 if (pos == 0) { 1189 do_diff(p1, dp1, p2, dp2); 1190 dirlist1++; 1191 dirlist2++; 1192 } else if (pos < 0) { 1193 if (option_mask32 & FLAG_N) 1194 do_diff(p1, dp1, p2, NULL); 1195 else 1196 print_only(p1, strlen(p1) + 1, dp1); 1197 dirlist1++; 1198 } else { 1199 if (option_mask32 & FLAG_N) 1200 do_diff(p1, NULL, p2, dp2); 1201 else 1202 print_only(p2, strlen(p2) + 1, dp2); 1203 dirlist2++; 1204 } 886 free(dp[1 - k]); 887 list[1 - k].s++; 888 } 889 } 890 if (ENABLE_FEATURE_CLEAN_UP) { 891 free(list[0].dl); 892 free(list[1].dl); 1205 893 } 1206 894 } 1207 895 #endif 1208 896 1209 1210 int diff_main(int argc, char **argv); 1211 int diff_main(int argc, char **argv) 1212 { 1213 bool gotstdin = 0; 1214 char *U_opt; 1215 char *f1, *f2; 897 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS 898 static const char diff_longopts[] ALIGN1 = 899 "ignore-case\0" No_argument "i" 900 "ignore-tab-expansion\0" No_argument "E" 901 "ignore-space-change\0" No_argument "b" 902 "ignore-all-space\0" No_argument "w" 903 "ignore-blank-lines\0" No_argument "B" 904 "text\0" No_argument "a" 905 "unified\0" Required_argument "U" 906 "label\0" Required_argument "L" 907 "show-c-function\0" No_argument "p" 908 "brief\0" No_argument "q" 909 "expand-tabs\0" No_argument "t" 910 "initial-tab\0" No_argument "T" 911 "recursive\0" No_argument "r" 912 "new-file\0" No_argument "N" 913 "report-identical-files\0" No_argument "s" 914 "starting-file\0" Required_argument "S" 915 "minimal\0" No_argument "d" 916 ; 917 #endif 918 919 int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 920 int diff_main(int argc UNUSED_PARAM, char **argv) 921 { 922 int gotstdin = 0, i; 923 char *file[2], *s_start = NULL; 1216 924 llist_t *L_arg = NULL; 1217 925 1218 926 INIT_G(); 1219 927 1220 /* exactly 2 params; collect multiple -L <label> */ 1221 opt_complementary = "=2:L::"; 1222 getopt32(argv, "abdiL:NqrsS:tTU:wu" 1223 "p" /* ignored (for compatibility) */, 1224 &L_arg, &start, &U_opt); 1225 /*argc -= optind;*/ 928 /* exactly 2 params; collect multiple -L <label>; -U N */ 929 opt_complementary = "=2:L::U+"; 930 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS 931 applet_long_options = diff_longopts; 932 #endif 933 getopt32(argv, "abdiL:NqrsS:tTU:wupBE", 934 &L_arg, &s_start, &opt_U_context); 1226 935 argv += optind; 1227 while (L_arg) { 1228 if (label1 && label2) 1229 bb_show_usage(); 1230 if (!label1) 1231 label1 = L_arg->data; 1232 else { /* then label2 is NULL */ 1233 label2 = label1; 1234 label1 = L_arg->data; 1235 } 1236 /* we leak L_arg here... */ 1237 L_arg = L_arg->link; 1238 } 1239 if (option_mask32 & FLAG_U) 1240 context = xatoi_u(U_opt); 1241 1242 /* 1243 * Do sanity checks, fill in stb1 and stb2 and call the appropriate 1244 * driver routine. Both drivers use the contents of stb1 and stb2. 1245 */ 1246 1247 f1 = argv[0]; 1248 f2 = argv[1]; 1249 if (LONE_DASH(f1)) { 1250 fstat(STDIN_FILENO, &stb1); 1251 gotstdin = 1; 1252 } else 1253 xstat(f1, &stb1); 1254 if (LONE_DASH(f2)) { 1255 fstat(STDIN_FILENO, &stb2); 1256 gotstdin = 1; 1257 } else 1258 xstat(f2, &stb2); 1259 if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) 1260 bb_error_msg_and_die("can't compare - to a directory"); 1261 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { 936 while (L_arg) 937 label[!!label[0]] = llist_pop(&L_arg); 938 xfunc_error_retval = 2; 939 for (i = 0; i < 2; i++) { 940 file[i] = argv[i]; 941 /* Compat: "diff file name_which_doesnt_exist" exits with 2 */ 942 if (LONE_DASH(file[i])) { 943 fstat(STDIN_FILENO, &stb[i]); 944 gotstdin++; 945 } else 946 xstat(file[i], &stb[i]); 947 } 948 xfunc_error_retval = 1; 949 if (gotstdin && (S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode))) 950 bb_error_msg_and_die("can't compare stdin to a directory"); 951 952 if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) { 1262 953 #if ENABLE_FEATURE_DIFF_DIR 1263 diffdir(f 1, f2);954 diffdir(file, s_start); 1264 955 #else 1265 bb_error_msg_and_die(" directory comparison not supported");956 bb_error_msg_and_die("no support for directory comparison"); 1266 957 #endif 1267 958 } else { 1268 if (S_ISDIR(stb1.st_mode)) { 1269 f1 = concat_path_file(f1, f2); 1270 xstat(f1, &stb1); 1271 } 1272 if (S_ISDIR(stb2.st_mode)) { 1273 f2 = concat_path_file(f2, f1); 1274 xstat(f2, &stb2); 1275 } 1276 /* XXX: FIXME: */ 1277 /* We can't diff e.g. stdin supplied by a pipe - we use rewind(), fseek(). 1278 * This can be fixed (volunteers?) */ 1279 if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode)) 1280 bb_error_msg_and_die("can't diff non-seekable stream"); 1281 print_status(diffreg(f1, f2, 0), f1, f2, NULL); 1282 } 1283 return status; 1284 } 959 bool dirfile = S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode); 960 bool dir = S_ISDIR(stb[1].st_mode); 961 if (dirfile) { 962 const char *slash = strrchr(file[!dir], '/'); 963 file[dir] = concat_path_file(file[dir], slash ? slash + 1 : file[!dir]); 964 xstat(file[dir], &stb[dir]); 965 } 966 /* diffreg can get non-regular files here */ 967 print_status(gotstdin > 1 ? STATUS_SAME : diffreg(file), file); 968 969 if (dirfile) 970 free(file[dir]); 971 } 972 973 return exit_status; 974 }
Note:
See TracChangeset
for help on using the changeset viewer.