source: MondoRescue/branches/3.3/mindi-busybox/editors/diff.c@ 3723

Last change on this file since 3723 was 3621, checked in by Bruno Cornec, 10 years ago

New 3?3 banch for incorporation of latest busybox 1.25. Changing minor version to handle potential incompatibilities.

  • Property svn:eol-style set to native
File size: 30.1 KB
RevLine 
[1765]1/* vi: set sw=4 ts=4: */
2/*
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
4 *
[2725]5 * Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com>
[1765]6 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
7 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
8 *
9 * Sponsored in part by the Defense Advanced Research Projects
10 * Agency (DARPA) and Air Force Research Laboratory, Air Force
11 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
12 *
[2725]13 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
[1765]14 */
15
16/*
[2725]17 * The following code uses an algorithm due to Harold Stone,
18 * which finds a pair of longest identical subsequences in
19 * the two files.
20 *
21 * The major goal is to generate the match vector J.
22 * J[i] is the index of the line in file1 corresponding
23 * to line i in file0. J[i] = 0 if there is no
24 * such line in file1.
25 *
26 * Lines are hashed so as to work in core. All potential
27 * matches are located by sorting the lines of each file
28 * on the hash (called "value"). In particular, this
29 * collects the equivalence classes in file1 together.
30 * Subroutine equiv replaces the value of each line in
31 * file0 by the index of the first element of its
32 * matching equivalence in (the reordered) file1.
33 * To save space equiv squeezes file1 into a single
34 * array member in which the equivalence classes
35 * are simply concatenated, except that their first
36 * members are flagged by changing sign.
37 *
38 * Next the indices that point into member are unsorted into
39 * array class according to the original order of file0.
40 *
41 * The cleverness lies in routine stone. This marches
42 * through the lines of file0, developing a vector klist
43 * of "k-candidates". At step i a k-candidate is a matched
44 * pair of lines x,y (x in file0, y in file1) such that
45 * there is a common subsequence of length k
46 * between the first i lines of file0 and the first y
47 * lines of file1, but there is no such subsequence for
48 * any smaller y. x is the earliest possible mate to y
49 * that occurs in such a subsequence.
50 *
51 * Whenever any of the members of the equivalence class of
52 * lines in file1 matable to a line in file0 has serial number
53 * less than the y of some k-candidate, that k-candidate
54 * with the smallest such y is replaced. The new
55 * k-candidate is chained (via pred) to the current
56 * k-1 candidate so that the actual subsequence can
57 * be recovered. When a member has serial number greater
58 * that the y of all k-candidates, the klist is extended.
59 * At the end, the longest subsequence is pulled out
60 * and placed in the array J by unravel
61 *
62 * With J in hand, the matches there recorded are
63 * checked against reality to assure that no spurious
64 * matches have crept in due to hashing. If they have,
65 * they are broken, and "jackpot" is recorded--a harmless
66 * matter except that a true match for a spuriously
67 * mated line may now be unnecessarily reported as a change.
68 *
69 * Much of the complexity of the program comes simply
70 * from trying to minimize core utilization and
71 * maximize the range of doable problems by dynamically
72 * allocating what is needed and reusing what is not.
73 * The core requirements for problems larger than somewhat
74 * are (in words) 2*length(file0) + length(file1) +
75 * 3*(number of k-candidates installed), typically about
76 * 6n words for files of length n.
[1765]77 */
78
[3621]79//config:config DIFF
80//config: bool "diff"
81//config: default y
82//config: help
83//config: diff compares two files or directories and outputs the
84//config: differences between them in a form that can be given to
85//config: the patch command.
86//config:
87//config:config FEATURE_DIFF_LONG_OPTIONS
88//config: bool "Enable long options"
89//config: default y
90//config: depends on DIFF && LONG_OPTS
91//config: help
92//config: Enable use of long options.
93//config:
94//config:config FEATURE_DIFF_DIR
95//config: bool "Enable directory support"
96//config: default y
97//config: depends on DIFF
98//config: help
99//config: This option enables support for directory and subdirectory
100//config: comparison.
101
102//kbuild:lib-$(CONFIG_DIFF) += diff.o
103
104//applet:IF_DIFF(APPLET(diff, BB_DIR_USR_BIN, BB_SUID_DROP))
105
[3232]106//usage:#define diff_trivial_usage
107//usage: "[-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2"
108//usage:#define diff_full_usage "\n\n"
109//usage: "Compare files line by line and output the differences between them.\n"
110//usage: "This implementation supports unified diffs only.\n"
111//usage: "\n -a Treat all files as text"
112//usage: "\n -b Ignore changes in the amount of whitespace"
113//usage: "\n -B Ignore changes whose lines are all blank"
114//usage: "\n -d Try hard to find a smaller set of changes"
115//usage: "\n -i Ignore case differences"
116//usage: "\n -L Use LABEL instead of the filename in the unified header"
117//usage: "\n -N Treat absent files as empty"
118//usage: "\n -q Output only whether files differ"
119//usage: "\n -r Recurse"
120//usage: "\n -S Start with FILE when comparing directories"
121//usage: "\n -T Make tabs line up by prefixing a tab when necessary"
122//usage: "\n -s Report when two files are the same"
123//usage: "\n -t Expand tabs to spaces in output"
124//usage: "\n -U Output LINES lines of context"
125//usage: "\n -w Ignore all whitespace"
126
[2725]127#include "libbb.h"
[3621]128#include "common_bufsiz.h"
[1765]129
[2725]130#if 0
[3232]131# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
[2725]132#else
[3232]133# define dbg_error_msg(...) ((void)0)
[2725]134#endif
[1765]135
[2725]136enum { /* print_status() and diffreg() return values */
137 STATUS_SAME, /* files are the same */
138 STATUS_DIFFER, /* files differ */
139 STATUS_BINARY, /* binary files differ */
[1765]140};
141
[2725]142enum { /* Commandline flags */
143 FLAG_a,
144 FLAG_b,
145 FLAG_d,
146 FLAG_i,
147 FLAG_L, /* never used, handled by getopt32 */
148 FLAG_N,
149 FLAG_q,
150 FLAG_r,
151 FLAG_s,
152 FLAG_S, /* never used, handled by getopt32 */
153 FLAG_t,
154 FLAG_T,
155 FLAG_U, /* never used, handled by getopt32 */
156 FLAG_w,
157 FLAG_u, /* ignored, this is the default */
158 FLAG_p, /* not implemented */
159 FLAG_B,
160 FLAG_E, /* not implemented */
[1765]161};
[2725]162#define FLAG(x) (1 << FLAG_##x)
[1765]163
[2725]164/* We cache file position to avoid excessive seeking */
165typedef struct FILE_and_pos_t {
166 FILE *ft_fp;
167 off_t ft_pos;
168} FILE_and_pos_t;
[1765]169
170struct globals {
[2725]171 smallint exit_status;
172 int opt_U_context;
173 const char *other_dir;
174 char *label[2];
175 struct stat stb[2];
[1765]176};
177#define G (*ptr_to_globals)
[2725]178#define exit_status (G.exit_status )
179#define opt_U_context (G.opt_U_context )
180#define label (G.label )
181#define stb (G.stb )
[1765]182#define INIT_G() do { \
[2725]183 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
184 opt_U_context = 3; \
[1765]185} while (0)
186
[2725]187typedef int token_t;
[1765]188
[2725]189enum {
190 /* Public */
191 TOK_EMPTY = 1 << 9, /* Line fully processed, you can proceed to the next */
192 TOK_EOF = 1 << 10, /* File ended */
193 /* Private (Only to be used by read_token() */
194 TOK_EOL = 1 << 11, /* we saw EOL (sticky) */
195 TOK_SPACE = 1 << 12, /* used -b code, means we are skipping spaces */
196 SHIFT_EOF = (sizeof(token_t)*8 - 8) - 1,
197 CHAR_MASK = 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */
198};
[1765]199
[2725]200/* Restores full EOF from one 8th bit: */
201//#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF)
202/* We don't really need the above, we only need to have EOF != any_real_char: */
203#define TOK2CHAR(t) ((t) & CHAR_MASK)
204
205static void seek_ft(FILE_and_pos_t *ft, off_t pos)
[1765]206{
[2725]207 if (ft->ft_pos != pos) {
208 ft->ft_pos = pos;
209 fseeko(ft->ft_fp, pos, SEEK_SET);
[1765]210 }
211}
[2725]212
213/* Reads tokens from given fp, handling -b and -w flags
214 * The user must reset tok every line start
[1765]215 */
[2725]216static int read_token(FILE_and_pos_t *ft, token_t tok)
[1765]217{
[2725]218 tok |= TOK_EMPTY;
219 while (!(tok & TOK_EOL)) {
220 bool is_space;
221 int t;
[1765]222
[2725]223 t = fgetc(ft->ft_fp);
224 if (t != EOF)
225 ft->ft_pos++;
226 is_space = (t == EOF || isspace(t));
227
228 /* If t == EOF (-1), set both TOK_EOF and TOK_EOL */
229 tok |= (t & (TOK_EOF + TOK_EOL));
230 /* Only EOL? */
231 if (t == '\n')
232 tok |= TOK_EOL;
233
234 if (option_mask32 & FLAG(i)) /* Handcoded tolower() */
235 t = (t >= 'A' && t <= 'Z') ? t - ('A' - 'a') : t;
236
237 if ((option_mask32 & FLAG(w)) && is_space)
238 continue;
239
240 /* Trim char value to low 9 bits */
241 t &= CHAR_MASK;
242
243 if (option_mask32 & FLAG(b)) {
244 /* Was prev char whitespace? */
245 if (tok & TOK_SPACE) { /* yes */
246 if (is_space) /* this one too, ignore it */
247 continue;
248 tok &= ~TOK_SPACE;
249 } else if (is_space) {
250 /* 1st whitespace char.
251 * Set TOK_SPACE and replace char by ' ' */
252 t = TOK_SPACE + ' ';
[1765]253 }
254 }
[2725]255 /* Clear EMPTY */
256 tok &= ~(TOK_EMPTY + CHAR_MASK);
257 /* Assign char value (low 9 bits) and maybe set TOK_SPACE */
258 tok |= t;
259 break;
[1765]260 }
[2725]261#if 0
262 bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp, tok, tok & 0xff
263 , tok & TOK_EOF ? " EOF" : ""
264 , tok & TOK_EOL ? " EOL" : ""
265 , tok & TOK_EMPTY ? " EMPTY" : ""
266 , tok & TOK_SPACE ? " SPACE" : ""
267 );
268#endif
269 return tok;
[1765]270}
271
[2725]272struct cand {
273 int x;
274 int y;
275 int pred;
276};
[1765]277
[2725]278static int search(const int *c, int k, int y, const struct cand *list)
[1765]279{
[2725]280 int i, j;
[1765]281
[2725]282 if (list[c[k]].y < y) /* quick look for typical case */
283 return k + 1;
284
285 for (i = 0, j = k + 1;;) {
286 const int l = (i + j) >> 1;
287 if (l > i) {
288 const int t = list[c[l]].y;
289 if (t > y)
290 j = l;
291 else if (t < y)
292 i = l;
293 else
294 return l;
295 } else
296 return l + 1;
[1765]297 }
298}
299
[2725]300static unsigned isqrt(unsigned n)
[1765]301{
[2725]302 unsigned x = 1;
303 while (1) {
304 const unsigned y = x;
305 x = ((n / x) + x) >> 1;
306 if (x <= (y + 1) && x >= (y - 1))
307 return x;
[1765]308 }
309}
310
[2725]311static void stone(const int *a, int n, const int *b, int *J, int pref)
[1765]312{
[2725]313 const unsigned isq = isqrt(n);
314 const unsigned bound =
315 (option_mask32 & FLAG(d)) ? UINT_MAX : MAX(256, isq);
316 int clen = 1;
317 int clistlen = 100;
318 int k = 0;
319 struct cand *clist = xzalloc(clistlen * sizeof(clist[0]));
320 struct cand cand;
321 struct cand *q;
322 int *klist = xzalloc((n + 2) * sizeof(klist[0]));
323 /*clist[0] = (struct cand){0}; - xzalloc did it */
324 /*klist[0] = 0; */
[1765]325
[2725]326 for (cand.x = 1; cand.x <= n; cand.x++) {
327 int j = a[cand.x], oldl = 0;
328 unsigned numtries = 0;
329 if (j == 0)
330 continue;
331 cand.y = -b[j];
332 cand.pred = klist[0];
333 do {
334 int l, tc;
335 if (cand.y <= clist[cand.pred].y)
336 continue;
337 l = search(klist, k, cand.y, clist);
338 if (l != oldl + 1)
339 cand.pred = klist[l - 1];
340 if (l <= k && clist[klist[l]].y <= cand.y)
341 continue;
342 if (clen == clistlen) {
343 clistlen = clistlen * 11 / 10;
344 clist = xrealloc(clist, clistlen * sizeof(clist[0]));
345 }
346 clist[clen] = cand;
347 tc = klist[l];
348 klist[l] = clen++;
349 if (l <= k) {
350 cand.pred = tc;
351 oldl = l;
352 numtries++;
353 } else {
354 k++;
355 break;
356 }
357 } while ((cand.y = b[++j]) > 0 && numtries < bound);
[1765]358 }
[2725]359 /* Unravel */
360 for (q = clist + klist[k]; q->y; q = clist + q->pred)
361 J[q->x + pref] = q->y + pref;
362 free(klist);
363 free(clist);
[1765]364}
365
[2725]366struct line {
[3621]367 /* 'serial' is not used in the beginning, so we reuse it
[2725]368 * to store line offsets, thus reducing memory pressure
369 */
370 union {
371 unsigned serial;
372 off_t offset;
373 };
374 unsigned value;
375};
[1765]376
377static void equiv(struct line *a, int n, struct line *b, int m, int *c)
378{
[2725]379 int i = 1, j = 1;
[1765]380
381 while (i <= n && j <= m) {
382 if (a[i].value < b[j].value)
383 a[i++].value = 0;
384 else if (a[i].value == b[j].value)
385 a[i++].value = j;
386 else
387 j++;
388 }
389 while (i <= n)
390 a[i++].value = 0;
391 b[m + 1].value = 0;
392 j = 0;
393 while (++j <= m) {
394 c[j] = -b[j].serial;
395 while (b[j + 1].value == b[j].value) {
396 j++;
397 c[j] = b[j].serial;
398 }
399 }
400 c[j] = -1;
401}
402
[2725]403static void unsort(const struct line *f, int l, int *b)
[1765]404{
405 int i;
[2725]406 int *a = xmalloc((l + 1) * sizeof(a[0]));
[1765]407 for (i = 1; i <= l; i++)
408 a[f[i].serial] = f[i].value;
409 for (i = 1; i <= l; i++)
410 b[i] = a[i];
411 free(a);
412}
413
[2725]414static int line_compar(const void *a, const void *b)
[1765]415{
[2725]416#define l0 ((const struct line*)a)
417#define l1 ((const struct line*)b)
418 int r = l0->value - l1->value;
419 if (r)
420 return r;
421 return l0->serial - l1->serial;
422#undef l0
423#undef l1
[1765]424}
425
[2725]426static void fetch(FILE_and_pos_t *ft, const off_t *ix, int a, int b, int ch)
[1765]427{
[2725]428 int i, j, col;
[1765]429 for (i = a; i <= b; i++) {
[2725]430 seek_ft(ft, ix[i - 1]);
431 putchar(ch);
432 if (option_mask32 & FLAG(T))
433 putchar('\t');
434 for (j = 0, col = 0; j < ix[i] - ix[i - 1]; j++) {
435 int c = fgetc(ft->ft_fp);
436 if (c == EOF) {
[3621]437 puts("\n\\ No newline at end of file");
[1765]438 return;
439 }
[2725]440 ft->ft_pos++;
441 if (c == '\t' && (option_mask32 & FLAG(t)))
442 do putchar(' '); while (++col & 7);
443 else {
[1765]444 putchar(c);
445 col++;
446 }
447 }
448 }
449}
450
[2725]451/* Creates the match vector J, where J[i] is the index
452 * of the line in the new file corresponding to the line i
453 * in the old file. Lines start at 1 instead of 0, that value
454 * being used instead to denote no corresponding line.
455 * This vector is dynamically allocated and must be freed by the caller.
456 *
457 * * fp is an input parameter, where fp[0] and fp[1] are the open
458 * old file and new file respectively.
459 * * nlen is an output variable, where nlen[0] and nlen[1]
460 * gets the number of lines in the old and new file respectively.
461 * * ix is an output variable, where ix[0] and ix[1] gets
462 * assigned dynamically allocated vectors of the offsets of the lines
463 * of the old and new file respectively. These must be freed by the caller.
464 */
465static NOINLINE int *create_J(FILE_and_pos_t ft[2], int nlen[2], off_t *ix[2])
[1765]466{
[2725]467 int *J, slen[2], *class, *member;
468 struct line *nfile[2], *sfile[2];
469 int pref = 0, suff = 0, i, j, delta;
[1765]470
[2725]471 /* Lines of both files are hashed, and in the process
472 * their offsets are stored in the array ix[fileno]
473 * where fileno == 0 points to the old file, and
474 * fileno == 1 points to the new one.
475 */
476 for (i = 0; i < 2; i++) {
477 unsigned hash;
478 token_t tok;
479 size_t sz = 100;
480 nfile[i] = xmalloc((sz + 3) * sizeof(nfile[i][0]));
481 /* ft gets here without the correct position, cant use seek_ft */
482 ft[i].ft_pos = 0;
483 fseeko(ft[i].ft_fp, 0, SEEK_SET);
[1765]484
[2725]485 nlen[i] = 0;
486 /* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */
487 nfile[i][0].offset = 0;
488 goto start; /* saves code */
489 while (1) {
490 tok = read_token(&ft[i], tok);
491 if (!(tok & TOK_EMPTY)) {
492 /* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */
493 /*hash = hash * 128 - hash + TOK2CHAR(tok);
494 * gcc insists on optimizing above to "hash * 127 + ...", thus... */
495 unsigned o = hash - TOK2CHAR(tok);
496 hash = hash * 128 - o; /* we want SPEED here */
497 continue;
498 }
499 if (nlen[i]++ == sz) {
500 sz = sz * 3 / 2;
501 nfile[i] = xrealloc(nfile[i], (sz + 3) * sizeof(nfile[i][0]));
502 }
503 /* line_compar needs hashes fit into positive int */
504 nfile[i][nlen[i]].value = hash & INT_MAX;
505 /* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */
506 nfile[i][nlen[i]].offset = ft[i].ft_pos;
507 if (tok & TOK_EOF) {
508 /* EOF counts as a token, so we have to adjust it here */
509 nfile[i][nlen[i]].offset++;
510 break;
511 }
512start:
513 hash = tok = 0;
[1765]514 }
[2725]515 /* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */
516 if (nfile[i][nlen[i]].offset - nfile[i][nlen[i] - 1].offset == 1)
517 nlen[i]--;
518 /* Now we copy the line offsets into ix */
519 ix[i] = xmalloc((nlen[i] + 2) * sizeof(ix[i][0]));
520 for (j = 0; j < nlen[i] + 1; j++)
521 ix[i][j] = nfile[i][j].offset;
[1765]522 }
523
[2725]524 /* length of prefix and suffix is calculated */
525 for (; pref < nlen[0] && pref < nlen[1] &&
526 nfile[0][pref + 1].value == nfile[1][pref + 1].value;
527 pref++);
528 for (; suff < nlen[0] - pref && suff < nlen[1] - pref &&
529 nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value;
530 suff++);
531 /* Arrays are pruned by the suffix and prefix length,
532 * the result being sorted and stored in sfile[fileno],
533 * and their sizes are stored in slen[fileno]
[1765]534 */
[2725]535 for (j = 0; j < 2; j++) {
536 sfile[j] = nfile[j] + pref;
537 slen[j] = nlen[j] - pref - suff;
538 for (i = 0; i <= slen[j]; i++)
539 sfile[j][i].serial = i;
540 qsort(sfile[j] + 1, slen[j], sizeof(*sfile[j]), line_compar);
541 }
542 /* nfile arrays are reused to reduce memory pressure
543 * The #if zeroed out section performs the same task as the
544 * one in the #else section.
545 * Peak memory usage is higher, but one array copy is avoided
546 * by not using unsort()
547 */
548#if 0
549 member = xmalloc((slen[1] + 2) * sizeof(member[0]));
550 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
551 free(nfile[1]);
[1765]552
[2725]553 class = xmalloc((slen[0] + 1) * sizeof(class[0]));
554 for (i = 1; i <= slen[0]; i++) /* Unsorting */
555 class[sfile[0][i].serial] = sfile[0][i].value;
556 free(nfile[0]);
[1765]557#else
[2725]558 member = (int *)nfile[1];
559 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
560 member = xrealloc(member, (slen[1] + 2) * sizeof(member[0]));
561
562 class = (int *)nfile[0];
563 unsort(sfile[0], slen[0], (int *)nfile[0]);
564 class = xrealloc(class, (slen[0] + 2) * sizeof(class[0]));
[1765]565#endif
[2725]566 J = xmalloc((nlen[0] + 2) * sizeof(J[0]));
567 /* The elements of J which fall inside the prefix and suffix regions
568 * are marked as unchanged, while the ones which fall outside
569 * are initialized with 0 (no matches), so that function stone can
570 * then assign them their right values
571 */
572 for (i = 0, delta = nlen[1] - nlen[0]; i <= nlen[0]; i++)
573 J[i] = i <= pref ? i :
574 i > (nlen[0] - suff) ? (i + delta) : 0;
575 /* Here the magic is performed */
576 stone(class, slen[0], member, J, pref);
577 J[nlen[0] + 1] = nlen[1] + 1;
[1765]578
[2725]579 free(class);
580 free(member);
[1765]581
[2725]582 /* Both files are rescanned, in an effort to find any lines
583 * which, due to limitations intrinsic to any hashing algorithm,
584 * are different but ended up confounded as the same
585 */
586 for (i = 1; i <= nlen[0]; i++) {
587 if (!J[i])
588 continue;
[1765]589
[2725]590 seek_ft(&ft[0], ix[0][i - 1]);
591 seek_ft(&ft[1], ix[1][J[i] - 1]);
[1765]592
[2725]593 for (j = J[i]; i <= nlen[0] && J[i] == j; i++, j++) {
594 token_t tok0 = 0, tok1 = 0;
595 do {
596 tok0 = read_token(&ft[0], tok0);
597 tok1 = read_token(&ft[1], tok1);
[1765]598
[2725]599 if (((tok0 ^ tok1) & TOK_EMPTY) != 0 /* one is empty (not both) */
600 || (!(tok0 & TOK_EMPTY) && TOK2CHAR(tok0) != TOK2CHAR(tok1))
601 ) {
602 J[i] = 0; /* Break the correspondence */
603 }
604 } while (!(tok0 & tok1 & TOK_EMPTY));
605 }
[1765]606 }
607
[2725]608 return J;
[1765]609}
610
[2725]611static bool diff(FILE* fp[2], char *file[2])
[1765]612{
[2725]613 int nlen[2];
614 off_t *ix[2];
615 FILE_and_pos_t ft[2];
616 typedef struct { int a, b; } vec_t[2];
617 vec_t *vec = NULL;
618 int i = 1, j, k, idx = -1;
619 bool anychange = false;
620 int *J;
[1765]621
[2725]622 ft[0].ft_fp = fp[0];
623 ft[1].ft_fp = fp[1];
624 /* note that ft[i].ft_pos is unintitalized, create_J()
625 * must not assume otherwise */
626 J = create_J(ft, nlen, ix);
[1765]627
[2725]628 do {
629 bool nonempty = false;
[1765]630
[2725]631 while (1) {
632 vec_t v;
[1765]633
[2725]634 for (v[0].a = i; v[0].a <= nlen[0] && J[v[0].a] == J[v[0].a - 1] + 1; v[0].a++)
635 continue;
636 v[1].a = J[v[0].a - 1] + 1;
[1765]637
[2725]638 for (v[0].b = v[0].a - 1; v[0].b < nlen[0] && !J[v[0].b + 1]; v[0].b++)
639 continue;
640 v[1].b = J[v[0].b + 1] - 1;
641 /*
642 * Indicate that there is a difference between lines a and b of the 'from' file
643 * to get to lines c to d of the 'to' file. If a is greater than b then there
644 * are no lines in the 'from' file involved and this means that there were
645 * lines appended (beginning at b). If c is greater than d then there are
646 * lines missing from the 'to' file.
647 */
648 if (v[0].a <= v[0].b || v[1].a <= v[1].b) {
649 /*
650 * If this change is more than 'context' lines from the
651 * previous change, dump the record and reset it.
652 */
653 int ct = (2 * opt_U_context) + 1;
654 if (idx >= 0
655 && v[0].a > vec[idx][0].b + ct
656 && v[1].a > vec[idx][1].b + ct
657 ) {
658 break;
659 }
[1765]660
[2725]661 for (j = 0; j < 2; j++)
[3621]662 for (k = v[j].a; k <= v[j].b; k++)
663 nonempty |= (ix[j][k] - ix[j][k - 1] != 1);
[1765]664
[2725]665 vec = xrealloc_vector(vec, 6, ++idx);
666 memcpy(vec[idx], v, sizeof(v));
667 }
[1765]668
[2725]669 i = v[0].b + 1;
670 if (i > nlen[0])
671 break;
672 J[v[0].b] = v[1].b;
673 }
674 if (idx < 0 || ((option_mask32 & FLAG(B)) && !nonempty))
675 goto cont;
676 if (!(option_mask32 & FLAG(q))) {
677 int lowa;
678 vec_t span, *cvp = vec;
[1765]679
[2725]680 if (!anychange) {
681 /* Print the context/unidiff header first time through */
682 printf("--- %s\n", label[0] ? label[0] : file[0]);
683 printf("+++ %s\n", label[1] ? label[1] : file[1]);
684 }
[1765]685
[2725]686 printf("@@");
687 for (j = 0; j < 2; j++) {
688 int a = span[j].a = MAX(1, (*cvp)[j].a - opt_U_context);
689 int b = span[j].b = MIN(nlen[j], vec[idx][j].b + opt_U_context);
[1765]690
[2725]691 printf(" %c%d", j ? '+' : '-', MIN(a, b));
692 if (a == b)
693 continue;
694 printf(",%d", (a < b) ? b - a + 1 : 0);
695 }
[3621]696 puts(" @@");
[2725]697 /*
698 * Output changes in "unified" diff format--the old and new lines
699 * are printed together.
700 */
701 for (lowa = span[0].a; ; lowa = (*cvp++)[0].b + 1) {
702 bool end = cvp > &vec[idx];
703 fetch(&ft[0], ix[0], lowa, end ? span[0].b : (*cvp)[0].a - 1, ' ');
704 if (end)
705 break;
706 for (j = 0; j < 2; j++)
707 fetch(&ft[j], ix[j], (*cvp)[j].a, (*cvp)[j].b, j ? '+' : '-');
708 }
709 }
710 anychange = true;
711 cont:
712 idx = -1;
713 } while (i <= nlen[0]);
[1765]714
[2725]715 free(vec);
716 free(ix[0]);
717 free(ix[1]);
718 free(J);
719 return anychange;
[1765]720}
721
[2725]722static int diffreg(char *file[2])
[1765]723{
[3232]724 FILE *fp[2];
[2725]725 bool binary = false, differ = false;
726 int status = STATUS_SAME, i;
[1765]727
[3232]728 fp[0] = stdin;
729 fp[1] = stdin;
[2725]730 for (i = 0; i < 2; i++) {
731 int fd = open_or_warn_stdin(file[i]);
732 if (fd == -1)
733 goto out;
734 /* Our diff implementation is using seek.
735 * When we meet non-seekable file, we must make a temp copy.
736 */
737 if (lseek(fd, 0, SEEK_SET) == -1 && errno == ESPIPE) {
738 char name[] = "/tmp/difXXXXXX";
739 int fd_tmp = xmkstemp(name);
[1765]740
[2725]741 unlink(name);
742 if (bb_copyfd_eof(fd, fd_tmp) < 0)
743 xfunc_die();
[3621]744 if (fd != STDIN_FILENO)
[2725]745 close(fd);
746 fd = fd_tmp;
[3621]747 xlseek(fd, 0, SEEK_SET);
[1765]748 }
[2725]749 fp[i] = fdopen(fd, "r");
[1765]750 }
[2725]751
[3621]752 setup_common_bufsiz();
[2725]753 while (1) {
754 const size_t sz = COMMON_BUFSIZE / 2;
755 char *const buf0 = bb_common_bufsiz1;
756 char *const buf1 = buf0 + sz;
757 int j, k;
758 i = fread(buf0, 1, sz, fp[0]);
759 j = fread(buf1, 1, sz, fp[1]);
760 if (i != j) {
761 differ = true;
762 i = MIN(i, j);
[1765]763 }
[2725]764 if (i == 0)
765 break;
766 for (k = 0; k < i; k++) {
767 if (!buf0[k] || !buf1[k])
768 binary = true;
769 if (buf0[k] != buf1[k])
770 differ = true;
771 }
[1765]772 }
[2725]773 if (differ) {
774 if (binary && !(option_mask32 & FLAG(a)))
775 status = STATUS_BINARY;
776 else if (diff(fp, file))
777 status = STATUS_DIFFER;
[1765]778 }
[2725]779 if (status != STATUS_SAME)
780 exit_status |= 1;
781out:
782 fclose_if_not_stdin(fp[0]);
783 fclose_if_not_stdin(fp[1]);
[1765]784
[2725]785 return status;
[1765]786}
787
[2725]788static void print_status(int status, char *path[2])
[1765]789{
[2725]790 switch (status) {
791 case STATUS_BINARY:
792 case STATUS_DIFFER:
793 if ((option_mask32 & FLAG(q)) || status == STATUS_BINARY)
794 printf("Files %s and %s differ\n", path[0], path[1]);
795 break;
796 case STATUS_SAME:
797 if (option_mask32 & FLAG(s))
798 printf("Files %s and %s are identical\n", path[0], path[1]);
799 break;
800 }
[1765]801}
802
[2725]803#if ENABLE_FEATURE_DIFF_DIR
804struct dlist {
805 size_t len;
806 int s, e;
807 char **dl;
808};
[1765]809
810/* This function adds a filename to dl, the directory listing. */
[2725]811static int FAST_FUNC add_to_dirlist(const char *filename,
812 struct stat *sb UNUSED_PARAM,
813 void *userdata, int depth UNUSED_PARAM)
[1765]814{
[2725]815 struct dlist *const l = userdata;
816 const char *file = filename + l->len;
817 while (*file == '/')
818 file++;
819 l->dl = xrealloc_vector(l->dl, 6, l->e);
820 l->dl[l->e] = xstrdup(file);
821 l->e++;
[1765]822 return TRUE;
823}
824
[2725]825/* If recursion is not set, this function adds the directory
826 * to the list and prevents recursive_action from recursing into it.
827 */
828static int FAST_FUNC skip_dir(const char *filename,
829 struct stat *sb, void *userdata,
830 int depth)
[1765]831{
[2725]832 if (!(option_mask32 & FLAG(r)) && depth) {
833 add_to_dirlist(filename, sb, userdata, depth);
834 return SKIP;
835 }
836 if (!(option_mask32 & FLAG(N))) {
837 /* -r without -N: no need to recurse into dirs
838 * which do not exist on the "other side".
839 * Testcase: diff -r /tmp /
840 * (it would recurse deep into /proc without this code) */
841 struct dlist *const l = userdata;
842 filename += l->len;
843 if (filename[0]) {
844 struct stat osb;
845 char *othername = concat_path_file(G.other_dir, filename);
846 int r = stat(othername, &osb);
847 free(othername);
848 if (r != 0 || !S_ISDIR(osb.st_mode)) {
849 /* other dir doesn't have similarly named
[3232]850 * directory, don't recurse; return 1 upon
851 * exit, just like diffutils' diff */
852 exit_status |= 1;
[2725]853 return SKIP;
854 }
[1765]855 }
856 }
[2725]857 return TRUE;
[1765]858}
859
[2725]860static void diffdir(char *p[2], const char *s_start)
[1765]861{
[2725]862 struct dlist list[2];
863 int i;
[1765]864
[2725]865 memset(&list, 0, sizeof(list));
866 for (i = 0; i < 2; i++) {
867 /*list[i].s = list[i].e = 0; - memset did it */
868 /*list[i].dl = NULL; */
[1765]869
[2725]870 G.other_dir = p[1 - i];
871 /* We need to trim root directory prefix.
872 * Using list.len to specify its length,
873 * add_to_dirlist will remove it. */
874 list[i].len = strlen(p[i]);
875 recursive_action(p[i], ACTION_RECURSE | ACTION_FOLLOWLINKS,
[3232]876 add_to_dirlist, skip_dir, &list[i], 0);
[2725]877 /* Sort dl alphabetically.
878 * GNU diff does this ignoring any number of trailing dots.
879 * We don't, so for us dotted files almost always are
880 * first on the list.
881 */
882 qsort_string_vector(list[i].dl, list[i].e);
883 /* If -S was set, find the starting point. */
884 if (!s_start)
885 continue;
886 while (list[i].s < list[i].e && strcmp(list[i].dl[list[i].s], s_start) < 0)
887 list[i].s++;
[1765]888 }
889 /* Now that both dirlist1 and dirlist2 contain sorted directory
890 * listings, we can start to go through dirlist1. If both listings
891 * contain the same file, then do a normal diff. Otherwise, behaviour
892 * is determined by whether the -N flag is set. */
[2725]893 while (1) {
894 char *dp[2];
895 int pos;
896 int k;
897
898 dp[0] = list[0].s < list[0].e ? list[0].dl[list[0].s] : NULL;
899 dp[1] = list[1].s < list[1].e ? list[1].dl[list[1].s] : NULL;
900 if (!dp[0] && !dp[1])
901 break;
902 pos = !dp[0] ? 1 : (!dp[1] ? -1 : strcmp(dp[0], dp[1]));
903 k = pos > 0;
[3232]904 if (pos && !(option_mask32 & FLAG(N))) {
[2725]905 printf("Only in %s: %s\n", p[k], dp[k]);
[3232]906 exit_status |= 1;
907 } else {
[2725]908 char *fullpath[2], *path[2]; /* if -N */
909
910 for (i = 0; i < 2; i++) {
911 if (pos == 0 || i == k) {
912 path[i] = fullpath[i] = concat_path_file(p[i], dp[i]);
913 stat(fullpath[i], &stb[i]);
914 } else {
915 fullpath[i] = concat_path_file(p[i], dp[1 - i]);
916 path[i] = (char *)bb_dev_null;
917 }
918 }
919 if (pos)
920 stat(fullpath[k], &stb[1 - k]);
921
922 if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode))
923 printf("Common subdirectories: %s and %s\n", fullpath[0], fullpath[1]);
924 else if (!S_ISREG(stb[0].st_mode) && !S_ISDIR(stb[0].st_mode))
925 printf("File %s is not a regular file or directory and was skipped\n", fullpath[0]);
926 else if (!S_ISREG(stb[1].st_mode) && !S_ISDIR(stb[1].st_mode))
927 printf("File %s is not a regular file or directory and was skipped\n", fullpath[1]);
928 else if (S_ISDIR(stb[0].st_mode) != S_ISDIR(stb[1].st_mode)) {
929 if (S_ISDIR(stb[0].st_mode))
930 printf("File %s is a %s while file %s is a %s\n", fullpath[0], "directory", fullpath[1], "regular file");
931 else
932 printf("File %s is a %s while file %s is a %s\n", fullpath[0], "regular file", fullpath[1], "directory");
933 } else
934 print_status(diffreg(path), fullpath);
935
936 free(fullpath[0]);
937 free(fullpath[1]);
938 }
939 free(dp[k]);
940 list[k].s++;
[1765]941 if (pos == 0) {
[2725]942 free(dp[1 - k]);
943 list[1 - k].s++;
[1765]944 }
945 }
[2725]946 if (ENABLE_FEATURE_CLEAN_UP) {
947 free(list[0].dl);
948 free(list[1].dl);
949 }
[1765]950}
951#endif
952
[2725]953#if ENABLE_FEATURE_DIFF_LONG_OPTIONS
954static const char diff_longopts[] ALIGN1 =
955 "ignore-case\0" No_argument "i"
956 "ignore-tab-expansion\0" No_argument "E"
957 "ignore-space-change\0" No_argument "b"
958 "ignore-all-space\0" No_argument "w"
959 "ignore-blank-lines\0" No_argument "B"
960 "text\0" No_argument "a"
961 "unified\0" Required_argument "U"
962 "label\0" Required_argument "L"
963 "show-c-function\0" No_argument "p"
964 "brief\0" No_argument "q"
965 "expand-tabs\0" No_argument "t"
966 "initial-tab\0" No_argument "T"
967 "recursive\0" No_argument "r"
968 "new-file\0" No_argument "N"
969 "report-identical-files\0" No_argument "s"
970 "starting-file\0" Required_argument "S"
971 "minimal\0" No_argument "d"
972 ;
973#endif
[1765]974
[2725]975int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
976int diff_main(int argc UNUSED_PARAM, char **argv)
[1765]977{
[2725]978 int gotstdin = 0, i;
979 char *file[2], *s_start = NULL;
[1765]980 llist_t *L_arg = NULL;
981
982 INIT_G();
983
[2725]984 /* exactly 2 params; collect multiple -L <label>; -U N */
985 opt_complementary = "=2:L::U+";
986#if ENABLE_FEATURE_DIFF_LONG_OPTIONS
987 applet_long_options = diff_longopts;
988#endif
989 getopt32(argv, "abdiL:NqrsS:tTU:wupBE",
990 &L_arg, &s_start, &opt_U_context);
[1765]991 argv += optind;
[2725]992 while (L_arg)
993 label[!!label[0]] = llist_pop(&L_arg);
994 xfunc_error_retval = 2;
995 for (i = 0; i < 2; i++) {
996 file[i] = argv[i];
997 /* Compat: "diff file name_which_doesnt_exist" exits with 2 */
998 if (LONE_DASH(file[i])) {
999 fstat(STDIN_FILENO, &stb[i]);
1000 gotstdin++;
1001 } else
1002 xstat(file[i], &stb[i]);
[1765]1003 }
[2725]1004 xfunc_error_retval = 1;
1005 if (gotstdin && (S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode)))
1006 bb_error_msg_and_die("can't compare stdin to a directory");
[1765]1007
[3232]1008 /* Compare metadata to check if the files are the same physical file.
1009 *
1010 * Comment from diffutils source says:
1011 * POSIX says that two files are identical if st_ino and st_dev are
1012 * the same, but many file systems incorrectly assign the same (device,
1013 * inode) pair to two distinct files, including:
1014 * GNU/Linux NFS servers that export all local file systems as a
1015 * single NFS file system, if a local device number (st_dev) exceeds
1016 * 255, or if a local inode number (st_ino) exceeds 16777215.
1017 */
1018 if (ENABLE_DESKTOP
1019 && stb[0].st_ino == stb[1].st_ino
1020 && stb[0].st_dev == stb[1].st_dev
1021 && stb[0].st_size == stb[1].st_size
1022 && stb[0].st_mtime == stb[1].st_mtime
1023 && stb[0].st_ctime == stb[1].st_ctime
1024 && stb[0].st_mode == stb[1].st_mode
1025 && stb[0].st_nlink == stb[1].st_nlink
1026 && stb[0].st_uid == stb[1].st_uid
1027 && stb[0].st_gid == stb[1].st_gid
1028 ) {
1029 /* files are physically the same; no need to compare them */
1030 return STATUS_SAME;
1031 }
1032
[2725]1033 if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) {
[1765]1034#if ENABLE_FEATURE_DIFF_DIR
[2725]1035 diffdir(file, s_start);
[1765]1036#else
[2725]1037 bb_error_msg_and_die("no support for directory comparison");
[1765]1038#endif
1039 } else {
[2725]1040 bool dirfile = S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode);
1041 bool dir = S_ISDIR(stb[1].st_mode);
1042 if (dirfile) {
1043 const char *slash = strrchr(file[!dir], '/');
1044 file[dir] = concat_path_file(file[dir], slash ? slash + 1 : file[!dir]);
1045 xstat(file[dir], &stb[dir]);
[1765]1046 }
[2725]1047 /* diffreg can get non-regular files here */
1048 print_status(gotstdin > 1 ? STATUS_SAME : diffreg(file), file);
1049
1050 if (dirfile)
1051 free(file[dir]);
[1765]1052 }
[2725]1053
1054 return exit_status;
[1765]1055}
Note: See TracBrowser for help on using the repository browser.