Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: MondoRescue/branches/2.2.2/mindi-busybox/coreutils/diff.c@ 1247

Visit:

Last change on this file since 1247 was 821, checked in by Bruno Cornec, 18 years ago
Addition of busybox 1.2.1 as a mindi-busybox new package This should avoid delivering binary files in mindi not built there (Fedora and Debian are quite serious about that)
File size: 29.1 KB

Line
1	/* vi: set sw=4 ts=4: */
2	/*
3	* Mini diff implementation for busybox, adapted from OpenBSD diff.
4	*
5	* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
6	* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
7	*
8	* Sponsored in part by the Defense Advanced Research Projects
9	* Agency (DARPA) and Air Force Research Laboratory, Air Force
10	* Materiel Command, USAF, under agreement number F39502-99-1-0512.
11	*
12	* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
13	*/
14
15	#include <time.h>
16	#include <sys/types.h>
17	#include <sys/param.h>
18	#include <sys/stat.h>
19	#include <ctype.h>
20	#include <errno.h>
21	#include <signal.h>
22	#include <stdlib.h>
23	#include <stdio.h>
24	#include <stdarg.h>
25	#include <string.h>
26	#include <unistd.h>
27	#include <sys/wait.h>
28	#include <fcntl.h>
29	#include <stddef.h>
30	#include <paths.h>
31	#include <dirent.h>
32	#include "busybox.h"
33
/* Cap applied to the file size when prepare() sizes its initial line table. */
#define FSIZE_MAX	0x8000

/*
 * Output flags (bits of the "flags" argument passed to diffreg())
 */
#define D_HEADER	0x1		/* Print a header/footer between files */
#define D_EMPTY1	0x2		/* Treat first file as empty (/dev/null) */
#define D_EMPTY2	0x4		/* Treat second file as empty (/dev/null) */

/*
 * Status values for print_status() and diffreg() return values.
 * D_SAME is zero; every other status is a distinct single bit.
 */
#define D_SAME		0x000	/* files are the same */
#define D_DIFFER	0x001	/* files differ */
#define D_BINARY	0x002	/* binary files differ */
#define D_COMMON	0x004	/* subdirectory common to both dirs */
#define D_ONLY		0x008	/* file only exists in one dir */
#define D_MISMATCH1	0x010	/* path1 a dir, path2 a file */
#define D_MISMATCH2	0x020	/* path1 a file, path2 a dir */
#define D_ERROR		0x040	/* error occurred */
#define D_SKIPPED1	0x080	/* skipped path1 as it is a special file */
#define D_SKIPPED2	0x100	/* skipped path2 as it is a special file */

/* Command line options: bitmask filled in by diff_main(). */
static unsigned long cmd_flags;

/* One bit per option letter, tested against cmd_flags. */
#define FLAG_a	0x0001
#define FLAG_b	0x0002
#define FLAG_d	0x0004
#define FLAG_i	0x0008
#define FLAG_L	0x0010
#define FLAG_N	0x0020
#define FLAG_q	0x0040
#define FLAG_r	0x0080
#define FLAG_s	0x0100
#define FLAG_S	0x0200
#define FLAG_t	0x0400
#define FLAG_T	0x0800
#define FLAG_U	0x1000
#define FLAG_w	0x2000
86
int context, status;		/* -U context line count; exit status used by diff_main() */
char *start, *label[2];		/* -S starting name; up to two -L header labels */
struct stat stb1, stb2;		/* stat data of the two paths being compared */
char **dl;					/* scratch directory listing built by add_to_dirlist() */
int dl_count = 0;			/* number of entries currently in dl */

/* A k-candidate: matched line pair (x, y) chained to its predecessor. */
struct cand {
	int x;
	int y;
	int pred;
};

/* One input line: original position (serial) and hash (value). */
struct line {
	int serial;
	int value;
} *file[2];

/*
 * The following struct is used to record change information
 * doing a "context" or "unified" diff. (see routine "change" to
 * understand the highly mnemonic field names)
 */
struct context_vec {
	int a;	/* start line in old file */
	int b;	/* end line in old file */
	int c;	/* start line in new file */
	int d;	/* end line in new file */
};

static int *J;				/* will be overlaid on class */
static int *class;			/* will be overlaid on file[0] */
static int *klist;			/* will be overlaid on file[0] after class */
static int *member;			/* will be overlaid on file[1] */
static int clen;
static int len[2];
static int pref, suff;		/* length of prefix and suffix */
static int slen[2];
static int anychange;
static long *ixnew;			/* will be overlaid on file[1] */
static long *ixold;			/* will be overlaid on klist */
static struct cand *clist;	/* merely a free storage pot for candidates */
static int clistlen;		/* the length of clist */
static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
static struct context_vec *context_vec_start;
static struct context_vec *context_vec_end;
static struct context_vec *context_vec_ptr;
133
/*
 * Report an entry that is present in only one of the two directories.
 *
 * path   - directory name
 * dirlen - number of leading bytes of path to show; when it is greater
 *          than 1 it is reduced by one before printing, so callers pass
 *          a length that covers one extra trailing character
 * entry  - name of the entry inside that directory
 */
static void print_only(const char *path, size_t dirlen, const char *entry)
{
	if (dirlen > 1)
		dirlen--;
	printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
}
140
141	static void print_status(int val, char path1, char path2, char *entry)
142	{
143	const char *const _entry = entry ? entry : "";
144	char *_path1 = entry ? concat_path_file(path1, _entry) : path1;
145	char *_path2 = entry ? concat_path_file(path2, _entry) : path2;
146
147	switch (val) {
148	case D_ONLY:
149	print_only(path1, strlen(path1), entry);
150	break;
151	case D_COMMON:
152	printf("Common subdirectories: %s and %s\n", _path1, _path2);
153	break;
154	case D_BINARY:
155	printf("Binary files %s and %s differ\n", _path1, _path2);
156	break;
157	case D_DIFFER:
158	if (cmd_flags & FLAG_q)
159	printf("Files %s and %s differ\n", _path1, _path2);
160	break;
161	case D_SAME:
162	if (cmd_flags & FLAG_s)
163	printf("Files %s and %s are identical\n", _path1, _path2);
164	break;
165	case D_MISMATCH1:
166	printf("File %s is a directory while file %s is a regular file\n",
167	_path1, _path2);
168	break;
169	case D_MISMATCH2:
170	printf("File %s is a regular file while file %s is a directory\n",
171	_path1, _path2);
172	break;
173	case D_SKIPPED1:
174	printf("File %s is not a regular file or directory and was skipped\n",
175	_path1);
176	break;
177	case D_SKIPPED2:
178	printf("File %s is not a regular file or directory and was skipped\n",
179	_path2);
180	break;
181	}
182	if (entry) {
183	free(_path1);
184	free(_path2);
185	}
186	}
187
188	/*
189	* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
190	*/
191	static int readhash(FILE * f)
192	{
193	int i, t, space;
194	int sum;
195
196	sum = 1;
197	space = 0;
198	if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
199	if (FLAG_i)
200	for (i = 0; (t = getc(f)) != '\n'; i++) {
201	if (t == EOF) {
202	if (i == 0)
203	return (0);
204	break;
205	}
206	sum = sum * 127 + t;
207	} else
208	for (i = 0; (t = getc(f)) != '\n'; i++) {
209	if (t == EOF) {
210	if (i == 0)
211	return (0);
212	break;
213	}
214	sum = sum * 127 + t;
215	}
216	} else {
217	for (i = 0;;) {
218	switch (t = getc(f)) {
219	case '\t':
220	case '\r':
221	case '\v':
222	case '\f':
223	case ' ':
224	space++;
225	continue;
226	default:
227	if (space && !(cmd_flags & FLAG_w)) {
228	i++;
229	space = 0;
230	}
231	sum = sum * 127 + t;
232	i++;
233	continue;
234	case EOF:
235	if (i == 0)
236	return (0);
237	/* FALLTHROUGH */
238	case '\n':
239	break;
240	}
241	break;
242	}
243	}
244	/*
245	* There is a remote possibility that we end up with a zero sum.
246	* Zero is used as an EOF marker, so return 1 instead.
247	*/
248	return (sum == 0 ? 1 : sum);
249	}
250
251
252
253	/*
254	* Check to see if the given files differ.
255	* Returns 0 if they are the same, 1 if different, and -1 on error.
256	*/
257	static int files_differ(FILE * f1, FILE * f2, int flags)
258	{
259	char buf1[BUFSIZ], buf2[BUFSIZ];
260	size_t i, j;
261
262	if ((flags & (D_EMPTY1 \| D_EMPTY2)) \|\| stb1.st_size != stb2.st_size \|\|
263	(stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
264	return (1);
265	while (1) {
266	i = fread(buf1, 1, sizeof(buf1), f1);
267	j = fread(buf2, 1, sizeof(buf2), f2);
268	if (i != j)
269	return (1);
270	if (i == 0 && j == 0) {
271	if (ferror(f1) \|\| ferror(f2))
272	return (1);
273	return (0);
274	}
275	if (memcmp(buf1, buf2, i) != 0)
276	return (1);
277	}
278	}
279
280	static void prepare(int i, FILE * fd, off_t filesize)
281	{
282	struct line *p;
283	int h;
284	size_t j, sz;
285
286	rewind(fd);
287
288	sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
289	if (sz < 100)
290	sz = 100;
291
292	p = xmalloc((sz + 3) * sizeof(struct line));
293	for (j = 0; (h = readhash(fd));) {
294	if (j == sz) {
295	sz = sz * 3 / 2;
296	p = xrealloc(p, (sz + 3) * sizeof(struct line));
297	}
298	p[++j].value = h;
299	}
300	len[i] = j;
301	file[i] = p;
302	}
303
304	static void prune(void)
305	{
306	int i, j;
307
308	for (pref = 0; pref < len[0] && pref < len[1] &&
309	file[0][pref + 1].value == file[1][pref + 1].value; pref++);
310	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
311	file[0][len[0] - suff].value == file[1][len[1] - suff].value;
312	suff++);
313	for (j = 0; j < 2; j++) {
314	sfile[j] = file[j] + pref;
315	slen[j] = len[j] - pref - suff;
316	for (i = 0; i <= slen[j]; i++)
317	sfile[j][i].serial = i;
318	}
319	}
320
321	static void equiv(struct line a, int n, struct line b, int m, int *c)
322	{
323	int i, j;
324
325	i = j = 1;
326	while (i <= n && j <= m) {
327	if (a[i].value < b[j].value)
328	a[i++].value = 0;
329	else if (a[i].value == b[j].value)
330	a[i++].value = j;
331	else
332	j++;
333	}
334	while (i <= n)
335	a[i++].value = 0;
336	b[m + 1].value = 0;
337	j = 0;
338	while (++j <= m) {
339	c[j] = -b[j].serial;
340	while (b[j + 1].value == b[j].value) {
341	j++;
342	c[j] = b[j].serial;
343	}
344	}
345	c[j] = -1;
346	}
347
/*
 * Approximate integer square root of n.
 * Starting from 1, the estimate is repeatedly replaced by the average of
 * itself and n divided by itself, until two successive estimates differ
 * by at most one.  Returns 0 for n == 0.
 */
static int isqrt(int n)
{
	int prev, cur;

	if (n == 0)
		return 0;

	cur = 1;
	for (;;) {
		prev = cur;
		cur = (n / prev + prev) / 2;
		if (cur - prev <= 1 && cur - prev >= -1)
			break;
	}
	return cur;
}
364
365	static int newcand(int x, int y, int pred)
366	{
367	struct cand *q;
368
369	if (clen == clistlen) {
370	clistlen = clistlen * 11 / 10;
371	clist = xrealloc(clist, clistlen * sizeof(struct cand));
372	}
373	q = clist + clen;
374	q->x = x;
375	q->y = y;
376	q->pred = pred;
377	return (clen++);
378	}
379
380
381	static int search(int *c, int k, int y)
382	{
383	int i, j, l, t;
384
385	if (clist[c[k]].y < y) /* quick look for typical case */
386	return (k + 1);
387	i = 0;
388	j = k + 1;
389	while (1) {
390	l = i + j;
391	if ((l >>= 1) <= i)
392	break;
393	t = clist[c[l]].y;
394	if (t > y)
395	j = l;
396	else if (t < y)
397	i = l;
398	else
399	return (l);
400	}
401	return (l + 1);
402	}
403
404
405	static int stone(int a, int n, int b, int *c)
406	{
407	int i, k, y, j, l;
408	int oldc, tc, oldl;
409	unsigned int numtries;
410
411	#if ENABLE_FEATURE_DIFF_MINIMAL
412	const unsigned int bound =
413	(cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
414	#else
415	const unsigned int bound = MAX(256, isqrt(n));
416	#endif
417	k = 0;
418	c[0] = newcand(0, 0, 0);
419	for (i = 1; i <= n; i++) {
420	j = a[i];
421	if (j == 0)
422	continue;
423	y = -b[j];
424	oldl = 0;
425	oldc = c[0];
426	numtries = 0;
427	do {
428	if (y <= clist[oldc].y)
429	continue;
430	l = search(c, k, y);
431	if (l != oldl + 1)
432	oldc = c[l - 1];
433	if (l <= k) {
434	if (clist[c[l]].y <= y)
435	continue;
436	tc = c[l];
437	c[l] = newcand(i, y, oldc);
438	oldc = tc;
439	oldl = l;
440	numtries++;
441	} else {
442	c[l] = newcand(i, y, oldc);
443	k++;
444	break;
445	}
446	} while ((y = b[++j]) > 0 && numtries < bound);
447	}
448	return (k);
449	}
450
451	static void unravel(int p)
452	{
453	struct cand *q;
454	int i;
455
456	for (i = 0; i <= len[0]; i++)
457	J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
458	for (q = clist + p; q->y != 0; q = clist + q->pred)
459	J[q->x + pref] = q->y + pref;
460	}
461
462
463	static void unsort(struct line f, int l, int b)
464	{
465	int *a, i;
466
467	a = xmalloc((l + 1) * sizeof(int));
468	for (i = 1; i <= l; i++)
469	a[f[i].serial] = f[i].value;
470	for (i = 1; i <= l; i++)
471	b[i] = a[i];
472	free(a);
473	}
474
/*
 * Consume the rest of the current line of f.
 * Returns the number of characters read up to the newline, plus one;
 * hitting EOF instead of a newline still counts as that final one.
 */
static int skipline(FILE * f)
{
	int consumed = 1;
	int ch;

	for (;;) {
		ch = getc(f);
		if (ch == '\n' || ch == EOF)
			break;
		consumed++;
	}
	return consumed;
}
483
484
485	/*
486	* Check does double duty:
487	* 1. ferret out any fortuitous correspondences due
488	* to confounding by hashing (which result in "jackpot")
489	* 2. collect random access indexes to the two files
490	*/
491	static void check(FILE * f1, FILE * f2)
492	{
493	int i, j, jackpot, c, d;
494	long ctold, ctnew;
495
496	rewind(f1);
497	rewind(f2);
498	j = 1;
499	ixold[0] = ixnew[0] = 0;
500	jackpot = 0;
501	ctold = ctnew = 0;
502	for (i = 1; i <= len[0]; i++) {
503	if (J[i] == 0) {
504	ixold[i] = ctold += skipline(f1);
505	continue;
506	}
507	while (j < J[i]) {
508	ixnew[j] = ctnew += skipline(f2);
509	j++;
510	}
511	if ((cmd_flags & FLAG_b) \|\| (cmd_flags & FLAG_w)
512	\|\| (cmd_flags & FLAG_i)) {
513	while (1) {
514	c = getc(f1);
515	d = getc(f2);
516	/*
517	* GNU diff ignores a missing newline
518	* in one file if bflag \|\| wflag.
519	*/
520	if (((cmd_flags & FLAG_b) \|\| (cmd_flags & FLAG_w)) &&
521	((c == EOF && d == '\n') \|\| (c == '\n' && d == EOF))) {
522	break;
523	}
524	ctold++;
525	ctnew++;
526	if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
527	do {
528	if (c == '\n')
529	break;
530	ctold++;
531	} while (isspace(c = getc(f1)));
532	do {
533	if (d == '\n')
534	break;
535	ctnew++;
536	} while (isspace(d = getc(f2)));
537	} else if (cmd_flags & FLAG_w) {
538	while (isspace(c) && c != '\n') {
539	c = getc(f1);
540	ctold++;
541	}
542	while (isspace(d) && d != '\n') {
543	d = getc(f2);
544	ctnew++;
545	}
546	}
547	if (c != d) {
548	jackpot++;
549	J[i] = 0;
550	if (c != '\n' && c != EOF)
551	ctold += skipline(f1);
552	if (d != '\n' && c != EOF)
553	ctnew += skipline(f2);
554	break;
555	}
556	if (c == '\n' \|\| c == EOF)
557	break;
558	}
559	} else {
560	while (1) {
561	ctold++;
562	ctnew++;
563	if ((c = getc(f1)) != (d = getc(f2))) {
564	J[i] = 0;
565	if (c != '\n' && c != EOF)
566	ctold += skipline(f1);
567	if (d != '\n' && c != EOF)
568	ctnew += skipline(f2);
569	break;
570	}
571	if (c == '\n' \|\| c == EOF)
572	break;
573	}
574	}
575	ixold[i] = ctold;
576	ixnew[j] = ctnew;
577	j++;
578	}
579	for (; j <= len[1]; j++)
580	ixnew[j] = ctnew += skipline(f2);
581	}
582
583	/* shellsort CACM #201 */
584	static void sort(struct line *a, int n)
585	{
586	struct line ai, aim, w;
587	int j, m = 0, k;
588
589	if (n == 0)
590	return;
591	for (j = 1; j <= n; j *= 2)
592	m = 2 * j - 1;
593	for (m /= 2; m != 0; m /= 2) {
594	k = n - m;
595	for (j = 1; j <= k; j++) {
596	for (ai = &a[j]; ai > a; ai -= m) {
597	aim = &ai[m];
598	if (aim < ai)
599	break; /* wraparound */
600	if (aim->value > ai[0].value \|\|
601	(aim->value == ai[0].value && aim->serial > ai[0].serial))
602	break;
603	w.value = ai[0].value;
604	ai[0].value = aim->value;
605	aim->value = w.value;
606	w.serial = ai[0].serial;
607	ai[0].serial = aim->serial;
608	aim->serial = w.serial;
609	}
610	}
611	}
612	}
613
614
/*
 * Print the line range a..b the way a unified hunk header wants it:
 * "a,count" for several lines, just the line number for a single line,
 * and "b,0" when the range is empty (a > b).
 */
static void uni_range(int a, int b)
{
	if (a > b) {
		printf("%d,0", b);
		return;
	}
	if (a == b) {
		printf("%d", b);
		return;
	}
	printf("%d,%d", a, b - a + 1);
}
624
625	static int fetch(long f, int a, int b, FILE lb, int ch)
626	{
627	int i, j, c, lastc, col, nc;
628
629	if (a > b)
630	return (0);
631	for (i = a; i <= b; i++) {
632	fseek(lb, f[i - 1], SEEK_SET);
633	nc = f[i] - f[i - 1];
634	if (ch != '\0') {
635	putchar(ch);
636	if (cmd_flags & FLAG_T)
637	putchar('\t');
638	}
639	col = 0;
640	for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
641	if ((c = getc(lb)) == EOF) {
642	puts("\n\\ No newline at end of file");
643	return (0);
644	}
645	if (c == '\t' && (cmd_flags & FLAG_t)) {
646	do {
647	putchar(' ');
648	} while (++col & 7);
649	} else {
650	putchar(c);
651	col++;
652	}
653	}
654	}
655	return (0);
656	}
657
658	static int asciifile(FILE * f)
659	{
660	#if ENABLE_FEATURE_DIFF_BINARY
661	unsigned char buf[BUFSIZ];
662	int i, cnt;
663	#endif
664
665	if ((cmd_flags & FLAG_a) \|\| f == NULL)
666	return (1);
667
668	#if ENABLE_FEATURE_DIFF_BINARY
669	rewind(f);
670	cnt = fread(buf, 1, sizeof(buf), f);
671	for (i = 0; i < cnt; i++) {
672	if (!isprint(buf[i]) && !isspace(buf[i])) {
673	return (0);
674	}
675	}
676	#endif
677	return (1);
678	}
679
680	/* dump accumulated "unified" diff changes */
681	static void dump_unified_vec(FILE * f1, FILE * f2)
682	{
683	struct context_vec *cvp = context_vec_start;
684	int lowa, upb, lowc, upd;
685	int a, b, c, d;
686	char ch;
687
688	if (context_vec_start > context_vec_ptr)
689	return;
690
691	b = d = 0; /* gcc */
692	lowa = MAX(1, cvp->a - context);
693	upb = MIN(len[0], context_vec_ptr->b + context);
694	lowc = MAX(1, cvp->c - context);
695	upd = MIN(len[1], context_vec_ptr->d + context);
696
697	fputs("@@ -", stdout);
698	uni_range(lowa, upb);
699	fputs(" +", stdout);
700	uni_range(lowc, upd);
701	fputs(" @@", stdout);
702	putchar('\n');
703
704	/*
705	* Output changes in "unified" diff format--the old and new lines
706	* are printed together.
707	*/
708	for (; cvp <= context_vec_ptr; cvp++) {
709	a = cvp->a;
710	b = cvp->b;
711	c = cvp->c;
712	d = cvp->d;
713
714	/*
715	* c: both new and old changes
716	* d: only changes in the old file
717	* a: only changes in the new file
718	*/
719	if (a <= b && c <= d)
720	ch = 'c';
721	else
722	ch = (a <= b) ? 'd' : 'a';
723	#if 0
724	switch (ch) {
725	case 'c':
726	fetch(ixold, lowa, a - 1, f1, ' ');
727	fetch(ixold, a, b, f1, '-');
728	fetch(ixnew, c, d, f2, '+');
729	break;
730	case 'd':
731	fetch(ixold, lowa, a - 1, f1, ' ');
732	fetch(ixold, a, b, f1, '-');
733	break;
734	case 'a':
735	fetch(ixnew, lowc, c - 1, f2, ' ');
736	fetch(ixnew, c, d, f2, '+');
737	break;
738	}
739	#else
740	if (ch == 'c' \|\| ch == 'd') {
741	fetch(ixold, lowa, a - 1, f1, ' ');
742	fetch(ixold, a, b, f1, '-');
743	}
744	if (ch == 'a')
745	fetch(ixnew, lowc, c - 1, f2, ' ');
746	if (ch == 'c' \|\| ch == 'a')
747	fetch(ixnew, c, d, f2, '+');
748	#endif
749	lowa = b + 1;
750	lowc = d + 1;
751	}
752	fetch(ixnew, d + 1, upd, f2, ' ');
753
754	context_vec_ptr = context_vec_start - 1;
755	}
756
757
758	static void print_header(const char file1, const char file2)
759	{
760	if (label[0] != NULL)
761	printf("%s %s\n", "---", label[0]);
762	else
763	printf("%s %s\t%s", "---", file1, ctime(&stb1.st_mtime));
764	if (label[1] != NULL)
765	printf("%s %s\n", "+++", label[1]);
766	else
767	printf("%s %s\t%s", "+++", file2, ctime(&stb2.st_mtime));
768	}
769
770
771
772	/*
773	* Indicate that there is a difference between lines a and b of the from file
774	* to get to lines c to d of the to file. If a is greater then b then there
775	* are no lines in the from file involved and this means that there were
776	* lines appended (beginning at b). If c is greater than d then there are
777	* lines missing from the to file.
778	*/
779	static void change(char file1, FILE f1, char file2, FILE f2, int a,
780	int b, int c, int d)
781	{
782	static size_t max_context = 64;
783
784	if (a > b && c > d)
785	return;
786	if (cmd_flags & FLAG_q)
787	return;
788
789	/*
790	* Allocate change records as needed.
791	*/
792	if (context_vec_ptr == context_vec_end - 1) {
793	ptrdiff_t offset = context_vec_ptr - context_vec_start;
794
795	max_context <<= 1;
796	context_vec_start = xrealloc(context_vec_start,
797	max_context *
798	sizeof(struct context_vec));
799	context_vec_end = context_vec_start + max_context;
800	context_vec_ptr = context_vec_start + offset;
801	}
802	if (anychange == 0) {
803	/*
804	* Print the context/unidiff header first time through.
805	*/
806	print_header(file1, file2);
807	anychange = 1;
808	} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
809	c > context_vec_ptr->d + (2 * context) + 1) {
810	/*
811	* If this change is more than 'context' lines from the
812	* previous change, dump the record and reset it.
813	*/
814	dump_unified_vec(f1, f2);
815	}
816	context_vec_ptr++;
817	context_vec_ptr->a = a;
818	context_vec_ptr->b = b;
819	context_vec_ptr->c = c;
820	context_vec_ptr->d = d;
821	return;
822
823	}
824
825
826	static void output(char file1, FILE f1, char file2, FILE f2)
827	{
828
829	/* Note that j0 and j1 can't be used as they are defined in math.h.
830	* This also allows the rather amusing variable 'j00'... */
831	int m, i0, i1, j00, j01;
832
833	rewind(f1);
834	rewind(f2);
835	m = len[0];
836	J[0] = 0;
837	J[m + 1] = len[1] + 1;
838	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
839	while (i0 <= m && J[i0] == J[i0 - 1] + 1)
840	i0++;
841	j00 = J[i0 - 1] + 1;
842	i1 = i0 - 1;
843	while (i1 < m && J[i1 + 1] == 0)
844	i1++;
845	j01 = J[i1 + 1] - 1;
846	J[i1] = j01;
847	change(file1, f1, file2, f2, i0, i1, j00, j01);
848	}
849	if (m == 0) {
850	change(file1, f1, file2, f2, 1, 0, 1, len[1]);
851	}
852	if (anychange != 0) {
853	dump_unified_vec(f1, f2);
854	}
855	}
856
857	/*
858	* The following code uses an algorithm due to Harold Stone,
859	* which finds a pair of longest identical subsequences in
860	* the two files.
861	*
862	* The major goal is to generate the match vector J.
863	* J[i] is the index of the line in file1 corresponding
864	* to line i file0. J[i] = 0 if there is no
865	* such line in file1.
866	*
867	* Lines are hashed so as to work in core. All potential
868	* matches are located by sorting the lines of each file
869	* on the hash (called ``value''). In particular, this
870	* collects the equivalence classes in file1 together.
871	* Subroutine equiv replaces the value of each line in
872	* file0 by the index of the first element of its
873	* matching equivalence in (the reordered) file1.
874	* To save space equiv squeezes file1 into a single
875	* array member in which the equivalence classes
876	* are simply concatenated, except that their first
877	* members are flagged by changing sign.
878	*
879	* Next the indices that point into member are unsorted into
880	* array class according to the original order of file0.
881	*
882	* The cleverness lies in routine stone. This marches
883	* through the lines of file0, developing a vector klist
884	* of "k-candidates". At step i a k-candidate is a matched
885	* pair of lines x,y (x in file0 y in file1) such that
886	* there is a common subsequence of length k
887	* between the first i lines of file0 and the first y
888	* lines of file1, but there is no such subsequence for
889	* any smaller y. x is the earliest possible mate to y
890	* that occurs in such a subsequence.
891	*
892	* Whenever any of the members of the equivalence class of
893	* lines in file1 matable to a line in file0 has serial number
894	* less than the y of some k-candidate, that k-candidate
895	* with the smallest such y is replaced. The new
896	* k-candidate is chained (via pred) to the current
897	* k-1 candidate so that the actual subsequence can
898	* be recovered. When a member has serial number greater
899	* than the y of all k-candidates, the klist is extended.
900	* At the end, the longest subsequence is pulled out
901	* and placed in the array J by unravel
902	*
903	* With J in hand, the matches there recorded are
904	* checked against reality to assure that no spurious
905	* matches have crept in due to hashing. If they have,
906	* they are broken, and "jackpot" is recorded--a harmless
907	* matter except that a true match for a spuriously
908	* mated line may now be unnecessarily reported as a change.
909	*
910	* Much of the complexity of the program comes simply
911	* from trying to minimize core utilization and
912	* maximize the range of doable problems by dynamically
913	* allocating what is needed and reusing what is not.
914	* The core requirements for problems larger than somewhat
915	* are (in words) 2*length(file0) + length(file1) +
916	* 3*(number of k-candidates installed), typically about
917	* 6n words for files of length n.
918	*/
919
920	static int diffreg(char ofile1, char ofile2, int flags)
921	{
922	char *file1 = ofile1;
923	char *file2 = ofile2;
924	FILE *f1 = NULL;
925	FILE *f2 = NULL;
926	int rval = D_SAME;
927	int i;
928
929	anychange = 0;
930	context_vec_ptr = context_vec_start - 1;
931
932	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
933	return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
934	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
935	goto closem;
936
937	if (flags & D_EMPTY1)
938	f1 = bb_xfopen(bb_dev_null, "r");
939	else {
940	if (strcmp(file1, "-") == 0)
941	f1 = stdin;
942	else
943	f1 = bb_xfopen(file1, "r");
944	}
945
946	if (flags & D_EMPTY2)
947	f2 = bb_xfopen(bb_dev_null, "r");
948	else {
949	if (strcmp(file2, "-") == 0)
950	f2 = stdin;
951	else
952	f2 = bb_xfopen(file2, "r");
953	}
954
955	if ((i = files_differ(f1, f2, flags)) == 0)
956	goto closem;
957	else if (i != 1) { /* 1 == ok */
958	/* error */
959	status \|= 2;
960	goto closem;
961	}
962
963	if (!asciifile(f1) \|\| !asciifile(f2)) {
964	rval = D_BINARY;
965	status \|= 1;
966	goto closem;
967	}
968
969	prepare(0, f1, stb1.st_size);
970	prepare(1, f2, stb2.st_size);
971	prune();
972	sort(sfile[0], slen[0]);
973	sort(sfile[1], slen[1]);
974
975	member = (int *) file[1];
976	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
977	member = xrealloc(member, (slen[1] + 2) * sizeof(int));
978
979	class = (int *) file[0];
980	unsort(sfile[0], slen[0], class);
981	class = xrealloc(class, (slen[0] + 2) * sizeof(int));
982
983	klist = xmalloc((slen[0] + 2) * sizeof(int));
984	clen = 0;
985	clistlen = 100;
986	clist = xmalloc(clistlen * sizeof(struct cand));
987	i = stone(class, slen[0], member, klist);
988	free(member);
989	free(class);
990
991	J = xrealloc(J, (len[0] + 2) * sizeof(int));
992	unravel(klist[i]);
993	free(clist);
994	free(klist);
995
996	ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
997	ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
998	check(f1, f2);
999	output(file1, f1, file2, f2);
1000
1001	closem:
1002	if (anychange) {
1003	status \|= 1;
1004	if (rval == D_SAME)
1005	rval = D_DIFFER;
1006	}
1007	if (f1 != NULL)
1008	fclose(f1);
1009	if (f2 != NULL)
1010	fclose(f2);
1011	if (file1 != ofile1)
1012	free(file1);
1013	if (file2 != ofile2)
1014	free(file2);
1015	return (rval);
1016	}
1017
1018	#if ENABLE_FEATURE_DIFF_DIR
1019	static void do_diff(char dir1, char path1, char dir2, char path2)
1020	{
1021
1022	int flags = D_HEADER;
1023	int val;
1024
1025	char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
1026	char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
1027
1028	if (stat(fullpath1, &stb1) != 0) {
1029	flags \|= D_EMPTY1;
1030	memset(&stb1, 0, sizeof(stb1));
1031	fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
1032	}
1033	if (stat(fullpath2, &stb2) != 0) {
1034	flags \|= D_EMPTY2;
1035	memset(&stb2, 0, sizeof(stb2));
1036	stb2.st_mode = stb1.st_mode;
1037	fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
1038	}
1039
1040	if (stb1.st_mode == 0)
1041	stb1.st_mode = stb2.st_mode;
1042
1043	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1044	printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1045	return;
1046	}
1047
1048	if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1049	val = D_SKIPPED1;
1050	else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1051	val = D_SKIPPED2;
1052	else
1053	val = diffreg(fullpath1, fullpath2, flags);
1054
1055	print_status(val, fullpath1, fullpath2, NULL);
1056	}
1057	#endif
1058
1059	#if ENABLE_FEATURE_DIFF_DIR
/*
 * qsort() comparison callback for an array of C strings: p1 and p2 point
 * at two elements of the array, i.e. at two "char *" values.
 */
static int dir_strcmp(const void *p1, const void *p2)
{
	return strcmp(*(char *const *) p1, *(char *const *) p2);
}
1064
1065	/* This function adds a filename to dl, the directory listing. */
1066
1067	static int add_to_dirlist(const char *filename,
1068	struct stat ATTRIBUTE_UNUSED * sb, void *userdata)
1069	{
1070	dl_count++;
1071	dl = xrealloc(dl, dl_count * sizeof(char *));
1072	dl[dl_count - 1] = bb_xstrdup(filename);
1073	if (cmd_flags & FLAG_r) {
1074	int pp = (int ) userdata;
1075	int path_len = *pp + 1;
1076
1077	dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
1078	}
1079	return TRUE;
1080	}
1081
1082	/* This returns a sorted directory listing. */
1083	static char *get_dir(char path)
1084	{
1085
1086	int i;
1087	char **retval;
1088
1089	/* If -r has been set, then the recursive_action function will be
1090	* used. Unfortunately, this outputs the root directory along with
1091	* the recursed paths, so use void *userdata to specify the string
1092	* length of the root directory. It can then be removed in
1093	* add_to_dirlist. */
1094
1095	int path_len = strlen(path);
1096	void *userdata = &path_len;
1097
1098	/* Reset dl_count - there's no need to free dl as bb_xrealloc does
1099	* the job nicely. */
1100	dl_count = 0;
1101
1102	/* Now fill dl with a listing. */
1103	if (cmd_flags & FLAG_r)
1104	recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL,
1105	userdata);
1106	else {
1107	DIR *dp;
1108	struct dirent *ep;
1109
1110	dp = bb_opendir(path);
1111	while ((ep = readdir(dp))) {
1112	if ((!strcmp(ep->d_name, "..")) \|\| (!strcmp(ep->d_name, ".")))
1113	continue;
1114	add_to_dirlist(ep->d_name, NULL, NULL);
1115	}
1116	closedir(dp);
1117	}
1118
1119	/* Sort dl alphabetically. */
1120	qsort(dl, dl_count, sizeof(char *), dir_strcmp);
1121
1122	/* Copy dl so that we can return it. */
1123	retval = xmalloc(dl_count * sizeof(char *));
1124	for (i = 0; i < dl_count; i++)
1125	retval[i] = bb_xstrdup(dl[i]);
1126
1127	return retval;
1128	}
1129
1130	static void diffdir(char p1, char p2)
1131	{
1132
1133	char dirlist1, dirlist2;
1134	char dp1, dp2;
1135	int dirlist1_count, dirlist2_count;
1136	int pos;
1137
1138	/* Check for trailing slashes. */
1139
1140	if (p1[strlen(p1) - 1] == '/')
1141	p1[strlen(p1) - 1] = '\0';
1142	if (p2[strlen(p2) - 1] == '/')
1143	p2[strlen(p2) - 1] = '\0';
1144
1145	/* Get directory listings for p1 and p2. */
1146
1147	dirlist1 = get_dir(p1);
1148	dirlist1_count = dl_count;
1149	dirlist1[dirlist1_count] = NULL;
1150	dirlist2 = get_dir(p2);
1151	dirlist2_count = dl_count;
1152	dirlist2[dirlist2_count] = NULL;
1153
1154	/* If -S was set, find the starting point. */
1155	if (start) {
1156	while (dirlist1 != NULL && strcmp(dirlist1, start) < 0)
1157	dirlist1++;
1158	while (dirlist2 != NULL && strcmp(dirlist2, start) < 0)
1159	dirlist2++;
1160	if ((dirlist1 == NULL) \|\| (dirlist2 == NULL))
1161	bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
1162	}
1163
1164	/* Now that both dirlist1 and dirlist2 contain sorted directory
1165	* listings, we can start to go through dirlist1. If both listings
1166	* contain the same file, then do a normal diff. Otherwise, behaviour
1167	* is determined by whether the -N flag is set. */
1168	while (dirlist1 != NULL \|\| dirlist2 != NULL) {
1169	dp1 = *dirlist1;
1170	dp2 = *dirlist2;
1171	pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
1172	if (pos == 0) {
1173	do_diff(p1, dp1, p2, dp2);
1174	dirlist1++;
1175	dirlist2++;
1176	} else if (pos < 0) {
1177	if (cmd_flags & FLAG_N)
1178	do_diff(p1, dp1, p2, NULL);
1179	else
1180	print_only(p1, strlen(p1) + 1, dp1);
1181	dirlist1++;
1182	} else {
1183	if (cmd_flags & FLAG_N)
1184	do_diff(p1, NULL, p2, dp2);
1185	else
1186	print_only(p2, strlen(p2) + 1, dp2);
1187	dirlist2++;
1188	}
1189	}
1190	}
1191	#endif
1192
1193
1194
1195	int diff_main(int argc, char **argv)
1196	{
1197	int gotstdin = 0;
1198
1199	char *U_opt;
1200	llist_t *L_arg = NULL;
1201
1202	bb_opt_complementally = "L::";
1203	cmd_flags =
1204	bb_getopt_ulflags(argc, argv, "abdiL:NqrsS:tTU:wu", &L_arg, &start,
1205	&U_opt);
1206
1207	if (cmd_flags & FLAG_L) {
1208	while (L_arg) {
1209	if (label[0] == NULL)
1210	label[0] = L_arg->data;
1211	else if (label[1] == NULL)
1212	label[1] = L_arg->data;
1213	else
1214	bb_show_usage();
1215
1216	L_arg = L_arg->link;
1217	}
1218
1219	/* If both label[0] and label[1] were set, they need to be swapped. */
1220	if (label[0] && label[1]) {
1221	char *tmp;
1222
1223	tmp = label[1];
1224	label[1] = label[0];
1225	label[0] = tmp;
1226	}
1227	}
1228
1229	context = 3; /* This is the default number of lines of context. */
1230	if (cmd_flags & FLAG_U) {
1231	context = bb_xgetlarg(U_opt, 10, 1, INT_MAX);
1232	}
1233	argc -= optind;
1234	argv += optind;
1235
1236	/*
1237	* Do sanity checks, fill in stb1 and stb2 and call the appropriate
1238	* driver routine. Both drivers use the contents of stb1 and stb2.
1239	*/
1240	if (argc < 2) {
1241	bb_error_msg("Missing filename");
1242	bb_show_usage();
1243	}
1244	if (strcmp(argv[0], "-") == 0) {
1245	fstat(STDIN_FILENO, &stb1);
1246	gotstdin = 1;
1247	} else
1248	xstat(argv[0], &stb1);
1249	if (strcmp(argv[1], "-") == 0) {
1250	fstat(STDIN_FILENO, &stb2);
1251	gotstdin = 1;
1252	} else
1253	xstat(argv[1], &stb2);
1254	if (gotstdin && (S_ISDIR(stb1.st_mode) \|\| S_ISDIR(stb2.st_mode)))
1255	bb_error_msg_and_die("Can't compare - to a directory");
1256	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1257	#if ENABLE_FEATURE_DIFF_DIR
1258	diffdir(argv[0], argv[1]);
1259	#else
1260	bb_error_msg_and_die("Directory comparison not supported");
1261	#endif
1262	} else {
1263	if (S_ISDIR(stb1.st_mode)) {
1264	argv[0] = concat_path_file(argv[0], argv[1]);
1265	xstat(argv[0], &stb1);
1266	}
1267	if (S_ISDIR(stb2.st_mode)) {
1268	argv[1] = concat_path_file(argv[1], argv[0]);
1269	xstat(argv[1], &stb2);
1270	}
1271	print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
1272	}
1273	exit(status);
1274	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: