Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: MondoRescue/branches/stable/mindi-busybox/coreutils/diff.c@ 1705

Visit:

Last change on this file since 1705 was 821, checked in by Bruno Cornec, 18 years ago
Addition of busybox 1.2.1 as a mindi-busybox new package This should avoid delivering binary files in mindi not built there (Fedora and Debian are quite serious about that)
File size: 29.1 KB

Rev	Line
[821]	1	/* vi: set sw=4 ts=4: */
	2	/*
	3	* Mini diff implementation for busybox, adapted from OpenBSD diff.
	4	*
	5	* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
	6	* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
	7	*
	8	* Sponsored in part by the Defense Advanced Research Projects
	9	* Agency (DARPA) and Air Force Research Laboratory, Air Force
	10	* Materiel Command, USAF, under agreement number F39502-99-1-0512.
	11	*
	12	* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
	13	*/
	14
	15	#include <time.h>
	16	#include <sys/types.h>
	17	#include <sys/param.h>
	18	#include <sys/stat.h>
	19	#include <ctype.h>
	20	#include <errno.h>
	21	#include <signal.h>
	22	#include <stdlib.h>
	23	#include <stdio.h>
	24	#include <stdarg.h>
	25	#include <string.h>
	26	#include <unistd.h>
	27	#include <sys/wait.h>
	28	#include <fcntl.h>
	29	#include <stddef.h>
	30	#include <paths.h>
	31	#include <dirent.h>
	32	#include "busybox.h"
	33
	34	#define FSIZE_MAX 32768
	35
	36	/*
	37	* Output flags
	38	*/
	39	#define D_HEADER 1 /* Print a header/footer between files */
	40	#define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
	41	#define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
	42
	43	/*
	44	* Status values for print_status() and diffreg() return values
	45	* Guide:
	46	* D_SAME - files are the same
	47	* D_DIFFER - files differ
	48	* D_BINARY - binary files differ
	49	* D_COMMON - subdirectory common to both dirs
	50	* D_ONLY - file only exists in one dir
	51	* D_MISMATCH1 - path1 a dir, path2 a file
	52	* D_MISMATCH2 - path1 a file, path2 a dir
	53	* D_ERROR - error occurred
	54	* D_SKIPPED1 - skipped path1 as it is a special file
	55	* D_SKIPPED2 - skipped path2 as it is a special file
	56	*/
	57
	58	#define D_SAME 0
	59	#define D_DIFFER (1<<0)
	60	#define D_BINARY (1<<1)
	61	#define D_COMMON (1<<2)
	62	#define D_ONLY (1<<3)
	63	#define D_MISMATCH1 (1<<4)
	64	#define D_MISMATCH2 (1<<5)
	65	#define D_ERROR (1<<6)
	66	#define D_SKIPPED1 (1<<7)
	67	#define D_SKIPPED2 (1<<8)
	68
	69	/* Command line options */
	70	static unsigned long cmd_flags;
	71
	72	#define FLAG_a (1<<0)
	73	#define FLAG_b (1<<1)
	74	#define FLAG_d (1<<2)
	75	#define FLAG_i (1<<3)
	76	#define FLAG_L (1<<4)
	77	#define FLAG_N (1<<5)
	78	#define FLAG_q (1<<6)
	79	#define FLAG_r (1<<7)
	80	#define FLAG_s (1<<8)
	81	#define FLAG_S (1<<9)
	82	#define FLAG_t (1<<10)
	83	#define FLAG_T (1<<11)
	84	#define FLAG_U (1<<12)
	85	#define FLAG_w (1<<13)
	86
	87	int context, status;
	88	char start, label[2];
	89	struct stat stb1, stb2;
	90	char **dl;
	91	int dl_count = 0;
	92
	93	struct cand {
	94	int x;
	95	int y;
	96	int pred;
	97	};
	98
	99	struct line {
	100	int serial;
	101	int value;
	102	} *file[2];
	103
	104	/*
	105	* The following struct is used to record change information
	106	* doing a "context" or "unified" diff. (see routine "change" to
	107	* understand the highly mnemonic field names)
	108	*/
	109	struct context_vec {
	110	int a; /* start line in old file */
	111	int b; /* end line in old file */
	112	int c; /* start line in new file */
	113	int d; /* end line in new file */
	114	};
	115
	116	static int J; / will be overlaid on class */
	117	static int class; / will be overlaid on file[0] */
	118	static int klist; / will be overlaid on file[0] after class */
	119	static int member; / will be overlaid on file[1] */
	120	static int clen;
	121	static int len[2];
	122	static int pref, suff; /* length of prefix and suffix */
	123	static int slen[2];
	124	static int anychange;
	125	static long ixnew; / will be overlaid on file[1] */
	126	static long ixold; / will be overlaid on klist */
	127	static struct cand clist; / merely a free storage pot for candidates */
	128	static int clistlen; /* the length of clist */
	129	static struct line sfile[2]; / shortened by pruning common prefix/suffix */
	130	static struct context_vec *context_vec_start;
	131	static struct context_vec *context_vec_end;
	132	static struct context_vec *context_vec_ptr;
	133
	134	static void print_only(const char path, size_t dirlen, const char entry)
	135	{
	136	if (dirlen > 1)
	137	dirlen--;
	138	printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
	139	}
	140
	141	static void print_status(int val, char path1, char path2, char *entry)
	142	{
	143	const char *const _entry = entry ? entry : "";
	144	char *_path1 = entry ? concat_path_file(path1, _entry) : path1;
	145	char *_path2 = entry ? concat_path_file(path2, _entry) : path2;
	146
	147	switch (val) {
	148	case D_ONLY:
	149	print_only(path1, strlen(path1), entry);
	150	break;
	151	case D_COMMON:
	152	printf("Common subdirectories: %s and %s\n", _path1, _path2);
	153	break;
	154	case D_BINARY:
	155	printf("Binary files %s and %s differ\n", _path1, _path2);
	156	break;
	157	case D_DIFFER:
	158	if (cmd_flags & FLAG_q)
	159	printf("Files %s and %s differ\n", _path1, _path2);
	160	break;
	161	case D_SAME:
	162	if (cmd_flags & FLAG_s)
	163	printf("Files %s and %s are identical\n", _path1, _path2);
	164	break;
	165	case D_MISMATCH1:
	166	printf("File %s is a directory while file %s is a regular file\n",
	167	_path1, _path2);
	168	break;
	169	case D_MISMATCH2:
	170	printf("File %s is a regular file while file %s is a directory\n",
	171	_path1, _path2);
	172	break;
	173	case D_SKIPPED1:
	174	printf("File %s is not a regular file or directory and was skipped\n",
	175	_path1);
	176	break;
	177	case D_SKIPPED2:
	178	printf("File %s is not a regular file or directory and was skipped\n",
	179	_path2);
	180	break;
	181	}
	182	if (entry) {
	183	free(_path1);
	184	free(_path2);
	185	}
	186	}
	187
	188	/*
	189	* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
	190	*/
	191	static int readhash(FILE * f)
	192	{
	193	int i, t, space;
	194	int sum;
	195
	196	sum = 1;
	197	space = 0;
	198	if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
	199	if (FLAG_i)
	200	for (i = 0; (t = getc(f)) != '\n'; i++) {
	201	if (t == EOF) {
	202	if (i == 0)
	203	return (0);
	204	break;
	205	}
	206	sum = sum * 127 + t;
	207	} else
	208	for (i = 0; (t = getc(f)) != '\n'; i++) {
	209	if (t == EOF) {
	210	if (i == 0)
	211	return (0);
	212	break;
	213	}
	214	sum = sum * 127 + t;
	215	}
	216	} else {
	217	for (i = 0;;) {
	218	switch (t = getc(f)) {
	219	case '\t':
	220	case '\r':
	221	case '\v':
	222	case '\f':
	223	case ' ':
	224	space++;
	225	continue;
	226	default:
	227	if (space && !(cmd_flags & FLAG_w)) {
	228	i++;
	229	space = 0;
	230	}
	231	sum = sum * 127 + t;
	232	i++;
	233	continue;
	234	case EOF:
	235	if (i == 0)
	236	return (0);
	237	/* FALLTHROUGH */
	238	case '\n':
	239	break;
	240	}
	241	break;
	242	}
	243	}
	244	/*
	245	* There is a remote possibility that we end up with a zero sum.
	246	* Zero is used as an EOF marker, so return 1 instead.
	247	*/
	248	return (sum == 0 ? 1 : sum);
	249	}
	250
	251
	252
	253	/*
	254	* Check to see if the given files differ.
	255	* Returns 0 if they are the same, 1 if different, and -1 on error.
	256	*/
	257	static int files_differ(FILE * f1, FILE * f2, int flags)
	258	{
	259	char buf1[BUFSIZ], buf2[BUFSIZ];
	260	size_t i, j;
	261
	262	if ((flags & (D_EMPTY1 \| D_EMPTY2)) \|\| stb1.st_size != stb2.st_size \|\|
	263	(stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
	264	return (1);
	265	while (1) {
	266	i = fread(buf1, 1, sizeof(buf1), f1);
	267	j = fread(buf2, 1, sizeof(buf2), f2);
	268	if (i != j)
	269	return (1);
	270	if (i == 0 && j == 0) {
	271	if (ferror(f1) \|\| ferror(f2))
	272	return (1);
	273	return (0);
	274	}
	275	if (memcmp(buf1, buf2, i) != 0)
	276	return (1);
	277	}
	278	}
	279
	280	static void prepare(int i, FILE * fd, off_t filesize)
	281	{
	282	struct line *p;
	283	int h;
	284	size_t j, sz;
	285
	286	rewind(fd);
	287
	288	sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
	289	if (sz < 100)
	290	sz = 100;
	291
	292	p = xmalloc((sz + 3) * sizeof(struct line));
	293	for (j = 0; (h = readhash(fd));) {
	294	if (j == sz) {
	295	sz = sz * 3 / 2;
	296	p = xrealloc(p, (sz + 3) * sizeof(struct line));
	297	}
	298	p[++j].value = h;
	299	}
	300	len[i] = j;
	301	file[i] = p;
	302	}
	303
	304	static void prune(void)
	305	{
	306	int i, j;
	307
	308	for (pref = 0; pref < len[0] && pref < len[1] &&
	309	file[0][pref + 1].value == file[1][pref + 1].value; pref++);
	310	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
	311	file[0][len[0] - suff].value == file[1][len[1] - suff].value;
	312	suff++);
	313	for (j = 0; j < 2; j++) {
	314	sfile[j] = file[j] + pref;
	315	slen[j] = len[j] - pref - suff;
	316	for (i = 0; i <= slen[j]; i++)
	317	sfile[j][i].serial = i;
	318	}
	319	}
	320
	321	static void equiv(struct line a, int n, struct line b, int m, int *c)
	322	{
	323	int i, j;
	324
	325	i = j = 1;
	326	while (i <= n && j <= m) {
	327	if (a[i].value < b[j].value)
	328	a[i++].value = 0;
	329	else if (a[i].value == b[j].value)
	330	a[i++].value = j;
	331	else
	332	j++;
	333	}
	334	while (i <= n)
	335	a[i++].value = 0;
	336	b[m + 1].value = 0;
	337	j = 0;
	338	while (++j <= m) {
	339	c[j] = -b[j].serial;
	340	while (b[j + 1].value == b[j].value) {
	341	j++;
	342	c[j] = b[j].serial;
	343	}
	344	}
	345	c[j] = -1;
	346	}
	347
	348	static int isqrt(int n)
	349	{
	350	int y, x = 1;
	351
	352	if (n == 0)
	353	return (0);
	354
	355	do {
	356	y = x;
	357	x = n / x;
	358	x += y;
	359	x /= 2;
	360	} while ((x - y) > 1 \|\| (x - y) < -1);
	361
	362	return (x);
	363	}
	364
	365	static int newcand(int x, int y, int pred)
	366	{
	367	struct cand *q;
	368
	369	if (clen == clistlen) {
	370	clistlen = clistlen * 11 / 10;
	371	clist = xrealloc(clist, clistlen * sizeof(struct cand));
	372	}
	373	q = clist + clen;
	374	q->x = x;
	375	q->y = y;
	376	q->pred = pred;
	377	return (clen++);
	378	}
	379
	380
	381	static int search(int *c, int k, int y)
	382	{
	383	int i, j, l, t;
	384
	385	if (clist[c[k]].y < y) /* quick look for typical case */
	386	return (k + 1);
	387	i = 0;
	388	j = k + 1;
	389	while (1) {
	390	l = i + j;
	391	if ((l >>= 1) <= i)
	392	break;
	393	t = clist[c[l]].y;
	394	if (t > y)
	395	j = l;
	396	else if (t < y)
	397	i = l;
	398	else
	399	return (l);
	400	}
	401	return (l + 1);
	402	}
	403
	404
	405	static int stone(int a, int n, int b, int *c)
	406	{
	407	int i, k, y, j, l;
	408	int oldc, tc, oldl;
	409	unsigned int numtries;
	410
	411	#if ENABLE_FEATURE_DIFF_MINIMAL
	412	const unsigned int bound =
	413	(cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
	414	#else
	415	const unsigned int bound = MAX(256, isqrt(n));
	416	#endif
	417	k = 0;
	418	c[0] = newcand(0, 0, 0);
	419	for (i = 1; i <= n; i++) {
	420	j = a[i];
	421	if (j == 0)
	422	continue;
	423	y = -b[j];
	424	oldl = 0;
	425	oldc = c[0];
	426	numtries = 0;
	427	do {
	428	if (y <= clist[oldc].y)
	429	continue;
	430	l = search(c, k, y);
	431	if (l != oldl + 1)
	432	oldc = c[l - 1];
	433	if (l <= k) {
	434	if (clist[c[l]].y <= y)
	435	continue;
	436	tc = c[l];
	437	c[l] = newcand(i, y, oldc);
	438	oldc = tc;
	439	oldl = l;
	440	numtries++;
	441	} else {
	442	c[l] = newcand(i, y, oldc);
	443	k++;
	444	break;
	445	}
	446	} while ((y = b[++j]) > 0 && numtries < bound);
	447	}
	448	return (k);
	449	}
	450
	451	static void unravel(int p)
	452	{
	453	struct cand *q;
	454	int i;
	455
	456	for (i = 0; i <= len[0]; i++)
	457	J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
	458	for (q = clist + p; q->y != 0; q = clist + q->pred)
	459	J[q->x + pref] = q->y + pref;
	460	}
	461
	462
	463	static void unsort(struct line f, int l, int b)
	464	{
	465	int *a, i;
	466
	467	a = xmalloc((l + 1) * sizeof(int));
	468	for (i = 1; i <= l; i++)
	469	a[f[i].serial] = f[i].value;
	470	for (i = 1; i <= l; i++)
	471	b[i] = a[i];
	472	free(a);
	473	}
	474
	475	static int skipline(FILE * f)
	476	{
	477	int i, c;
	478
	479	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
	480	continue;
	481	return (i);
	482	}
	483
	484
	485	/*
	486	* Check does double duty:
	487	* 1. ferret out any fortuitous correspondences due
	488	* to confounding by hashing (which result in "jackpot")
	489	* 2. collect random access indexes to the two files
	490	*/
	491	static void check(FILE * f1, FILE * f2)
	492	{
	493	int i, j, jackpot, c, d;
	494	long ctold, ctnew;
	495
	496	rewind(f1);
	497	rewind(f2);
	498	j = 1;
	499	ixold[0] = ixnew[0] = 0;
	500	jackpot = 0;
	501	ctold = ctnew = 0;
	502	for (i = 1; i <= len[0]; i++) {
	503	if (J[i] == 0) {
	504	ixold[i] = ctold += skipline(f1);
	505	continue;
	506	}
	507	while (j < J[i]) {
	508	ixnew[j] = ctnew += skipline(f2);
	509	j++;
	510	}
	511	if ((cmd_flags & FLAG_b) \|\| (cmd_flags & FLAG_w)
	512	\|\| (cmd_flags & FLAG_i)) {
	513	while (1) {
	514	c = getc(f1);
	515	d = getc(f2);
	516	/*
	517	* GNU diff ignores a missing newline
	518	* in one file if bflag \|\| wflag.
	519	*/
	520	if (((cmd_flags & FLAG_b) \|\| (cmd_flags & FLAG_w)) &&
	521	((c == EOF && d == '\n') \|\| (c == '\n' && d == EOF))) {
	522	break;
	523	}
	524	ctold++;
	525	ctnew++;
	526	if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
	527	do {
	528	if (c == '\n')
	529	break;
	530	ctold++;
	531	} while (isspace(c = getc(f1)));
	532	do {
	533	if (d == '\n')
	534	break;
	535	ctnew++;
	536	} while (isspace(d = getc(f2)));
	537	} else if (cmd_flags & FLAG_w) {
	538	while (isspace(c) && c != '\n') {
	539	c = getc(f1);
	540	ctold++;
	541	}
	542	while (isspace(d) && d != '\n') {
	543	d = getc(f2);
	544	ctnew++;
	545	}
	546	}
	547	if (c != d) {
	548	jackpot++;
	549	J[i] = 0;
	550	if (c != '\n' && c != EOF)
	551	ctold += skipline(f1);
	552	if (d != '\n' && c != EOF)
	553	ctnew += skipline(f2);
	554	break;
	555	}
	556	if (c == '\n' \|\| c == EOF)
	557	break;
	558	}
	559	} else {
	560	while (1) {
	561	ctold++;
	562	ctnew++;
	563	if ((c = getc(f1)) != (d = getc(f2))) {
	564	J[i] = 0;
	565	if (c != '\n' && c != EOF)
	566	ctold += skipline(f1);
	567	if (d != '\n' && c != EOF)
	568	ctnew += skipline(f2);
	569	break;
	570	}
	571	if (c == '\n' \|\| c == EOF)
	572	break;
	573	}
	574	}
	575	ixold[i] = ctold;
	576	ixnew[j] = ctnew;
	577	j++;
	578	}
	579	for (; j <= len[1]; j++)
	580	ixnew[j] = ctnew += skipline(f2);
	581	}
	582
	583	/* shellsort CACM #201 */
	584	static void sort(struct line *a, int n)
	585	{
	586	struct line ai, aim, w;
	587	int j, m = 0, k;
	588
	589	if (n == 0)
	590	return;
	591	for (j = 1; j <= n; j *= 2)
	592	m = 2 * j - 1;
	593	for (m /= 2; m != 0; m /= 2) {
	594	k = n - m;
	595	for (j = 1; j <= k; j++) {
	596	for (ai = &a[j]; ai > a; ai -= m) {
	597	aim = &ai[m];
	598	if (aim < ai)
	599	break; /* wraparound */
	600	if (aim->value > ai[0].value \|\|
	601	(aim->value == ai[0].value && aim->serial > ai[0].serial))
	602	break;
	603	w.value = ai[0].value;
	604	ai[0].value = aim->value;
	605	aim->value = w.value;
	606	w.serial = ai[0].serial;
	607	ai[0].serial = aim->serial;
	608	aim->serial = w.serial;
	609	}
	610	}
	611	}
	612	}
	613
	614
	615	static void uni_range(int a, int b)
	616	{
	617	if (a < b)
	618	printf("%d,%d", a, b - a + 1);
	619	else if (a == b)
	620	printf("%d", b);
	621	else
	622	printf("%d,0", b);
	623	}
	624
	625	static int fetch(long f, int a, int b, FILE lb, int ch)
	626	{
	627	int i, j, c, lastc, col, nc;
	628
	629	if (a > b)
	630	return (0);
	631	for (i = a; i <= b; i++) {
	632	fseek(lb, f[i - 1], SEEK_SET);
	633	nc = f[i] - f[i - 1];
	634	if (ch != '\0') {
	635	putchar(ch);
	636	if (cmd_flags & FLAG_T)
	637	putchar('\t');
	638	}
	639	col = 0;
	640	for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
	641	if ((c = getc(lb)) == EOF) {
	642	puts("\n\\ No newline at end of file");
	643	return (0);
	644	}
	645	if (c == '\t' && (cmd_flags & FLAG_t)) {
	646	do {
	647	putchar(' ');
	648	} while (++col & 7);
	649	} else {
	650	putchar(c);
	651	col++;
	652	}
	653	}
	654	}
	655	return (0);
	656	}
	657
	658	static int asciifile(FILE * f)
	659	{
	660	#if ENABLE_FEATURE_DIFF_BINARY
	661	unsigned char buf[BUFSIZ];
	662	int i, cnt;
	663	#endif
	664
	665	if ((cmd_flags & FLAG_a) \|\| f == NULL)
	666	return (1);
	667
	668	#if ENABLE_FEATURE_DIFF_BINARY
	669	rewind(f);
	670	cnt = fread(buf, 1, sizeof(buf), f);
	671	for (i = 0; i < cnt; i++) {
	672	if (!isprint(buf[i]) && !isspace(buf[i])) {
	673	return (0);
	674	}
	675	}
	676	#endif
	677	return (1);
	678	}
	679
	680	/* dump accumulated "unified" diff changes */
	681	static void dump_unified_vec(FILE * f1, FILE * f2)
	682	{
	683	struct context_vec *cvp = context_vec_start;
	684	int lowa, upb, lowc, upd;
	685	int a, b, c, d;
	686	char ch;
	687
	688	if (context_vec_start > context_vec_ptr)
	689	return;
	690
	691	b = d = 0; /* gcc */
	692	lowa = MAX(1, cvp->a - context);
	693	upb = MIN(len[0], context_vec_ptr->b + context);
	694	lowc = MAX(1, cvp->c - context);
	695	upd = MIN(len[1], context_vec_ptr->d + context);
	696
	697	fputs("@@ -", stdout);
	698	uni_range(lowa, upb);
	699	fputs(" +", stdout);
	700	uni_range(lowc, upd);
	701	fputs(" @@", stdout);
	702	putchar('\n');
	703
	704	/*
	705	* Output changes in "unified" diff format--the old and new lines
	706	* are printed together.
	707	*/
	708	for (; cvp <= context_vec_ptr; cvp++) {
	709	a = cvp->a;
	710	b = cvp->b;
	711	c = cvp->c;
	712	d = cvp->d;
	713
	714	/*
	715	* c: both new and old changes
	716	* d: only changes in the old file
	717	* a: only changes in the new file
	718	*/
	719	if (a <= b && c <= d)
	720	ch = 'c';
	721	else
	722	ch = (a <= b) ? 'd' : 'a';
	723	#if 0
	724	switch (ch) {
	725	case 'c':
	726	fetch(ixold, lowa, a - 1, f1, ' ');
	727	fetch(ixold, a, b, f1, '-');
	728	fetch(ixnew, c, d, f2, '+');
	729	break;
	730	case 'd':
	731	fetch(ixold, lowa, a - 1, f1, ' ');
	732	fetch(ixold, a, b, f1, '-');
	733	break;
	734	case 'a':
	735	fetch(ixnew, lowc, c - 1, f2, ' ');
	736	fetch(ixnew, c, d, f2, '+');
	737	break;
	738	}
	739	#else
	740	if (ch == 'c' \|\| ch == 'd') {
	741	fetch(ixold, lowa, a - 1, f1, ' ');
	742	fetch(ixold, a, b, f1, '-');
	743	}
	744	if (ch == 'a')
	745	fetch(ixnew, lowc, c - 1, f2, ' ');
	746	if (ch == 'c' \|\| ch == 'a')
	747	fetch(ixnew, c, d, f2, '+');
	748	#endif
	749	lowa = b + 1;
	750	lowc = d + 1;
	751	}
	752	fetch(ixnew, d + 1, upd, f2, ' ');
	753
	754	context_vec_ptr = context_vec_start - 1;
	755	}
	756
	757
	758	static void print_header(const char file1, const char file2)
	759	{
	760	if (label[0] != NULL)
	761	printf("%s %s\n", "---", label[0]);
	762	else
	763	printf("%s %s\t%s", "---", file1, ctime(&stb1.st_mtime));
	764	if (label[1] != NULL)
	765	printf("%s %s\n", "+++", label[1]);
	766	else
	767	printf("%s %s\t%s", "+++", file2, ctime(&stb2.st_mtime));
	768	}
	769
	770
	771
	772	/*
	773	* Indicate that there is a difference between lines a and b of the from file
	774	* to get to lines c to d of the to file. If a is greater then b then there
	775	* are no lines in the from file involved and this means that there were
	776	* lines appended (beginning at b). If c is greater than d then there are
	777	* lines missing from the to file.
	778	*/
	779	static void change(char file1, FILE f1, char file2, FILE f2, int a,
	780	int b, int c, int d)
	781	{
	782	static size_t max_context = 64;
	783
	784	if (a > b && c > d)
	785	return;
	786	if (cmd_flags & FLAG_q)
	787	return;
	788
	789	/*
	790	* Allocate change records as needed.
	791	*/
	792	if (context_vec_ptr == context_vec_end - 1) {
	793	ptrdiff_t offset = context_vec_ptr - context_vec_start;
	794
	795	max_context <<= 1;
	796	context_vec_start = xrealloc(context_vec_start,
	797	max_context *
	798	sizeof(struct context_vec));
	799	context_vec_end = context_vec_start + max_context;
	800	context_vec_ptr = context_vec_start + offset;
	801	}
	802	if (anychange == 0) {
	803	/*
	804	* Print the context/unidiff header first time through.
	805	*/
	806	print_header(file1, file2);
	807	anychange = 1;
	808	} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
	809	c > context_vec_ptr->d + (2 * context) + 1) {
	810	/*
	811	* If this change is more than 'context' lines from the
	812	* previous change, dump the record and reset it.
	813	*/
	814	dump_unified_vec(f1, f2);
	815	}
	816	context_vec_ptr++;
	817	context_vec_ptr->a = a;
	818	context_vec_ptr->b = b;
	819	context_vec_ptr->c = c;
	820	context_vec_ptr->d = d;
	821	return;
	822
	823	}
	824
	825
	826	static void output(char file1, FILE f1, char file2, FILE f2)
	827	{
	828
	829	/* Note that j0 and j1 can't be used as they are defined in math.h.
	830	* This also allows the rather amusing variable 'j00'... */
	831	int m, i0, i1, j00, j01;
	832
	833	rewind(f1);
	834	rewind(f2);
	835	m = len[0];
	836	J[0] = 0;
	837	J[m + 1] = len[1] + 1;
	838	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
	839	while (i0 <= m && J[i0] == J[i0 - 1] + 1)
	840	i0++;
	841	j00 = J[i0 - 1] + 1;
	842	i1 = i0 - 1;
	843	while (i1 < m && J[i1 + 1] == 0)
	844	i1++;
	845	j01 = J[i1 + 1] - 1;
	846	J[i1] = j01;
	847	change(file1, f1, file2, f2, i0, i1, j00, j01);
	848	}
	849	if (m == 0) {
	850	change(file1, f1, file2, f2, 1, 0, 1, len[1]);
	851	}
	852	if (anychange != 0) {
	853	dump_unified_vec(f1, f2);
	854	}
	855	}
	856
	857	/*
	858	* The following code uses an algorithm due to Harold Stone,
	859	* which finds a pair of longest identical subsequences in
	860	* the two files.
	861	*
	862	* The major goal is to generate the match vector J.
	863	* J[i] is the index of the line in file1 corresponding
	864	* to line i file0. J[i] = 0 if there is no
	865	* such line in file1.
	866	*
	867	* Lines are hashed so as to work in core. All potential
	868	* matches are located by sorting the lines of each file
	869	* on the hash (called ``value''). In particular, this
	870	* collects the equivalence classes in file1 together.
	871	* Subroutine equiv replaces the value of each line in
	872	* file0 by the index of the first element of its
	873	* matching equivalence in (the reordered) file1.
	874	* To save space equiv squeezes file1 into a single
	875	* array member in which the equivalence classes
	876	* are simply concatenated, except that their first
	877	* members are flagged by changing sign.
	878	*
	879	* Next the indices that point into member are unsorted into
	880	* array class according to the original order of file0.
	881	*
	882	* The cleverness lies in routine stone. This marches
	883	* through the lines of file0, developing a vector klist
	884	* of "k-candidates". At step i a k-candidate is a matched
	885	* pair of lines x,y (x in file0 y in file1) such that
	886	* there is a common subsequence of length k
	887	* between the first i lines of file0 and the first y
	888	* lines of file1, but there is no such subsequence for
	889	* any smaller y. x is the earliest possible mate to y
	890	* that occurs in such a subsequence.
	891	*
	892	* Whenever any of the members of the equivalence class of
	893	* lines in file1 matchable to a line in file0 has serial number
	894	* less than the y of some k-candidate, that k-candidate
	895	* with the smallest such y is replaced. The new
	896	* k-candidate is chained (via pred) to the current
	897	* k-1 candidate so that the actual subsequence can
	898	* be recovered. When a member has serial number greater
	899	* than the y of all k-candidates, the klist is extended.
	900	* At the end, the longest subsequence is pulled out
	901	* and placed in the array J by unravel
	902	*
	903	* With J in hand, the matches there recorded are
	904	* checked against reality to assure that no spurious
	905	* matches have crept in due to hashing. If they have,
	906	* they are broken, and "jackpot" is recorded--a harmless
	907	* matter except that a true match for a spuriously
	908	* mated line may now be unnecessarily reported as a change.
	909	*
	910	* Much of the complexity of the program comes simply
	911	* from trying to minimize core utilization and
	912	* maximize the range of doable problems by dynamically
	913	* allocating what is needed and reusing what is not.
	914	* The core requirements for problems larger than somewhat
	915	* are (in words) 2*length(file0) + length(file1) +
	916	* 3*(number of k-candidates installed), typically about
	917	* 6n words for files of length n.
	918	*/
	919
	920	static int diffreg(char ofile1, char ofile2, int flags)
	921	{
	922	char *file1 = ofile1;
	923	char *file2 = ofile2;
	924	FILE *f1 = NULL;
	925	FILE *f2 = NULL;
	926	int rval = D_SAME;
	927	int i;
	928
	929	anychange = 0;
	930	context_vec_ptr = context_vec_start - 1;
	931
	932	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
	933	return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
	934	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
	935	goto closem;
	936
	937	if (flags & D_EMPTY1)
	938	f1 = bb_xfopen(bb_dev_null, "r");
	939	else {
	940	if (strcmp(file1, "-") == 0)
	941	f1 = stdin;
	942	else
	943	f1 = bb_xfopen(file1, "r");
	944	}
	945
	946	if (flags & D_EMPTY2)
	947	f2 = bb_xfopen(bb_dev_null, "r");
	948	else {
	949	if (strcmp(file2, "-") == 0)
	950	f2 = stdin;
	951	else
	952	f2 = bb_xfopen(file2, "r");
	953	}
	954
	955	if ((i = files_differ(f1, f2, flags)) == 0)
	956	goto closem;
	957	else if (i != 1) { /* 1 == ok */
	958	/* error */
	959	status \|= 2;
	960	goto closem;
	961	}
	962
	963	if (!asciifile(f1) \|\| !asciifile(f2)) {
	964	rval = D_BINARY;
	965	status \|= 1;
	966	goto closem;
	967	}
	968
	969	prepare(0, f1, stb1.st_size);
	970	prepare(1, f2, stb2.st_size);
	971	prune();
	972	sort(sfile[0], slen[0]);
	973	sort(sfile[1], slen[1]);
	974
	975	member = (int *) file[1];
	976	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
	977	member = xrealloc(member, (slen[1] + 2) * sizeof(int));
	978
	979	class = (int *) file[0];
	980	unsort(sfile[0], slen[0], class);
	981	class = xrealloc(class, (slen[0] + 2) * sizeof(int));
	982
	983	klist = xmalloc((slen[0] + 2) * sizeof(int));
	984	clen = 0;
	985	clistlen = 100;
	986	clist = xmalloc(clistlen * sizeof(struct cand));
	987	i = stone(class, slen[0], member, klist);
	988	free(member);
	989	free(class);
	990
	991	J = xrealloc(J, (len[0] + 2) * sizeof(int));
	992	unravel(klist[i]);
	993	free(clist);
	994	free(klist);
	995
	996	ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
	997	ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
	998	check(f1, f2);
	999	output(file1, f1, file2, f2);
	1000
	1001	closem:
	1002	if (anychange) {
	1003	status \|= 1;
	1004	if (rval == D_SAME)
	1005	rval = D_DIFFER;
	1006	}
	1007	if (f1 != NULL)
	1008	fclose(f1);
	1009	if (f2 != NULL)
	1010	fclose(f2);
	1011	if (file1 != ofile1)
	1012	free(file1);
	1013	if (file2 != ofile2)
	1014	free(file2);
	1015	return (rval);
	1016	}
	1017
	1018	#if ENABLE_FEATURE_DIFF_DIR
	1019	static void do_diff(char dir1, char path1, char dir2, char path2)
	1020	{
	1021
	1022	int flags = D_HEADER;
	1023	int val;
	1024
	1025	char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
	1026	char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
	1027
	1028	if (stat(fullpath1, &stb1) != 0) {
	1029	flags \|= D_EMPTY1;
	1030	memset(&stb1, 0, sizeof(stb1));
	1031	fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
	1032	}
	1033	if (stat(fullpath2, &stb2) != 0) {
	1034	flags \|= D_EMPTY2;
	1035	memset(&stb2, 0, sizeof(stb2));
	1036	stb2.st_mode = stb1.st_mode;
	1037	fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
	1038	}
	1039
	1040	if (stb1.st_mode == 0)
	1041	stb1.st_mode = stb2.st_mode;
	1042
	1043	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
	1044	printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
	1045	return;
	1046	}
	1047
	1048	if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
	1049	val = D_SKIPPED1;
	1050	else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
	1051	val = D_SKIPPED2;
	1052	else
	1053	val = diffreg(fullpath1, fullpath2, flags);
	1054
	1055	print_status(val, fullpath1, fullpath2, NULL);
	1056	}
	1057	#endif
	1058
	1059	#if ENABLE_FEATURE_DIFF_DIR
	1060	static int dir_strcmp(const void p1, const void p2)
	1061	{
	1062	return strcmp((char const ) p1, (char const ) p2);
	1063	}
	1064
	1065	/* This function adds a filename to dl, the directory listing. */
	1066
	1067	static int add_to_dirlist(const char *filename,
	1068	struct stat ATTRIBUTE_UNUSED * sb, void *userdata)
	1069	{
	1070	dl_count++;
	1071	dl = xrealloc(dl, dl_count * sizeof(char *));
	1072	dl[dl_count - 1] = bb_xstrdup(filename);
	1073	if (cmd_flags & FLAG_r) {
	1074	int pp = (int ) userdata;
	1075	int path_len = *pp + 1;
	1076
	1077	dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
	1078	}
	1079	return TRUE;
	1080	}
	1081
	1082	/* This returns a sorted directory listing. */
	1083	static char *get_dir(char path)
	1084	{
	1085
	1086	int i;
	1087	char **retval;
	1088
	1089	/* If -r has been set, then the recursive_action function will be
	1090	* used. Unfortunately, this outputs the root directory along with
	1091	* the recursed paths, so use void *userdata to specify the string
	1092	* length of the root directory. It can then be removed in
	1093	* add_to_dirlist. */
	1094
	1095	int path_len = strlen(path);
	1096	void *userdata = &path_len;
	1097
	1098	/* Reset dl_count - there's no need to free dl as bb_xrealloc does
	1099	* the job nicely. */
	1100	dl_count = 0;
	1101
	1102	/* Now fill dl with a listing. */
	1103	if (cmd_flags & FLAG_r)
	1104	recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL,
	1105	userdata);
	1106	else {
	1107	DIR *dp;
	1108	struct dirent *ep;
	1109
	1110	dp = bb_opendir(path);
	1111	while ((ep = readdir(dp))) {
	1112	if ((!strcmp(ep->d_name, "..")) \|\| (!strcmp(ep->d_name, ".")))
	1113	continue;
	1114	add_to_dirlist(ep->d_name, NULL, NULL);
	1115	}
	1116	closedir(dp);
	1117	}
	1118
	1119	/* Sort dl alphabetically. */
	1120	qsort(dl, dl_count, sizeof(char *), dir_strcmp);
	1121
	1122	/* Copy dl so that we can return it. */
	1123	retval = xmalloc(dl_count * sizeof(char *));
	1124	for (i = 0; i < dl_count; i++)
	1125	retval[i] = bb_xstrdup(dl[i]);
	1126
	1127	return retval;
	1128	}
	1129
	1130	static void diffdir(char p1, char p2)
	1131	{
	1132
	1133	char dirlist1, dirlist2;
	1134	char dp1, dp2;
	1135	int dirlist1_count, dirlist2_count;
	1136	int pos;
	1137
	1138	/* Check for trailing slashes. */
	1139
	1140	if (p1[strlen(p1) - 1] == '/')
	1141	p1[strlen(p1) - 1] = '\0';
	1142	if (p2[strlen(p2) - 1] == '/')
	1143	p2[strlen(p2) - 1] = '\0';
	1144
	1145	/* Get directory listings for p1 and p2. */
	1146
	1147	dirlist1 = get_dir(p1);
	1148	dirlist1_count = dl_count;
	1149	dirlist1[dirlist1_count] = NULL;
	1150	dirlist2 = get_dir(p2);
	1151	dirlist2_count = dl_count;
	1152	dirlist2[dirlist2_count] = NULL;
	1153
	1154	/* If -S was set, find the starting point. */
	1155	if (start) {
	1156	while (dirlist1 != NULL && strcmp(dirlist1, start) < 0)
	1157	dirlist1++;
	1158	while (dirlist2 != NULL && strcmp(dirlist2, start) < 0)
	1159	dirlist2++;
	1160	if ((dirlist1 == NULL) \|\| (dirlist2 == NULL))
	1161	bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
	1162	}
	1163
	1164	/* Now that both dirlist1 and dirlist2 contain sorted directory
	1165	* listings, we can start to go through dirlist1. If both listings
	1166	* contain the same file, then do a normal diff. Otherwise, behaviour
	1167	* is determined by whether the -N flag is set. */
	1168	while (dirlist1 != NULL \|\| dirlist2 != NULL) {
	1169	dp1 = *dirlist1;
	1170	dp2 = *dirlist2;
	1171	pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
	1172	if (pos == 0) {
	1173	do_diff(p1, dp1, p2, dp2);
	1174	dirlist1++;
	1175	dirlist2++;
	1176	} else if (pos < 0) {
	1177	if (cmd_flags & FLAG_N)
	1178	do_diff(p1, dp1, p2, NULL);
	1179	else
	1180	print_only(p1, strlen(p1) + 1, dp1);
	1181	dirlist1++;
	1182	} else {
	1183	if (cmd_flags & FLAG_N)
	1184	do_diff(p1, NULL, p2, dp2);
	1185	else
	1186	print_only(p2, strlen(p2) + 1, dp2);
	1187	dirlist2++;
	1188	}
	1189	}
	1190	}
	1191	#endif
	1192
	1193
	1194
	1195	int diff_main(int argc, char **argv)
	1196	{
	1197	int gotstdin = 0;
	1198
	1199	char *U_opt;
	1200	llist_t *L_arg = NULL;
	1201
	1202	bb_opt_complementally = "L::";
	1203	cmd_flags =
	1204	bb_getopt_ulflags(argc, argv, "abdiL:NqrsS:tTU:wu", &L_arg, &start,
	1205	&U_opt);
	1206
	1207	if (cmd_flags & FLAG_L) {
	1208	while (L_arg) {
	1209	if (label[0] == NULL)
	1210	label[0] = L_arg->data;
	1211	else if (label[1] == NULL)
	1212	label[1] = L_arg->data;
	1213	else
	1214	bb_show_usage();
	1215
	1216	L_arg = L_arg->link;
	1217	}
	1218
	1219	/* If both label[0] and label[1] were set, they need to be swapped. */
	1220	if (label[0] && label[1]) {
	1221	char *tmp;
	1222
	1223	tmp = label[1];
	1224	label[1] = label[0];
	1225	label[0] = tmp;
	1226	}
	1227	}
	1228
	1229	context = 3; /* This is the default number of lines of context. */
	1230	if (cmd_flags & FLAG_U) {
	1231	context = bb_xgetlarg(U_opt, 10, 1, INT_MAX);
	1232	}
	1233	argc -= optind;
	1234	argv += optind;
	1235
	1236	/*
	1237	* Do sanity checks, fill in stb1 and stb2 and call the appropriate
	1238	* driver routine. Both drivers use the contents of stb1 and stb2.
	1239	*/
	1240	if (argc < 2) {
	1241	bb_error_msg("Missing filename");
	1242	bb_show_usage();
	1243	}
	1244	if (strcmp(argv[0], "-") == 0) {
	1245	fstat(STDIN_FILENO, &stb1);
	1246	gotstdin = 1;
	1247	} else
	1248	xstat(argv[0], &stb1);
	1249	if (strcmp(argv[1], "-") == 0) {
	1250	fstat(STDIN_FILENO, &stb2);
	1251	gotstdin = 1;
	1252	} else
	1253	xstat(argv[1], &stb2);
	1254	if (gotstdin && (S_ISDIR(stb1.st_mode) \|\| S_ISDIR(stb2.st_mode)))
	1255	bb_error_msg_and_die("Can't compare - to a directory");
	1256	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
	1257	#if ENABLE_FEATURE_DIFF_DIR
	1258	diffdir(argv[0], argv[1]);
	1259	#else
	1260	bb_error_msg_and_die("Directory comparison not supported");
	1261	#endif
	1262	} else {
	1263	if (S_ISDIR(stb1.st_mode)) {
	1264	argv[0] = concat_path_file(argv[0], argv[1]);
	1265	xstat(argv[0], &stb1);
	1266	}
	1267	if (S_ISDIR(stb2.st_mode)) {
	1268	argv[1] = concat_path_file(argv[1], argv[0]);
	1269	xstat(argv[1], &stb2);
	1270	}
	1271	print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
	1272	}
	1273	exit(status);
	1274	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: