Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

diff.c@ 3528

Visit:

Last change on this file since 3528 was 1765, checked in by Bruno Cornec, 17 years ago
Update to busybox 1.7.2
Property svn:eol-style set to `native`
File size: 30.1 KB

Rev	Line
[1765]	1	/* vi: set sw=4 ts=4: */
	2	/*
	3	* Mini diff implementation for busybox, adapted from OpenBSD diff.
	4	*
	5	* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
	6	* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
	7	*
	8	* Sponsored in part by the Defense Advanced Research Projects
	9	* Agency (DARPA) and Air Force Research Laboratory, Air Force
	10	* Materiel Command, USAF, under agreement number F39502-99-1-0512.
	11	*
	12	* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
	13	*/
	14
	15	#include "libbb.h"
	16
	17	#define FSIZE_MAX 32768
	18
	19	/*
	20	* Output flags
	21	*/
	22	#define D_HEADER 1 /* Print a header/footer between files */
	23	#define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
	24	#define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
	25
	26	/*
	27	* Status values for print_status() and diffreg() return values
	28	* Guide:
	29	* D_SAME - files are the same
	30	* D_DIFFER - files differ
	31	* D_BINARY - binary files differ
	32	* D_COMMON - subdirectory common to both dirs
	33	* D_ONLY - file only exists in one dir
	34	* D_MISMATCH1 - path1 a dir, path2 a file
	35	* D_MISMATCH2 - path1 a file, path2 a dir
	36	* D_ERROR - error occurred
	37	* D_SKIPPED1 - skipped path1 as it is a special file
	38	* D_SKIPPED2 - skipped path2 as it is a special file
	39	*/
	40
	41	#define D_SAME 0
	42	#define D_DIFFER (1<<0)
	43	#define D_BINARY (1<<1)
	44	#define D_COMMON (1<<2)
	45	#define D_ONLY (1<<3)
	46	#define D_MISMATCH1 (1<<4)
	47	#define D_MISMATCH2 (1<<5)
	48	#define D_ERROR (1<<6)
	49	#define D_SKIPPED1 (1<<7)
	50	#define D_SKIPPED2 (1<<8)
	51
	52	/* Command line options */
	53	#define FLAG_a (1<<0)
	54	#define FLAG_b (1<<1)
	55	#define FLAG_d (1<<2)
	56	#define FLAG_i (1<<3)
	57	#define FLAG_L (1<<4)
	58	#define FLAG_N (1<<5)
	59	#define FLAG_q (1<<6)
	60	#define FLAG_r (1<<7)
	61	#define FLAG_s (1<<8)
	62	#define FLAG_S (1<<9)
	63	#define FLAG_t (1<<10)
	64	#define FLAG_T (1<<11)
	65	#define FLAG_U (1<<12)
	66	#define FLAG_w (1<<13)
	67
	68	#define g_read_buf bb_common_bufsiz1
	69
	70	struct cand {
	71	int x;
	72	int y;
	73	int pred;
	74	};
	75
	76	struct line {
	77	int serial;
	78	int value;
	79	};
	80
	81	/*
	82	* The following struct is used to record change information
	83	* doing a "context" or "unified" diff. (see routine "change" to
	84	* understand the highly mnemonic field names)
	85	*/
	86	struct context_vec {
	87	int a; /* start line in old file */
	88	int b; /* end line in old file */
	89	int c; /* start line in new file */
	90	int d; /* end line in new file */
	91	};
	92
	93	struct globals {
	94	USE_FEATURE_DIFF_DIR(char **dl;)
	95	USE_FEATURE_DIFF_DIR(int dl_count;)
	96	/* This is the default number of lines of context. */
	97	int context;
	98	size_t max_context;
	99	int status;
	100	char *start;
	101	const char *label1;
	102	const char *label2;
	103	struct line *file[2];
	104	int J; / will be overlaid on class */
	105	int class; / will be overlaid on file[0] */
	106	int klist; / will be overlaid on file[0] after class */
	107	int member; / will be overlaid on file[1] */
	108	int clen;
	109	int len[2];
	110	int pref, suff; /* length of prefix and suffix */
	111	int slen[2];
	112	bool anychange;
	113	long ixnew; / will be overlaid on file[1] */
	114	long ixold; / will be overlaid on klist */
	115	struct cand clist; / merely a free storage pot for candidates */
	116	int clistlen; /* the length of clist */
	117	struct line sfile[2]; / shortened by pruning common prefix/suffix */
	118	struct context_vec *context_vec_start;
	119	struct context_vec *context_vec_end;
	120	struct context_vec *context_vec_ptr;
	121	struct stat stb1, stb2;
	122	};
	123	#define G (*ptr_to_globals)
	124	#define dl (G.dl )
	125	#define dl_count (G.dl_count )
	126	#define context (G.context )
	127	#define max_context (G.max_context )
	128	#define status (G.status )
	129	#define start (G.start )
	130	#define label1 (G.label1 )
	131	#define label2 (G.label2 )
	132	#define file (G.file )
	133	#define J (G.J )
	134	#define class (G.class )
	135	#define klist (G.klist )
	136	#define member (G.member )
	137	#define clen (G.clen )
	138	#define len (G.len )
	139	#define pref (G.pref )
	140	#define suff (G.suff )
	141	#define slen (G.slen )
	142	#define anychange (G.anychange )
	143	#define ixnew (G.ixnew )
	144	#define ixold (G.ixold )
	145	#define clist (G.clist )
	146	#define clistlen (G.clistlen )
	147	#define sfile (G.sfile )
	148	#define context_vec_start (G.context_vec_start )
	149	#define context_vec_end (G.context_vec_end )
	150	#define context_vec_ptr (G.context_vec_ptr )
	151	#define stb1 (G.stb1 )
	152	#define stb2 (G.stb2 )
	153	#define INIT_G() do { \
	154	PTR_TO_GLOBALS = xzalloc(sizeof(G)); \
	155	context = 3; \
	156	max_context = 64; \
	157	} while (0)
	158
	159
	160	static void print_only(const char path, size_t dirlen, const char entry)
	161	{
	162	if (dirlen > 1)
	163	dirlen--;
	164	printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
	165	}
	166
	167	static void print_status(int val, char path1, char path2, char *entry)
	168	{
	169	const char *const _entry = entry ? entry : "";
	170	char * const _path1 = entry ? concat_path_file(path1, _entry) : path1;
	171	char * const _path2 = entry ? concat_path_file(path2, _entry) : path2;
	172
	173	switch (val) {
	174	case D_ONLY:
	175	print_only(path1, strlen(path1), entry);
	176	break;
	177	case D_COMMON:
	178	printf("Common subdirectories: %s and %s\n", _path1, _path2);
	179	break;
	180	case D_BINARY:
	181	printf("Binary files %s and %s differ\n", _path1, _path2);
	182	break;
	183	case D_DIFFER:
	184	if (option_mask32 & FLAG_q)
	185	printf("Files %s and %s differ\n", _path1, _path2);
	186	break;
	187	case D_SAME:
	188	if (option_mask32 & FLAG_s)
	189	printf("Files %s and %s are identical\n", _path1, _path2);
	190	break;
	191	case D_MISMATCH1:
	192	printf("File %s is a %s while file %s is a %s\n",
	193	_path1, "directory", _path2, "regular file");
	194	break;
	195	case D_MISMATCH2:
	196	printf("File %s is a %s while file %s is a %s\n",
	197	_path1, "regular file", _path2, "directory");
	198	break;
	199	case D_SKIPPED1:
	200	printf("File %s is not a regular file or directory and was skipped\n",
	201	_path1);
	202	break;
	203	case D_SKIPPED2:
	204	printf("File %s is not a regular file or directory and was skipped\n",
	205	_path2);
	206	break;
	207	}
	208	if (entry) {
	209	free(_path1);
	210	free(_path2);
	211	}
	212	}
	213	static ALWAYS_INLINE int fiddle_sum(int sum, int t)
	214	{
	215	return sum * 127 + t;
	216	}
	217	/*
	218	* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
	219	*/
	220	static int readhash(FILE *fp)
	221	{
	222	int i, t, space;
	223	int sum;
	224
	225	sum = 1;
	226	space = 0;
	227	if (!(option_mask32 & (FLAG_b \| FLAG_w))) {
	228	for (i = 0; (t = getc(fp)) != '\n'; i++) {
	229	if (t == EOF) {
	230	if (i == 0)
	231	return 0;
	232	break;
	233	}
	234	sum = fiddle_sum(sum, t);
	235	}
	236	} else {
	237	for (i = 0;;) {
	238	switch (t = getc(fp)) {
	239	case '\t':
	240	case '\r':
	241	case '\v':
	242	case '\f':
	243	case ' ':
	244	space++;
	245	continue;
	246	default:
	247	if (space && !(option_mask32 & FLAG_w)) {
	248	i++;
	249	space = 0;
	250	}
	251	sum = fiddle_sum(sum, t);
	252	i++;
	253	continue;
	254	case EOF:
	255	if (i == 0)
	256	return 0;
	257	/* FALLTHROUGH */
	258	case '\n':
	259	break;
	260	}
	261	break;
	262	}
	263	}
	264	/*
	265	* There is a remote possibility that we end up with a zero sum.
	266	* Zero is used as an EOF marker, so return 1 instead.
	267	*/
	268	return (sum == 0 ? 1 : sum);
	269	}
	270
	271
	272	/*
	273	* Check to see if the given files differ.
	274	* Returns 0 if they are the same, 1 if different, and -1 on error.
	275	*/
	276	static int files_differ(FILE f1, FILE f2, int flags)
	277	{
	278	size_t i, j;
	279
	280	if ((flags & (D_EMPTY1 \| D_EMPTY2)) \|\| stb1.st_size != stb2.st_size
	281	\|\| (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)
	282	) {
	283	return 1;
	284	}
	285	while (1) {
	286	i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1);
	287	j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2);
	288	if (i != j)
	289	return 1;
	290	if (i == 0)
	291	return (ferror(f1) \|\| ferror(f2));
	292	if (memcmp(g_read_buf,
	293	g_read_buf + COMMON_BUFSIZE/2, i) != 0)
	294	return 1;
	295	}
	296	}
	297
	298
	299	static void prepare(int i, FILE fp /, off_t filesize*/)
	300	{
	301	struct line *p;
	302	int h;
	303	size_t j, sz;
	304
	305	rewind(fp);
	306
	307	/sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;/
	308	/if (sz < 100)/
	309	sz = 100;
	310
	311	p = xmalloc((sz + 3) * sizeof(p[0]));
	312	j = 0;
	313	while ((h = readhash(fp))) {
	314	if (j == sz) {
	315	sz = sz * 3 / 2;
	316	p = xrealloc(p, (sz + 3) * sizeof(p[0]));
	317	}
	318	p[++j].value = h;
	319	}
	320	len[i] = j;
	321	file[i] = p;
	322	}
	323
	324
	325	static void prune(void)
	326	{
	327	int i, j;
	328
	329	for (pref = 0; pref < len[0] && pref < len[1] &&
	330	file[0][pref + 1].value == file[1][pref + 1].value; pref++)
	331	continue;
	332	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
	333	file[0][len[0] - suff].value == file[1][len[1] - suff].value;
	334	suff++)
	335	continue;
	336	for (j = 0; j < 2; j++) {
	337	sfile[j] = file[j] + pref;
	338	slen[j] = len[j] - pref - suff;
	339	for (i = 0; i <= slen[j]; i++)
	340	sfile[j][i].serial = i;
	341	}
	342	}
	343
	344
	345	static void equiv(struct line a, int n, struct line b, int m, int *c)
	346	{
	347	int i, j;
	348
	349	i = j = 1;
	350	while (i <= n && j <= m) {
	351	if (a[i].value < b[j].value)
	352	a[i++].value = 0;
	353	else if (a[i].value == b[j].value)
	354	a[i++].value = j;
	355	else
	356	j++;
	357	}
	358	while (i <= n)
	359	a[i++].value = 0;
	360	b[m + 1].value = 0;
	361	j = 0;
	362	while (++j <= m) {
	363	c[j] = -b[j].serial;
	364	while (b[j + 1].value == b[j].value) {
	365	j++;
	366	c[j] = b[j].serial;
	367	}
	368	}
	369	c[j] = -1;
	370	}
	371
	372
	373	static int isqrt(int n)
	374	{
	375	int y, x;
	376
	377	if (n == 0)
	378	return 0;
	379	x = 1;
	380	do {
	381	y = x;
	382	x = n / x;
	383	x += y;
	384	x /= 2;
	385	} while ((x - y) > 1 \|\| (x - y) < -1);
	386
	387	return x;
	388	}
	389
	390
	391	static int newcand(int x, int y, int pred)
	392	{
	393	struct cand *q;
	394
	395	if (clen == clistlen) {
	396	clistlen = clistlen * 11 / 10;
	397	clist = xrealloc(clist, clistlen * sizeof(struct cand));
	398	}
	399	q = clist + clen;
	400	q->x = x;
	401	q->y = y;
	402	q->pred = pred;
	403	return clen++;
	404	}
	405
	406
	407	static int search(int *c, int k, int y)
	408	{
	409	int i, j, l, t;
	410
	411	if (clist[c[k]].y < y) /* quick look for typical case */
	412	return k + 1;
	413	i = 0;
	414	j = k + 1;
	415	while (1) {
	416	l = i + j;
	417	if ((l >>= 1) <= i)
	418	break;
	419	t = clist[c[l]].y;
	420	if (t > y)
	421	j = l;
	422	else if (t < y)
	423	i = l;
	424	else
	425	return l;
	426	}
	427	return l + 1;
	428	}
	429
	430
	431	static int stone(int a, int n, int b, int *c)
	432	{
	433	int i, k, y, j, l;
	434	int oldc, tc, oldl;
	435	unsigned int numtries;
	436
	437	#if ENABLE_FEATURE_DIFF_MINIMAL
	438	const unsigned int bound =
	439	(option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
	440	#else
	441	const unsigned int bound = MAX(256, isqrt(n));
	442	#endif
	443	k = 0;
	444	c[0] = newcand(0, 0, 0);
	445	for (i = 1; i <= n; i++) {
	446	j = a[i];
	447	if (j == 0)
	448	continue;
	449	y = -b[j];
	450	oldl = 0;
	451	oldc = c[0];
	452	numtries = 0;
	453	do {
	454	if (y <= clist[oldc].y)
	455	continue;
	456	l = search(c, k, y);
	457	if (l != oldl + 1)
	458	oldc = c[l - 1];
	459	if (l <= k) {
	460	if (clist[c[l]].y <= y)
	461	continue;
	462	tc = c[l];
	463	c[l] = newcand(i, y, oldc);
	464	oldc = tc;
	465	oldl = l;
	466	numtries++;
	467	} else {
	468	c[l] = newcand(i, y, oldc);
	469	k++;
	470	break;
	471	}
	472	} while ((y = b[++j]) > 0 && numtries < bound);
	473	}
	474	return k;
	475	}
	476
	477
	478	static void unravel(int p)
	479	{
	480	struct cand *q;
	481	int i;
	482
	483	for (i = 0; i <= len[0]; i++)
	484	J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
	485	for (q = clist + p; q->y != 0; q = clist + q->pred)
	486	J[q->x + pref] = q->y + pref;
	487	}
	488
	489
	490	static void unsort(struct line f, int l, int b)
	491	{
	492	int *a, i;
	493
	494	a = xmalloc((l + 1) * sizeof(int));
	495	for (i = 1; i <= l; i++)
	496	a[f[i].serial] = f[i].value;
	497	for (i = 1; i <= l; i++)
	498	b[i] = a[i];
	499	free(a);
	500	}
	501
	502
	503	static int skipline(FILE * f)
	504	{
	505	int i, c;
	506
	507	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
	508	continue;
	509	return i;
	510	}
	511
	512
	513	/*
	514	* Check does double duty:
	515	* 1. ferret out any fortuitous correspondences due
	516	* to confounding by hashing (which result in "jackpot")
	517	* 2. collect random access indexes to the two files
	518	*/
	519	static void check(FILE * f1, FILE * f2)
	520	{
	521	int i, j, jackpot, c, d;
	522	long ctold, ctnew;
	523
	524	rewind(f1);
	525	rewind(f2);
	526	j = 1;
	527	ixold[0] = ixnew[0] = 0;
	528	jackpot = 0;
	529	ctold = ctnew = 0;
	530	for (i = 1; i <= len[0]; i++) {
	531	if (J[i] == 0) {
	532	ixold[i] = ctold += skipline(f1);
	533	continue;
	534	}
	535	while (j < J[i]) {
	536	ixnew[j] = ctnew += skipline(f2);
	537	j++;
	538	}
	539	if ((option_mask32 & FLAG_b) \|\| (option_mask32 & FLAG_w)
	540	\|\| (option_mask32 & FLAG_i)) {
	541	while (1) {
	542	c = getc(f1);
	543	d = getc(f2);
	544	/*
	545	* GNU diff ignores a missing newline
	546	* in one file if bflag \|\| wflag.
	547	*/
	548	if (((option_mask32 & FLAG_b) \|\| (option_mask32 & FLAG_w)) &&
	549	((c == EOF && d == '\n') \|\| (c == '\n' && d == EOF))) {
	550	break;
	551	}
	552	ctold++;
	553	ctnew++;
	554	if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) {
	555	do {
	556	if (c == '\n')
	557	break;
	558	ctold++;
	559	} while (isspace(c = getc(f1)));
	560	do {
	561	if (d == '\n')
	562	break;
	563	ctnew++;
	564	} while (isspace(d = getc(f2)));
	565	} else if (option_mask32 & FLAG_w) {
	566	while (isspace(c) && c != '\n') {
	567	c = getc(f1);
	568	ctold++;
	569	}
	570	while (isspace(d) && d != '\n') {
	571	d = getc(f2);
	572	ctnew++;
	573	}
	574	}
	575	if (c != d) {
	576	jackpot++;
	577	J[i] = 0;
	578	if (c != '\n' && c != EOF)
	579	ctold += skipline(f1);
	580	if (d != '\n' && c != EOF)
	581	ctnew += skipline(f2);
	582	break;
	583	}
	584	if (c == '\n' \|\| c == EOF)
	585	break;
	586	}
	587	} else {
	588	while (1) {
	589	ctold++;
	590	ctnew++;
	591	if ((c = getc(f1)) != (d = getc(f2))) {
	592	J[i] = 0;
	593	if (c != '\n' && c != EOF)
	594	ctold += skipline(f1);
	595	if (d != '\n' && c != EOF)
	596	ctnew += skipline(f2);
	597	break;
	598	}
	599	if (c == '\n' \|\| c == EOF)
	600	break;
	601	}
	602	}
	603	ixold[i] = ctold;
	604	ixnew[j] = ctnew;
	605	j++;
	606	}
	607	for (; j <= len[1]; j++)
	608	ixnew[j] = ctnew += skipline(f2);
	609	}
	610
	611
	612	/* shellsort CACM #201 */
	613	static void sort(struct line *a, int n)
	614	{
	615	struct line ai, aim, w;
	616	int j, m = 0, k;
	617
	618	if (n == 0)
	619	return;
	620	for (j = 1; j <= n; j *= 2)
	621	m = 2 * j - 1;
	622	for (m /= 2; m != 0; m /= 2) {
	623	k = n - m;
	624	for (j = 1; j <= k; j++) {
	625	for (ai = &a[j]; ai > a; ai -= m) {
	626	aim = &ai[m];
	627	if (aim < ai)
	628	break; /* wraparound */
	629	if (aim->value > ai[0].value \|\|
	630	(aim->value == ai[0].value && aim->serial > ai[0].serial))
	631	break;
	632	w.value = ai[0].value;
	633	ai[0].value = aim->value;
	634	aim->value = w.value;
	635	w.serial = ai[0].serial;
	636	ai[0].serial = aim->serial;
	637	aim->serial = w.serial;
	638	}
	639	}
	640	}
	641	}
	642
	643
	644	static void uni_range(int a, int b)
	645	{
	646	if (a < b)
	647	printf("%d,%d", a, b - a + 1);
	648	else if (a == b)
	649	printf("%d", b);
	650	else
	651	printf("%d,0", b);
	652	}
	653
	654
	655	static void fetch(long f, int a, int b, FILE lb, int ch)
	656	{
	657	int i, j, c, lastc, col, nc;
	658
	659	if (a > b)
	660	return;
	661	for (i = a; i <= b; i++) {
	662	fseek(lb, f[i - 1], SEEK_SET);
	663	nc = f[i] - f[i - 1];
	664	if (ch != '\0') {
	665	putchar(ch);
	666	if (option_mask32 & FLAG_T)
	667	putchar('\t');
	668	}
	669	col = 0;
	670	for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
	671	if ((c = getc(lb)) == EOF) {
	672	printf("\n\\ No newline at end of file\n");
	673	return;
	674	}
	675	if (c == '\t' && (option_mask32 & FLAG_t)) {
	676	do {
	677	putchar(' ');
	678	} while (++col & 7);
	679	} else {
	680	putchar(c);
	681	col++;
	682	}
	683	}
	684	}
	685	}
	686
	687
	688	static int asciifile(FILE * f)
	689	{
	690	#if ENABLE_FEATURE_DIFF_BINARY
	691	int i, cnt;
	692	#endif
	693
	694	if ((option_mask32 & FLAG_a) \|\| f == NULL)
	695	return 1;
	696
	697	#if ENABLE_FEATURE_DIFF_BINARY
	698	rewind(f);
	699	cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f);
	700	for (i = 0; i < cnt; i++) {
	701	if (!isprint(g_read_buf[i])
	702	&& !isspace(g_read_buf[i])) {
	703	return 0;
	704	}
	705	}
	706	#endif
	707	return 1;
	708	}
	709
	710
	711	/* dump accumulated "unified" diff changes */
	712	static void dump_unified_vec(FILE * f1, FILE * f2)
	713	{
	714	struct context_vec *cvp = context_vec_start;
	715	int lowa, upb, lowc, upd;
	716	int a, b, c, d;
	717	char ch;
	718
	719	if (context_vec_start > context_vec_ptr)
	720	return;
	721
	722	b = d = 0; /* gcc */
	723	lowa = MAX(1, cvp->a - context);
	724	upb = MIN(len[0], context_vec_ptr->b + context);
	725	lowc = MAX(1, cvp->c - context);
	726	upd = MIN(len[1], context_vec_ptr->d + context);
	727
	728	printf("@@ -");
	729	uni_range(lowa, upb);
	730	printf(" +");
	731	uni_range(lowc, upd);
	732	printf(" @@\n");
	733
	734	/*
	735	* Output changes in "unified" diff format--the old and new lines
	736	* are printed together.
	737	*/
	738	for (; cvp <= context_vec_ptr; cvp++) {
	739	a = cvp->a;
	740	b = cvp->b;
	741	c = cvp->c;
	742	d = cvp->d;
	743
	744	/*
	745	* c: both new and old changes
	746	* d: only changes in the old file
	747	* a: only changes in the new file
	748	*/
	749	if (a <= b && c <= d)
	750	ch = 'c';
	751	else
	752	ch = (a <= b) ? 'd' : 'a';
	753	#if 0
	754	switch (ch) {
	755	case 'c':
	756	fetch(ixold, lowa, a - 1, f1, ' ');
	757	fetch(ixold, a, b, f1, '-');
	758	fetch(ixnew, c, d, f2, '+');
	759	break;
	760	case 'd':
	761	fetch(ixold, lowa, a - 1, f1, ' ');
	762	fetch(ixold, a, b, f1, '-');
	763	break;
	764	case 'a':
	765	fetch(ixnew, lowc, c - 1, f2, ' ');
	766	fetch(ixnew, c, d, f2, '+');
	767	break;
	768	}
	769	#else
	770	if (ch == 'c' \|\| ch == 'd') {
	771	fetch(ixold, lowa, a - 1, f1, ' ');
	772	fetch(ixold, a, b, f1, '-');
	773	}
	774	if (ch == 'a')
	775	fetch(ixnew, lowc, c - 1, f2, ' ');
	776	if (ch == 'c' \|\| ch == 'a')
	777	fetch(ixnew, c, d, f2, '+');
	778	#endif
	779	lowa = b + 1;
	780	lowc = d + 1;
	781	}
	782	fetch(ixnew, d + 1, upd, f2, ' ');
	783
	784	context_vec_ptr = context_vec_start - 1;
	785	}
	786
	787
	788	static void print_header(const char file1, const char file2)
	789	{
	790	if (label1)
	791	printf("--- %s\n", label1);
	792	else
	793	printf("--- %s\t%s", file1, ctime(&stb1.st_mtime));
	794	if (label2)
	795	printf("+++ %s\n", label2);
	796	else
	797	printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime));
	798	}
	799
	800
	801	/*
	802	* Indicate that there is a difference between lines a and b of the from file
	803	* to get to lines c to d of the to file. If a is greater than b then there
	804	* are no lines in the from file involved and this means that there were
	805	* lines appended (beginning at b). If c is greater than d then there are
	806	* lines missing from the to file.
	807	*/
	808	static void change(char file1, FILE f1, char file2, FILE f2, int a,
	809	int b, int c, int d)
	810	{
	811	if ((a > b && c > d) \|\| (option_mask32 & FLAG_q)) {
	812	anychange = 1;
	813	return;
	814	}
	815
	816	/*
	817	* Allocate change records as needed.
	818	*/
	819	if (context_vec_ptr == context_vec_end - 1) {
	820	ptrdiff_t offset = context_vec_ptr - context_vec_start;
	821
	822	max_context <<= 1;
	823	context_vec_start = xrealloc(context_vec_start,
	824	max_context * sizeof(struct context_vec));
	825	context_vec_end = context_vec_start + max_context;
	826	context_vec_ptr = context_vec_start + offset;
	827	}
	828	if (anychange == 0) {
	829	/*
	830	* Print the context/unidiff header first time through.
	831	*/
	832	print_header(file1, file2);
	833	} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
	834	c > context_vec_ptr->d + (2 * context) + 1) {
	835	/*
	836	* If this change is more than 'context' lines from the
	837	* previous change, dump the record and reset it.
	838	*/
	839	dump_unified_vec(f1, f2);
	840	}
	841	context_vec_ptr++;
	842	context_vec_ptr->a = a;
	843	context_vec_ptr->b = b;
	844	context_vec_ptr->c = c;
	845	context_vec_ptr->d = d;
	846	anychange = 1;
	847	}
	848
	849
	850	static void output(char file1, FILE f1, char file2, FILE f2)
	851	{
	852	/* Note that j0 and j1 can't be used as they are defined in math.h.
	853	* This also allows the rather amusing variable 'j00'... */
	854	int m, i0, i1, j00, j01;
	855
	856	rewind(f1);
	857	rewind(f2);
	858	m = len[0];
	859	J[0] = 0;
	860	J[m + 1] = len[1] + 1;
	861	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
	862	while (i0 <= m && J[i0] == J[i0 - 1] + 1)
	863	i0++;
	864	j00 = J[i0 - 1] + 1;
	865	i1 = i0 - 1;
	866	while (i1 < m && J[i1 + 1] == 0)
	867	i1++;
	868	j01 = J[i1 + 1] - 1;
	869	J[i1] = j01;
	870	change(file1, f1, file2, f2, i0, i1, j00, j01);
	871	}
	872	if (m == 0) {
	873	change(file1, f1, file2, f2, 1, 0, 1, len[1]);
	874	}
	875	if (anychange != 0 && !(option_mask32 & FLAG_q)) {
	876	dump_unified_vec(f1, f2);
	877	}
	878	}
	879
	880	/*
	881	* The following code uses an algorithm due to Harold Stone,
	882	* which finds a pair of longest identical subsequences in
	883	* the two files.
	884	*
	885	* The major goal is to generate the match vector J.
	886	* J[i] is the index of the line in file1 corresponding
	887	* to line i file0. J[i] = 0 if there is no
	888	* such line in file1.
	889	*
	890	* Lines are hashed so as to work in core. All potential
	891	* matches are located by sorting the lines of each file
	892	* on the hash (called ``value''). In particular, this
	893	* collects the equivalence classes in file1 together.
	894	* Subroutine equiv replaces the value of each line in
	895	* file0 by the index of the first element of its
	896	* matching equivalence in (the reordered) file1.
	897	* To save space equiv squeezes file1 into a single
	898	* array member in which the equivalence classes
	899	* are simply concatenated, except that their first
	900	* members are flagged by changing sign.
	901	*
	902	* Next the indices that point into member are unsorted into
	903	* array class according to the original order of file0.
	904	*
	905	* The cleverness lies in routine stone. This marches
	906	* through the lines of file0, developing a vector klist
	907	* of "k-candidates". At step i a k-candidate is a matched
	908	* pair of lines x,y (x in file0 y in file1) such that
	909	* there is a common subsequence of length k
	910	* between the first i lines of file0 and the first y
	911	* lines of file1, but there is no such subsequence for
	912	* any smaller y. x is the earliest possible mate to y
	913	* that occurs in such a subsequence.
	914	*
	915	* Whenever any of the members of the equivalence class of
	916	* lines in file1 matable to a line in file0 has serial number
	917	* less than the y of some k-candidate, that k-candidate
	918	* with the smallest such y is replaced. The new
	919	* k-candidate is chained (via pred) to the current
	920	* k-1 candidate so that the actual subsequence can
	921	* be recovered. When a member has serial number greater
	922	* that the y of all k-candidates, the klist is extended.
	923	* At the end, the longest subsequence is pulled out
	924	* and placed in the array J by unravel
	925	*
	926	* With J in hand, the matches there recorded are
	927	* checked against reality to assure that no spurious
	928	* matches have crept in due to hashing. If they have,
	929	* they are broken, and "jackpot" is recorded--a harmless
	930	* matter except that a true match for a spuriously
	931	* mated line may now be unnecessarily reported as a change.
	932	*
	933	* Much of the complexity of the program comes simply
	934	* from trying to minimize core utilization and
	935	* maximize the range of doable problems by dynamically
	936	* allocating what is needed and reusing what is not.
	937	* The core requirements for problems larger than somewhat
	938	* are (in words) 2*length(file0) + length(file1) +
	939	* 3*(number of k-candidates installed), typically about
	940	* 6n words for files of length n.
	941	*/
	942	static unsigned diffreg(char ofile1, char ofile2, int flags)
	943	{
	944	char *file1 = ofile1;
	945	char *file2 = ofile2;
	946	FILE f1 = stdin, f2 = stdin;
	947	unsigned rval;
	948	int i;
	949
	950	anychange = 0;
	951	context_vec_ptr = context_vec_start - 1;
	952
	953	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
	954	return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
	955
	956	rval = D_SAME;
	957
	958	if (LONE_DASH(file1) && LONE_DASH(file2))
	959	goto closem;
	960
	961	if (flags & D_EMPTY1)
	962	f1 = xfopen(bb_dev_null, "r");
	963	else if (NOT_LONE_DASH(file1))
	964	f1 = xfopen(file1, "r");
	965	if (flags & D_EMPTY2)
	966	f2 = xfopen(bb_dev_null, "r");
	967	else if (NOT_LONE_DASH(file2))
	968	f2 = xfopen(file2, "r");
	969
	970	/* We can't diff non-seekable stream - we use rewind(), fseek().
	971	* This can be fixed (volunteers?).
	972	* Meanwhile we should check it here by stat'ing input fds,
	973	* but I am lazy and check that in main() instead.
	974	* Check in main won't catch "diffing fifos buried in subdirectories"
	975	* failure scenario - not very likely in real life... */
	976
	977	i = files_differ(f1, f2, flags);
	978	if (i == 0)
	979	goto closem;
	980	else if (i != 1) { /* 1 == ok */
	981	/* error */
	982	status \|= 2;
	983	goto closem;
	984	}
	985
	986	if (!asciifile(f1) \|\| !asciifile(f2)) {
	987	rval = D_BINARY;
	988	status \|= 1;
	989	goto closem;
	990	}
	991
	992	prepare(0, f1 /, stb1.st_size/);
	993	prepare(1, f2 /, stb2.st_size/);
	994	prune();
	995	sort(sfile[0], slen[0]);
	996	sort(sfile[1], slen[1]);
	997
	998	member = (int *) file[1];
	999	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
	1000	member = xrealloc(member, (slen[1] + 2) * sizeof(int));
	1001
	1002	class = (int *) file[0];
	1003	unsort(sfile[0], slen[0], class);
	1004	class = xrealloc(class, (slen[0] + 2) * sizeof(int));
	1005
	1006	klist = xmalloc((slen[0] + 2) * sizeof(int));
	1007	clen = 0;
	1008	clistlen = 100;
	1009	clist = xmalloc(clistlen * sizeof(struct cand));
	1010	i = stone(class, slen[0], member, klist);
	1011	free(member);
	1012	free(class);
	1013
	1014	J = xrealloc(J, (len[0] + 2) * sizeof(int));
	1015	unravel(klist[i]);
	1016	free(clist);
	1017	free(klist);
	1018
	1019	ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
	1020	ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
	1021	check(f1, f2);
	1022	output(file1, f1, file2, f2);
	1023
	1024	closem:
	1025	if (anychange) {
	1026	status \|= 1;
	1027	if (rval == D_SAME)
	1028	rval = D_DIFFER;
	1029	}
	1030	fclose_if_not_stdin(f1);
	1031	fclose_if_not_stdin(f2);
	1032	if (file1 != ofile1)
	1033	free(file1);
	1034	if (file2 != ofile2)
	1035	free(file2);
	1036	return rval;
	1037	}
	1038
	1039
	1040	#if ENABLE_FEATURE_DIFF_DIR
	1041	static void do_diff(char dir1, char path1, char dir2, char path2)
	1042	{
	1043	int flags = D_HEADER;
	1044	int val;
	1045	char fullpath1 = NULL; / if -N */
	1046	char *fullpath2 = NULL;
	1047
	1048	if (path1)
	1049	fullpath1 = concat_path_file(dir1, path1);
	1050	if (path2)
	1051	fullpath2 = concat_path_file(dir2, path2);
	1052
	1053	if (!fullpath1 \|\| stat(fullpath1, &stb1) != 0) {
	1054	flags \|= D_EMPTY1;
	1055	memset(&stb1, 0, sizeof(stb1));
	1056	if (path2) {
	1057	free(fullpath1);
	1058	fullpath1 = concat_path_file(dir1, path2);
	1059	}
	1060	}
	1061	if (!fullpath2 \|\| stat(fullpath2, &stb2) != 0) {
	1062	flags \|= D_EMPTY2;
	1063	memset(&stb2, 0, sizeof(stb2));
	1064	stb2.st_mode = stb1.st_mode;
	1065	if (path1) {
	1066	free(fullpath2);
	1067	fullpath2 = concat_path_file(dir2, path1);
	1068	}
	1069	}
	1070
	1071	if (stb1.st_mode == 0)
	1072	stb1.st_mode = stb2.st_mode;
	1073
	1074	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
	1075	printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
	1076	goto ret;
	1077	}
	1078
	1079	if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
	1080	val = D_SKIPPED1;
	1081	else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
	1082	val = D_SKIPPED2;
	1083	else
	1084	val = diffreg(fullpath1, fullpath2, flags);
	1085
	1086	print_status(val, fullpath1, fullpath2, NULL);
	1087	ret:
	1088	free(fullpath1);
	1089	free(fullpath2);
	1090	}
	1091	#endif
	1092
	1093
	1094	#if ENABLE_FEATURE_DIFF_DIR
	1095	static int dir_strcmp(const void p1, const void p2)
	1096	{
	1097	return strcmp((char const ) p1, (char const ) p2);
	1098	}
	1099
	1100
	1101	/* This function adds a filename to dl, the directory listing. */
	1102	static int add_to_dirlist(const char *filename,
	1103	struct stat ATTRIBUTE_UNUSED * sb, void *userdata,
	1104	int depth ATTRIBUTE_UNUSED)
	1105	{
	1106	/* +2: with space for eventual trailing NULL */
	1107	dl = xrealloc(dl, (dl_count+2) * sizeof(dl[0]));
	1108	dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata);
	1109	dl_count++;
	1110	return TRUE;
	1111	}
	1112
	1113
	1114	/* This returns a sorted directory listing. */
	1115	static char *get_dir(char path)
	1116	{
	1117	dl_count = 0;
	1118	dl = xzalloc(sizeof(dl[0]));
	1119
	1120	/* If -r has been set, then the recursive_action function will be
	1121	* used. Unfortunately, this outputs the root directory along with
	1122	* the recursed paths, so use void *userdata to specify the string
	1123	* length of the root directory - '(void*)(strlen(path)+)'.
	1124	* add_to_dirlist then removes root dir prefix. */
	1125
	1126	if (option_mask32 & FLAG_r) {
	1127	recursive_action(path, ACTION_RECURSE\|ACTION_FOLLOWLINKS,
	1128	add_to_dirlist, NULL,
	1129	(void*)(strlen(path)+1), 0);
	1130	} else {
	1131	DIR *dp;
	1132	struct dirent *ep;
	1133
	1134	dp = warn_opendir(path);
	1135	while ((ep = readdir(dp))) {
	1136	if (!strcmp(ep->d_name, "..") \|\| LONE_CHAR(ep->d_name, '.'))
	1137	continue;
	1138	add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0);
	1139	}
	1140	closedir(dp);
	1141	}
	1142
	1143	/* Sort dl alphabetically. */
	1144	qsort(dl, dl_count, sizeof(char *), dir_strcmp);
	1145
	1146	dl[dl_count] = NULL;
	1147	return dl;
	1148	}
	1149
	1150
	1151	static void diffdir(char p1, char p2)
	1152	{
	1153	char dirlist1, dirlist2;
	1154	char dp1, dp2;
	1155	int pos;
	1156
	1157	/* Check for trailing slashes. */
	1158	dp1 = last_char_is(p1, '/');
	1159	if (dp1 != NULL)
	1160	*dp1 = '\0';
	1161	dp2 = last_char_is(p2, '/');
	1162	if (dp2 != NULL)
	1163	*dp2 = '\0';
	1164
	1165	/* Get directory listings for p1 and p2. */
	1166
	1167	dirlist1 = get_dir(p1);
	1168	dirlist2 = get_dir(p2);
	1169
	1170	/* If -S was set, find the starting point. */
	1171	if (start) {
	1172	while (dirlist1 != NULL && strcmp(dirlist1, start) < 0)
	1173	dirlist1++;
	1174	while (dirlist2 != NULL && strcmp(dirlist2, start) < 0)
	1175	dirlist2++;
	1176	if ((dirlist1 == NULL) \|\| (dirlist2 == NULL))
	1177	bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
	1178	}
	1179
	1180	/* Now that both dirlist1 and dirlist2 contain sorted directory
	1181	* listings, we can start to go through dirlist1. If both listings
	1182	* contain the same file, then do a normal diff. Otherwise, behaviour
	1183	* is determined by whether the -N flag is set. */
	1184	while (dirlist1 != NULL \|\| dirlist2 != NULL) {
	1185	dp1 = *dirlist1;
	1186	dp2 = *dirlist2;
	1187	pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
	1188	if (pos == 0) {
	1189	do_diff(p1, dp1, p2, dp2);
	1190	dirlist1++;
	1191	dirlist2++;
	1192	} else if (pos < 0) {
	1193	if (option_mask32 & FLAG_N)
	1194	do_diff(p1, dp1, p2, NULL);
	1195	else
	1196	print_only(p1, strlen(p1) + 1, dp1);
	1197	dirlist1++;
	1198	} else {
	1199	if (option_mask32 & FLAG_N)
	1200	do_diff(p1, NULL, p2, dp2);
	1201	else
	1202	print_only(p2, strlen(p2) + 1, dp2);
	1203	dirlist2++;
	1204	}
	1205	}
	1206	}
	1207	#endif
	1208
	1209
	1210	int diff_main(int argc, char **argv);
	1211	int diff_main(int argc, char **argv)
	1212	{
	1213	bool gotstdin = 0;
	1214	char *U_opt;
	1215	char f1, f2;
	1216	llist_t *L_arg = NULL;
	1217
	1218	INIT_G();
	1219
	1220	/* exactly 2 params; collect multiple -L <label> */
	1221	opt_complementary = "=2:L::";
	1222	getopt32(argv, "abdiL:NqrsS:tTU:wu"
	1223	"p" /* ignored (for compatibility) */,
	1224	&L_arg, &start, &U_opt);
	1225	/argc -= optind;/
	1226	argv += optind;
	1227	while (L_arg) {
	1228	if (label1 && label2)
	1229	bb_show_usage();
	1230	if (!label1)
	1231	label1 = L_arg->data;
	1232	else { /* then label2 is NULL */
	1233	label2 = label1;
	1234	label1 = L_arg->data;
	1235	}
	1236	/* we leak L_arg here... */
	1237	L_arg = L_arg->link;
	1238	}
	1239	if (option_mask32 & FLAG_U)
	1240	context = xatoi_u(U_opt);
	1241
	1242	/*
	1243	* Do sanity checks, fill in stb1 and stb2 and call the appropriate
	1244	* driver routine. Both drivers use the contents of stb1 and stb2.
	1245	*/
	1246
	1247	f1 = argv[0];
	1248	f2 = argv[1];
	1249	if (LONE_DASH(f1)) {
	1250	fstat(STDIN_FILENO, &stb1);
	1251	gotstdin = 1;
	1252	} else
	1253	xstat(f1, &stb1);
	1254	if (LONE_DASH(f2)) {
	1255	fstat(STDIN_FILENO, &stb2);
	1256	gotstdin = 1;
	1257	} else
	1258	xstat(f2, &stb2);
	1259	if (gotstdin && (S_ISDIR(stb1.st_mode) \|\| S_ISDIR(stb2.st_mode)))
	1260	bb_error_msg_and_die("can't compare - to a directory");
	1261	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
	1262	#if ENABLE_FEATURE_DIFF_DIR
	1263	diffdir(f1, f2);
	1264	#else
	1265	bb_error_msg_and_die("directory comparison not supported");
	1266	#endif
	1267	} else {
	1268	if (S_ISDIR(stb1.st_mode)) {
	1269	f1 = concat_path_file(f1, f2);
	1270	xstat(f1, &stb1);
	1271	}
	1272	if (S_ISDIR(stb2.st_mode)) {
	1273	f2 = concat_path_file(f2, f1);
	1274	xstat(f2, &stb2);
	1275	}
	1276	/* XXX: FIXME: */
	1277	/* We can't diff e.g. stdin supplied by a pipe - we use rewind(), fseek().
	1278	* This can be fixed (volunteers?) */
	1279	if (!S_ISREG(stb1.st_mode) \|\| !S_ISREG(stb2.st_mode))
	1280	bb_error_msg_and_die("can't diff non-seekable stream");
	1281	print_status(diffreg(f1, f2, 0), f1, f2, NULL);
	1282	}
	1283	return status;
	1284	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: MondoRescue/branches/2.2.5/mindi-busybox/editors/diff.c@ 3528

Download in other formats: