Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

diff.c@ 3723

Visit:

Last change on this file since 3723 was 3621, checked in by Bruno Cornec, 10 years ago
New 3?3 banch for incorporation of latest busybox 1.25. Changing minor version to handle potential incompatibilities.
Property svn:eol-style set to `native`
File size: 30.1 KB

Rev	Line
[1765]	1	/* vi: set sw=4 ts=4: */
	2	/*
	3	* Mini diff implementation for busybox, adapted from OpenBSD diff.
	4	*
[2725]	5	* Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com>
[1765]	6	* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
	7	* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
	8	*
	9	* Sponsored in part by the Defense Advanced Research Projects
	10	* Agency (DARPA) and Air Force Research Laboratory, Air Force
	11	* Materiel Command, USAF, under agreement number F39502-99-1-0512.
	12	*
[2725]	13	* Licensed under GPLv2 or later, see file LICENSE in this source tree.
[1765]	14	*/
	15
	16	/*
[2725]	17	* The following code uses an algorithm due to Harold Stone,
	18	* which finds a pair of longest identical subsequences in
	19	* the two files.
	20	*
	21	* The major goal is to generate the match vector J.
	22	* J[i] is the index of the line in file1 corresponding
	23	* to line i in file0. J[i] = 0 if there is no
	24	* such line in file1.
	25	*
	26	* Lines are hashed so as to work in core. All potential
	27	* matches are located by sorting the lines of each file
	28	* on the hash (called "value"). In particular, this
	29	* collects the equivalence classes in file1 together.
	30	* Subroutine equiv replaces the value of each line in
	31	* file0 by the index of the first element of its
	32	* matching equivalence in (the reordered) file1.
	33	* To save space equiv squeezes file1 into a single
	34	* array member in which the equivalence classes
	35	* are simply concatenated, except that their first
	36	* members are flagged by changing sign.
	37	*
	38	* Next the indices that point into member are unsorted into
	39	* array class according to the original order of file0.
	40	*
	41	* The cleverness lies in routine stone. This marches
	42	* through the lines of file0, developing a vector klist
	43	* of "k-candidates". At step i a k-candidate is a matched
	44	* pair of lines x,y (x in file0, y in file1) such that
	45	* there is a common subsequence of length k
	46	* between the first i lines of file0 and the first y
	47	* lines of file1, but there is no such subsequence for
	48	* any smaller y. x is the earliest possible mate to y
	49	* that occurs in such a subsequence.
	50	*
	51	* Whenever any of the members of the equivalence class of
	52	* lines in file1 matable to a line in file0 has serial number
	53	* less than the y of some k-candidate, that k-candidate
	54	* with the smallest such y is replaced. The new
	55	* k-candidate is chained (via pred) to the current
	56	* k-1 candidate so that the actual subsequence can
	57	* be recovered. When a member has serial number greater
	58	* that the y of all k-candidates, the klist is extended.
	59	* At the end, the longest subsequence is pulled out
	60	* and placed in the array J by unravel
	61	*
	62	* With J in hand, the matches there recorded are
	63	* checked against reality to assure that no spurious
	64	* matches have crept in due to hashing. If they have,
	65	* they are broken, and "jackpot" is recorded--a harmless
	66	* matter except that a true match for a spuriously
	67	* mated line may now be unnecessarily reported as a change.
	68	*
	69	* Much of the complexity of the program comes simply
	70	* from trying to minimize core utilization and
	71	* maximize the range of doable problems by dynamically
	72	* allocating what is needed and reusing what is not.
	73	* The core requirements for problems larger than somewhat
	74	* are (in words) 2*length(file0) + length(file1) +
	75	* 3*(number of k-candidates installed), typically about
	76	* 6n words for files of length n.
[1765]	77	*/
	78
[3621]	79	//config:config DIFF
	80	//config: bool "diff"
	81	//config: default y
	82	//config: help
	83	//config: diff compares two files or directories and outputs the
	84	//config: differences between them in a form that can be given to
	85	//config: the patch command.
	86	//config:
	87	//config:config FEATURE_DIFF_LONG_OPTIONS
	88	//config: bool "Enable long options"
	89	//config: default y
	90	//config: depends on DIFF && LONG_OPTS
	91	//config: help
	92	//config: Enable use of long options.
	93	//config:
	94	//config:config FEATURE_DIFF_DIR
	95	//config: bool "Enable directory support"
	96	//config: default y
	97	//config: depends on DIFF
	98	//config: help
	99	//config: This option enables support for directory and subdirectory
	100	//config: comparison.
	101
	102	//kbuild:lib-$(CONFIG_DIFF) += diff.o
	103
	104	//applet:IF_DIFF(APPLET(diff, BB_DIR_USR_BIN, BB_SUID_DROP))
	105
[3232]	106	//usage:#define diff_trivial_usage
	107	//usage: "[-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2"
	108	//usage:#define diff_full_usage "\n\n"
	109	//usage: "Compare files line by line and output the differences between them.\n"
	110	//usage: "This implementation supports unified diffs only.\n"
	111	//usage: "\n -a Treat all files as text"
	112	//usage: "\n -b Ignore changes in the amount of whitespace"
	113	//usage: "\n -B Ignore changes whose lines are all blank"
	114	//usage: "\n -d Try hard to find a smaller set of changes"
	115	//usage: "\n -i Ignore case differences"
	116	//usage: "\n -L Use LABEL instead of the filename in the unified header"
	117	//usage: "\n -N Treat absent files as empty"
	118	//usage: "\n -q Output only whether files differ"
	119	//usage: "\n -r Recurse"
	120	//usage: "\n -S Start with FILE when comparing directories"
	121	//usage: "\n -T Make tabs line up by prefixing a tab when necessary"
	122	//usage: "\n -s Report when two files are the same"
	123	//usage: "\n -t Expand tabs to spaces in output"
	124	//usage: "\n -U Output LINES lines of context"
	125	//usage: "\n -w Ignore all whitespace"
	126
[2725]	127	#include "libbb.h"
[3621]	128	#include "common_bufsiz.h"
[1765]	129
[2725]	130	#if 0
[3232]	131	# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
[2725]	132	#else
[3232]	133	# define dbg_error_msg(...) ((void)0)
[2725]	134	#endif
[1765]	135
[2725]	136	enum { /* print_status() and diffreg() return values */
	137	STATUS_SAME, /* files are the same */
	138	STATUS_DIFFER, /* files differ */
	139	STATUS_BINARY, /* binary files differ */
[1765]	140	};
	141
[2725]	142	enum { /* Commandline flags */
	143	FLAG_a,
	144	FLAG_b,
	145	FLAG_d,
	146	FLAG_i,
	147	FLAG_L, /* never used, handled by getopt32 */
	148	FLAG_N,
	149	FLAG_q,
	150	FLAG_r,
	151	FLAG_s,
	152	FLAG_S, /* never used, handled by getopt32 */
	153	FLAG_t,
	154	FLAG_T,
	155	FLAG_U, /* never used, handled by getopt32 */
	156	FLAG_w,
	157	FLAG_u, /* ignored, this is the default */
	158	FLAG_p, /* not implemented */
	159	FLAG_B,
	160	FLAG_E, /* not implemented */
[1765]	161	};
[2725]	162	#define FLAG(x) (1 << FLAG_##x)
[1765]	163
[2725]	164	/* We cache file position to avoid excessive seeking */
	165	typedef struct FILE_and_pos_t {
	166	FILE *ft_fp;
	167	off_t ft_pos;
	168	} FILE_and_pos_t;
[1765]	169
	170	struct globals {
[2725]	171	smallint exit_status;
	172	int opt_U_context;
	173	const char *other_dir;
	174	char *label[2];
	175	struct stat stb[2];
[1765]	176	};
	177	#define G (*ptr_to_globals)
[2725]	178	#define exit_status (G.exit_status )
	179	#define opt_U_context (G.opt_U_context )
	180	#define label (G.label )
	181	#define stb (G.stb )
[1765]	182	#define INIT_G() do { \
[2725]	183	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
	184	opt_U_context = 3; \
[1765]	185	} while (0)
	186
[2725]	187	typedef int token_t;
[1765]	188
[2725]	189	enum {
	190	/* Public */
	191	TOK_EMPTY = 1 << 9, /* Line fully processed, you can proceed to the next */
	192	TOK_EOF = 1 << 10, /* File ended */
	193	/* Private (Only to be used by read_token() */
	194	TOK_EOL = 1 << 11, /* we saw EOL (sticky) */
	195	TOK_SPACE = 1 << 12, /* used -b code, means we are skipping spaces */
	196	SHIFT_EOF = (sizeof(token_t)*8 - 8) - 1,
	197	CHAR_MASK = 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */
	198	};
[1765]	199
[2725]	200	/* Restores full EOF from one 8th bit: */
	201	//#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF)
	202	/* We don't really need the above, we only need to have EOF != any_real_char: */
	203	#define TOK2CHAR(t) ((t) & CHAR_MASK)
	204
	205	static void seek_ft(FILE_and_pos_t *ft, off_t pos)
[1765]	206	{
[2725]	207	if (ft->ft_pos != pos) {
	208	ft->ft_pos = pos;
	209	fseeko(ft->ft_fp, pos, SEEK_SET);
[1765]	210	}
	211	}
[2725]	212
	213	/* Reads tokens from given fp, handling -b and -w flags
	214	* The user must reset tok every line start
[1765]	215	*/
[2725]	216	static int read_token(FILE_and_pos_t *ft, token_t tok)
[1765]	217	{
[2725]	218	tok \|= TOK_EMPTY;
	219	while (!(tok & TOK_EOL)) {
	220	bool is_space;
	221	int t;
[1765]	222
[2725]	223	t = fgetc(ft->ft_fp);
	224	if (t != EOF)
	225	ft->ft_pos++;
	226	is_space = (t == EOF \|\| isspace(t));
	227
	228	/* If t == EOF (-1), set both TOK_EOF and TOK_EOL */
	229	tok \|= (t & (TOK_EOF + TOK_EOL));
	230	/* Only EOL? */
	231	if (t == '\n')
	232	tok \|= TOK_EOL;
	233
	234	if (option_mask32 & FLAG(i)) /* Handcoded tolower() */
	235	t = (t >= 'A' && t <= 'Z') ? t - ('A' - 'a') : t;
	236
	237	if ((option_mask32 & FLAG(w)) && is_space)
	238	continue;
	239
	240	/* Trim char value to low 9 bits */
	241	t &= CHAR_MASK;
	242
	243	if (option_mask32 & FLAG(b)) {
	244	/* Was prev char whitespace? */
	245	if (tok & TOK_SPACE) { /* yes */
	246	if (is_space) /* this one too, ignore it */
	247	continue;
	248	tok &= ~TOK_SPACE;
	249	} else if (is_space) {
	250	/* 1st whitespace char.
	251	* Set TOK_SPACE and replace char by ' ' */
	252	t = TOK_SPACE + ' ';
[1765]	253	}
	254	}
[2725]	255	/* Clear EMPTY */
	256	tok &= ~(TOK_EMPTY + CHAR_MASK);
	257	/* Assign char value (low 9 bits) and maybe set TOK_SPACE */
	258	tok \|= t;
	259	break;
[1765]	260	}
[2725]	261	#if 0
	262	bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp, tok, tok & 0xff
	263	, tok & TOK_EOF ? " EOF" : ""
	264	, tok & TOK_EOL ? " EOL" : ""
	265	, tok & TOK_EMPTY ? " EMPTY" : ""
	266	, tok & TOK_SPACE ? " SPACE" : ""
	267	);
	268	#endif
	269	return tok;
[1765]	270	}
	271
[2725]	272	struct cand {
	273	int x;
	274	int y;
	275	int pred;
	276	};
[1765]	277
[2725]	278	static int search(const int c, int k, int y, const struct cand list)
[1765]	279	{
[2725]	280	int i, j;
[1765]	281
[2725]	282	if (list[c[k]].y < y) /* quick look for typical case */
	283	return k + 1;
	284
	285	for (i = 0, j = k + 1;;) {
	286	const int l = (i + j) >> 1;
	287	if (l > i) {
	288	const int t = list[c[l]].y;
	289	if (t > y)
	290	j = l;
	291	else if (t < y)
	292	i = l;
	293	else
	294	return l;
	295	} else
	296	return l + 1;
[1765]	297	}
	298	}
	299
[2725]	300	static unsigned isqrt(unsigned n)
[1765]	301	{
[2725]	302	unsigned x = 1;
	303	while (1) {
	304	const unsigned y = x;
	305	x = ((n / x) + x) >> 1;
	306	if (x <= (y + 1) && x >= (y - 1))
	307	return x;
[1765]	308	}
	309	}
	310
[2725]	311	static void stone(const int a, int n, const int b, int *J, int pref)
[1765]	312	{
[2725]	313	const unsigned isq = isqrt(n);
	314	const unsigned bound =
	315	(option_mask32 & FLAG(d)) ? UINT_MAX : MAX(256, isq);
	316	int clen = 1;
	317	int clistlen = 100;
	318	int k = 0;
	319	struct cand clist = xzalloc(clistlen sizeof(clist[0]));
	320	struct cand cand;
	321	struct cand *q;
	322	int klist = xzalloc((n + 2) sizeof(klist[0]));
	323	/clist[0] = (struct cand){0}; - xzalloc did it /
	324	/klist[0] = 0; /
[1765]	325
[2725]	326	for (cand.x = 1; cand.x <= n; cand.x++) {
	327	int j = a[cand.x], oldl = 0;
	328	unsigned numtries = 0;
	329	if (j == 0)
	330	continue;
	331	cand.y = -b[j];
	332	cand.pred = klist[0];
	333	do {
	334	int l, tc;
	335	if (cand.y <= clist[cand.pred].y)
	336	continue;
	337	l = search(klist, k, cand.y, clist);
	338	if (l != oldl + 1)
	339	cand.pred = klist[l - 1];
	340	if (l <= k && clist[klist[l]].y <= cand.y)
	341	continue;
	342	if (clen == clistlen) {
	343	clistlen = clistlen * 11 / 10;
	344	clist = xrealloc(clist, clistlen * sizeof(clist[0]));
	345	}
	346	clist[clen] = cand;
	347	tc = klist[l];
	348	klist[l] = clen++;
	349	if (l <= k) {
	350	cand.pred = tc;
	351	oldl = l;
	352	numtries++;
	353	} else {
	354	k++;
	355	break;
	356	}
	357	} while ((cand.y = b[++j]) > 0 && numtries < bound);
[1765]	358	}
[2725]	359	/* Unravel */
	360	for (q = clist + klist[k]; q->y; q = clist + q->pred)
	361	J[q->x + pref] = q->y + pref;
	362	free(klist);
	363	free(clist);
[1765]	364	}
	365
[2725]	366	struct line {
[3621]	367	/* 'serial' is not used in the beginning, so we reuse it
[2725]	368	* to store line offsets, thus reducing memory pressure
	369	*/
	370	union {
	371	unsigned serial;
	372	off_t offset;
	373	};
	374	unsigned value;
	375	};
[1765]	376
	377	static void equiv(struct line a, int n, struct line b, int m, int *c)
	378	{
[2725]	379	int i = 1, j = 1;
[1765]	380
	381	while (i <= n && j <= m) {
	382	if (a[i].value < b[j].value)
	383	a[i++].value = 0;
	384	else if (a[i].value == b[j].value)
	385	a[i++].value = j;
	386	else
	387	j++;
	388	}
	389	while (i <= n)
	390	a[i++].value = 0;
	391	b[m + 1].value = 0;
	392	j = 0;
	393	while (++j <= m) {
	394	c[j] = -b[j].serial;
	395	while (b[j + 1].value == b[j].value) {
	396	j++;
	397	c[j] = b[j].serial;
	398	}
	399	}
	400	c[j] = -1;
	401	}
	402
[2725]	403	static void unsort(const struct line f, int l, int b)
[1765]	404	{
	405	int i;
[2725]	406	int a = xmalloc((l + 1) sizeof(a[0]));
[1765]	407	for (i = 1; i <= l; i++)
	408	a[f[i].serial] = f[i].value;
	409	for (i = 1; i <= l; i++)
	410	b[i] = a[i];
	411	free(a);
	412	}
	413
[2725]	414	static int line_compar(const void a, const void b)
[1765]	415	{
[2725]	416	#define l0 ((const struct line*)a)
	417	#define l1 ((const struct line*)b)
	418	int r = l0->value - l1->value;
	419	if (r)
	420	return r;
	421	return l0->serial - l1->serial;
	422	#undef l0
	423	#undef l1
[1765]	424	}
	425
[2725]	426	static void fetch(FILE_and_pos_t ft, const off_t ix, int a, int b, int ch)
[1765]	427	{
[2725]	428	int i, j, col;
[1765]	429	for (i = a; i <= b; i++) {
[2725]	430	seek_ft(ft, ix[i - 1]);
	431	putchar(ch);
	432	if (option_mask32 & FLAG(T))
	433	putchar('\t');
	434	for (j = 0, col = 0; j < ix[i] - ix[i - 1]; j++) {
	435	int c = fgetc(ft->ft_fp);
	436	if (c == EOF) {
[3621]	437	puts("\n\\ No newline at end of file");
[1765]	438	return;
	439	}
[2725]	440	ft->ft_pos++;
	441	if (c == '\t' && (option_mask32 & FLAG(t)))
	442	do putchar(' '); while (++col & 7);
	443	else {
[1765]	444	putchar(c);
	445	col++;
	446	}
	447	}
	448	}
	449	}
	450
[2725]	451	/* Creates the match vector J, where J[i] is the index
	452	* of the line in the new file corresponding to the line i
	453	* in the old file. Lines start at 1 instead of 0, that value
	454	* being used instead to denote no corresponding line.
	455	* This vector is dynamically allocated and must be freed by the caller.
	456	*
	457	* * fp is an input parameter, where fp[0] and fp[1] are the open
	458	* old file and new file respectively.
	459	* * nlen is an output variable, where nlen[0] and nlen[1]
	460	* gets the number of lines in the old and new file respectively.
	461	* * ix is an output variable, where ix[0] and ix[1] gets
	462	* assigned dynamically allocated vectors of the offsets of the lines
	463	* of the old and new file respectively. These must be freed by the caller.
	464	*/
	465	static NOINLINE int create_J(FILE_and_pos_t ft[2], int nlen[2], off_t ix[2])
[1765]	466	{
[2725]	467	int J, slen[2], class, *member;
	468	struct line nfile[2], sfile[2];
	469	int pref = 0, suff = 0, i, j, delta;
[1765]	470
[2725]	471	/* Lines of both files are hashed, and in the process
	472	* their offsets are stored in the array ix[fileno]
	473	* where fileno == 0 points to the old file, and
	474	* fileno == 1 points to the new one.
	475	*/
	476	for (i = 0; i < 2; i++) {
	477	unsigned hash;
	478	token_t tok;
	479	size_t sz = 100;
	480	nfile[i] = xmalloc((sz + 3) * sizeof(nfile[i][0]));
	481	/* ft gets here without the correct position, cant use seek_ft */
	482	ft[i].ft_pos = 0;
	483	fseeko(ft[i].ft_fp, 0, SEEK_SET);
[1765]	484
[2725]	485	nlen[i] = 0;
	486	/* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */
	487	nfile[i][0].offset = 0;
	488	goto start; /* saves code */
	489	while (1) {
	490	tok = read_token(&ft[i], tok);
	491	if (!(tok & TOK_EMPTY)) {
	492	/* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */
	493	/hash = hash 128 - hash + TOK2CHAR(tok);
	494	* gcc insists on optimizing above to "hash * 127 + ...", thus... */
	495	unsigned o = hash - TOK2CHAR(tok);
	496	hash = hash * 128 - o; /* we want SPEED here */
	497	continue;
	498	}
	499	if (nlen[i]++ == sz) {
	500	sz = sz * 3 / 2;
	501	nfile[i] = xrealloc(nfile[i], (sz + 3) * sizeof(nfile[i][0]));
	502	}
	503	/* line_compar needs hashes fit into positive int */
	504	nfile[i][nlen[i]].value = hash & INT_MAX;
	505	/* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */
	506	nfile[i][nlen[i]].offset = ft[i].ft_pos;
	507	if (tok & TOK_EOF) {
	508	/* EOF counts as a token, so we have to adjust it here */
	509	nfile[i][nlen[i]].offset++;
	510	break;
	511	}
	512	start:
	513	hash = tok = 0;
[1765]	514	}
[2725]	515	/* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */
	516	if (nfile[i][nlen[i]].offset - nfile[i][nlen[i] - 1].offset == 1)
	517	nlen[i]--;
	518	/* Now we copy the line offsets into ix */
	519	ix[i] = xmalloc((nlen[i] + 2) * sizeof(ix[i][0]));
	520	for (j = 0; j < nlen[i] + 1; j++)
	521	ix[i][j] = nfile[i][j].offset;
[1765]	522	}
	523
[2725]	524	/* length of prefix and suffix is calculated */
	525	for (; pref < nlen[0] && pref < nlen[1] &&
	526	nfile[0][pref + 1].value == nfile[1][pref + 1].value;
	527	pref++);
	528	for (; suff < nlen[0] - pref && suff < nlen[1] - pref &&
	529	nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value;
	530	suff++);
	531	/* Arrays are pruned by the suffix and prefix length,
	532	* the result being sorted and stored in sfile[fileno],
	533	* and their sizes are stored in slen[fileno]
[1765]	534	*/
[2725]	535	for (j = 0; j < 2; j++) {
	536	sfile[j] = nfile[j] + pref;
	537	slen[j] = nlen[j] - pref - suff;
	538	for (i = 0; i <= slen[j]; i++)
	539	sfile[j][i].serial = i;
	540	qsort(sfile[j] + 1, slen[j], sizeof(*sfile[j]), line_compar);
	541	}
	542	/* nfile arrays are reused to reduce memory pressure
	543	* The #if zeroed out section performs the same task as the
	544	* one in the #else section.
	545	* Peak memory usage is higher, but one array copy is avoided
	546	* by not using unsort()
	547	*/
	548	#if 0
	549	member = xmalloc((slen[1] + 2) * sizeof(member[0]));
	550	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
	551	free(nfile[1]);
[1765]	552
[2725]	553	class = xmalloc((slen[0] + 1) * sizeof(class[0]));
	554	for (i = 1; i <= slen[0]; i++) /* Unsorting */
	555	class[sfile[0][i].serial] = sfile[0][i].value;
	556	free(nfile[0]);
[1765]	557	#else
[2725]	558	member = (int *)nfile[1];
	559	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
	560	member = xrealloc(member, (slen[1] + 2) * sizeof(member[0]));
	561
	562	class = (int *)nfile[0];
	563	unsort(sfile[0], slen[0], (int *)nfile[0]);
	564	class = xrealloc(class, (slen[0] + 2) * sizeof(class[0]));
[1765]	565	#endif
[2725]	566	J = xmalloc((nlen[0] + 2) * sizeof(J[0]));
	567	/* The elements of J which fall inside the prefix and suffix regions
	568	* are marked as unchanged, while the ones which fall outside
	569	* are initialized with 0 (no matches), so that function stone can
	570	* then assign them their right values
	571	*/
	572	for (i = 0, delta = nlen[1] - nlen[0]; i <= nlen[0]; i++)
	573	J[i] = i <= pref ? i :
	574	i > (nlen[0] - suff) ? (i + delta) : 0;
	575	/* Here the magic is performed */
	576	stone(class, slen[0], member, J, pref);
	577	J[nlen[0] + 1] = nlen[1] + 1;
[1765]	578
[2725]	579	free(class);
	580	free(member);
[1765]	581
[2725]	582	/* Both files are rescanned, in an effort to find any lines
	583	* which, due to limitations intrinsic to any hashing algorithm,
	584	* are different but ended up confounded as the same
	585	*/
	586	for (i = 1; i <= nlen[0]; i++) {
	587	if (!J[i])
	588	continue;
[1765]	589
[2725]	590	seek_ft(&ft[0], ix[0][i - 1]);
	591	seek_ft(&ft[1], ix[1][J[i] - 1]);
[1765]	592
[2725]	593	for (j = J[i]; i <= nlen[0] && J[i] == j; i++, j++) {
	594	token_t tok0 = 0, tok1 = 0;
	595	do {
	596	tok0 = read_token(&ft[0], tok0);
	597	tok1 = read_token(&ft[1], tok1);
[1765]	598
[2725]	599	if (((tok0 ^ tok1) & TOK_EMPTY) != 0 /* one is empty (not both) */
	600	\|\| (!(tok0 & TOK_EMPTY) && TOK2CHAR(tok0) != TOK2CHAR(tok1))
	601	) {
	602	J[i] = 0; /* Break the correspondence */
	603	}
	604	} while (!(tok0 & tok1 & TOK_EMPTY));
	605	}
[1765]	606	}
	607
[2725]	608	return J;
[1765]	609	}
	610
[2725]	611	static bool diff(FILE* fp[2], char *file[2])
[1765]	612	{
[2725]	613	int nlen[2];
	614	off_t *ix[2];
	615	FILE_and_pos_t ft[2];
	616	typedef struct { int a, b; } vec_t[2];
	617	vec_t *vec = NULL;
	618	int i = 1, j, k, idx = -1;
	619	bool anychange = false;
	620	int *J;
[1765]	621
[2725]	622	ft[0].ft_fp = fp[0];
	623	ft[1].ft_fp = fp[1];
	624	/* note that ft[i].ft_pos is unintitalized, create_J()
	625	* must not assume otherwise */
	626	J = create_J(ft, nlen, ix);
[1765]	627
[2725]	628	do {
	629	bool nonempty = false;
[1765]	630
[2725]	631	while (1) {
	632	vec_t v;
[1765]	633
[2725]	634	for (v[0].a = i; v[0].a <= nlen[0] && J[v[0].a] == J[v[0].a - 1] + 1; v[0].a++)
	635	continue;
	636	v[1].a = J[v[0].a - 1] + 1;
[1765]	637
[2725]	638	for (v[0].b = v[0].a - 1; v[0].b < nlen[0] && !J[v[0].b + 1]; v[0].b++)
	639	continue;
	640	v[1].b = J[v[0].b + 1] - 1;
	641	/*
	642	* Indicate that there is a difference between lines a and b of the 'from' file
	643	* to get to lines c to d of the 'to' file. If a is greater than b then there
	644	* are no lines in the 'from' file involved and this means that there were
	645	* lines appended (beginning at b). If c is greater than d then there are
	646	* lines missing from the 'to' file.
	647	*/
	648	if (v[0].a <= v[0].b \|\| v[1].a <= v[1].b) {
	649	/*
	650	* If this change is more than 'context' lines from the
	651	* previous change, dump the record and reset it.
	652	*/
	653	int ct = (2 * opt_U_context) + 1;
	654	if (idx >= 0
	655	&& v[0].a > vec[idx][0].b + ct
	656	&& v[1].a > vec[idx][1].b + ct
	657	) {
	658	break;
	659	}
[1765]	660
[2725]	661	for (j = 0; j < 2; j++)
[3621]	662	for (k = v[j].a; k <= v[j].b; k++)
	663	nonempty \|= (ix[j][k] - ix[j][k - 1] != 1);
[1765]	664
[2725]	665	vec = xrealloc_vector(vec, 6, ++idx);
	666	memcpy(vec[idx], v, sizeof(v));
	667	}
[1765]	668
[2725]	669	i = v[0].b + 1;
	670	if (i > nlen[0])
	671	break;
	672	J[v[0].b] = v[1].b;
	673	}
	674	if (idx < 0 \|\| ((option_mask32 & FLAG(B)) && !nonempty))
	675	goto cont;
	676	if (!(option_mask32 & FLAG(q))) {
	677	int lowa;
	678	vec_t span, *cvp = vec;
[1765]	679
[2725]	680	if (!anychange) {
	681	/* Print the context/unidiff header first time through */
	682	printf("--- %s\n", label[0] ? label[0] : file[0]);
	683	printf("+++ %s\n", label[1] ? label[1] : file[1]);
	684	}
[1765]	685
[2725]	686	printf("@@");
	687	for (j = 0; j < 2; j++) {
	688	int a = span[j].a = MAX(1, (*cvp)[j].a - opt_U_context);
	689	int b = span[j].b = MIN(nlen[j], vec[idx][j].b + opt_U_context);
[1765]	690
[2725]	691	printf(" %c%d", j ? '+' : '-', MIN(a, b));
	692	if (a == b)
	693	continue;
	694	printf(",%d", (a < b) ? b - a + 1 : 0);
	695	}
[3621]	696	puts(" @@");
[2725]	697	/*
	698	* Output changes in "unified" diff format--the old and new lines
	699	* are printed together.
	700	*/
	701	for (lowa = span[0].a; ; lowa = (*cvp++)[0].b + 1) {
	702	bool end = cvp > &vec[idx];
	703	fetch(&ft[0], ix[0], lowa, end ? span[0].b : (*cvp)[0].a - 1, ' ');
	704	if (end)
	705	break;
	706	for (j = 0; j < 2; j++)
	707	fetch(&ft[j], ix[j], (cvp)[j].a, (cvp)[j].b, j ? '+' : '-');
	708	}
	709	}
	710	anychange = true;
	711	cont:
	712	idx = -1;
	713	} while (i <= nlen[0]);
[1765]	714
[2725]	715	free(vec);
	716	free(ix[0]);
	717	free(ix[1]);
	718	free(J);
	719	return anychange;
[1765]	720	}
	721
[2725]	722	static int diffreg(char *file[2])
[1765]	723	{
[3232]	724	FILE *fp[2];
[2725]	725	bool binary = false, differ = false;
	726	int status = STATUS_SAME, i;
[1765]	727
[3232]	728	fp[0] = stdin;
	729	fp[1] = stdin;
[2725]	730	for (i = 0; i < 2; i++) {
	731	int fd = open_or_warn_stdin(file[i]);
	732	if (fd == -1)
	733	goto out;
	734	/* Our diff implementation is using seek.
	735	* When we meet non-seekable file, we must make a temp copy.
	736	*/
	737	if (lseek(fd, 0, SEEK_SET) == -1 && errno == ESPIPE) {
	738	char name[] = "/tmp/difXXXXXX";
	739	int fd_tmp = xmkstemp(name);
[1765]	740
[2725]	741	unlink(name);
	742	if (bb_copyfd_eof(fd, fd_tmp) < 0)
	743	xfunc_die();
[3621]	744	if (fd != STDIN_FILENO)
[2725]	745	close(fd);
	746	fd = fd_tmp;
[3621]	747	xlseek(fd, 0, SEEK_SET);
[1765]	748	}
[2725]	749	fp[i] = fdopen(fd, "r");
[1765]	750	}
[2725]	751
[3621]	752	setup_common_bufsiz();
[2725]	753	while (1) {
	754	const size_t sz = COMMON_BUFSIZE / 2;
	755	char *const buf0 = bb_common_bufsiz1;
	756	char *const buf1 = buf0 + sz;
	757	int j, k;
	758	i = fread(buf0, 1, sz, fp[0]);
	759	j = fread(buf1, 1, sz, fp[1]);
	760	if (i != j) {
	761	differ = true;
	762	i = MIN(i, j);
[1765]	763	}
[2725]	764	if (i == 0)
	765	break;
	766	for (k = 0; k < i; k++) {
	767	if (!buf0[k] \|\| !buf1[k])
	768	binary = true;
	769	if (buf0[k] != buf1[k])
	770	differ = true;
	771	}
[1765]	772	}
[2725]	773	if (differ) {
	774	if (binary && !(option_mask32 & FLAG(a)))
	775	status = STATUS_BINARY;
	776	else if (diff(fp, file))
	777	status = STATUS_DIFFER;
[1765]	778	}
[2725]	779	if (status != STATUS_SAME)
	780	exit_status \|= 1;
	781	out:
	782	fclose_if_not_stdin(fp[0]);
	783	fclose_if_not_stdin(fp[1]);
[1765]	784
[2725]	785	return status;
[1765]	786	}
	787
[2725]	788	static void print_status(int status, char *path[2])
[1765]	789	{
[2725]	790	switch (status) {
	791	case STATUS_BINARY:
	792	case STATUS_DIFFER:
	793	if ((option_mask32 & FLAG(q)) \|\| status == STATUS_BINARY)
	794	printf("Files %s and %s differ\n", path[0], path[1]);
	795	break;
	796	case STATUS_SAME:
	797	if (option_mask32 & FLAG(s))
	798	printf("Files %s and %s are identical\n", path[0], path[1]);
	799	break;
	800	}
[1765]	801	}
	802
[2725]	803	#if ENABLE_FEATURE_DIFF_DIR
	804	struct dlist {
	805	size_t len;
	806	int s, e;
	807	char **dl;
	808	};
[1765]	809
	810	/* This function adds a filename to dl, the directory listing. */
[2725]	811	static int FAST_FUNC add_to_dirlist(const char *filename,
	812	struct stat *sb UNUSED_PARAM,
	813	void *userdata, int depth UNUSED_PARAM)
[1765]	814	{
[2725]	815	struct dlist *const l = userdata;
	816	const char *file = filename + l->len;
	817	while (*file == '/')
	818	file++;
	819	l->dl = xrealloc_vector(l->dl, 6, l->e);
	820	l->dl[l->e] = xstrdup(file);
	821	l->e++;
[1765]	822	return TRUE;
	823	}
	824
[2725]	825	/* If recursion is not set, this function adds the directory
	826	* to the list and prevents recursive_action from recursing into it.
	827	*/
	828	static int FAST_FUNC skip_dir(const char *filename,
	829	struct stat sb, void userdata,
	830	int depth)
[1765]	831	{
[2725]	832	if (!(option_mask32 & FLAG(r)) && depth) {
	833	add_to_dirlist(filename, sb, userdata, depth);
	834	return SKIP;
	835	}
	836	if (!(option_mask32 & FLAG(N))) {
	837	/* -r without -N: no need to recurse into dirs
	838	* which do not exist on the "other side".
	839	* Testcase: diff -r /tmp /
	840	* (it would recurse deep into /proc without this code) */
	841	struct dlist *const l = userdata;
	842	filename += l->len;
	843	if (filename[0]) {
	844	struct stat osb;
	845	char *othername = concat_path_file(G.other_dir, filename);
	846	int r = stat(othername, &osb);
	847	free(othername);
	848	if (r != 0 \|\| !S_ISDIR(osb.st_mode)) {
	849	/* other dir doesn't have similarly named
[3232]	850	* directory, don't recurse; return 1 upon
	851	* exit, just like diffutils' diff */
	852	exit_status \|= 1;
[2725]	853	return SKIP;
	854	}
[1765]	855	}
	856	}
[2725]	857	return TRUE;
[1765]	858	}
	859
[2725]	860	static void diffdir(char p[2], const char s_start)
[1765]	861	{
[2725]	862	struct dlist list[2];
	863	int i;
[1765]	864
[2725]	865	memset(&list, 0, sizeof(list));
	866	for (i = 0; i < 2; i++) {
	867	/list[i].s = list[i].e = 0; - memset did it /
	868	/list[i].dl = NULL; /
[1765]	869
[2725]	870	G.other_dir = p[1 - i];
	871	/* We need to trim root directory prefix.
	872	* Using list.len to specify its length,
	873	* add_to_dirlist will remove it. */
	874	list[i].len = strlen(p[i]);
	875	recursive_action(p[i], ACTION_RECURSE \| ACTION_FOLLOWLINKS,
[3232]	876	add_to_dirlist, skip_dir, &list[i], 0);
[2725]	877	/* Sort dl alphabetically.
	878	* GNU diff does this ignoring any number of trailing dots.
	879	* We don't, so for us dotted files almost always are
	880	* first on the list.
	881	*/
	882	qsort_string_vector(list[i].dl, list[i].e);
	883	/* If -S was set, find the starting point. */
	884	if (!s_start)
	885	continue;
	886	while (list[i].s < list[i].e && strcmp(list[i].dl[list[i].s], s_start) < 0)
	887	list[i].s++;
[1765]	888	}
	889	/* Now that both dirlist1 and dirlist2 contain sorted directory
	890	* listings, we can start to go through dirlist1. If both listings
	891	* contain the same file, then do a normal diff. Otherwise, behaviour
	892	* is determined by whether the -N flag is set. */
[2725]	893	while (1) {
	894	char *dp[2];
	895	int pos;
	896	int k;
	897
	898	dp[0] = list[0].s < list[0].e ? list[0].dl[list[0].s] : NULL;
	899	dp[1] = list[1].s < list[1].e ? list[1].dl[list[1].s] : NULL;
	900	if (!dp[0] && !dp[1])
	901	break;
	902	pos = !dp[0] ? 1 : (!dp[1] ? -1 : strcmp(dp[0], dp[1]));
	903	k = pos > 0;
[3232]	904	if (pos && !(option_mask32 & FLAG(N))) {
[2725]	905	printf("Only in %s: %s\n", p[k], dp[k]);
[3232]	906	exit_status \|= 1;
	907	} else {
[2725]	908	char fullpath[2], path[2]; /* if -N */
	909
	910	for (i = 0; i < 2; i++) {
	911	if (pos == 0 \|\| i == k) {
	912	path[i] = fullpath[i] = concat_path_file(p[i], dp[i]);
	913	stat(fullpath[i], &stb[i]);
	914	} else {
	915	fullpath[i] = concat_path_file(p[i], dp[1 - i]);
	916	path[i] = (char *)bb_dev_null;
	917	}
	918	}
	919	if (pos)
	920	stat(fullpath[k], &stb[1 - k]);
	921
	922	if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode))
	923	printf("Common subdirectories: %s and %s\n", fullpath[0], fullpath[1]);
	924	else if (!S_ISREG(stb[0].st_mode) && !S_ISDIR(stb[0].st_mode))
	925	printf("File %s is not a regular file or directory and was skipped\n", fullpath[0]);
	926	else if (!S_ISREG(stb[1].st_mode) && !S_ISDIR(stb[1].st_mode))
	927	printf("File %s is not a regular file or directory and was skipped\n", fullpath[1]);
	928	else if (S_ISDIR(stb[0].st_mode) != S_ISDIR(stb[1].st_mode)) {
	929	if (S_ISDIR(stb[0].st_mode))
	930	printf("File %s is a %s while file %s is a %s\n", fullpath[0], "directory", fullpath[1], "regular file");
	931	else
	932	printf("File %s is a %s while file %s is a %s\n", fullpath[0], "regular file", fullpath[1], "directory");
	933	} else
	934	print_status(diffreg(path), fullpath);
	935
	936	free(fullpath[0]);
	937	free(fullpath[1]);
	938	}
	939	free(dp[k]);
	940	list[k].s++;
[1765]	941	if (pos == 0) {
[2725]	942	free(dp[1 - k]);
	943	list[1 - k].s++;
[1765]	944	}
	945	}
[2725]	946	if (ENABLE_FEATURE_CLEAN_UP) {
	947	free(list[0].dl);
	948	free(list[1].dl);
	949	}
[1765]	950	}
	951	#endif
	952
[2725]	953	#if ENABLE_FEATURE_DIFF_LONG_OPTIONS
	954	static const char diff_longopts[] ALIGN1 =
	955	"ignore-case\0" No_argument "i"
	956	"ignore-tab-expansion\0" No_argument "E"
	957	"ignore-space-change\0" No_argument "b"
	958	"ignore-all-space\0" No_argument "w"
	959	"ignore-blank-lines\0" No_argument "B"
	960	"text\0" No_argument "a"
	961	"unified\0" Required_argument "U"
	962	"label\0" Required_argument "L"
	963	"show-c-function\0" No_argument "p"
	964	"brief\0" No_argument "q"
	965	"expand-tabs\0" No_argument "t"
	966	"initial-tab\0" No_argument "T"
	967	"recursive\0" No_argument "r"
	968	"new-file\0" No_argument "N"
	969	"report-identical-files\0" No_argument "s"
	970	"starting-file\0" Required_argument "S"
	971	"minimal\0" No_argument "d"
	972	;
	973	#endif
[1765]	974
[2725]	975	int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
	976	int diff_main(int argc UNUSED_PARAM, char **argv)
[1765]	977	{
[2725]	978	int gotstdin = 0, i;
	979	char file[2], s_start = NULL;
[1765]	980	llist_t *L_arg = NULL;
	981
	982	INIT_G();
	983
[2725]	984	/* exactly 2 params; collect multiple -L <label>; -U N */
	985	opt_complementary = "=2:L::U+";
	986	#if ENABLE_FEATURE_DIFF_LONG_OPTIONS
	987	applet_long_options = diff_longopts;
	988	#endif
	989	getopt32(argv, "abdiL:NqrsS:tTU:wupBE",
	990	&L_arg, &s_start, &opt_U_context);
[1765]	991	argv += optind;
[2725]	992	while (L_arg)
	993	label[!!label[0]] = llist_pop(&L_arg);
	994	xfunc_error_retval = 2;
	995	for (i = 0; i < 2; i++) {
	996	file[i] = argv[i];
	997	/* Compat: "diff file name_which_doesnt_exist" exits with 2 */
	998	if (LONE_DASH(file[i])) {
	999	fstat(STDIN_FILENO, &stb[i]);
	1000	gotstdin++;
	1001	} else
	1002	xstat(file[i], &stb[i]);
[1765]	1003	}
[2725]	1004	xfunc_error_retval = 1;
	1005	if (gotstdin && (S_ISDIR(stb[0].st_mode) \|\| S_ISDIR(stb[1].st_mode)))
	1006	bb_error_msg_and_die("can't compare stdin to a directory");
[1765]	1007
[3232]	1008	/* Compare metadata to check if the files are the same physical file.
	1009	*
	1010	* Comment from diffutils source says:
	1011	* POSIX says that two files are identical if st_ino and st_dev are
	1012	* the same, but many file systems incorrectly assign the same (device,
	1013	* inode) pair to two distinct files, including:
	1014	* GNU/Linux NFS servers that export all local file systems as a
	1015	* single NFS file system, if a local device number (st_dev) exceeds
	1016	* 255, or if a local inode number (st_ino) exceeds 16777215.
	1017	*/
	1018	if (ENABLE_DESKTOP
	1019	&& stb[0].st_ino == stb[1].st_ino
	1020	&& stb[0].st_dev == stb[1].st_dev
	1021	&& stb[0].st_size == stb[1].st_size
	1022	&& stb[0].st_mtime == stb[1].st_mtime
	1023	&& stb[0].st_ctime == stb[1].st_ctime
	1024	&& stb[0].st_mode == stb[1].st_mode
	1025	&& stb[0].st_nlink == stb[1].st_nlink
	1026	&& stb[0].st_uid == stb[1].st_uid
	1027	&& stb[0].st_gid == stb[1].st_gid
	1028	) {
	1029	/* files are physically the same; no need to compare them */
	1030	return STATUS_SAME;
	1031	}
	1032
[2725]	1033	if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) {
[1765]	1034	#if ENABLE_FEATURE_DIFF_DIR
[2725]	1035	diffdir(file, s_start);
[1765]	1036	#else
[2725]	1037	bb_error_msg_and_die("no support for directory comparison");
[1765]	1038	#endif
	1039	} else {
[2725]	1040	bool dirfile = S_ISDIR(stb[0].st_mode) \|\| S_ISDIR(stb[1].st_mode);
	1041	bool dir = S_ISDIR(stb[1].st_mode);
	1042	if (dirfile) {
	1043	const char *slash = strrchr(file[!dir], '/');
	1044	file[dir] = concat_path_file(file[dir], slash ? slash + 1 : file[!dir]);
	1045	xstat(file[dir], &stb[dir]);
[1765]	1046	}
[2725]	1047	/* diffreg can get non-regular files here */
	1048	print_status(gotstdin > 1 ? STATUS_SAME : diffreg(file), file);
	1049
	1050	if (dirfile)
	1051	free(file[dir]);
[1765]	1052	}
[2725]	1053
	1054	return exit_status;
[1765]	1055	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: MondoRescue/branches/3.3/mindi-busybox/editors/diff.c@ 3723

Download in other formats: