Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: MondoRescue/branches/2.2.9/mindi-busybox/archival/libarchive/bz/blocksort.c@ 2888

Visit:

Last change on this file since 2888 was 2725, checked in by Bruno Cornec, 14 years ago
Update mindi-busybox to 1.18.3 to avoid problems with the tar command which is now failing on recent versions with busybox 1.7.3
Property svn:eol-style set to `native`
File size: 23.7 KB

Rev	Line
[2725]	1	/*
	2	* bzip2 is written by Julian Seward <jseward@bzip.org>.
	3	* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
	4	* See README and LICENSE files in this directory for more information.
	5	*/
	6
	7	/-------------------------------------------------------------/
	8	/--- Block sorting machinery ---/
	9	/--- blocksort.c ---/
	10	/-------------------------------------------------------------/
	11
	12	/* ------------------------------------------------------------------
	13	This file is part of bzip2/libbzip2, a program and library for
	14	lossless, block-sorting data compression.
	15
	16	bzip2/libbzip2 version 1.0.4 of 20 December 2006
	17	Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
	18
	19	Please read the WARNING, DISCLAIMER and PATENTS sections in the
	20	README file.
	21
	22	This program is released under the terms of the license contained
	23	in the file LICENSE.
	24	------------------------------------------------------------------ */
	25
	26	/* #include "bzlib_private.h" */
	27
	28	#define mswap(zz1, zz2) \
	29	{ \
	30	int32_t zztmp = zz1; \
	31	zz1 = zz2; \
	32	zz2 = zztmp; \
	33	}
	34
	35	static
	36	/* No measurable speed gain with inlining */
	37	/* ALWAYS_INLINE */
	38	void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
	39	{
	40	while (zzn > 0) {
	41	mswap(ptr[zzp1], ptr[zzp2]);
	42	zzp1++;
	43	zzp2++;
	44	zzn--;
	45	}
	46	}
	47
	48	static
	49	ALWAYS_INLINE
	50	int32_t mmin(int32_t a, int32_t b)
	51	{
	52	return (a < b) ? a : b;
	53	}
	54
	55
	56	/---------------------------------------------/
	57	/--- Fallback O(N log(N)^2) sorting ---/
	58	/--- algorithm, for repetitive blocks ---/
	59	/---------------------------------------------/
	60
	61	/---------------------------------------------/
	62	static
	63	inline
	64	void fallbackSimpleSort(uint32_t* fmap,
	65	uint32_t* eclass,
	66	int32_t lo,
	67	int32_t hi)
	68	{
	69	int32_t i, j, tmp;
	70	uint32_t ec_tmp;
	71
	72	if (lo == hi) return;
	73
	74	if (hi - lo > 3) {
	75	for (i = hi-4; i >= lo; i--) {
	76	tmp = fmap[i];
	77	ec_tmp = eclass[tmp];
	78	for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
	79	fmap[j-4] = fmap[j];
	80	fmap[j-4] = tmp;
	81	}
	82	}
	83
	84	for (i = hi-1; i >= lo; i--) {
	85	tmp = fmap[i];
	86	ec_tmp = eclass[tmp];
	87	for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
	88	fmap[j-1] = fmap[j];
	89	fmap[j-1] = tmp;
	90	}
	91	}
	92
	93
	94	/---------------------------------------------/
	95	#define fpush(lz,hz) { \
	96	stackLo[sp] = lz; \
	97	stackHi[sp] = hz; \
	98	sp++; \
	99	}
	100
	101	#define fpop(lz,hz) { \
	102	sp--; \
	103	lz = stackLo[sp]; \
	104	hz = stackHi[sp]; \
	105	}
	106
	107	#define FALLBACK_QSORT_SMALL_THRESH 10
	108	#define FALLBACK_QSORT_STACK_SIZE 100
	109
	110	static
	111	void fallbackQSort3(uint32_t* fmap,
	112	uint32_t* eclass,
	113	int32_t loSt,
	114	int32_t hiSt)
	115	{
	116	int32_t unLo, unHi, ltLo, gtHi, n, m;
	117	int32_t sp, lo, hi;
	118	uint32_t med, r, r3;
	119	int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
	120	int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
	121
	122	r = 0;
	123
	124	sp = 0;
	125	fpush(loSt, hiSt);
	126
	127	while (sp > 0) {
	128	AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
	129
	130	fpop(lo, hi);
	131	if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
	132	fallbackSimpleSort(fmap, eclass, lo, hi);
	133	continue;
	134	}
	135
	136	/* Random partitioning. Median of 3 sometimes fails to
	137	* avoid bad cases. Median of 9 seems to help but
	138	* looks rather expensive. This too seems to work but
	139	* is cheaper. Guidance for the magic constants
	140	* 7621 and 32768 is taken from Sedgewick's algorithms
	141	* book, chapter 35.
	142	*/
	143	r = ((r * 7621) + 1) % 32768;
	144	r3 = r % 3;
	145	if (r3 == 0)
	146	med = eclass[fmap[lo]];
	147	else if (r3 == 1)
	148	med = eclass[fmap[(lo+hi)>>1]];
	149	else
	150	med = eclass[fmap[hi]];
	151
	152	unLo = ltLo = lo;
	153	unHi = gtHi = hi;
	154
	155	while (1) {
	156	while (1) {
	157	if (unLo > unHi) break;
	158	n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
	159	if (n == 0) {
	160	mswap(fmap[unLo], fmap[ltLo]);
	161	ltLo++;
	162	unLo++;
	163	continue;
	164	};
	165	if (n > 0) break;
	166	unLo++;
	167	}
	168	while (1) {
	169	if (unLo > unHi) break;
	170	n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
	171	if (n == 0) {
	172	mswap(fmap[unHi], fmap[gtHi]);
	173	gtHi--; unHi--;
	174	continue;
	175	};
	176	if (n < 0) break;
	177	unHi--;
	178	}
	179	if (unLo > unHi) break;
	180	mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
	181	}
	182
	183	AssertD(unHi == unLo-1, "fallbackQSort3(2)");
	184
	185	if (gtHi < ltLo) continue;
	186
	187	n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
	188	m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
	189
	190	n = lo + unLo - ltLo - 1;
	191	m = hi - (gtHi - unHi) + 1;
	192
	193	if (n - lo > hi - m) {
	194	fpush(lo, n);
	195	fpush(m, hi);
	196	} else {
	197	fpush(m, hi);
	198	fpush(lo, n);
	199	}
	200	}
	201	}
	202
	203	#undef fpush
	204	#undef fpop
	205	#undef FALLBACK_QSORT_SMALL_THRESH
	206	#undef FALLBACK_QSORT_STACK_SIZE
	207
	208
	209	/---------------------------------------------/
	210	/* Pre:
	211	* nblock > 0
	212	* eclass exists for [0 .. nblock-1]
	213	* ((uint8_t*)eclass) [0 .. nblock-1] holds block
	214	* ptr exists for [0 .. nblock-1]
	215	*
	216	* Post:
	217	* ((uint8_t*)eclass) [0 .. nblock-1] holds block
	218	* All other areas of eclass destroyed
	219	* fmap [0 .. nblock-1] holds sorted order
	220	* bhtab[0 .. 2+(nblock/32)] destroyed
	221	*/
	222
	223	#define SET_BH(zz) bhtab[(zz) >> 5] \|= (1 << ((zz) & 31))
	224	#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
	225	#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
	226	#define WORD_BH(zz) bhtab[(zz) >> 5]
	227	#define UNALIGNED_BH(zz) ((zz) & 0x01f)
	228
	229	static
	230	void fallbackSort(uint32_t* fmap,
	231	uint32_t* eclass,
	232	uint32_t* bhtab,
	233	int32_t nblock)
	234	{
	235	int32_t ftab[257];
	236	int32_t ftabCopy[256];
	237	int32_t H, i, j, k, l, r, cc, cc1;
	238	int32_t nNotDone;
	239	int32_t nBhtab;
	240	uint8_t* eclass8 = (uint8_t*)eclass;
	241
	242	/*
	243	* Initial 1-char radix sort to generate
	244	* initial fmap and initial BH bits.
	245	*/
	246	for (i = 0; i < 257; i++) ftab[i] = 0;
	247	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
	248	for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
	249
	250	j = ftab[0]; /* bbox: optimized */
	251	for (i = 1; i < 257; i++) {
	252	j += ftab[i];
	253	ftab[i] = j;
	254	}
	255
	256	for (i = 0; i < nblock; i++) {
	257	j = eclass8[i];
	258	k = ftab[j] - 1;
	259	ftab[j] = k;
	260	fmap[k] = i;
	261	}
	262
	263	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
	264	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
	265	for (i = 0; i < 256; i++) SET_BH(ftab[i]);
	266
	267	/*
	268	* Inductively refine the buckets. Kind-of an
	269	* "exponential radix sort" (!), inspired by the
	270	* Manber-Myers suffix array construction algorithm.
	271	*/
	272
	273	/-- set sentinel bits for block-end detection --/
	274	for (i = 0; i < 32; i++) {
	275	SET_BH(nblock + 2*i);
	276	CLEAR_BH(nblock + 2*i + 1);
	277	}
	278
	279	/-- the log(N) loop --/
	280	H = 1;
	281	while (1) {
	282	j = 0;
	283	for (i = 0; i < nblock; i++) {
	284	if (ISSET_BH(i))
	285	j = i;
	286	k = fmap[i] - H;
	287	if (k < 0)
	288	k += nblock;
	289	eclass[k] = j;
	290	}
	291
	292	nNotDone = 0;
	293	r = -1;
	294	while (1) {
	295
	296	/-- find the next non-singleton bucket --/
	297	k = r + 1;
	298	while (ISSET_BH(k) && UNALIGNED_BH(k))
	299	k++;
	300	if (ISSET_BH(k)) {
	301	while (WORD_BH(k) == 0xffffffff) k += 32;
	302	while (ISSET_BH(k)) k++;
	303	}
	304	l = k - 1;
	305	if (l >= nblock)
	306	break;
	307	while (!ISSET_BH(k) && UNALIGNED_BH(k))
	308	k++;
	309	if (!ISSET_BH(k)) {
	310	while (WORD_BH(k) == 0x00000000) k += 32;
	311	while (!ISSET_BH(k)) k++;
	312	}
	313	r = k - 1;
	314	if (r >= nblock)
	315	break;
	316
	317	/-- now [l, r] bracket current bucket --/
	318	if (r > l) {
	319	nNotDone += (r - l + 1);
	320	fallbackQSort3(fmap, eclass, l, r);
	321
	322	/-- scan bucket and generate header bits-- /
	323	cc = -1;
	324	for (i = l; i <= r; i++) {
	325	cc1 = eclass[fmap[i]];
	326	if (cc != cc1) {
	327	SET_BH(i);
	328	cc = cc1;
	329	};
	330	}
	331	}
	332	}
	333
	334	H *= 2;
	335	if (H > nblock \|\| nNotDone == 0)
	336	break;
	337	}
	338
	339	/*
	340	* Reconstruct the original block in
	341	* eclass8 [0 .. nblock-1], since the
	342	* previous phase destroyed it.
	343	*/
	344	j = 0;
	345	for (i = 0; i < nblock; i++) {
	346	while (ftabCopy[j] == 0)
	347	j++;
	348	ftabCopy[j]--;
	349	eclass8[fmap[i]] = (uint8_t)j;
	350	}
	351	AssertH(j < 256, 1005);
	352	}
	353
	354	#undef SET_BH
	355	#undef CLEAR_BH
	356	#undef ISSET_BH
	357	#undef WORD_BH
	358	#undef UNALIGNED_BH
	359
	360
	361	/---------------------------------------------/
	362	/--- The main, O(N^2 log(N)) sorting ---/
	363	/--- algorithm. Faster for "normal" ---/
	364	/--- non-repetitive blocks. ---/
	365	/---------------------------------------------/
	366
	367	/---------------------------------------------/
	368	static
	369	NOINLINE
	370	int mainGtU(
	371	uint32_t i1,
	372	uint32_t i2,
	373	uint8_t* block,
	374	uint16_t* quadrant,
	375	uint32_t nblock,
	376	int32_t* budget)
	377	{
	378	int32_t k;
	379	uint8_t c1, c2;
	380	uint16_t s1, s2;
	381
	382	/* Loop unrolling here is actually very useful
	383	* (generated code is much simpler),
	384	* code size increase is only 270 bytes (i386)
	385	* but speeds up compression 10% overall
	386	*/
	387
	388	#if CONFIG_BZIP2_FEATURE_SPEED >= 1
	389
	390	#define TIMES_8(code) \
	391	code; code; code; code; \
	392	code; code; code; code;
	393	#define TIMES_12(code) \
	394	code; code; code; code; \
	395	code; code; code; code; \
	396	code; code; code; code;
	397
	398	#else
	399
	400	#define TIMES_8(code) \
	401	{ \
	402	int nn = 8; \
	403	do { \
	404	code; \
	405	} while (--nn); \
	406	}
	407	#define TIMES_12(code) \
	408	{ \
	409	int nn = 12; \
	410	do { \
	411	code; \
	412	} while (--nn); \
	413	}
	414
	415	#endif
	416
	417	AssertD(i1 != i2, "mainGtU");
	418	TIMES_12(
	419	c1 = block[i1]; c2 = block[i2];
	420	if (c1 != c2) return (c1 > c2);
	421	i1++; i2++;
	422	)
	423
	424	k = nblock + 8;
	425
	426	do {
	427	TIMES_8(
	428	c1 = block[i1]; c2 = block[i2];
	429	if (c1 != c2) return (c1 > c2);
	430	s1 = quadrant[i1]; s2 = quadrant[i2];
	431	if (s1 != s2) return (s1 > s2);
	432	i1++; i2++;
	433	)
	434
	435	if (i1 >= nblock) i1 -= nblock;
	436	if (i2 >= nblock) i2 -= nblock;
	437
	438	(*budget)--;
	439	k -= 8;
	440	} while (k >= 0);
	441
	442	return False;
	443	}
	444	#undef TIMES_8
	445	#undef TIMES_12
	446
	447	/---------------------------------------------/
	448	/*
	449	* Knuth's increments seem to work better
	450	* than Incerpi-Sedgewick here. Possibly
	451	* because the number of elems to sort is
	452	* usually small, typically <= 20.
	453	*/
	454	static
	455	const int32_t incs[14] = {
	456	1, 4, 13, 40, 121, 364, 1093, 3280,
	457	9841, 29524, 88573, 265720,
	458	797161, 2391484
	459	};
	460
	461	static
	462	void mainSimpleSort(uint32_t* ptr,
	463	uint8_t* block,
	464	uint16_t* quadrant,
	465	int32_t nblock,
	466	int32_t lo,
	467	int32_t hi,
	468	int32_t d,
	469	int32_t* budget)
	470	{
	471	int32_t i, j, h, bigN, hp;
	472	uint32_t v;
	473
	474	bigN = hi - lo + 1;
	475	if (bigN < 2) return;
	476
	477	hp = 0;
	478	while (incs[hp] < bigN) hp++;
	479	hp--;
	480
	481	for (; hp >= 0; hp--) {
	482	h = incs[hp];
	483
	484	i = lo + h;
	485	while (1) {
	486	/-- copy 1 --/
	487	if (i > hi) break;
	488	v = ptr[i];
	489	j = i;
	490	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
	491	ptr[j] = ptr[j-h];
	492	j = j - h;
	493	if (j <= (lo + h - 1)) break;
	494	}
	495	ptr[j] = v;
	496	i++;
	497
	498	/* 1.5% overall speedup, +290 bytes */
	499	#if CONFIG_BZIP2_FEATURE_SPEED >= 3
	500	/-- copy 2 --/
	501	if (i > hi) break;
	502	v = ptr[i];
	503	j = i;
	504	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
	505	ptr[j] = ptr[j-h];
	506	j = j - h;
	507	if (j <= (lo + h - 1)) break;
	508	}
	509	ptr[j] = v;
	510	i++;
	511
	512	/-- copy 3 --/
	513	if (i > hi) break;
	514	v = ptr[i];
	515	j = i;
	516	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
	517	ptr[j] = ptr[j-h];
	518	j = j - h;
	519	if (j <= (lo + h - 1)) break;
	520	}
	521	ptr[j] = v;
	522	i++;
	523	#endif
	524	if (*budget < 0) return;
	525	}
	526	}
	527	}
	528
	529
	530	/---------------------------------------------/
	531	/*
	532	* The following is an implementation of
	533	* an elegant 3-way quicksort for strings,
	534	* described in a paper "Fast Algorithms for
	535	* Sorting and Searching Strings", by Robert
	536	* Sedgewick and Jon L. Bentley.
	537	*/
	538
	539	static
	540	ALWAYS_INLINE
	541	uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
	542	{
	543	uint8_t t;
	544	if (a > b) {
	545	t = a;
	546	a = b;
	547	b = t;
	548	};
	549	/* here b >= a */
	550	if (b > c) {
	551	b = c;
	552	if (a > b)
	553	b = a;
	554	}
	555	return b;
	556	}
	557
	558	#define mpush(lz,hz,dz) \
	559	{ \
	560	stackLo[sp] = lz; \
	561	stackHi[sp] = hz; \
	562	stackD [sp] = dz; \
	563	sp++; \
	564	}
	565
	566	#define mpop(lz,hz,dz) \
	567	{ \
	568	sp--; \
	569	lz = stackLo[sp]; \
	570	hz = stackHi[sp]; \
	571	dz = stackD [sp]; \
	572	}
	573
	574	#define mnextsize(az) (nextHi[az] - nextLo[az])
	575
	576	#define mnextswap(az,bz) \
	577	{ \
	578	int32_t tz; \
	579	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
	580	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
	581	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
	582	}
	583
	584	#define MAIN_QSORT_SMALL_THRESH 20
	585	#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
	586	#define MAIN_QSORT_STACK_SIZE 100
	587
	588	static NOINLINE
	589	void mainQSort3(uint32_t* ptr,
	590	uint8_t* block,
	591	uint16_t* quadrant,
	592	int32_t nblock,
	593	int32_t loSt,
	594	int32_t hiSt,
	595	int32_t dSt,
	596	int32_t* budget)
	597	{
	598	int32_t unLo, unHi, ltLo, gtHi, n, m, med;
	599	int32_t sp, lo, hi, d;
	600
	601	int32_t stackLo[MAIN_QSORT_STACK_SIZE];
	602	int32_t stackHi[MAIN_QSORT_STACK_SIZE];
	603	int32_t stackD [MAIN_QSORT_STACK_SIZE];
	604
	605	int32_t nextLo[3];
	606	int32_t nextHi[3];
	607	int32_t nextD [3];
	608
	609	sp = 0;
	610	mpush(loSt, hiSt, dSt);
	611
	612	while (sp > 0) {
	613	AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
	614
	615	mpop(lo, hi, d);
	616	if (hi - lo < MAIN_QSORT_SMALL_THRESH
	617	\|\| d > MAIN_QSORT_DEPTH_THRESH
	618	) {
	619	mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
	620	if (*budget < 0)
	621	return;
	622	continue;
	623	}
	624	med = (int32_t) mmed3(block[ptr[lo ] + d],
	625	block[ptr[hi ] + d],
	626	block[ptr[(lo+hi) >> 1] + d]);
	627
	628	unLo = ltLo = lo;
	629	unHi = gtHi = hi;
	630
	631	while (1) {
	632	while (1) {
	633	if (unLo > unHi)
	634	break;
	635	n = ((int32_t)block[ptr[unLo]+d]) - med;
	636	if (n == 0) {
	637	mswap(ptr[unLo], ptr[ltLo]);
	638	ltLo++;
	639	unLo++;
	640	continue;
	641	};
	642	if (n > 0) break;
	643	unLo++;
	644	}
	645	while (1) {
	646	if (unLo > unHi)
	647	break;
	648	n = ((int32_t)block[ptr[unHi]+d]) - med;
	649	if (n == 0) {
	650	mswap(ptr[unHi], ptr[gtHi]);
	651	gtHi--;
	652	unHi--;
	653	continue;
	654	};
	655	if (n < 0) break;
	656	unHi--;
	657	}
	658	if (unLo > unHi)
	659	break;
	660	mswap(ptr[unLo], ptr[unHi]);
	661	unLo++;
	662	unHi--;
	663	}
	664
	665	AssertD(unHi == unLo-1, "mainQSort3(2)");
	666
	667	if (gtHi < ltLo) {
	668	mpush(lo, hi, d + 1);
	669	continue;
	670	}
	671
	672	n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
	673	m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
	674
	675	n = lo + unLo - ltLo - 1;
	676	m = hi - (gtHi - unHi) + 1;
	677
	678	nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
	679	nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
	680	nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
	681
	682	if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
	683	if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
	684	if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
	685
	686	AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
	687	AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
	688
	689	mpush(nextLo[0], nextHi[0], nextD[0]);
	690	mpush(nextLo[1], nextHi[1], nextD[1]);
	691	mpush(nextLo[2], nextHi[2], nextD[2]);
	692	}
	693	}
	694
	695	#undef mpush
	696	#undef mpop
	697	#undef mnextsize
	698	#undef mnextswap
	699	#undef MAIN_QSORT_SMALL_THRESH
	700	#undef MAIN_QSORT_DEPTH_THRESH
	701	#undef MAIN_QSORT_STACK_SIZE
	702
	703
	704	/---------------------------------------------/
	705	/* Pre:
	706	* nblock > N_OVERSHOOT
	707	* block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
	708	* ((uint8_t*)block32) [0 .. nblock-1] holds block
	709	* ptr exists for [0 .. nblock-1]
	710	*
	711	* Post:
	712	* ((uint8_t*)block32) [0 .. nblock-1] holds block
	713	* All other areas of block32 destroyed
	714	* ftab[0 .. 65536] destroyed
	715	* ptr [0 .. nblock-1] holds sorted order
	716	* if (*budget < 0), sorting was abandoned
	717	*/
	718
	719	#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
	720	#define SETMASK (1 << 21)
	721	#define CLEARMASK (~(SETMASK))
	722
	723	static NOINLINE
	724	void mainSort(EState* state,
	725	uint32_t* ptr,
	726	uint8_t* block,
	727	uint16_t* quadrant,
	728	uint32_t* ftab,
	729	int32_t nblock,
	730	int32_t* budget)
	731	{
	732	int32_t i, j, k, ss, sb;
	733	uint8_t c1;
	734	int32_t numQSorted;
	735	uint16_t s;
	736	Bool bigDone[256];
	737	/* bbox: moved to EState to save stack
	738	int32_t runningOrder[256];
	739	int32_t copyStart[256];
	740	int32_t copyEnd [256];
	741	*/
	742	#define runningOrder (state->mainSort__runningOrder)
	743	#define copyStart (state->mainSort__copyStart)
	744	#define copyEnd (state->mainSort__copyEnd)
	745
	746	/-- set up the 2-byte frequency table --/
	747	/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
	748	memset(ftab, 0, 65537 * sizeof(ftab[0]));
	749
	750	j = block[0] << 8;
	751	i = nblock - 1;
	752	/* 3%, +300 bytes */
	753	#if CONFIG_BZIP2_FEATURE_SPEED >= 2
	754	for (; i >= 3; i -= 4) {
	755	quadrant[i] = 0;
	756	j = (j >> 8) \| (((uint16_t)block[i]) << 8);
	757	ftab[j]++;
	758	quadrant[i-1] = 0;
	759	j = (j >> 8) \| (((uint16_t)block[i-1]) << 8);
	760	ftab[j]++;
	761	quadrant[i-2] = 0;
	762	j = (j >> 8) \| (((uint16_t)block[i-2]) << 8);
	763	ftab[j]++;
	764	quadrant[i-3] = 0;
	765	j = (j >> 8) \| (((uint16_t)block[i-3]) << 8);
	766	ftab[j]++;
	767	}
	768	#endif
	769	for (; i >= 0; i--) {
	770	quadrant[i] = 0;
	771	j = (j >> 8) \| (((uint16_t)block[i]) << 8);
	772	ftab[j]++;
	773	}
	774
	775	/-- (emphasises close relationship of block & quadrant) --/
	776	for (i = 0; i < BZ_N_OVERSHOOT; i++) {
	777	block [nblock+i] = block[i];
	778	quadrant[nblock+i] = 0;
	779	}
	780
	781	/-- Complete the initial radix sort --/
	782	j = ftab[0]; /* bbox: optimized */
	783	for (i = 1; i <= 65536; i++) {
	784	j += ftab[i];
	785	ftab[i] = j;
	786	}
	787
	788	s = block[0] << 8;
	789	i = nblock - 1;
	790	#if CONFIG_BZIP2_FEATURE_SPEED >= 2
	791	for (; i >= 3; i -= 4) {
	792	s = (s >> 8) \| (block[i] << 8);
	793	j = ftab[s] - 1;
	794	ftab[s] = j;
	795	ptr[j] = i;
	796	s = (s >> 8) \| (block[i-1] << 8);
	797	j = ftab[s] - 1;
	798	ftab[s] = j;
	799	ptr[j] = i-1;
	800	s = (s >> 8) \| (block[i-2] << 8);
	801	j = ftab[s] - 1;
	802	ftab[s] = j;
	803	ptr[j] = i-2;
	804	s = (s >> 8) \| (block[i-3] << 8);
	805	j = ftab[s] - 1;
	806	ftab[s] = j;
	807	ptr[j] = i-3;
	808	}
	809	#endif
	810	for (; i >= 0; i--) {
	811	s = (s >> 8) \| (block[i] << 8);
	812	j = ftab[s] - 1;
	813	ftab[s] = j;
	814	ptr[j] = i;
	815	}
	816
	817	/*
	818	* Now ftab contains the first loc of every small bucket.
	819	* Calculate the running order, from smallest to largest
	820	* big bucket.
	821	*/
	822	for (i = 0; i <= 255; i++) {
	823	bigDone [i] = False;
	824	runningOrder[i] = i;
	825	}
	826
	827	{
	828	int32_t vv;
	829	/* bbox: was: int32_t h = 1; */
	830	/* do h = 3 * h + 1; while (h <= 256); */
	831	uint32_t h = 364;
	832
	833	do {
	834	/h = h / 3;/
	835	h = (h * 171) >> 9; /* bbox: fast h/3 */
	836	for (i = h; i <= 255; i++) {
	837	vv = runningOrder[i];
	838	j = i;
	839	while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
	840	runningOrder[j] = runningOrder[j-h];
	841	j = j - h;
	842	if (j <= (h - 1))
	843	goto zero;
	844	}
	845	zero:
	846	runningOrder[j] = vv;
	847	}
	848	} while (h != 1);
	849	}
	850
	851	/*
	852	* The main sorting loop.
	853	*/
	854
	855	numQSorted = 0;
	856
	857	for (i = 0; i <= 255; i++) {
	858
	859	/*
	860	* Process big buckets, starting with the least full.
	861	* Basically this is a 3-step process in which we call
	862	* mainQSort3 to sort the small buckets [ss, j], but
	863	* also make a big effort to avoid the calls if we can.
	864	*/
	865	ss = runningOrder[i];
	866
	867	/*
	868	* Step 1:
	869	* Complete the big bucket [ss] by quicksorting
	870	* any unsorted small buckets [ss, j], for j != ss.
	871	* Hopefully previous pointer-scanning phases have already
	872	* completed many of the small buckets [ss, j], so
	873	* we don't have to sort them at all.
	874	*/
	875	for (j = 0; j <= 255; j++) {
	876	if (j != ss) {
	877	sb = (ss << 8) + j;
	878	if (!(ftab[sb] & SETMASK)) {
	879	int32_t lo = ftab[sb] & CLEARMASK;
	880	int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
	881	if (hi > lo) {
	882	mainQSort3(
	883	ptr, block, quadrant, nblock,
	884	lo, hi, BZ_N_RADIX, budget
	885	);
	886	if (*budget < 0) return;
	887	numQSorted += (hi - lo + 1);
	888	}
	889	}
	890	ftab[sb] \|= SETMASK;
	891	}
	892	}
	893
	894	AssertH(!bigDone[ss], 1006);
	895
	896	/*
	897	* Step 2:
	898	* Now scan this big bucket [ss] so as to synthesise the
	899	* sorted order for small buckets [t, ss] for all t,
	900	* including, magically, the bucket [ss,ss] too.
	901	* This will avoid doing Real Work in subsequent Step 1's.
	902	*/
	903	{
	904	for (j = 0; j <= 255; j++) {
	905	copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
	906	copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
	907	}
	908	for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
	909	k = ptr[j] - 1;
	910	if (k < 0)
	911	k += nblock;
	912	c1 = block[k];
	913	if (!bigDone[c1])
	914	ptr[copyStart[c1]++] = k;
	915	}
	916	for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
	917	k = ptr[j]-1;
	918	if (k < 0)
	919	k += nblock;
	920	c1 = block[k];
	921	if (!bigDone[c1])
	922	ptr[copyEnd[c1]--] = k;
	923	}
	924	}
	925
	926	/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
	927	* Necessity for this case is demonstrated by compressing
	928	* a sequence of approximately 48.5 million of character
	929	* 251; 1.0.0/1.0.1 will then die here. */
	930	AssertH((copyStart[ss]-1 == copyEnd[ss]) \
	931	\|\| (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
	932
	933	for (j = 0; j <= 255; j++)
	934	ftab[(j << 8) + ss] \|= SETMASK;
	935
	936	/*
	937	* Step 3:
	938	* The [ss] big bucket is now done. Record this fact,
	939	* and update the quadrant descriptors. Remember to
	940	* update quadrants in the overshoot area too, if
	941	* necessary. The "if (i < 255)" test merely skips
	942	* this updating for the last bucket processed, since
	943	* updating for the last bucket is pointless.
	944	*
	945	* The quadrant array provides a way to incrementally
	946	* cache sort orderings, as they appear, so as to
	947	* make subsequent comparisons in fullGtU() complete
	948	* faster. For repetitive blocks this makes a big
	949	* difference (but not big enough to be able to avoid
	950	* the fallback sorting mechanism, exponential radix sort).
	951	*
	952	* The precise meaning is: at all times:
	953	*
	954	* for 0 <= i < nblock and 0 <= j <= nblock
	955	*
	956	* if block[i] != block[j],
	957	*
	958	* then the relative values of quadrant[i] and
	959	* quadrant[j] are meaningless.
	960	*
	961	* else {
	962	* if quadrant[i] < quadrant[j]
	963	* then the string starting at i lexicographically
	964	* precedes the string starting at j
	965	*
	966	* else if quadrant[i] > quadrant[j]
	967	* then the string starting at j lexicographically
	968	* precedes the string starting at i
	969	*
	970	* else
	971	* the relative ordering of the strings starting
	972	* at i and j has not yet been determined.
	973	* }
	974	*/
	975	bigDone[ss] = True;
	976
	977	if (i < 255) {
	978	int32_t bbStart = ftab[ss << 8] & CLEARMASK;
	979	int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
	980	int32_t shifts = 0;
	981
	982	while ((bbSize >> shifts) > 65534) shifts++;
	983
	984	for (j = bbSize-1; j >= 0; j--) {
	985	int32_t a2update = ptr[bbStart + j];
	986	uint16_t qVal = (uint16_t)(j >> shifts);
	987	quadrant[a2update] = qVal;
	988	if (a2update < BZ_N_OVERSHOOT)
	989	quadrant[a2update + nblock] = qVal;
	990	}
	991	AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
	992	}
	993	}
	994	#undef runningOrder
	995	#undef copyStart
	996	#undef copyEnd
	997	}
	998
	999	#undef BIGFREQ
	1000	#undef SETMASK
	1001	#undef CLEARMASK
	1002
	1003
	1004	/---------------------------------------------/
	1005	/* Pre:
	1006	* nblock > 0
	1007	* arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
	1008	* ((uint8_t*)arr2)[0 .. nblock-1] holds block
	1009	* arr1 exists for [0 .. nblock-1]
	1010	*
	1011	* Post:
	1012	* ((uint8_t*)arr2) [0 .. nblock-1] holds block
	1013	* All other areas of block destroyed
	1014	* ftab[0 .. 65536] destroyed
	1015	* arr1[0 .. nblock-1] holds sorted order
	1016	*/
	1017	static NOINLINE
	1018	void BZ2_blockSort(EState* s)
	1019	{
	1020	/* In original bzip2 1.0.4, it's a parameter, but 30
	1021	* (which was the default) should work ok. */
	1022	enum { wfact = 30 };
	1023
	1024	uint32_t* ptr = s->ptr;
	1025	uint8_t* block = s->block;
	1026	uint32_t* ftab = s->ftab;
	1027	int32_t nblock = s->nblock;
	1028	uint16_t* quadrant;
	1029	int32_t budget;
	1030	int32_t i;
	1031
	1032	if (nblock < 10000) {
	1033	fallbackSort(s->arr1, s->arr2, ftab, nblock);
	1034	} else {
	1035	/* Calculate the location for quadrant, remembering to get
	1036	* the alignment right. Assumes that &(block[0]) is at least
	1037	* 2-byte aligned -- this should be ok since block is really
	1038	* the first section of arr2.
	1039	*/
	1040	i = nblock + BZ_N_OVERSHOOT;
	1041	if (i & 1) i++;
	1042	quadrant = (uint16_t*)(&(block[i]));
	1043
	1044	/* (wfact-1) / 3 puts the default-factor-30
	1045	* transition point at very roughly the same place as
	1046	* with v0.1 and v0.9.0.
	1047	* Not that it particularly matters any more, since the
	1048	* resulting compressed stream is now the same regardless
	1049	* of whether or not we use the main sort or fallback sort.
	1050	*/
	1051	budget = nblock * ((wfact-1) / 3);
	1052
	1053	mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
	1054	if (budget < 0) {
	1055	fallbackSort(s->arr1, s->arr2, ftab, nblock);
	1056	}
	1057	}
	1058
	1059	s->origPtr = -1;
	1060	for (i = 0; i < s->nblock; i++)
	1061	if (ptr[i] == 0) {
	1062	s->origPtr = i;
	1063	break;
	1064	};
	1065
	1066	AssertH(s->origPtr != -1, 1003);
	1067	}
	1068
	1069
	1070	/-------------------------------------------------------------/
	1071	/--- end blocksort.c ---/
	1072	/-------------------------------------------------------------/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: