Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

blocksort.c@ 3863

Visit:

Last change on this file since 3863 was 3320, checked in by Bruno Cornec, 10 years ago
Re-add (thanks git BTW) the 2.2.9 branch which had been destroyed in the move to 3.0
Property svn:eol-style set to `native`
File size: 23.7 KB

Line
1	/*
2	* bzip2 is written by Julian Seward <jseward@bzip.org>.
3	* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4	* See README and LICENSE files in this directory for more information.
5	*/
6
7	/-------------------------------------------------------------/
8	/--- Block sorting machinery ---/
9	/--- blocksort.c ---/
10	/-------------------------------------------------------------/
11
12	/* ------------------------------------------------------------------
13	This file is part of bzip2/libbzip2, a program and library for
14	lossless, block-sorting data compression.
15
16	bzip2/libbzip2 version 1.0.4 of 20 December 2006
17	Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19	Please read the WARNING, DISCLAIMER and PATENTS sections in the
20	README file.
21
22	This program is released under the terms of the license contained
23	in the file LICENSE.
24	------------------------------------------------------------------ */
25
26	/* #include "bzlib_private.h" */
27
28	#define mswap(zz1, zz2) \
29	{ \
30	int32_t zztmp = zz1; \
31	zz1 = zz2; \
32	zz2 = zztmp; \
33	}
34
35	static
36	/* No measurable speed gain with inlining */
37	/* ALWAYS_INLINE */
38	void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
39	{
40	while (zzn > 0) {
41	mswap(ptr[zzp1], ptr[zzp2]);
42	zzp1++;
43	zzp2++;
44	zzn--;
45	}
46	}
47
48	static
49	ALWAYS_INLINE
50	int32_t mmin(int32_t a, int32_t b)
51	{
52	return (a < b) ? a : b;
53	}
54
55
56	/---------------------------------------------/
57	/--- Fallback O(N log(N)^2) sorting ---/
58	/--- algorithm, for repetitive blocks ---/
59	/---------------------------------------------/
60
61	/---------------------------------------------/
62	static
63	inline
64	void fallbackSimpleSort(uint32_t* fmap,
65	uint32_t* eclass,
66	int32_t lo,
67	int32_t hi)
68	{
69	int32_t i, j, tmp;
70	uint32_t ec_tmp;
71
72	if (lo == hi) return;
73
74	if (hi - lo > 3) {
75	for (i = hi-4; i >= lo; i--) {
76	tmp = fmap[i];
77	ec_tmp = eclass[tmp];
78	for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
79	fmap[j-4] = fmap[j];
80	fmap[j-4] = tmp;
81	}
82	}
83
84	for (i = hi-1; i >= lo; i--) {
85	tmp = fmap[i];
86	ec_tmp = eclass[tmp];
87	for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
88	fmap[j-1] = fmap[j];
89	fmap[j-1] = tmp;
90	}
91	}
92
93
94	/---------------------------------------------/
95	#define fpush(lz,hz) { \
96	stackLo[sp] = lz; \
97	stackHi[sp] = hz; \
98	sp++; \
99	}
100
101	#define fpop(lz,hz) { \
102	sp--; \
103	lz = stackLo[sp]; \
104	hz = stackHi[sp]; \
105	}
106
107	#define FALLBACK_QSORT_SMALL_THRESH 10
108	#define FALLBACK_QSORT_STACK_SIZE 100
109
110	static
111	void fallbackQSort3(uint32_t* fmap,
112	uint32_t* eclass,
113	int32_t loSt,
114	int32_t hiSt)
115	{
116	int32_t unLo, unHi, ltLo, gtHi, n, m;
117	int32_t sp, lo, hi;
118	uint32_t med, r, r3;
119	int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
120	int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
121
122	r = 0;
123
124	sp = 0;
125	fpush(loSt, hiSt);
126
127	while (sp > 0) {
128	AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
129
130	fpop(lo, hi);
131	if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
132	fallbackSimpleSort(fmap, eclass, lo, hi);
133	continue;
134	}
135
136	/* Random partitioning. Median of 3 sometimes fails to
137	* avoid bad cases. Median of 9 seems to help but
138	* looks rather expensive. This too seems to work but
139	* is cheaper. Guidance for the magic constants
140	* 7621 and 32768 is taken from Sedgewick's algorithms
141	* book, chapter 35.
142	*/
143	r = ((r * 7621) + 1) % 32768;
144	r3 = r % 3;
145	if (r3 == 0)
146	med = eclass[fmap[lo]];
147	else if (r3 == 1)
148	med = eclass[fmap[(lo+hi)>>1]];
149	else
150	med = eclass[fmap[hi]];
151
152	unLo = ltLo = lo;
153	unHi = gtHi = hi;
154
155	while (1) {
156	while (1) {
157	if (unLo > unHi) break;
158	n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
159	if (n == 0) {
160	mswap(fmap[unLo], fmap[ltLo]);
161	ltLo++;
162	unLo++;
163	continue;
164	};
165	if (n > 0) break;
166	unLo++;
167	}
168	while (1) {
169	if (unLo > unHi) break;
170	n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
171	if (n == 0) {
172	mswap(fmap[unHi], fmap[gtHi]);
173	gtHi--; unHi--;
174	continue;
175	};
176	if (n < 0) break;
177	unHi--;
178	}
179	if (unLo > unHi) break;
180	mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
181	}
182
183	AssertD(unHi == unLo-1, "fallbackQSort3(2)");
184
185	if (gtHi < ltLo) continue;
186
187	n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
188	m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
189
190	n = lo + unLo - ltLo - 1;
191	m = hi - (gtHi - unHi) + 1;
192
193	if (n - lo > hi - m) {
194	fpush(lo, n);
195	fpush(m, hi);
196	} else {
197	fpush(m, hi);
198	fpush(lo, n);
199	}
200	}
201	}
202
203	#undef fpush
204	#undef fpop
205	#undef FALLBACK_QSORT_SMALL_THRESH
206	#undef FALLBACK_QSORT_STACK_SIZE
207
208
209	/---------------------------------------------/
210	/* Pre:
211	* nblock > 0
212	* eclass exists for [0 .. nblock-1]
213	* ((uint8_t*)eclass) [0 .. nblock-1] holds block
214	* ptr exists for [0 .. nblock-1]
215	*
216	* Post:
217	* ((uint8_t*)eclass) [0 .. nblock-1] holds block
218	* All other areas of eclass destroyed
219	* fmap [0 .. nblock-1] holds sorted order
220	* bhtab[0 .. 2+(nblock/32)] destroyed
221	*/
222
223	#define SET_BH(zz) bhtab[(zz) >> 5] \|= (1 << ((zz) & 31))
224	#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
225	#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
226	#define WORD_BH(zz) bhtab[(zz) >> 5]
227	#define UNALIGNED_BH(zz) ((zz) & 0x01f)
228
229	static
230	void fallbackSort(uint32_t* fmap,
231	uint32_t* eclass,
232	uint32_t* bhtab,
233	int32_t nblock)
234	{
235	int32_t ftab[257];
236	int32_t ftabCopy[256];
237	int32_t H, i, j, k, l, r, cc, cc1;
238	int32_t nNotDone;
239	int32_t nBhtab;
240	uint8_t* eclass8 = (uint8_t*)eclass;
241
242	/*
243	* Initial 1-char radix sort to generate
244	* initial fmap and initial BH bits.
245	*/
246	for (i = 0; i < 257; i++) ftab[i] = 0;
247	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
248	for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
249
250	j = ftab[0]; /* bbox: optimized */
251	for (i = 1; i < 257; i++) {
252	j += ftab[i];
253	ftab[i] = j;
254	}
255
256	for (i = 0; i < nblock; i++) {
257	j = eclass8[i];
258	k = ftab[j] - 1;
259	ftab[j] = k;
260	fmap[k] = i;
261	}
262
263	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
264	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
265	for (i = 0; i < 256; i++) SET_BH(ftab[i]);
266
267	/*
268	* Inductively refine the buckets. Kind-of an
269	* "exponential radix sort" (!), inspired by the
270	* Manber-Myers suffix array construction algorithm.
271	*/
272
273	/-- set sentinel bits for block-end detection --/
274	for (i = 0; i < 32; i++) {
275	SET_BH(nblock + 2*i);
276	CLEAR_BH(nblock + 2*i + 1);
277	}
278
279	/-- the log(N) loop --/
280	H = 1;
281	while (1) {
282	j = 0;
283	for (i = 0; i < nblock; i++) {
284	if (ISSET_BH(i))
285	j = i;
286	k = fmap[i] - H;
287	if (k < 0)
288	k += nblock;
289	eclass[k] = j;
290	}
291
292	nNotDone = 0;
293	r = -1;
294	while (1) {
295
296	/-- find the next non-singleton bucket --/
297	k = r + 1;
298	while (ISSET_BH(k) && UNALIGNED_BH(k))
299	k++;
300	if (ISSET_BH(k)) {
301	while (WORD_BH(k) == 0xffffffff) k += 32;
302	while (ISSET_BH(k)) k++;
303	}
304	l = k - 1;
305	if (l >= nblock)
306	break;
307	while (!ISSET_BH(k) && UNALIGNED_BH(k))
308	k++;
309	if (!ISSET_BH(k)) {
310	while (WORD_BH(k) == 0x00000000) k += 32;
311	while (!ISSET_BH(k)) k++;
312	}
313	r = k - 1;
314	if (r >= nblock)
315	break;
316
317	/-- now [l, r] bracket current bucket --/
318	if (r > l) {
319	nNotDone += (r - l + 1);
320	fallbackQSort3(fmap, eclass, l, r);
321
322	/-- scan bucket and generate header bits-- /
323	cc = -1;
324	for (i = l; i <= r; i++) {
325	cc1 = eclass[fmap[i]];
326	if (cc != cc1) {
327	SET_BH(i);
328	cc = cc1;
329	};
330	}
331	}
332	}
333
334	H *= 2;
335	if (H > nblock \|\| nNotDone == 0)
336	break;
337	}
338
339	/*
340	* Reconstruct the original block in
341	* eclass8 [0 .. nblock-1], since the
342	* previous phase destroyed it.
343	*/
344	j = 0;
345	for (i = 0; i < nblock; i++) {
346	while (ftabCopy[j] == 0)
347	j++;
348	ftabCopy[j]--;
349	eclass8[fmap[i]] = (uint8_t)j;
350	}
351	AssertH(j < 256, 1005);
352	}
353
354	#undef SET_BH
355	#undef CLEAR_BH
356	#undef ISSET_BH
357	#undef WORD_BH
358	#undef UNALIGNED_BH
359
360
361	/---------------------------------------------/
362	/--- The main, O(N^2 log(N)) sorting ---/
363	/--- algorithm. Faster for "normal" ---/
364	/--- non-repetitive blocks. ---/
365	/---------------------------------------------/
366
367	/---------------------------------------------/
368	static
369	NOINLINE
370	int mainGtU(
371	uint32_t i1,
372	uint32_t i2,
373	uint8_t* block,
374	uint16_t* quadrant,
375	uint32_t nblock,
376	int32_t* budget)
377	{
378	int32_t k;
379	uint8_t c1, c2;
380	uint16_t s1, s2;
381
382	/* Loop unrolling here is actually very useful
383	* (generated code is much simpler),
384	* code size increase is only 270 bytes (i386)
385	* but speeds up compression 10% overall
386	*/
387
388	#if CONFIG_BZIP2_FEATURE_SPEED >= 1
389
390	#define TIMES_8(code) \
391	code; code; code; code; \
392	code; code; code; code;
393	#define TIMES_12(code) \
394	code; code; code; code; \
395	code; code; code; code; \
396	code; code; code; code;
397
398	#else
399
400	#define TIMES_8(code) \
401	{ \
402	int nn = 8; \
403	do { \
404	code; \
405	} while (--nn); \
406	}
407	#define TIMES_12(code) \
408	{ \
409	int nn = 12; \
410	do { \
411	code; \
412	} while (--nn); \
413	}
414
415	#endif
416
417	AssertD(i1 != i2, "mainGtU");
418	TIMES_12(
419	c1 = block[i1]; c2 = block[i2];
420	if (c1 != c2) return (c1 > c2);
421	i1++; i2++;
422	)
423
424	k = nblock + 8;
425
426	do {
427	TIMES_8(
428	c1 = block[i1]; c2 = block[i2];
429	if (c1 != c2) return (c1 > c2);
430	s1 = quadrant[i1]; s2 = quadrant[i2];
431	if (s1 != s2) return (s1 > s2);
432	i1++; i2++;
433	)
434
435	if (i1 >= nblock) i1 -= nblock;
436	if (i2 >= nblock) i2 -= nblock;
437
438	(*budget)--;
439	k -= 8;
440	} while (k >= 0);
441
442	return False;
443	}
444	#undef TIMES_8
445	#undef TIMES_12
446
447	/---------------------------------------------/
448	/*
449	* Knuth's increments seem to work better
450	* than Incerpi-Sedgewick here. Possibly
451	* because the number of elems to sort is
452	* usually small, typically <= 20.
453	*/
454	static
455	const int32_t incs[14] = {
456	1, 4, 13, 40, 121, 364, 1093, 3280,
457	9841, 29524, 88573, 265720,
458	797161, 2391484
459	};
460
461	static
462	void mainSimpleSort(uint32_t* ptr,
463	uint8_t* block,
464	uint16_t* quadrant,
465	int32_t nblock,
466	int32_t lo,
467	int32_t hi,
468	int32_t d,
469	int32_t* budget)
470	{
471	int32_t i, j, h, bigN, hp;
472	uint32_t v;
473
474	bigN = hi - lo + 1;
475	if (bigN < 2) return;
476
477	hp = 0;
478	while (incs[hp] < bigN) hp++;
479	hp--;
480
481	for (; hp >= 0; hp--) {
482	h = incs[hp];
483
484	i = lo + h;
485	while (1) {
486	/-- copy 1 --/
487	if (i > hi) break;
488	v = ptr[i];
489	j = i;
490	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
491	ptr[j] = ptr[j-h];
492	j = j - h;
493	if (j <= (lo + h - 1)) break;
494	}
495	ptr[j] = v;
496	i++;
497
498	/* 1.5% overall speedup, +290 bytes */
499	#if CONFIG_BZIP2_FEATURE_SPEED >= 3
500	/-- copy 2 --/
501	if (i > hi) break;
502	v = ptr[i];
503	j = i;
504	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
505	ptr[j] = ptr[j-h];
506	j = j - h;
507	if (j <= (lo + h - 1)) break;
508	}
509	ptr[j] = v;
510	i++;
511
512	/-- copy 3 --/
513	if (i > hi) break;
514	v = ptr[i];
515	j = i;
516	while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
517	ptr[j] = ptr[j-h];
518	j = j - h;
519	if (j <= (lo + h - 1)) break;
520	}
521	ptr[j] = v;
522	i++;
523	#endif
524	if (*budget < 0) return;
525	}
526	}
527	}
528
529
530	/---------------------------------------------/
531	/*
532	* The following is an implementation of
533	* an elegant 3-way quicksort for strings,
534	* described in a paper "Fast Algorithms for
535	* Sorting and Searching Strings", by Robert
536	* Sedgewick and Jon L. Bentley.
537	*/
538
539	static
540	ALWAYS_INLINE
541	uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
542	{
543	uint8_t t;
544	if (a > b) {
545	t = a;
546	a = b;
547	b = t;
548	};
549	/* here b >= a */
550	if (b > c) {
551	b = c;
552	if (a > b)
553	b = a;
554	}
555	return b;
556	}
557
558	#define mpush(lz,hz,dz) \
559	{ \
560	stackLo[sp] = lz; \
561	stackHi[sp] = hz; \
562	stackD [sp] = dz; \
563	sp++; \
564	}
565
566	#define mpop(lz,hz,dz) \
567	{ \
568	sp--; \
569	lz = stackLo[sp]; \
570	hz = stackHi[sp]; \
571	dz = stackD [sp]; \
572	}
573
574	#define mnextsize(az) (nextHi[az] - nextLo[az])
575
576	#define mnextswap(az,bz) \
577	{ \
578	int32_t tz; \
579	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
580	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
581	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
582	}
583
584	#define MAIN_QSORT_SMALL_THRESH 20
585	#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
586	#define MAIN_QSORT_STACK_SIZE 100
587
588	static NOINLINE
589	void mainQSort3(uint32_t* ptr,
590	uint8_t* block,
591	uint16_t* quadrant,
592	int32_t nblock,
593	int32_t loSt,
594	int32_t hiSt,
595	int32_t dSt,
596	int32_t* budget)
597	{
598	int32_t unLo, unHi, ltLo, gtHi, n, m, med;
599	int32_t sp, lo, hi, d;
600
601	int32_t stackLo[MAIN_QSORT_STACK_SIZE];
602	int32_t stackHi[MAIN_QSORT_STACK_SIZE];
603	int32_t stackD [MAIN_QSORT_STACK_SIZE];
604
605	int32_t nextLo[3];
606	int32_t nextHi[3];
607	int32_t nextD [3];
608
609	sp = 0;
610	mpush(loSt, hiSt, dSt);
611
612	while (sp > 0) {
613	AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
614
615	mpop(lo, hi, d);
616	if (hi - lo < MAIN_QSORT_SMALL_THRESH
617	\|\| d > MAIN_QSORT_DEPTH_THRESH
618	) {
619	mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
620	if (*budget < 0)
621	return;
622	continue;
623	}
624	med = (int32_t) mmed3(block[ptr[lo ] + d],
625	block[ptr[hi ] + d],
626	block[ptr[(lo+hi) >> 1] + d]);
627
628	unLo = ltLo = lo;
629	unHi = gtHi = hi;
630
631	while (1) {
632	while (1) {
633	if (unLo > unHi)
634	break;
635	n = ((int32_t)block[ptr[unLo]+d]) - med;
636	if (n == 0) {
637	mswap(ptr[unLo], ptr[ltLo]);
638	ltLo++;
639	unLo++;
640	continue;
641	};
642	if (n > 0) break;
643	unLo++;
644	}
645	while (1) {
646	if (unLo > unHi)
647	break;
648	n = ((int32_t)block[ptr[unHi]+d]) - med;
649	if (n == 0) {
650	mswap(ptr[unHi], ptr[gtHi]);
651	gtHi--;
652	unHi--;
653	continue;
654	};
655	if (n < 0) break;
656	unHi--;
657	}
658	if (unLo > unHi)
659	break;
660	mswap(ptr[unLo], ptr[unHi]);
661	unLo++;
662	unHi--;
663	}
664
665	AssertD(unHi == unLo-1, "mainQSort3(2)");
666
667	if (gtHi < ltLo) {
668	mpush(lo, hi, d + 1);
669	continue;
670	}
671
672	n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
673	m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
674
675	n = lo + unLo - ltLo - 1;
676	m = hi - (gtHi - unHi) + 1;
677
678	nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
679	nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
680	nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
681
682	if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
683	if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
684	if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
685
686	AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
687	AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
688
689	mpush(nextLo[0], nextHi[0], nextD[0]);
690	mpush(nextLo[1], nextHi[1], nextD[1]);
691	mpush(nextLo[2], nextHi[2], nextD[2]);
692	}
693	}
694
695	#undef mpush
696	#undef mpop
697	#undef mnextsize
698	#undef mnextswap
699	#undef MAIN_QSORT_SMALL_THRESH
700	#undef MAIN_QSORT_DEPTH_THRESH
701	#undef MAIN_QSORT_STACK_SIZE
702
703
704	/---------------------------------------------/
705	/* Pre:
706	* nblock > N_OVERSHOOT
707	* block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
708	* ((uint8_t*)block32) [0 .. nblock-1] holds block
709	* ptr exists for [0 .. nblock-1]
710	*
711	* Post:
712	* ((uint8_t*)block32) [0 .. nblock-1] holds block
713	* All other areas of block32 destroyed
714	* ftab[0 .. 65536] destroyed
715	* ptr [0 .. nblock-1] holds sorted order
716	* if (*budget < 0), sorting was abandoned
717	*/
718
719	#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
720	#define SETMASK (1 << 21)
721	#define CLEARMASK (~(SETMASK))
722
723	static NOINLINE
724	void mainSort(EState* state,
725	uint32_t* ptr,
726	uint8_t* block,
727	uint16_t* quadrant,
728	uint32_t* ftab,
729	int32_t nblock,
730	int32_t* budget)
731	{
732	int32_t i, j, k, ss, sb;
733	uint8_t c1;
734	int32_t numQSorted;
735	uint16_t s;
736	Bool bigDone[256];
737	/* bbox: moved to EState to save stack
738	int32_t runningOrder[256];
739	int32_t copyStart[256];
740	int32_t copyEnd [256];
741	*/
742	#define runningOrder (state->mainSort__runningOrder)
743	#define copyStart (state->mainSort__copyStart)
744	#define copyEnd (state->mainSort__copyEnd)
745
746	/-- set up the 2-byte frequency table --/
747	/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
748	memset(ftab, 0, 65537 * sizeof(ftab[0]));
749
750	j = block[0] << 8;
751	i = nblock - 1;
752	/* 3%, +300 bytes */
753	#if CONFIG_BZIP2_FEATURE_SPEED >= 2
754	for (; i >= 3; i -= 4) {
755	quadrant[i] = 0;
756	j = (j >> 8) \| (((uint16_t)block[i]) << 8);
757	ftab[j]++;
758	quadrant[i-1] = 0;
759	j = (j >> 8) \| (((uint16_t)block[i-1]) << 8);
760	ftab[j]++;
761	quadrant[i-2] = 0;
762	j = (j >> 8) \| (((uint16_t)block[i-2]) << 8);
763	ftab[j]++;
764	quadrant[i-3] = 0;
765	j = (j >> 8) \| (((uint16_t)block[i-3]) << 8);
766	ftab[j]++;
767	}
768	#endif
769	for (; i >= 0; i--) {
770	quadrant[i] = 0;
771	j = (j >> 8) \| (((uint16_t)block[i]) << 8);
772	ftab[j]++;
773	}
774
775	/-- (emphasises close relationship of block & quadrant) --/
776	for (i = 0; i < BZ_N_OVERSHOOT; i++) {
777	block [nblock+i] = block[i];
778	quadrant[nblock+i] = 0;
779	}
780
781	/-- Complete the initial radix sort --/
782	j = ftab[0]; /* bbox: optimized */
783	for (i = 1; i <= 65536; i++) {
784	j += ftab[i];
785	ftab[i] = j;
786	}
787
788	s = block[0] << 8;
789	i = nblock - 1;
790	#if CONFIG_BZIP2_FEATURE_SPEED >= 2
791	for (; i >= 3; i -= 4) {
792	s = (s >> 8) \| (block[i] << 8);
793	j = ftab[s] - 1;
794	ftab[s] = j;
795	ptr[j] = i;
796	s = (s >> 8) \| (block[i-1] << 8);
797	j = ftab[s] - 1;
798	ftab[s] = j;
799	ptr[j] = i-1;
800	s = (s >> 8) \| (block[i-2] << 8);
801	j = ftab[s] - 1;
802	ftab[s] = j;
803	ptr[j] = i-2;
804	s = (s >> 8) \| (block[i-3] << 8);
805	j = ftab[s] - 1;
806	ftab[s] = j;
807	ptr[j] = i-3;
808	}
809	#endif
810	for (; i >= 0; i--) {
811	s = (s >> 8) \| (block[i] << 8);
812	j = ftab[s] - 1;
813	ftab[s] = j;
814	ptr[j] = i;
815	}
816
817	/*
818	* Now ftab contains the first loc of every small bucket.
819	* Calculate the running order, from smallest to largest
820	* big bucket.
821	*/
822	for (i = 0; i <= 255; i++) {
823	bigDone [i] = False;
824	runningOrder[i] = i;
825	}
826
827	{
828	int32_t vv;
829	/* bbox: was: int32_t h = 1; */
830	/* do h = 3 * h + 1; while (h <= 256); */
831	uint32_t h = 364;
832
833	do {
834	/h = h / 3;/
835	h = (h * 171) >> 9; /* bbox: fast h/3 */
836	for (i = h; i <= 255; i++) {
837	vv = runningOrder[i];
838	j = i;
839	while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
840	runningOrder[j] = runningOrder[j-h];
841	j = j - h;
842	if (j <= (h - 1))
843	goto zero;
844	}
845	zero:
846	runningOrder[j] = vv;
847	}
848	} while (h != 1);
849	}
850
851	/*
852	* The main sorting loop.
853	*/
854
855	numQSorted = 0;
856
857	for (i = 0; i <= 255; i++) {
858
859	/*
860	* Process big buckets, starting with the least full.
861	* Basically this is a 3-step process in which we call
862	* mainQSort3 to sort the small buckets [ss, j], but
863	* also make a big effort to avoid the calls if we can.
864	*/
865	ss = runningOrder[i];
866
867	/*
868	* Step 1:
869	* Complete the big bucket [ss] by quicksorting
870	* any unsorted small buckets [ss, j], for j != ss.
871	* Hopefully previous pointer-scanning phases have already
872	* completed many of the small buckets [ss, j], so
873	* we don't have to sort them at all.
874	*/
875	for (j = 0; j <= 255; j++) {
876	if (j != ss) {
877	sb = (ss << 8) + j;
878	if (!(ftab[sb] & SETMASK)) {
879	int32_t lo = ftab[sb] & CLEARMASK;
880	int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
881	if (hi > lo) {
882	mainQSort3(
883	ptr, block, quadrant, nblock,
884	lo, hi, BZ_N_RADIX, budget
885	);
886	if (*budget < 0) return;
887	numQSorted += (hi - lo + 1);
888	}
889	}
890	ftab[sb] \|= SETMASK;
891	}
892	}
893
894	AssertH(!bigDone[ss], 1006);
895
896	/*
897	* Step 2:
898	* Now scan this big bucket [ss] so as to synthesise the
899	* sorted order for small buckets [t, ss] for all t,
900	* including, magically, the bucket [ss,ss] too.
901	* This will avoid doing Real Work in subsequent Step 1's.
902	*/
903	{
904	for (j = 0; j <= 255; j++) {
905	copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
906	copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
907	}
908	for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
909	k = ptr[j] - 1;
910	if (k < 0)
911	k += nblock;
912	c1 = block[k];
913	if (!bigDone[c1])
914	ptr[copyStart[c1]++] = k;
915	}
916	for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
917	k = ptr[j]-1;
918	if (k < 0)
919	k += nblock;
920	c1 = block[k];
921	if (!bigDone[c1])
922	ptr[copyEnd[c1]--] = k;
923	}
924	}
925
926	/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
927	* Necessity for this case is demonstrated by compressing
928	* a sequence of approximately 48.5 million of character
929	* 251; 1.0.0/1.0.1 will then die here. */
930	AssertH((copyStart[ss]-1 == copyEnd[ss]) \
931	\|\| (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
932
933	for (j = 0; j <= 255; j++)
934	ftab[(j << 8) + ss] \|= SETMASK;
935
936	/*
937	* Step 3:
938	* The [ss] big bucket is now done. Record this fact,
939	* and update the quadrant descriptors. Remember to
940	* update quadrants in the overshoot area too, if
941	* necessary. The "if (i < 255)" test merely skips
942	* this updating for the last bucket processed, since
943	* updating for the last bucket is pointless.
944	*
945	* The quadrant array provides a way to incrementally
946	* cache sort orderings, as they appear, so as to
947	* make subsequent comparisons in fullGtU() complete
948	* faster. For repetitive blocks this makes a big
949	* difference (but not big enough to be able to avoid
950	* the fallback sorting mechanism, exponential radix sort).
951	*
952	* The precise meaning is: at all times:
953	*
954	* for 0 <= i < nblock and 0 <= j <= nblock
955	*
956	* if block[i] != block[j],
957	*
958	* then the relative values of quadrant[i] and
959	* quadrant[j] are meaningless.
960	*
961	* else {
962	* if quadrant[i] < quadrant[j]
963	* then the string starting at i lexicographically
964	* precedes the string starting at j
965	*
966	* else if quadrant[i] > quadrant[j]
967	* then the string starting at j lexicographically
968	* precedes the string starting at i
969	*
970	* else
971	* the relative ordering of the strings starting
972	* at i and j has not yet been determined.
973	* }
974	*/
975	bigDone[ss] = True;
976
977	if (i < 255) {
978	int32_t bbStart = ftab[ss << 8] & CLEARMASK;
979	int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
980	int32_t shifts = 0;
981
982	while ((bbSize >> shifts) > 65534) shifts++;
983
984	for (j = bbSize-1; j >= 0; j--) {
985	int32_t a2update = ptr[bbStart + j];
986	uint16_t qVal = (uint16_t)(j >> shifts);
987	quadrant[a2update] = qVal;
988	if (a2update < BZ_N_OVERSHOOT)
989	quadrant[a2update + nblock] = qVal;
990	}
991	AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
992	}
993	}
994	#undef runningOrder
995	#undef copyStart
996	#undef copyEnd
997	}
998
999	#undef BIGFREQ
1000	#undef SETMASK
1001	#undef CLEARMASK
1002
1003
1004	/---------------------------------------------/
1005	/* Pre:
1006	* nblock > 0
1007	* arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1008	* ((uint8_t*)arr2)[0 .. nblock-1] holds block
1009	* arr1 exists for [0 .. nblock-1]
1010	*
1011	* Post:
1012	* ((uint8_t*)arr2) [0 .. nblock-1] holds block
1013	* All other areas of block destroyed
1014	* ftab[0 .. 65536] destroyed
1015	* arr1[0 .. nblock-1] holds sorted order
1016	*/
1017	static NOINLINE
1018	void BZ2_blockSort(EState* s)
1019	{
1020	/* In original bzip2 1.0.4, it's a parameter, but 30
1021	* (which was the default) should work ok. */
1022	enum { wfact = 30 };
1023
1024	uint32_t* ptr = s->ptr;
1025	uint8_t* block = s->block;
1026	uint32_t* ftab = s->ftab;
1027	int32_t nblock = s->nblock;
1028	uint16_t* quadrant;
1029	int32_t budget;
1030	int32_t i;
1031
1032	if (nblock < 10000) {
1033	fallbackSort(s->arr1, s->arr2, ftab, nblock);
1034	} else {
1035	/* Calculate the location for quadrant, remembering to get
1036	* the alignment right. Assumes that &(block[0]) is at least
1037	* 2-byte aligned -- this should be ok since block is really
1038	* the first section of arr2.
1039	*/
1040	i = nblock + BZ_N_OVERSHOOT;
1041	if (i & 1) i++;
1042	quadrant = (uint16_t*)(&(block[i]));
1043
1044	/* (wfact-1) / 3 puts the default-factor-30
1045	* transition point at very roughly the same place as
1046	* with v0.1 and v0.9.0.
1047	* Not that it particularly matters any more, since the
1048	* resulting compressed stream is now the same regardless
1049	* of whether or not we use the main sort or fallback sort.
1050	*/
1051	budget = nblock * ((wfact-1) / 3);
1052
1053	mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
1054	if (budget < 0) {
1055	fallbackSort(s->arr1, s->arr2, ftab, nblock);
1056	}
1057	}
1058
1059	s->origPtr = -1;
1060	for (i = 0; i < s->nblock; i++)
1061	if (ptr[i] == 0) {
1062	s->origPtr = i;
1063	break;
1064	};
1065
1066	AssertH(s->origPtr != -1, 1003);
1067	}
1068
1069
1070	/-------------------------------------------------------------/
1071	/--- end blocksort.c ---/
1072	/-------------------------------------------------------------/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: MondoRescue/branches/2.2.9/mindi-busybox/archival/libarchive/bz/blocksort.c@ 3863

Download in other formats: