[2725] | 1 | /*
|
---|
| 2 | * bzip2 is written by Julian Seward <jseward@bzip.org>.
|
---|
| 3 | * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
|
---|
| 4 | * See README and LICENSE files in this directory for more information.
|
---|
| 5 | */
|
---|
| 6 |
|
---|
| 7 | /*-------------------------------------------------------------*/
|
---|
| 8 | /*--- Block sorting machinery ---*/
|
---|
| 9 | /*--- blocksort.c ---*/
|
---|
| 10 | /*-------------------------------------------------------------*/
|
---|
| 11 |
|
---|
| 12 | /* ------------------------------------------------------------------
|
---|
| 13 | This file is part of bzip2/libbzip2, a program and library for
|
---|
| 14 | lossless, block-sorting data compression.
|
---|
| 15 |
|
---|
| 16 | bzip2/libbzip2 version 1.0.4 of 20 December 2006
|
---|
| 17 | Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
|
---|
| 18 |
|
---|
| 19 | Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
---|
| 20 | README file.
|
---|
| 21 |
|
---|
| 22 | This program is released under the terms of the license contained
|
---|
| 23 | in the file LICENSE.
|
---|
| 24 | ------------------------------------------------------------------ */
|
---|
| 25 |
|
---|
| 26 | /* #include "bzlib_private.h" */
|
---|
| 27 |
|
---|
| 28 | #define mswap(zz1, zz2) \
|
---|
| 29 | { \
|
---|
| 30 | int32_t zztmp = zz1; \
|
---|
| 31 | zz1 = zz2; \
|
---|
| 32 | zz2 = zztmp; \
|
---|
| 33 | }
|
---|
| 34 |
|
---|
| 35 | static
|
---|
| 36 | /* No measurable speed gain with inlining */
|
---|
| 37 | /* ALWAYS_INLINE */
|
---|
| 38 | void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
|
---|
| 39 | {
|
---|
| 40 | while (zzn > 0) {
|
---|
| 41 | mswap(ptr[zzp1], ptr[zzp2]);
|
---|
| 42 | zzp1++;
|
---|
| 43 | zzp2++;
|
---|
| 44 | zzn--;
|
---|
| 45 | }
|
---|
| 46 | }
|
---|
| 47 |
|
---|
| 48 | static
|
---|
| 49 | ALWAYS_INLINE
|
---|
| 50 | int32_t mmin(int32_t a, int32_t b)
|
---|
| 51 | {
|
---|
| 52 | return (a < b) ? a : b;
|
---|
| 53 | }
|
---|
| 54 |
|
---|
| 55 |
|
---|
| 56 | /*---------------------------------------------*/
|
---|
| 57 | /*--- Fallback O(N log(N)^2) sorting ---*/
|
---|
| 58 | /*--- algorithm, for repetitive blocks ---*/
|
---|
| 59 | /*---------------------------------------------*/
|
---|
| 60 |
|
---|
| 61 | /*---------------------------------------------*/
|
---|
| 62 | static
|
---|
| 63 | inline
|
---|
| 64 | void fallbackSimpleSort(uint32_t* fmap,
|
---|
| 65 | uint32_t* eclass,
|
---|
| 66 | int32_t lo,
|
---|
| 67 | int32_t hi)
|
---|
| 68 | {
|
---|
| 69 | int32_t i, j, tmp;
|
---|
| 70 | uint32_t ec_tmp;
|
---|
| 71 |
|
---|
| 72 | if (lo == hi) return;
|
---|
| 73 |
|
---|
| 74 | if (hi - lo > 3) {
|
---|
| 75 | for (i = hi-4; i >= lo; i--) {
|
---|
| 76 | tmp = fmap[i];
|
---|
| 77 | ec_tmp = eclass[tmp];
|
---|
| 78 | for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
|
---|
| 79 | fmap[j-4] = fmap[j];
|
---|
| 80 | fmap[j-4] = tmp;
|
---|
| 81 | }
|
---|
| 82 | }
|
---|
| 83 |
|
---|
| 84 | for (i = hi-1; i >= lo; i--) {
|
---|
| 85 | tmp = fmap[i];
|
---|
| 86 | ec_tmp = eclass[tmp];
|
---|
| 87 | for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
|
---|
| 88 | fmap[j-1] = fmap[j];
|
---|
| 89 | fmap[j-1] = tmp;
|
---|
| 90 | }
|
---|
| 91 | }
|
---|
| 92 |
|
---|
| 93 |
|
---|
| 94 | /*---------------------------------------------*/
|
---|
| 95 | #define fpush(lz,hz) { \
|
---|
| 96 | stackLo[sp] = lz; \
|
---|
| 97 | stackHi[sp] = hz; \
|
---|
| 98 | sp++; \
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | #define fpop(lz,hz) { \
|
---|
| 102 | sp--; \
|
---|
| 103 | lz = stackLo[sp]; \
|
---|
| 104 | hz = stackHi[sp]; \
|
---|
| 105 | }
|
---|
| 106 |
|
---|
| 107 | #define FALLBACK_QSORT_SMALL_THRESH 10
|
---|
| 108 | #define FALLBACK_QSORT_STACK_SIZE 100
|
---|
| 109 |
|
---|
| 110 | static
|
---|
| 111 | void fallbackQSort3(uint32_t* fmap,
|
---|
| 112 | uint32_t* eclass,
|
---|
| 113 | int32_t loSt,
|
---|
| 114 | int32_t hiSt)
|
---|
| 115 | {
|
---|
| 116 | int32_t unLo, unHi, ltLo, gtHi, n, m;
|
---|
| 117 | int32_t sp, lo, hi;
|
---|
| 118 | uint32_t med, r, r3;
|
---|
| 119 | int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
|
---|
| 120 | int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
|
---|
| 121 |
|
---|
| 122 | r = 0;
|
---|
| 123 |
|
---|
| 124 | sp = 0;
|
---|
| 125 | fpush(loSt, hiSt);
|
---|
| 126 |
|
---|
| 127 | while (sp > 0) {
|
---|
| 128 | AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
|
---|
| 129 |
|
---|
| 130 | fpop(lo, hi);
|
---|
| 131 | if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
|
---|
| 132 | fallbackSimpleSort(fmap, eclass, lo, hi);
|
---|
| 133 | continue;
|
---|
| 134 | }
|
---|
| 135 |
|
---|
| 136 | /* Random partitioning. Median of 3 sometimes fails to
|
---|
| 137 | * avoid bad cases. Median of 9 seems to help but
|
---|
| 138 | * looks rather expensive. This too seems to work but
|
---|
| 139 | * is cheaper. Guidance for the magic constants
|
---|
| 140 | * 7621 and 32768 is taken from Sedgewick's algorithms
|
---|
| 141 | * book, chapter 35.
|
---|
| 142 | */
|
---|
| 143 | r = ((r * 7621) + 1) % 32768;
|
---|
| 144 | r3 = r % 3;
|
---|
| 145 | if (r3 == 0)
|
---|
| 146 | med = eclass[fmap[lo]];
|
---|
| 147 | else if (r3 == 1)
|
---|
| 148 | med = eclass[fmap[(lo+hi)>>1]];
|
---|
| 149 | else
|
---|
| 150 | med = eclass[fmap[hi]];
|
---|
| 151 |
|
---|
| 152 | unLo = ltLo = lo;
|
---|
| 153 | unHi = gtHi = hi;
|
---|
| 154 |
|
---|
| 155 | while (1) {
|
---|
| 156 | while (1) {
|
---|
| 157 | if (unLo > unHi) break;
|
---|
| 158 | n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
|
---|
| 159 | if (n == 0) {
|
---|
| 160 | mswap(fmap[unLo], fmap[ltLo]);
|
---|
| 161 | ltLo++;
|
---|
| 162 | unLo++;
|
---|
| 163 | continue;
|
---|
| 164 | };
|
---|
| 165 | if (n > 0) break;
|
---|
| 166 | unLo++;
|
---|
| 167 | }
|
---|
| 168 | while (1) {
|
---|
| 169 | if (unLo > unHi) break;
|
---|
| 170 | n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
|
---|
| 171 | if (n == 0) {
|
---|
| 172 | mswap(fmap[unHi], fmap[gtHi]);
|
---|
| 173 | gtHi--; unHi--;
|
---|
| 174 | continue;
|
---|
| 175 | };
|
---|
| 176 | if (n < 0) break;
|
---|
| 177 | unHi--;
|
---|
| 178 | }
|
---|
| 179 | if (unLo > unHi) break;
|
---|
| 180 | mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
|
---|
| 181 | }
|
---|
| 182 |
|
---|
| 183 | AssertD(unHi == unLo-1, "fallbackQSort3(2)");
|
---|
| 184 |
|
---|
| 185 | if (gtHi < ltLo) continue;
|
---|
| 186 |
|
---|
| 187 | n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
|
---|
| 188 | m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
|
---|
| 189 |
|
---|
| 190 | n = lo + unLo - ltLo - 1;
|
---|
| 191 | m = hi - (gtHi - unHi) + 1;
|
---|
| 192 |
|
---|
| 193 | if (n - lo > hi - m) {
|
---|
| 194 | fpush(lo, n);
|
---|
| 195 | fpush(m, hi);
|
---|
| 196 | } else {
|
---|
| 197 | fpush(m, hi);
|
---|
| 198 | fpush(lo, n);
|
---|
| 199 | }
|
---|
| 200 | }
|
---|
| 201 | }
|
---|
| 202 |
|
---|
| 203 | #undef fpush
|
---|
| 204 | #undef fpop
|
---|
| 205 | #undef FALLBACK_QSORT_SMALL_THRESH
|
---|
| 206 | #undef FALLBACK_QSORT_STACK_SIZE
|
---|
| 207 |
|
---|
| 208 |
|
---|
| 209 | /*---------------------------------------------*/
|
---|
| 210 | /* Pre:
|
---|
| 211 | * nblock > 0
|
---|
| 212 | * eclass exists for [0 .. nblock-1]
|
---|
| 213 | * ((uint8_t*)eclass) [0 .. nblock-1] holds block
|
---|
| 214 | * ptr exists for [0 .. nblock-1]
|
---|
| 215 | *
|
---|
| 216 | * Post:
|
---|
| 217 | * ((uint8_t*)eclass) [0 .. nblock-1] holds block
|
---|
| 218 | * All other areas of eclass destroyed
|
---|
| 219 | * fmap [0 .. nblock-1] holds sorted order
|
---|
| 220 | * bhtab[0 .. 2+(nblock/32)] destroyed
|
---|
| 221 | */
|
---|
| 222 |
|
---|
| 223 | #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
|
---|
| 224 | #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
|
---|
| 225 | #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
|
---|
| 226 | #define WORD_BH(zz) bhtab[(zz) >> 5]
|
---|
| 227 | #define UNALIGNED_BH(zz) ((zz) & 0x01f)
|
---|
| 228 |
|
---|
| 229 | static
|
---|
| 230 | void fallbackSort(uint32_t* fmap,
|
---|
| 231 | uint32_t* eclass,
|
---|
| 232 | uint32_t* bhtab,
|
---|
| 233 | int32_t nblock)
|
---|
| 234 | {
|
---|
| 235 | int32_t ftab[257];
|
---|
| 236 | int32_t ftabCopy[256];
|
---|
| 237 | int32_t H, i, j, k, l, r, cc, cc1;
|
---|
| 238 | int32_t nNotDone;
|
---|
| 239 | int32_t nBhtab;
|
---|
| 240 | uint8_t* eclass8 = (uint8_t*)eclass;
|
---|
| 241 |
|
---|
| 242 | /*
|
---|
| 243 | * Initial 1-char radix sort to generate
|
---|
| 244 | * initial fmap and initial BH bits.
|
---|
| 245 | */
|
---|
| 246 | for (i = 0; i < 257; i++) ftab[i] = 0;
|
---|
| 247 | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
|
---|
| 248 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
|
---|
| 249 |
|
---|
| 250 | j = ftab[0]; /* bbox: optimized */
|
---|
| 251 | for (i = 1; i < 257; i++) {
|
---|
| 252 | j += ftab[i];
|
---|
| 253 | ftab[i] = j;
|
---|
| 254 | }
|
---|
| 255 |
|
---|
| 256 | for (i = 0; i < nblock; i++) {
|
---|
| 257 | j = eclass8[i];
|
---|
| 258 | k = ftab[j] - 1;
|
---|
| 259 | ftab[j] = k;
|
---|
| 260 | fmap[k] = i;
|
---|
| 261 | }
|
---|
| 262 |
|
---|
| 263 | nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
|
---|
| 264 | for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
|
---|
| 265 | for (i = 0; i < 256; i++) SET_BH(ftab[i]);
|
---|
| 266 |
|
---|
| 267 | /*
|
---|
| 268 | * Inductively refine the buckets. Kind-of an
|
---|
| 269 | * "exponential radix sort" (!), inspired by the
|
---|
| 270 | * Manber-Myers suffix array construction algorithm.
|
---|
| 271 | */
|
---|
| 272 |
|
---|
| 273 | /*-- set sentinel bits for block-end detection --*/
|
---|
| 274 | for (i = 0; i < 32; i++) {
|
---|
| 275 | SET_BH(nblock + 2*i);
|
---|
| 276 | CLEAR_BH(nblock + 2*i + 1);
|
---|
| 277 | }
|
---|
| 278 |
|
---|
| 279 | /*-- the log(N) loop --*/
|
---|
| 280 | H = 1;
|
---|
| 281 | while (1) {
|
---|
| 282 | j = 0;
|
---|
| 283 | for (i = 0; i < nblock; i++) {
|
---|
| 284 | if (ISSET_BH(i))
|
---|
| 285 | j = i;
|
---|
| 286 | k = fmap[i] - H;
|
---|
| 287 | if (k < 0)
|
---|
| 288 | k += nblock;
|
---|
| 289 | eclass[k] = j;
|
---|
| 290 | }
|
---|
| 291 |
|
---|
| 292 | nNotDone = 0;
|
---|
| 293 | r = -1;
|
---|
| 294 | while (1) {
|
---|
| 295 |
|
---|
| 296 | /*-- find the next non-singleton bucket --*/
|
---|
| 297 | k = r + 1;
|
---|
| 298 | while (ISSET_BH(k) && UNALIGNED_BH(k))
|
---|
| 299 | k++;
|
---|
| 300 | if (ISSET_BH(k)) {
|
---|
| 301 | while (WORD_BH(k) == 0xffffffff) k += 32;
|
---|
| 302 | while (ISSET_BH(k)) k++;
|
---|
| 303 | }
|
---|
| 304 | l = k - 1;
|
---|
| 305 | if (l >= nblock)
|
---|
| 306 | break;
|
---|
| 307 | while (!ISSET_BH(k) && UNALIGNED_BH(k))
|
---|
| 308 | k++;
|
---|
| 309 | if (!ISSET_BH(k)) {
|
---|
| 310 | while (WORD_BH(k) == 0x00000000) k += 32;
|
---|
| 311 | while (!ISSET_BH(k)) k++;
|
---|
| 312 | }
|
---|
| 313 | r = k - 1;
|
---|
| 314 | if (r >= nblock)
|
---|
| 315 | break;
|
---|
| 316 |
|
---|
| 317 | /*-- now [l, r] bracket current bucket --*/
|
---|
| 318 | if (r > l) {
|
---|
| 319 | nNotDone += (r - l + 1);
|
---|
| 320 | fallbackQSort3(fmap, eclass, l, r);
|
---|
| 321 |
|
---|
| 322 | /*-- scan bucket and generate header bits-- */
|
---|
| 323 | cc = -1;
|
---|
| 324 | for (i = l; i <= r; i++) {
|
---|
| 325 | cc1 = eclass[fmap[i]];
|
---|
| 326 | if (cc != cc1) {
|
---|
| 327 | SET_BH(i);
|
---|
| 328 | cc = cc1;
|
---|
| 329 | };
|
---|
| 330 | }
|
---|
| 331 | }
|
---|
| 332 | }
|
---|
| 333 |
|
---|
| 334 | H *= 2;
|
---|
| 335 | if (H > nblock || nNotDone == 0)
|
---|
| 336 | break;
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | /*
|
---|
| 340 | * Reconstruct the original block in
|
---|
| 341 | * eclass8 [0 .. nblock-1], since the
|
---|
| 342 | * previous phase destroyed it.
|
---|
| 343 | */
|
---|
| 344 | j = 0;
|
---|
| 345 | for (i = 0; i < nblock; i++) {
|
---|
| 346 | while (ftabCopy[j] == 0)
|
---|
| 347 | j++;
|
---|
| 348 | ftabCopy[j]--;
|
---|
| 349 | eclass8[fmap[i]] = (uint8_t)j;
|
---|
| 350 | }
|
---|
| 351 | AssertH(j < 256, 1005);
|
---|
| 352 | }
|
---|
| 353 |
|
---|
| 354 | #undef SET_BH
|
---|
| 355 | #undef CLEAR_BH
|
---|
| 356 | #undef ISSET_BH
|
---|
| 357 | #undef WORD_BH
|
---|
| 358 | #undef UNALIGNED_BH
|
---|
| 359 |
|
---|
| 360 |
|
---|
| 361 | /*---------------------------------------------*/
|
---|
| 362 | /*--- The main, O(N^2 log(N)) sorting ---*/
|
---|
| 363 | /*--- algorithm. Faster for "normal" ---*/
|
---|
| 364 | /*--- non-repetitive blocks. ---*/
|
---|
| 365 | /*---------------------------------------------*/
|
---|
| 366 |
|
---|
| 367 | /*---------------------------------------------*/
|
---|
| 368 | static
|
---|
| 369 | NOINLINE
|
---|
| 370 | int mainGtU(
|
---|
| 371 | uint32_t i1,
|
---|
| 372 | uint32_t i2,
|
---|
| 373 | uint8_t* block,
|
---|
| 374 | uint16_t* quadrant,
|
---|
| 375 | uint32_t nblock,
|
---|
| 376 | int32_t* budget)
|
---|
| 377 | {
|
---|
| 378 | int32_t k;
|
---|
| 379 | uint8_t c1, c2;
|
---|
| 380 | uint16_t s1, s2;
|
---|
| 381 |
|
---|
| 382 | /* Loop unrolling here is actually very useful
|
---|
| 383 | * (generated code is much simpler),
|
---|
| 384 | * code size increase is only 270 bytes (i386)
|
---|
| 385 | * but speeds up compression 10% overall
|
---|
| 386 | */
|
---|
| 387 |
|
---|
| 388 | #if CONFIG_BZIP2_FEATURE_SPEED >= 1
|
---|
| 389 |
|
---|
| 390 | #define TIMES_8(code) \
|
---|
| 391 | code; code; code; code; \
|
---|
| 392 | code; code; code; code;
|
---|
| 393 | #define TIMES_12(code) \
|
---|
| 394 | code; code; code; code; \
|
---|
| 395 | code; code; code; code; \
|
---|
| 396 | code; code; code; code;
|
---|
| 397 |
|
---|
| 398 | #else
|
---|
| 399 |
|
---|
| 400 | #define TIMES_8(code) \
|
---|
| 401 | { \
|
---|
| 402 | int nn = 8; \
|
---|
| 403 | do { \
|
---|
| 404 | code; \
|
---|
| 405 | } while (--nn); \
|
---|
| 406 | }
|
---|
| 407 | #define TIMES_12(code) \
|
---|
| 408 | { \
|
---|
| 409 | int nn = 12; \
|
---|
| 410 | do { \
|
---|
| 411 | code; \
|
---|
| 412 | } while (--nn); \
|
---|
| 413 | }
|
---|
| 414 |
|
---|
| 415 | #endif
|
---|
| 416 |
|
---|
| 417 | AssertD(i1 != i2, "mainGtU");
|
---|
| 418 | TIMES_12(
|
---|
| 419 | c1 = block[i1]; c2 = block[i2];
|
---|
| 420 | if (c1 != c2) return (c1 > c2);
|
---|
| 421 | i1++; i2++;
|
---|
| 422 | )
|
---|
| 423 |
|
---|
| 424 | k = nblock + 8;
|
---|
| 425 |
|
---|
| 426 | do {
|
---|
| 427 | TIMES_8(
|
---|
| 428 | c1 = block[i1]; c2 = block[i2];
|
---|
| 429 | if (c1 != c2) return (c1 > c2);
|
---|
| 430 | s1 = quadrant[i1]; s2 = quadrant[i2];
|
---|
| 431 | if (s1 != s2) return (s1 > s2);
|
---|
| 432 | i1++; i2++;
|
---|
| 433 | )
|
---|
| 434 |
|
---|
| 435 | if (i1 >= nblock) i1 -= nblock;
|
---|
| 436 | if (i2 >= nblock) i2 -= nblock;
|
---|
| 437 |
|
---|
| 438 | (*budget)--;
|
---|
| 439 | k -= 8;
|
---|
| 440 | } while (k >= 0);
|
---|
| 441 |
|
---|
| 442 | return False;
|
---|
| 443 | }
|
---|
| 444 | #undef TIMES_8
|
---|
| 445 | #undef TIMES_12
|
---|
| 446 |
|
---|
| 447 | /*---------------------------------------------*/
|
---|
| 448 | /*
|
---|
| 449 | * Knuth's increments seem to work better
|
---|
| 450 | * than Incerpi-Sedgewick here. Possibly
|
---|
| 451 | * because the number of elems to sort is
|
---|
| 452 | * usually small, typically <= 20.
|
---|
| 453 | */
|
---|
| 454 | static
|
---|
| 455 | const int32_t incs[14] = {
|
---|
| 456 | 1, 4, 13, 40, 121, 364, 1093, 3280,
|
---|
| 457 | 9841, 29524, 88573, 265720,
|
---|
| 458 | 797161, 2391484
|
---|
| 459 | };
|
---|
| 460 |
|
---|
| 461 | static
|
---|
| 462 | void mainSimpleSort(uint32_t* ptr,
|
---|
| 463 | uint8_t* block,
|
---|
| 464 | uint16_t* quadrant,
|
---|
| 465 | int32_t nblock,
|
---|
| 466 | int32_t lo,
|
---|
| 467 | int32_t hi,
|
---|
| 468 | int32_t d,
|
---|
| 469 | int32_t* budget)
|
---|
| 470 | {
|
---|
| 471 | int32_t i, j, h, bigN, hp;
|
---|
| 472 | uint32_t v;
|
---|
| 473 |
|
---|
| 474 | bigN = hi - lo + 1;
|
---|
| 475 | if (bigN < 2) return;
|
---|
| 476 |
|
---|
| 477 | hp = 0;
|
---|
| 478 | while (incs[hp] < bigN) hp++;
|
---|
| 479 | hp--;
|
---|
| 480 |
|
---|
| 481 | for (; hp >= 0; hp--) {
|
---|
| 482 | h = incs[hp];
|
---|
| 483 |
|
---|
| 484 | i = lo + h;
|
---|
| 485 | while (1) {
|
---|
| 486 | /*-- copy 1 --*/
|
---|
| 487 | if (i > hi) break;
|
---|
| 488 | v = ptr[i];
|
---|
| 489 | j = i;
|
---|
| 490 | while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
---|
| 491 | ptr[j] = ptr[j-h];
|
---|
| 492 | j = j - h;
|
---|
| 493 | if (j <= (lo + h - 1)) break;
|
---|
| 494 | }
|
---|
| 495 | ptr[j] = v;
|
---|
| 496 | i++;
|
---|
| 497 |
|
---|
| 498 | /* 1.5% overall speedup, +290 bytes */
|
---|
| 499 | #if CONFIG_BZIP2_FEATURE_SPEED >= 3
|
---|
| 500 | /*-- copy 2 --*/
|
---|
| 501 | if (i > hi) break;
|
---|
| 502 | v = ptr[i];
|
---|
| 503 | j = i;
|
---|
| 504 | while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
---|
| 505 | ptr[j] = ptr[j-h];
|
---|
| 506 | j = j - h;
|
---|
| 507 | if (j <= (lo + h - 1)) break;
|
---|
| 508 | }
|
---|
| 509 | ptr[j] = v;
|
---|
| 510 | i++;
|
---|
| 511 |
|
---|
| 512 | /*-- copy 3 --*/
|
---|
| 513 | if (i > hi) break;
|
---|
| 514 | v = ptr[i];
|
---|
| 515 | j = i;
|
---|
| 516 | while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
---|
| 517 | ptr[j] = ptr[j-h];
|
---|
| 518 | j = j - h;
|
---|
| 519 | if (j <= (lo + h - 1)) break;
|
---|
| 520 | }
|
---|
| 521 | ptr[j] = v;
|
---|
| 522 | i++;
|
---|
| 523 | #endif
|
---|
| 524 | if (*budget < 0) return;
|
---|
| 525 | }
|
---|
| 526 | }
|
---|
| 527 | }
|
---|
| 528 |
|
---|
| 529 |
|
---|
| 530 | /*---------------------------------------------*/
|
---|
| 531 | /*
|
---|
| 532 | * The following is an implementation of
|
---|
| 533 | * an elegant 3-way quicksort for strings,
|
---|
| 534 | * described in a paper "Fast Algorithms for
|
---|
| 535 | * Sorting and Searching Strings", by Robert
|
---|
| 536 | * Sedgewick and Jon L. Bentley.
|
---|
| 537 | */
|
---|
| 538 |
|
---|
| 539 | static
|
---|
| 540 | ALWAYS_INLINE
|
---|
| 541 | uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
|
---|
| 542 | {
|
---|
| 543 | uint8_t t;
|
---|
| 544 | if (a > b) {
|
---|
| 545 | t = a;
|
---|
| 546 | a = b;
|
---|
| 547 | b = t;
|
---|
| 548 | };
|
---|
| 549 | /* here b >= a */
|
---|
| 550 | if (b > c) {
|
---|
| 551 | b = c;
|
---|
| 552 | if (a > b)
|
---|
| 553 | b = a;
|
---|
| 554 | }
|
---|
| 555 | return b;
|
---|
| 556 | }
|
---|
| 557 |
|
---|
| 558 | #define mpush(lz,hz,dz) \
|
---|
| 559 | { \
|
---|
| 560 | stackLo[sp] = lz; \
|
---|
| 561 | stackHi[sp] = hz; \
|
---|
| 562 | stackD [sp] = dz; \
|
---|
| 563 | sp++; \
|
---|
| 564 | }
|
---|
| 565 |
|
---|
| 566 | #define mpop(lz,hz,dz) \
|
---|
| 567 | { \
|
---|
| 568 | sp--; \
|
---|
| 569 | lz = stackLo[sp]; \
|
---|
| 570 | hz = stackHi[sp]; \
|
---|
| 571 | dz = stackD [sp]; \
|
---|
| 572 | }
|
---|
| 573 |
|
---|
| 574 | #define mnextsize(az) (nextHi[az] - nextLo[az])
|
---|
| 575 |
|
---|
| 576 | #define mnextswap(az,bz) \
|
---|
| 577 | { \
|
---|
| 578 | int32_t tz; \
|
---|
| 579 | tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
|
---|
| 580 | tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
|
---|
| 581 | tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
|
---|
| 582 | }
|
---|
| 583 |
|
---|
| 584 | #define MAIN_QSORT_SMALL_THRESH 20
|
---|
| 585 | #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
|
---|
| 586 | #define MAIN_QSORT_STACK_SIZE 100
|
---|
| 587 |
|
---|
| 588 | static NOINLINE
|
---|
| 589 | void mainQSort3(uint32_t* ptr,
|
---|
| 590 | uint8_t* block,
|
---|
| 591 | uint16_t* quadrant,
|
---|
| 592 | int32_t nblock,
|
---|
| 593 | int32_t loSt,
|
---|
| 594 | int32_t hiSt,
|
---|
| 595 | int32_t dSt,
|
---|
| 596 | int32_t* budget)
|
---|
| 597 | {
|
---|
| 598 | int32_t unLo, unHi, ltLo, gtHi, n, m, med;
|
---|
| 599 | int32_t sp, lo, hi, d;
|
---|
| 600 |
|
---|
| 601 | int32_t stackLo[MAIN_QSORT_STACK_SIZE];
|
---|
| 602 | int32_t stackHi[MAIN_QSORT_STACK_SIZE];
|
---|
| 603 | int32_t stackD [MAIN_QSORT_STACK_SIZE];
|
---|
| 604 |
|
---|
| 605 | int32_t nextLo[3];
|
---|
| 606 | int32_t nextHi[3];
|
---|
| 607 | int32_t nextD [3];
|
---|
| 608 |
|
---|
| 609 | sp = 0;
|
---|
| 610 | mpush(loSt, hiSt, dSt);
|
---|
| 611 |
|
---|
| 612 | while (sp > 0) {
|
---|
| 613 | AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
|
---|
| 614 |
|
---|
| 615 | mpop(lo, hi, d);
|
---|
| 616 | if (hi - lo < MAIN_QSORT_SMALL_THRESH
|
---|
| 617 | || d > MAIN_QSORT_DEPTH_THRESH
|
---|
| 618 | ) {
|
---|
| 619 | mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
|
---|
| 620 | if (*budget < 0)
|
---|
| 621 | return;
|
---|
| 622 | continue;
|
---|
| 623 | }
|
---|
| 624 | med = (int32_t) mmed3(block[ptr[lo ] + d],
|
---|
| 625 | block[ptr[hi ] + d],
|
---|
| 626 | block[ptr[(lo+hi) >> 1] + d]);
|
---|
| 627 |
|
---|
| 628 | unLo = ltLo = lo;
|
---|
| 629 | unHi = gtHi = hi;
|
---|
| 630 |
|
---|
| 631 | while (1) {
|
---|
| 632 | while (1) {
|
---|
| 633 | if (unLo > unHi)
|
---|
| 634 | break;
|
---|
| 635 | n = ((int32_t)block[ptr[unLo]+d]) - med;
|
---|
| 636 | if (n == 0) {
|
---|
| 637 | mswap(ptr[unLo], ptr[ltLo]);
|
---|
| 638 | ltLo++;
|
---|
| 639 | unLo++;
|
---|
| 640 | continue;
|
---|
| 641 | };
|
---|
| 642 | if (n > 0) break;
|
---|
| 643 | unLo++;
|
---|
| 644 | }
|
---|
| 645 | while (1) {
|
---|
| 646 | if (unLo > unHi)
|
---|
| 647 | break;
|
---|
| 648 | n = ((int32_t)block[ptr[unHi]+d]) - med;
|
---|
| 649 | if (n == 0) {
|
---|
| 650 | mswap(ptr[unHi], ptr[gtHi]);
|
---|
| 651 | gtHi--;
|
---|
| 652 | unHi--;
|
---|
| 653 | continue;
|
---|
| 654 | };
|
---|
| 655 | if (n < 0) break;
|
---|
| 656 | unHi--;
|
---|
| 657 | }
|
---|
| 658 | if (unLo > unHi)
|
---|
| 659 | break;
|
---|
| 660 | mswap(ptr[unLo], ptr[unHi]);
|
---|
| 661 | unLo++;
|
---|
| 662 | unHi--;
|
---|
| 663 | }
|
---|
| 664 |
|
---|
| 665 | AssertD(unHi == unLo-1, "mainQSort3(2)");
|
---|
| 666 |
|
---|
| 667 | if (gtHi < ltLo) {
|
---|
| 668 | mpush(lo, hi, d + 1);
|
---|
| 669 | continue;
|
---|
| 670 | }
|
---|
| 671 |
|
---|
| 672 | n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
|
---|
| 673 | m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
|
---|
| 674 |
|
---|
| 675 | n = lo + unLo - ltLo - 1;
|
---|
| 676 | m = hi - (gtHi - unHi) + 1;
|
---|
| 677 |
|
---|
| 678 | nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
|
---|
| 679 | nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
|
---|
| 680 | nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
|
---|
| 681 |
|
---|
| 682 | if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
|
---|
| 683 | if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
|
---|
| 684 | if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
|
---|
| 685 |
|
---|
| 686 | AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
|
---|
| 687 | AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
|
---|
| 688 |
|
---|
| 689 | mpush(nextLo[0], nextHi[0], nextD[0]);
|
---|
| 690 | mpush(nextLo[1], nextHi[1], nextD[1]);
|
---|
| 691 | mpush(nextLo[2], nextHi[2], nextD[2]);
|
---|
| 692 | }
|
---|
| 693 | }
|
---|
| 694 |
|
---|
| 695 | #undef mpush
|
---|
| 696 | #undef mpop
|
---|
| 697 | #undef mnextsize
|
---|
| 698 | #undef mnextswap
|
---|
| 699 | #undef MAIN_QSORT_SMALL_THRESH
|
---|
| 700 | #undef MAIN_QSORT_DEPTH_THRESH
|
---|
| 701 | #undef MAIN_QSORT_STACK_SIZE
|
---|
| 702 |
|
---|
| 703 |
|
---|
| 704 | /*---------------------------------------------*/
|
---|
| 705 | /* Pre:
|
---|
| 706 | * nblock > N_OVERSHOOT
|
---|
| 707 | * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
---|
| 708 | * ((uint8_t*)block32) [0 .. nblock-1] holds block
|
---|
| 709 | * ptr exists for [0 .. nblock-1]
|
---|
| 710 | *
|
---|
| 711 | * Post:
|
---|
| 712 | * ((uint8_t*)block32) [0 .. nblock-1] holds block
|
---|
| 713 | * All other areas of block32 destroyed
|
---|
| 714 | * ftab[0 .. 65536] destroyed
|
---|
| 715 | * ptr [0 .. nblock-1] holds sorted order
|
---|
| 716 | * if (*budget < 0), sorting was abandoned
|
---|
| 717 | */
|
---|
| 718 |
|
---|
| 719 | #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
|
---|
| 720 | #define SETMASK (1 << 21)
|
---|
| 721 | #define CLEARMASK (~(SETMASK))
|
---|
| 722 |
|
---|
| 723 | static NOINLINE
|
---|
| 724 | void mainSort(EState* state,
|
---|
| 725 | uint32_t* ptr,
|
---|
| 726 | uint8_t* block,
|
---|
| 727 | uint16_t* quadrant,
|
---|
| 728 | uint32_t* ftab,
|
---|
| 729 | int32_t nblock,
|
---|
| 730 | int32_t* budget)
|
---|
| 731 | {
|
---|
| 732 | int32_t i, j, k, ss, sb;
|
---|
| 733 | uint8_t c1;
|
---|
| 734 | int32_t numQSorted;
|
---|
| 735 | uint16_t s;
|
---|
| 736 | Bool bigDone[256];
|
---|
| 737 | /* bbox: moved to EState to save stack
|
---|
| 738 | int32_t runningOrder[256];
|
---|
| 739 | int32_t copyStart[256];
|
---|
| 740 | int32_t copyEnd [256];
|
---|
| 741 | */
|
---|
| 742 | #define runningOrder (state->mainSort__runningOrder)
|
---|
| 743 | #define copyStart (state->mainSort__copyStart)
|
---|
| 744 | #define copyEnd (state->mainSort__copyEnd)
|
---|
| 745 |
|
---|
| 746 | /*-- set up the 2-byte frequency table --*/
|
---|
| 747 | /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
|
---|
| 748 | memset(ftab, 0, 65537 * sizeof(ftab[0]));
|
---|
| 749 |
|
---|
| 750 | j = block[0] << 8;
|
---|
| 751 | i = nblock - 1;
|
---|
| 752 | /* 3%, +300 bytes */
|
---|
| 753 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2
|
---|
| 754 | for (; i >= 3; i -= 4) {
|
---|
| 755 | quadrant[i] = 0;
|
---|
| 756 | j = (j >> 8) | (((uint16_t)block[i]) << 8);
|
---|
| 757 | ftab[j]++;
|
---|
| 758 | quadrant[i-1] = 0;
|
---|
| 759 | j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
|
---|
| 760 | ftab[j]++;
|
---|
| 761 | quadrant[i-2] = 0;
|
---|
| 762 | j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
|
---|
| 763 | ftab[j]++;
|
---|
| 764 | quadrant[i-3] = 0;
|
---|
| 765 | j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
|
---|
| 766 | ftab[j]++;
|
---|
| 767 | }
|
---|
| 768 | #endif
|
---|
| 769 | for (; i >= 0; i--) {
|
---|
| 770 | quadrant[i] = 0;
|
---|
| 771 | j = (j >> 8) | (((uint16_t)block[i]) << 8);
|
---|
| 772 | ftab[j]++;
|
---|
| 773 | }
|
---|
| 774 |
|
---|
| 775 | /*-- (emphasises close relationship of block & quadrant) --*/
|
---|
| 776 | for (i = 0; i < BZ_N_OVERSHOOT; i++) {
|
---|
| 777 | block [nblock+i] = block[i];
|
---|
| 778 | quadrant[nblock+i] = 0;
|
---|
| 779 | }
|
---|
| 780 |
|
---|
| 781 | /*-- Complete the initial radix sort --*/
|
---|
| 782 | j = ftab[0]; /* bbox: optimized */
|
---|
| 783 | for (i = 1; i <= 65536; i++) {
|
---|
| 784 | j += ftab[i];
|
---|
| 785 | ftab[i] = j;
|
---|
| 786 | }
|
---|
| 787 |
|
---|
| 788 | s = block[0] << 8;
|
---|
| 789 | i = nblock - 1;
|
---|
| 790 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2
|
---|
| 791 | for (; i >= 3; i -= 4) {
|
---|
| 792 | s = (s >> 8) | (block[i] << 8);
|
---|
| 793 | j = ftab[s] - 1;
|
---|
| 794 | ftab[s] = j;
|
---|
| 795 | ptr[j] = i;
|
---|
| 796 | s = (s >> 8) | (block[i-1] << 8);
|
---|
| 797 | j = ftab[s] - 1;
|
---|
| 798 | ftab[s] = j;
|
---|
| 799 | ptr[j] = i-1;
|
---|
| 800 | s = (s >> 8) | (block[i-2] << 8);
|
---|
| 801 | j = ftab[s] - 1;
|
---|
| 802 | ftab[s] = j;
|
---|
| 803 | ptr[j] = i-2;
|
---|
| 804 | s = (s >> 8) | (block[i-3] << 8);
|
---|
| 805 | j = ftab[s] - 1;
|
---|
| 806 | ftab[s] = j;
|
---|
| 807 | ptr[j] = i-3;
|
---|
| 808 | }
|
---|
| 809 | #endif
|
---|
| 810 | for (; i >= 0; i--) {
|
---|
| 811 | s = (s >> 8) | (block[i] << 8);
|
---|
| 812 | j = ftab[s] - 1;
|
---|
| 813 | ftab[s] = j;
|
---|
| 814 | ptr[j] = i;
|
---|
| 815 | }
|
---|
| 816 |
|
---|
| 817 | /*
|
---|
| 818 | * Now ftab contains the first loc of every small bucket.
|
---|
| 819 | * Calculate the running order, from smallest to largest
|
---|
| 820 | * big bucket.
|
---|
| 821 | */
|
---|
| 822 | for (i = 0; i <= 255; i++) {
|
---|
| 823 | bigDone [i] = False;
|
---|
| 824 | runningOrder[i] = i;
|
---|
| 825 | }
|
---|
| 826 |
|
---|
| 827 | {
|
---|
| 828 | int32_t vv;
|
---|
| 829 | /* bbox: was: int32_t h = 1; */
|
---|
| 830 | /* do h = 3 * h + 1; while (h <= 256); */
|
---|
| 831 | uint32_t h = 364;
|
---|
| 832 |
|
---|
| 833 | do {
|
---|
| 834 | /*h = h / 3;*/
|
---|
| 835 | h = (h * 171) >> 9; /* bbox: fast h/3 */
|
---|
| 836 | for (i = h; i <= 255; i++) {
|
---|
| 837 | vv = runningOrder[i];
|
---|
| 838 | j = i;
|
---|
| 839 | while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
|
---|
| 840 | runningOrder[j] = runningOrder[j-h];
|
---|
| 841 | j = j - h;
|
---|
| 842 | if (j <= (h - 1))
|
---|
| 843 | goto zero;
|
---|
| 844 | }
|
---|
| 845 | zero:
|
---|
| 846 | runningOrder[j] = vv;
|
---|
| 847 | }
|
---|
| 848 | } while (h != 1);
|
---|
| 849 | }
|
---|
| 850 |
|
---|
| 851 | /*
|
---|
| 852 | * The main sorting loop.
|
---|
| 853 | */
|
---|
| 854 |
|
---|
| 855 | numQSorted = 0;
|
---|
| 856 |
|
---|
| 857 | for (i = 0; i <= 255; i++) {
|
---|
| 858 |
|
---|
| 859 | /*
|
---|
| 860 | * Process big buckets, starting with the least full.
|
---|
| 861 | * Basically this is a 3-step process in which we call
|
---|
| 862 | * mainQSort3 to sort the small buckets [ss, j], but
|
---|
| 863 | * also make a big effort to avoid the calls if we can.
|
---|
| 864 | */
|
---|
| 865 | ss = runningOrder[i];
|
---|
| 866 |
|
---|
| 867 | /*
|
---|
| 868 | * Step 1:
|
---|
| 869 | * Complete the big bucket [ss] by quicksorting
|
---|
| 870 | * any unsorted small buckets [ss, j], for j != ss.
|
---|
| 871 | * Hopefully previous pointer-scanning phases have already
|
---|
| 872 | * completed many of the small buckets [ss, j], so
|
---|
| 873 | * we don't have to sort them at all.
|
---|
| 874 | */
|
---|
| 875 | for (j = 0; j <= 255; j++) {
|
---|
| 876 | if (j != ss) {
|
---|
| 877 | sb = (ss << 8) + j;
|
---|
| 878 | if (!(ftab[sb] & SETMASK)) {
|
---|
| 879 | int32_t lo = ftab[sb] & CLEARMASK;
|
---|
| 880 | int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
|
---|
| 881 | if (hi > lo) {
|
---|
| 882 | mainQSort3(
|
---|
| 883 | ptr, block, quadrant, nblock,
|
---|
| 884 | lo, hi, BZ_N_RADIX, budget
|
---|
| 885 | );
|
---|
| 886 | if (*budget < 0) return;
|
---|
| 887 | numQSorted += (hi - lo + 1);
|
---|
| 888 | }
|
---|
| 889 | }
|
---|
| 890 | ftab[sb] |= SETMASK;
|
---|
| 891 | }
|
---|
| 892 | }
|
---|
| 893 |
|
---|
| 894 | AssertH(!bigDone[ss], 1006);
|
---|
| 895 |
|
---|
| 896 | /*
|
---|
| 897 | * Step 2:
|
---|
| 898 | * Now scan this big bucket [ss] so as to synthesise the
|
---|
| 899 | * sorted order for small buckets [t, ss] for all t,
|
---|
| 900 | * including, magically, the bucket [ss,ss] too.
|
---|
| 901 | * This will avoid doing Real Work in subsequent Step 1's.
|
---|
| 902 | */
|
---|
| 903 | {
|
---|
| 904 | for (j = 0; j <= 255; j++) {
|
---|
| 905 | copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
|
---|
| 906 | copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
|
---|
| 907 | }
|
---|
| 908 | for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
|
---|
| 909 | k = ptr[j] - 1;
|
---|
| 910 | if (k < 0)
|
---|
| 911 | k += nblock;
|
---|
| 912 | c1 = block[k];
|
---|
| 913 | if (!bigDone[c1])
|
---|
| 914 | ptr[copyStart[c1]++] = k;
|
---|
| 915 | }
|
---|
| 916 | for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
|
---|
| 917 | k = ptr[j]-1;
|
---|
| 918 | if (k < 0)
|
---|
| 919 | k += nblock;
|
---|
| 920 | c1 = block[k];
|
---|
| 921 | if (!bigDone[c1])
|
---|
| 922 | ptr[copyEnd[c1]--] = k;
|
---|
| 923 | }
|
---|
| 924 | }
|
---|
| 925 |
|
---|
| 926 | /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
|
---|
| 927 | * Necessity for this case is demonstrated by compressing
|
---|
| 928 | * a sequence of approximately 48.5 million of character
|
---|
| 929 | * 251; 1.0.0/1.0.1 will then die here. */
|
---|
| 930 | AssertH((copyStart[ss]-1 == copyEnd[ss]) \
|
---|
| 931 | || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
|
---|
| 932 |
|
---|
| 933 | for (j = 0; j <= 255; j++)
|
---|
| 934 | ftab[(j << 8) + ss] |= SETMASK;
|
---|
| 935 |
|
---|
| 936 | /*
|
---|
| 937 | * Step 3:
|
---|
| 938 | * The [ss] big bucket is now done. Record this fact,
|
---|
| 939 | * and update the quadrant descriptors. Remember to
|
---|
| 940 | * update quadrants in the overshoot area too, if
|
---|
| 941 | * necessary. The "if (i < 255)" test merely skips
|
---|
| 942 | * this updating for the last bucket processed, since
|
---|
| 943 | * updating for the last bucket is pointless.
|
---|
| 944 | *
|
---|
| 945 | * The quadrant array provides a way to incrementally
|
---|
| 946 | * cache sort orderings, as they appear, so as to
|
---|
| 947 | * make subsequent comparisons in fullGtU() complete
|
---|
| 948 | * faster. For repetitive blocks this makes a big
|
---|
| 949 | * difference (but not big enough to be able to avoid
|
---|
| 950 | * the fallback sorting mechanism, exponential radix sort).
|
---|
| 951 | *
|
---|
| 952 | * The precise meaning is: at all times:
|
---|
| 953 | *
|
---|
| 954 | * for 0 <= i < nblock and 0 <= j <= nblock
|
---|
| 955 | *
|
---|
| 956 | * if block[i] != block[j],
|
---|
| 957 | *
|
---|
| 958 | * then the relative values of quadrant[i] and
|
---|
| 959 | * quadrant[j] are meaningless.
|
---|
| 960 | *
|
---|
| 961 | * else {
|
---|
| 962 | * if quadrant[i] < quadrant[j]
|
---|
| 963 | * then the string starting at i lexicographically
|
---|
| 964 | * precedes the string starting at j
|
---|
| 965 | *
|
---|
| 966 | * else if quadrant[i] > quadrant[j]
|
---|
| 967 | * then the string starting at j lexicographically
|
---|
| 968 | * precedes the string starting at i
|
---|
| 969 | *
|
---|
| 970 | * else
|
---|
| 971 | * the relative ordering of the strings starting
|
---|
| 972 | * at i and j has not yet been determined.
|
---|
| 973 | * }
|
---|
| 974 | */
|
---|
| 975 | bigDone[ss] = True;
|
---|
| 976 |
|
---|
| 977 | if (i < 255) {
|
---|
| 978 | int32_t bbStart = ftab[ss << 8] & CLEARMASK;
|
---|
| 979 | int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
|
---|
| 980 | int32_t shifts = 0;
|
---|
| 981 |
|
---|
| 982 | while ((bbSize >> shifts) > 65534) shifts++;
|
---|
| 983 |
|
---|
| 984 | for (j = bbSize-1; j >= 0; j--) {
|
---|
| 985 | int32_t a2update = ptr[bbStart + j];
|
---|
| 986 | uint16_t qVal = (uint16_t)(j >> shifts);
|
---|
| 987 | quadrant[a2update] = qVal;
|
---|
| 988 | if (a2update < BZ_N_OVERSHOOT)
|
---|
| 989 | quadrant[a2update + nblock] = qVal;
|
---|
| 990 | }
|
---|
| 991 | AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
|
---|
| 992 | }
|
---|
| 993 | }
|
---|
| 994 | #undef runningOrder
|
---|
| 995 | #undef copyStart
|
---|
| 996 | #undef copyEnd
|
---|
| 997 | }
|
---|
| 998 |
|
---|
| 999 | #undef BIGFREQ
|
---|
| 1000 | #undef SETMASK
|
---|
| 1001 | #undef CLEARMASK
|
---|
| 1002 |
|
---|
| 1003 |
|
---|
| 1004 | /*---------------------------------------------*/
|
---|
| 1005 | /* Pre:
|
---|
| 1006 | * nblock > 0
|
---|
| 1007 | * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
---|
| 1008 | * ((uint8_t*)arr2)[0 .. nblock-1] holds block
|
---|
| 1009 | * arr1 exists for [0 .. nblock-1]
|
---|
| 1010 | *
|
---|
| 1011 | * Post:
|
---|
| 1012 | * ((uint8_t*)arr2) [0 .. nblock-1] holds block
|
---|
| 1013 | * All other areas of block destroyed
|
---|
| 1014 | * ftab[0 .. 65536] destroyed
|
---|
| 1015 | * arr1[0 .. nblock-1] holds sorted order
|
---|
| 1016 | */
|
---|
| 1017 | static NOINLINE
|
---|
| 1018 | void BZ2_blockSort(EState* s)
|
---|
| 1019 | {
|
---|
| 1020 | /* In original bzip2 1.0.4, it's a parameter, but 30
|
---|
| 1021 | * (which was the default) should work ok. */
|
---|
| 1022 | enum { wfact = 30 };
|
---|
| 1023 |
|
---|
| 1024 | uint32_t* ptr = s->ptr;
|
---|
| 1025 | uint8_t* block = s->block;
|
---|
| 1026 | uint32_t* ftab = s->ftab;
|
---|
| 1027 | int32_t nblock = s->nblock;
|
---|
| 1028 | uint16_t* quadrant;
|
---|
| 1029 | int32_t budget;
|
---|
| 1030 | int32_t i;
|
---|
| 1031 |
|
---|
| 1032 | if (nblock < 10000) {
|
---|
| 1033 | fallbackSort(s->arr1, s->arr2, ftab, nblock);
|
---|
| 1034 | } else {
|
---|
| 1035 | /* Calculate the location for quadrant, remembering to get
|
---|
| 1036 | * the alignment right. Assumes that &(block[0]) is at least
|
---|
| 1037 | * 2-byte aligned -- this should be ok since block is really
|
---|
| 1038 | * the first section of arr2.
|
---|
| 1039 | */
|
---|
| 1040 | i = nblock + BZ_N_OVERSHOOT;
|
---|
| 1041 | if (i & 1) i++;
|
---|
| 1042 | quadrant = (uint16_t*)(&(block[i]));
|
---|
| 1043 |
|
---|
| 1044 | /* (wfact-1) / 3 puts the default-factor-30
|
---|
| 1045 | * transition point at very roughly the same place as
|
---|
| 1046 | * with v0.1 and v0.9.0.
|
---|
| 1047 | * Not that it particularly matters any more, since the
|
---|
| 1048 | * resulting compressed stream is now the same regardless
|
---|
| 1049 | * of whether or not we use the main sort or fallback sort.
|
---|
| 1050 | */
|
---|
| 1051 | budget = nblock * ((wfact-1) / 3);
|
---|
| 1052 |
|
---|
| 1053 | mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
|
---|
| 1054 | if (budget < 0) {
|
---|
| 1055 | fallbackSort(s->arr1, s->arr2, ftab, nblock);
|
---|
| 1056 | }
|
---|
| 1057 | }
|
---|
| 1058 |
|
---|
| 1059 | s->origPtr = -1;
|
---|
| 1060 | for (i = 0; i < s->nblock; i++)
|
---|
| 1061 | if (ptr[i] == 0) {
|
---|
| 1062 | s->origPtr = i;
|
---|
| 1063 | break;
|
---|
| 1064 | };
|
---|
| 1065 |
|
---|
| 1066 | AssertH(s->origPtr != -1, 1003);
|
---|
| 1067 | }
|
---|
| 1068 |
|
---|
| 1069 |
|
---|
| 1070 | /*-------------------------------------------------------------*/
|
---|
| 1071 | /*--- end blocksort.c ---*/
|
---|
| 1072 | /*-------------------------------------------------------------*/
|
---|