Changeset 1770 in MondoRescue for branches/stable/mindi-busybox/archival/libunarchive/decompress_bunzip2.c
- Timestamp:
- Nov 6, 2007, 11:01:53 AM (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/stable/mindi-busybox/archival/libunarchive/decompress_bunzip2.c
r821 r1770 29 29 */ 30 30 31 #include <setjmp.h>32 #include <stdio.h>33 #include <stdlib.h>34 #include <string.h>35 #include <unistd.h>36 #include <limits.h>37 38 31 #include "libbb.h" 39 40 32 #include "unarchive.h" 41 33 42 34 /* Constants for Huffman coding */ 43 #define MAX_GROUPS 44 #define GROUP_SIZE 50/* 64 would have been more efficient */45 #define MAX_HUFCODE_BITS 20/* Longest Huffman code allowed */46 #define MAX_SYMBOLS 258/* 256 literals + RUNA + RUNB */47 #define SYMBOL_RUNA 48 #define SYMBOL_RUNB 35 #define MAX_GROUPS 6 36 #define GROUP_SIZE 50 /* 64 would have been more efficient */ 37 #define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ 38 #define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ 39 #define SYMBOL_RUNA 0 40 #define SYMBOL_RUNB 1 49 41 50 42 /* Status return values */ 51 #define RETVAL_OK 52 #define RETVAL_LAST_BLOCK 53 #define RETVAL_NOT_BZIP_DATA 54 #define RETVAL_UNEXPECTED_INPUT_EOF 55 #define RETVAL_UNEXPECTED_OUTPUT_EOF 56 #define RETVAL_DATA_ERROR 57 #define RETVAL_OUT_OF_MEMORY 58 #define RETVAL_OBSOLETE_INPUT 43 #define RETVAL_OK 0 44 #define RETVAL_LAST_BLOCK (-1) 45 #define RETVAL_NOT_BZIP_DATA (-2) 46 #define RETVAL_UNEXPECTED_INPUT_EOF (-3) 47 #define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) 48 #define RETVAL_DATA_ERROR (-5) 49 #define RETVAL_OUT_OF_MEMORY (-6) 50 #define RETVAL_OBSOLETE_INPUT (-7) 59 51 60 52 /* Other housekeeping constants */ 61 #define IOBUF_SIZE 53 #define IOBUF_SIZE 4096 62 54 63 55 /* This is what we know about each Huffman coding group */ 64 56 struct group_data { 65 57 /* We have an extra slot at the end of limit[] for a sentinal value. */ 66 int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS];58 int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; 67 59 int minLen, maxLen; 68 60 }; … … 71 63 memory that persists between calls to bunzip */ 72 64 73 typedef struct{65 struct bunzip_data { 74 66 /* State for interrupting output loop */ 75 76 int writeCopies,writePos,writeRunCountdown,writeCount,writeCurrent; 67 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; 77 68 78 69 /* I/O tracking data (file handles, buffers, positions, etc.) */ 79 80 int in_fd,out_fd,inbufCount,inbufPos /*,outbufPos*/; 70 int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; 81 71 unsigned char *inbuf /*,*outbuf*/; 82 unsigned in t inbufBitCount, inbufBits;72 unsigned inbufBitCount, inbufBits; 83 73 84 74 /* The CRC values stored in the block header and calculated from the data */ 85 86 75 uint32_t headerCRC, totalCRC, writeCRC; 87 uint32_t *crc32Table; 76 88 77 /* Intermediate buffer and its size (in bytes) */ 89 90 unsigned int *dbuf, dbufSize; 91 92 /* These things are a bit too big to go on the stack */ 93 78 unsigned *dbuf, dbufSize; 79 80 /* For I/O error handling */ 81 jmp_buf jmpbuf; 82 83 /* Big things go last (register-relative addressing can be larger for big offsets */ 84 uint32_t crc32Table[256]; 94 85 unsigned char selectors[32768]; /* nSelectors=15 bits */ 95 86 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 96 97 /* For I/O error handling */ 98 99 jmp_buf jmpbuf; 100 } bunzip_data; 87 }; 88 /* typedef struct bunzip_data bunzip_data; -- done in .h file */ 89 101 90 102 91 /* Return the next nnn bits of input. All reads from the compressed input 103 92 are done through this function. All reads are big endian */ 104 93 105 static unsigned intget_bits(bunzip_data *bd, char bits_wanted)94 static unsigned get_bits(bunzip_data *bd, char bits_wanted) 106 95 { 107 unsigned int bits=0;96 unsigned bits = 0; 108 97 109 98 /* If we need to get more data from the byte buffer, do so. (Loop getting 110 99 one byte at a time to enforce endianness and avoid unaligned access.) */ 111 100 112 while (bd->inbufBitCount <bits_wanted) {101 while (bd->inbufBitCount < bits_wanted) { 113 102 114 103 /* If we need to read more data from file into byte buffer, do so */ 115 104 116 if(bd->inbufPos==bd->inbufCount) { 117 if((bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)) <= 0) 118 longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF); 119 bd->inbufPos=0; 105 if (bd->inbufPos == bd->inbufCount) { 106 /* if "no input fd" case: in_fd == -1, read fails, we jump */ 107 bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); 108 if (bd->inbufCount <= 0) 109 longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); 110 bd->inbufPos = 0; 120 111 } 121 112 122 113 /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 123 114 124 if (bd->inbufBitCount>=24) {125 bits =bd->inbufBits&((1<<bd->inbufBitCount)-1);126 bits_wanted -=bd->inbufBitCount;127 bits <<=bits_wanted;128 bd->inbufBitCount =0;115 if (bd->inbufBitCount >= 24) { 116 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); 117 bits_wanted -= bd->inbufBitCount; 118 bits <<= bits_wanted; 119 bd->inbufBitCount = 0; 129 120 } 130 121 131 122 /* Grab next 8 bits of input from buffer. */ 132 123 133 bd->inbufBits =(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++];134 bd->inbufBitCount +=8;124 bd->inbufBits = (bd->inbufBits<<8) | bd->inbuf[bd->inbufPos++]; 125 bd->inbufBitCount += 8; 135 126 } 136 127 137 128 /* Calculate result */ 138 129 139 bd->inbufBitCount -=bits_wanted;140 bits |=(bd->inbufBits>>bd->inbufBitCount)&((1<<bits_wanted)-1);130 bd->inbufBitCount -= bits_wanted; 131 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); 141 132 142 133 return bits; … … 148 139 { 149 140 struct group_data *hufGroup; 150 int dbufCount, nextSym,dbufSize,groupCount,*base,*limit,selector,151 i, j,k,t,runPos,symCount,symTotal,nSelectors,byteCount[256];141 int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, 142 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; 152 143 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; 153 unsigned int *dbuf,origPtr;154 155 dbuf =bd->dbuf;156 dbufSize =bd->dbufSize;157 selectors =bd->selectors;144 unsigned *dbuf, origPtr; 145 146 dbuf = bd->dbuf; 147 dbufSize = bd->dbufSize; 148 selectors = bd->selectors; 158 149 159 150 /* Reset longjmp I/O error handling */ 160 151 161 i =setjmp(bd->jmpbuf);162 if (i) return i;152 i = setjmp(bd->jmpbuf); 153 if (i) return i; 163 154 164 155 /* Read in header signature and CRC, then validate signature. 165 156 (last block signature means CRC is for whole file, return now) */ 166 157 167 i = get_bits(bd, 24);168 j = get_bits(bd, 24);169 bd->headerCRC =get_bits(bd,32);158 i = get_bits(bd, 24); 159 j = get_bits(bd, 24); 160 bd->headerCRC = get_bits(bd, 32); 170 161 if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; 171 162 if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; … … 175 166 it didn't actually work. */ 176 167 177 if(get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT; 178 if((origPtr=get_bits(bd,24)) > dbufSize) return RETVAL_DATA_ERROR; 168 if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; 169 origPtr = get_bits(bd, 24); 170 if (origPtr > dbufSize) return RETVAL_DATA_ERROR; 179 171 180 172 /* mapping table: if some byte values are never used (encoding things … … 184 176 back to the corresponding bytes. */ 185 177 186 t=get_bits(bd, 16); 187 symTotal=0; 188 for (i=0;i<16;i++) { 189 if(t&(1<<(15-i))) { 190 k=get_bits(bd,16); 191 for(j=0;j<16;j++) 192 if(k&(1<<(15-j))) symToByte[symTotal++]=(16*i)+j; 178 t = get_bits(bd, 16); 179 symTotal = 0; 180 for (i = 0; i < 16; i++) { 181 if (t & (1 << (15-i))) { 182 k = get_bits(bd, 16); 183 for (j = 0; j < 16; j++) 184 if (k & (1 << (15-j))) 185 symToByte[symTotal++] = (16*i) + j; 193 186 } 194 187 } … … 196 189 /* How many different Huffman coding groups does this block use? */ 197 190 198 groupCount=get_bits(bd,3); 199 if (groupCount<2 || groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR; 191 groupCount = get_bits(bd, 3); 192 if (groupCount < 2 || groupCount > MAX_GROUPS) 193 return RETVAL_DATA_ERROR; 200 194 201 195 /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding … … 204 198 start of the list.) */ 205 199 206 if(!(nSelectors=get_bits(bd, 15))) return RETVAL_DATA_ERROR; 207 for(i=0; i<groupCount; i++) mtfSymbol[i] = i; 208 for(i=0; i<nSelectors; i++) { 200 nSelectors = get_bits(bd, 15); 201 if (!nSelectors) return RETVAL_DATA_ERROR; 202 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; 203 for (i = 0; i < nSelectors; i++) { 209 204 210 205 /* Get next value */ 211 206 212 for(j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR; 207 for (j = 0; get_bits(bd, 1); j++) 208 if (j>=groupCount) return RETVAL_DATA_ERROR; 213 209 214 210 /* Decode MTF to get the next selector */ 215 211 216 212 uc = mtfSymbol[j]; 217 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];218 mtfSymbol[0] =selectors[i]=uc;213 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; 214 mtfSymbol[0] = selectors[i] = uc; 219 215 } 220 216 … … 222 218 literal symbols, plus two run symbols (RUNA, RUNB) */ 223 219 224 symCount =symTotal+2;225 for (j =0; j<groupCount; j++) {226 unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];227 int minLen,maxLen, pp;220 symCount = symTotal + 2; 221 for (j = 0; j < groupCount; j++) { 222 unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; 223 int minLen, maxLen, pp; 228 224 229 225 /* Read Huffman code lengths for each symbol. They're stored in … … 234 230 length 0 becomes negative, so an unsigned inequality catches it.) */ 235 231 236 t =get_bits(bd, 5)-1;232 t = get_bits(bd, 5) - 1; 237 233 for (i = 0; i < symCount; i++) { 238 for (;;) {239 if (( (unsigned)t)> (MAX_HUFCODE_BITS-1))234 for (;;) { 235 if ((unsigned)t > (MAX_HUFCODE_BITS-1)) 240 236 return RETVAL_DATA_ERROR; 241 237 … … 244 240 bits and unget the second if the first was 0. */ 245 241 246 k = get_bits(bd, 2);242 k = get_bits(bd, 2); 247 243 if (k < 2) { 248 244 bd->inbufBitCount++; … … 252 248 /* Add one if second bit 1, else subtract 1. Avoids if/else */ 253 249 254 t +=(((k+1)&2)-1);250 t += (((k+1) & 2) - 1); 255 251 } 256 252 257 253 /* Correct for the initial -1, to get the final symbol length */ 258 254 259 length[i] =t+1;255 length[i] = t + 1; 260 256 } 261 257 262 258 /* Find largest and smallest lengths in this group */ 263 259 264 minLen =maxLen=length[0];265 for (i = 1; i < symCount; i++) {266 if (length[i] > maxLen) maxLen = length[i];267 else if (length[i] < minLen) minLen = length[i];260 minLen = maxLen = length[0]; 261 for (i = 1; i < symCount; i++) { 262 if (length[i] > maxLen) maxLen = length[i]; 263 else if (length[i] < minLen) minLen = length[i]; 268 264 } 269 265 … … 279 275 */ 280 276 281 hufGroup =bd->groups+j;277 hufGroup = bd->groups + j; 282 278 hufGroup->minLen = minLen; 283 279 hufGroup->maxLen = maxLen; … … 287 283 entry. We do this again when using them (during symbol decoding).*/ 288 284 289 base =hufGroup->base-1;290 limit =hufGroup->limit-1;285 base = hufGroup->base - 1; 286 limit = hufGroup->limit - 1; 291 287 292 288 /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ 293 289 294 pp=0; 295 for(i=minLen;i<=maxLen;i++) { 296 temp[i]=limit[i]=0; 297 for(t=0;t<symCount;t++) 298 if(length[t]==i) hufGroup->permute[pp++] = t; 290 pp = 0; 291 for (i = minLen; i <= maxLen; i++) { 292 temp[i] = limit[i] = 0; 293 for (t = 0; t < symCount; t++) 294 if (length[t] == i) 295 hufGroup->permute[pp++] = t; 299 296 } 300 297 301 298 /* Count symbols coded for at each bit length */ 302 299 303 for (i =0;i<symCount;i++) temp[length[i]]++;300 for (i = 0; i < symCount; i++) temp[length[i]]++; 304 301 305 302 /* Calculate limit[] (the largest symbol-coding value at each bit … … 308 305 * limit minus the cumulative count of symbols coded for already). */ 309 306 310 pp =t=0;311 for (i =minLen; i<maxLen; i++) {312 pp +=temp[i];307 pp = t = 0; 308 for (i = minLen; i < maxLen; i++) { 309 pp += temp[i]; 313 310 314 311 /* We read the largest possible symbol size and then unget bits … … 319 316 don't affect the value>limit[length] comparison. */ 320 317 321 limit[i]= (pp << (maxLen - i)) - 1; 322 pp<<=1; 323 base[i+1]=pp-(t+=temp[i]); 318 limit[i] = (pp << (maxLen - i)) - 1; 319 pp <<= 1; 320 t += temp[i]; 321 base[i+1] = pp - t; 324 322 } 325 323 limit[maxLen+1] = INT_MAX; /* Sentinal value for reading next sym. */ 326 limit[maxLen] =pp+temp[maxLen]-1;327 base[minLen] =0;324 limit[maxLen] = pp + temp[maxLen] - 1; 325 base[minLen] = 0; 328 326 } 329 327 … … 334 332 /* Initialize symbol occurrence counters and symbol Move To Front table */ 335 333 336 for (i=0;i<256;i++) {334 for (i = 0; i < 256; i++) { 337 335 byteCount[i] = 0; 338 mtfSymbol[i] =(unsigned char)i;336 mtfSymbol[i] = (unsigned char)i; 339 337 } 340 338 341 339 /* Loop through compressed symbols. */ 342 340 343 runPos =dbufCount=selector=0;344 for (;;) {341 runPos = dbufCount = selector = 0; 342 for (;;) { 345 343 346 344 /* fetch next Huffman coding group from list. */ 347 345 348 symCount =GROUP_SIZE-1;349 if (selector>=nSelectors) return RETVAL_DATA_ERROR;350 hufGroup =bd->groups+selectors[selector++];351 base =hufGroup->base-1;352 limit =hufGroup->limit-1;353 continue_this_group:346 symCount = GROUP_SIZE - 1; 347 if (selector >= nSelectors) return RETVAL_DATA_ERROR; 348 hufGroup = bd->groups + selectors[selector++]; 349 base = hufGroup->base - 1; 350 limit = hufGroup->limit - 1; 351 continue_this_group: 354 352 355 353 /* Read next Huffman-coded symbol. */ … … 362 360 inline (falling back to a call to get_bits if the buffer runs 363 361 dry). The following (up to got_huff_bits:) is equivalent to 364 j =get_bits(bd,hufGroup->maxLen);362 j = get_bits(bd, hufGroup->maxLen); 365 363 */ 366 364 367 while (bd->inbufBitCount <hufGroup->maxLen) {368 if (bd->inbufPos==bd->inbufCount) {369 j = get_bits(bd, hufGroup->maxLen);365 while (bd->inbufBitCount < hufGroup->maxLen) { 366 if (bd->inbufPos == bd->inbufCount) { 367 j = get_bits(bd, hufGroup->maxLen); 370 368 goto got_huff_bits; 371 369 } 372 bd->inbufBits =(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++];373 bd->inbufBitCount +=8;370 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; 371 bd->inbufBitCount += 8; 374 372 }; 375 bd->inbufBitCount -=hufGroup->maxLen;376 j = (bd->inbufBits >>bd->inbufBitCount)&((1<<hufGroup->maxLen)-1);377 378 got_huff_bits:373 bd->inbufBitCount -= hufGroup->maxLen; 374 j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1); 375 376 got_huff_bits: 379 377 380 378 /* Figure how how many bits are in next symbol and unget extras */ 381 379 382 i =hufGroup->minLen;383 while (j>limit[i]) ++i;380 i = hufGroup->minLen; 381 while (j > limit[i]) ++i; 384 382 bd->inbufBitCount += (hufGroup->maxLen - i); 385 383 386 384 /* Huffman decode value to get nextSym (with bounds checking) */ 387 385 388 if ((i > hufGroup->maxLen) 389 || (((unsigned)(j=(j>>(hufGroup->maxLen-i))-base[i])) 390 >= MAX_SYMBOLS)) 386 if (i > hufGroup->maxLen) 387 return RETVAL_DATA_ERROR; 388 j = (j >> (hufGroup->maxLen - i)) - base[i]; 389 if ((unsigned)j >= MAX_SYMBOLS) 391 390 return RETVAL_DATA_ERROR; 392 391 nextSym = hufGroup->permute[j]; … … 397 396 how many times to repeat the last literal. */ 398 397 399 if (( (unsigned)nextSym)<= SYMBOL_RUNB) { /* RUNA or RUNB */398 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ 400 399 401 400 /* If this is the start of a new run, zero out counter */ 402 401 403 if (!runPos) {402 if (!runPos) { 404 403 runPos = 1; 405 404 t = 0; … … 415 414 416 415 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ 417 if (runPos < dbufSize) runPos <<= 1;416 if (runPos < dbufSize) runPos <<= 1; 418 417 goto end_of_huffman_loop; 419 418 } … … 424 423 literal used is the one at the head of the mtfSymbol array.) */ 425 424 426 if (runPos) {427 runPos =0;428 if (dbufCount+t>=dbufSize) return RETVAL_DATA_ERROR;425 if (runPos) { 426 runPos = 0; 427 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; 429 428 430 429 uc = symToByte[mtfSymbol[0]]; 431 430 byteCount[uc] += t; 432 while (t--) dbuf[dbufCount++]=uc;431 while (t--) dbuf[dbufCount++] = uc; 433 432 } 434 433 435 434 /* Is this the terminating symbol? */ 436 435 437 if (nextSym>symTotal) break;436 if (nextSym > symTotal) break; 438 437 439 438 /* At this point, nextSym indicates a new literal character. Subtract … … 445 444 2 non-literal nextSym values equals -1.) */ 446 445 447 if (dbufCount>=dbufSize) return RETVAL_DATA_ERROR;446 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; 448 447 i = nextSym - 1; 449 448 uc = mtfSymbol[i]; … … 458 457 } while (--i); 459 458 mtfSymbol[0] = uc; 460 uc =symToByte[uc];459 uc = symToByte[uc]; 461 460 462 461 /* We have our literal byte. Save it into dbuf. */ 463 462 464 463 byteCount[uc]++; 465 dbuf[dbufCount++] = (unsigned int)uc;464 dbuf[dbufCount++] = (unsigned)uc; 466 465 467 466 /* Skip group initialization if we're not done with this group. Done 468 467 * this way to avoid compiler warning. */ 469 468 470 end_of_huffman_loop:471 if (symCount--) goto continue_this_group;469 end_of_huffman_loop: 470 if (symCount--) goto continue_this_group; 472 471 } 473 472 474 473 /* At this point, we've read all the Huffman-coded symbols (and repeated 475 474 runs) for this block from the input stream, and decoded them into the 476 475 intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. 477 476 Now undo the Burrows-Wheeler transform on dbuf. … … 481 480 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 482 481 483 j =0;484 for (i=0;i<256;i++) {485 k =j+byteCount[i];482 j = 0; 483 for (i = 0; i < 256; i++) { 484 k = j + byteCount[i]; 486 485 byteCount[i] = j; 487 j =k;486 j = k; 488 487 } 489 488 490 489 /* Figure out what order dbuf would be in if we sorted it. */ 491 490 492 for (i =0;i<dbufCount;i++) {493 uc =(unsigned char)(dbuf[i] & 0xff);491 for (i = 0; i < dbufCount; i++) { 492 uc = (unsigned char)(dbuf[i] & 0xff); 494 493 dbuf[byteCount[uc]] |= (i << 8); 495 494 byteCount[uc]++; … … 500 499 it doesn't qualify as a run (hence writeRunCountdown=5). */ 501 500 502 if (dbufCount) {503 if (origPtr>=dbufCount) return RETVAL_DATA_ERROR;504 bd->writePos =dbuf[origPtr];505 bd->writeCurrent =(unsigned char)(bd->writePos&0xff);506 bd->writePos >>=8;507 bd->writeRunCountdown =5;508 } 509 bd->writeCount =dbufCount;501 if (dbufCount) { 502 if (origPtr >= dbufCount) return RETVAL_DATA_ERROR; 503 bd->writePos = dbuf[origPtr]; 504 bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); 505 bd->writePos >>= 8; 506 bd->writeRunCountdown = 5; 507 } 508 bd->writeCount = dbufCount; 510 509 511 510 return RETVAL_OK; … … 519 518 */ 520 519 521 staticint read_bunzip(bunzip_data *bd, char *outbuf, int len)520 int read_bunzip(bunzip_data *bd, char *outbuf, int len) 522 521 { 523 const unsigned int*dbuf;524 int pos, current,previous,gotcount;522 const unsigned *dbuf; 523 int pos, current, previous, gotcount; 525 524 526 525 /* If last read was short due to end of file, return last block now */ 527 if (bd->writeCount<0) return bd->writeCount;526 if (bd->writeCount < 0) return bd->writeCount; 528 527 529 528 gotcount = 0; 530 dbuf =bd->dbuf;531 pos =bd->writePos;532 current =bd->writeCurrent;529 dbuf = bd->dbuf; 530 pos = bd->writePos; 531 current = bd->writeCurrent; 533 532 534 533 /* We will always have pending decoded data to write into the output … … 544 543 /* Loop outputting bytes */ 545 544 546 for (;;) {545 for (;;) { 547 546 548 547 /* If the output buffer is full, snapshot state and return */ 549 548 550 if (gotcount >= len) {551 bd->writePos =pos;552 bd->writeCurrent =current;549 if (gotcount >= len) { 550 bd->writePos =pos; 551 bd->writeCurrent = current; 553 552 bd->writeCopies++; 554 553 return len; … … 558 557 559 558 outbuf[gotcount++] = current; 560 bd->writeCRC =(((bd->writeCRC)<<8)561 ^ bd->crc32Table[((bd->writeCRC)>>24)^current]);559 bd->writeCRC = (bd->writeCRC << 8) 560 ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; 562 561 563 562 /* Loop now if we're outputting multiple copies of this byte */ … … 567 566 continue; 568 567 } 569 decode_next_byte:568 decode_next_byte: 570 569 if (!bd->writeCount--) break; 571 570 /* Follow sequence vector to undo Burrows-Wheeler transform */ 572 previous =current;573 pos =dbuf[pos];574 current =pos&0xff;575 pos >>=8;571 previous = current; 572 pos = dbuf[pos]; 573 current = pos & 0xff; 574 pos >>= 8; 576 575 577 576 /* After 3 consecutive copies of the same byte, the 4th is a repeat … … 579 578 * of counting up because testing for non-zero is faster */ 580 579 581 if(--bd->writeRunCountdown) { 582 if(current!=previous) bd->writeRunCountdown=4; 580 if (--bd->writeRunCountdown) { 581 if (current != previous) 582 bd->writeRunCountdown = 4; 583 583 } else { 584 584 585 585 /* We have a repeated run, this byte indicates the count */ 586 586 587 bd->writeCopies =current;588 current =previous;589 bd->writeRunCountdown =5;587 bd->writeCopies = current; 588 current = previous; 589 bd->writeRunCountdown = 5; 590 590 591 591 /* Sometimes there are just 3 bytes (run length 0) */ 592 592 593 if (!bd->writeCopies) goto decode_next_byte;593 if (!bd->writeCopies) goto decode_next_byte; 594 594 595 595 /* Subtract the 1 copy we'd output anyway to get extras */ … … 601 601 /* Decompression of this block completed successfully */ 602 602 603 bd->writeCRC =~bd->writeCRC;604 bd->totalCRC =((bd->totalCRC<<1) | (bd->totalCRC>>31)) ^ bd->writeCRC;603 bd->writeCRC = ~bd->writeCRC; 604 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; 605 605 606 606 /* If this block had a CRC error, force file level CRC error. */ 607 607 608 if (bd->writeCRC!=bd->headerCRC) {609 bd->totalCRC =bd->headerCRC+1;608 if (bd->writeCRC != bd->headerCRC) { 609 bd->totalCRC = bd->headerCRC+1; 610 610 return RETVAL_LAST_BLOCK; 611 611 } … … 615 615 /* (previous is just a convenient unused temp variable here) */ 616 616 617 previous =get_next_block(bd);618 if (previous) {619 bd->writeCount =previous;620 return (previous !=RETVAL_LAST_BLOCK) ? previous : gotcount;621 } 622 bd->writeCRC =~0;623 pos =bd->writePos;624 current =bd->writeCurrent;617 previous = get_next_block(bd); 618 if (previous) { 619 bd->writeCount = previous; 620 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; 621 } 622 bd->writeCRC = ~0; 623 pos = bd->writePos; 624 current = bd->writeCurrent; 625 625 goto decode_next_byte; 626 626 } 627 627 628 628 629 /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain … … 630 631 ignored, and data is read from file handle into temporary buffer. */ 631 632 632 static int start_bunzip(bunzip_data **bdp, int in_fd, unsigned char *inbuf, 633 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage() 634 should work for NOFORK applets too, we must be extremely careful to not leak 635 any allocations! */ 636 637 int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, 633 638 int len) 634 639 { 635 640 bunzip_data *bd; 636 unsigned int i; 637 const unsigned int BZh0=(((unsigned int)'B')<<24)+(((unsigned int)'Z')<<16) 638 +(((unsigned int)'h')<<8)+(unsigned int)'0'; 641 unsigned i; 642 enum { 643 BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0' 644 }; 639 645 640 646 /* Figure out how much data to allocate */ 641 647 642 i =sizeof(bunzip_data);643 if (in_fd!=-1) i+=IOBUF_SIZE;648 i = sizeof(bunzip_data); 649 if (in_fd != -1) i += IOBUF_SIZE; 644 650 645 651 /* Allocate bunzip_data. Most fields initialize to zero. */ 646 652 647 bd =*bdp=xzalloc(i);653 bd = *bdp = xzalloc(i); 648 654 649 655 /* Setup input buffer */ 650 656 651 if(-1==(bd->in_fd=in_fd)) { 652 bd->inbuf=inbuf; 653 bd->inbufCount=len; 654 } else bd->inbuf=(unsigned char *)(bd+1); 657 bd->in_fd = in_fd; 658 if (-1 == in_fd) { 659 /* in this case, bd->inbuf is read-only */ 660 bd->inbuf = (void*)inbuf; /* cast away const-ness */ 661 bd->inbufCount = len; 662 } else 663 bd->inbuf = (unsigned char *)(bd + 1); 655 664 656 665 /* Init the CRC32 table (big endian) */ 657 666 658 bd->crc32Table = bb_crc32_filltable(1);667 crc32_filltable(bd->crc32Table, 1); 659 668 660 669 /* Setup for I/O error handling via longjmp */ 661 670 662 i =setjmp(bd->jmpbuf);663 if (i) return i;671 i = setjmp(bd->jmpbuf); 672 if (i) return i; 664 673 665 674 /* Ensure that file starts with "BZh['1'-'9']." */ 666 675 667 i = get_bits(bd, 32);668 if (( (unsigned int)(i-BZh0-1)) >= 9) return RETVAL_NOT_BZIP_DATA;676 i = get_bits(bd, 32); 677 if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; 669 678 670 679 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of 671 680 uncompressed data. Allocate intermediate buffer for block. */ 672 681 673 bd->dbufSize=100000*(i-BZh0); 674 675 bd->dbuf=xmalloc(bd->dbufSize * sizeof(int)); 682 bd->dbufSize = 100000 * (i - BZh0); 683 684 /* Cannot use xmalloc - may leak bd in NOFORK case! */ 685 bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); 686 if (!bd->dbuf) { 687 free(bd); 688 xfunc_die(); 689 } 676 690 return RETVAL_OK; 677 691 } 678 692 679 /* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data, 680 not end of file.) */ 681 682 int uncompressStream(int src_fd, int dst_fd) 693 void dealloc_bunzip(bunzip_data *bd) 683 694 { 695 free(bd->dbuf); 696 free(bd); 697 } 698 699 700 /* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ 701 702 USE_DESKTOP(long long) int 703 unpack_bz2_stream(int src_fd, int dst_fd) 704 { 705 USE_DESKTOP(long long total_written = 0;) 684 706 char *outbuf; 685 707 bunzip_data *bd; 686 708 int i; 687 709 688 outbuf=xmalloc(IOBUF_SIZE); 689 if(!(i=start_bunzip(&bd,src_fd,0,0))) { 690 for(;;) { 691 if((i=read_bunzip(bd,outbuf,IOBUF_SIZE)) <= 0) break; 692 if(i!=write(dst_fd,outbuf,i)) { 693 i=RETVAL_UNEXPECTED_OUTPUT_EOF; 710 outbuf = xmalloc(IOBUF_SIZE); 711 i = start_bunzip(&bd, src_fd, NULL, 0); 712 if (!i) { 713 for (;;) { 714 i = read_bunzip(bd, outbuf, IOBUF_SIZE); 715 if (i <= 0) break; 716 if (i != safe_write(dst_fd, outbuf, i)) { 717 i = RETVAL_UNEXPECTED_OUTPUT_EOF; 694 718 break; 695 719 } 720 USE_DESKTOP(total_written += i;) 696 721 } 697 722 } … … 699 724 /* Check CRC and release memory */ 700 725 701 if (i==RETVAL_LAST_BLOCK) {702 if (bd->headerCRC !=bd->totalCRC) {703 bb_error_msg(" Data integrity error when decompressing.");726 if (i == RETVAL_LAST_BLOCK) { 727 if (bd->headerCRC != bd->totalCRC) { 728 bb_error_msg("data integrity error when decompressing"); 704 729 } else { 705 i =RETVAL_OK;706 } 707 } else if (i ==RETVAL_UNEXPECTED_OUTPUT_EOF) {708 bb_error_msg(" Compressed file ends unexpectedly");730 i = RETVAL_OK; 731 } 732 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { 733 bb_error_msg("compressed file ends unexpectedly"); 709 734 } else { 710 bb_error_msg("Decompression failed"); 711 } 712 free(bd->dbuf); 713 free(bd); 735 bb_error_msg("decompression failed"); 736 } 737 dealloc_bunzip(bd); 714 738 free(outbuf); 715 739 716 return i ;740 return i ? i : USE_DESKTOP(total_written) + 0; 717 741 } 718 742 719 743 #ifdef TESTING 720 744 721 static char * const bunzip_errors[]={NULL,"Bad file checksum","Not bzip data", 722 "Unexpected input EOF","Unexpected output EOF","Data error", 723 "Out of memory","Obsolete (pre 0.9.5) bzip format not supported."}; 745 static char *const bunzip_errors[] = { 746 NULL, "Bad file checksum", "Not bzip data", 747 "Unexpected input EOF", "Unexpected output EOF", "Data error", 748 "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" 749 }; 724 750 725 751 /* Dumb little test thing, decompress stdin to stdout */ 726 int main(int argc, char * argv[])752 int main(int argc, char **argv) 727 753 { 728 int i =uncompressStream(0,1);754 int i = unpack_bz2_stream(0, 1); 729 755 char c; 730 756 731 if(i) fprintf(stderr,"%s\n", bunzip_errors[-i]); 732 else if(read(0,&c,1)) fprintf(stderr,"Trailing garbage ignored\n"); 757 if (i < 0) 758 fprintf(stderr,"%s\n", bunzip_errors[-i]); 759 else if (read(0, &c, 1)) 760 fprintf(stderr,"Trailing garbage ignored\n"); 733 761 return -i; 734 762 }
Note:
See TracChangeset
for help on using the changeset viewer.