Changeset 3232 in MondoRescue for branches/3.2/mindi-busybox/libbb/hash_md5_sha.c
- Timestamp: Jan 1, 2014, 12:47:38 AM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/3.2/mindi-busybox/libbb/hash_md5_sha.c
r2725 r3232 32 32 } 33 33 34 /* rotl64 only used for sha3 currently */ 35 static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n) 36 { 37 return (x << n) | (x >> (64 - n)); 38 } 34 39 35 40 /* Feed data through a temporary buffer. … … 52 57 buffer = (const char *)buffer + remaining; 53 58 bufpos += remaining; 54 /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */59 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */ 55 60 bufpos -= 64; 56 61 if (bufpos != 0) … … 105 110 106 111 /* 0: fastest, 3: smallest */ 107 #if CONFIG_MD5_S IZE_VS_SPEED< 0108 # define MD5_S IZE_VS_SPEED0109 #elif CONFIG_MD5_S IZE_VS_SPEED> 3110 # define MD5_S IZE_VS_SPEED3112 #if CONFIG_MD5_SMALL < 0 113 # define MD5_SMALL 0 114 #elif CONFIG_MD5_SMALL > 3 115 # define MD5_SMALL 3 111 116 #else 112 # define MD5_S IZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED117 # define MD5_SMALL CONFIG_MD5_SMALL 113 118 #endif 114 119 … … 130 135 static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) 131 136 { 132 #if MD5_S IZE_VS_SPEED> 0137 #if MD5_SMALL > 0 133 138 /* Before we start, one word to the strange constants. 
134 139 They are defined in RFC 1321 as … … 158 163 }; 159 164 static const char P_array[] ALIGN1 = { 160 # if MD5_S IZE_VS_SPEED> 1165 # if MD5_SMALL > 1 161 166 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */ 162 167 # endif … … 172 177 uint32_t D = ctx->hash[3]; 173 178 174 #if MD5_S IZE_VS_SPEED>= 2 /* 2 or 3 */179 #if MD5_SMALL >= 2 /* 2 or 3 */ 175 180 176 181 static const char S_array[] ALIGN1 = { … … 186 191 uint32_t temp; 187 192 188 # if BB_BIG_ENDIAN 189 for (i = 0; i < 16; i++) 190 words[i] = SWAP_LE32(words[i]); 191 # endif 192 193 # if MD5_SIZE_VS_SPEED == 3 193 if (BB_BIG_ENDIAN) 194 for (i = 0; i < 16; i++) 195 words[i] = SWAP_LE32(words[i]); 196 197 # if MD5_SMALL == 3 194 198 pc = C_array; 195 199 pp = P_array; … … 221 225 B = temp; 222 226 } 223 # else /* MD5_S IZE_VS_SPEED== 2 */227 # else /* MD5_SMALL == 2 */ 224 228 pc = C_array; 225 229 pp = P_array; … … 272 276 ctx->hash[3] += D; 273 277 274 #else /* MD5_S IZE_VS_SPEED== 0 or 1 */278 #else /* MD5_SMALL == 0 or 1 */ 275 279 276 280 uint32_t A_save = A; … … 278 282 uint32_t C_save = C; 279 283 uint32_t D_save = D; 280 # if MD5_S IZE_VS_SPEED== 1284 # if MD5_SMALL == 1 281 285 const uint32_t *pc; 282 286 const char *pp; … … 300 304 301 305 /* Round 1 */ 302 # if MD5_S IZE_VS_SPEED== 1306 # if MD5_SMALL == 1 303 307 pc = C_array; 304 308 for (i = 0; i < 4; i++) { … … 340 344 341 345 /* Round 2 */ 342 # if MD5_S IZE_VS_SPEED== 1346 # if MD5_SMALL == 1 343 347 pp = P_array; 344 348 for (i = 0; i < 4; i++) { … … 368 372 369 373 /* Round 3 */ 370 # if MD5_S IZE_VS_SPEED== 1374 # if MD5_SMALL == 1 371 375 for (i = 0; i < 4; i++) { 372 376 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++); … … 395 399 396 400 /* Round 4 */ 397 # if MD5_S IZE_VS_SPEED== 1401 # if MD5_SMALL == 1 398 402 for (i = 0; i < 4; i++) { 399 403 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++); … … 463 467 464 468 /* The MD5 result is in little endian byte order */ 465 #if BB_BIG_ENDIAN 466 ctx->hash[0] = 
SWAP_LE32(ctx->hash[0]); 467 ctx->hash[1] = SWAP_LE32(ctx->hash[1]); 468 ctx->hash[2] = SWAP_LE32(ctx->hash[2]); 469 ctx->hash[3] = SWAP_LE32(ctx->hash[3]); 470 #endif 469 if (BB_BIG_ENDIAN) { 470 ctx->hash[0] = SWAP_LE32(ctx->hash[0]); 471 ctx->hash[1] = SWAP_LE32(ctx->hash[1]); 472 ctx->hash[2] = SWAP_LE32(ctx->hash[2]); 473 ctx->hash[3] = SWAP_LE32(ctx->hash[3]); 474 } 475 471 476 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4); 472 477 } … … 835 840 buffer = (const char *)buffer + remaining; 836 841 bufpos += remaining; 837 /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */842 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */ 838 843 bufpos -= 128; 839 844 if (bufpos != 0) … … 897 902 memcpy(resbuf, ctx->hash, sizeof(ctx->hash)); 898 903 } 904 905 906 /* 907 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 908 * Michael Peeters and Gilles Van Assche. For more information, feedback or 909 * questions, please refer to our website: http://keccak.noekeon.org/ 910 * 911 * Implementation by Ronny Van Keer, 912 * hereby denoted as "the implementer". 913 * 914 * To the extent possible under law, the implementer has waived all copyright 915 * and related or neighboring rights to the source code in this file. 916 * http://creativecommons.org/publicdomain/zero/1.0/ 917 * 918 * Busybox modifications (C) Lauri Kasanen, under the GPLv2. 919 */ 920 921 #if CONFIG_SHA3_SMALL < 0 922 # define SHA3_SMALL 0 923 #elif CONFIG_SHA3_SMALL > 1 924 # define SHA3_SMALL 1 925 #else 926 # define SHA3_SMALL CONFIG_SHA3_SMALL 927 #endif 928 929 enum { 930 SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */ 931 }; 932 933 /* 934 * In the crypto literature this function is usually called Keccak-f(). 935 */ 936 static void sha3_process_block72(uint64_t *state) 937 { 938 enum { NROUNDS = 24 }; 939 940 /* Elements should be 64-bit, but top half is always zero or 0x80000000. 941 * We encode 63rd bits in a separate word below. 
942 * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit. 943 * The speed penalty is lost in the noise. 944 */ 945 static const uint16_t IOTA_CONST[NROUNDS] = { 946 0x0001, 947 0x8082, 948 0x808a, 949 0x8000, 950 0x808b, 951 0x0001, 952 0x8081, 953 0x8009, 954 0x008a, 955 0x0088, 956 0x8009, 957 0x000a, 958 0x808b, 959 0x008b, 960 0x8089, 961 0x8003, 962 0x8002, 963 0x0080, 964 0x800a, 965 0x000a, 966 0x8081, 967 0x8080, 968 0x0001, 969 0x8008, 970 }; 971 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */ 972 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00); 973 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */ 974 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00); 975 976 static const uint8_t ROT_CONST[24] = { 977 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 978 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44, 979 }; 980 static const uint8_t PI_LANE[24] = { 981 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 982 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1, 983 }; 984 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/ 985 986 unsigned x, y; 987 unsigned round; 988 989 if (BB_BIG_ENDIAN) { 990 for (x = 0; x < 25; x++) { 991 state[x] = SWAP_LE64(state[x]); 992 } 993 } 994 995 for (round = 0; round < NROUNDS; ++round) { 996 /* Theta */ 997 { 998 uint64_t BC[10]; 999 for (x = 0; x < 5; ++x) { 1000 BC[x + 5] = BC[x] = state[x] 1001 ^ state[x + 5] ^ state[x + 10] 1002 ^ state[x + 15] ^ state[x + 20]; 1003 } 1004 /* Using 2x5 vector above eliminates the need to use 1005 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5], 1006 * and the code is a bit _smaller_. 
1007 */ 1008 for (x = 0; x < 5; ++x) { 1009 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); 1010 state[x] ^= temp; 1011 state[x + 5] ^= temp; 1012 state[x + 10] ^= temp; 1013 state[x + 15] ^= temp; 1014 state[x + 20] ^= temp; 1015 } 1016 } 1017 1018 /* Rho Pi */ 1019 if (SHA3_SMALL) { 1020 uint64_t t1 = state[1]; 1021 for (x = 0; x < 24; ++x) { 1022 uint64_t t0 = state[PI_LANE[x]]; 1023 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]); 1024 t1 = t0; 1025 } 1026 } else { 1027 /* Especially large benefit for 32-bit arch (75% faster): 1028 * 64-bit rotations by non-constant usually are SLOW on those. 1029 * We resort to unrolling here. 1030 * This optimizes out PI_LANE[] and ROT_CONST[], 1031 * but generates 300-500 more bytes of code. 1032 */ 1033 uint64_t t0; 1034 uint64_t t1 = state[1]; 1035 #define RhoPi_twice(x) \ 1036 t0 = state[PI_LANE[x ]]; \ 1037 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \ 1038 t1 = state[PI_LANE[x+1]]; \ 1039 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]); 1040 RhoPi_twice(0); RhoPi_twice(2); 1041 RhoPi_twice(4); RhoPi_twice(6); 1042 RhoPi_twice(8); RhoPi_twice(10); 1043 RhoPi_twice(12); RhoPi_twice(14); 1044 RhoPi_twice(16); RhoPi_twice(18); 1045 RhoPi_twice(20); RhoPi_twice(22); 1046 #undef RhoPi_twice 1047 } 1048 1049 /* Chi */ 1050 for (y = 0; y <= 20; y += 5) { 1051 uint64_t BC0, BC1, BC2, BC3, BC4; 1052 BC0 = state[y + 0]; 1053 BC1 = state[y + 1]; 1054 BC2 = state[y + 2]; 1055 state[y + 0] = BC0 ^ ((~BC1) & BC2); 1056 BC3 = state[y + 3]; 1057 state[y + 1] = BC1 ^ ((~BC2) & BC3); 1058 BC4 = state[y + 4]; 1059 state[y + 2] = BC2 ^ ((~BC3) & BC4); 1060 state[y + 3] = BC3 ^ ((~BC4) & BC0); 1061 state[y + 4] = BC4 ^ ((~BC0) & BC1); 1062 } 1063 1064 /* Iota */ 1065 state[0] ^= IOTA_CONST[round] 1066 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) 1067 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32; 1068 } 1069 1070 if (BB_BIG_ENDIAN) { 1071 for (x = 0; x < 25; x++) { 1072 state[x] = SWAP_LE64(state[x]); 1073 } 1074 
} 1075 } 1076 1077 void FAST_FUNC sha3_begin(sha3_ctx_t *ctx) 1078 { 1079 memset(ctx, 0, sizeof(*ctx)); 1080 } 1081 1082 void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len) 1083 { 1084 #if SHA3_SMALL 1085 const uint8_t *data = buffer; 1086 unsigned bufpos = ctx->bytes_queued; 1087 1088 while (1) { 1089 unsigned remaining = SHA3_IBLK_BYTES - bufpos; 1090 if (remaining > len) 1091 remaining = len; 1092 len -= remaining; 1093 /* XOR data into buffer */ 1094 while (remaining != 0) { 1095 uint8_t *buf = (uint8_t*)ctx->state; 1096 buf[bufpos] ^= *data++; 1097 bufpos++; 1098 remaining--; 1099 } 1100 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */ 1101 bufpos -= SHA3_IBLK_BYTES; 1102 if (bufpos != 0) 1103 break; 1104 /* Buffer is filled up, process it */ 1105 sha3_process_block72(ctx->state); 1106 /*bufpos = 0; - already is */ 1107 } 1108 ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES; 1109 #else 1110 /* +50 bytes code size, but a bit faster because of long-sized XORs */ 1111 const uint8_t *data = buffer; 1112 unsigned bufpos = ctx->bytes_queued; 1113 1114 /* If already data in queue, continue queuing first */ 1115 while (len != 0 && bufpos != 0) { 1116 uint8_t *buf = (uint8_t*)ctx->state; 1117 buf[bufpos] ^= *data++; 1118 len--; 1119 bufpos++; 1120 if (bufpos == SHA3_IBLK_BYTES) { 1121 bufpos = 0; 1122 goto do_block; 1123 } 1124 } 1125 1126 /* Absorb complete blocks */ 1127 while (len >= SHA3_IBLK_BYTES) { 1128 /* XOR data onto beginning of state[]. 1129 * We try to be efficient - operate one word at a time, not byte. 1130 * Careful wrt unaligned access: can't just use "*(long*)data"! 
1131 */ 1132 unsigned count = SHA3_IBLK_BYTES / sizeof(long); 1133 long *buf = (long*)ctx->state; 1134 do { 1135 long v; 1136 move_from_unaligned_long(v, (long*)data); 1137 *buf++ ^= v; 1138 data += sizeof(long); 1139 } while (--count); 1140 len -= SHA3_IBLK_BYTES; 1141 do_block: 1142 sha3_process_block72(ctx->state); 1143 } 1144 1145 /* Queue remaining data bytes */ 1146 while (len != 0) { 1147 uint8_t *buf = (uint8_t*)ctx->state; 1148 buf[bufpos] ^= *data++; 1149 bufpos++; 1150 len--; 1151 } 1152 1153 ctx->bytes_queued = bufpos; 1154 #endif 1155 } 1156 1157 void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf) 1158 { 1159 /* Padding */ 1160 uint8_t *buf = (uint8_t*)ctx->state; 1161 buf[ctx->bytes_queued] ^= 1; 1162 buf[SHA3_IBLK_BYTES - 1] ^= 0x80; 1163 1164 sha3_process_block72(ctx->state); 1165 1166 /* Output */ 1167 memcpy(resbuf, ctx->state, 64); 1168 }
Note: See TracChangeset for help on using the changeset viewer.