Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

hash_md5_sha.c@ 3621

Visit:

Last change on this file since 3621 was 3621, checked in by Bruno Cornec, 7 years ago
New 3?3 banch for incorporation of latest busybox 1.25. Changing minor version to handle potential incompatibilities.
Property svn:eol-style set to `native`
File size: 37.4 KB

Line
1	/* vi: set sw=4 ts=4: */
2	/*
3	* Utility routines.
4	*
5	* Copyright (C) 2010 Denys Vlasenko
6	*
7	* Licensed under GPLv2 or later, see file LICENSE in this source tree.
8	*/
9
10	#include "libbb.h"
11
12	/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
13	* (for rotX32, there is no difference). Why? My guess is that
14	* macro requires clever common subexpression elimination heuristics
15	* in gcc, while inline basically forces it to happen.
16	*/
17	//#define rotl32(x,n) (((x) << (n)) \| ((x) >> (32 - (n))))
18	static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
19	{
20	return (x << n) \| (x >> (32 - n));
21	}
22	//#define rotr32(x,n) (((x) >> (n)) \| ((x) << (32 - (n))))
23	static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
24	{
25	return (x >> n) \| (x << (32 - n));
26	}
27	/* rotr64 in needed for sha512 only: */
28	//#define rotr64(x,n) (((x) >> (n)) \| ((x) << (64 - (n))))
29	static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
30	{
31	return (x >> n) \| (x << (64 - n));
32	}
33
34	/* rotl64 only used for sha3 currently */
35	static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
36	{
37	return (x << n) \| (x >> (64 - n));
38	}
39
40	/* Feed data through a temporary buffer.
41	* The internal buffer remembers previous data until it has 64
42	* bytes worth to pass on.
43	*/
44	static void FAST_FUNC common64_hash(md5_ctx_t ctx, const void buffer, size_t len)
45	{
46	unsigned bufpos = ctx->total64 & 63;
47
48	ctx->total64 += len;
49
50	while (1) {
51	unsigned remaining = 64 - bufpos;
52	if (remaining > len)
53	remaining = len;
54	/* Copy data into aligned buffer */
55	memcpy(ctx->wbuffer + bufpos, buffer, remaining);
56	len -= remaining;
57	buffer = (const char *)buffer + remaining;
58	bufpos += remaining;
59	/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
60	bufpos -= 64;
61	if (bufpos != 0)
62	break;
63	/* Buffer is filled up, process it */
64	ctx->process_block(ctx);
65	/bufpos = 0; - already is /
66	}
67	}
68
69	/* Process the remaining bytes in the buffer */
70	static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
71	{
72	unsigned bufpos = ctx->total64 & 63;
73	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
74	ctx->wbuffer[bufpos++] = 0x80;
75
76	/* This loop iterates either once or twice, no more, no less */
77	while (1) {
78	unsigned remaining = 64 - bufpos;
79	memset(ctx->wbuffer + bufpos, 0, remaining);
80	/* Do we have enough space for the length count? */
81	if (remaining >= 8) {
82	/* Store the 64-bit counter of bits in the buffer */
83	uint64_t t = ctx->total64 << 3;
84	if (swap_needed)
85	t = bb_bswap_64(t);
86	/* wbuffer is suitably aligned for this */
87	(bb__aliased_uint64_t ) (&ctx->wbuffer[64 - 8]) = t;
88	}
89	ctx->process_block(ctx);
90	if (remaining >= 8)
91	break;
92	bufpos = 0;
93	}
94	}
95
96
97	/*
98	* Compute MD5 checksum of strings according to the
99	* definition of MD5 in RFC 1321 from April 1992.
100	*
101	* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
102	*
103	* Copyright (C) 1995-1999 Free Software Foundation, Inc.
104	* Copyright (C) 2001 Manuel Novoa III
105	* Copyright (C) 2003 Glenn L. McGrath
106	* Copyright (C) 2003 Erik Andersen
107	*
108	* Licensed under GPLv2 or later, see file LICENSE in this source tree.
109	*/
110
111	/* 0: fastest, 3: smallest */
112	#if CONFIG_MD5_SMALL < 0
113	# define MD5_SMALL 0
114	#elif CONFIG_MD5_SMALL > 3
115	# define MD5_SMALL 3
116	#else
117	# define MD5_SMALL CONFIG_MD5_SMALL
118	#endif
119
120	/* These are the four functions used in the four steps of the MD5 algorithm
121	* and defined in the RFC 1321. The first function is a little bit optimized
122	* (as found in Colin Plumbs public domain implementation).
123	* #define FF(b, c, d) ((b & c) \| (~b & d))
124	*/
125	#undef FF
126	#undef FG
127	#undef FH
128	#undef FI
129	#define FF(b, c, d) (d ^ (b & (c ^ d)))
130	#define FG(b, c, d) FF(d, b, c)
131	#define FH(b, c, d) (b ^ c ^ d)
132	#define FI(b, c, d) (c ^ (b \| ~d))
133
134	/* Hash a single block, 64 bytes long and 4-byte aligned */
135	static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
136	{
137	#if MD5_SMALL > 0
138	/* Before we start, one word to the strange constants.
139	They are defined in RFC 1321 as
140	T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
141	*/
142	static const uint32_t C_array[] = {
143	/* round 1 */
144	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
145	0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
146	0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
147	0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
148	/* round 2 */
149	0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
150	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
151	0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
152	0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
153	/* round 3 */
154	0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
155	0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
156	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
157	0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
158	/* round 4 */
159	0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
160	0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
161	0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
162	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
163	};
164	static const char P_array[] ALIGN1 = {
165	# if MD5_SMALL > 1
166	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
167	# endif
168	1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
169	5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
170	0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
171	};
172	#endif
173	uint32_t words = (void) ctx->wbuffer;
174	uint32_t A = ctx->hash[0];
175	uint32_t B = ctx->hash[1];
176	uint32_t C = ctx->hash[2];
177	uint32_t D = ctx->hash[3];
178
179	#if MD5_SMALL >= 2 /* 2 or 3 */
180
181	static const char S_array[] ALIGN1 = {
182	7, 12, 17, 22,
183	5, 9, 14, 20,
184	4, 11, 16, 23,
185	6, 10, 15, 21
186	};
187	const uint32_t *pc;
188	const char *pp;
189	const char *ps;
190	int i;
191	uint32_t temp;
192
193	if (BB_BIG_ENDIAN)
194	for (i = 0; i < 16; i++)
195	words[i] = SWAP_LE32(words[i]);
196
197	# if MD5_SMALL == 3
198	pc = C_array;
199	pp = P_array;
200	ps = S_array - 4;
201
202	for (i = 0; i < 64; i++) {
203	if ((i & 0x0f) == 0)
204	ps += 4;
205	temp = A;
206	switch (i >> 4) {
207	case 0:
208	temp += FF(B, C, D);
209	break;
210	case 1:
211	temp += FG(B, C, D);
212	break;
213	case 2:
214	temp += FH(B, C, D);
215	break;
216	default: /* case 3 */
217	temp += FI(B, C, D);
218	}
219	temp += words[(int) (pp++)] + pc++;
220	temp = rotl32(temp, ps[i & 3]);
221	temp += B;
222	A = D;
223	D = C;
224	C = B;
225	B = temp;
226	}
227	# else /* MD5_SMALL == 2 */
228	pc = C_array;
229	pp = P_array;
230	ps = S_array;
231
232	for (i = 0; i < 16; i++) {
233	temp = A + FF(B, C, D) + words[(int) (pp++)] + pc++;
234	temp = rotl32(temp, ps[i & 3]);
235	temp += B;
236	A = D;
237	D = C;
238	C = B;
239	B = temp;
240	}
241	ps += 4;
242	for (i = 0; i < 16; i++) {
243	temp = A + FG(B, C, D) + words[(int) (pp++)] + pc++;
244	temp = rotl32(temp, ps[i & 3]);
245	temp += B;
246	A = D;
247	D = C;
248	C = B;
249	B = temp;
250	}
251	ps += 4;
252	for (i = 0; i < 16; i++) {
253	temp = A + FH(B, C, D) + words[(int) (pp++)] + pc++;
254	temp = rotl32(temp, ps[i & 3]);
255	temp += B;
256	A = D;
257	D = C;
258	C = B;
259	B = temp;
260	}
261	ps += 4;
262	for (i = 0; i < 16; i++) {
263	temp = A + FI(B, C, D) + words[(int) (pp++)] + pc++;
264	temp = rotl32(temp, ps[i & 3]);
265	temp += B;
266	A = D;
267	D = C;
268	C = B;
269	B = temp;
270	}
271	# endif
272	/* Add checksum to the starting values */
273	ctx->hash[0] += A;
274	ctx->hash[1] += B;
275	ctx->hash[2] += C;
276	ctx->hash[3] += D;
277
278	#else /* MD5_SMALL == 0 or 1 */
279
280	# if MD5_SMALL == 1
281	const uint32_t *pc;
282	const char *pp;
283	int i;
284	# endif
285
286	/* First round: using the given function, the context and a constant
287	the next context is computed. Because the algorithm's processing
288	unit is a 32-bit word and it is determined to work on words in
289	little endian byte order we perhaps have to change the byte order
290	before the computation. To reduce the work for the next steps
291	we save swapped words in WORDS array. */
292	# undef OP
293	# define OP(a, b, c, d, s, T) \
294	do { \
295	a += FF(b, c, d) + (words IF_BIG_ENDIAN(= SWAP_LE32(words))) + T; \
296	words++; \
297	a = rotl32(a, s); \
298	a += b; \
299	} while (0)
300
301	/* Round 1 */
302	# if MD5_SMALL == 1
303	pc = C_array;
304	for (i = 0; i < 4; i++) {
305	OP(A, B, C, D, 7, *pc++);
306	OP(D, A, B, C, 12, *pc++);
307	OP(C, D, A, B, 17, *pc++);
308	OP(B, C, D, A, 22, *pc++);
309	}
310	# else
311	OP(A, B, C, D, 7, 0xd76aa478);
312	OP(D, A, B, C, 12, 0xe8c7b756);
313	OP(C, D, A, B, 17, 0x242070db);
314	OP(B, C, D, A, 22, 0xc1bdceee);
315	OP(A, B, C, D, 7, 0xf57c0faf);
316	OP(D, A, B, C, 12, 0x4787c62a);
317	OP(C, D, A, B, 17, 0xa8304613);
318	OP(B, C, D, A, 22, 0xfd469501);
319	OP(A, B, C, D, 7, 0x698098d8);
320	OP(D, A, B, C, 12, 0x8b44f7af);
321	OP(C, D, A, B, 17, 0xffff5bb1);
322	OP(B, C, D, A, 22, 0x895cd7be);
323	OP(A, B, C, D, 7, 0x6b901122);
324	OP(D, A, B, C, 12, 0xfd987193);
325	OP(C, D, A, B, 17, 0xa679438e);
326	OP(B, C, D, A, 22, 0x49b40821);
327	# endif
328	words -= 16;
329
330	/* For the second to fourth round we have the possibly swapped words
331	in WORDS. Redefine the macro to take an additional first
332	argument specifying the function to use. */
333	# undef OP
334	# define OP(f, a, b, c, d, k, s, T) \
335	do { \
336	a += f(b, c, d) + words[k] + T; \
337	a = rotl32(a, s); \
338	a += b; \
339	} while (0)
340
341	/* Round 2 */
342	# if MD5_SMALL == 1
343	pp = P_array;
344	for (i = 0; i < 4; i++) {
345	OP(FG, A, B, C, D, (int) (pp++), 5, pc++);
346	OP(FG, D, A, B, C, (int) (pp++), 9, pc++);
347	OP(FG, C, D, A, B, (int) (pp++), 14, pc++);
348	OP(FG, B, C, D, A, (int) (pp++), 20, pc++);
349	}
350	# else
351	OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
352	OP(FG, D, A, B, C, 6, 9, 0xc040b340);
353	OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
354	OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
355	OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
356	OP(FG, D, A, B, C, 10, 9, 0x02441453);
357	OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
358	OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
359	OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
360	OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
361	OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
362	OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
363	OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
364	OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
365	OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
366	OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
367	# endif
368
369	/* Round 3 */
370	# if MD5_SMALL == 1
371	for (i = 0; i < 4; i++) {
372	OP(FH, A, B, C, D, (int) (pp++), 4, pc++);
373	OP(FH, D, A, B, C, (int) (pp++), 11, pc++);
374	OP(FH, C, D, A, B, (int) (pp++), 16, pc++);
375	OP(FH, B, C, D, A, (int) (pp++), 23, pc++);
376	}
377	# else
378	OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
379	OP(FH, D, A, B, C, 8, 11, 0x8771f681);
380	OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
381	OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
382	OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
383	OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
384	OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
385	OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
386	OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
387	OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
388	OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
389	OP(FH, B, C, D, A, 6, 23, 0x04881d05);
390	OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
391	OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
392	OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
393	OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
394	# endif
395
396	/* Round 4 */
397	# if MD5_SMALL == 1
398	for (i = 0; i < 4; i++) {
399	OP(FI, A, B, C, D, (int) (pp++), 6, pc++);
400	OP(FI, D, A, B, C, (int) (pp++), 10, pc++);
401	OP(FI, C, D, A, B, (int) (pp++), 15, pc++);
402	OP(FI, B, C, D, A, (int) (pp++), 21, pc++);
403	}
404	# else
405	OP(FI, A, B, C, D, 0, 6, 0xf4292244);
406	OP(FI, D, A, B, C, 7, 10, 0x432aff97);
407	OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
408	OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
409	OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
410	OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
411	OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
412	OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
413	OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
414	OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
415	OP(FI, C, D, A, B, 6, 15, 0xa3014314);
416	OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
417	OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
418	OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
419	OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
420	OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
421	# undef OP
422	# endif
423	/* Add checksum to the starting values */
424	ctx->hash[0] += A;
425	ctx->hash[1] += B;
426	ctx->hash[2] += C;
427	ctx->hash[3] += D;
428	#endif
429	}
430	#undef FF
431	#undef FG
432	#undef FH
433	#undef FI
434
435	/* Initialize structure containing state of computation.
436	* (RFC 1321, 3.3: Step 3)
437	*/
438	void FAST_FUNC md5_begin(md5_ctx_t *ctx)
439	{
440	ctx->hash[0] = 0x67452301;
441	ctx->hash[1] = 0xefcdab89;
442	ctx->hash[2] = 0x98badcfe;
443	ctx->hash[3] = 0x10325476;
444	ctx->total64 = 0;
445	ctx->process_block = md5_process_block64;
446	}
447
448	/* Used also for sha1 and sha256 */
449	void FAST_FUNC md5_hash(md5_ctx_t ctx, const void buffer, size_t len)
450	{
451	common64_hash(ctx, buffer, len);
452	}
453
454	/* Process the remaining bytes in the buffer and put result from CTX
455	* in first 16 bytes following RESBUF. The result is always in little
456	* endian byte order, so that a byte-wise output yields to the wanted
457	* ASCII representation of the message digest.
458	*/
459	void FAST_FUNC md5_end(md5_ctx_t ctx, void resbuf)
460	{
461	/* MD5 stores total in LE, need to swap on BE arches: */
462	common64_end(ctx, /swap_needed:/ BB_BIG_ENDIAN);
463
464	/* The MD5 result is in little endian byte order */
465	if (BB_BIG_ENDIAN) {
466	ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
467	ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
468	ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
469	ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
470	}
471
472	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
473	}
474
475
476	/*
477	* SHA1 part is:
478	* Copyright 2007 Rob Landley <rob@landley.net>
479	*
480	* Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
481	* from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
482	*
483	* Licensed under GPLv2, see file LICENSE in this source tree.
484	*
485	* ---------------------------------------------------------------------------
486	*
487	* SHA256 and SHA512 parts are:
488	* Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
489	* Shrank by Denys Vlasenko.
490	*
491	* ---------------------------------------------------------------------------
492	*
493	* The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
494	* and replace "4096" with something like "2000 + time(NULL) % 2097",
495	* then rebuild and compare "shaNNNsum bigfile" results.
496	*/
497
498	static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
499	{
500	static const uint32_t rconsts[] = {
501	0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
502	};
503	int i, j;
504	int cnt;
505	uint32_t W[16+16];
506	uint32_t a, b, c, d, e;
507
508	/* On-stack work buffer frees up one register in the main loop
509	* which otherwise will be needed to hold ctx pointer */
510	for (i = 0; i < 16; i++)
511	W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
512
513	a = ctx->hash[0];
514	b = ctx->hash[1];
515	c = ctx->hash[2];
516	d = ctx->hash[3];
517	e = ctx->hash[4];
518
519	/* 4 rounds of 20 operations each */
520	cnt = 0;
521	for (i = 0; i < 4; i++) {
522	j = 19;
523	do {
524	uint32_t work;
525
526	work = c ^ d;
527	if (i == 0) {
528	work = (work & b) ^ d;
529	if (j <= 3)
530	goto ge16;
531	/* Used to do SWAP_BE32 here, but this
532	* requires ctx (see comment above) */
533	work += W[cnt];
534	} else {
535	if (i == 2)
536	work = ((b \| c) & d) \| (b & c);
537	else /* i = 1 or 3 */
538	work ^= b;
539	ge16:
540	W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
541	work += W[cnt];
542	}
543	work += e + rotl32(a, 5) + rconsts[i];
544
545	/* Rotate by one for next time */
546	e = d;
547	d = c;
548	c = /* b = */ rotl32(b, 30);
549	b = a;
550	a = work;
551	cnt = (cnt + 1) & 15;
552	} while (--j >= 0);
553	}
554
555	ctx->hash[0] += a;
556	ctx->hash[1] += b;
557	ctx->hash[2] += c;
558	ctx->hash[3] += d;
559	ctx->hash[4] += e;
560	}
561
562	/* Constants for SHA512 from FIPS 180-2:4.2.3.
563	* SHA256 constants from FIPS 180-2:4.2.2
564	* are the most significant half of first 64 elements
565	* of the same array.
566	*/
567	static const uint64_t sha_K[80] = {
568	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
569	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
570	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
571	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
572	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
573	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
574	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
575	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
576	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
577	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
578	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
579	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
580	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
581	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
582	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
583	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
584	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
585	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
586	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
587	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
588	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
589	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
590	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
591	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
592	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
593	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
594	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
595	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
596	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
597	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
598	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
599	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
600	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
601	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
602	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
603	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
604	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
605	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
606	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
607	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
608	};
609
610	#undef Ch
611	#undef Maj
612	#undef S0
613	#undef S1
614	#undef R0
615	#undef R1
616
617	static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
618	{
619	unsigned t;
620	uint32_t W[64], a, b, c, d, e, f, g, h;
621	const uint32_t words = (uint32_t) ctx->wbuffer;
622
623	/* Operators defined in FIPS 180-2:4.1.2. */
624	#define Ch(x, y, z) ((x & y) ^ (~x & z))
625	#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
626	#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
627	#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
628	#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
629	#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
630
631	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
632	for (t = 0; t < 16; ++t)
633	W[t] = SWAP_BE32(words[t]);
634	for (/t = 16/; t < 64; ++t)
635	W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
636
637	a = ctx->hash[0];
638	b = ctx->hash[1];
639	c = ctx->hash[2];
640	d = ctx->hash[3];
641	e = ctx->hash[4];
642	f = ctx->hash[5];
643	g = ctx->hash[6];
644	h = ctx->hash[7];
645
646	/* The actual computation according to FIPS 180-2:6.2.2 step 3. */
647	for (t = 0; t < 64; ++t) {
648	/* Need to fetch upper half of sha_K[t]
649	* (I hope compiler is clever enough to just fetch
650	* upper half)
651	*/
652	uint32_t K_t = sha_K[t] >> 32;
653	uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
654	uint32_t T2 = S0(a) + Maj(a, b, c);
655	h = g;
656	g = f;
657	f = e;
658	e = d + T1;
659	d = c;
660	c = b;
661	b = a;
662	a = T1 + T2;
663	}
664	#undef Ch
665	#undef Maj
666	#undef S0
667	#undef S1
668	#undef R0
669	#undef R1
670	/* Add the starting values of the context according to FIPS 180-2:6.2.2
671	step 4. */
672	ctx->hash[0] += a;
673	ctx->hash[1] += b;
674	ctx->hash[2] += c;
675	ctx->hash[3] += d;
676	ctx->hash[4] += e;
677	ctx->hash[5] += f;
678	ctx->hash[6] += g;
679	ctx->hash[7] += h;
680	}
681
682	static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
683	{
684	unsigned t;
685	uint64_t W[80];
686	/* On i386, having assignments here (not later as sha256 does)
687	* produces 99 bytes smaller code with gcc 4.3.1
688	*/
689	uint64_t a = ctx->hash[0];
690	uint64_t b = ctx->hash[1];
691	uint64_t c = ctx->hash[2];
692	uint64_t d = ctx->hash[3];
693	uint64_t e = ctx->hash[4];
694	uint64_t f = ctx->hash[5];
695	uint64_t g = ctx->hash[6];
696	uint64_t h = ctx->hash[7];
697	const uint64_t words = (uint64_t) ctx->wbuffer;
698
699	/* Operators defined in FIPS 180-2:4.1.2. */
700	#define Ch(x, y, z) ((x & y) ^ (~x & z))
701	#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
702	#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
703	#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
704	#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
705	#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
706
707	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
708	for (t = 0; t < 16; ++t)
709	W[t] = SWAP_BE64(words[t]);
710	for (/t = 16/; t < 80; ++t)
711	W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
712
713	/* The actual computation according to FIPS 180-2:6.3.2 step 3. */
714	for (t = 0; t < 80; ++t) {
715	uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
716	uint64_t T2 = S0(a) + Maj(a, b, c);
717	h = g;
718	g = f;
719	f = e;
720	e = d + T1;
721	d = c;
722	c = b;
723	b = a;
724	a = T1 + T2;
725	}
726	#undef Ch
727	#undef Maj
728	#undef S0
729	#undef S1
730	#undef R0
731	#undef R1
732	/* Add the starting values of the context according to FIPS 180-2:6.3.2
733	step 4. */
734	ctx->hash[0] += a;
735	ctx->hash[1] += b;
736	ctx->hash[2] += c;
737	ctx->hash[3] += d;
738	ctx->hash[4] += e;
739	ctx->hash[5] += f;
740	ctx->hash[6] += g;
741	ctx->hash[7] += h;
742	}
743
744
745	void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
746	{
747	ctx->hash[0] = 0x67452301;
748	ctx->hash[1] = 0xefcdab89;
749	ctx->hash[2] = 0x98badcfe;
750	ctx->hash[3] = 0x10325476;
751	ctx->hash[4] = 0xc3d2e1f0;
752	ctx->total64 = 0;
753	ctx->process_block = sha1_process_block64;
754	}
755
756	static const uint32_t init256[] = {
757	0,
758	0,
759	0x6a09e667,
760	0xbb67ae85,
761	0x3c6ef372,
762	0xa54ff53a,
763	0x510e527f,
764	0x9b05688c,
765	0x1f83d9ab,
766	0x5be0cd19,
767	};
768	static const uint32_t init512_lo[] = {
769	0,
770	0,
771	0xf3bcc908,
772	0x84caa73b,
773	0xfe94f82b,
774	0x5f1d36f1,
775	0xade682d1,
776	0x2b3e6c1f,
777	0xfb41bd6b,
778	0x137e2179,
779	};
780
781	/* Initialize structure containing state of computation.
782	(FIPS 180-2:5.3.2) */
783	void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
784	{
785	memcpy(&ctx->total64, init256, sizeof(init256));
786	/ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 /
787	ctx->process_block = sha256_process_block64;
788	}
789
790	/* Initialize structure containing state of computation.
791	(FIPS 180-2:5.3.3) */
792	void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
793	{
794	int i;
795	/* Two extra iterations zero out ctx->total64[2] */
796	uint64_t *tp = ctx->total64;
797	for (i = 0; i < 2+8; i++)
798	tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
799	/ctx->total64[0] = ctx->total64[1] = 0; - already done /
800	}
801
802	void FAST_FUNC sha512_hash(sha512_ctx_t ctx, const void buffer, size_t len)
803	{
804	unsigned bufpos = ctx->total64[0] & 127;
805	unsigned remaining;
806
807	/* First increment the byte count. FIPS 180-2 specifies the possible
808	length of the file up to 2^128 _bits_.
809	We compute the number of _bytes_ and convert to bits later. */
810	ctx->total64[0] += len;
811	if (ctx->total64[0] < len)
812	ctx->total64[1]++;
813	#if 0
814	remaining = 128 - bufpos;
815
816	/* Hash whole blocks */
817	while (len >= remaining) {
818	memcpy(ctx->wbuffer + bufpos, buffer, remaining);
819	buffer = (const char *)buffer + remaining;
820	len -= remaining;
821	remaining = 128;
822	bufpos = 0;
823	sha512_process_block128(ctx);
824	}
825
826	/* Save last, partial blosk */
827	memcpy(ctx->wbuffer + bufpos, buffer, len);
828	#else
829	while (1) {
830	remaining = 128 - bufpos;
831	if (remaining > len)
832	remaining = len;
833	/* Copy data into aligned buffer */
834	memcpy(ctx->wbuffer + bufpos, buffer, remaining);
835	len -= remaining;
836	buffer = (const char *)buffer + remaining;
837	bufpos += remaining;
838	/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
839	bufpos -= 128;
840	if (bufpos != 0)
841	break;
842	/* Buffer is filled up, process it */
843	sha512_process_block128(ctx);
844	/bufpos = 0; - already is /
845	}
846	#endif
847	}
848
849	/* Used also for sha256 */
850	void FAST_FUNC sha1_end(sha1_ctx_t ctx, void resbuf)
851	{
852	unsigned hash_size;
853
854	/* SHA stores total in BE, need to swap on LE arches: */
855	common64_end(ctx, /swap_needed:/ BB_LITTLE_ENDIAN);
856
857	hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
858	/* This way we do not impose alignment constraints on resbuf: */
859	if (BB_LITTLE_ENDIAN) {
860	unsigned i;
861	for (i = 0; i < hash_size; ++i)
862	ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
863	}
864	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
865	}
866
867	void FAST_FUNC sha512_end(sha512_ctx_t ctx, void resbuf)
868	{
869	unsigned bufpos = ctx->total64[0] & 127;
870
871	/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
872	ctx->wbuffer[bufpos++] = 0x80;
873
874	while (1) {
875	unsigned remaining = 128 - bufpos;
876	memset(ctx->wbuffer + bufpos, 0, remaining);
877	if (remaining >= 16) {
878	/* Store the 128-bit counter of bits in the buffer in BE format */
879	uint64_t t;
880	t = ctx->total64[0] << 3;
881	t = SWAP_BE64(t);
882	(bb__aliased_uint64_t ) (&ctx->wbuffer[128 - 8]) = t;
883	t = (ctx->total64[1] << 3) \| (ctx->total64[0] >> 61);
884	t = SWAP_BE64(t);
885	(bb__aliased_uint64_t ) (&ctx->wbuffer[128 - 16]) = t;
886	}
887	sha512_process_block128(ctx);
888	if (remaining >= 16)
889	break;
890	bufpos = 0;
891	}
892
893	if (BB_LITTLE_ENDIAN) {
894	unsigned i;
895	for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
896	ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
897	}
898	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
899	}
900
901
902	/*
903	* The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
904	* Michael Peeters and Gilles Van Assche. For more information, feedback or
905	* questions, please refer to our website: http://keccak.noekeon.org/
906	*
907	* Implementation by Ronny Van Keer,
908	* hereby denoted as "the implementer".
909	*
910	* To the extent possible under law, the implementer has waived all copyright
911	* and related or neighboring rights to the source code in this file.
912	* http://creativecommons.org/publicdomain/zero/1.0/
913	*
914	* Busybox modifications (C) Lauri Kasanen, under the GPLv2.
915	*/
916
917	#if CONFIG_SHA3_SMALL < 0
918	# define SHA3_SMALL 0
919	#elif CONFIG_SHA3_SMALL > 1
920	# define SHA3_SMALL 1
921	#else
922	# define SHA3_SMALL CONFIG_SHA3_SMALL
923	#endif
924
925	#define OPTIMIZE_SHA3_FOR_32 0
926	/*
927	* SHA3 can be optimized for 32-bit CPUs with bit-slicing:
928	* every 64-bit word of state[] can be split into two 32-bit words
929	* by even/odd bits. In this form, all rotations of sha3 round
930	* are 32-bit - and there are lots of them.
931	* However, it requires either splitting/combining state words
932	* before/after sha3 round (code does this now)
933	* or shuffling bits before xor'ing them into state and in sha3_end.
934	* Without shuffling, bit-slicing results in -130 bytes of code
935	* and marginal speedup (but of course it gives wrong result).
936	* With shuffling it works, but +260 code bytes, and slower.
937	* Disabled for now:
938	*/
939	#if 0 /* LONG_MAX == 0x7fffffff */
940	# undef OPTIMIZE_SHA3_FOR_32
941	# define OPTIMIZE_SHA3_FOR_32 1
942	#endif
943
944	enum {
945	SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
946	};
947
948	#if OPTIMIZE_SHA3_FOR_32
949	/* This splits every 64-bit word into a pair of 32-bit words,
950	* even bits go into first word, odd bits go to second one.
951	* The conversion is done in-place.
952	*/
953	static void split_halves(uint64_t *state)
954	{
955	/* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
956	uint32_t s32 = (uint32_t)state;
957	uint32_t t, x0, x1;
958	int i;
959	for (i = 24; i >= 0; --i) {
960	x0 = s32[0];
961	t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
962	t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
963	t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
964	t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
965	x1 = s32[1];
966	t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
967	t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
968	t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
969	t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
970	*s32++ = (x0 & 0x0000FFFF) \| (x1 << 16);
971	*s32++ = (x0 >> 16) \| (x1 & 0xFFFF0000);
972	}
973	}
974	/* The reverse operation */
975	static void combine_halves(uint64_t *state)
976	{
977	uint32_t s32 = (uint32_t)state;
978	uint32_t t, x0, x1;
979	int i;
980	for (i = 24; i >= 0; --i) {
981	x0 = s32[0];
982	x1 = s32[1];
983	t = (x0 & 0x0000FFFF) \| (x1 << 16);
984	x1 = (x0 >> 16) \| (x1 & 0xFFFF0000);
985	x0 = t;
986	t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
987	t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
988	t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
989	t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
990	*s32++ = x0;
991	t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
992	t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
993	t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
994	t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
995	*s32++ = x1;
996	}
997	}
998	#endif
999
1000	/*
1001	* In the crypto literature this function is usually called Keccak-f().
1002	*/
1003	static void sha3_process_block72(uint64_t *state)
1004	{
1005	enum { NROUNDS = 24 };
1006
1007	#if OPTIMIZE_SHA3_FOR_32
1008	/*
1009	static const uint32_t IOTA_CONST_0[NROUNDS] = {
1010	0x00000001UL,
1011	0x00000000UL,
1012	0x00000000UL,
1013	0x00000000UL,
1014	0x00000001UL,
1015	0x00000001UL,
1016	0x00000001UL,
1017	0x00000001UL,
1018	0x00000000UL,
1019	0x00000000UL,
1020	0x00000001UL,
1021	0x00000000UL,
1022	0x00000001UL,
1023	0x00000001UL,
1024	0x00000001UL,
1025	0x00000001UL,
1026	0x00000000UL,
1027	0x00000000UL,
1028	0x00000000UL,
1029	0x00000000UL,
1030	0x00000001UL,
1031	0x00000000UL,
1032	0x00000001UL,
1033	0x00000000UL,
1034	};
1035	** bits are in lsb: 0101 0000 1111 0100 1111 0001
1036	*/
1037	uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
1038	static const uint32_t IOTA_CONST_1[NROUNDS] = {
1039	0x00000000UL,
1040	0x00000089UL,
1041	0x8000008bUL,
1042	0x80008080UL,
1043	0x0000008bUL,
1044	0x00008000UL,
1045	0x80008088UL,
1046	0x80000082UL,
1047	0x0000000bUL,
1048	0x0000000aUL,
1049	0x00008082UL,
1050	0x00008003UL,
1051	0x0000808bUL,
1052	0x8000000bUL,
1053	0x8000008aUL,
1054	0x80000081UL,
1055	0x80000081UL,
1056	0x80000008UL,
1057	0x00000083UL,
1058	0x80008003UL,
1059	0x80008088UL,
1060	0x80000088UL,
1061	0x00008000UL,
1062	0x80008082UL,
1063	};
1064
1065	uint32_t const s32 = (uint32_t)state;
1066	unsigned round;
1067
1068	split_halves(state);
1069
1070	for (round = 0; round < NROUNDS; round++) {
1071	unsigned x;
1072
1073	/* Theta */
1074	{
1075	uint32_t BC[20];
1076	for (x = 0; x < 10; ++x) {
1077	BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1078	}
1079	for (x = 0; x < 10; x += 2) {
1080	uint32_t ta, tb;
1081	ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1082	tb = BC[x+9] ^ BC[x+2];
1083	s32[x+0] ^= ta;
1084	s32[x+1] ^= tb;
1085	s32[x+10] ^= ta;
1086	s32[x+11] ^= tb;
1087	s32[x+20] ^= ta;
1088	s32[x+21] ^= tb;
1089	s32[x+30] ^= ta;
1090	s32[x+31] ^= tb;
1091	s32[x+40] ^= ta;
1092	s32[x+41] ^= tb;
1093	}
1094	}
1095	/* RhoPi */
1096	{
1097	uint32_t t0a,t0b, t1a,t1b;
1098	t1a = s32[1*2+0];
1099	t1b = s32[1*2+1];
1100
1101	#define RhoPi(PI_LANE, ROT_CONST) \
1102	t0a = s32[PI_LANE*2+0];\
1103	t0b = s32[PI_LANE*2+1];\
1104	if (ROT_CONST & 1) {\
1105	s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1106	s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1107	} else {\
1108	s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1109	s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1110	}\
1111	t1a = t0a; t1b = t0b;
1112
1113	RhoPi(10, 1)
1114	RhoPi( 7, 3)
1115	RhoPi(11, 6)
1116	RhoPi(17,10)
1117	RhoPi(18,15)
1118	RhoPi( 3,21)
1119	RhoPi( 5,28)
1120	RhoPi(16,36)
1121	RhoPi( 8,45)
1122	RhoPi(21,55)
1123	RhoPi(24, 2)
1124	RhoPi( 4,14)
1125	RhoPi(15,27)
1126	RhoPi(23,41)
1127	RhoPi(19,56)
1128	RhoPi(13, 8)
1129	RhoPi(12,25)
1130	RhoPi( 2,43)
1131	RhoPi(20,62)
1132	RhoPi(14,18)
1133	RhoPi(22,39)
1134	RhoPi( 9,61)
1135	RhoPi( 6,20)
1136	RhoPi( 1,44)
1137	#undef RhoPi
1138	}
1139	/* Chi */
1140	for (x = 0; x <= 40;) {
1141	uint32_t BC0, BC1, BC2, BC3, BC4;
1142	BC0 = s32[x + 0*2];
1143	BC1 = s32[x + 1*2];
1144	BC2 = s32[x + 2*2];
1145	s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1146	BC3 = s32[x + 3*2];
1147	s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1148	BC4 = s32[x + 4*2];
1149	s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1150	s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1151	s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1152	x++;
1153	BC0 = s32[x + 0*2];
1154	BC1 = s32[x + 1*2];
1155	BC2 = s32[x + 2*2];
1156	s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1157	BC3 = s32[x + 3*2];
1158	s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1159	BC4 = s32[x + 4*2];
1160	s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1161	s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1162	s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1163	x += 9;
1164	}
1165	/* Iota */
1166	s32[0] ^= IOTA_CONST_0bits & 1;
1167	IOTA_CONST_0bits >>= 1;
1168	s32[1] ^= IOTA_CONST_1[round];
1169	}
1170
1171	combine_halves(state);
1172	#else
1173	/* Native 64-bit algorithm */
1174	static const uint16_t IOTA_CONST[NROUNDS] = {
1175	/* Elements should be 64-bit, but top half is always zero
1176	* or 0x80000000. We encode 63rd bits in a separate word below.
1177	* Same is true for 31th bits, which lets us use 16-bit table
1178	* instead of 64-bit. The speed penalty is lost in the noise.
1179	*/
1180	0x0001,
1181	0x8082,
1182	0x808a,
1183	0x8000,
1184	0x808b,
1185	0x0001,
1186	0x8081,
1187	0x8009,
1188	0x008a,
1189	0x0088,
1190	0x8009,
1191	0x000a,
1192	0x808b,
1193	0x008b,
1194	0x8089,
1195	0x8003,
1196	0x8002,
1197	0x0080,
1198	0x800a,
1199	0x000a,
1200	0x8081,
1201	0x8080,
1202	0x0001,
1203	0x8008,
1204	};
1205	/* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1206	const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1207	/* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1208	const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1209
1210	static const uint8_t ROT_CONST[24] = {
1211	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1212	27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1213	};
1214	static const uint8_t PI_LANE[24] = {
1215	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1216	15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1217	};
1218	/static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };/
1219
1220	unsigned x;
1221	unsigned round;
1222
1223	if (BB_BIG_ENDIAN) {
1224	for (x = 0; x < 25; x++) {
1225	state[x] = SWAP_LE64(state[x]);
1226	}
1227	}
1228
1229	for (round = 0; round < NROUNDS; ++round) {
1230	/* Theta */
1231	{
1232	uint64_t BC[10];
1233	for (x = 0; x < 5; ++x) {
1234	BC[x + 5] = BC[x] = state[x]
1235	^ state[x + 5] ^ state[x + 10]
1236	^ state[x + 15] ^ state[x + 20];
1237	}
1238	/* Using 2x5 vector above eliminates the need to use
1239	* BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
1240	* and the code is a bit _smaller_.
1241	*/
1242	for (x = 0; x < 5; ++x) {
1243	uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
1244	state[x] ^= temp;
1245	state[x + 5] ^= temp;
1246	state[x + 10] ^= temp;
1247	state[x + 15] ^= temp;
1248	state[x + 20] ^= temp;
1249	}
1250	}
1251
1252	/* Rho Pi */
1253	if (SHA3_SMALL) {
1254	uint64_t t1 = state[1];
1255	for (x = 0; x < 24; ++x) {
1256	uint64_t t0 = state[PI_LANE[x]];
1257	state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
1258	t1 = t0;
1259	}
1260	} else {
1261	/* Especially large benefit for 32-bit arch (75% faster):
1262	* 64-bit rotations by non-constant usually are SLOW on those.
1263	* We resort to unrolling here.
1264	* This optimizes out PI_LANE[] and ROT_CONST[],
1265	* but generates 300-500 more bytes of code.
1266	*/
1267	uint64_t t0;
1268	uint64_t t1 = state[1];
1269	#define RhoPi_twice(x) \
1270	t0 = state[PI_LANE[x ]]; \
1271	state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1272	t1 = state[PI_LANE[x+1]]; \
1273	state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
1274	RhoPi_twice(0); RhoPi_twice(2);
1275	RhoPi_twice(4); RhoPi_twice(6);
1276	RhoPi_twice(8); RhoPi_twice(10);
1277	RhoPi_twice(12); RhoPi_twice(14);
1278	RhoPi_twice(16); RhoPi_twice(18);
1279	RhoPi_twice(20); RhoPi_twice(22);
1280	#undef RhoPi_twice
1281	}
1282	/* Chi */
1283	# if LONG_MAX > 0x7fffffff
1284	for (x = 0; x <= 20; x += 5) {
1285	uint64_t BC0, BC1, BC2, BC3, BC4;
1286	BC0 = state[x + 0];
1287	BC1 = state[x + 1];
1288	BC2 = state[x + 2];
1289	state[x + 0] = BC0 ^ ((~BC1) & BC2);
1290	BC3 = state[x + 3];
1291	state[x + 1] = BC1 ^ ((~BC2) & BC3);
1292	BC4 = state[x + 4];
1293	state[x + 2] = BC2 ^ ((~BC3) & BC4);
1294	state[x + 3] = BC3 ^ ((~BC4) & BC0);
1295	state[x + 4] = BC4 ^ ((~BC0) & BC1);
1296	}
1297	# else
1298	/* Reduced register pressure version
1299	* for register-starved 32-bit arches
1300	* (i386: -95 bytes, and it is _faster_)
1301	*/
1302	for (x = 0; x <= 40;) {
1303	uint32_t BC0, BC1, BC2, BC3, BC4;
1304	uint32_t const s32 = (uint32_t)state;
1305	# if SHA3_SMALL
1306	do_half:
1307	# endif
1308	BC0 = s32[x + 0*2];
1309	BC1 = s32[x + 1*2];
1310	BC2 = s32[x + 2*2];
1311	s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1312	BC3 = s32[x + 3*2];
1313	s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1314	BC4 = s32[x + 4*2];
1315	s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1316	s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1317	s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1318	x++;
1319	# if SHA3_SMALL
1320	if (x & 1)
1321	goto do_half;
1322	x += 8;
1323	# else
1324	BC0 = s32[x + 0*2];
1325	BC1 = s32[x + 1*2];
1326	BC2 = s32[x + 2*2];
1327	s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1328	BC3 = s32[x + 3*2];
1329	s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1330	BC4 = s32[x + 4*2];
1331	s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1332	s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1333	s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1334	x += 9;
1335	# endif
1336	}
1337	# endif /* long is 32-bit */
1338	/* Iota */
1339	state[0] ^= IOTA_CONST[round]
1340	\| (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1341	\| (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
1342	}
1343
1344	if (BB_BIG_ENDIAN) {
1345	for (x = 0; x < 25; x++) {
1346	state[x] = SWAP_LE64(state[x]);
1347	}
1348	}
1349	#endif
1350	}
1351
1352	void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1353	{
1354	memset(ctx, 0, sizeof(*ctx));
1355	}
1356
1357	void FAST_FUNC sha3_hash(sha3_ctx_t ctx, const void buffer, size_t len)
1358	{
1359	#if SHA3_SMALL
1360	const uint8_t *data = buffer;
1361	unsigned bufpos = ctx->bytes_queued;
1362
1363	while (1) {
1364	unsigned remaining = SHA3_IBLK_BYTES - bufpos;
1365	if (remaining > len)
1366	remaining = len;
1367	len -= remaining;
1368	/* XOR data into buffer */
1369	while (remaining != 0) {
1370	uint8_t buf = (uint8_t)ctx->state;
1371	buf[bufpos] ^= *data++;
1372	bufpos++;
1373	remaining--;
1374	}
1375	/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
1376	bufpos -= SHA3_IBLK_BYTES;
1377	if (bufpos != 0)
1378	break;
1379	/* Buffer is filled up, process it */
1380	sha3_process_block72(ctx->state);
1381	/bufpos = 0; - already is /
1382	}
1383	ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
1384	#else
1385	/* +50 bytes code size, but a bit faster because of long-sized XORs */
1386	const uint8_t *data = buffer;
1387	unsigned bufpos = ctx->bytes_queued;
1388
1389	/* If already data in queue, continue queuing first */
1390	while (len != 0 && bufpos != 0) {
1391	uint8_t buf = (uint8_t)ctx->state;
1392	buf[bufpos] ^= *data++;
1393	len--;
1394	bufpos++;
1395	if (bufpos == SHA3_IBLK_BYTES) {
1396	bufpos = 0;
1397	goto do_block;
1398	}
1399	}
1400
1401	/* Absorb complete blocks */
1402	while (len >= SHA3_IBLK_BYTES) {
1403	/* XOR data onto beginning of state[].
1404	* We try to be efficient - operate one word at a time, not byte.
1405	* Careful wrt unaligned access: can't just use "(long)data"!
1406	*/
1407	unsigned count = SHA3_IBLK_BYTES / sizeof(long);
1408	long buf = (long)ctx->state;
1409	do {
1410	long v;
1411	move_from_unaligned_long(v, (long*)data);
1412	*buf++ ^= v;
1413	data += sizeof(long);
1414	} while (--count);
1415	len -= SHA3_IBLK_BYTES;
1416	do_block:
1417	sha3_process_block72(ctx->state);
1418	}
1419
1420	/* Queue remaining data bytes */
1421	while (len != 0) {
1422	uint8_t buf = (uint8_t)ctx->state;
1423	buf[bufpos] ^= *data++;
1424	bufpos++;
1425	len--;
1426	}
1427
1428	ctx->bytes_queued = bufpos;
1429	#endif
1430	}
1431
1432	void FAST_FUNC sha3_end(sha3_ctx_t ctx, void resbuf)
1433	{
1434	/* Padding */
1435	uint8_t buf = (uint8_t)ctx->state;
1436	buf[ctx->bytes_queued] ^= 1;
1437	buf[SHA3_IBLK_BYTES - 1] ^= 0x80;
1438
1439	sha3_process_block72(ctx->state);
1440
1441	/* Output */
1442	memcpy(resbuf, ctx->state, 64);
1443	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: MondoRescue/branches/3.3/mindi-busybox/libbb/hash_md5_sha.c@ 3621

Download in other formats: