-/* Modified for SILC -Pekka */
-
-/* LibTomCrypt, modular cryptographic library -- Tom St Denis
- *
- * LibTomCrypt is a library that provides various cryptographic
- * algorithms in a highly modular and flexible manner.
- *
- * The library is free for all purposes without any express
- * guarantee it works.
- *
- * Tom St Denis, tomstdenis@gmail.com, http://libtomcrypt.org
- */
+/* Taken from public domain libtomcrypt library and the code and all changes
+ to it are in public domain. -Pekka */
+
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis */
+
#include "silc.h"
#include "sha256_internal.h"
#include "sha256.h"
SILC_HASH_API_TRANSFORM(sha256)
{
- sha256_compress(state, (unsigned char *)buffer);
+ sha256_transform(state, (unsigned char *)buffer); /* hash one input block.
+   state and buffer are not declared in this file view; presumably they
+   are parameters supplied by the SILC_HASH_API_TRANSFORM macro expansion
+   - TODO confirm against the hash API header. */
}
SILC_HASH_API_CONTEXT_LEN(sha256)
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
-/* compress 512-bits */
-int sha256_compress(SilcUInt32 *state, unsigned char *buf)
-{
- SilcUInt32 S[8], W[64], t0, t1;
- int i;
-
- /* copy state into S */
- for (i = 0; i < 8; i++) {
- S[i] = state[i];
- }
-
- /* copy the state into 512-bits into W[0..15] */
- for (i = 0; i < 16; i++)
- SILC_GET32_MSB(W[i], buf + (4 * i));
+#ifndef SILC_SHA256_ASM
- /* fill W[16..63] */
- for (i = 16; i < 64; i++) {
- W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
- }
-
- /* Compress */
-#define RND(a,b,c,d,e,f,g,h,i,ki) \
- t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
- t1 = Sigma0(a) + Maj(a, b, c); \
- d += t0; \
- h = t0 + t1;
-
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
+/* Transform 512-bits: the SHA-256 compression step for one 64-byte block.
+   The 16 words read from buf (through SILC_GET32_MSB) are expanded to the
+   64-entry schedule W, the 64 unrolled rounds run on a working copy S of
+   the eight words in state, and S is then added back into state.
+   Nothing is returned.  This C version lies inside the SILC_SHA256_ASM
+   conditional, so it is compiled only when that symbol is not defined. */
+void sha256_transform(SilcUInt32 *state, unsigned char *buf)
+{
+ SilcUInt32 S[8], W[64], t0, t1;
+ int i;
+
+ /* copy state into the working variables S[0..7] */
+ for (i = 0; i < 8; i++) {
+ S[i] = state[i];
+ }
+
+ /* load the 512-bit input block into W[0..15], one 32-bit word per
+    4 bytes of buf (MSB first, going by the macro name) */
+ for (i = 0; i < 16; i++)
+ SILC_GET32_MSB(W[i], buf + (4 * i));
+
+ /* fill W[16..63]: each word is derived from four earlier words */
+ for (i = 16; i < 64; i++) {
+ W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
+ }
+
+ /* Compress: one RND per round.  A round only writes its d and h
+    arguments; instead of moving values between the working variables,
+    each following call shifts which S[] entry plays which role, and the
+    argument pattern repeats every 8 rounds. */
+#define RND(a,b,c,d,e,f,g,h,i,ki) \
+ t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
+ t1 = Sigma0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1;
+
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
+ RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
+ RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
+ RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
+ RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
+ RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
+ RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
+ RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
#undef RND
- /* feedback */
- for (i = 0; i < 8; i++) {
- state[i] = state[i] + S[i];
- }
- return TRUE;
+ /* feedback: add the working variables back into the caller's state */
+ for (i = 0; i < 8; i++) {
+ state[i] = state[i] + S[i];
+ }
}
-/**
- Initialize the hash state
- @param md The hash state you wish to initialize
- @return CRYPT_OK if successful
-*/
+#endif /* !SILC_SHA256_ASM */
+/* Initialize the hash state md for a new SHA-256 computation: the bit
+   counter md->length and the buffered byte count md->curlen are zeroed
+   and md->state[0..7] is loaded with the eight SHA-256 initial hash
+   values.  Always returns TRUE. */
int sha256_init(sha256_state * md)
{
- md->length = 0;
- md->curlen = 0;
- md->state[0] = 0x6A09E667UL;
- md->state[1] = 0xBB67AE85UL;
- md->state[2] = 0x3C6EF372UL;
- md->state[3] = 0xA54FF53AUL;
- md->state[4] = 0x510E527FUL;
- md->state[5] = 0x9B05688CUL;
- md->state[6] = 0x1F83D9ABUL;
- md->state[7] = 0x5BE0CD19UL;
- return TRUE;
+ md->length = 0; /* message length hashed so far, counted in bits */
+ md->curlen = 0; /* number of bytes currently waiting in md->buf */
+ md->state[0] = 0x6A09E667UL;
+ md->state[1] = 0xBB67AE85UL;
+ md->state[2] = 0x3C6EF372UL;
+ md->state[3] = 0xA54FF53AUL;
+ md->state[4] = 0x510E527FUL;
+ md->state[5] = 0x9B05688CUL;
+ md->state[6] = 0x1F83D9ABUL;
+ md->state[7] = 0x5BE0CD19UL;
+ return TRUE;
}
#if !defined(MIN)
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
-/**
- Process a block of memory though the hash
- @param md The hash state
- @param in The data to hash
- @param inlen The length of the data (octets)
- @return CRYPT_OK if successful
-*/
int sha256_process(sha256_state * md, const unsigned char *in,
unsigned long inlen)
{
- unsigned long n;
- int err, block_size = sizeof(md->buf);
-
- if (md->curlen > block_size)
- return FALSE;
-
- while (inlen > 0) {
- if (md->curlen == 0 && inlen >= block_size) {
- if ((err = sha256_compress(md->state, (unsigned char *)in)) != TRUE)
- return err;
- md->length += block_size * 8;
- in += block_size;
- inlen -= block_size;
- } else {
- n = MIN(inlen, (block_size - md->curlen));
- memcpy(md->buf + md->curlen, in, (size_t)n);
- md->curlen += n;
- in += n;
- inlen -= n;
- if (md->curlen == block_size) {
- if ((err = sha256_compress(md->state, md->buf)) != TRUE)
- return err;
- md->length += block_size * 8;
- md->curlen = 0;
- }
- }
+ unsigned long n;
+ int block_size = sizeof(md->buf);
+ /* Absorb inlen bytes from in into the hash state md.  Whole blocks are
+    transformed straight from the caller's buffer while nothing is pending
+    in md->buf; otherwise bytes are staged in md->buf and transformed once
+    it is full.  md->length grows by block_size * 8 (bits) per transformed
+    block.  Returns TRUE, or FALSE without hashing anything when
+    md->curlen is already larger than the buffer. */
+ if (md->curlen > block_size)
+ return FALSE;
+
+ while (inlen > 0) {
+ if (md->curlen == 0 && inlen >= block_size) { /* no copy needed */
+ sha256_transform(md->state, (unsigned char *)in);
+ md->length += block_size * 8;
+ in += block_size;
+ inlen -= block_size;
+ } else {
+ n = MIN(inlen, (block_size - md->curlen)); /* what still fits in buf */
+ memcpy(md->buf + md->curlen, in, (size_t)n);
+ md->curlen += n;
+ in += n;
+ inlen -= n;
+ if (md->curlen == block_size) { /* staged block complete: hash it */
+ sha256_transform(md->state, md->buf);
+ md->length += block_size * 8;
+ md->curlen = 0;
+ }
}
- return TRUE;
+ }
+ return TRUE;
}
-/**
- Terminate the hash to get the digest
- @param md The hash state
- @param out [out] The destination of the hash (32 bytes)
- @return CRYPT_OK if successful
-*/
int sha256_done(sha256_state * md, unsigned char *out)
{
- int i;
-
- if (md->curlen >= sizeof(md->buf))
- return FALSE;
-
- /* increase the length of the message */
- md->length += md->curlen * 8;
-
- /* append the '1' bit */
- md->buf[md->curlen++] = (unsigned char)0x80;
-
- /* if the length is currently above 56 bytes we append zeros
- * then compress. Then we can fall back to padding zeros and length
- * encoding like normal.
- */
- if (md->curlen > 56) {
- while (md->curlen < 64) {
- md->buf[md->curlen++] = (unsigned char)0;
- }
- sha256_compress(md->state, md->buf);
- md->curlen = 0;
- }
+ int i;
+ /* Finish the hash: pad the bytes still buffered in md->buf (a 0x80 byte,
+    zeroes, then the 64-bit bit length), run the final transform(s) and
+    write the eight state words, 32 bytes in total, to out.  Returns TRUE,
+    or FALSE when md->curlen is not smaller than the buffer size. */
+ if (md->curlen >= sizeof(md->buf))
+ return FALSE;
- /* pad upto 56 bytes of zeroes */
- while (md->curlen < 56) {
- md->buf[md->curlen++] = (unsigned char)0;
+ /* increase the length of the message by the buffered bytes, in bits */
+ md->length += md->curlen * 8;
+
+ /* append the '1' bit */
+ md->buf[md->curlen++] = (unsigned char)0x80;
+
+ /* if more than 56 bytes are now in use there is no room left for the
+ * 8-byte length field: fill the rest of this block with zeros and
+ * transform it. Then we can fall back to padding zeros and length
+ * encoding like normal, starting from an empty block.
+ */
+ if (md->curlen > 56) {
+ while (md->curlen < 64) {
+ md->buf[md->curlen++] = (unsigned char)0;
}
+ sha256_transform(md->state, md->buf);
+ md->curlen = 0;
+ }
+
+ /* pad with zeroes up to byte offset 56 */
+ while (md->curlen < 56) {
+ md->buf[md->curlen++] = (unsigned char)0;
+ }
- /* store length */
- SILC_PUT64_MSB(md->length, md->buf + 56);
- sha256_compress(md->state, md->buf);
+ /* store the 64-bit bit length in the last 8 bytes and hash the block */
+ SILC_PUT64_MSB(md->length, md->buf + 56);
+ sha256_transform(md->state, md->buf);
- /* copy output */
- for (i = 0; i < 8; i++)
- SILC_PUT32_MSB(md->state[i], out + (4 * i));
+ /* copy output: 4 bytes per state word, two words per iteration */
+ for (i = 0; i < 8; i += 2) {
+ SILC_PUT32_MSB(md->state[i], out + (4 * i));
+ SILC_PUT32_MSB(md->state[i + 1], out + (4 * (i + 1)));
+ }
- return TRUE;
+ return TRUE;
}
--- /dev/null
+/*
+
+ sha256_x86.S
+
+ Author: Pekka Riikonen <priikone@silcnet.org>
+
+ Copyright (C) 2007 Pekka Riikonen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+*/
+
+/* SHA-256 x86 assembler implementation. This implements only the SHA-256
+ transform function and other parts are implemented in sha256.c. The
+ function preserves ebp, ebx, edi and esi but does not preserve other
+ registers (eax, ecx and edx are clobbered).
+
+ This implementation uses only 32-bit registers. It does not use MMX or
+ SSE registers which could be used to enhance the performance, especially
+ when loading the W. This is about as fast as we can get with fewer than
+ 8 32-bit registers on a 32-bit CPU.
+
+*/
+
+#ifdef SILC_SHA256_ASM
+
+#define STACK_STATE (8 * 4) /* bytes for the working copy of the 8 state words */
+#define STACK_W (64 * 4) /* bytes for the expanded schedule W[0..63] */
+#define STACK_SIZE STACK_STATE + STACK_W /* whole frame: W at 0(%esp), state copy at STACK_W(%esp) */
+#define ARG_STATE STACK_SIZE + 20(%esp) /* 1st argument; 20 = return address + the four pushed registers */
+#define ARG_BUF STACK_SIZE + 24(%esp) /* 2nd argument */
+/* Byte offsets of the eight 32-bit state words within a state array */
+#define A 0
+#define B 4
+#define C 8
+#define D 12
+#define E 16
+#define F 20
+#define G 24
+#define H 28
+/* Names for the six general purpose registers used as working registers;
+   ebp and esp serve as base pointers and get no alias */
+#define r0 %eax
+#define r1 %ebx
+#define r2 %ecx
+#define r3 %edx
+#define r4 %edi
+#define r5 %esi
+
+/* One round of SHA-256. The a (r0) and e (r1) are inputs already in
+ registers; b, c, d, f, g and h are byte offsets of their slots in the
+ state copy addressed by %ebp. W is the index of the schedule word in
+ the W array at (%esp) and ki is the round constant. On exit r1 holds
+ d + t0, which is the e of the next round, and r0 holds t0 + t1, which
+ is the a of the next round. r2-r5 are used as scratch. */
+#define RND(a, b, c, d, e, f, g, h, W, ki) \
+ movl f(%ebp), r2; \
+ movl g(%ebp), r3; \
+ \
+ movl e, r4; /* e to Sigma1 */ \
+ rorl $6, r4; /* Sigma1 >>= 6 */ \
+ movl r4, r5; /* Sigma1 to temp */ \
+ rorl $5, r4; /* Sigma1 >>= 5 (11) */ \
+ xorl r4, r5; /* temp ^= Sigma1 */ \
+ rorl $14, r4; /* Sigma1 >>= 14 (25) */ \
+ xorl r5, r4; /* Sigma1 ^= temp */ \
+ \
+ movl r3, r5; /* g to Ch */ \
+ xorl r2, r5; /* Ch ^= f */ \
+ andl e, r5; /* Ch &= e */ \
+ xorl r3, r5; /* Ch ^= g */ \
+ \
+ movl h(%ebp), r3; \
+ movl d(%ebp), r1; \
+ leal ki(r4, r5), r4; /* t0 = Sigma1 + Ch + ki */ \
+ addl W * 4(%esp), r4; /* t0 += W[i] */ \
+ addl r4, r3; /* h += t0 */ \
+ addl r3, r1; /* d += h (t0) */ \
+ \
+ movl a, r4; /* a to Sigma0 */ \
+ rorl $2, r4; /* Sigma0 >>= 2 */ \
+ movl r4, r5; /* Sigma0 to temp */ \
+ rorl $11, r4; /* Sigma0 >>= 11 (13) */ \
+ xorl r4, r5; /* temp ^= Sigma0 */ \
+ rorl $9, r4; /* Sigma0 >>= 9 (22) */ \
+ xorl r5, r4; /* Sigma0 ^= temp */ \
+ \
+ addl r3, r4; /* t1 = Sigma0 + h (t0) */ \
+ movl b(%ebp), r2; \
+ movl c(%ebp), r3; \
+ \
+ movl r2, r5; /* b to temp */ \
+ orl a, r5; /* temp |= a */ \
+ andl r3, r5; /* temp &= c */ \
+ andl r2, a; /* a &= b */ \
+ orl r5, a; /* a |= temp */ \
+ addl r4, r0; /* h = t0 + t1 */
+/* RND followed by the write-back of both results to the state copy:
+   r1 goes to the d slot and r0 goes to the h slot. */
+#define ROUND(a, b, c, d, e, f, g, h, W, ki) \
+ RND(a, b, c, d, e, f, g, h, W, ki) \
+ movl r1, d(%ebp); /* Update d in stack */ \
+ movl r0, h(%ebp); /* Update h in stack */
+
+/* Get 64 bits from input buffer in MSB first order: loads the 32-bit
+   words i and i + 1 from the buffer addressed by r5, byte-swaps both and
+   stores them as W[i] and W[i + 1] at (%esp). Afterwards r4 still holds
+   W[i] and r3 holds W[i + 1]. */
+#define GET_BUF(i) \
+ movl i * 4(r5), r4; \
+ movl (i + 1) * 4(r5), r3; \
+ bswapl r4; \
+ bswapl r3; \
+ movl r4, i * 4(%esp); \
+ movl r3, (i + 1) * 4(%esp);
+
+/* Expand the input:
+   W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16].
+   Expects W[i-2] in both r2 and r4 on entry. Uses r2-r5 only, so r0 and
+   r1 are left alone. The new W[i] is stored and also remains in r3. */
+#define EXP_BUF(i) \
+ rorl $17, r4; /* Gamma1 >>= 17 */ \
+ movl r4, r5; /* Gamma1 to temp */ \
+ rorl $2, r4; /* Gamma1 >>= 2 (19) */ \
+ xorl r4, r5; /* temp ^= Gamma1 */ \
+ shrl $10, r2; /* w-2 >> 10 */ \
+ xorl r5, r2; /* Gamma1 = w-2 ^ temp */ \
+ \
+ addl (i - 7) * 4(%esp), r2; /* Gamma1 += w-7 */ \
+ addl (i - 16) * 4(%esp), r2; /* Gamma1 += w-16 */ \
+ \
+ movl (i - 15) * 4(%esp), r3; \
+ movl r3, r4; /* w-15 to Gamma0 */ \
+ rorl $7, r4; /* Gamma0 >>= 7 */ \
+ movl r4, r5; /* Gamma0 to temp */ \
+ rorl $11, r4; /* Gamma0 >>= 11 (18) */ \
+ xorl r4, r5; /* temp ^= Gamma0 */ \
+ shrl $3, r3; /* w-15 >> 3 */ \
+ xorl r5, r3; /* Gamma0 = w-15 ^ temp */ \
+ \
+ addl r2, r3; /* Gamma0 += Gamma1 */ \
+ movl r3, i * 4(%esp);
+/* EXP_BUF for the case where W[i-2] is already in r4; used for W[16]
+   directly after GET_BUF(14), which leaves W[14] in r4. */
+#define EXP_BUF0(i) \
+ movl r4, r2; \
+ EXP_BUF(i)
+/* EXP_BUF that first loads W[i-2] from the W array on the stack. */
+#define EXP_BUFX(i) \
+ movl (i - 2) * 4(%esp), r2; \
+ movl r2, r4; /* w-2 to Gamma1 */ \
+ EXP_BUF(i)
+
+/* sha256_transform(state, buf): assembler version of the transform that
+   sha256.c otherwise provides in C. Both arguments are read from the
+   stack (ARG_STATE, ARG_BUF). ebp, ebx, edi and esi are saved here and
+   restored before returning; eax, ecx and edx are not preserved.
+   Frame layout: W[0..63] at 0(%esp), copy of the state at STACK_W(%esp). */
+.text
+.align 4
+.globl sha256_transform
+sha256_transform:
+ pushl %ebp
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ subl $STACK_SIZE, %esp
+
+ /* State to stack: copy the eight state words into the frame. After
+    this r0 still holds a and r1 holds e, which is where the first
+    round expects them. */
+ movl ARG_STATE, %ebp
+ movl A(%ebp), r0
+ movl B(%ebp), r1
+ movl C(%ebp), r2
+ movl D(%ebp), r3
+ movl r0, A + STACK_W(%esp)
+ movl r1, B + STACK_W(%esp)
+ movl r2, C + STACK_W(%esp)
+ movl r3, D + STACK_W(%esp)
+ movl E(%ebp), r1
+ movl F(%ebp), r2
+ movl G(%ebp), r3
+ movl H(%ebp), r4
+ movl r1, E + STACK_W(%esp)
+ movl r2, F + STACK_W(%esp)
+ movl r3, G + STACK_W(%esp)
+ movl r4, H + STACK_W(%esp)
+
+ /* Get buf in MSB first order, W[0..15] */
+ movl ARG_BUF, r5
+ GET_BUF(0) GET_BUF(2) GET_BUF(4) GET_BUF(6)
+ GET_BUF(8) GET_BUF(10) GET_BUF(12) GET_BUF(14)
+
+ /* Expand input, fill in W[16..63] */
+ EXP_BUF0(16) EXP_BUFX(17) EXP_BUFX(18) EXP_BUFX(19) EXP_BUFX(20)
+ EXP_BUFX(21) EXP_BUFX(22) EXP_BUFX(23) EXP_BUFX(24) EXP_BUFX(25)
+ EXP_BUFX(26) EXP_BUFX(27) EXP_BUFX(28) EXP_BUFX(29) EXP_BUFX(30)
+ EXP_BUFX(31) EXP_BUFX(32) EXP_BUFX(33) EXP_BUFX(34) EXP_BUFX(35)
+ EXP_BUFX(36) EXP_BUFX(37) EXP_BUFX(38) EXP_BUFX(39) EXP_BUFX(40)
+ EXP_BUFX(41) EXP_BUFX(42) EXP_BUFX(43) EXP_BUFX(44) EXP_BUFX(45)
+ EXP_BUFX(46) EXP_BUFX(47) EXP_BUFX(48) EXP_BUFX(49) EXP_BUFX(50)
+ EXP_BUFX(51) EXP_BUFX(52) EXP_BUFX(53) EXP_BUFX(54) EXP_BUFX(55)
+ EXP_BUFX(56) EXP_BUFX(57) EXP_BUFX(58) EXP_BUFX(59) EXP_BUFX(60)
+ EXP_BUFX(61) EXP_BUFX(62) EXP_BUFX(63)
+
+ /* Hash, r0 and r1 set above, ebp is base address to state (the copy
+    in the frame). No values are moved between slots: each ROUND shifts
+    the slot names passed for b..d and f..h by one, and the argument
+    pattern repeats every 8 rounds. */
+ leal STACK_W(%esp), %ebp
+
+ ROUND(r0, B, C, D, r1, F, G, H, 0, 0x428a2f98);
+ ROUND(r0, A, B, C, r1, E, F, G, 1, 0x71374491);
+ ROUND(r0, H, A, B, r1, D, E, F, 2, 0xb5c0fbcf);
+ ROUND(r0, G, H, A, r1, C, D, E, 3, 0xe9b5dba5);
+ ROUND(r0, F, G, H, r1, B, C, D, 4, 0x3956c25b);
+ ROUND(r0, E, F, G, r1, A, B, C, 5, 0x59f111f1);
+ ROUND(r0, D, E, F, r1, H, A, B, 6, 0x923f82a4);
+ ROUND(r0, C, D, E, r1, G, H, A, 7, 0xab1c5ed5);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 8, 0xd807aa98);
+ ROUND(r0, A, B, C, r1, E, F, G, 9, 0x12835b01);
+ ROUND(r0, H, A, B, r1, D, E, F, 10, 0x243185be);
+ ROUND(r0, G, H, A, r1, C, D, E, 11, 0x550c7dc3);
+ ROUND(r0, F, G, H, r1, B, C, D, 12, 0x72be5d74);
+ ROUND(r0, E, F, G, r1, A, B, C, 13, 0x80deb1fe);
+ ROUND(r0, D, E, F, r1, H, A, B, 14, 0x9bdc06a7);
+ ROUND(r0, C, D, E, r1, G, H, A, 15, 0xc19bf174);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 16, 0xe49b69c1);
+ ROUND(r0, A, B, C, r1, E, F, G, 17, 0xefbe4786);
+ ROUND(r0, H, A, B, r1, D, E, F, 18, 0x0fc19dc6);
+ ROUND(r0, G, H, A, r1, C, D, E, 19, 0x240ca1cc);
+ ROUND(r0, F, G, H, r1, B, C, D, 20, 0x2de92c6f);
+ ROUND(r0, E, F, G, r1, A, B, C, 21, 0x4a7484aa);
+ ROUND(r0, D, E, F, r1, H, A, B, 22, 0x5cb0a9dc);
+ ROUND(r0, C, D, E, r1, G, H, A, 23, 0x76f988da);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 24, 0x983e5152);
+ ROUND(r0, A, B, C, r1, E, F, G, 25, 0xa831c66d);
+ ROUND(r0, H, A, B, r1, D, E, F, 26, 0xb00327c8);
+ ROUND(r0, G, H, A, r1, C, D, E, 27, 0xbf597fc7);
+ ROUND(r0, F, G, H, r1, B, C, D, 28, 0xc6e00bf3);
+ ROUND(r0, E, F, G, r1, A, B, C, 29, 0xd5a79147);
+ ROUND(r0, D, E, F, r1, H, A, B, 30, 0x06ca6351);
+ ROUND(r0, C, D, E, r1, G, H, A, 31, 0x14292967);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 32, 0x27b70a85);
+ ROUND(r0, A, B, C, r1, E, F, G, 33, 0x2e1b2138);
+ ROUND(r0, H, A, B, r1, D, E, F, 34, 0x4d2c6dfc);
+ ROUND(r0, G, H, A, r1, C, D, E, 35, 0x53380d13);
+ ROUND(r0, F, G, H, r1, B, C, D, 36, 0x650a7354);
+ ROUND(r0, E, F, G, r1, A, B, C, 37, 0x766a0abb);
+ ROUND(r0, D, E, F, r1, H, A, B, 38, 0x81c2c92e);
+ ROUND(r0, C, D, E, r1, G, H, A, 39, 0x92722c85);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 40, 0xa2bfe8a1);
+ ROUND(r0, A, B, C, r1, E, F, G, 41, 0xa81a664b);
+ ROUND(r0, H, A, B, r1, D, E, F, 42, 0xc24b8b70);
+ ROUND(r0, G, H, A, r1, C, D, E, 43, 0xc76c51a3);
+ ROUND(r0, F, G, H, r1, B, C, D, 44, 0xd192e819);
+ ROUND(r0, E, F, G, r1, A, B, C, 45, 0xd6990624);
+ ROUND(r0, D, E, F, r1, H, A, B, 46, 0xf40e3585);
+ ROUND(r0, C, D, E, r1, G, H, A, 47, 0x106aa070);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 48, 0x19a4c116);
+ ROUND(r0, A, B, C, r1, E, F, G, 49, 0x1e376c08);
+ ROUND(r0, H, A, B, r1, D, E, F, 50, 0x2748774c);
+ ROUND(r0, G, H, A, r1, C, D, E, 51, 0x34b0bcb5);
+ ROUND(r0, F, G, H, r1, B, C, D, 52, 0x391c0cb3);
+ ROUND(r0, E, F, G, r1, A, B, C, 53, 0x4ed8aa4a);
+ ROUND(r0, D, E, F, r1, H, A, B, 54, 0x5b9cca4f);
+ ROUND(r0, C, D, E, r1, G, H, A, 55, 0x682e6ff3);
+
+ ROUND(r0, B, C, D, r1, F, G, H, 56, 0x748f82ee);
+ ROUND(r0, A, B, C, r1, E, F, G, 57, 0x78a5636f);
+ ROUND(r0, H, A, B, r1, D, E, F, 58, 0x84c87814);
+ ROUND(r0, G, H, A, r1, C, D, E, 59, 0x8cc70208);
+ ROUND(r0, F, G, H, r1, B, C, D, 60, 0x90befffa);
+ ROUND(r0, E, F, G, r1, A, B, C, 61, 0xa4506ceb);
+ ROUND(r0, D, E, F, r1, H, A, B, 62, 0xbef9a3f7);
+ RND (r0, C, D, E, r1, G, H, A, 63, 0xc67178f2);
+
+ /* Update state from stack: the last round used RND without the
+    write-back, so the final a and e are still in r0 and r1 and are
+    added first; the other six words are then loaded from the frame.
+    Every word is added to the state array of the caller. */
+ movl ARG_STATE, %ebp
+ addl r0, A(%ebp) /* a from last round */
+ addl r1, E(%ebp) /* e from last round */
+ movl B + STACK_W(%esp), r0
+ movl C + STACK_W(%esp), r1
+ movl D + STACK_W(%esp), r2
+ movl F + STACK_W(%esp), r3
+ movl G + STACK_W(%esp), r4
+ movl H + STACK_W(%esp), r5
+ addl r0, B(%ebp)
+ addl r1, C(%ebp)
+ addl r2, D(%ebp)
+ addl r3, F(%ebp)
+ addl r4, G(%ebp)
+ addl r5, H(%ebp)
+
+ addl $STACK_SIZE, %esp
+ popl %esi
+ popl %edi
+ popl %ebx
+ popl %ebp
+
+ ret
+
+#endif /* SILC_SHA256_ASM */