5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2007 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
20 /* SHA-256 x86 assembler implementation. This implements only the SHA-256
21 transform function and other parts are implemented in sha256.c. The
22 function preserves ebp, edx, edi and esi but does not preserve other registers.
25 This implementation uses only 32-bit registers. It does not use MMX or
26 SSE registers which could be used to enhance the performance, especially
27 when loading the W. This is about as fast as we can get with fewer than
28 eight 32-bit registers on a 32-bit CPU.
32 #ifdef SILC_SHA256_ASM
34 #define STACK_STATE (8 * 4) /* working state copy: eight 32-bit words, kept at STACK_W(%esp) */
35 #define STACK_W (64 * 4) /* message schedule W[0..63], kept at 0(%esp) */
36 #define STACK_SIZE STACK_STATE + STACK_W /* total bytes subtracted from %esp for the frame */
37 #define ARG_STATE STACK_SIZE + 20(%esp) /* first argument slot above the frame; NOTE(review): the +20 presumably skips the return address and saved registers - confirm against the prologue */
38 #define ARG_BUF STACK_SIZE + 24(%esp) /* next argument slot, 4 bytes after ARG_STATE */
56 /* One round of SHA-256. The a (r0) and e (r1) are inputs already in
57 registers. On exit r0 holds the next round's a and r1 the next round's
58 e; the caller stores r1 into slot d and r0 into slot h (see ROUND). */
59 #define RND(a, b, c, d, e, f, g, h, W, ki) \
63 movl e, r4; /* e to Sigma1 */ \
64 rorl $6, r4; /* Sigma1 >>= 6 */ \
65 movl r4, r5; /* Sigma1 to temp */ \
66 rorl $5, r4; /* Sigma1 >>= 5 (11) */ \
67 xorl r4, r5; /* temp ^= Sigma1 */ \
68 rorl $14, r4; /* Sigma1 >>= 14 (25) */ \
69 xorl r5, r4; /* Sigma1 ^= temp */ \
71 movl r3, r5; /* g to Ch */ \
72 xorl r2, r5; /* Ch ^= f */ \
73 andl e, r5; /* Ch &= e */ \
74 xorl r3, r5; /* Ch ^= g */ \
78 leal ki(r4, r5), r4; /* t0 = Sigma1 + Ch + ki */ \
79 addl W * 4(%esp), r4; /* t0 += W[i] */ \
80 addl r4, r3; /* h += t0 */ \
81 addl r3, r1; /* d += h (t0) */ \
83 movl a, r4; /* a to Sigma0 */ \
84 rorl $2, r4; /* Sigma0 >>= 2 */ \
85 movl r4, r5; /* Sigma0 to temp */ \
86 rorl $11, r4; /* Sigma0 >>= 11 (13) */ \
87 xorl r4, r5; /* temp ^= Sigma0 */ \
88 rorl $9, r4; /* Sigma0 >>= 9 (22) */ \
89 xorl r5, r4; /* Sigma0 ^= temp */ \
91 addl r3, r4; /* t1 = Sigma0 + h (t0) */ \
95 movl r2, r5; /* b to temp */ \
96 orl a, r5; /* temp |= a */ \
97 andl r3, r5; /* temp &= c */ \
98 andl r2, a; /* a &= b */ \
99 orl r5, a; /* a |= temp */ \
100 addl r4, r0; /* h = t0 + t1 */
/* One RND followed by a write-back of both round outputs to the state block
   addressed by %ebp, so that the next round can read them through its
   rotated slot names. */
102 #define ROUND(a, b, c, d, e, f, g, h, W, ki) \
103 RND(a, b, c, d, e, f, g, h, W, ki) \
104 movl r1, d(%ebp); /* Store r1 (next round's e) into slot d */ \
105 movl r0, h(%ebp); /* Store r0 (next round's a) into slot h */
107 /* Get 64 bits from input buffer in MSB first order */
109 movl i * 4(r5), r4; \
110 movl (i + 1) * 4(r5), r3; \
113 movl r4, i * 4(%esp); \
114 movl r3, (i + 1) * 4(%esp);
116 /* Expand the input */
118 rorl $17, r4; /* Gamma1 >>= 17 */ \
119 movl r4, r5; /* Gamma1 to temp */ \
120 rorl $2, r4; /* Gamma1 >>= 2 (19) */ \
121 xorl r4, r5; /* temp ^= Gamma1 */ \
122 shrl $10, r2; /* w-2 >> 10 */ \
123 xorl r5, r2; /* Gamma1 = w-2 ^ temp */ \
125 addl (i - 7) * 4(%esp), r2; /* Gamma1 += w-7 */ \
126 addl (i - 16) * 4(%esp), r2; /* Gamma1 += w-16 */ \
128 movl (i - 15) * 4(%esp), r3; \
129 movl r3, r4; /* w-15 to Gamma0 */ \
130 rorl $7, r4; /* Gamma0 >>= 7 */ \
131 movl r4, r5; /* Gamma0 to temp */ \
132 rorl $11, r4; /* Gamma0 >>= 11 (18) */ \
133 xorl r4, r5; /* temp ^= Gamma0 */ \
134 shrl $3, r3; /* w-15 >> 3 */ \
135 xorl r5, r3; /* Gamma0 = w-15 ^ temp */ \
137 addl r2, r3; /* Gamma0 += Gamma1 */ \
138 movl r3, i * 4(%esp);
140 #define EXP_BUF0(i) \
144 #define EXP_BUFX(i) \
145 movl (i - 2) * 4(%esp), r2; \
146 movl r2, r4; /* w-2 to Gamma1 */ \
152 .globl sha256_transform
158 subl $STACK_SIZE, %esp
166 movl r0, A + STACK_W(%esp)
167 movl r1, B + STACK_W(%esp)
168 movl r2, C + STACK_W(%esp)
169 movl r3, D + STACK_W(%esp)
174 movl r1, E + STACK_W(%esp)
175 movl r2, F + STACK_W(%esp)
176 movl r3, G + STACK_W(%esp)
177 movl r4, H + STACK_W(%esp)
179 /* Get buf in MSB first order, W[0..15] */
181 GET_BUF(0) GET_BUF(2) GET_BUF(4) GET_BUF(6)
182 GET_BUF(8) GET_BUF(10) GET_BUF(12) GET_BUF(14)
184 /* Expand input, fill in W[16..63] */
185 EXP_BUF0(16) EXP_BUFX(17) EXP_BUFX(18) EXP_BUFX(19) EXP_BUFX(20)
186 EXP_BUFX(21) EXP_BUFX(22) EXP_BUFX(23) EXP_BUFX(24) EXP_BUFX(25)
187 EXP_BUFX(26) EXP_BUFX(27) EXP_BUFX(28) EXP_BUFX(29) EXP_BUFX(30)
188 EXP_BUFX(31) EXP_BUFX(32) EXP_BUFX(33) EXP_BUFX(34) EXP_BUFX(35)
189 EXP_BUFX(36) EXP_BUFX(37) EXP_BUFX(38) EXP_BUFX(39) EXP_BUFX(40)
190 EXP_BUFX(41) EXP_BUFX(42) EXP_BUFX(43) EXP_BUFX(44) EXP_BUFX(45)
191 EXP_BUFX(46) EXP_BUFX(47) EXP_BUFX(48) EXP_BUFX(49) EXP_BUFX(50)
192 EXP_BUFX(51) EXP_BUFX(52) EXP_BUFX(53) EXP_BUFX(54) EXP_BUFX(55)
193 EXP_BUFX(56) EXP_BUFX(57) EXP_BUFX(58) EXP_BUFX(59) EXP_BUFX(60)
194 EXP_BUFX(61) EXP_BUFX(62) EXP_BUFX(63)
196 /* Hash, r0 and r1 set above, ebp is base address to state */
197 leal STACK_W(%esp), %ebp
199 ROUND(r0, B, C, D, r1, F, G, H, 0, 0x428a2f98);
200 ROUND(r0, A, B, C, r1, E, F, G, 1, 0x71374491);
201 ROUND(r0, H, A, B, r1, D, E, F, 2, 0xb5c0fbcf);
202 ROUND(r0, G, H, A, r1, C, D, E, 3, 0xe9b5dba5);
203 ROUND(r0, F, G, H, r1, B, C, D, 4, 0x3956c25b);
204 ROUND(r0, E, F, G, r1, A, B, C, 5, 0x59f111f1);
205 ROUND(r0, D, E, F, r1, H, A, B, 6, 0x923f82a4);
206 ROUND(r0, C, D, E, r1, G, H, A, 7, 0xab1c5ed5);
208 ROUND(r0, B, C, D, r1, F, G, H, 8, 0xd807aa98);
209 ROUND(r0, A, B, C, r1, E, F, G, 9, 0x12835b01);
210 ROUND(r0, H, A, B, r1, D, E, F, 10, 0x243185be);
211 ROUND(r0, G, H, A, r1, C, D, E, 11, 0x550c7dc3);
212 ROUND(r0, F, G, H, r1, B, C, D, 12, 0x72be5d74);
213 ROUND(r0, E, F, G, r1, A, B, C, 13, 0x80deb1fe);
214 ROUND(r0, D, E, F, r1, H, A, B, 14, 0x9bdc06a7);
215 ROUND(r0, C, D, E, r1, G, H, A, 15, 0xc19bf174);
217 ROUND(r0, B, C, D, r1, F, G, H, 16, 0xe49b69c1);
218 ROUND(r0, A, B, C, r1, E, F, G, 17, 0xefbe4786);
219 ROUND(r0, H, A, B, r1, D, E, F, 18, 0x0fc19dc6);
220 ROUND(r0, G, H, A, r1, C, D, E, 19, 0x240ca1cc);
221 ROUND(r0, F, G, H, r1, B, C, D, 20, 0x2de92c6f);
222 ROUND(r0, E, F, G, r1, A, B, C, 21, 0x4a7484aa);
223 ROUND(r0, D, E, F, r1, H, A, B, 22, 0x5cb0a9dc);
224 ROUND(r0, C, D, E, r1, G, H, A, 23, 0x76f988da);
226 ROUND(r0, B, C, D, r1, F, G, H, 24, 0x983e5152);
227 ROUND(r0, A, B, C, r1, E, F, G, 25, 0xa831c66d);
228 ROUND(r0, H, A, B, r1, D, E, F, 26, 0xb00327c8);
229 ROUND(r0, G, H, A, r1, C, D, E, 27, 0xbf597fc7);
230 ROUND(r0, F, G, H, r1, B, C, D, 28, 0xc6e00bf3);
231 ROUND(r0, E, F, G, r1, A, B, C, 29, 0xd5a79147);
232 ROUND(r0, D, E, F, r1, H, A, B, 30, 0x06ca6351);
233 ROUND(r0, C, D, E, r1, G, H, A, 31, 0x14292967);
235 ROUND(r0, B, C, D, r1, F, G, H, 32, 0x27b70a85);
236 ROUND(r0, A, B, C, r1, E, F, G, 33, 0x2e1b2138);
237 ROUND(r0, H, A, B, r1, D, E, F, 34, 0x4d2c6dfc);
238 ROUND(r0, G, H, A, r1, C, D, E, 35, 0x53380d13);
239 ROUND(r0, F, G, H, r1, B, C, D, 36, 0x650a7354);
240 ROUND(r0, E, F, G, r1, A, B, C, 37, 0x766a0abb);
241 ROUND(r0, D, E, F, r1, H, A, B, 38, 0x81c2c92e);
242 ROUND(r0, C, D, E, r1, G, H, A, 39, 0x92722c85);
244 ROUND(r0, B, C, D, r1, F, G, H, 40, 0xa2bfe8a1);
245 ROUND(r0, A, B, C, r1, E, F, G, 41, 0xa81a664b);
246 ROUND(r0, H, A, B, r1, D, E, F, 42, 0xc24b8b70);
247 ROUND(r0, G, H, A, r1, C, D, E, 43, 0xc76c51a3);
248 ROUND(r0, F, G, H, r1, B, C, D, 44, 0xd192e819);
249 ROUND(r0, E, F, G, r1, A, B, C, 45, 0xd6990624);
250 ROUND(r0, D, E, F, r1, H, A, B, 46, 0xf40e3585);
251 ROUND(r0, C, D, E, r1, G, H, A, 47, 0x106aa070);
253 ROUND(r0, B, C, D, r1, F, G, H, 48, 0x19a4c116);
254 ROUND(r0, A, B, C, r1, E, F, G, 49, 0x1e376c08);
255 ROUND(r0, H, A, B, r1, D, E, F, 50, 0x2748774c);
256 ROUND(r0, G, H, A, r1, C, D, E, 51, 0x34b0bcb5);
257 ROUND(r0, F, G, H, r1, B, C, D, 52, 0x391c0cb3);
258 ROUND(r0, E, F, G, r1, A, B, C, 53, 0x4ed8aa4a);
259 ROUND(r0, D, E, F, r1, H, A, B, 54, 0x5b9cca4f);
260 ROUND(r0, C, D, E, r1, G, H, A, 55, 0x682e6ff3);
262 ROUND(r0, B, C, D, r1, F, G, H, 56, 0x748f82ee);
263 ROUND(r0, A, B, C, r1, E, F, G, 57, 0x78a5636f);
264 ROUND(r0, H, A, B, r1, D, E, F, 58, 0x84c87814);
265 ROUND(r0, G, H, A, r1, C, D, E, 59, 0x8cc70208);
266 ROUND(r0, F, G, H, r1, B, C, D, 60, 0x90befffa);
267 ROUND(r0, E, F, G, r1, A, B, C, 61, 0xa4506ceb);
268 ROUND(r0, D, E, F, r1, H, A, B, 62, 0xbef9a3f7);
269 RND (r0, C, D, E, r1, G, H, A, 63, 0xc67178f2);
271 /* Update state from stack */
273 addl r0, A(%ebp) /* a from last round */
274 addl r1, E(%ebp) /* e from last round */
275 movl B + STACK_W(%esp), r0
276 movl C + STACK_W(%esp), r1
277 movl D + STACK_W(%esp), r2
278 movl F + STACK_W(%esp), r3
279 movl G + STACK_W(%esp), r4
280 movl H + STACK_W(%esp), r5
288 addl $STACK_SIZE, %esp
296 #endif /* SILC_SHA256_ASM */