5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2007 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
20 /* SHA-256 x86 assembler implementation. This implements only the SHA-256
21 transform function and other parts are implemented in sha256.c. The
22 function preserves ebp, edx, edi and esi but does not preserve other
25 This implementation uses only 32-bit registers. It does not use MMX or
26 SSE registers which could be used to enhance the performance, especially
27 when loading the W. This is about as fast as we can get with less than
28 8 32-bit registers on 32-bit CPU.
#ifdef SILC_SHA256_ASM

/* Stack frame used by sha256_transform: the 64-entry message schedule
   W[0..63] sits at the bottom of the frame (at %esp) and a copy of the
   eight 32-bit state words a..h lives above it, at offset STACK_W. */
#define STACK_STATE (8 * 4)
#define STACK_W (64 * 4)
/* NOTE(review): STACK_SIZE is unparenthesized.  All current uses
   ($STACK_SIZE immediates and the ARG_* displacements below) expand
   safely, but parenthesize it if new uses are ever added. */
#define STACK_SIZE STACK_STATE + STACK_W

/* Argument locations relative to %esp once the frame is set up.  The
   +20/+24 presumably accounts for the return address plus four saved
   registers (ebp, edx, edi, esi per the file header) -- the saving
   prologue itself is not visible in this chunk; verify against it. */
#define ARG_STATE STACK_SIZE + 20(%esp)
#define ARG_BUF STACK_SIZE + 24(%esp)
/* One round of SHA-256.  The a (r0) and e (r1) are inputs already in
   registers.  r0 will be the next round a, r1 the next round e.  The
   d and h are outputs and they are the r0 and r1 for next round.

   Implements the FIPS 180-2 round:
       t0 = h + Sigma1(e) + Ch(e,f,g) + ki + W[i]
       t1 = Sigma0(a) + Maj(a,b,c)
       d += t0 (becomes next e), h = t0 + t1 (becomes next a)
   Sigma1 rotates e right by 6, 11 and 25; Sigma0 rotates a right by
   2, 13 and 22.  Ch is computed as ((f ^ g) & e) ^ g and Maj as
   (a & b) | ((a | b) & c).  r4 and r5 are scratch; the f/g/h and b/c
   values are brought into r2/r3 by loads not visible in this chunk. */
#define RND(a, b, c, d, e, f, g, h, W, ki) \
movl e, r4; /* e to Sigma1 */ \
rorl $6, r4; /* Sigma1 >>= 6 */ \
movl r4, r5; /* Sigma1 to temp */ \
rorl $5, r4; /* Sigma1 >>= 5 (11) */ \
xorl r4, r5; /* temp ^= Sigma1 */ \
rorl $14, r4; /* Sigma1 >>= 14 (25) */ \
xorl r5, r4; /* Sigma1 ^= temp */ \
movl r3, r5; /* g to Ch */ \
xorl r2, r5; /* Ch ^= f */ \
andl e, r5; /* Ch &= e */ \
xorl r3, r5; /* Ch ^= g */ \
leal ki(r4, r5), r4; /* t0 = Sigma1 + Ch + ki */ \
addl W * 4(%esp), r4; /* t0 += W[i] */ \
addl r4, r3; /* h += t0 */ \
addl r3, r1; /* d += h (t0) */ \
movl a, r4; /* a to Sigma0 */ \
rorl $2, r4; /* Sigma0 >>= 2 */ \
movl r4, r5; /* Sigma0 to temp */ \
rorl $11, r4; /* Sigma0 >>= 11 (13) */ \
xorl r4, r5; /* temp ^= Sigma0 */ \
rorl $9, r4; /* Sigma0 >>= 9 (22) */ \
xorl r5, r4; /* Sigma0 ^= temp */ \
addl r3, r4; /* t1 = Sigma0 + h (t0) */ \
movl r2, r5; /* b to temp */ \
orl a, r5; /* temp |= a */ \
andl r3, r5; /* temp &= c */ \
andl r2, a; /* a &= b */ \
orl r5, a; /* a |= temp */ \
addl r4, r0; /* h = t0 + t1 */
/* Full round: one RND plus write-back of the two state words RND
   changed -- the new e (in r1) into d's slot and the new a (in r0)
   into h's slot of the stack state copy addressed by %ebp. */
#define ROUND(a, b, c, d, e, f, g, h, W, ki) \
RND(a, b, c, d, e, f, g, h, W, ki) \
movl r1, d(%ebp); /* Update d in stack */ \
movl r0, h(%ebp); /* Update h in stack */
/* Get 64 bits from input buffer in MSB first order: load the words
   for W[i] and W[i + 1] from the input buffer (addressed by r5) and
   store them into the message schedule on the stack.  NOTE(review):
   the macro's #define line and the byte-swap instructions between
   these loads and stores are not visible in this chunk -- verify the
   big-endian conversion against the full file. */
movl i * 4(r5), r4; \
movl (i + 1) * 4(r5), r3; \
movl r4, i * 4(%esp); \
movl r3, (i + 1) * 4(%esp);
/* Expand the input (message schedule recurrence, FIPS 180-2):
       W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]
   where Gamma1 rotates right by 17 and 19 and shifts right by 10,
   and Gamma0 rotates right by 7 and 18 and shifts right by 3.
   On entry r2 holds W[i-2]; r3, r4, r5 are scratch. */
rorl $17, r4; /* Gamma1 >>= 17 */ \
movl r4, r5; /* Gamma1 to temp */ \
rorl $2, r4; /* Gamma1 >>= 2 (19) */ \
xorl r4, r5; /* temp ^= Gamma1 */ \
shrl $10, r2; /* w-2 >> 10 */ \
xorl r5, r2; /* Gamma1 = w-2 ^ temp */ \
addl (i - 7) * 4(%esp), r2; /* Gamma1 += w-7 */ \
addl (i - 16) * 4(%esp), r2; /* Gamma1 += w-16 */ \
movl (i - 15) * 4(%esp), r3; \
movl r3, r4; /* w-15 to Gamma0 */ \
rorl $7, r4; /* Gamma0 >>= 7 */ \
movl r4, r5; /* Gamma0 to temp */ \
rorl $11, r4; /* Gamma0 >>= 11 (18) */ \
xorl r4, r5; /* temp ^= Gamma0 */ \
shrl $3, r3; /* w-15 >> 3 */ \
xorl r5, r3; /* Gamma0 = w-15 ^ temp */ \
addl r2, r3; /* Gamma0 += Gamma1 */ \
movl r3, i * 4(%esp);
/* EXP_BUF0 expands the first schedule word (i = 16); EXP_BUFX(i)
   expands every later W[i], first reloading W[i-2] into r2.
   NOTE(review): both macros' remaining continuation lines fall
   outside the visible portion of this file. */
#define EXP_BUF0(i) \
#define EXP_BUFX(i) \
movl (i - 2) * 4(%esp), r2; \
movl r2, r4; /* w-2 to Gamma1 */ \
.globl sha256_transform
/* sha256_transform(state, buf): SHA-256 compression function for one
   512-bit input block.  The arguments are reached on the stack via
   ARG_STATE / ARG_BUF.  NOTE(review): the function label and the
   register-save pushes precede this point in lines not visible in
   this chunk. */
subl $STACK_SIZE, %esp
/* Copy the eight state words into the working copy on the stack,
   above W.  The loads of r0..r4 from the caller's state (and the
   reload between the D and E stores) are in lines not shown here. */
movl r0, A + STACK_W(%esp)
movl r1, B + STACK_W(%esp)
movl r2, C + STACK_W(%esp)
movl r3, D + STACK_W(%esp)
movl r1, E + STACK_W(%esp)
movl r2, F + STACK_W(%esp)
movl r3, G + STACK_W(%esp)
movl r4, H + STACK_W(%esp)

/* Get buf in MSB first order, W[0..15] */
GET_BUF(0) GET_BUF(2) GET_BUF(4) GET_BUF(6)
GET_BUF(8) GET_BUF(10) GET_BUF(12) GET_BUF(14)

/* Expand input, fill in W[16..63] */
EXP_BUF0(16) EXP_BUFX(17) EXP_BUFX(18) EXP_BUFX(19) EXP_BUFX(20)
EXP_BUFX(21) EXP_BUFX(22) EXP_BUFX(23) EXP_BUFX(24) EXP_BUFX(25)
EXP_BUFX(26) EXP_BUFX(27) EXP_BUFX(28) EXP_BUFX(29) EXP_BUFX(30)
EXP_BUFX(31) EXP_BUFX(32) EXP_BUFX(33) EXP_BUFX(34) EXP_BUFX(35)
EXP_BUFX(36) EXP_BUFX(37) EXP_BUFX(38) EXP_BUFX(39) EXP_BUFX(40)
EXP_BUFX(41) EXP_BUFX(42) EXP_BUFX(43) EXP_BUFX(44) EXP_BUFX(45)
EXP_BUFX(46) EXP_BUFX(47) EXP_BUFX(48) EXP_BUFX(49) EXP_BUFX(50)
EXP_BUFX(51) EXP_BUFX(52) EXP_BUFX(53) EXP_BUFX(54) EXP_BUFX(55)
EXP_BUFX(56) EXP_BUFX(57) EXP_BUFX(58) EXP_BUFX(59) EXP_BUFX(60)
EXP_BUFX(61) EXP_BUFX(62) EXP_BUFX(63)

/* Hash, r0 and r1 set above, ebp is base address to state */
leal STACK_W(%esp), %ebp

/* 64 rounds, eight per group of K constants.  Instead of rotating
   eight registers, the a..h argument slots rotate by one position
   each round so that r0 and r1 always carry the working a and e. */
ROUND(r0, B, C, D, r1, F, G, H, 0, 0x428a2f98);
ROUND(r0, A, B, C, r1, E, F, G, 1, 0x71374491);
ROUND(r0, H, A, B, r1, D, E, F, 2, 0xb5c0fbcf);
ROUND(r0, G, H, A, r1, C, D, E, 3, 0xe9b5dba5);
ROUND(r0, F, G, H, r1, B, C, D, 4, 0x3956c25b);
ROUND(r0, E, F, G, r1, A, B, C, 5, 0x59f111f1);
ROUND(r0, D, E, F, r1, H, A, B, 6, 0x923f82a4);
ROUND(r0, C, D, E, r1, G, H, A, 7, 0xab1c5ed5);

ROUND(r0, B, C, D, r1, F, G, H, 8, 0xd807aa98);
ROUND(r0, A, B, C, r1, E, F, G, 9, 0x12835b01);
ROUND(r0, H, A, B, r1, D, E, F, 10, 0x243185be);
ROUND(r0, G, H, A, r1, C, D, E, 11, 0x550c7dc3);
ROUND(r0, F, G, H, r1, B, C, D, 12, 0x72be5d74);
ROUND(r0, E, F, G, r1, A, B, C, 13, 0x80deb1fe);
ROUND(r0, D, E, F, r1, H, A, B, 14, 0x9bdc06a7);
ROUND(r0, C, D, E, r1, G, H, A, 15, 0xc19bf174);

ROUND(r0, B, C, D, r1, F, G, H, 16, 0xe49b69c1);
ROUND(r0, A, B, C, r1, E, F, G, 17, 0xefbe4786);
ROUND(r0, H, A, B, r1, D, E, F, 18, 0x0fc19dc6);
ROUND(r0, G, H, A, r1, C, D, E, 19, 0x240ca1cc);
ROUND(r0, F, G, H, r1, B, C, D, 20, 0x2de92c6f);
ROUND(r0, E, F, G, r1, A, B, C, 21, 0x4a7484aa);
ROUND(r0, D, E, F, r1, H, A, B, 22, 0x5cb0a9dc);
ROUND(r0, C, D, E, r1, G, H, A, 23, 0x76f988da);

ROUND(r0, B, C, D, r1, F, G, H, 24, 0x983e5152);
ROUND(r0, A, B, C, r1, E, F, G, 25, 0xa831c66d);
ROUND(r0, H, A, B, r1, D, E, F, 26, 0xb00327c8);
ROUND(r0, G, H, A, r1, C, D, E, 27, 0xbf597fc7);
ROUND(r0, F, G, H, r1, B, C, D, 28, 0xc6e00bf3);
ROUND(r0, E, F, G, r1, A, B, C, 29, 0xd5a79147);
ROUND(r0, D, E, F, r1, H, A, B, 30, 0x06ca6351);
ROUND(r0, C, D, E, r1, G, H, A, 31, 0x14292967);

ROUND(r0, B, C, D, r1, F, G, H, 32, 0x27b70a85);
ROUND(r0, A, B, C, r1, E, F, G, 33, 0x2e1b2138);
ROUND(r0, H, A, B, r1, D, E, F, 34, 0x4d2c6dfc);
ROUND(r0, G, H, A, r1, C, D, E, 35, 0x53380d13);
ROUND(r0, F, G, H, r1, B, C, D, 36, 0x650a7354);
ROUND(r0, E, F, G, r1, A, B, C, 37, 0x766a0abb);
ROUND(r0, D, E, F, r1, H, A, B, 38, 0x81c2c92e);
ROUND(r0, C, D, E, r1, G, H, A, 39, 0x92722c85);

ROUND(r0, B, C, D, r1, F, G, H, 40, 0xa2bfe8a1);
ROUND(r0, A, B, C, r1, E, F, G, 41, 0xa81a664b);
ROUND(r0, H, A, B, r1, D, E, F, 42, 0xc24b8b70);
ROUND(r0, G, H, A, r1, C, D, E, 43, 0xc76c51a3);
ROUND(r0, F, G, H, r1, B, C, D, 44, 0xd192e819);
ROUND(r0, E, F, G, r1, A, B, C, 45, 0xd6990624);
ROUND(r0, D, E, F, r1, H, A, B, 46, 0xf40e3585);
ROUND(r0, C, D, E, r1, G, H, A, 47, 0x106aa070);

ROUND(r0, B, C, D, r1, F, G, H, 48, 0x19a4c116);
ROUND(r0, A, B, C, r1, E, F, G, 49, 0x1e376c08);
ROUND(r0, H, A, B, r1, D, E, F, 50, 0x2748774c);
ROUND(r0, G, H, A, r1, C, D, E, 51, 0x34b0bcb5);
ROUND(r0, F, G, H, r1, B, C, D, 52, 0x391c0cb3);
ROUND(r0, E, F, G, r1, A, B, C, 53, 0x4ed8aa4a);
ROUND(r0, D, E, F, r1, H, A, B, 54, 0x5b9cca4f);
ROUND(r0, C, D, E, r1, G, H, A, 55, 0x682e6ff3);

ROUND(r0, B, C, D, r1, F, G, H, 56, 0x748f82ee);
ROUND(r0, A, B, C, r1, E, F, G, 57, 0x78a5636f);
ROUND(r0, H, A, B, r1, D, E, F, 58, 0x84c87814);
ROUND(r0, G, H, A, r1, C, D, E, 59, 0x8cc70208);
ROUND(r0, F, G, H, r1, B, C, D, 60, 0x90befffa);
ROUND(r0, E, F, G, r1, A, B, C, 61, 0xa4506ceb);
ROUND(r0, D, E, F, r1, H, A, B, 62, 0xbef9a3f7);
RND (r0, C, D, E, r1, G, H, A, 63, 0xc67178f2); /* Last round: plain RND,
                                       the new a/e stay in r0/r1 for the
                                       state update below */

/* Update state from stack */
/* NOTE(review): a line not visible in this chunk presumably repoints
   %ebp at the caller's state before these adds -- verify against the
   full file. */
addl r0, A(%ebp) /* a from last round */
addl r1, E(%ebp) /* e from last round */
movl B + STACK_W(%esp), r0
movl C + STACK_W(%esp), r1
movl D + STACK_W(%esp), r2
movl F + STACK_W(%esp), r3
movl G + STACK_W(%esp), r4
movl H + STACK_W(%esp), r5
/* The remaining additions of r0..r5 into the caller's state, the
   register restores and the ret follow in lines not visible here. */
addl $STACK_SIZE, %esp
298 #endif /* SILC_SHA256_ASM */