1 /* Modified for SILC -Pekka */
2 /* Includes key scheduling in C always, and encryption and decryption in C
3 when assembler optimized version cannot be used. */
5 ---------------------------------------------------------------------------
6 Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.
10 The free distribution and use of this software in both source and binary
11 form is allowed (with or without changes) provided that:
13 1. distributions of this source code include the above copyright
14 notice, this list of conditions and the following disclaimer;
16 2. distributions in binary form include the above copyright
17 notice, this list of conditions and the following disclaimer
18 in the documentation and/or other associated materials;
20 3. the copyright holder's name is not used to endorse products
21 built using this software without specific written permission.
23 ALTERNATIVELY, provided that this notice is retained in full, this product
24 may be distributed under the terms of the GNU General Public License (GPL),
25 in which case the provisions of the GPL apply INSTEAD OF those given above.
29 This software is provided 'as is' with no explicit or implied warranties
30 in respect of its properties, including, but not limited to, correctness
31 and/or fitness for purpose.
32 ---------------------------------------------------------------------------
37 #include "rijndael_internal.h"
41 * SILC Crypto API for AES
46 /* Sets the key for the cipher. */
/* The key and keylen arguments are passed unchanged to the key scheduling
   routines; one call fills the u.enc member and the other the u.dec member
   of the AesContext that `context' is cast to.
   NOTE(review): the condition that selects between the two calls is not
   visible in this excerpt. */
48 SILC_CIPHER_API_SET_KEY(aes_cbc)
51 aes_encrypt_key(key, keylen, &((AesContext *)context)->u.enc);
53 aes_decrypt_key(key, keylen, &((AesContext *)context)->u.dec);
57 /* Sets IV for the cipher. */
59 SILC_CIPHER_API_SET_IV(aes_cbc)
64 /* Returns the size of the cipher context. */
/* The context is an AesContext, so its size is simply sizeof(AesContext). */
66 SILC_CIPHER_API_CONTEXT_LEN(aes_cbc)
68 return sizeof(AesContext);
71 /* Encrypts with the cipher in CBC mode. Source and destination buffers
72 may be one and the same. */
/* The visible steps XOR 16 bytes of src into the IV and then encrypt the IV
   buffer in place.
   NOTE(review): the block loop and the copy of the result to dst are not
   visible in this excerpt. */
74 SILC_CIPHER_API_ENCRYPT(aes_cbc)
77 SilcUInt32 tmp[4], tmp2[4];
/* len must be a multiple of 16 (the low four bits must be zero). */
79 SILC_ASSERT((len & (16 - 1)) == 0);
/* Load the 16 IV bytes as four 32-bit words. */
84 SILC_GET32_MSB(tmp[0], &iv[0]);
85 SILC_GET32_MSB(tmp[1], &iv[4]);
86 SILC_GET32_MSB(tmp[2], &iv[8]);
87 SILC_GET32_MSB(tmp[3], &iv[12]);
/* Load 16 source bytes the same way. */
89 SILC_GET32_MSB(tmp2[0], &src[0]);
90 SILC_GET32_MSB(tmp2[1], &src[4]);
91 SILC_GET32_MSB(tmp2[2], &src[8]);
92 SILC_GET32_MSB(tmp2[3], &src[12]);
/* tmp = IV xor source block. */
94 tmp[0] = tmp[0] ^ tmp2[0];
95 tmp[1] = tmp[1] ^ tmp2[1];
96 tmp[2] = tmp[2] ^ tmp2[2];
97 tmp[3] = tmp[3] ^ tmp2[3];
/* Store the XORed words back over the IV bytes. */
99 SILC_PUT32_MSB(tmp[0], &iv[0]);
100 SILC_PUT32_MSB(tmp[1], &iv[4]);
101 SILC_PUT32_MSB(tmp[2], &iv[8]);
102 SILC_PUT32_MSB(tmp[3], &iv[12]);
/* Encrypt in place: iv is both the input and the output of aes_encrypt(),
   so afterwards it holds the encrypted block. */
104 aes_encrypt(iv, iv, &((AesContext *)context)->u.enc);
114 /* Decrypts with the cipher in CBC mode. Source and destination buffers
115 may be one and the same. */
/* The visible steps decrypt 16 bytes of src into dst and then XOR the IV
   into dst.
   NOTE(review): the block loop and whatever consumes the saved copy in tmp
   (presumably the IV update for the next block) are not visible in this
   excerpt. */
117 SILC_CIPHER_API_DECRYPT(aes_cbc)
119 unsigned char tmp[16];
121 SilcUInt32 tmp2[4], tmp3[4];
/* Keep a copy of the 16 source bytes before dst is written; src and dst may
   be the same buffer. */
127 memcpy(tmp, src, 16);
128 aes_decrypt(src, dst, &((AesContext *)context)->u.dec);
/* Load the 16 IV bytes as four 32-bit words. */
130 SILC_GET32_MSB(tmp2[0], &iv[0]);
131 SILC_GET32_MSB(tmp2[1], &iv[4]);
132 SILC_GET32_MSB(tmp2[2], &iv[8]);
133 SILC_GET32_MSB(tmp2[3], &iv[12]);
/* Load the freshly decrypted block from dst. */
135 SILC_GET32_MSB(tmp3[0], &dst[0]);
136 SILC_GET32_MSB(tmp3[1], &dst[4]);
137 SILC_GET32_MSB(tmp3[2], &dst[8]);
138 SILC_GET32_MSB(tmp3[3], &dst[12]);
/* tmp2 = decrypted block xor IV. */
140 tmp2[0] = tmp3[0] ^ tmp2[0];
141 tmp2[1] = tmp3[1] ^ tmp2[1];
142 tmp2[2] = tmp3[2] ^ tmp2[2];
143 tmp2[3] = tmp3[3] ^ tmp2[3];
/* Write the XORed result back to dst. */
145 SILC_PUT32_MSB(tmp2[0], &dst[0]);
146 SILC_PUT32_MSB(tmp2[1], &dst[4]);
147 SILC_PUT32_MSB(tmp2[2], &dst[8]);
148 SILC_PUT32_MSB(tmp2[3], &dst[12]);
160 /* Sets the key for the cipher. */
/* Only the encryption key schedule is prepared for CTR mode.  The u.enc
   member is zeroed first, which also clears the inf bytes kept inside it
   (inf.b[2] is the CTR block offset), and is then filled in by
   aes_encrypt_key(). */
162 SILC_CIPHER_API_SET_KEY(aes_ctr)
164 AesContext *aes = context;
165 memset(&aes->u.enc, 0, sizeof(aes->u.enc));
166 aes_encrypt_key(key, keylen, &aes->u.enc);
170 /* Sets IV for the cipher. */
172 SILC_CIPHER_API_SET_IV(aes_ctr)
174 AesContext *aes = context;
176 /* Starts new block. */
/* inf.b[2] is the byte offset that the CTR encrypt routine reads on entry
   and stores back on exit; it is reset to 0 here. */
177 aes->u.enc.inf.b[2] = 0;
180 /* Returns the size of the cipher context. */
/* CTR mode uses the same AesContext structure as the CBC entry points. */
182 SILC_CIPHER_API_CONTEXT_LEN(aes_ctr)
184 return sizeof(AesContext);
187 /* Encrypts with the cipher in CTR mode. Source and destination buffers
188 may be one and the same. Assumes MSB first ordered counter. */
/* NOTE(review): the declarations of ctr and i, the byte loop and the counter
   increment are not visible in this excerpt; the comments below describe
   only the statements that are shown. */
190 SILC_CIPHER_API_ENCRYPT(aes_ctr)
192 AesContext *aes = context;
/* Read the 16-byte counter held in iv as four 32-bit words. */
196 SILC_GET32_MSB(ctr[0], iv);
197 SILC_GET32_MSB(ctr[1], iv + 4);
198 SILC_GET32_MSB(ctr[2], iv + 8);
199 SILC_GET32_MSB(ctr[3], iv + 12);
/* Resume from the byte offset saved in the context by the previous call. */
201 i = aes->u.enc.inf.b[2];
/* Write the counter words into iv ... */
212 SILC_PUT32_MSB(ctr[0], iv);
213 SILC_PUT32_MSB(ctr[1], iv + 4);
214 SILC_PUT32_MSB(ctr[2], iv + 8);
215 SILC_PUT32_MSB(ctr[3], iv + 12);
/* ... and encrypt iv in place, turning it into 16 keystream bytes. */
217 aes_encrypt(iv, iv, &aes->u.enc);
/* XOR one source byte with the keystream byte at offset i. */
220 *dst++ = *src++ ^ iv[i++];
/* Remember the offset reached so far in the context. */
222 aes->u.enc.inf.b[2] = i;
/* Store the counter words back into iv. */
224 SILC_PUT32_MSB(ctr[0], iv);
225 SILC_PUT32_MSB(ctr[1], iv + 4);
226 SILC_PUT32_MSB(ctr[2], iv + 8);
227 SILC_PUT32_MSB(ctr[3], iv + 12);
232 /* Decrypts with the cipher in CTR mode. Source and destination buffers
233 may be one and the same. */
/* Decryption forwards every argument to silc_aes_ctr_encrypt() (presumably
   the function generated by SILC_CIPHER_API_ENCRYPT(aes_ctr) -- confirm) and
   returns its result, i.e. the same operation is used in both directions. */
235 SILC_CIPHER_API_DECRYPT(aes_ctr)
237 return silc_aes_ctr_encrypt(context, src, dst, len, iv);
240 /****************************************************************************/
242 #if defined(__cplusplus)
247 #if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 )
248 # define XP_DIR __cdecl
/* d_1 defines one const table of 256 entries of type t named n, initialised
   by b(e). */
253 #define d_1(t,n,b,e) ALIGN const XP_DIR t n[256] = b(e)
/* d_4 defines four such 256-entry tables as n[4][256], initialised by
   b(e), b(f), b(g) and b(h) respectively. */
254 #define d_4(t,n,b,e,f,g,h) ALIGN const XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) }
/* Table of RC_LENGTH words built from rc_data(w0); presumably the round
   constants read as t_use(r,c)[i] by the key expansion steps -- confirm. */
255 ALIGN const uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
/* Byte-wide table built from isb_data.
   NOTE(review): the #if matching the #endif below is not visible here. */
258 d_1(uint_8t, t_dec(i,box), isb_data, h0);
259 #endif /* SILC_AES_ASM */
/* Four-table sets of 32-bit words: (f,n) and (f,l) come from sb_data,
   (i,n) and (i,l) from isb_data, and (i,m) from mm_data.  The (f,n)/(f,l)
   sets are the ones used by fwd_rnd/fwd_lrnd and the (i,n)/(i,l) sets by
   inv_rnd/inv_lrnd. */
260 d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3);
261 d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3);
262 d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3);
263 d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3);
264 d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3);
267 { k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
268 k[4*(i)+5] = ss[1] ^= ss[0]; \
269 k[4*(i)+6] = ss[2] ^= ss[1]; \
270 k[4*(i)+7] = ss[3] ^= ss[2]; \
273 AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
/* Builds the encryption key schedule in cx->ks from a key of four words.
   NOTE(review): the declaration of ss and any expansion steps beyond the
   eight shown are not visible in this excerpt. */
/* Copy the four key words into the start of the schedule and into the
   working array ss. */
276 cx->ks[0] = ss[0] = word_in(key, 0);
277 cx->ks[1] = ss[1] = word_in(key, 1);
278 cx->ks[2] = ss[2] = word_in(key, 2);
279 cx->ks[3] = ss[3] = word_in(key, 3);
/* Key expansion steps 0..7. */
281 ke4(cx->ks, 0); ke4(cx->ks, 1);
282 ke4(cx->ks, 2); ke4(cx->ks, 3);
283 ke4(cx->ks, 4); ke4(cx->ks, 5);
284 ke4(cx->ks, 6); ke4(cx->ks, 7);
/* Record 10 * 16 in the context (presumably rounds times the 16-byte block
   size -- confirm); aes_decrypt() derives a ks offset from inf.b[0] >> 2. */
288 cx->inf.b[0] = 10 * 16;
292 { k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
293 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
294 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
295 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
300 k[6*(i)+10] = ss[4] ^= ss[3]; \
301 k[6*(i)+11] = ss[5] ^= ss[4]; \
304 AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
/* Builds the encryption key schedule in cx->ks from a key of six words.
   NOTE(review): the declaration of ss and any expansion steps beyond the
   six shown are not visible in this excerpt. */
/* Copy the six key words into the start of the schedule and into ss. */
307 cx->ks[0] = ss[0] = word_in(key, 0);
308 cx->ks[1] = ss[1] = word_in(key, 1);
309 cx->ks[2] = ss[2] = word_in(key, 2);
310 cx->ks[3] = ss[3] = word_in(key, 3);
311 cx->ks[4] = ss[4] = word_in(key, 4);
312 cx->ks[5] = ss[5] = word_in(key, 5);
/* Key expansion steps 0..5. */
314 ke6(cx->ks, 0); ke6(cx->ks, 1);
315 ke6(cx->ks, 2); ke6(cx->ks, 3);
316 ke6(cx->ks, 4); ke6(cx->ks, 5);
/* Record 12 * 16 in the context (presumably rounds times the 16-byte block
   size -- confirm). */
320 cx->inf.b[0] = 12 * 16;
324 { k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
325 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
326 k[8*(i)+10] = ss[2] ^= ss[1]; \
327 k[8*(i)+11] = ss[3] ^= ss[2]; \
332 k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
333 k[8*(i)+13] = ss[5] ^= ss[4]; \
334 k[8*(i)+14] = ss[6] ^= ss[5]; \
335 k[8*(i)+15] = ss[7] ^= ss[6]; \
338 AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
/* Builds the encryption key schedule in cx->ks from a key of eight words.
   NOTE(review): the declaration of ss and any expansion steps beyond the
   six shown are not visible in this excerpt. */
/* Copy the eight key words into the start of the schedule and into ss. */
341 cx->ks[0] = ss[0] = word_in(key, 0);
342 cx->ks[1] = ss[1] = word_in(key, 1);
343 cx->ks[2] = ss[2] = word_in(key, 2);
344 cx->ks[3] = ss[3] = word_in(key, 3);
345 cx->ks[4] = ss[4] = word_in(key, 4);
346 cx->ks[5] = ss[5] = word_in(key, 5);
347 cx->ks[6] = ss[6] = word_in(key, 6);
348 cx->ks[7] = ss[7] = word_in(key, 7);
/* Key expansion steps 0..5. */
350 ke8(cx->ks, 0); ke8(cx->ks, 1);
351 ke8(cx->ks, 2); ke8(cx->ks, 3);
352 ke8(cx->ks, 4); ke8(cx->ks, 5);
/* Record 14 * 16 in the context (presumably rounds times the 16-byte block
   size -- confirm). */
355 cx->inf.b[0] = 14 * 16;
/* Selects the encryption key schedule routine from key_len, which may be
   given either in bytes (16, 24, 32) or in bits (128, 192, 256).
   NOTE(review): the switch statement itself and the handling of any other
   key_len value are not visible in this excerpt. */
358 AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
362 case 16: case 128: aes_encrypt_key128(key, cx); return;
363 case 24: case 192: aes_encrypt_key192(key, cx); return;
364 case 32: case 256: aes_encrypt_key256(key, cx); return;
/* Index remapping used when writing the decryption key schedules.  Writing
   i = 4*q + r with r = (i & 3), v(n,i) evaluates to n - 4*q + r: successive
   groups of four words are placed downwards from index n, while the order
   of the words inside each group is preserved. */
368 #define v(n,i) ((n) - (i) + 2 * ((i) & 3))
370 { k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
371 k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \
372 k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \
373 k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \
377 { ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
378 ss[1] = ss[1] ^ ss[3]; \
379 ss[2] = ss[2] ^ ss[3]; \
380 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
381 ss[i % 4] ^= ss[4]; \
382 ss[4] ^= k[v(40,(4*(i)))]; k[v(40,(4*(i))+4)] = ff(ss[4]); \
383 ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \
384 ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \
385 ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \
389 { ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
390 ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
391 k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \
392 k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \
393 k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \
394 k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \
398 { ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
399 k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
400 k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \
401 k[v(40,(4*(i))+6)] = ss[0]; \
402 k[v(40,(4*(i))+7)] = ss[1]; \
405 AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
/* Builds the decryption key schedule in cx->ks from a key of four words.
   Every store goes through v(40, ...), so the schedule is laid out in
   four-word groups running downwards from index 40.
   NOTE(review): the body of the #if below and the declaration of ss are not
   visible in this excerpt. */
407 #if defined( d_vars )
/* Key words 0..3 land at ks[40..43] (v(40,0..3)). */
410 cx->ks[v(40,(0))] = ss[0] = word_in(key, 0);
411 cx->ks[v(40,(1))] = ss[1] = word_in(key, 1);
412 cx->ks[v(40,(2))] = ss[2] = word_in(key, 2);
413 cx->ks[v(40,(3))] = ss[3] = word_in(key, 3);
/* Ten expansion steps: kdf4 for the first, kd4 for steps 1..8 and kdl4 for
   the last. */
415 kdf4(cx->ks, 0); kd4(cx->ks, 1);
416 kd4(cx->ks, 2); kd4(cx->ks, 3);
417 kd4(cx->ks, 4); kd4(cx->ks, 5);
418 kd4(cx->ks, 6); kd4(cx->ks, 7);
419 kd4(cx->ks, 8); kdl4(cx->ks, 9);
/* Same value as stored by aes_encrypt_key128(); aes_decrypt() derives a ks
   offset from inf.b[0] >> 2 (160 >> 2 == 40). */
421 cx->inf.b[0] = 10 * 16;
425 { k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
426 k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \
427 k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \
428 k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \
433 k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \
434 k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \
438 { ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \
439 ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \
440 ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \
441 ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \
442 ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \
443 ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \
447 { ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
448 ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \
449 ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \
450 ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \
451 ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \
452 ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \
453 ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \
457 { ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \
458 ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \
459 ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \
460 ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \
463 AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
/* Builds the decryption key schedule in cx->ks from a key of six words.
   Every store goes through v(48, ...), so the schedule is laid out in
   four-word groups running downwards from index 48.
   NOTE(review): the body of the #if below and the declaration of ss are not
   visible in this excerpt. */
465 #if defined( d_vars )
/* Key words 0..3 are stored unmodified at ks[48..51]. */
468 cx->ks[v(48,(0))] = ss[0] = word_in(key, 0);
469 cx->ks[v(48,(1))] = ss[1] = word_in(key, 1);
470 cx->ks[v(48,(2))] = ss[2] = word_in(key, 2);
471 cx->ks[v(48,(3))] = ss[3] = word_in(key, 3);
/* Key words 4 and 5 are kept raw in ss but stored in ks after passing
   through ff() (defined elsewhere). */
473 cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4));
474 cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5));
/* Eight expansion steps: kdf6 for the first, kd6 for steps 1..6 and kdl6
   for the last. */
475 kdf6(cx->ks, 0); kd6(cx->ks, 1);
476 kd6(cx->ks, 2); kd6(cx->ks, 3);
477 kd6(cx->ks, 4); kd6(cx->ks, 5);
478 kd6(cx->ks, 6); kdl6(cx->ks, 7);
/* Same value as stored by aes_encrypt_key192() (192 >> 2 == 48). */
480 cx->inf.b[0] = 12 * 16;
484 { k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
485 k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \
486 k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \
487 k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \
492 k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \
493 k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \
494 k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \
495 k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \
499 { ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \
500 ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \
501 ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \
502 ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \
503 ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \
504 ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \
505 ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \
506 ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \
510 { ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
511 ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \
512 ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \
513 ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \
514 ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \
515 ss[8] = ls_box(ss[3],0); \
516 ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \
517 ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \
518 ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \
519 ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \
523 { ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \
524 ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \
525 ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \
526 ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \
529 AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
/* Builds the decryption key schedule in cx->ks from a key of eight words.
   Every store goes through v(56, ...), so the schedule is laid out in
   four-word groups running downwards from index 56.
   NOTE(review): the body of the #if below, the declaration of ss and any
   expansion steps beyond the six shown are not visible in this excerpt. */
531 #if defined( d_vars )
/* Key words 0..3 are stored unmodified at ks[56..59]. */
534 cx->ks[v(56,(0))] = ss[0] = word_in(key, 0);
535 cx->ks[v(56,(1))] = ss[1] = word_in(key, 1);
536 cx->ks[v(56,(2))] = ss[2] = word_in(key, 2);
537 cx->ks[v(56,(3))] = ss[3] = word_in(key, 3);
/* Key words 4..7 are kept raw in ss but stored in ks after passing through
   ff() (defined elsewhere). */
539 cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4));
540 cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5));
541 cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6));
542 cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7));
/* Expansion steps: kdf8 for the first, kd8 for steps 1..5. */
543 kdf8(cx->ks, 0); kd8(cx->ks, 1);
544 kd8(cx->ks, 2); kd8(cx->ks, 3);
545 kd8(cx->ks, 4); kd8(cx->ks, 5);
/* Same value as stored by aes_encrypt_key256() (224 >> 2 == 56). */
548 cx->inf.b[0] = 14 * 16;
/* Selects the decryption key schedule routine from key_len, which may be
   given either in bytes (16, 24, 32) or in bits (128, 192, 256).
   NOTE(review): the switch statement itself and the handling of any other
   key_len value are not visible in this excerpt. */
551 AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
555 case 16: case 128: aes_decrypt_key128(key, cx); return;
556 case 24: case 192: aes_decrypt_key192(key, cx); return;
557 case 32: case 256: aes_decrypt_key256(key, cx); return;
562 /* C version of AES */
/* Helper macros for the four-column cipher state.  s(y,c), word_in() and
   word_out() are defined elsewhere; s(y,c) presumably names column c of
   state array y -- confirm. */
/* si: load column c of input x and XOR it with key word k[c] into state y. */
564 #define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
/* so: write column c of state x to output y. */
565 #define so(y,x,c) word_out(y, c, s(x,c))
/* locals: declarator list for two four-element state arrays, x then y. */
566 #define locals(y,x) x[4],y[4]
/* l_copy: copy all four columns of state x into state y. */
567 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
568 s(y,2) = s(x,2); s(y,3) = s(x,3);
/* state_in / state_out: apply si / so to columns 0..3. */
569 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
570 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
/* round: apply the per-column round macro rm to columns 0..3. */
571 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
573 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
574 Pentium optimization with small code but this is poor for decryption
575 so we need to control this with the following VC++ pragmas
578 #if defined( _MSC_VER ) && !defined( _WIN64 )
579 #pragma optimize( "s", on )
/* fwd_var(x,r,c) selects state column (c + r) mod 4 of x: row r of the
   output column c is taken from the column r positions further on, with
   wrap-around. */
582 #define fwd_var(x,r,c)\
583 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
584 : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
585 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
586 : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
/* fwd_rnd / fwd_lrnd compute output column c as round-key word k[c] XOR a
   four-table lookup over the columns chosen by fwd_var.  They differ only
   in the table set: t_use(f,n) for fwd_rnd, t_use(f,l) for fwd_lrnd (the
   latter is used for the last round in aes_encrypt). */
587 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
588 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
/* Encrypts the block at `in' into `out' using the key schedule in cx.
   NOTE(review): the declaration of kp, the control flow around the round
   groups below (presumably selecting the number of rounds by key size) and
   the final state_out are not visible in this excerpt. */
590 AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
/* Two four-word state arrays, b1 and b0 (see locals()). */
591 { uint_32t locals(b0, b1);
/* b0 = input columns XOR the first four round-key words at kp. */
595 state_in(b0, in, kp);
/* Each round() line computes all four columns of its first state argument
   from the second one and the round key at the given offset from kp; b0 and
   b1 are used alternately as source and destination. */
600 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
601 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
604 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
605 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
608 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
609 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
610 round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
611 round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
612 round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
613 round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
614 round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
615 round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
616 round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
/* The last round uses fwd_lrnd, i.e. the t_use(f,l) tables. */
617 round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
/* inv_var(x,r,c) selects state column (c - r) mod 4 of x, the opposite
   rotation direction to fwd_var. */
623 #define inv_var(x,r,c)\
624 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
625 : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
626 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
627 : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
/* inv_rnd / inv_lrnd compute output column c as round-key word k[c] XOR a
   four-table lookup over the columns chosen by inv_var.  They differ only
   in the table set: t_use(i,n) for inv_rnd, t_use(i,l) for inv_lrnd. */
629 #define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
630 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
/* rnd_key(n): address of the round key n * N_COLS words away from kp; n is
   zero or negative at every use in aes_decrypt. */
632 #define rnd_key(n) (kp + n * N_COLS)
/* Decrypts the block at `in' into `out' using the key schedule in cx.
   NOTE(review): the declaration of kp, the definition of key_ofs, the
   control flow around the round groups below (presumably selecting the
   number of rounds by key size) and the final state_out are not visible in
   this excerpt. */
634 AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
/* Two four-word state arrays, b1 and b0 (see locals()). */
635 { uint_32t locals(b0, b1);
/* Pick the round key used for the initial XOR: either the start of ks or
   the word at offset inf.b[0] >> 2, depending on key_ofs. */
638 kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
639 state_in(b0, in, kp);
/* Re-base kp at the opposite end of the schedule; the rounds below address
   their keys relative to it through rnd_key(). */
641 kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
/* Each round() line computes all four columns of its first state argument
   from the second one; b0 and b1 are used alternately, and the round key
   index runs from -13 up to 0. */
645 round(inv_rnd, b1, b0, rnd_key(-13));
646 round(inv_rnd, b0, b1, rnd_key(-12));
648 round(inv_rnd, b1, b0, rnd_key(-11));
649 round(inv_rnd, b0, b1, rnd_key(-10));
651 round(inv_rnd, b1, b0, rnd_key(-9));
652 round(inv_rnd, b0, b1, rnd_key(-8));
653 round(inv_rnd, b1, b0, rnd_key(-7));
654 round(inv_rnd, b0, b1, rnd_key(-6));
655 round(inv_rnd, b1, b0, rnd_key(-5));
656 round(inv_rnd, b0, b1, rnd_key(-4));
657 round(inv_rnd, b1, b0, rnd_key(-3));
658 round(inv_rnd, b0, b1, rnd_key(-2));
659 round(inv_rnd, b1, b0, rnd_key(-1));
/* The last round uses inv_lrnd, i.e. the t_use(i,l) tables. */
660 round(inv_lrnd, b0, b1, rnd_key( 0));
666 #if defined(__cplusplus)
670 #endif /* SILC_AES_ASM */