From 97ca3ffe0ce65ac0c5fa3274284825537e996c78 Mon Sep 17 00:00:00 2001 From: Pekka Riikonen Date: Fri, 15 Dec 2006 15:22:48 +0000 Subject: [PATCH] Added assembler AES for x86 and x86_64. Simplified Cipher implementation API. --- lib/silccrypt/Makefile.ad | 100 ++-- lib/silccrypt/aes.c | 827 +++++++++++++++------------- lib/silccrypt/aes.h | 13 +- lib/silccrypt/aes_x86.asm | 645 ++++++++++++++++++++++ lib/silccrypt/aes_x86_64.asm | 866 ++++++++++++++++++++++++++++++ lib/silccrypt/blowfish.c | 20 +- lib/silccrypt/blowfish.h | 7 +- lib/silccrypt/cast.c | 238 ++++---- lib/silccrypt/cast.h | 5 +- lib/silccrypt/ciphers_def.h | 6 +- lib/silccrypt/configure.ad | 57 ++ lib/silccrypt/none.c | 9 +- lib/silccrypt/none.h | 9 +- lib/silccrypt/rc5.c | 28 +- lib/silccrypt/rc5.h | 5 +- lib/silccrypt/rijndael_internal.h | 298 +++++++++- lib/silccrypt/silccipher.c | 27 +- lib/silccrypt/silccipher.h | 6 - lib/silccrypt/twofish.c | 78 ++- lib/silccrypt/twofish.h | 5 +- 20 files changed, 2552 insertions(+), 697 deletions(-) create mode 100644 lib/silccrypt/aes_x86.asm create mode 100644 lib/silccrypt/aes_x86_64.asm create mode 100644 lib/silccrypt/configure.ad diff --git a/lib/silccrypt/Makefile.ad b/lib/silccrypt/Makefile.ad index 01edf7c3..56de2636 100644 --- a/lib/silccrypt/Makefile.ad +++ b/lib/silccrypt/Makefile.ad @@ -3,7 +3,7 @@ # # Author: Pekka Riikonen # -# Copyright (C) 2000 - 2005 Pekka Riikonen +# Copyright (C) 2000 - 2006 Pekka Riikonen # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,23 +19,34 @@ AUTOMAKE_OPTIONS = 1.0 no-dependencies foreign noinst_LTLIBRARIES = libsilccrypt.la -libsilccrypt_la_SOURCES = \ - none.c \ - rc5.c \ - md5.c \ - aes.c \ - rsa.c \ - sha1.c \ - sha256.c \ - twofish.c \ - blowfish.c \ - cast.c \ - silccipher.c \ - silchash.c \ - silchmac.c \ - silcrng.c \ - silcpkcs.c \ - silcpkcs1.c \ +if SILC_AES_ASM +if SILC_I486 +SILC_AES_S = aes_x86.asm aes.c +endif +if SILC_X86_64 +SILC_AES_S = aes_x86_84.asm aes.c +endif +else +SILC_AES_S = aes.c +endif + +libsilccrypt_la_SOURCES = \ + none.c \ + rc5.c \ + md5.c \ + $(SILC_AES_S) \ + rsa.c \ + sha1.c \ + sha256.c \ + twofish.c \ + blowfish.c \ + cast.c \ + silccipher.c \ + silchash.c \ + silchmac.c \ + silcrng.c \ + silcpkcs.c \ + silcpkcs1.c \ silcpk.c if SILC_LIBTOOLFIX @@ -45,37 +56,42 @@ else LTFLAGS = endif -CFLAGS = $(LTFLAGS) +CFLAGS = $(LTFLAGS) @SILC_CRYPTO_CFLAGS@ #ifdef SILC_DIST_TOOLKIT -include_HEADERS = \ - aes.h \ - blowfish.h \ - cast.h \ - ciphers_def.h \ - ciphers.h \ - md5.h \ - none.h \ - rc5.h \ - rsa.h \ - sha1.h \ - sha1_internal.h \ - sha256.h \ - sha256_internal.h \ - silccipher.h \ - silcdh.h \ - silchash.h \ - silchmac.h \ - silcpkcs.h \ - silcrng.h \ - silcpkcs1.h \ - twofish.h \ +include_HEADERS = \ + aes.h \ + blowfish.h \ + cast.h \ + ciphers_def.h \ + ciphers.h \ + md5.h \ + none.h \ + rc5.h \ + rsa.h \ + sha1.h \ + sha1_internal.h \ + sha256.h \ + sha256_internal.h \ + silccipher.h \ + silcdh.h \ + silchash.h \ + silchmac.h \ + silcpkcs.h \ + silcrng.h \ + silcpkcs1.h \ + twofish.h \ silcpk.h SILC_EXTRA_DIST = tests #endif SILC_DIST_TOOLKIT -EXTRA_DIST = *.h $(SILC_EXTRA_DIST) +SUFFIXES = .asm + +.asm.lo: + $(LIBTOOL) --tag=CC --mode=compile @SILC_ASSEMBLER@ $< + +EXTRA_DIST = *.h *.asm $(SILC_EXTRA_DIST) include $(top_srcdir)/Makefile.defines.in diff --git a/lib/silccrypt/aes.c b/lib/silccrypt/aes.c index b8308fd6..9fc9f1e6 100644 --- a/lib/silccrypt/aes.c +++ b/lib/silccrypt/aes.c @@ -1,77 +1,60 @@ -/* Modified for SILC. -Pekka */ -/* The AES */ - -/* This is an independent implementation of the encryption algorithm: */ -/* */ -/* RIJNDAEL by Joan Daemen and Vincent Rijmen */ -/* */ -/* which is a candidate algorithm in the Advanced Encryption Standard */ -/* programme of the US National Institute of Standards and Technology. */ -/* */ -/* Copyright in this implementation is held by Dr B R Gladman but I */ -/* hereby give permission for its free direct or derivative use subject */ -/* to acknowledgment of its origin and compliance with any conditions */ -/* that the originators of the algorithm place on its exploitation. */ -/* */ -/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */ - -/* Timing data for Rijndael (rijndael.c) - -Algorithm: rijndael (rijndael.c) - -128 bit key: -Key Setup: 305/1389 cycles (encrypt/decrypt) -Encrypt: 374 cycles = 68.4 mbits/sec -Decrypt: 352 cycles = 72.7 mbits/sec -Mean: 363 cycles = 70.5 mbits/sec - -192 bit key: -Key Setup: 277/1595 cycles (encrypt/decrypt) -Encrypt: 439 cycles = 58.3 mbits/sec -Decrypt: 425 cycles = 60.2 mbits/sec -Mean: 432 cycles = 59.3 mbits/sec - -256 bit key: -Key Setup: 374/1960 cycles (encrypt/decrypt) -Encrypt: 502 cycles = 51.0 mbits/sec -Decrypt: 498 cycles = 51.4 mbits/sec -Mean: 500 cycles = 51.2 mbits/sec +/* Modified for SILC -Pekka */ +/* Includes key scheduling in C always, and encryption and decryption in C + when assembler optimized version cannot be used. */ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 */ #include "silc.h" #include "rijndael_internal.h" #include "aes.h" -/* - * SILC Crypto API for Rijndael +/* + * SILC Crypto API for AES */ /* Sets the key for the cipher. */ SILC_CIPHER_API_SET_KEY(aes) { - SilcUInt32 k[8]; - - SILC_GET_WORD_KEY(key, k, keylen); - rijndael_set_key((RijndaelContext *)context, k, keylen); - + aes_encrypt_key(key, keylen, &((AesContext *)context)->enc); + aes_decrypt_key(key, keylen, &((AesContext *)context)->dec); return TRUE; } -/* Sets the string as a new key for the cipher. The string is first - hashed and then used as a new key. */ - -SILC_CIPHER_API_SET_KEY_WITH_STRING(aes) -{ - return 1; -} - /* Returns the size of the cipher context. */ SILC_CIPHER_API_CONTEXT_LEN(aes) { - return sizeof(RijndaelContext); + return sizeof(AesContext); } /* Encrypts with the cipher in CBC mode. Source and destination buffers @@ -79,23 +62,19 @@ SILC_CIPHER_API_CONTEXT_LEN(aes) SILC_CIPHER_API_ENCRYPT_CBC(aes) { - SilcUInt32 tiv[4]; - int i; - - SILC_CBC_GET_IV(tiv, iv); - - SILC_CBC_ENC_PRE(tiv, src); - rijndael_encrypt((RijndaelContext *)context, tiv, tiv); - SILC_CBC_ENC_POST(tiv, dst, src); - - for (i = 16; i < len; i += 16) { - SILC_CBC_ENC_PRE(tiv, src); - rijndael_encrypt((RijndaelContext *)context, tiv, tiv); - SILC_CBC_ENC_POST(tiv, dst, src); + int nb = len >> 4; + + while(nb--) { + lp32(iv)[0] ^= lp32(src)[0]; + lp32(iv)[1] ^= lp32(src)[1]; + lp32(iv)[2] ^= lp32(src)[2]; + lp32(iv)[3] ^= lp32(src)[3]; + aes_encrypt(iv, iv, &((AesContext *)context)->enc); + memcpy(dst, iv, 16); + src += 16; + dst += 16; } - SILC_CBC_PUT_IV(tiv, iv); - return TRUE; } @@ -104,364 +83,452 @@ SILC_CIPHER_API_ENCRYPT_CBC(aes) SILC_CIPHER_API_DECRYPT_CBC(aes) { - SilcUInt32 tmp[4], tmp2[4], tiv[4]; - int i; - - SILC_CBC_GET_IV(tiv, iv); - - SILC_CBC_DEC_PRE(tmp, src); - rijndael_decrypt((RijndaelContext *)context, tmp, tmp2); - SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); - - for (i = 16; i < len; i += 16) { - SILC_CBC_DEC_PRE(tmp, src); - rijndael_decrypt((RijndaelContext *)context, tmp, tmp2); - SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); + unsigned char tmp[16]; + int nb = len >> 4; + + while(nb--) { + memcpy(tmp, src, 16); + aes_decrypt(src, dst, &((AesContext *)context)->dec); + lp32(dst)[0] ^= lp32(iv)[0]; + lp32(dst)[1] ^= lp32(iv)[1]; + lp32(dst)[2] ^= lp32(iv)[2]; + lp32(dst)[3] ^= lp32(iv)[3]; + memcpy(iv, tmp, 16); + src += 16; + dst += 16; } - - SILC_CBC_PUT_IV(tiv, iv); - + return TRUE; } -#define LARGE_TABLES +/****************************************************************************/ -u1byte pow_tab[256]; -u1byte log_tab[256]; -u1byte sbx_tab[256]; -u1byte isb_tab[256]; -u4byte rco_tab[ 10]; -u4byte ft_tab[4][256]; -u4byte it_tab[4][256]; +#if defined(__cplusplus) +extern "C" +{ +#endif -u4byte fl_tab[4][256]; -u4byte il_tab[4][256]; +#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 ) +# define XP_DIR __cdecl +#else +# define XP_DIR +#endif -u4byte tab_gen = 0; +#define d_1(t,n,b,e) ALIGN const XP_DIR t n[256] = b(e) +#define d_4(t,n,b,e,f,g,h) ALIGN const XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) } +ALIGN const uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); + +#ifdef SILC_ASM_AES +d_1(uint_8t, t_dec(i,box), isb_data, h0); +#endif /* SILC_ASM_AES */ +d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); +d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); +d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); +d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); +d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); + +#define ke4(k,i) \ +{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} -#define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0) +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[4]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + + ke4(cx->ks, 0); ke4(cx->ks, 1); + ke4(cx->ks, 2); ke4(cx->ks, 3); + ke4(cx->ks, 4); ke4(cx->ks, 5); + ke4(cx->ks, 6); ke4(cx->ks, 7); + ke4(cx->ks, 8); + ke4(cx->ks, 9); + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; +} -#define f_rn(bo, bi, n, k) \ - bo[n] = ft_tab[0][byte(bi[n],0)] ^ \ - ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ - ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ - ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) +#define kef6(k,i) \ +{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} -#define i_rn(bo, bi, n, k) \ - bo[n] = it_tab[0][byte(bi[n],0)] ^ \ - it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ - it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ - it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) +#define ke6(k,i) \ +{ kef6(k,i); \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} -#ifdef LARGE_TABLES +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[6]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + + ke6(cx->ks, 0); ke6(cx->ks, 1); + ke6(cx->ks, 2); ke6(cx->ks, 3); + ke6(cx->ks, 4); ke6(cx->ks, 5); + ke6(cx->ks, 6); + kef6(cx->ks, 7); + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; +} -#define ls_box(x) \ - ( fl_tab[0][byte(x, 0)] ^ \ - fl_tab[1][byte(x, 1)] ^ \ - fl_tab[2][byte(x, 2)] ^ \ - fl_tab[3][byte(x, 3)] ) +#define kef8(k,i) \ +{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} -#define f_rl(bo, bi, n, k) \ - bo[n] = fl_tab[0][byte(bi[n],0)] ^ \ - fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ - fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ - fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) +#define ke8(k,i) \ +{ kef8(k,i); \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} -#define i_rl(bo, bi, n, k) \ - bo[n] = il_tab[0][byte(bi[n],0)] ^ \ - il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ - il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ - il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[8]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + cx->ks[6] = ss[6] = word_in(key, 6); + cx->ks[7] = ss[7] = word_in(key, 7); + + ke8(cx->ks, 0); ke8(cx->ks, 1); + ke8(cx->ks, 2); ke8(cx->ks, 3); + ke8(cx->ks, 4); ke8(cx->ks, 5); + kef8(cx->ks, 6); + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; +} -#else +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + switch(key_len) + { + case 16: case 128: aes_encrypt_key128(key, cx); return; + case 24: case 192: aes_encrypt_key192(key, cx); return; + case 32: case 256: aes_encrypt_key256(key, cx); return; + } +} -#define ls_box(x) \ - ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \ - ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \ - ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \ - ((u4byte)sbx_tab[byte(x, 3)] << 24) +#define v(n,i) ((n) - (i) + 2 * ((i) & 3)) +#define k4e(k,i) \ +{ k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \ + k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \ + k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \ +} -#define f_rl(bo, bi, n, k) \ - bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \ - rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \ - rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ - rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n) +#define kdf4(k,i) \ +{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[v(40,(4*(i)))]; k[v(40,(4*(i))+4)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \ +} -#define i_rl(bo, bi, n, k) \ - bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \ - rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \ - rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ - rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n) +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \ + k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \ + k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \ + k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \ + k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \ +} + +#define kdl4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ + k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \ + k[v(40,(4*(i))+6)] = ss[0]; \ + k[v(40,(4*(i))+7)] = ss[1]; \ +} +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[5]; +#if defined( d_vars ) + d_vars; #endif + cx->ks[v(40,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(40,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(40,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(40,(3))] = ss[3] = word_in(key, 3); + + kdf4(cx->ks, 0); kd4(cx->ks, 1); + kd4(cx->ks, 2); kd4(cx->ks, 3); + kd4(cx->ks, 4); kd4(cx->ks, 5); + kd4(cx->ks, 6); kd4(cx->ks, 7); + kd4(cx->ks, 8); kdl4(cx->ks, 9); + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; +} -void gen_tabs(void) -{ u4byte i, t; - u1byte p, q; +#define k6ef(k,i) \ +{ k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \ + k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \ + k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \ +} - /* log and power tables for GF(2**8) finite field with */ - /* 0x11b as modular polynomial - the simplest prmitive */ - /* root is 0x11, used here to generate the tables */ +#define k6e(k,i) \ +{ k6ef(k,i); \ + k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \ + k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \ +} - for(i = 0,p = 1; i < 256; ++i) - { - pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i; +#define kdf6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \ +} - p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0); - } +#define kd6(k,i) \ +{ ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \ +} - log_tab[1] = 0; p = 1; +#define kdl6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \ +} - for(i = 0; i < 10; ++i) - { - rco_tab[i] = p; +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[7]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(48,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(48,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(48,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(48,(3))] = ss[3] = word_in(key, 3); + + cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4)); + cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5)); + kdf6(cx->ks, 0); kd6(cx->ks, 1); + kd6(cx->ks, 2); kd6(cx->ks, 3); + kd6(cx->ks, 4); kd6(cx->ks, 5); + kd6(cx->ks, 6); kdl6(cx->ks, 7); + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; +} - p = (p << 1) ^ (p & 0x80 ? 0x1b : 0); - } +#define k8ef(k,i) \ +{ k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \ + k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \ + k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \ +} - /* note that the affine byte transformation matrix in */ - /* rijndael specification is in big endian format with */ - /* bit 0 as the most significant bit. In the remainder */ - /* of the specification the bits are numbered from the */ - /* least significant end of a byte. */ - - for(i = 0; i < 256; ++i) - { - p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; - q = (q >> 7) | (q << 1); p ^= q; - q = (q >> 7) | (q << 1); p ^= q; - q = (q >> 7) | (q << 1); p ^= q; - q = (q >> 7) | (q << 1); p ^= q ^ 0x63; - sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i; - } +#define k8e(k,i) \ +{ k8ef(k,i); \ + k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \ + k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \ + k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \ + k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \ +} - for(i = 0; i < 256; ++i) - { - p = sbx_tab[i]; - -#ifdef LARGE_TABLES - - t = p; fl_tab[0][i] = t; - fl_tab[1][i] = rotl(t, 8); - fl_tab[2][i] = rotl(t, 16); - fl_tab[3][i] = rotl(t, 24); +#define kdf8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \ + ss[8] = ls_box(ss[3],0); \ + ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \ +} + +#define kdl8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \ +} + +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[9]; +#if defined( d_vars ) + d_vars; #endif - t = ((u4byte)ff_mult(2, p)) | - ((u4byte)p << 8) | - ((u4byte)p << 16) | - ((u4byte)ff_mult(3, p) << 24); - - ft_tab[0][i] = t; - ft_tab[1][i] = rotl(t, 8); - ft_tab[2][i] = rotl(t, 16); - ft_tab[3][i] = rotl(t, 24); - - p = isb_tab[i]; - -#ifdef LARGE_TABLES - - t = p; il_tab[0][i] = t; - il_tab[1][i] = rotl(t, 8); - il_tab[2][i] = rotl(t, 16); - il_tab[3][i] = rotl(t, 24); -#endif - t = ((u4byte)ff_mult(14, p)) | - ((u4byte)ff_mult( 9, p) << 8) | - ((u4byte)ff_mult(13, p) << 16) | - ((u4byte)ff_mult(11, p) << 24); - - it_tab[0][i] = t; - it_tab[1][i] = rotl(t, 8); - it_tab[2][i] = rotl(t, 16); - it_tab[3][i] = rotl(t, 24); - } + cx->ks[v(56,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(56,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(56,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(56,(3))] = ss[3] = word_in(key, 3); + + cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4)); + cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5)); + cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6)); + cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7)); + kdf8(cx->ks, 0); kd8(cx->ks, 1); + kd8(cx->ks, 2); kd8(cx->ks, 3); + kd8(cx->ks, 4); kd8(cx->ks, 5); + kdl8(cx->ks, 6); + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; +} - tab_gen = 1; -}; - -#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) - -#define imix_col(y,x) \ - u = star_x(x); \ - v = star_x(u); \ - w = star_x(v); \ - t = w ^ (x); \ - (y) = u ^ v ^ w; \ - (y) ^= rotr(u ^ t, 8) ^ \ - rotr(v ^ t, 16) ^ \ - rotr(t,24) - -/* initialise the key schedule from the user supplied key */ - -#define loop4(i) \ -{ \ - t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ - t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \ - t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \ - t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \ - t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \ -} - -#define loop6(i) \ -{ t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ - t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \ - t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \ - t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \ - t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \ - t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \ - t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \ -} - -#define loop8(i) \ -{ t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ - t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \ - t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \ - t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \ - t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \ - t = e_key[8 * i + 4] ^ ls_box(t); \ - e_key[8 * i + 12] = t; \ - t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \ - t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \ - t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \ -} - -u4byte *rijndael_set_key(RijndaelContext *ctx, - const u4byte in_key[], const u4byte key_len) -{ - u4byte i, t, u, v, w; - u4byte *e_key = ctx->e_key; - u4byte *d_key = ctx->d_key; - u4byte k_len; - - if(!tab_gen) - gen_tabs(); - - k_len = ctx->k_len = (key_len + 31) / 32; - - e_key[0] = in_key[0]; e_key[1] = in_key[1]; - e_key[2] = in_key[2]; e_key[3] = in_key[3]; - - switch(k_len) +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + switch(key_len) { - case 4: t = e_key[3]; - for(i = 0; i < 10; ++i) - loop4(i); - break; - - case 6: e_key[4] = in_key[4]; t = e_key[5] = in_key[5]; - for(i = 0; i < 8; ++i) - loop6(i); - break; - - case 8: e_key[4] = in_key[4]; e_key[5] = in_key[5]; - e_key[6] = in_key[6]; t = e_key[7] = in_key[7]; - for(i = 0; i < 7; ++i) - loop8(i); - break; + case 16: case 128: aes_decrypt_key128(key, cx); return; + case 24: case 192: aes_decrypt_key192(key, cx); return; + case 32: case 256: aes_decrypt_key256(key, cx); return; } +} - d_key[0] = e_key[0]; d_key[1] = e_key[1]; - d_key[2] = e_key[2]; d_key[3] = e_key[3]; +#ifndef SILC_ASM_AES +/* C version of AES */ + +#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c]) +#define so(y,x,c) word_out(y, c, s(x,c)) +#define locals(y,x) x[4],y[4] +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ - for(i = 4; i < 4 * k_len + 24; ++i) - { - imix_col(d_key[i], e_key[i]); - } +#if defined( _MSC_VER ) && !defined( _WIN64 ) +#pragma optimize( "s", on ) +#endif - return e_key; -}; +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c)) -/* encrypt a block of text */ +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); + const uint_32t *kp; -#define f_nround(bo, bi, k) \ - f_rn(bo, bi, 0, k); \ - f_rn(bo, bi, 1, k); \ - f_rn(bo, bi, 2, k); \ - f_rn(bo, bi, 3, k); \ - k += 4 + kp = cx->ks; + state_in(b0, in, kp); -#define f_lround(bo, bi, k) \ - f_rl(bo, bi, 0, k); \ - f_rl(bo, bi, 1, k); \ - f_rl(bo, bi, 2, k); \ - f_rl(bo, bi, 3, k) + switch(cx->inf.b[0]) + { + case 14 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 12 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 10 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + round(fwd_rnd, b1, b0, kp + 3 * N_COLS); + round(fwd_rnd, b0, b1, kp + 4 * N_COLS); + round(fwd_rnd, b1, b0, kp + 5 * N_COLS); + round(fwd_rnd, b0, b1, kp + 6 * N_COLS); + round(fwd_rnd, b1, b0, kp + 7 * N_COLS); + round(fwd_rnd, b0, b1, kp + 8 * N_COLS); + round(fwd_rnd, b1, b0, kp + 9 * N_COLS); + round(fwd_lrnd, b0, b1, kp +10 * N_COLS); + } -void rijndael_encrypt(RijndaelContext *ctx, - const u4byte in_blk[4], u4byte out_blk[4]) -{ - u4byte b0[4], b1[4], *kp; - u4byte *e_key = ctx->e_key; - u4byte k_len = ctx->k_len; + state_out(out, b0); +} - b0[0] = in_blk[0] ^ e_key[0]; b0[1] = in_blk[1] ^ e_key[1]; - b0[2] = in_blk[2] ^ e_key[2]; b0[3] = in_blk[3] ^ e_key[3]; +#define inv_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) - kp = e_key + 4; +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c)) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c)) +#define key_ofs 0 +#define rnd_key(n) (kp + n * N_COLS) - if(k_len > 6) - { - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - } +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); + const uint_32t *kp; - if(k_len > 4) - { - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - } + kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); + state_in(b0, in, kp); - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - f_nround(b1, b0, kp); f_nround(b0, b1, kp); - f_nround(b1, b0, kp); f_lround(b0, b1, kp); - - out_blk[0] = b0[0]; out_blk[1] = b0[1]; - out_blk[2] = b0[2]; out_blk[3] = b0[3]; -}; - -/* decrypt a block of text */ - -#define i_nround(bo, bi, k) \ - i_rn(bo, bi, 0, k); \ - i_rn(bo, bi, 1, k); \ - i_rn(bo, bi, 2, k); \ - i_rn(bo, bi, 3, k); \ - k -= 4 - -#define i_lround(bo, bi, k) \ - i_rl(bo, bi, 0, k); \ - i_rl(bo, bi, 1, k); \ - i_rl(bo, bi, 2, k); \ - i_rl(bo, bi, 3, k) - -void rijndael_decrypt(RijndaelContext *ctx, - const u4byte in_blk[4], u4byte out_blk[4]) -{ - u4byte b0[4], b1[4], *kp; - u4byte *e_key = ctx->e_key; - u4byte *d_key = ctx->d_key; - u4byte k_len = ctx->k_len; - - b0[0] = in_blk[0] ^ e_key[4 * k_len + 24]; b0[1] = in_blk[1] ^ e_key[4 * k_len + 25]; - b0[2] = in_blk[2] ^ e_key[4 * k_len + 26]; b0[3] = in_blk[3] ^ e_key[4 * k_len + 27]; - - kp = d_key + 4 * (k_len + 5); - - if(k_len > 6) + kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); + switch(cx->inf.b[0]) { - i_nround(b1, b0, kp); i_nround(b0, b1, kp); + case 14 * 16: + round(inv_rnd, b1, b0, rnd_key(-13)); + round(inv_rnd, b0, b1, rnd_key(-12)); + case 12 * 16: + round(inv_rnd, b1, b0, rnd_key(-11)); + round(inv_rnd, b0, b1, rnd_key(-10)); + case 10 * 16: + round(inv_rnd, b1, b0, rnd_key(-9)); + round(inv_rnd, b0, b1, rnd_key(-8)); + round(inv_rnd, b1, b0, rnd_key(-7)); + round(inv_rnd, b0, b1, rnd_key(-6)); + round(inv_rnd, b1, b0, rnd_key(-5)); + round(inv_rnd, b0, b1, rnd_key(-4)); + round(inv_rnd, b1, b0, rnd_key(-3)); + round(inv_rnd, b0, b1, rnd_key(-2)); + round(inv_rnd, b1, b0, rnd_key(-1)); + round(inv_lrnd, b0, b1, rnd_key( 0)); } - if(k_len > 4) - { - i_nround(b1, b0, kp); i_nround(b0, b1, kp); - } + state_out(out, b0); +} - i_nround(b1, b0, kp); i_nround(b0, b1, kp); - i_nround(b1, b0, kp); i_nround(b0, b1, kp); - i_nround(b1, b0, kp); i_nround(b0, b1, kp); - i_nround(b1, b0, kp); i_nround(b0, b1, kp); - i_nround(b1, b0, kp); i_lround(b0, b1, kp); +#if defined(__cplusplus) +} +#endif - out_blk[0] = b0[0]; out_blk[1] = b0[1]; - out_blk[2] = b0[2]; out_blk[3] = b0[3]; -}; +#endif /* SILC_ASM_AES */ diff --git a/lib/silccrypt/aes.h b/lib/silccrypt/aes.h index d9d3810f..18666454 100644 --- a/lib/silccrypt/aes.h +++ b/lib/silccrypt/aes.h @@ -1,15 +1,15 @@ /* - rijndael.h + aes.h Author: Pekka Riikonen - Copyright (C) 1997 - 2000 Pekka Riikonen + Copyright (C) 1997 - 2006 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -17,15 +17,14 @@ */ -#ifndef RIJNDAEL_H -#define RIJNDAEL_H +#ifndef AES_H +#define AES_H -/* +/* * SILC Crypto API for Rijndael */ SILC_CIPHER_API_SET_KEY(aes); -SILC_CIPHER_API_SET_KEY_WITH_STRING(aes); SILC_CIPHER_API_CONTEXT_LEN(aes); SILC_CIPHER_API_ENCRYPT_CBC(aes); SILC_CIPHER_API_DECRYPT_CBC(aes); diff --git a/lib/silccrypt/aes_x86.asm b/lib/silccrypt/aes_x86.asm new file mode 100644 index 00000000..f9c34e66 --- /dev/null +++ b/lib/silccrypt/aes_x86.asm @@ -0,0 +1,645 @@ + +; --------------------------------------------------------------------------- +; Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software in both source and binary +; form is allowed (with or without changes) provided that: +; +; 1. distributions of this source code include the above copyright +; notice, this list of conditions and the following disclaimer; +; +; 2. distributions in binary form include the above copyright +; notice, this list of conditions and the following disclaimer +; in the documentation and/or other associated materials; +; +; 3. the copyright holder's name is not used to endorse products +; built using this software without specific written permission. +; +; ALTERNATIVELY, provided that this notice is retained in full, this product +; may be distributed under the terms of the GNU General Public License (GPL), +; in which case the provisions of the GPL apply INSTEAD OF those given above. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 09/09/2006 + +; An AES implementation for x86 processors using the YASM (or NASM) assembler. +; This is an assembler implementation that covers encryption and decryption +; only and is intended as a replacement of the C file aescrypt.c. It hence +; requires the file aeskey.c for keying and aestab.c for the AES tables. It +; employs full tables rather than compressed tables. + +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. The ebx, esi, edi and ebp registers are +; preserved across calls but eax, ecx and edx and the artihmetic status flags +; are not. It is also important that the defines below match those used in the +; C code. This code uses the VC++ register saving conentions; if it is used +; with another compiler, conventions for using and saving registers may need to +; be checked (and calling conventions). The YASM command line for the VC++ +; custom build step is: +; +; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" +; +; The calling intefaces are: +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +%define AES_128 ; define if AES with 128 bit keys is needed +%define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +%define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define if tables are to be used for last round + +; offsets to parameters + +in_blk equ 4 ; input byte array address parameter +out_blk equ 8 ; output byte array address parameter +ctx equ 12 ; AES context structure +stk_spc equ 20 ; stack space +%define parms 12 ; parameter space on stack + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +; These macros implement stack based local variables + +%macro save 2 + mov [esp+4*%1],%2 +%endmacro + +%macro restore 2 + mov %1,[esp+4*%2] +%endmacro + +; the DLL has to implement the _stdcall calling interface on return +; In this case we have to take our parameters (3 4-byte pointers) +; off the stack + +%macro do_name 1-2 parms +%ifndef DLL_EXPORT + align 32 + global %1 +%1: +%else + align 32 + global %1@%2 + export %1@%2 +%1@%2: +%endif +%endmacro + +%macro do_call 1-2 parms +%ifndef DLL_EXPORT + call %1 + add esp,%2 +%else + call %1@%2 +%endif +%endmacro + +%macro do_exit 0-1 parms +%ifdef DLL_EXPORT + ret %1 +%else + ret +%endif +%endmacro + +%ifdef ENCRYPTION + + extern t_fn + +%define etab_0(x) [t_fn+4*x] +%define etab_1(x) [t_fn+1024+4*x] +%define etab_2(x) [t_fn+2048+4*x] +%define etab_3(x) [t_fn+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_fl + +%define eltab_0(x) [t_fl+4*x] +%define eltab_1(x) [t_fl+1024+4*x] +%define eltab_2(x) [t_fl+2048+4*x] +%define eltab_3(x) [t_fl+3072+4*x] + +%else + +%define etab_b(x) byte [t_fn+3072+4*x] + +%endif + +; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the +; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. +; +; Input: +; +; EAX column[0] +; EBX column[1] +; ECX column[2] +; EDX column[3] +; ESI column key[round][2] +; EDI column key[round][3] +; EBP scratch +; +; Output: +; +; EBP column[0] unkeyed +; EBX column[1] unkeyed +; ESI column[2] keyed +; EDI column[3] keyed +; EAX scratch +; ECX scratch +; EDX scratch + +%macro rnd_fun 2 + + rol ebx,16 + %1 esi, cl, 0, ebp + %1 esi, dh, 1, ebp + %1 esi, bh, 3, ebp + %1 edi, dl, 0, ebp + %1 edi, ah, 1, ebp + %1 edi, bl, 2, ebp + %2 ebp, al, 0, ebp + shr ebx,16 + and eax,0xffff0000 + or eax,ebx + shr edx,16 + %1 ebp, ah, 1, ebx + %1 ebp, dh, 3, ebx + %2 ebx, dl, 2, ebx + %1 ebx, ch, 1, edx + %1 ebx, al, 0, edx + shr eax,16 + shr ecx,16 + %1 ebp, cl, 2, edx + %1 edi, ch, 3, edx + %1 esi, al, 2, edx + %1 ebx, ah, 3, edx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro nr_xor 4 + movzx %4,%2 + xor %1,etab_%3(%4) +%endmacro + +%macro nr_mov 4 + movzx %4,%2 + mov %1,etab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + + %macro lr_xor 4 + movzx %4,%2 + xor %1,eltab_%3(%4) + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + mov %1,eltab_%3(%4) + %endmacro + +%else + + %macro lr_xor 4 + movzx %4,%2 + movzx %4,etab_b(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + movzx %1,etab_b(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro enc_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro enc_last_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun lr_xor, lr_mov + + mov eax,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text align=32 + +; AES Encryption Subroutine + + do_name aes_encrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte [ebp+4*KS_LENGTH] + xor eax,[ebp ] + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: enc_round + enc_round +.2: enc_round + enc_round +.3: enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_last_round + + mov edx,[esp+out_blk+stk_spc] + mov [edx],eax + mov [edx+4],ebx + mov [edx+8],esi + mov [edx+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif + +%ifdef DECRYPTION + + extern t_in + +%define dtab_0(x) [t_in+4*x] +%define dtab_1(x) [t_in+1024+4*x] +%define dtab_2(x) [t_in+2048+4*x] +%define dtab_3(x) [t_in+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_il + +%define dltab_0(x) [t_il+4*x] +%define dltab_1(x) [t_il+1024+4*x] +%define dltab_2(x) [t_il+2048+4*x] +%define dltab_3(x) [t_il+3072+4*x] + +%else + + extern t_ibox + +%define dtab_x(x) byte [t_ibox+x] + +%endif + +%macro irn_fun 2 + + rol eax,16 + %1 esi, cl, 0, ebp + %1 esi, bh, 1, ebp + %1 esi, al, 2, ebp + %1 edi, dl, 0, ebp + %1 edi, ch, 1, ebp + %1 edi, ah, 3, ebp + %2 ebp, bl, 0, ebp + shr eax,16 + and ebx,0xffff0000 + or ebx,eax + shr ecx,16 + %1 ebp, bh, 1, eax + %1 ebp, ch, 3, eax + %2 eax, cl, 2, ecx + %1 eax, bl, 0, ecx + %1 eax, dh, 1, ecx + shr ebx,16 + shr edx,16 + %1 esi, dh, 3, ecx + %1 ebp, dl, 2, ecx + %1 eax, bh, 3, ecx + %1 edi, bl, 2, ecx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro ni_xor 4 + movzx %4,%2 + xor %1,dtab_%3(%4) +%endmacro + +%macro ni_mov 4 + movzx %4,%2 + mov %1,dtab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + +%macro li_xor 4 + movzx %4,%2 + xor %1,dltab_%3(%4) +%endmacro + +%macro li_mov 4 + movzx %4,%2 + mov %1,dltab_%3(%4) +%endmacro + +%else + + %macro li_xor 4 + movzx %4,%2 + movzx %4,dtab_x(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro li_mov 4 + movzx %4,%2 + movzx %1,dtab_x(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro dec_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro dec_last_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun li_xor, li_mov + + mov ebx,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text align=32 + +; AES Decryption Subroutine + + do_name aes_decrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + +; input four columns and xor in first round key + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + lea esi,[esi+16] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte[ebp+4*KS_LENGTH] +%ifndef AES_REV_DKS ; if decryption key schedule is not reversed + lea ebp,[ebp+edi] ; we have to access it from the top down +%endif + xor eax,[ebp ] ; key schedule + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: dec_round + dec_round +.2: dec_round + dec_round +.3: dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_last_round + +; move final values to the output array. + + mov ebp,[esp+out_blk+stk_spc] + mov [ebp],eax + mov [ebp+4],ebx + mov [ebp+8],esi + mov [ebp+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif + + end diff --git a/lib/silccrypt/aes_x86_64.asm b/lib/silccrypt/aes_x86_64.asm new file mode 100644 index 00000000..ef7f6113 --- /dev/null +++ b/lib/silccrypt/aes_x86_64.asm @@ -0,0 +1,866 @@ + +; --------------------------------------------------------------------------- +; Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software in both source and binary +; form is allowed (with or without changes) provided that: +; +; 1. distributions of this source code include the above copyright +; notice, this list of conditions and the following disclaimer; +; +; 2. distributions in binary form include the above copyright +; notice, this list of conditions and the following disclaimer +; in the documentation and/or other associated materials; +; +; 3. the copyright holder's name is not used to endorse products +; built using this software without specific written permission. +; +; ALTERNATIVELY, provided that this notice is retained in full, this product +; may be distributed under the terms of the GNU General Public License (GPL), +; in which case the provisions of the GPL apply INSTEAD OF those given above. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 09/09/2006 + +; I am grateful to Dag Arne Osvik for many discussions of the techniques that +; can be used to optimise AES assembler code on AMD64/EM64T architectures. +; Some of the techniques used in this implementation are the result of +; suggestions made by him for which I am most grateful. + +; An AES implementation for AMD64 processors using the YASM assembler. This +; implemetation provides only encryption, decryption and hence requires key +; scheduling support in C. It uses 8k bytes of tables but its encryption and +; decryption performance is very close to that obtained using large tables. +; It can use either Windows or Gnu/Linux calling conventions, which are as +; follows: +; windows gnu/linux +; +; in_blk rcx rdi +; out_blk rdx rsi +; context (cx) r8 rdx +; +; preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 +; registers rdi - on both +; +; destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 +; registers - rdi on both +; +; The default convention is that for windows, the gnu/linux convention being +; used if __GNUC__ is defined. +; +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. It uses the Microsoft C AMD64 calling +; conventions in which the three parameters are placed in rcx, rdx and r8 +; respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +%define AES_128 ; define if AES with 128 bit keys is needed +%define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +%define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define for the faster version using extra tables + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +%define r0 rax +%define r1 rdx +%define r2 rcx +%define r3 rbx +%define r4 rsi +%define r5 rdi +%define r6 rbp +%define r7 rsp + +%define raxd eax +%define rdxd edx +%define rcxd ecx +%define rbxd ebx +%define rsid esi +%define rdid edi +%define rbpd ebp +%define rspd esp + +%define raxb al +%define rdxb dl +%define rcxb cl +%define rbxb bl +%define rsib sil +%define rdib dil +%define rbpb bpl +%define rspb spl + +%define r0h ah +%define r1h dh +%define r2h ch +%define r3h bh + +%define r0d eax +%define r1d edx +%define r2d ecx +%define r3d ebx + +; finite field multiplies by {02}, {04} and {08} + +%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +; finite field multiplies required in table generation + +%define f3(x) (f2(x) ^ x) +%define f9(x) (f8(x) ^ x) +%define fb(x) (f8(x) ^ f2(x) ^ x) +%define fd(x) (f8(x) ^ f4(x) ^ x) +%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +; macro for expanding S-box data + +%macro enc_vals 1 + db %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5) + db %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76) + db %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0) + db %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0) + db %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc) + db %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15) + db %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a) + db %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75) + db %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0) + db %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84) + db %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b) + db %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf) + db %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85) + db %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8) + db %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5) + db %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2) + db %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17) + db %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73) + db %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88) + db %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb) + db %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c) + db %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79) + db %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9) + db %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08) + db %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6) + db %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a) + db %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e) + db %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e) + db %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94) + db %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf) + db %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68) + db %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16) +%endmacro + +%macro dec_vals 1 + db %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38) + db %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb) + db %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87) + db %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb) + db %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d) + db %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e) + db %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2) + db %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25) + db %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16) + db %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92) + db %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda) + db %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84) + db %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a) + db %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06) + db %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02) + db %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b) + db %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea) + db %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73) + db %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85) + db %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e) + db %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89) + db %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b) + db %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20) + db %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4) + db %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31) + db %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f) + db %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d) + db %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef) + db %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0) + db %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61) + db %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26) + db %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d) +%endmacro + +%define u8(x) f2(x), x, x, f3(x), f2(x), x, x, f3(x) +%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x +%define w8(x) x, 0, 0, 0, x, 0, 0, 0 + +%define tptr rbp ; table pointer +%define kptr r8 ; key schedule pointer +%define fofs 128 ; adjust offset in key schedule to keep |disp| < 128 +%define fk_ref(x,y) [kptr-16*x+fofs+4*y] +%ifdef AES_REV_DKS +%define rofs 128 +%define ik_ref(x,y) [kptr-16*x+rofs+4*y] +%else +%define rofs -128 +%define ik_ref(x,y) [kptr+16*x+rofs+4*y] +%endif + +%define tab_0(x) [tptr+8*x] +%define tab_1(x) [tptr+8*x+3] +%define tab_2(x) [tptr+8*x+2] +%define tab_3(x) [tptr+8*x+1] +%define tab_f(x) byte [tptr+8*x+1] +%define tab_i(x) byte [tptr+8*x+7] +%define t_ref(x,r) tab_ %+ x(r) + +%macro ff_rnd 5 ; normal forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro fl_rnd 5 ; last forward round + add tptr, 2048 + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) +%endmacro + +%else + +%macro fl_rnd 5 ; last forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %1d, esi + rol edi, 8 + xor %4d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %2d, edi + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %2d, esi + rol edi, 8 + xor %1d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %3d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %2d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %4d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %3d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %1d, edi +%endmacro + +%endif + +%macro ii_rnd 5 ; normal inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro il_rnd 5 ; last inverse round + add tptr, 2048 + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) +%endmacro + +%else + +%macro il_rnd 5 ; last inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr eax, 16 + xor %1d, esi + rol edi, 8 + xor %2d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %4d, edi + + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ebx, 16 + xor %2d, esi + rol edi, 8 + xor %3d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %1d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %4d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %2d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %1d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %3d, edi +%endmacro + +%endif + +%ifdef ENCRYPTION + + global aes_encrypt +%ifdef DLL_EXPORT + export aes_encrypt +%endif + + section .data align=64 + align 64 +enc_tab: + enc_vals u8 +%ifdef LAST_ROUND_TABLES + enc_vals w8 +%endif + + section .text align=16 + align 16 +aes_encrypt: + +%ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context +%else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov [rsp+0*8], rdx ; output pointer + mov rdi, rcx ; input pointer +%endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 + + movzx esi, byte [kptr+4*KS_LENGTH] + lea tptr,[enc_tab wrt rip] + sub kptr, fofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + + xor eax, [kptr+fofs] + xor ebx, [kptr+fofs+4] + xor ecx, [kptr+fofs+8] + xor edx, [kptr+fofs+12] + + lea kptr,[kptr+rsi] + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ff_rnd r9, r10, r11, r12, 13 + ff_rnd r9, r10, r11, r12, 12 +.2: ff_rnd r9, r10, r11, r12, 11 + ff_rnd r9, r10, r11, r12, 10 +.3: ff_rnd r9, r10, r11, r12, 9 + ff_rnd r9, r10, r11, r12, 8 + ff_rnd r9, r10, r11, r12, 7 + ff_rnd r9, r10, r11, r12, 6 + ff_rnd r9, r10, r11, r12, 5 + ff_rnd r9, r10, r11, r12, 4 + ff_rnd r9, r10, r11, r12, 3 + ff_rnd r9, r10, r11, r12, 2 + ff_rnd r9, r10, r11, r12, 1 + fl_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: + mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + add rsp, 6*8 +%endif + ret + +%endif + +%ifdef DECRYPTION + + global aes_decrypt +%ifdef DLL_EXPORT + export aes_decrypt +%endif + + section .data align=64 + align 64 +dec_tab: + dec_vals v8 +%ifdef LAST_ROUND_TABLES + dec_vals w8 +%endif + + section .text align=16 + align 16 +aes_decrypt: + +%ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context +%else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov [rsp+0*8], rdx ; output pointer + mov rdi, rcx ; input pointer +%endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 + + movzx esi,byte[kptr+4*KS_LENGTH] + lea tptr,[dec_tab wrt rip] + sub kptr, rofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + +%ifdef AES_REV_DKS + mov rdi, kptr + lea kptr,[kptr+rsi] +%else + lea rdi,[kptr+rsi] +%endif + + xor eax, [rdi+rofs] + xor ebx, [rdi+rofs+4] + xor ecx, [rdi+rofs+8] + xor edx, [rdi+rofs+12] + + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ii_rnd r9, r10, r11, r12, 13 + ii_rnd r9, r10, r11, r12, 12 +.2: ii_rnd r9, r10, r11, r12, 11 + ii_rnd r9, r10, r11, r12, 10 +.3: ii_rnd r9, r10, r11, r12, 9 + ii_rnd r9, r10, r11, r12, 8 + ii_rnd r9, r10, r11, r12, 7 + ii_rnd r9, r10, r11, r12, 6 + ii_rnd r9, r10, r11, r12, 5 + ii_rnd r9, r10, r11, r12, 4 + ii_rnd r9, r10, r11, r12, 3 + ii_rnd r9, r10, r11, r12, 2 + ii_rnd r9, r10, r11, r12, 1 + il_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + add rsp, 6*8 +%endif + ret + +%endif + + end diff --git a/lib/silccrypt/blowfish.c b/lib/silccrypt/blowfish.c index 355d64a8..25b9deab 100644 --- a/lib/silccrypt/blowfish.c +++ b/lib/silccrypt/blowfish.c @@ -37,7 +37,7 @@ #include "blowfish_internal.h" #include "blowfish.h" -/* +/* * SILC Crypto API for Blowfish */ @@ -49,14 +49,6 @@ SILC_CIPHER_API_SET_KEY(blowfish) return TRUE; } -/* Sets the string as a new key for the cipher. The string is first - hashed and then used as a new key. */ - -SILC_CIPHER_API_SET_KEY_WITH_STRING(blowfish) -{ - return 1; -} - /* Returns the size of the cipher context. */ SILC_CIPHER_API_CONTEXT_LEN(blowfish) @@ -108,9 +100,9 @@ SILC_CIPHER_API_DECRYPT_CBC(blowfish) blowfish_decrypt((BlowfishContext *)context, tmp, tmp2, 16); SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); } - + SILC_CBC_PUT_IV(tiv, iv); - + return TRUE; } @@ -383,7 +375,7 @@ static u32 bf_sbox[256 * 4] = 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6, }; -/* +/* * Round loop unrolling macros, S is a pointer to a S-Box array * organized in 4 SilcUInt32s at a row. */ @@ -400,7 +392,7 @@ static u32 bf_sbox[256 * 4] = /* * The blowfish encipher, processes 64-bit blocks. - * NOTE: This function MUSTN'T respect endianess + * NOTE: This function MUSTN'T respect endianess */ int blowfish_encrypt(BlowfishContext *ctx, @@ -526,7 +518,7 @@ int blowfish_set_key(BlowfishContext *ctx, for (i = 0; i < 16 + 2; i += 2) { blowfish_encrypt(ctx, data, data, 8); - + P[i] = data[0]; P[i + 1] = data[1]; } diff --git a/lib/silccrypt/blowfish.h b/lib/silccrypt/blowfish.h index d67d8739..6f3db654 100644 --- a/lib/silccrypt/blowfish.h +++ b/lib/silccrypt/blowfish.h @@ -4,12 +4,12 @@ Author: Pekka Riikonen - Copyright (C) 1997 - 2000 Pekka Riikonen + Copyright (C) 1997 - 2006 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,12 +20,11 @@ #ifndef BLOWFISH_H #define BLOWFISH_H -/* +/* * SILC Crypto API for Blowfish */ SILC_CIPHER_API_SET_KEY(blowfish); -SILC_CIPHER_API_SET_KEY_WITH_STRING(blowfish); SILC_CIPHER_API_CONTEXT_LEN(blowfish); SILC_CIPHER_API_ENCRYPT_CBC(blowfish); SILC_CIPHER_API_DECRYPT_CBC(blowfish); diff --git a/lib/silccrypt/cast.c b/lib/silccrypt/cast.c index d7f42ecf..52207887 100644 --- a/lib/silccrypt/cast.c +++ b/lib/silccrypt/cast.c @@ -63,8 +63,8 @@ Mean: 674 cycles = 38.0 mbits/sec #include "cast.h" #define io_swap - -/* + +/* * SILC Crypto API for Cast-256 */ @@ -80,14 +80,6 @@ SILC_CIPHER_API_SET_KEY(cast) return TRUE; } -/* Sets the string as a new key for the cipher. The string is first - hashed and then used as a new key. */ - -SILC_CIPHER_API_SET_KEY_WITH_STRING(cast) -{ - return 1; -} - /* Returns the size of the cipher context. */ SILC_CIPHER_API_CONTEXT_LEN(cast) @@ -136,190 +128,190 @@ SILC_CIPHER_API_DECRYPT_CBC(cast) for (i = 16; i < len; i += 16) { SILC_CBC_DEC_PRE(tmp, src); - cast_decrypt((CastContext *)context, tmp, tmp2); + cast_decrypt((CastContext *)context, tmp, tmp2); SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); } - + SILC_CBC_PUT_IV(tiv, iv); - + return TRUE; } -u4byte s_box[4][256] = +u4byte s_box[4][256] = { { - 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9C004dd3, + 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9C004dd3, 0x6003e540, 0xcf9fc949, 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, - 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, 0x28683b6f, 0xc07fd059, + 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d, - 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, - 0x22568e3a, 0xa2d341d0, 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, - 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, 0xb82cbaef, 0xd751d159, + 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, + 0x22568e3a, 0xa2d341d0, 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, + 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, 0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935, 0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d, 0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, - 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0C50, 0x882240f2, 0x0c6e4f38, + 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0C50, 0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe, - 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, - 0xe63d37e0, 0x2a54f6b3, 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, - 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, 0x38901091, 0xc6b505eb, + 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, + 0xe63d37e0, 0x2a54f6b3, 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, + 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, 0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291, - 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, + 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779, 0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6C2, 0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511, - 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, + 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d, 0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, - 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, 0x6b54bfab, 0x2b0b1426, - 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, - 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, + 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, 0x6b54bfab, 0x2b0b1426, + 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, + 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c, 0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, - 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, 0x7b5a41f0, 0xd37cfbad, - 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, - 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, + 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, 0x7b5a41f0, 0xd37cfbad, + 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, + 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96, 0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, - 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, 0x3f04442f, 0x6188b153, - 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, - 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, + 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, 0x3f04442f, 0x6188b153, + 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, + 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd, 0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, - 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, 0x580304f0, 0xca042cf1, + 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, 0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9, - 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, + 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872, 0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, - 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814C, 0x474d6ad7, 0x7c0c5e5c, + 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814C, 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e, - 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, - 0xb141ab08, 0x7cca89b9, 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, + 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, + 0xb141ab08, 0x7cca89b9, 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf }, { 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, - 0x55889c94, 0x72fc0651, 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, - 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, 0xa0b52f7b, 0x59e83605, + 0x55889c94, 0x72fc0651, 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, + 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb, - 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, - 0x25a1ff41, 0xe180f806, 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, - 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, 0xe113c85b, 0xacc40083, + 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, + 0x25a1ff41, 0xe180f806, 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, + 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, 0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359, - 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, - 0x361e3084, 0xe4eb573b, 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, - 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, 0x10843094, 0x2537a95e, + 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, + 0x361e3084, 0xe4eb573b, 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, + 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, 0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34, - 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, - 0x721d9bfd, 0xa58684bb, 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, - 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, 0xc5d655dd, 0xeb667064, + 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, + 0x721d9bfd, 0xa58684bb, 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, + 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, 0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860, - 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, - 0x83ca6b94, 0x2d6ed23b, 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, - 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, 0x81ed6f61, 0x20e74364, + 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, + 0x83ca6b94, 0x2d6ed23b, 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, + 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, 0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b, - 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, - 0xa4b09f6b, 0x1ca815cf, 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, - 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, 0xee41e729, 0x6e1d2d7c, + 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, + 0xa4b09f6b, 0x1ca815cf, 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, + 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, 0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13, - 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, - 0x7cbad9a2, 0x2180036f, 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, - 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, 0xcdf0b680, 0x17844d3b, + 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, + 0x7cbad9a2, 0x2180036f, 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, + 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, 0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6, - 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, - 0xef8579cc, 0xd152de58, 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, - 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, 0xb8da230c, 0x80823028, + 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, + 0xef8579cc, 0xd152de58, 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, + 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, 0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d, 0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, - 0x273be979, 0xb0ffeaa6, 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, - 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, 0xdc8637a0, 0x16a7d3b1, + 0x273be979, 0xb0ffeaa6, 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, + 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, 0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6, - 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, - 0x145892f5, 0x91584f7f, 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, - 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, 0xb284600c, 0xd835731d, + 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, + 0x145892f5, 0x91584f7f, 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, + 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa, - 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, - 0x5c038323, 0x3e5d3bb9, 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, + 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, + 0x5c038323, 0x3e5d3bb9, 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1 }, { 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, - 0x8c1fc644, 0xaececa90, 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, + 0x8c1fc644, 0xaececa90, 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5, 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e, 0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240, 0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5, 0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b, - 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, - 0xc5884a28, 0xccc36f71, 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, - 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, 0xa747d2d0, 0x1651192e, + 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, + 0xc5884a28, 0xccc36f71, 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, + 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, 0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82, - 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, - 0x796fb449, 0x8252dc15, 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, - 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, 0x23efe941, 0xa903f12e, + 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, + 0x796fb449, 0x8252dc15, 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, + 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, 0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176, - 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, - 0x96bbb682, 0x93b4b148, 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, - 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, 0x8b907cee, 0xb51fd240, + 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, + 0x96bbb682, 0x93b4b148, 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, + 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, 0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341, - 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, + 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e, 0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, - 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, 0x68cc7bfb, 0xd90f2788, + 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, 0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f, - 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, - 0x27627545, 0x825cf47a, 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, - 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, 0x285ba1c8, 0x3c62f44f, + 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, + 0x27627545, 0x825cf47a, 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, + 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, 0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b, - 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, - 0x12deca4d, 0x2c3f8cc5, 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, - 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, 0x3a609437, 0xec00c9a9, + 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, + 0x12deca4d, 0x2c3f8cc5, 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, + 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, 0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536, - 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, - 0xa2e53f55, 0xb9e6d4bc, 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, - 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, 0x947b0001, 0x570075d2, + 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, + 0xa2e53f55, 0xb9e6d4bc, 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, + 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, 0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69, - 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, - 0xf1ac2571, 0xcc8239c2, 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, - 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, 0x5727c148, 0x2be98a1d, + 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, + 0xf1ac2571, 0xcc8239c2, 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, + 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d, - 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, - 0x52bce688, 0x1b03588a, 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, + 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, + 0x52bce688, 0x1b03588a, 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783 }, - { - 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, - 0x85510443, 0xfa020ed1, 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, - 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, 0x28147f5f, 0x4fa2b8cd, + { + 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, + 0x85510443, 0xfa020ed1, 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, + 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15, - 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, - 0x081b08ca, 0x05170121, 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, - 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, 0xce84ffdf, 0xf5718801, + 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, + 0x081b08ca, 0x05170121, 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, + 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, 0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5, - 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, + 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb, 0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, - 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, 0x4d351805, 0x7f3d5ce3, + 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, 0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d, 0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6, 0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, - 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, 0x69dead38, 0x1574ca16, + 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, 0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003, 0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, - 0x0ce5c2ec, 0x4db4bba6, 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, - 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, 0x6e85cb75, 0xbe07c002, + 0x0ce5c2ec, 0x4db4bba6, 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, + 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, 0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24, - 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, - 0x041afa32, 0x1d16625a, 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, - 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, 0x026a4ceb, 0x52437eff, + 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, + 0x041afa32, 0x1d16625a, 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, + 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, 0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df, - 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, - 0x213d42f6, 0x2c1c7c26, 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, - 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, 0x63315c21, 0x5e0a72ec, + 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, + 0x213d42f6, 0x2c1c7c26, 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, + 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, 0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7, - 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, - 0xcfcbd12f, 0xc1de8417, 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, + 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, + 0xcfcbd12f, 0xc1de8417, 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2, 0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2, - 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, - 0xaf9eb3db, 0x29c9ed2a, 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, - 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, 0x77079103, 0xdea03af6, + 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, + 0xaf9eb3db, 0x29c9ed2a, 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, + 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, 0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef, 0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, - 0xf3e0eb5b, 0xd6cc9876, 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, + 0xf3e0eb5b, 0xd6cc9876, 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab, 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04, 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, @@ -378,7 +370,7 @@ u4byte s_box[4][256] = u4byte *cast_set_key(CastContext *ctx, const u4byte in_key[], const u4byte key_len) -{ +{ u4byte i, j, t, u, cm, cr, lk[8], tm[8], tr[8]; u4byte *l_key = ctx->l_key; @@ -401,7 +393,7 @@ u4byte *cast_set_key(CastContext *ctx, } k_rnd(lk, tr, tm); - + for(j = 0; j < 8; ++j) { tm[j] = cm; cm += 0x6ed9eba1; @@ -423,7 +415,7 @@ u4byte *cast_set_key(CastContext *ctx, void cast_encrypt(CastContext *ctx, const u4byte in_blk[4], u4byte out_blk[]) -{ +{ u4byte t, u, blk[4]; u4byte *l_key = ctx->l_key; @@ -445,7 +437,7 @@ void cast_encrypt(CastContext *ctx, void cast_decrypt(CastContext *ctx, const u4byte in_blk[4], u4byte out_blk[4]) -{ +{ u4byte t, u, blk[4]; u4byte *l_key = ctx->l_key; diff --git a/lib/silccrypt/cast.h b/lib/silccrypt/cast.h index f83ce643..0630628a 100644 --- a/lib/silccrypt/cast.h +++ b/lib/silccrypt/cast.h @@ -9,7 +9,7 @@ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,12 +20,11 @@ #ifndef CAST_H #define CAST_H -/* +/* * SILC Crypto API for Cast-256 */ SILC_CIPHER_API_SET_KEY(cast); -SILC_CIPHER_API_SET_KEY_WITH_STRING(cast); SILC_CIPHER_API_CONTEXT_LEN(cast); SILC_CIPHER_API_ENCRYPT_CBC(cast); SILC_CIPHER_API_DECRYPT_CBC(cast); diff --git a/lib/silccrypt/ciphers_def.h b/lib/silccrypt/ciphers_def.h index e254e68f..34f30e89 100644 --- a/lib/silccrypt/ciphers_def.h +++ b/lib/silccrypt/ciphers_def.h @@ -4,12 +4,12 @@ Author: Pekka Riikonen - Copyright (C) 1999 - 2000 Pekka Riikonen + Copyright (C) 1999 - 2006 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -24,6 +24,8 @@ typedef unsigned char u1byte; typedef SilcUInt32 u4byte; typedef SilcUInt32 u32; +typedef SilcUInt32 uint_32t; +typedef SilcUInt8 uint_8t; #define rotr(x, nr) (((x) >> ((int)(nr))) | ((x) << (32 - (int)(nr)))) #define rotl(x, nr) (((x) << ((int)(nr))) | ((x) >> (32 - (int)(nr)))) diff --git a/lib/silccrypt/configure.ad b/lib/silccrypt/configure.ad new file mode 100644 index 00000000..b9ccd4da --- /dev/null +++ b/lib/silccrypt/configure.ad @@ -0,0 +1,57 @@ +# +# lib/silccrypt/configure.ad +# +# Author: Pekka Riikonen +# +# Copyright (C) 2006 Pekka Riikonen +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# + +if test x$compile_libs = xtrue; then + +AC_MSG_NOTICE([configuring crypto library]) +SILC_CRYPTO_CFLAGS= + +aes_asm=false +case "$host_cpu" in + i?86) + if test x$have_assembler = xtrue; then + aes_asm=true + AC_DEFINE([SILC_AES_ASM], [], [SILC_AES_ASM]) + fi + ;; + x86_64) + if test x$have_assembler = xtrue; then + aes_asm=true + AC_DEFINE([SILC_AES_ASM], [], [SILC_AES_ASM]) + fi + ;; + default) + aes_asm=false + ;; +esac +AM_CONDITIONAL(SILC_AES_ASM, test x$aes_asm = xtrue) + +SILC_ADD_CC_FLAGS(SILC_CRYPTO, -fno-regmove) +if test x$summary_debug = xno; then + SILC_ADD_CC_FLAGS(SILC_CRYPTO, -fomit-frame-pointer -O3) +fi + +AC_SUBST(SILC_CRYPTO_CFLAGS) + +AC_CONFIG_FILES( +lib/silccrypt/Makefile +#ifdef SILC_DIST_INPLACE +lib/silccrypt/tests/Makefile +#endif SILC_DIST_INPLACE +) + +fi # compile_libs diff --git a/lib/silccrypt/none.c b/lib/silccrypt/none.c index 5a1a5709..3c45849a 100644 --- a/lib/silccrypt/none.c +++ b/lib/silccrypt/none.c @@ -9,7 +9,7 @@ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,7 +20,7 @@ #include "silc.h" #include "none.h" -/* +/* * SILC Crypto API for None cipher (ie. no cipher) :) */ @@ -29,11 +29,6 @@ SILC_CIPHER_API_SET_KEY(none) return TRUE; } -SILC_CIPHER_API_SET_KEY_WITH_STRING(none) -{ - return TRUE; -} - SILC_CIPHER_API_CONTEXT_LEN(none) { return 1; diff --git a/lib/silccrypt/none.h b/lib/silccrypt/none.h index 8986bba8..261b1c16 100644 --- a/lib/silccrypt/none.h +++ b/lib/silccrypt/none.h @@ -9,7 +9,7 @@ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -19,6 +19,10 @@ /* * $Id$ * $Log$ + * Revision 1.3 2006/12/15 15:22:48 priikone + * Added assembler AES for x86 and x86_64. + * Simplified Cipher implementation API. + * * Revision 1.2 2005/05/10 18:31:17 priikone * Merged silc_1_0_branch to trunk. * @@ -34,12 +38,11 @@ #ifndef NONE_H #define NONE_H -/* +/* * SILC Crypto API for None cipher (ie. no cipher) :) */ SILC_CIPHER_API_SET_KEY(none); -SILC_CIPHER_API_SET_KEY_WITH_STRING(none); SILC_CIPHER_API_CONTEXT_LEN(none); SILC_CIPHER_API_ENCRYPT_CBC(none); SILC_CIPHER_API_DECRYPT_CBC(none); diff --git a/lib/silccrypt/rc5.c b/lib/silccrypt/rc5.c index dfbe7e43..30c5dffd 100644 --- a/lib/silccrypt/rc5.c +++ b/lib/silccrypt/rc5.c @@ -30,10 +30,10 @@ */ /* - * Based on RC5 reference code and on description of Bruce Schneier's + * Based on RC5 reference code and on description of Bruce Schneier's * Applied Cryptography. * - * This implementation has a word size of 32 bits, a rounds of 16 and + * This implementation has a word size of 32 bits, a rounds of 16 and * variable key length from 128 and 192 up to 256 bits. * */ @@ -42,7 +42,7 @@ #include "rc5_internal.h" #include "rc5.h" -/* +/* * SILC Crypto API for RC5 */ @@ -58,14 +58,6 @@ SILC_CIPHER_API_SET_KEY(rc5) return TRUE; } -/* Sets the string as a new key for the cipher. The string is first - hashed and then used as a new key. */ - -SILC_CIPHER_API_SET_KEY_WITH_STRING(rc5) -{ - return 1; -} - /* Returns the size of the cipher context. */ SILC_CIPHER_API_CONTEXT_LEN(rc5) @@ -114,12 +106,12 @@ SILC_CIPHER_API_DECRYPT_CBC(rc5) for (i = 16; i < len; i += 16) { SILC_CBC_DEC_PRE(tmp, src); - rc5_decrypt((RC5Context *)context, tmp, tmp2); + rc5_decrypt((RC5Context *)context, tmp, tmp2); SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); } - + SILC_CBC_PUT_IV(tiv, iv); - + return TRUE; } @@ -206,9 +198,9 @@ int rc5_decrypt(RC5Context *ctx, u32 *in, u32 *out) A = in[0]; B = in[1]; - RC5D(32, A, B); RC5D(30, A, B); - RC5D(28, A, B); RC5D(26, A, B); - RC5D(24, A, B); RC5D(22, A, B); + RC5D(32, A, B); RC5D(30, A, B); + RC5D(28, A, B); RC5D(26, A, B); + RC5D(24, A, B); RC5D(22, A, B); RC5D(20, A, B); RC5D(18, A, B); RC5D(16, A, B); RC5D(14, A, B); RC5D(12, A, B); RC5D(10, A, B); @@ -219,4 +211,4 @@ int rc5_decrypt(RC5Context *ctx, u32 *in, u32 *out) out[1] = B - S[1]; return 0; -} +} diff --git a/lib/silccrypt/rc5.h b/lib/silccrypt/rc5.h index afc37469..ea36d808 100644 --- a/lib/silccrypt/rc5.h +++ b/lib/silccrypt/rc5.h @@ -9,7 +9,7 @@ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,12 +20,11 @@ #ifndef RC5_H #define RC5_H -/* +/* * SILC Crypto API for RC5 */ SILC_CIPHER_API_SET_KEY(rc5); -SILC_CIPHER_API_SET_KEY_WITH_STRING(rc5); SILC_CIPHER_API_CONTEXT_LEN(rc5); SILC_CIPHER_API_ENCRYPT_CBC(rc5); SILC_CIPHER_API_DECRYPT_CBC(rc5); diff --git a/lib/silccrypt/rijndael_internal.h b/lib/silccrypt/rijndael_internal.h index f3893e68..bd6051fd 100644 --- a/lib/silccrypt/rijndael_internal.h +++ b/lib/silccrypt/rijndael_internal.h @@ -1,20 +1,32 @@ -/* +/* --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. - rijndael_internal.h + LICENSE TERMS - Author: Pekka Riikonen + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: - Copyright (C) 1997 - 2000 Pekka Riikonen + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 */ #ifndef RIJNDAEL_INTERNAL_H @@ -22,19 +34,255 @@ #include "ciphers_def.h" -/* Cipher's context */ +#define KS_LENGTH 60 + +typedef union { + uint_32t l; + uint_8t b[4]; +} aes_inf; + +typedef struct { + uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_encrypt_ctx; + +typedef struct { + uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_decrypt_ctx; + typedef struct { - u4byte e_key[60]; - u4byte d_key[60]; - u4byte k_len; -} RijndaelContext; - -/* Prototypes */ -u4byte *rijndael_set_key(RijndaelContext *ctx, - const u4byte in_key[], const u4byte key_len); -void rijndael_encrypt(RijndaelContext *ctx, - const u4byte in_blk[4], u4byte out_blk[4]); -void rijndael_decrypt(RijndaelContext *ctx, - const u4byte in_blk[4], u4byte out_blk[4]); + aes_encrypt_ctx enc; + aes_decrypt_ctx dec; +} AesContext; +#define AES_RETURN void +#define AES_REV_DKS /* define to reverse decryption key schedule */ +#define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ +#define N_COLS 4 /* the number of columns in the state */ +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) +#define WPOLY 0x011b +#define BPOLY 0x1b +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) +#define s(x,c) x[c] +#define lp32(x) ((uint_32t*)(x)) + +#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) +#define TABLE_ALIGN 32 +#endif + +#if defined( bswap32 ) +#define aes_sw32 bswap32 +#elif defined( bswap_32 ) +#define aes_sw32 bswap_32 +#else +#define brot(x,n) (((uint_32t)(x) << n) | ((uint_32t)(x) >> (32 - n))) +#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) #endif + +#ifdef WORDS_BIGENDIAN +#define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | \ + ((uint_32t)(x) << (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) >> (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) (((uint_32t)(b0) << 24) | \ + ((uint_32t)(b1) << 16) | \ + ((uint_32t)(b2) << 8) | (b3)) +#else +#define upr(x,n) (((uint_32t)(x) << (8 * (n))) | \ + ((uint_32t)(x) >> (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) << (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (8 * (n)))) +#define bytes2word(b0, b1, b2, b3) (((uint_32t)(b3) << 24) | \ + ((uint_32t)(b2) << 16) | \ + ((uint_32t)(b1) << 8) | (b0)) +#endif /* WORDS_BIGENDIAN */ + +#define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], \ + ((const uint_8t*)(x)+4*c)[1], \ + ((const uint_8t*)(x)+4*c)[2], \ + ((const uint_8t*)(x)+4*c)[3]) +#define word_out(x,c,v) { \ + ((uint_8t*)(x)+4*c)[0] = bval(v,0); \ + ((uint_8t*)(x)+4*c)[1] = bval(v,1); \ + ((uint_8t*)(x)+4*c)[2] = bval(v,2); \ + ((uint_8t*)(x)+4*c)[3] = bval(v,3); \ +} + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((8+r-c)&3) + +#define dec_fmvars uint_32t g2 +#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) +#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) +#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) + +#define ff(x) inv_mcol(x) +#if defined( dec_imvars ) +#define d_vars dec_imvars +#endif + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define isb_data(w) {\ + w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ + w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ + w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ + w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ + w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ + w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ + w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ + w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ + w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ + w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ + w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ + w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\ + w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ + w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ + w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ + w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ + w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ + w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\ + w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ + w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ + w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ + w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ + w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ + w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ + w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ + w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ + w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ + w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ + w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ + w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ + w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ + w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } + +#define mm_data(w) {\ + w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ + w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ + w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ + w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ + w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ + w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ + w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ + w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ + w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ + w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ + w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ + w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\ + w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ + w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ + w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ + w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ + w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ + w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ + w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ + w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ + w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ + w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ + w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ + w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ + w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ + w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ + w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\ + w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ + w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ + w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ + w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ + w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define h0(x) (x) + +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#if defined(_MSC_VER) && defined(TABLE_ALIGN) +#define ALIGN __declspec(align(TABLE_ALIGN)) +#else +#define ALIGN +#endif + +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]); +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]); +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); + +#endif /* RIJNGDAEL_INTERNAL */ diff --git a/lib/silccrypt/silccipher.c b/lib/silccrypt/silccipher.c index c340e255..bf018c97 100644 --- a/lib/silccrypt/silccipher.c +++ b/lib/silccrypt/silccipher.c @@ -36,47 +36,47 @@ SilcDList silc_cipher_list = NULL; /* Static list of ciphers for silc_cipher_register_default(). */ const SilcCipherObject silc_default_ciphers[] = { - { "aes-256-cbc", silc_aes_set_key, silc_aes_set_key_with_string, + { "aes-256-cbc", silc_aes_set_key, silc_aes_encrypt_cbc, silc_aes_decrypt_cbc, silc_aes_context_len, 256, 16, 16 }, - { "aes-192-cbc", silc_aes_set_key, silc_aes_set_key_with_string, + { "aes-192-cbc", silc_aes_set_key, silc_aes_encrypt_cbc, silc_aes_decrypt_cbc, silc_aes_context_len, 192, 16, 16 }, - { "aes-128-cbc", silc_aes_set_key, silc_aes_set_key_with_string, + { "aes-128-cbc", silc_aes_set_key, silc_aes_encrypt_cbc, silc_aes_decrypt_cbc, silc_aes_context_len, 128, 16, 16 }, - { "twofish-256-cbc", silc_twofish_set_key, silc_twofish_set_key_with_string, + { "twofish-256-cbc", silc_twofish_set_key, silc_twofish_encrypt_cbc, silc_twofish_decrypt_cbc, silc_twofish_context_len, 256, 16, 16 }, - { "twofish-192-cbc", silc_twofish_set_key, silc_twofish_set_key_with_string, + { "twofish-192-cbc", silc_twofish_set_key, silc_twofish_encrypt_cbc, silc_twofish_decrypt_cbc, silc_twofish_context_len, 192, 16, 16 }, - { "twofish-128-cbc", silc_twofish_set_key, silc_twofish_set_key_with_string, + { "twofish-128-cbc", silc_twofish_set_key, silc_twofish_encrypt_cbc, silc_twofish_decrypt_cbc, silc_twofish_context_len, 128, 16, 16 }, - { "cast-256-cbc", silc_cast_set_key, silc_cast_set_key_with_string, + { "cast-256-cbc", silc_cast_set_key, silc_cast_encrypt_cbc, silc_cast_decrypt_cbc, silc_cast_context_len, 256, 16, 16 }, - { "cast-192-cbc", silc_cast_set_key, silc_cast_set_key_with_string, + { "cast-192-cbc", silc_cast_set_key, silc_cast_encrypt_cbc, silc_cast_decrypt_cbc, silc_cast_context_len, 192, 16, 16 }, - { "cast-128-cbc", silc_cast_set_key, silc_cast_set_key_with_string, + { "cast-128-cbc", silc_cast_set_key, silc_cast_encrypt_cbc, silc_cast_decrypt_cbc, silc_cast_context_len, 128, 16, 16 }, #ifdef SILC_DEBUG - { "none", silc_none_set_key, silc_none_set_key_with_string, + { "none", silc_none_set_key, silc_none_encrypt_cbc, silc_none_decrypt_cbc, silc_none_context_len, 0, 0, 0 }, #endif /* SILC_DEBUG */ - { NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0 } + { NULL, NULL, NULL, NULL, NULL, 0, 0, 0 } }; /* Register a new cipher into SILC. This is used at the initialization of @@ -107,7 +107,6 @@ SilcBool silc_cipher_register(const SilcCipherObject *cipher) new->block_len = cipher->block_len; new->iv_len = cipher->iv_len; new->set_key = cipher->set_key; - new->set_key_with_string = cipher->set_key_with_string; new->encrypt = cipher->encrypt; new->decrypt = cipher->decrypt; new->context_len = cipher->context_len; @@ -313,7 +312,7 @@ SilcBool silc_cipher_encrypt(SilcCipher cipher, const unsigned char *src, unsigned char *iv) { SILC_ASSERT((len & (cipher->cipher->block_len - 1)) == 0); - if (len & (cipher->cipher->block_len - 1)) + if (silc_unlikely(len & (cipher->cipher->block_len - 1))) return FALSE; return cipher->cipher->encrypt(cipher->context, src, dst, len, iv ? iv : cipher->iv); @@ -325,7 +324,7 @@ SilcBool silc_cipher_decrypt(SilcCipher cipher, const unsigned char *src, unsigned char *dst, SilcUInt32 len, unsigned char *iv) { - if (len & (cipher->cipher->block_len - 1)) + if (silc_unlikely(len & (cipher->cipher->block_len - 1))) return FALSE; return cipher->cipher->decrypt(cipher->context, src, dst, len, iv ? iv : cipher->iv); diff --git a/lib/silccrypt/silccipher.h b/lib/silccrypt/silccipher.h index f571b03a..51e389f7 100644 --- a/lib/silccrypt/silccipher.h +++ b/lib/silccrypt/silccipher.h @@ -50,7 +50,6 @@ typedef struct SilcCipherStruct *SilcCipher; typedef struct { char *name; SilcBool (*set_key)(void *, const unsigned char *, SilcUInt32); - SilcBool (*set_key_with_string)(void *, const unsigned char *, SilcUInt32); SilcBool (*encrypt)(void *, const unsigned char *, unsigned char *, SilcUInt32, unsigned char *); SilcBool (*decrypt)(void *, const unsigned char *, unsigned char *, @@ -81,7 +80,6 @@ extern DLLAPI const SilcCipherObject silc_default_ciphers[]; of the module. All SILC Crypto API compliant modules must support these function names (use macros below to assure this). */ #define SILC_CIPHER_SIM_SET_KEY "set_key" -#define SILC_CIPHER_SIM_SET_KEY_WITH_STRING "set_key_with_string" #define SILC_CIPHER_SIM_ENCRYPT_CBC "encrypt_cbc" #define SILC_CIPHER_SIM_DECRYPT_CBC "decrypt_cbc" #define SILC_CIPHER_SIM_CONTEXT_LEN "context_len" @@ -92,10 +90,6 @@ extern DLLAPI const SilcCipherObject silc_default_ciphers[]; SilcBool silc_##cipher##_set_key(void *context, \ const unsigned char *key, \ SilcUInt32 keylen) -#define SILC_CIPHER_API_SET_KEY_WITH_STRING(cipher) \ -SilcBool silc_##cipher##_set_key_with_string(void *context, \ - const unsigned char *string, \ - SilcUInt32 stringlen) #define SILC_CIPHER_API_ENCRYPT_CBC(cipher) \ SilcBool silc_##cipher##_encrypt_cbc(void *context, \ const unsigned char *src, \ diff --git a/lib/silccrypt/twofish.c b/lib/silccrypt/twofish.c index ebfc19a4..e388a951 100644 --- a/lib/silccrypt/twofish.c +++ b/lib/silccrypt/twofish.c @@ -43,7 +43,7 @@ Mean: 378 cycles = 67.8 mbits/sec #include "twofish_internal.h" #include "twofish.h" -/* +/* * SILC Crypto API for Twofish */ @@ -59,14 +59,6 @@ SILC_CIPHER_API_SET_KEY(twofish) return TRUE; } -/* Sets the string as a new key for the cipher. The string is first - hashed and then used as a new key. */ - -SILC_CIPHER_API_SET_KEY_WITH_STRING(twofish) -{ - return FALSE; -} - /* Returns the size of the cipher context. */ SILC_CIPHER_API_CONTEXT_LEN(twofish) @@ -118,9 +110,9 @@ SILC_CIPHER_API_DECRYPT_CBC(twofish) twofish_decrypt((TwofishContext *)context, tmp, tmp2); SILC_CBC_DEC_POST(tmp2, dst, src, tmp, tiv); } - + SILC_CBC_PUT_IV(tiv, iv); - + return TRUE; } @@ -146,26 +138,26 @@ u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 }; u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 }; -u1byte qt0[2][16] = +u1byte qt0[2][16] = { { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 }, { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 } }; u1byte qt1[2][16] = -{ { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 }, +{ { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 }, { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 } }; -u1byte qt2[2][16] = +u1byte qt2[2][16] = { { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 }, { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 } }; -u1byte qt3[2][16] = +u1byte qt3[2][16] = { { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 }, { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 } }; - + u1byte qp(const u4byte n, const u1byte x) { u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4; @@ -188,7 +180,7 @@ void gen_qtab(void) { u4byte i; for(i = 0; i < 256; ++i) - { + { q(0,i) = qp(0, (u1byte)i); q(1,i) = qp(1, (u1byte)i); } @@ -207,7 +199,7 @@ u4byte m_tab[4][256]; void gen_mtab(void) { u4byte i, f01, f5b, fef; - + for(i = 0; i < 256; ++i) { f01 = q(1,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); @@ -335,12 +327,12 @@ void gen_mk_tab(TwofishContext *ctx, u4byte key[]) mk_tab[0][i] = mds(0, q20(by)); mk_tab[1][i] = mds(1, q21(by)); mk_tab[2][i] = mds(2, q22(by)); mk_tab[3][i] = mds(3, q23(by)); #else - sb[0][i] = q20(by); sb[1][i] = q21(by); + sb[0][i] = q20(by); sb[1][i] = q21(by); sb[2][i] = q22(by); sb[3][i] = q23(by); #endif } break; - + case 3: for(i = 0; i < 256; ++i) { by = (u1byte)i; @@ -348,12 +340,12 @@ void gen_mk_tab(TwofishContext *ctx, u4byte key[]) mk_tab[0][i] = mds(0, q30(by)); mk_tab[1][i] = mds(1, q31(by)); mk_tab[2][i] = mds(2, q32(by)); mk_tab[3][i] = mds(3, q33(by)); #else - sb[0][i] = q30(by); sb[1][i] = q31(by); + sb[0][i] = q30(by); sb[1][i] = q31(by); sb[2][i] = q32(by); sb[3][i] = q33(by); #endif } break; - + case 4: for(i = 0; i < 256; ++i) { by = (u1byte)i; @@ -361,7 +353,7 @@ void gen_mk_tab(TwofishContext *ctx, u4byte key[]) mk_tab[0][i] = mds(0, q40(by)); mk_tab[1][i] = mds(1, q41(by)); mk_tab[2][i] = mds(2, q42(by)); mk_tab[3][i] = mds(3, q43(by)); #else - sb[0][i] = q40(by); sb[1][i] = q41(by); + sb[0][i] = q40(by); sb[1][i] = q41(by); sb[2][i] = q42(by); sb[3][i] = q43(by); #endif } @@ -394,22 +386,22 @@ void gen_mk_tab(TwofishContext *ctx, u4byte key[]) where the coefficients are in the finite field GF(2^8) with a modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the remainder we have to start with a 12th order polynomial with our -eight input bytes as the coefficients of the 4th to 11th terms. +eight input bytes as the coefficients of the 4th to 11th terms. That is: m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 - + We then multiply the generator polynomial by m[7] * x^7 and subtract -it - xor in GF(2^8) - from the above to eliminate the x^7 term (the -artihmetic on the coefficients is done in GF(2^8). We then multiply +it - xor in GF(2^8) - from the above to eliminate the x^7 term (the +artihmetic on the coefficients is done in GF(2^8). We then multiply the generator polynomial by x^6 * coeff(x^10) and use this to remove the x^10 term. We carry on in this way until the x^4 term is removed so that we are left with: r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] -which give the resulting 4 bytes of the remainder. This is equivalent -to the matrix multiplication in the Twofish description but much faster +which give the resulting 4 bytes of the remainder. This is equivalent +to the matrix multiplication in the Twofish description but much faster to implement. */ @@ -422,23 +414,23 @@ u4byte mds_rem(u4byte p0, u4byte p1) for(i = 0; i < 8; ++i) { t = p1 >> 24; /* get most significant coefficient */ - + p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; /* shift others up */ - + /* multiply t by a (the primitive element - i.e. left shift) */ - u = (t << 1); - + u = (t << 1); + if(t & 0x80) /* subtract modular polynomial on overflow */ - - u ^= G_MOD; + + u ^= G_MOD; p1 ^= t ^ (u << 16); /* remove t * (a * x^2 + 1) */ u ^= (t >> 1); /* form u = a * t + t / a = t * (a + 1 / a); */ - + if(t & 0x01) /* add the modular polynomial on underflow */ - + u ^= G_MOD >> 1; p1 ^= (u << 24) | (u << 8); /* remove t * (a + 1/a) * (x^3 + x) */ @@ -451,11 +443,11 @@ u4byte mds_rem(u4byte p0, u4byte p1) u4byte *twofish_set_key(TwofishContext *ctx, const u4byte in_key[], const u4byte key_len) -{ +{ u4byte i, a, b, me_key[4], mo_key[4]; u4byte *l_key = ctx->l_key; u4byte *s_key = ctx->s_key; - + #ifdef Q_TABLES if(!qt_gen) { @@ -507,7 +499,7 @@ u4byte *twofish_set_key(TwofishContext *ctx, void twofish_encrypt(TwofishContext *ctx, const u4byte in_blk[4], u4byte out_blk[]) -{ +{ u4byte t0, t1, blk[4]; u4byte *l_key = ctx->l_key; u4byte *s_key = ctx->s_key; @@ -523,7 +515,7 @@ void twofish_encrypt(TwofishContext *ctx, out_blk[0] = blk[2] ^ l_key[4]; out_blk[1] = blk[3] ^ l_key[5]; out_blk[2] = blk[0] ^ l_key[6]; - out_blk[3] = blk[1] ^ l_key[7]; + out_blk[3] = blk[1] ^ l_key[7]; }; /* decrypt a block of text */ @@ -538,7 +530,7 @@ void twofish_encrypt(TwofishContext *ctx, void twofish_decrypt(TwofishContext *ctx, const u4byte in_blk[4], u4byte out_blk[4]) -{ +{ u4byte t0, t1, blk[4]; u4byte *l_key = ctx->l_key; u4byte *s_key = ctx->s_key; @@ -554,5 +546,5 @@ void twofish_decrypt(TwofishContext *ctx, out_blk[0] = blk[2] ^ l_key[0]; out_blk[1] = blk[3] ^ l_key[1]; out_blk[2] = blk[0] ^ l_key[2]; - out_blk[3] = blk[1] ^ l_key[3]; + out_blk[3] = blk[1] ^ l_key[3]; }; diff --git a/lib/silccrypt/twofish.h b/lib/silccrypt/twofish.h index 03269467..2f67d7ab 100644 --- a/lib/silccrypt/twofish.h +++ b/lib/silccrypt/twofish.h @@ -9,7 +9,7 @@ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,12 +20,11 @@ #ifndef TWOFISH_H #define TWOFISH_H -/* +/* * SILC Crypto API for Twofish */ SILC_CIPHER_API_SET_KEY(twofish); -SILC_CIPHER_API_SET_KEY_WITH_STRING(twofish); SILC_CIPHER_API_CONTEXT_LEN(twofish); SILC_CIPHER_API_ENCRYPT_CBC(twofish); SILC_CIPHER_API_DECRYPT_CBC(twofish); -- 2.24.0