1 /* Modified for SILC. -Pekka */
3 /* This is an independent implementation of the encryption algorithm: */
5 /* Twofish by Bruce Schneier and colleagues */
7 /* which is a candidate algorithm in the Advanced Encryption Standard */
8 /* programme of the US National Institute of Standards and Technology. */
10 /* Copyright in this implementation is held by Dr B R Gladman but I */
11 /* hereby give permission for its free direct or derivative use subject */
12 /* to acknowledgment of its origin and compliance with any conditions */
13 /* that the originators of the algorithm place on its exploitation. */
15 /* My thanks to Doug Whiting and Niels Ferguson for comments that led */
16 /* to improvements in this implementation. */
18 /* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */
20 /* Timing data for Twofish (twofish.c)
23 Key Setup: 8414 cycles
24 Encrypt: 376 cycles = 68.1 mbits/sec
25 Decrypt: 374 cycles = 68.4 mbits/sec
26 Mean: 375 cycles = 68.3 mbits/sec
29 Key Setup: 11628 cycles
30 Encrypt: 376 cycles = 68.1 mbits/sec
31 Decrypt: 374 cycles = 68.4 mbits/sec
32 Mean: 375 cycles = 68.3 mbits/sec
35 Key Setup: 15457 cycles
36 Encrypt: 381 cycles = 67.2 mbits/sec
37 Decrypt: 374 cycles = 68.4 mbits/sec
38 Mean: 378 cycles = 67.8 mbits/sec
43 #include "twofish_internal.h"
47 * SILC Crypto API for Twofish
50 /* Sets the key for the cipher. */
/* Expands to the key-setup entry point for the "twofish" cipher (the
   SILC_CIPHER_API_SET_KEY macro itself is defined outside this file).  The
   visible body converts the raw key into the word array `k` and hands it,
   together with `keylen`, to twofish_set_key().
   NOTE(review): the declaration of `k`, the braces and the return statement
   do not appear in this listing. */
52 SILC_CIPHER_API_SET_KEY(twofish)
/* Load the key material into `k`.  SILC_GET_WORD_KEY is an external macro;
   presumably a byte-to-word conversion -- confirm against its definition. */
56 SILC_GET_WORD_KEY(key, k, keylen);
/* `keylen` is passed through unchanged.  twofish_set_key() divides it by 64
   to obtain the number of 64-bit key words, so it is presumably a length in
   bits -- TODO confirm with the caller. */
57 twofish_set_key((TwofishContext *)context, k, keylen);
62 /* Sets IV for the cipher. */
/* Expands to the IV-setup entry point for the "twofish" cipher.  The body
   views `context` as a TwofishContext and dispatches on cipher->mode.  Only
   the CTR and CFB cases are visible here.
   NOTE(review): the statements executed in each case are not present in this
   listing; presumably they reset twofish->padlen, which the CTR/CFB macros
   in the encrypt/decrypt entry points consume -- confirm against the full
   source. */
64 SILC_CIPHER_API_SET_IV(twofish)
66 TwofishContext *twofish = context;
68 switch (cipher->mode) {
70 case SILC_CIPHER_MODE_CTR:
71 /* Starts new block. */
75 case SILC_CIPHER_MODE_CFB:
76 /* Starts new block. */
85 /* Returns the size of the cipher context. */
/* Expands to the context-size entry point for the "twofish" cipher.  The
   value returned is sizeof(TwofishContext), i.e. the number of bytes a caller
   must provide for the `context` pointer used by the other entry points. */
87 SILC_CIPHER_API_CONTEXT_LEN(twofish)
89 return sizeof(TwofishContext);
92 /* Encrypts with the cipher. Source and destination buffers may be one
/* Expands to the encryption entry point for the "twofish" cipher.  The body
   dispatches on cipher->mode and delegates the chaining logic to the SILC
   mode macros (defined outside this file).  Each macro receives the
   single-block operation to apply as its last argument.
   NOTE(review): `i`, used by the CBC macro, is not declared in the visible
   lines; the braces, break/return statements and any default case are also
   absent from this listing. */
95 SILC_CIPHER_API_ENCRYPT(twofish)
97 TwofishContext *twofish = context;
/* Scratch block (`tmp`) and counter block (`ctr`), four 32-bit words each. */
98 SilcUInt32 tmp[4], ctr[4];
101 switch (cipher->mode) {
103 case SILC_CIPHER_MODE_CBC:
/* CBC: the block operation encrypts `tmp` in place. */
104 SILC_CBC_ENC_LSB_128_32(len, iv, tmp, src, dst, i,
105 twofish_encrypt(twofish, tmp, tmp));
108 case SILC_CIPHER_MODE_CTR:
/* CTR: twofish->padlen is handed to the macro alongside the IV and the
   counter block. */
109 SILC_CTR_LSB_128_32(iv, ctr, tmp, twofish->padlen, src, dst,
110 twofish_encrypt(twofish, tmp, tmp));
113 case SILC_CIPHER_MODE_CFB:
/* CFB: same block operation; twofish->padlen is again passed to the macro. */
114 SILC_CFB_ENC_LSB_128_32(iv, tmp, twofish->padlen, src, dst,
115 twofish_encrypt(twofish, tmp, tmp));
125 /* Decrypts with the cipher. Source and destination buffers may be one
/* Expands to the decryption entry point for the "twofish" cipher.  The body
   dispatches on cipher->mode:
     - CBC uses twofish_decrypt() as the block operation,
     - CTR simply calls the encryption entry point silc_twofish_encrypt(),
     - CFB passes twofish_encrypt() (not twofish_decrypt()) as the block
       operation.
   NOTE(review): `i`, used by the CBC macro, is not declared in the visible
   lines; the braces, break/return statements and any default case are also
   absent from this listing. */
128 SILC_CIPHER_API_DECRYPT(twofish)
130 TwofishContext *twofish = context;
/* Scratch blocks handed to the CBC macro (`tmp`, `tmp2`, `tiv`); the CFB
   macro uses `tmp` only. */
131 SilcUInt32 tmp[4], tmp2[4], tiv[4];
134 switch (cipher->mode) {
136 case SILC_CIPHER_MODE_CBC:
/* CBC: the block operation decrypts `tmp` into `tmp2`. */
137 SILC_CBC_DEC_LSB_128_32(len, iv, tiv, tmp, tmp2, src, dst, i,
138 twofish_decrypt(twofish, tmp, tmp2));
140 case SILC_CIPHER_MODE_CTR:
/* CTR: decryption is delegated unchanged to the encryption entry point. */
141 return silc_twofish_encrypt(cipher, context, src, dst, len, iv);
144 case SILC_CIPHER_MODE_CFB:
/* CFB: note the forward cipher twofish_encrypt() is used here. */
145 SILC_CFB_DEC_LSB_128_32(iv, tmp, twofish->padlen, src, dst,
146 twofish_encrypt(twofish, tmp, tmp));
163 /* finite field arithmetic for GF(2**8) with the modular */
164 /* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */
/* Helper tables and macros for the GF(2**8) arithmetic and for the 4-bit
   building blocks of the q permutations.
   NOTE(review): G_M is defined outside the visible lines. */

/* Correction terms indexed by the two low bits of x (the bits that the right
   shifts in ffm_5b / ffm_ef discard); each entry is a combination of
   G_M >> 1 and G_M >> 2. */
168 u1byte tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) };
169 u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 };
/* Field multipliers: ffm_01 is the identity; ffm_5b combines x with x >> 2,
   ffm_ef combines x with x >> 1 and x >> 2, each followed by a table
   correction.  Presumably multiplication by 0x5B and 0xEF in the field named
   in the comment above -- the constants are inferred from the macro names.
   The argument is evaluated more than once, so it must be free of side
   effects. */
171 #define ffm_01(x) (x)
172 #define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3])
173 #define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3])
/* ror4[v]: the 4-bit value v rotated right by one bit. */
175 u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
/* ashx[v]: v ^ ((v << 3) & 8), i.e. bit 0 of v folded into bit 3. */
176 u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 };
/* Initialiser rows of four 2 x 16 nibble tables.
   NOTE(review): the declarators are not present in this listing; presumably
   these are qt0, qt1, qt2 and qt3 (in that order) as indexed by qp(), with
   the first index selecting the permutation -- confirm against the full
   source. */
179 { { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 },
180 { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 }
184 { { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 },
185 { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 }
189 { { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 },
190 { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 }
194 { { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 },
195 { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 }
/* Computes one byte of a q permutation directly from the nibble tables.
     n  selects the permutation (first index into qt0..qt3),
     x  is the input byte.
   The byte is split into a high nibble (a0) and a low nibble (b0); two
   identical mixing stages follow, each combining the nibbles with XOR,
   ror4[] and ashx[] and then substituting through a pair of 4-bit tables.
   Returns the byte whose high nibble is b4 and whose low nibble is a4. */
198 u1byte qp(const u4byte n, const u1byte x)
199 { u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4;
/* Split into nibbles. */
201 a0 = x >> 4; b0 = x & 15;
/* Stage 1: mix, then substitute through qt0 / qt1. */
202 a1 = a0 ^ b0; b1 = ror4[b0] ^ ashx[a0];
203 a2 = qt0[n][a1]; b2 = qt1[n][b1];
/* Stage 2: same mix, then substitute through qt2 / qt3. */
204 a3 = a2 ^ b2; b3 = ror4[b2] ^ ashx[a2];
205 a4 = qt2[n][a3]; b4 = qt3[n][b3];
/* Recombine; the `b` path ends up in the high nibble. */
206 return (b4 << 4) | a4;
/* Lookup tables for the two q permutations and the q(n,x) accessor.
   NOTE(review): the enclosing function header (called as gen_qtab() from
   twofish_set_key), its braces, the declaration of `i` and the preprocessor
   conditionals that choose between the two q() definitions are not present
   in this listing. */
212 u1byte q_tab[2][256];
/* Table-driven variant: q(n,x) is a plain array lookup. */
214 #define q(n,x) q_tab[n][x]
/* Fill both tables by evaluating qp() for every byte value. */
219 for(i = 0; i < 256; ++i)
221 q(0,i) = qp(0, (u1byte)i);
222 q(1,i) = qp(1, (u1byte)i);
/* Alternative variant: q(n,x) computes the value on each use via qp().
   Presumably selected when the table variant is compiled out -- confirm. */
228 #define q(n,x) qp(n, x)
/* MDS lookup tables and the mds(n,x) accessor.
   NOTE(review): the enclosing function header (called as gen_mtab() from
   twofish_set_key), its braces and the preprocessor conditionals that choose
   between the two mds() definitions are not present in this listing; the
   fm_XY macros referenced by f_0..f_3 are not visible either. */
235 u4byte m_tab[4][256];
238 { u4byte i, f01, f5b, fef;
/* For every byte value, build four 32-bit words, each packing f01, f5b and
   fef into its four byte lanes in a column-specific order. */
240 for(i = 0; i < 256; ++i)
/* Columns 0 and 2 are derived from q(1,i). */
242 f01 = q(1,i); f5b = ffm_5b(f01); fef = ffm_ef(f01);
243 m_tab[0][i] = f01 + (f5b << 8) + (fef << 16) + (fef << 24);
244 m_tab[2][i] = f5b + (fef << 8) + (f01 << 16) + (fef << 24);
/* Columns 1 and 3 are derived from q(0,i). */
246 f01 = q(0,i); f5b = ffm_5b(f01); fef = ffm_ef(f01);
247 m_tab[1][i] = fef + (fef << 8) + (f5b << 16) + (f01 << 24);
248 m_tab[3][i] = f5b + (f01 << 8) + (fef << 16) + (f5b << 24);
/* Table-driven variant: mds(n,x) is a plain array lookup. */
252 #define mds(n,x) m_tab[n][x]
/* Per-column choice of q permutation, matching the table construction above:
   columns 0 and 2 use q(1,.), columns 1 and 3 use q(0,.). */
260 #define q_0(x) q(1,x)
266 #define q_1(x) q(0,x)
272 #define q_2(x) q(1,x)
278 #define q_3(x) q(0,x)
/* f_k(n,x): apply fm_k<n> to x and move the result into byte lane k. */
280 #define f_0(n,x) ((u4byte)fm_0##n(x))
281 #define f_1(n,x) ((u4byte)fm_1##n(x) << 8)
282 #define f_2(n,x) ((u4byte)fm_2##n(x) << 16)
283 #define f_3(n,x) ((u4byte)fm_3##n(x) << 24)
/* Computed variant of mds(n,x): XOR of the four byte lanes.  The expansion is
   not wrapped in parentheses and evaluates q_<n>(x) four times.
   Presumably selected when the table variant is compiled out -- confirm. */
285 #define mds(n,x) f_0(n,q_##n(x)) ^ f_1(n,q_##n(x)) ^ f_2(n,q_##n(x)) ^ f_3(n,q_##n(x))
/* Key-dependent function used both for the round-key schedule (see
   twofish_set_key) and, via g0_fun/g1_fun, as one variant of the round
   function.
     ctx  cipher context (presumably supplies the k_len switch selector,
          which is not visible in this listing -- confirm),
     x    32-bit input word, processed byte by byte,
     key  array of 32-bit key words; key[0..1] are always used, key[2] and
          key[3] only in the `case 3` / `case 4` stages.
   Returns a 32-bit word combining the four transformed bytes.
   NOTE(review): the `switch` line, the braces and the preprocessor
   conditionals that choose between the two return paths are not present in
   this listing. */
289 u4byte h_fun(TwofishContext *ctx, const u4byte x, const u4byte key[])
290 { u4byte b0, b1, b2, b3;
/* Temporaries used only by the computed (non-table) return path below. */
293 u4byte m5b_b0, m5b_b1, m5b_b2, m5b_b3;
294 u4byte mef_b0, mef_b1, mef_b2, mef_b3;
/* Split the input word into its four bytes. */
297 b0 = byte(x, 0); b1 = byte(x, 1); b2 = byte(x, 2); b3 = byte(x, 3);
/* Cases run from the largest key downwards and fall through: case 4 adds a
   stage keyed by key[3], case 3 a stage keyed by key[2], and case 2 performs
   the final two stages keyed by key[1] and key[0]. */
301 case 4: b0 = q(1, b0) ^ byte(key[3],0);
302 b1 = q(0, b1) ^ byte(key[3],1);
303 b2 = q(0, b2) ^ byte(key[3],2);
304 b3 = q(1, b3) ^ byte(key[3],3);
305 case 3: b0 = q(1, b0) ^ byte(key[2],0);
306 b1 = q(1, b1) ^ byte(key[2],1);
307 b2 = q(0, b2) ^ byte(key[2],2);
308 b3 = q(0, b3) ^ byte(key[2],3);
309 case 2: b0 = q(0,q(0,b0) ^ byte(key[1],0)) ^ byte(key[0],0);
310 b1 = q(0,q(1,b1) ^ byte(key[1],1)) ^ byte(key[0],1);
311 b2 = q(1,q(0,b2) ^ byte(key[1],2)) ^ byte(key[0],2);
312 b3 = q(1,q(1,b3) ^ byte(key[1],3)) ^ byte(key[0],3);
/* Return path 1: combine the four bytes through the mds() accessor. */
316 return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3);
/* Return path 2: apply the last q permutation and the 0x5B / 0xEF field
   multiplications inline, then assemble the result word by hand.
   Presumably compiled only when path 1 is disabled -- confirm. */
320 b0 = q(1, b0); b1 = q(0, b1); b2 = q(1, b2); b3 = q(0, b3);
321 m5b_b0 = ffm_5b(b0); m5b_b1 = ffm_5b(b1); m5b_b2 = ffm_5b(b2); m5b_b3 = ffm_5b(b3);
322 mef_b0 = ffm_ef(b0); mef_b1 = ffm_ef(b1); mef_b2 = ffm_ef(b2); mef_b3 = ffm_ef(b3);
323 b0 ^= mef_b1 ^ m5b_b2 ^ m5b_b3; b3 ^= m5b_b0 ^ mef_b1 ^ mef_b2;
324 b2 ^= mef_b0 ^ m5b_b1 ^ mef_b3; b1 ^= mef_b0 ^ mef_b2 ^ m5b_b3;
/* Note the lane order: b0 is the low byte, then b3, b2, and b1 on top. */
326 return b0 | (b3 << 8) | (b2 << 16) | (b1 << 24);
/* Key-dependent lookup tables, the macros that build them, and the
   g0_fun/g1_fun round-function accessors.
   NOTE(review): preprocessor conditionals, braces, the `switch` line, the
   declarations of `i`, `by` and `sb`, and the break statements are not
   present in this listing.  Where two or three definitions of the same name
   appear below they are presumably alternative build configurations. */
334 u4byte mk_tab[4][256];
/* qNk(x): the keyed q-permutation chain of h_fun() for an N-word key and byte
   lane k, written as a single expression.  The macros read the enclosing
   function's `key` array, and their expansions are not wrapped in
   parentheses, so they are only safe where the result is used directly as an
   index or a macro argument (as below). */
339 #define q20(x) q(0,q(0,x) ^ byte(key[1],0)) ^ byte(key[0],0)
340 #define q21(x) q(0,q(1,x) ^ byte(key[1],1)) ^ byte(key[0],1)
341 #define q22(x) q(1,q(0,x) ^ byte(key[1],2)) ^ byte(key[0],2)
342 #define q23(x) q(1,q(1,x) ^ byte(key[1],3)) ^ byte(key[0],3)
344 #define q30(x) q(0,q(0,q(1, x) ^ byte(key[2],0)) ^ byte(key[1],0)) ^ byte(key[0],0)
345 #define q31(x) q(0,q(1,q(1, x) ^ byte(key[2],1)) ^ byte(key[1],1)) ^ byte(key[0],1)
346 #define q32(x) q(1,q(0,q(0, x) ^ byte(key[2],2)) ^ byte(key[1],2)) ^ byte(key[0],2)
347 #define q33(x) q(1,q(1,q(0, x) ^ byte(key[2],3)) ^ byte(key[1],3)) ^ byte(key[0],3)
349 #define q40(x) q(0,q(0,q(1, q(1, x) ^ byte(key[3],0)) ^ byte(key[2],0)) ^ byte(key[1],0)) ^ byte(key[0],0)
350 #define q41(x) q(0,q(1,q(1, q(0, x) ^ byte(key[3],1)) ^ byte(key[2],1)) ^ byte(key[1],1)) ^ byte(key[0],1)
351 #define q42(x) q(1,q(0,q(0, q(0, x) ^ byte(key[3],2)) ^ byte(key[2],2)) ^ byte(key[1],2)) ^ byte(key[0],2)
352 #define q43(x) q(1,q(1,q(0, q(1, x) ^ byte(key[3],3)) ^ byte(key[2],3)) ^ byte(key[1],3)) ^ byte(key[0],3)
/* Precomputes the key-dependent tables for every byte value.
     ctx  cipher context (presumably supplies the key size selector and, in
          one configuration, the `sb` tables -- not visible here),
     key  the key words consumed by the qNk macros; twofish_set_key passes
          its s_key array.
   Each key size (2, 3 or 4 words) has its own loop.  In every loop two
   alternatives are visible: fill mk_tab with the full mds(k, qNk(by)) value,
   or fill sb with the qNk(by) byte only (see the g0_fun/g1_fun variants
   below). */
354 void gen_mk_tab(TwofishContext *ctx, u4byte key[])
360 case 2: for(i = 0; i < 256; ++i)
364 mk_tab[0][i] = mds(0, q20(by)); mk_tab[1][i] = mds(1, q21(by));
365 mk_tab[2][i] = mds(2, q22(by)); mk_tab[3][i] = mds(3, q23(by));
367 sb[0][i] = q20(by); sb[1][i] = q21(by);
368 sb[2][i] = q22(by); sb[3][i] = q23(by);
373 case 3: for(i = 0; i < 256; ++i)
377 mk_tab[0][i] = mds(0, q30(by)); mk_tab[1][i] = mds(1, q31(by));
378 mk_tab[2][i] = mds(2, q32(by)); mk_tab[3][i] = mds(3, q33(by));
380 sb[0][i] = q30(by); sb[1][i] = q31(by);
381 sb[2][i] = q32(by); sb[3][i] = q33(by);
386 case 4: for(i = 0; i < 256; ++i)
390 mk_tab[0][i] = mds(0, q40(by)); mk_tab[1][i] = mds(1, q41(by));
391 mk_tab[2][i] = mds(2, q42(by)); mk_tab[3][i] = mds(3, q43(by));
393 sb[0][i] = q40(by); sb[1][i] = q41(by);
394 sb[2][i] = q42(by); sb[3][i] = q43(by);
/* Variant A: one mk_tab lookup per input byte.  g1_fun reads the bytes in the
   order 3,0,1,2, i.e. it behaves like g0_fun applied to x rotated left by
   8 bits. */
401 # define g0_fun(x) ( mk_tab[0][byte(x,0)] ^ mk_tab[1][byte(x,1)] \
402 ^ mk_tab[2][byte(x,2)] ^ mk_tab[3][byte(x,3)] )
403 # define g1_fun(x) ( mk_tab[0][byte(x,3)] ^ mk_tab[1][byte(x,0)] \
404 ^ mk_tab[2][byte(x,1)] ^ mk_tab[3][byte(x,2)] )
/* Variant B: byte lookup in sb followed by mds(); same byte ordering. */
406 # define g0_fun(x) ( mds(0, sb[0][byte(x,0)]) ^ mds(1, sb[1][byte(x,1)]) \
407 ^ mds(2, sb[2][byte(x,2)]) ^ mds(3, sb[3][byte(x,3)]) )
408 # define g1_fun(x) ( mds(0, sb[0][byte(x,3)]) ^ mds(1, sb[1][byte(x,0)]) \
409 ^ mds(2, sb[2][byte(x,1)]) ^ mds(3, sb[3][byte(x,2)]) )
/* Variant C: no key-dependent tables; call h_fun() directly.  These macros
   rely on `ctx` and `s_key` being in scope at the point of use. */
414 #define g0_fun(x) h_fun(ctx,x,s_key)
415 #define g1_fun(x) h_fun(ctx,rotl(x,8),s_key)
419 /* The (12,8) Reed-Solomon code has the generator polynomial
421 g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1
423 where the coefficients are in the finite field GF(2^8) with a
424 modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the
425 remainder we have to start with a 12th order polynomial with our
426 eight input bytes as the coefficients of the 4th to 11th terms.
429 m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0
431 We then multiply the generator polynomial by m[7] * x^7 and subtract
432 it - xor in GF(2^8) - from the above to eliminate the x^11 term (the
433 arithmetic on the coefficients is done in GF(2^8)). We then multiply
434 the generator polynomial by x^6 * coeff(x^10) and use this to remove
435 the x^10 term. We carry on in this way until the x^4 term is removed
436 so that we are left with:
438 r[3] * x^3 + r[2] * x^2 + r[1] * x^1 + r[0]
440 which give the resulting 4 bytes of the remainder. This is equivalent
441 to the matrix multiplication in the Twofish description but much faster
/* Modular polynomial of the Reed-Solomon field: 0x14d encodes
   a^8 + a^6 + a^3 + a^2 + 1. */
446 #define G_MOD 0x0000014d
/* Reduces the eight key bytes held in p0 (low word) and p1 (high word) by
   the generator polynomial, one byte per loop iteration.
   twofish_set_key() stores the result as one s_key word.
   NOTE(review): the declarations of `i`, `t` and `u`, the statements that
   compute `u` from `t` and apply G_MOD corrections, the braces and the
   return statement are not present in this listing; presumably the function
   returns p1 -- confirm against the full source. */
448 u4byte mds_rem(u4byte p0, u4byte p1)
451 for(i = 0; i < 8; ++i)
453 t = p1 >> 24; /* get most significant coefficient */
455 p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; /* shift others up */
457 /* multiply t by a (the primitive element - i.e. left shift) */
461 if(t & 0x80) /* subtract modular polynomial on overflow */
465 p1 ^= t ^ (u << 16); /* remove t * (a * x^2 + 1) */
467 u ^= (t >> 1); /* form u = a * t + t / a = t * (a + 1 / a); */
469 if(t & 0x01) /* add the modular polynomial on underflow */
473 p1 ^= (u << 24) | (u << 8); /* remove t * (a + 1/a) * (x^3 + x) */
479 /* initialise the key schedule from the user supplied key */
/* Builds the complete key schedule in `ctx`.
     ctx      context that receives k_len, the round keys (l_key) and the
              S-box key words (s_key),
     in_key   the user key as 32-bit words; 2 * (key_len / 64) words are read,
     key_len  key length; divided by 64 to obtain the number of 64-bit key
              words, so presumably a length in bits (128, 192 or 256).
   me_key/mo_key hold at most four words each, so key_len must not exceed
   256; the visible code does not check this.
   NOTE(review): braces, the preprocessor conditionals and `if` guards around
   the one-time table generation, the `l_key[i] = ...` store of the second
   loop and the return statement are not present in this listing. */
481 u4byte *twofish_set_key(TwofishContext *ctx,
482 const u4byte in_key[], const u4byte key_len)
484 u4byte i, a, b, me_key[4], mo_key[4];
485 u4byte *l_key = ctx->l_key;
486 u4byte *s_key = ctx->s_key;
/* Build the key-independent q and MDS tables and record that in the
   qt_gen / mt_gen flags (declared outside the visible lines). */
491 gen_qtab(); qt_gen = 1;
498 gen_mtab(); mt_gen = 1;
/* Number of 64-bit key words.  The original assigned this field twice in a
   single chained expression; one store is sufficient. */
502 ctx->k_len = key_len / 64; /* 2, 3 or 4 */
/* Split the key into even (me_key) and odd (mo_key) words and derive one
   S-box key word per 64-bit pair, stored in reverse order. */
504 for(i = 0; i < ctx->k_len; ++i)
506 a = in_key[i + i]; me_key[i] = a;
507 b = in_key[i + i + 1]; mo_key[i] = b;
508 s_key[ctx->k_len - i - 1] = mds_rem(a, b);
/* Derive the 40 round-key words two at a time: the even-indexed input is run
   through h_fun with the even key words, the odd-indexed input through h_fun
   with the odd key words followed by a rotate-left by 8. */
511 for(i = 0; i < 40; i += 2)
513 a = 0x01010101 * i; b = a + 0x01010101;
514 a = h_fun(ctx,a, me_key);
515 b = rotl(h_fun(ctx,b, mo_key), 8);
517 l_key[i + 1] = rotl(a + 2 * b, 9);
/* Precompute the key-dependent lookup tables from the S-box key words. */
521 gen_mk_tab(ctx,s_key);
527 /* encrypt a block of text */
/* Round macro body followed by the block encryption function.
   NOTE(review): the `#define f_rnd(i) \` line and the function's braces are
   not present in this listing.
   The macro body below performs two half-rounds for round index i: blk[0..1]
   drive g0_fun/g1_fun to update blk[2..3], then blk[2..3] drive them to
   update blk[0..1].  It consumes round keys l_key[4*i + 8] .. l_key[4*i + 11]
   and relies on `blk`, `t0`, `t1`, `l_key` (and, depending on the g-function
   variant, `ctx` and `s_key`) being in scope. */
530 t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \
531 blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); \
532 blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); \
533 t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \
534 blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); \
535 blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11])
/* Encrypts one 128-bit block.
     ctx      context prepared by twofish_set_key(),
     in_blk   four input words,
     out_blk  four output words.  The input is copied into the local `blk`
              before anything is written, so out_blk may be the same array as
              in_blk (the SILC entry points call it that way). */
537 void twofish_encrypt(TwofishContext *ctx,
538 const u4byte in_blk[4], u4byte out_blk[])
540 u4byte t0, t1, blk[4];
541 u4byte *l_key = ctx->l_key;
542 u4byte *s_key = ctx->s_key;
/* Input whitening with l_key[0..3]. */
544 blk[0] = in_blk[0] ^ l_key[0];
545 blk[1] = in_blk[1] ^ l_key[1];
546 blk[2] = in_blk[2] ^ l_key[2];
547 blk[3] = in_blk[3] ^ l_key[3];
/* Eight macro invocations, two half-rounds each. */
549 f_rnd(0); f_rnd(1); f_rnd(2); f_rnd(3);
550 f_rnd(4); f_rnd(5); f_rnd(6); f_rnd(7);
/* Output whitening with l_key[4..7]; the two word pairs are swapped. */
552 out_blk[0] = blk[2] ^ l_key[4];
553 out_blk[1] = blk[3] ^ l_key[5];
554 out_blk[2] = blk[0] ^ l_key[6];
555 out_blk[3] = blk[1] ^ l_key[7];
558 /* decrypt a block of text */
/* Inverse round macro body followed by the block decryption function.
   NOTE(review): the `#define i_rnd(i) \` line and the function's braces are
   not present in this listing.
   The macro body below undoes one f_rnd(i): it uses the same g0_fun/g1_fun
   values but swaps the roles of rotl and rotr and consumes the round keys in
   the opposite order (l_key[4*i + 10], [4*i + 11] first, then [4*i + 8],
   [4*i + 9]).  It relies on the same names being in scope as f_rnd. */
561 t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \
562 blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); \
563 blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); \
564 t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \
565 blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); \
566 blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1)
/* Decrypts one 128-bit block.
     ctx      context prepared by twofish_set_key(),
     in_blk   four ciphertext words,
     out_blk  four plaintext words.  The input is copied into the local `blk`
              before anything is written to out_blk. */
568 void twofish_decrypt(TwofishContext *ctx,
569 const u4byte in_blk[4], u4byte out_blk[4])
571 u4byte t0, t1, blk[4];
572 u4byte *l_key = ctx->l_key;
573 u4byte *s_key = ctx->s_key;
/* Undo the output whitening of encryption (l_key[4..7]). */
575 blk[0] = in_blk[0] ^ l_key[4];
576 blk[1] = in_blk[1] ^ l_key[5];
577 blk[2] = in_blk[2] ^ l_key[6];
578 blk[3] = in_blk[3] ^ l_key[7];
/* Rounds are applied in reverse order, 7 down to 0. */
580 i_rnd(7); i_rnd(6); i_rnd(5); i_rnd(4);
581 i_rnd(3); i_rnd(2); i_rnd(1); i_rnd(0);
/* Undo the input whitening (l_key[0..3]); the word pairs are swapped back. */
583 out_blk[0] = blk[2] ^ l_key[0];
584 out_blk[1] = blk[3] ^ l_key[1];
585 out_blk[2] = blk[0] ^ l_key[2];
586 out_blk[3] = blk[1] ^ l_key[3];