1 /* Modified for SILC. -Pekka */
\r
3 /* This is an independent implementation of the encryption algorithm: */
\r
5 /* DFC designed by a team at CNRS and France Telecom */
\r
7 /* which is a candidate algorithm in the Advanced Encryption Standard */
\r
8 /* programme of the US National Institute of Standards and Technology. */
\r
10 /* Copyright in this implementation is held by Dr B R Gladman but I */
\r
11 /* hereby give permission for its free direct or derivative use subject */
\r
12 /* to acknowledgment of its origin and compliance with any conditions */
\r
13 /* that the originators of the algorithm place on its exploitation. */
\r
15 /* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */
\r
17 /* My thanks go to Serge Vaudenay of the Ecole Normale Superieure */
\r
18 /* for providing test vectors. This implementation has also been */
\r
19 /* tested with an independent implementation by Dr Russell Bradford */
\r
20 /* (Department of Mathematical Sciences, University of Bath, Bath, */
\r
21 /* UK) and checks out. My thanks go to Russell for his help in */
\r
22 /* comparing our implementations and finding bugs (and for help in */
\r
23 /* resolving 'endian' issues before test vectors became available). */
\r
25 /* Timing data for DFC (dfc.c)
\r
27 Core timing without I/O endian conversion:
\r
29 Algorithm: dfc (dfc.c)
\r
32 Key Setup: 5222 cycles
\r
33 Encrypt: 1203 cycles = 21.3 mbits/sec
\r
34 Decrypt: 1244 cycles = 20.6 mbits/sec
\r
35 Mean: 1224 cycles = 20.9 mbits/sec
\r
38 Key Setup: 5203 cycles
\r
39 Encrypt: 1288 cycles = 19.9 mbits/sec
\r
40 Decrypt: 1235 cycles = 20.7 mbits/sec
\r
41 Mean: 1262 cycles = 20.3 mbits/sec
\r
44 Key Setup: 5177 cycles
\r
45 Encrypt: 1178 cycles = 21.7 mbits/sec
\r
46 Decrypt: 1226 cycles = 20.9 mbits/sec
\r
47 Mean: 1202 cycles = 21.3 mbits/sec
\r
49 Full timing with I/O endian conversion:
\r
52 Key Setup: 5227 cycles
\r
53 Encrypt: 1247 cycles = 20.5 mbits/sec
\r
54 Decrypt: 1222 cycles = 20.9 mbits/sec
\r
55 Mean: 1235 cycles = 20.7 mbits/sec
\r
58 Key Setup: 5252 cycles
\r
59 Encrypt: 1215 cycles = 21.1 mbits/sec
\r
60 Decrypt: 1265 cycles = 20.2 mbits/sec
\r
61 Mean: 1240 cycles = 20.6 mbits/sec
\r
64 Key Setup: 5174 cycles
\r
65 Encrypt: 1247 cycles = 20.5 mbits/sec
\r
66 Decrypt: 1206 cycles = 21.2 mbits/sec
\r
67 Mean: 1227 cycles = 20.9 mbits/sec
\r
71 /* The EES string is as follows (the abstract contains an error in
\r
72 the last line of this sequence which changes KC and KD):
\r
74 0xb7e15162, 0x8aed2a6a, 0xbf715880, 0x9cf4f3c7,
\r
75 0x62e7160f, 0x38b4da56, 0xa784d904, 0x5190cfef,
\r
76 0x324e7738, 0x926cfbe5, 0xf4bf8d8d, 0x8c31d763,
\r
77 0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584,
\r
79 0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce,
\r
80 0xd55c4d79, 0xfd5f24d6, 0x613c31c3, 0x839a2ddf,
\r
81 0x8a9a276b, 0xcfbfa1c8, 0x77c56284, 0xdab79cd4,
\r
82 0xc2b3293d, 0x20e9e5ea, 0xf02ac60a, 0xcc93ed87,
\r
84 0x4422a52e, 0xcb238fee, 0xe5ab6add, 0x835fd1a0,
\r
85 0x753d0a8f, 0x78e537d2, 0xb95bb79d, 0x8dcaec64,
\r
86 0x2c1e9f23, 0xb829b5c2, 0x780bf387, 0x37df8bb3,
\r
87 0x00d01334, 0xa0d0bd86, 0x45cbfa73, 0xa6160ffe,
\r
89 0x393c48cb, 0xbbca060f, 0x0ff8ec6d, 0x31beb5cc,
\r
90 0xeed7f2f0, 0xbb088017, 0x163bc60d, 0xf45a0ecb,
\r
91 0x1bcd289b, 0x06cbbfea, 0x21ad08e1, 0x847f3f73,
\r
92 0x78d56ced, 0x94640d6e, 0xf0d3d37b, 0xe67008e1,
\r
94 0x86d1bf27, 0x5b9b241d, 0xeb64749a, 0x47dfdfb9,
\r
98 EES = RT(0) | RT(1) | ... | RT(63) | KD | KC
\r
100 Note that the abstract describing DFC is written
\r
101 in big endian notation with the most significant
\r
102 digits of a sequence of digits placed at the low
\r
103 index positions in arrays. This format is used
\r
104 here and is only converted to machine format at
\r
105 the point that maths is done on any numbers in
\r
106 the round function.
\r
108 The key input is thus treated as an array of 32
\r
109 bit words numbered from 0..3, 0..5 or 0..7
\r
110 depending on key length. The first (leftmost)
\r
111 bit of this key string as defined in the DFC
\r
112 abstract is the most significant bit of word 0
\r
113 and the rightmost bit of this string is the least
\r
114 signicant bit of the highest numbered key word.
\r
116 The input and output blocks for the cipher are
\r
117 also treated as arrays of 32 bit words numbered
\r
118 from 0..3. The most significant bit of word 0 is
\r
119 the 1st (leftmost) bit of the 128 bit input string
\r
120 and the least significant bit of word 3 is the
\r
121 last (rightmost) bit.
\r
123 Note that the inputs, the output and the key are
\r
124 in Intel little endian format when BYTE_SWAP is
\r
129 #include <sys/types.h>
\r
130 #include "dfc_internal.h"
\r
132 /* The following arrays are all stored in big endian */
\r
133 /* format with 32 bit words at lower array positions */
\r
134 /* being more significant in multi-word values */
\r
138 0xb7e15162, 0x8aed2a6a, 0xbf715880, 0x9cf4f3c7,
\r
139 0x62e7160f, 0x38b4da56, 0xa784d904, 0x5190cfef,
\r
140 0x324e7738, 0x926cfbe5, 0xf4bf8d8d, 0x8c31d763,
\r
141 0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584,
\r
143 0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce,
\r
144 0xd55c4d79, 0xfd5f24d6, 0x613c31c3, 0x839a2ddf,
\r
145 0x8a9a276b, 0xcfbfa1c8, 0x77c56284, 0xdab79cd4,
\r
146 0xc2b3293d, 0x20e9e5ea, 0xf02ac60a, 0xcc93ed87,
\r
148 0x4422a52e, 0xcb238fee, 0xe5ab6add, 0x835fd1a0,
\r
149 0x753d0a8f, 0x78e537d2, 0xb95bb79d, 0x8dcaec64,
\r
150 0x2c1e9f23, 0xb829b5c2, 0x780bf387, 0x37df8bb3,
\r
151 0x00d01334, 0xa0d0bd86, 0x45cbfa73, 0xa6160ffe,
\r
153 0x393c48cb, 0xbbca060f, 0x0ff8ec6d, 0x31beb5cc,
\r
154 0xeed7f2f0, 0xbb088017, 0x163bc60d, 0xf45a0ecb,
\r
155 0x1bcd289b, 0x06cbbfea, 0x21ad08e1, 0x847f3f73,
\r
156 0x78d56ced, 0x94640d6e, 0xf0d3d37b, 0xe67008e1,
\r
159 u4byte kc = 0xeb64749a;
\r
163 0x86d1bf27, 0x5b9b241d
\r
168 0xb7e15162, 0x8aed2a6a,
\r
169 0xbf715880, 0x9cf4f3c7,
\r
170 0x62e7160f, 0x38b4da56,
\r
175 0xa784d904, 0x5190cfef,
\r
176 0x324e7738, 0x926cfbe5,
\r
177 0xf4bf8d8d, 0x8c31d763,
\r
181 { 0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584,
\r
182 0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce,
\r
185 #define lo(x) ((x) & 0x0000ffff)
\r
186 #define hi(x) ((x) >> 16)
\r
188 #define mult_64(r,x,y) \
\r
189 x0 = lo(x[1]); x1 = hi(x[1]); x2 = lo(x[0]); x3 = hi(x[0]); \
\r
190 y0 = lo(y[1]); y1 = hi(y[1]); y2 = lo(y[0]); y3 = hi(y[0]); \
\r
191 t0 = x0 * y0; r[0] = lo(t0); c = hi(t0); \
\r
192 t0 = x0 * y1; t1 = x1 * y0; c += lo(t0) + lo(t1); \
\r
193 r[0] += (c << 16); c = hi(c) + hi(t0) + hi(t1); \
\r
194 t0 = x0 * y2; t1 = x1 * y1; t2 = x2 * y0; \
\r
195 c += lo(t0) + lo(t1) + lo(t2); r[1] = lo(c); \
\r
196 c = hi(c) + hi(t0) + hi(t1) + hi(t2); \
\r
197 t0 = x0 * y3; t1 = x1 * y2; t2 = x2 * y1; t3 = x3 * y0; \
\r
198 c += lo(t0) + lo(t1) + lo(t2) + lo(t3); r[1] += (c << 16); \
\r
199 c = hi(c) + hi(t0) + hi(t1) + hi(t2) + hi(t3); \
\r
200 t0 = x1 * y3; t1 = x2 * y2; t2 = x3 * y1; \
\r
201 c += lo(t0) + lo(t1) + lo(t2); r[2] = lo(c); \
\r
202 c = hi(c) + hi(t0) + hi(t1) + hi(t2); \
\r
203 t0 = x2 * y3; t1 = x3 * y2; c += lo(t0) + lo(t1); \
\r
204 r[2] += (c << 16); c = hi(c) + hi(t0) + hi(t1); \
\r
207 #define add_64(r,hi,lo) \
\r
208 if((r[0] += lo) < lo) \
\r
212 if((r[1] += hi) < hi) \
\r
216 #define mult_13(r) \
\r
217 c = 13 * lo((r)[0]); \
\r
220 c = hi(c) + 13 * d; \
\r
221 (r)[0] += (c << 16); \
\r
222 c = hi(c) + 13 * lo((r)[1]); \
\r
225 c = hi(c) + 13 * d; \
\r
226 (r)[1] += (c << 16); \
\r
229 /* Where necessary this is where conversion from big endian to */
\r
230 /* little endian format is performed. Since all the maths is */
\r
231 /* little endian care is needed when 64 bit blocks are being */
\r
232 /* used to get them in the right order by reversing the order */
\r
233 /* in which these are stored. This applies to the key array */
\r
234 /* which gives the two values A and B and to the constant KD. */
\r
235 /* Since the input and output blocks are big endian we also */
\r
236 /* have to invert the order of the 32 bit words in the 64 bit */
\r
237 /* blocks being processed. */
\r
239 void r_fun(u4byte outp[2], const u4byte inp[2], const u4byte key[4])
\r
240 { u4byte acc[5], x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3, c, d;
\r
242 mult_64(acc, inp, key); add_64(acc, key[2], key[3]);
\r
244 /* we need the value in the accumulator mod 2^64 + 13 so if */
\r
245 /* the accumulator value is hi * 2^64 + lo we need to find */
\r
246 /* a k value such that r = hi * 2^64 + lo - k * (2^64 + 13) */
\r
247 /* is 0 <= r < 2^64 + 13. We can see that k will be close */
\r
248 /* to hi in value - it may equal hi but will not be greater */
\r
249 /* and we can let k = hi - e with e >= 0 so that r is given */
\r
250 /* by r = e * (2^64 + 13) + lo - 13 * hi. If we compute the */
\r
251 /* lo - 13 * hi value, the overflow into the top 64 bits of */
\r
252 /* the accumulator has to be 'zeroed' by the e * (2^64 + 13)*/
\r
253 /* term and this sets the e value (in fact such an overlow */
\r
254 /* is only removed when the lower word is higher than 12). */
\r
256 mult_13(acc + 2); /* multiply top of accumulator by 13 */
\r
258 /* calculate lo - 13 * hi in acc[0] and acc[1] with any */
\r
259 /* overflow into top 64 bits in c */
\r
261 d = acc[0]; acc[0] -= acc[2]; c = (acc[0] > d ? 1 : 0);
\r
263 d = acc[1]; acc[1] -= acc[3] + c;
\r
264 c = (acc[1] > d ? 1 : (acc[1] == d ? c : 0));
\r
266 c = 13 * (acc[4] + c); /* overflow into top 64 bits of acc */
\r
268 if(((acc[0] += c) < c) && !(++acc[1]))
\r
275 /* do the confusion permutation */
\r
277 d = acc[1] ^ kc; c = acc[0] ^ rt64[acc[1] >> 26];
\r
279 c += kd2[0] + ((d += kd2[1]) < kd2[1] ? 1 : 0);
\r
281 outp[0] ^= c; outp[1] ^= d;
\r
284 u4byte *dfc_set_key(DfcContext *ctx,
\r
285 const u4byte in_key[], const u4byte key_len)
\r
287 u4byte i, lk[32], rk[4];
\r
288 u4byte *l_key = ctx->l_key;
\r
290 for(i = 0; i < key_len / 32; ++i)
\r
292 lk[i] = io_swap(in_key[i]);
\r
294 /* pad the key with the KS array */
\r
296 for(i = 0; i < 8 - key_len / 32; ++i) /* K|KS */
\r
298 lk[i + key_len / 32] = ks8[i];
\r
300 /* do the reordering of the key parameters */
\r
301 /* the OAP[1]|OBP[1]|OAP[2]... sequence is */
\r
302 /* at lk[0]... and the other at lk[16]... */
\r
304 lk[18] = lk[5]; lk[19] = lk[2]; /* EBP */
\r
305 lk[16] = lk[1]; lk[17] = lk[6]; /* EAP */
\r
306 lk[ 2] = lk[4]; lk[ 3] = lk[3]; /* OBP */
\r
307 lk[ 0] = lk[0]; lk[ 1] = lk[7]; /* OAP */
\r
309 /* create other elements using KA and KB */
\r
311 for(i = 0; i < 6; i += 2)
\r
313 lk[i + i + 4] = lk[ 0] ^ ka2[i]; /* OAP[i] ms */
\r
314 lk[i + i + 5] = lk[ 1] ^ ka2[i + 1]; /* OAP[i] ls */
\r
315 lk[i + i + 6] = lk[ 2] ^ kb2[i]; /* OBP[i] ms */
\r
316 lk[i + i + 7] = lk[ 3] ^ kb2[i + 1]; /* OBP[i] ls */
\r
317 lk[i + i + 20] = lk[16] ^ ka2[i]; /* EAP[i] ms */
\r
318 lk[i + i + 21] = lk[17] ^ ka2[i + 1]; /* EAP[i] ls */
\r
319 lk[i + i + 22] = lk[18] ^ kb2[i]; /* EBP[i] ms */
\r
320 lk[i + i + 23] = lk[19] ^ kb2[i + 1]; /* EBP[i] ls */
\r
323 rk[0] = rk[1] = rk[2] = rk[3] = 0;
\r
325 /* do the 4 round key mixing encryption */
\r
327 for(i = 0; i < 32; i += 8)
\r
329 r_fun(rk, rk + 2, lk); /* R2|R1 */
\r
330 r_fun(rk + 2, rk, lk + 4); /* R2|R3 */
\r
331 r_fun(rk, rk + 2, lk + 8); /* R4|R3 */
\r
332 r_fun(rk + 2, rk, lk + 12); /* R4|R5 */
\r
334 /* keep key in big endian format with */
\r
335 /* the most significant 32 bit words */
\r
336 /* first (lowest) in the key schedule */
\r
337 /* - note that the upper and lower 64 */
\r
338 /* bit blocks are in inverse order at */
\r
339 /* this point in the loop */
\r
341 l_key[i + 0] = rk[2]; l_key[i + 1] = rk[3];
\r
342 l_key[i + 2] = rk[0]; l_key[i + 3] = rk[1];
\r
344 r_fun(rk + 2, rk, lk + 16); /* R1|R2 */
\r
345 r_fun(rk, rk + 2, lk + 20); /* R3|R2 */
\r
346 r_fun(rk + 2, rk, lk + 24); /* R3|R4 */
\r
347 r_fun(rk, rk + 2, lk + 28); /* R5|R4 */
\r
349 l_key[i + 4] = rk[0]; l_key[i + 5] = rk[1];
\r
350 l_key[i + 6] = rk[2]; l_key[i + 7] = rk[3];
\r
356 void dfc_encrypt(DfcContext *ctx,
\r
357 const u4byte in_blk[4], u4byte out_blk[4])
\r
360 u4byte *l_key = ctx->l_key;
\r
362 /* the input/output format is big endian - */
\r
363 /* any reversals needed are performed when */
\r
364 /* maths is done in the round function */
\r
366 blk[0] = io_swap(in_blk[0]); blk[1] = io_swap(in_blk[1]);
\r
367 blk[2] = io_swap(in_blk[2]); blk[3] = io_swap(in_blk[3]);
\r
369 r_fun(blk, blk + 2, l_key + 0); /* R2|R1 */
\r
370 r_fun(blk + 2, blk, l_key + 4); /* R2|R3 */
\r
371 r_fun(blk, blk + 2, l_key + 8); /* R4|R3 */
\r
372 r_fun(blk + 2, blk, l_key + 12); /* R4|R5 */
\r
373 r_fun(blk, blk + 2, l_key + 16); /* R6|R5 */
\r
374 r_fun(blk + 2, blk, l_key + 20); /* R6|R7 */
\r
375 r_fun(blk, blk + 2, l_key + 24); /* R8|R7 */
\r
376 r_fun(blk + 2, blk, l_key + 28); /* R8|R9 */
\r
378 /* swap order to obtain the result R9|R8 */
\r
380 out_blk[0] = io_swap(blk[2]); out_blk[1] = io_swap(blk[3]);
\r
381 out_blk[2] = io_swap(blk[0]); out_blk[3] = io_swap(blk[1]);
\r
384 void dfc_decrypt(DfcContext *ctx,
\r
385 const u4byte in_blk[4], u4byte out_blk[4])
\r
388 u4byte *l_key = ctx->l_key;
\r
390 /* the input/output format is big endian - */
\r
391 /* any reversals needed are performed when */
\r
392 /* maths is done in the round function */
\r
394 blk[0] = io_swap(in_blk[0]); blk[1] = io_swap(in_blk[1]);
\r
395 blk[2] = io_swap(in_blk[2]); blk[3] = io_swap(in_blk[3]);
\r
397 r_fun(blk, blk + 2, l_key + 28); /* R7|R8 */
\r
398 r_fun(blk + 2, blk, l_key + 24); /* R7|R6 */
\r
399 r_fun(blk, blk + 2, l_key + 20); /* R5|R6 */
\r
400 r_fun(blk + 2, blk, l_key + 16); /* R5|R4 */
\r
401 r_fun(blk, blk + 2, l_key + 12); /* R3|R4 */
\r
402 r_fun(blk + 2, blk, l_key + 8); /* R3|R2 */
\r
403 r_fun(blk, blk + 2, l_key + 4); /* R1|R2 */
\r
404 r_fun(blk + 2, blk, l_key ); /* R1|R0 */
\r
406 /* swap order to obtain the result R1|R0 */
\r
408 out_blk[0] = io_swap(blk[2]); out_blk[1] = io_swap(blk[3]);
\r
409 out_blk[2] = io_swap(blk[0]); out_blk[3] = io_swap(blk[1]);
\r