1 /* Modified for SILC. -Pekka */
\r
3 /* This is an independent implementation of the encryption algorithm: */
\r
5 /* LOKI97 by Brown and Pieprzyk */
\r
7 /* which is a candidate algorithm in the Advanced Encryption Standard */
\r
8 /* programme of the US National Institute of Standards and Technology. */
\r
10 /* Copyright in this implementation is held by Dr B R Gladman but I */
\r
11 /* hereby give permission for its free direct or derivative use subject */
\r
12 /* to acknowledgment of its origin and compliance with any conditions */
\r
13 /* that the originators of the algorithm place on its exploitation. */
\r
15 /* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */
\r
17 /* Timing data for LOKI97 (loki.c)
\r
19 Core timing without I/O endian conversion:
\r
22 Key Setup: 7430 cycles
\r
23 Encrypt: 2134 cycles = 12.0 mbits/sec
\r
24 Decrypt: 2192 cycles = 11.7 mbits/sec
\r
25 Mean: 2163 cycles = 11.8 mbits/sec
\r
28 Key Setup: 7303 cycles
\r
29 Encrypt: 2138 cycles = 12.0 mbits/sec
\r
30 Decrypt: 2189 cycles = 11.7 mbits/sec
\r
31 Mean: 2164 cycles = 11.8 mbits/sec
\r
34 Key Setup: 7166 cycles
\r
35 Encrypt: 2131 cycles = 12.0 mbits/sec
\r
36 Decrypt: 2184 cycles = 11.7 mbits/sec
\r
37 Mean: 2158 cycles = 11.9 mbits/sec
\r
39 Full timing with I/O endian conversion:
\r
42 Key Setup: 7582 cycles
\r
43 Encrypt: 2174 cycles = 11.8 mbits/sec
\r
44 Decrypt: 2235 cycles = 11.5 mbits/sec
\r
45 Mean: 2205 cycles = 11.6 mbits/sec
\r
48 Key Setup: 7477 cycles
\r
49 Encrypt: 2167 cycles = 11.8 mbits/sec
\r
50 Decrypt: 2223 cycles = 11.5 mbits/sec
\r
51 Mean: 2195 cycles = 11.7 mbits/sec
\r
54 Key Setup: 7365 cycles
\r
55 Encrypt: 2177 cycles = 11.8 mbits/sec
\r
56 Decrypt: 2194 cycles = 11.7 mbits/sec
\r
57 Mean: 2186 cycles = 11.7 mbits/sec
\r
62 #include <sys/types.h>
\r
63 #include "loki_internal.h"
\r
66 #define S1_LEN (1 << S1_SIZE)
\r
67 #define S1_MASK (S1_LEN - 1)
\r
68 #define S1_HMASK (S1_MASK & ~0xff)
\r
69 #define S1_POLY 0x2911
\r
72 #define S2_LEN (1 << S2_SIZE)
\r
73 #define S2_MASK (S2_LEN - 1)
\r
74 #define S2_HMASK (S2_MASK & ~0xff)
\r
75 #define S2_POLY 0x0aa7
\r
/* io_swap(x): expands to its argument unchanged, so key and block words
 * are used exactly as supplied (no byte-order conversion is performed). */
77 #define io_swap(x) ((x))
\r
/* Key-schedule constant held as two words, low word in [0] and high word
 * in [1] (the order add_eq carries in).  loki_set_key copies it into its
 * running value `del` and then adds it to `del` once per schedule round. */
79 u4byte delta[2] = { 0x7f4a7c15, 0x9e3779b9 };
\r
81 u1byte sb1[S1_LEN]; // GF(2^13) S box: S1_POLY (0x2911) is a degree-13 polynomial -- NOTE(review): confirm S1_SIZE == 13 in loki_internal.h
\r
82 u1byte sb2[S2_LEN]; // GF(2^11) S box
\r
/* Table-initialisation flag: starts at 0 and is set to 1 by loki_set_key
 * right after it calls init_tables(). */
84 u4byte init_done = 0;
\r
/*
 * add_eq(x, y): in-place double-word addition, x += y.
 *
 * x and y are two-element arrays of unsigned words with the low word in
 * [0] and the high word in [1].  The low words are added first; unsigned
 * wrap-around (sum < addend) signals a carry, which is added into the high
 * word together with y[1].
 *
 * The carry term must be 0 when no wrap occurs.  It previously evaluated
 * to the array operand `x` itself, which is an int/pointer mismatch and
 * can never produce a correct sum.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects, and x and y must not refer to the same array.
 */
#define add_eq(x,y) ((x)[1] += (y)[1] + ((((x)[0] += (y)[0]) < (y)[0]) ? 1 : 0))
\r
/*
 * sub_eq(x, y): in-place double-word subtraction, x -= y, on two-element
 * word arrays with the low word in [0] and the high word in [1].
 *
 * The old low word is saved in `xs`; if the low word is larger after the
 * subtraction than before, it wrapped, and a borrow of 1 is taken from the
 * high word along with y[1].
 *
 * Caveats:
 *  - a variable named `xs` must be in scope where the macro is expanded;
 *  - the expansion is two statements, so it must not be used as the
 *    unbraced body of an if/else/loop;
 *  - arguments are evaluated more than once.
 */
87 #define sub_eq(x,y) xs = (x)[0]; (x)[1] -= (y)[1] + (((x)[0] -= (y)[0]) > xs ? 1 : 0)
\r
89 u4byte ff_mult(u4byte a, u4byte b, u4byte tpow, u4byte mpol)
\r
92 r = s = 0; m = (1 << tpow);
\r
110 void init_tables(void)
\r
113 // initialise S box 1
\r
115 for(i = 0; i < S1_LEN; ++i)
\r
117 j = v = i ^ S1_MASK; v = ff_mult(v, j, S1_SIZE, S1_POLY);
\r
118 sb1[i] = (u1byte)ff_mult(v, j, S1_SIZE, S1_POLY);
\r
120 // initialise S box 2
\r
122 for(i = 0; i < S2_LEN; ++i)
\r
124 j = v = i ^ S2_MASK; v = ff_mult(v, j, S2_SIZE, S2_POLY);
\r
125 sb2[i] = (u1byte)ff_mult(v, j, S2_SIZE, S2_POLY);
\r
128 // initialise permutation table
\r
130 for(i = 0; i < 256; ++i)
\r
132 prm[i][0] = ((i & 1) << 7) | ((i & 2) << 14) | ((i & 4) << 21) | ((i & 8) << 28);
\r
133 prm[i][1] = ((i & 16) << 3) | ((i & 32) << 10) | ((i & 64) << 17) | ((i & 128) << 24);
\r
137 void f_fun(u4byte res[2], const u4byte in[2], const u4byte key[2])
\r
138 { u4byte i, tt[2], pp[2];
\r
140 tt[0] = (in[0] & ~key[0]) | (in[1] & key[0]);
\r
141 tt[1] = (in[1] & ~key[0]) | (in[0] & key[0]);
\r
143 i = sb1[((tt[1] >> 24) | (tt[0] << 8)) & S1_MASK];
\r
144 pp[0] = prm[i][0] >> 7; pp[1] = prm[i][1] >> 7;
\r
145 i = sb2[(tt[1] >> 16) & S2_MASK];
\r
146 pp[0] |= prm[i][0] >> 6; pp[1] |= prm[i][1] >> 6;
\r
147 i = sb1[(tt[1] >> 8) & S1_MASK];
\r
148 pp[0] |= prm[i][0] >> 5; pp[1] |= prm[i][1] >> 5;
\r
149 i = sb2[tt[1] & S2_MASK];
\r
150 pp[0] |= prm[i][0] >> 4; pp[1] |= prm[i][1] >> 4;
\r
151 i = sb2[((tt[0] >> 24) | (tt[1] << 8)) & S2_MASK];
\r
152 pp[0] |= prm[i][0] >> 3; pp[1] |= prm[i][1] >> 3;
\r
153 i = sb1[(tt[0] >> 16) & S1_MASK];
\r
154 pp[0] |= prm[i][0] >> 2; pp[1] |= prm[i][1] >> 2;
\r
155 i = sb2[(tt[0] >> 8) & S2_MASK];
\r
156 pp[0] |= prm[i][0] >> 1; pp[1] |= prm[i][1] >> 1;
\r
157 i = sb1[tt[0] & S1_MASK];
\r
158 pp[0] |= prm[i][0]; pp[1] |= prm[i][1];
\r
160 res[0] ^= sb1[byte(pp[0], 0) | (key[1] << 8) & S1_HMASK]
\r
161 | (sb1[byte(pp[0], 1) | (key[1] << 3) & S1_HMASK] << 8)
\r
162 | (sb2[byte(pp[0], 2) | (key[1] >> 2) & S2_HMASK] << 16)
\r
163 | (sb2[byte(pp[0], 3) | (key[1] >> 5) & S2_HMASK] << 24);
\r
164 res[1] ^= sb1[byte(pp[1], 0) | (key[1] >> 8) & S1_HMASK]
\r
165 | (sb1[byte(pp[1], 1) | (key[1] >> 13) & S1_HMASK] << 8)
\r
166 | (sb2[byte(pp[1], 2) | (key[1] >> 18) & S2_HMASK] << 16)
\r
167 | (sb2[byte(pp[1], 3) | (key[1] >> 21) & S2_HMASK] << 24);
\r
170 u4byte *loki_set_key(LokiContext *ctx,
\r
171 const u4byte in_key[], const u4byte key_len)
\r
173 u4byte i, k1[2], k2[2], k3[2], k4[2], del[2], tt[2], sk[2];
\r
174 u4byte *l_key = ctx->l_key;
\r
178 init_tables(); init_done = 1;
\r
181 k4[0] = io_swap(in_key[1]); k4[1] = io_swap(in_key[0]);
\r
182 k3[0] = io_swap(in_key[3]); k3[1] = io_swap(in_key[2]);
\r
184 switch ((key_len + 63) / 64)
\r
187 k2[0] = 0; k2[1] = 0; f_fun(k2, k3, k4);
\r
188 k1[0] = 0; k1[1] = 0; f_fun(k1, k4, k3);
\r
191 k2[0] = io_swap(in_key[5]); k2[1] = io_swap(in_key[4]);
\r
192 k1[0] = 0; k1[1] = 0; f_fun(k1, k4, k3);
\r
195 k2[0] = in_key[5]; k2[1] = in_key[4];
\r
196 k1[0] = in_key[7]; k1[1] = in_key[6];
\r
197 k2[0] = io_swap(in_key[5]); k2[1] = io_swap(in_key[4]);
\r
198 k1[0] = io_swap(in_key[7]); k1[1] = io_swap(in_key[6]);
\r
201 del[0] = delta[0]; del[1] = delta[1];
\r
203 for(i = 0; i < 48; ++i)
\r
205 tt[0] = k1[0]; tt[1] = k1[1];
\r
206 add_eq(tt, k3); add_eq(tt, del); add_eq(del, delta);
\r
207 sk[0] = k4[0]; sk[1] = k4[1];
\r
208 k4[0] = k3[0]; k4[1] = k3[1];
\r
209 k3[0] = k2[0]; k3[1] = k2[1];
\r
210 k2[0] = k1[0]; k2[1] = k1[1];
\r
211 k1[0] = sk[0]; k1[1] = sk[1];
\r
213 l_key[i + i] = k1[0]; l_key[i + i + 1] = k1[1];
\r
219 #define r_fun(l,r,k) \
\r
221 f_fun((r),(l),(k) + 2); \
\r
222 add_eq((l), (k) + 4)
\r
224 void loki_encrypt(LokiContext *ctx,
\r
225 const u4byte in_blk[4], u4byte out_blk[4])
\r
228 u4byte *l_key = ctx->l_key;
\r
230 blk[3] = io_swap(in_blk[0]); blk[2] = io_swap(in_blk[1]);
\r
231 blk[1] = io_swap(in_blk[2]); blk[0] = io_swap(in_blk[3]);
\r
233 r_fun(blk, blk + 2, l_key + 0);
\r
234 r_fun(blk + 2, blk, l_key + 6);
\r
235 r_fun(blk, blk + 2, l_key + 12);
\r
236 r_fun(blk + 2, blk, l_key + 18);
\r
237 r_fun(blk, blk + 2, l_key + 24);
\r
238 r_fun(blk + 2, blk, l_key + 30);
\r
239 r_fun(blk, blk + 2, l_key + 36);
\r
240 r_fun(blk + 2, blk, l_key + 42);
\r
241 r_fun(blk, blk + 2, l_key + 48);
\r
242 r_fun(blk + 2, blk, l_key + 54);
\r
243 r_fun(blk, blk + 2, l_key + 60);
\r
244 r_fun(blk + 2, blk, l_key + 66);
\r
245 r_fun(blk, blk + 2, l_key + 72);
\r
246 r_fun(blk + 2, blk, l_key + 78);
\r
247 r_fun(blk, blk + 2, l_key + 84);
\r
248 r_fun(blk + 2, blk, l_key + 90);
\r
250 out_blk[3] = io_swap(blk[2]); out_blk[2] = io_swap(blk[3]);
\r
251 out_blk[1] = io_swap(blk[0]); out_blk[0] = io_swap(blk[1]);
\r
254 #define ir_fun(l,r,k) \
\r
255 sub_eq((l),(k) + 4); \
\r
256 f_fun((r),(l),(k) + 2); \
\r
259 void loki_decrypt(LokiContext *ctx,
\r
260 const u4byte in_blk[4], u4byte out_blk[4])
\r
263 u4byte *l_key = ctx->l_key;
\r
265 blk[3] = io_swap(in_blk[0]); blk[2] = io_swap(in_blk[1]);
\r
266 blk[1] = io_swap(in_blk[2]); blk[0] = io_swap(in_blk[3]);
\r
268 ir_fun(blk, blk + 2, l_key + 90);
\r
269 ir_fun(blk + 2, blk, l_key + 84);
\r
270 ir_fun(blk, blk + 2, l_key + 78);
\r
271 ir_fun(blk + 2, blk, l_key + 72);
\r
272 ir_fun(blk, blk + 2, l_key + 66);
\r
273 ir_fun(blk + 2, blk, l_key + 60);
\r
274 ir_fun(blk, blk + 2, l_key + 54);
\r
275 ir_fun(blk + 2, blk, l_key + 48);
\r
276 ir_fun(blk, blk + 2, l_key + 42);
\r
277 ir_fun(blk + 2, blk, l_key + 36);
\r
278 ir_fun(blk, blk + 2, l_key + 30);
\r
279 ir_fun(blk + 2, blk, l_key + 24);
\r
280 ir_fun(blk, blk + 2, l_key + 18);
\r
281 ir_fun(blk + 2, blk, l_key + 12);
\r
282 ir_fun(blk, blk + 2, l_key + 6);
\r
283 ir_fun(blk + 2, blk, l_key);
\r
285 out_blk[3] = io_swap(blk[2]); out_blk[2] = io_swap(blk[3]);
\r
286 out_blk[1] = io_swap(blk[0]); out_blk[0] = io_swap(blk[1]);
\r