1 /* TomsFastMath, a fast ISO C bignum library.
3 * This project is meant to fill in where LibTomMath
4 * falls short. That is speed ;-)
6 * This project is public domain and free for all purposes.
8 * Tom St Denis, tomstdenis@gmail.com
19 typedef struct tfm_fp_int_struct tfm_fp_int;
21 #include <silccrypto.h>
27 #endif /* SILC_X86_64 */
33 #endif /* SILC_CPU_SSE2 */
34 #endif /* SILC_I386 */
37 #define MIN(x,y) ((x)<(y)?(x):(y))
41 #define MAX(x,y) ((x)>(y)?(x):(y))
44 /* externally define this symbol to ignore the default settings, useful for changing the build from the make process */
45 #ifndef TFM_ALREADY_SET
47 /* do we want the large set of small multiplications ?
48 Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC
49 Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-)
53 /* do we want huge code
54 Enable these if you are doing 20, 24, 28, 32, 48, 64 digit multiplications (useful for RSA)
55 Less important on 64-bit machines as 32 digits == 2048 bits
71 /* do we want some overflow checks
72 Not required if you make sure your numbers are within range (e.g. by default a modulus for tfm_fp_exptmod() can only be upto 2048 bits long)
76 /* Is the target a P4 Prescott
78 /* #define TFM_PRESCOTT */
80 /* Do we want timing resistant tfm_fp_exptmod() ?
81 * This makes it slower but also timing invariant with respect to the exponent
83 /* #define TFM_TIMING_RESISTANT */
87 /* Max size of any number in bits. Basically the largest size you will be multiplying
88 * should be half [or smaller] of TFM_FP_MAX_SIZE-four_digit
90 * You can externally define this or it defaults to 4096-bits [allowing multiplications upto 2048x2048 bits ]
92 #ifndef TFM_FP_MAX_SIZE
93 #define TFM_FP_MAX_SIZE (8192+(8*DIGIT_BIT))
96 /* will this lib work? */
98 #error CHAR_BIT must be a multiple of eight.
100 #if TFM_FP_MAX_SIZE % CHAR_BIT
101 #error TFM_FP_MAX_SIZE must be a multiple of CHAR_BIT
104 /* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
105 #if defined(__x86_64__)
106 #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)
107 #error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid!
109 #if !defined(TFM_X86_64) && !defined(TFM_NO_ASM)
113 #if defined(TFM_X86_64)
114 #if !defined(TFM_FP_64BIT)
119 /* try to detect x86-32 */
120 #if defined(__i386__) && !defined(TFM_SSE2)
121 #if defined(TFM_X86_64) || defined(TFM_ARM)
122 #error x86-32 detected, x86-64/ARM optimizations are not valid!
124 #if !defined(TFM_X86) && !defined(TFM_NO_ASM)
129 /* make sure we're 32-bit for x86-32/sse/arm/ppc32 */
130 #if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(TFM_FP_64BIT)
131 #warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining)
141 #error TFM_ASM already defined!
147 #error TFM_ASM already defined!
153 #error TFM_ASM already defined!
159 #error TFM_ASM already defined!
165 #error TFM_ASM already defined!
171 #error TFM_ASM already defined!
176 /* we want no asm? */
240 /* some default configurations.
242 #if defined(TFM_FP_64BIT)
243 /* for GCC only on supported platforms */
245 typedef SilcUInt64 ulong64;
247 typedef ulong64 tfm_fp_digit;
248 typedef unsigned long tfm_fp_word __attribute__ ((mode(TI)));
250 /* this is to make porting into LibTomCrypt easier :-) */
252 #if defined(_MSC_VER) || defined(__BORLANDC__)
253 typedef unsigned __int64 ulong64;
254 typedef signed __int64 long64;
256 typedef unsigned long long ulong64;
257 typedef signed long long long64;
260 typedef unsigned long tfm_fp_digit;
261 typedef ulong64 tfm_fp_word;
264 /* # of digits this is */
265 #define DIGIT_BIT (int)((CHAR_BIT) * sizeof(tfm_fp_digit))
266 #define TFM_FP_MASK (tfm_fp_digit)(-1)
267 #define TFM_FP_SIZE (TFM_FP_MAX_SIZE/DIGIT_BIT)
270 #define TFM_FP_ZPOS 0
274 #define TFM_FP_OKAY 0
279 #define TFM_FP_LT -1 /* less than */
280 #define TFM_FP_EQ 0 /* equal to */
281 #define TFM_FP_GT 1 /* greater than */
284 #define TFM_FP_YES 1 /* yes response */
285 #define TFM_FP_NO 0 /* no response */
288 struct tfm_fp_int_struct {
292 unsigned int alloc : 31;
293 unsigned int sign : 1;
298 /* initialize [or zero] an fp int */
299 #define tfm_fp_init(a) tfm_fp_sinit(NULL, a)
300 #define tfm_fp_sinit(s, a) \
301 { (a)->stack = s; (a)->dp = NULL; (a)->alloc = (a)->used = (a)->sign = 0; }
302 int tfm_fp_init_size(SilcStack stack, tfm_fp_int *a, int size);
303 void tfm_fp_zero(tfm_fp_int *a);
305 /* zero/even/odd ? */
306 #define tfm_fp_iszero(a) (((a)->used == 0) ? TFM_FP_YES : TFM_FP_NO)
307 #define tfm_fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? TFM_FP_YES : TFM_FP_NO)
308 #define tfm_fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? TFM_FP_YES : TFM_FP_NO)
310 /* set to a small digit */
311 int tfm_fp_set(tfm_fp_int *a, tfm_fp_digit b);
313 /* copy from a to b */
314 int tfm_fp_copy(tfm_fp_int *a, tfm_fp_int *b);
315 int tfm_fp_init_copy(tfm_fp_int *a, tfm_fp_int *b, SilcStack stack);
316 void tfm_fp_exch(tfm_fp_int *a, tfm_fp_int *b);
319 #define tfm_fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : TFM_FP_ZPOS; }
321 /* negate and absolute */
322 #define tfm_fp_neg(a, b) { tfm_fp_copy(a, b); (b)->sign ^= 1; tfm_fp_clamp(b); }
323 #define tfm_fp_abs(a, b) { tfm_fp_copy(a, b); (b)->sign = 0; }
325 /* right shift x digits */
326 void tfm_fp_rshd(tfm_fp_int *a, int x);
328 /* left shift x digits */
329 int tfm_fp_lshd(tfm_fp_int *a, int x);
331 /* signed comparison */
332 int tfm_fp_cmp(tfm_fp_int *a, tfm_fp_int *b);
334 /* unsigned comparison */
335 int tfm_fp_cmp_mag(tfm_fp_int *a, tfm_fp_int *b);
337 /* power of 2 operations */
338 int tfm_fp_div_2d(tfm_fp_int *a, int b, tfm_fp_int *c, tfm_fp_int *d);
339 int tfm_fp_mod_2d(tfm_fp_int *a, int b, tfm_fp_int *c);
340 int tfm_fp_mul_2d(tfm_fp_int *a, int b, tfm_fp_int *c);
341 int tfm_fp_2expt (tfm_fp_int *a, int b);
342 int tfm_fp_mul_2(tfm_fp_int *a, tfm_fp_int *c);
343 int tfm_fp_div_2(tfm_fp_int *a, tfm_fp_int *c);
345 /* Counts the number of lsbs which are zero before the first zero bit */
346 int tfm_fp_cnt_lsb(tfm_fp_int *a);
349 int tfm_fp_add(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
352 int tfm_fp_sub(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
355 int tfm_fp_mul(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
358 int tfm_fp_sqr(tfm_fp_int *a, tfm_fp_int *b);
360 /* a/b => cb + d == a */
361 int tfm_fp_div(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d);
363 /* c = a mod b, 0 <= c < b */
364 int tfm_fp_mod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
366 /* compare against a single digit */
367 int tfm_fp_cmp_d(tfm_fp_int *a, tfm_fp_digit b);
370 int tfm_fp_add_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c);
373 int tfm_fp_sub_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c);
376 int tfm_fp_mul_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c);
378 /* a/b => cb + d == a */
379 int tfm_fp_div_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c, tfm_fp_digit *d);
381 /* c = a mod b, 0 <= c < b */
382 int tfm_fp_mod_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_digit *c);
384 /* ---> number theory <--- */
385 /* d = a + b (mod c) */
386 int tfm_fp_addmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d);
388 /* d = a - b (mod c) */
389 int tfm_fp_submod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d);
391 /* d = a * b (mod c) */
392 int tfm_fp_mulmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d);
394 /* c = a * a (mod b) */
395 int tfm_fp_sqrmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
397 /* c = 1/a (mod b) */
398 int tfm_fp_invmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
401 int tfm_fp_gcd(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
404 int tfm_fp_lcm(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
406 int tfm_fp_sqrt(tfm_fp_int *arg, tfm_fp_int *ret);
407 int tfm_fp_expt_d(tfm_fp_int * a, tfm_fp_digit b, tfm_fp_int * c);
408 int tfm_fp_xor(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c);
409 int tfm_fp_and(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c);
410 int tfm_fp_or(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c);
412 /* setups the montgomery reduction */
413 int tfm_fp_montgomery_setup(tfm_fp_int *a, tfm_fp_digit *mp);
415 /* computes a = B**n mod b without division or multiplication useful for
416 * normalizing numbers in a Montgomery system.
418 int tfm_fp_montgomery_calc_normalization(tfm_fp_int *a, tfm_fp_int *b);
420 /* computes x/R == x (mod N) via Montgomery Reduction */
421 int tfm_fp_montgomery_reduce(tfm_fp_int *a, tfm_fp_int *m, tfm_fp_digit mp);
423 /* d = a**b (mod c) */
424 int tfm_fp_exptmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d);
426 /* radix conersions */
427 int tfm_fp_count_bits(tfm_fp_int *a);
429 int tfm_fp_unsigned_bin_size(tfm_fp_int *a);
430 void tfm_fp_read_unsigned_bin(tfm_fp_int *a, unsigned char *b, int c);
431 void tfm_fp_to_unsigned_bin(tfm_fp_int *a, unsigned char *b);
433 int tfm_fp_signed_bin_size(tfm_fp_int *a);
434 void tfm_fp_to_signed_bin(tfm_fp_int *a, unsigned char *b);
436 int tfm_fp_read_radix(tfm_fp_int *a, char *str, int radix);
437 int tfm_fp_toradix(tfm_fp_int *a, char *str, int radix);
438 int tfm_fp_toradix_n(tfm_fp_int * a, char *str, int radix, int maxlen);
439 int tfm_fp_radix_size(tfm_fp_int *a, int radix, int *size);
442 /* VARIOUS LOW LEVEL STUFFS */
443 int s_tfm_fp_add(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
444 int s_tfm_fp_sub(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c);
445 void tfm_fp_reverse(unsigned char *s, int len);
447 int tfm_fp_mul_comba(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
450 void tfm_fp_mul_comba_small(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
454 void tfm_fp_mul_comba20(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
457 void tfm_fp_mul_comba24(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
460 void tfm_fp_mul_comba28(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
463 void tfm_fp_mul_comba32(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
466 void tfm_fp_mul_comba48(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
469 void tfm_fp_mul_comba64(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C);
472 int tfm_fp_sqr_comba(tfm_fp_int *A, tfm_fp_int *B);
475 void tfm_fp_sqr_comba_small(tfm_fp_int *A, tfm_fp_int *B);
479 void tfm_fp_sqr_comba20(tfm_fp_int *A, tfm_fp_int *B);
482 void tfm_fp_sqr_comba24(tfm_fp_int *A, tfm_fp_int *B);
485 void tfm_fp_sqr_comba28(tfm_fp_int *A, tfm_fp_int *B);
488 void tfm_fp_sqr_comba32(tfm_fp_int *A, tfm_fp_int *B);
491 void tfm_fp_sqr_comba48(tfm_fp_int *A, tfm_fp_int *B);
494 void tfm_fp_sqr_comba64(tfm_fp_int *A, tfm_fp_int *B);