/* TomsFastMath, a fast ISO C bignum library. * * This project is meant to fill in where LibTomMath * falls short. That is speed ;-) * * This project is public domain and free for all purposes. * * Tom St Denis, tomstdenis@gmail.com */ #ifndef TFM_H_ #define TFM_H_ #include #include #include #include #include typedef struct tfm_fp_int_struct tfm_fp_int; #include #undef CRYPT #ifdef SILC_X86_64 #define TFM_X86_64 #endif /* SILC_X86_64 */ #ifdef SILC_I386 #define TFM_X86 #ifdef SILC_CPU_SSE2 #define TFM_SSE2 #endif /* SILC_CPU_SSE2 */ #endif /* SILC_I386 */ #ifndef MIN #define MIN(x,y) ((x)<(y)?(x):(y)) #endif #ifndef MAX #define MAX(x,y) ((x)>(y)?(x):(y)) #endif /* externally define this symbol to ignore the default settings, useful for changing the build from the make process */ #ifndef TFM_ALREADY_SET /* do we want the large set of small multiplications ? Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-) */ #define TFM_SMALL_SET /* do we want huge code Enable these if you are doing 20, 24, 28, 32, 48, 64 digit multiplications (useful for RSA) Less important on 64-bit machines as 32 digits == 2048 bits */ #define TFM_MUL20 #define TFM_MUL24 #define TFM_MUL28 #define TFM_MUL32 #define TFM_MUL48 #define TFM_MUL64 #define TFM_SQR20 #define TFM_SQR24 #define TFM_SQR28 #define TFM_SQR32 #define TFM_SQR48 #define TFM_SQR64 /* do we want some overflow checks Not required if you make sure your numbers are within range (e.g. by default a modulus for tfm_fp_exptmod() can only be upto 2048 bits long) */ #define TFM_CHECK /* Is the target a P4 Prescott */ /* #define TFM_PRESCOTT */ /* Do we want timing resistant tfm_fp_exptmod() ? * This makes it slower but also timing invariant with respect to the exponent */ /* #define TFM_TIMING_RESISTANT */ #endif /* Max size of any number in bits. Basically the largest size you will be multiplying * should be half [or smaller] of TFM_FP_MAX_SIZE-four_digit * * You can externally define this or it defaults to 4096-bits [allowing multiplications upto 2048x2048 bits ] */ #ifndef TFM_FP_MAX_SIZE #define TFM_FP_MAX_SIZE (8192+(8*DIGIT_BIT)) #endif /* will this lib work? */ #if (CHAR_BIT & 7) #error CHAR_BIT must be a multiple of eight. #endif #if TFM_FP_MAX_SIZE % CHAR_BIT #error TFM_FP_MAX_SIZE must be a multiple of CHAR_BIT #endif /* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */ #if defined(__x86_64__) #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) #error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid! #endif #if !defined(TFM_X86_64) && !defined(TFM_NO_ASM) #define TFM_X86_64 #endif #endif #if defined(TFM_X86_64) #if !defined(TFM_FP_64BIT) #define TFM_FP_64BIT #endif #endif /* try to detect x86-32 */ #if defined(__i386__) && !defined(TFM_SSE2) #if defined(TFM_X86_64) || defined(TFM_ARM) #error x86-32 detected, x86-64/ARM optimizations are not valid! #endif #if !defined(TFM_X86) && !defined(TFM_NO_ASM) #define TFM_X86 #endif #endif /* make sure we're 32-bit for x86-32/sse/arm/ppc32 */ #if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(TFM_FP_64BIT) #warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining) #undef TFM_FP_64BIT #endif /* multi asms? */ #ifdef TFM_X86 #define TFM_ASM #endif #ifdef TFM_X86_64 #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif #ifdef TFM_SSE2 #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif #ifdef TFM_ARM #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif #ifdef TFM_PPC32 #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif #ifdef TFM_PPC64 #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif #ifdef TFM_AVR32 #ifdef TFM_ASM #error TFM_ASM already defined! #endif #define TFM_ASM #endif /* we want no asm? */ #ifdef TFM_NO_ASM #undef TFM_X86 #undef TFM_X86_64 #undef TFM_SSE2 #undef TFM_ARM #undef TFM_PPC32 #undef TFM_PPC64 #undef TFM_AVR32 #undef TFM_ASM #endif /* ECC helpers */ #ifdef TFM_ECC192 #ifdef TFM_FP_64BIT #define TFM_MUL3 #define TFM_SQR3 #else #define TFM_MUL6 #define TFM_SQR6 #endif #endif #ifdef TFM_ECC224 #ifdef TFM_FP_64BIT #define TFM_MUL4 #define TFM_SQR4 #else #define TFM_MUL7 #define TFM_SQR7 #endif #endif #ifdef TFM_ECC256 #ifdef TFM_FP_64BIT #define TFM_MUL4 #define TFM_SQR4 #else #define TFM_MUL8 #define TFM_SQR8 #endif #endif #ifdef TFM_ECC384 #ifdef TFM_FP_64BIT #define TFM_MUL6 #define TFM_SQR6 #else #define TFM_MUL12 #define TFM_SQR12 #endif #endif #ifdef TFM_ECC521 #ifdef TFM_FP_64BIT #define TFM_MUL9 #define TFM_SQR9 #else #define TFM_MUL17 #define TFM_SQR17 #endif #endif /* some default configurations. */ #if defined(TFM_FP_64BIT) /* for GCC only on supported platforms */ #ifndef CRYPT typedef SilcUInt64 ulong64; #endif typedef ulong64 tfm_fp_digit; typedef unsigned long tfm_fp_word __attribute__ ((mode(TI))); #else /* this is to make porting into LibTomCrypt easier :-) */ #ifndef CRYPT #if defined(_MSC_VER) || defined(__BORLANDC__) typedef unsigned __int64 ulong64; typedef signed __int64 long64; #else typedef unsigned long long ulong64; typedef signed long long long64; #endif #endif typedef unsigned long tfm_fp_digit; typedef ulong64 tfm_fp_word; #endif /* # of digits this is */ #define DIGIT_BIT (int)((CHAR_BIT) * sizeof(tfm_fp_digit)) #define TFM_FP_MASK (tfm_fp_digit)(-1) #define TFM_FP_SIZE (TFM_FP_MAX_SIZE/DIGIT_BIT) /* signs */ #define TFM_FP_ZPOS 0 #define TFM_FP_NEG 1 /* return codes */ #define TFM_FP_OKAY 0 #define TFM_FP_VAL 1 #define TFM_FP_MEM 2 /* equalities */ #define TFM_FP_LT -1 /* less than */ #define TFM_FP_EQ 0 /* equal to */ #define TFM_FP_GT 1 /* greater than */ /* replies */ #define TFM_FP_YES 1 /* yes response */ #define TFM_FP_NO 0 /* no response */ /* a FP type */ struct tfm_fp_int_struct { SilcStack stack; tfm_fp_digit *dp; unsigned int used; unsigned int alloc : 31; unsigned int sign : 1; }; /* functions */ /* initialize [or zero] an fp int */ #define tfm_fp_init(a) tfm_fp_sinit(NULL, a) #define tfm_fp_sinit(s, a) \ { (a)->stack = s; (a)->dp = NULL; (a)->alloc = (a)->used = (a)->sign = 0; } int tfm_fp_init_size(SilcStack stack, tfm_fp_int *a, int size); void tfm_fp_zero(tfm_fp_int *a); /* zero/even/odd ? */ #define tfm_fp_iszero(a) (((a)->used == 0) ? TFM_FP_YES : TFM_FP_NO) #define tfm_fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? TFM_FP_YES : TFM_FP_NO) #define tfm_fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? TFM_FP_YES : TFM_FP_NO) /* set to a small digit */ int tfm_fp_set(tfm_fp_int *a, tfm_fp_digit b); /* copy from a to b */ int tfm_fp_copy(tfm_fp_int *a, tfm_fp_int *b); int tfm_fp_init_copy(tfm_fp_int *a, tfm_fp_int *b, SilcStack stack); void tfm_fp_exch(tfm_fp_int *a, tfm_fp_int *b); /* clamp digits */ #define tfm_fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : TFM_FP_ZPOS; } /* negate and absolute */ #define tfm_fp_neg(a, b) { tfm_fp_copy(a, b); (b)->sign ^= 1; tfm_fp_clamp(b); } #define tfm_fp_abs(a, b) { tfm_fp_copy(a, b); (b)->sign = 0; } /* right shift x digits */ void tfm_fp_rshd(tfm_fp_int *a, int x); /* left shift x digits */ int tfm_fp_lshd(tfm_fp_int *a, int x); /* signed comparison */ int tfm_fp_cmp(tfm_fp_int *a, tfm_fp_int *b); /* unsigned comparison */ int tfm_fp_cmp_mag(tfm_fp_int *a, tfm_fp_int *b); /* power of 2 operations */ int tfm_fp_div_2d(tfm_fp_int *a, int b, tfm_fp_int *c, tfm_fp_int *d); int tfm_fp_mod_2d(tfm_fp_int *a, int b, tfm_fp_int *c); int tfm_fp_mul_2d(tfm_fp_int *a, int b, tfm_fp_int *c); int tfm_fp_2expt (tfm_fp_int *a, int b); int tfm_fp_mul_2(tfm_fp_int *a, tfm_fp_int *c); int tfm_fp_div_2(tfm_fp_int *a, tfm_fp_int *c); /* Counts the number of lsbs which are zero before the first zero bit */ int tfm_fp_cnt_lsb(tfm_fp_int *a); /* c = a + b */ int tfm_fp_add(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* c = a - b */ int tfm_fp_sub(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* c = a * b */ int tfm_fp_mul(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* b = a*a */ int tfm_fp_sqr(tfm_fp_int *a, tfm_fp_int *b); /* a/b => cb + d == a */ int tfm_fp_div(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d); /* c = a mod b, 0 <= c < b */ int tfm_fp_mod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* compare against a single digit */ int tfm_fp_cmp_d(tfm_fp_int *a, tfm_fp_digit b); /* c = a + b */ int tfm_fp_add_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c); /* c = a - b */ int tfm_fp_sub_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c); /* c = a * b */ int tfm_fp_mul_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c); /* a/b => cb + d == a */ int tfm_fp_div_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_int *c, tfm_fp_digit *d); /* c = a mod b, 0 <= c < b */ int tfm_fp_mod_d(tfm_fp_int *a, tfm_fp_digit b, tfm_fp_digit *c); /* ---> number theory <--- */ /* d = a + b (mod c) */ int tfm_fp_addmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d); /* d = a - b (mod c) */ int tfm_fp_submod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d); /* d = a * b (mod c) */ int tfm_fp_mulmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d); /* c = a * a (mod b) */ int tfm_fp_sqrmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* c = 1/a (mod b) */ int tfm_fp_invmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* c = (a, b) */ int tfm_fp_gcd(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); /* c = [a, b] */ int tfm_fp_lcm(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); int tfm_fp_sqrt(tfm_fp_int *arg, tfm_fp_int *ret); int tfm_fp_expt_d(tfm_fp_int * a, tfm_fp_digit b, tfm_fp_int * c); int tfm_fp_xor(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c); int tfm_fp_and(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c); int tfm_fp_or(tfm_fp_int * a, tfm_fp_int * b, tfm_fp_int * c); /* setups the montgomery reduction */ int tfm_fp_montgomery_setup(tfm_fp_int *a, tfm_fp_digit *mp); /* computes a = B**n mod b without division or multiplication useful for * normalizing numbers in a Montgomery system. */ int tfm_fp_montgomery_calc_normalization(tfm_fp_int *a, tfm_fp_int *b); /* computes x/R == x (mod N) via Montgomery Reduction */ int tfm_fp_montgomery_reduce(tfm_fp_int *a, tfm_fp_int *m, tfm_fp_digit mp); /* d = a**b (mod c) */ int tfm_fp_exptmod(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c, tfm_fp_int *d); /* radix conersions */ int tfm_fp_count_bits(tfm_fp_int *a); int tfm_fp_unsigned_bin_size(tfm_fp_int *a); void tfm_fp_read_unsigned_bin(tfm_fp_int *a, unsigned char *b, int c); void tfm_fp_to_unsigned_bin(tfm_fp_int *a, unsigned char *b); int tfm_fp_signed_bin_size(tfm_fp_int *a); void tfm_fp_to_signed_bin(tfm_fp_int *a, unsigned char *b); int tfm_fp_read_radix(tfm_fp_int *a, char *str, int radix); int tfm_fp_toradix(tfm_fp_int *a, char *str, int radix); int tfm_fp_toradix_n(tfm_fp_int * a, char *str, int radix, int maxlen); int tfm_fp_radix_size(tfm_fp_int *a, int radix, int *size); /* VARIOUS LOW LEVEL STUFFS */ int s_tfm_fp_add(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); int s_tfm_fp_sub(tfm_fp_int *a, tfm_fp_int *b, tfm_fp_int *c); void tfm_fp_reverse(unsigned char *s, int len); int tfm_fp_mul_comba(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #ifdef TFM_SMALL_SET void tfm_fp_mul_comba_small(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL20 void tfm_fp_mul_comba20(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL24 void tfm_fp_mul_comba24(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL28 void tfm_fp_mul_comba28(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL32 void tfm_fp_mul_comba32(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL48 void tfm_fp_mul_comba48(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif #ifdef TFM_MUL64 void tfm_fp_mul_comba64(tfm_fp_int *A, tfm_fp_int *B, tfm_fp_int *C); #endif int tfm_fp_sqr_comba(tfm_fp_int *A, tfm_fp_int *B); #ifdef TFM_SMALL_SET void tfm_fp_sqr_comba_small(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR20 void tfm_fp_sqr_comba20(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR24 void tfm_fp_sqr_comba24(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR28 void tfm_fp_sqr_comba28(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR32 void tfm_fp_sqr_comba32(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR48 void tfm_fp_sqr_comba48(tfm_fp_int *A, tfm_fp_int *B); #endif #ifdef TFM_SQR64 void tfm_fp_sqr_comba64(tfm_fp_int *A, tfm_fp_int *B); #endif #endif