From 4ef69c174684c578fb59f51e782ac8fb8c262d8b Mon Sep 17 00:00:00 2001 From: Pekka Riikonen Date: Sun, 24 Apr 2005 15:24:11 +0000 Subject: [PATCH] Removed NSS MPI from the source tree. Added LibTomMath as the default math library Also added FastTomMath, which is currently only in CVS version and not delivered to any distribution. --- CHANGES | 24 + configure.ad | 23 +- distdir/common | 7 +- lib/configure.ad | 3 - lib/silcmath/Makefile.ad | 41 +- lib/silcmath/modinv.c | 39 +- lib/silcmath/mp_gmp.c | 12 +- lib/silcmath/mp_tfm.c | 246 + lib/silcmath/mp_tfm.h | 27 + lib/silcmath/{mp_mpi.c => mp_tma.c} | 90 +- lib/silcmath/{mp_mpi.h => mp_tma.h} | 18 +- lib/silcmath/mpbin.c | 11 +- lib/silcmath/silcmp.h | 83 +- lib/silcmath/silcprimegen.c | 217 +- lib/silcmath/tfm.c | 5481 ++++++++++++++++ lib/silcmath/tfm.h | 362 ++ lib/silcmath/tma.c | 9048 +++++++++++++++++++++++++++ lib/silcmath/tma.h | 582 ++ lib/silcske/silcske.c | 210 +- 19 files changed, 16155 insertions(+), 369 deletions(-) create mode 100644 lib/silcmath/mp_tfm.c create mode 100644 lib/silcmath/mp_tfm.h rename lib/silcmath/{mp_mpi.c => mp_tma.c} (74%) rename lib/silcmath/{mp_mpi.h => mp_tma.h} (60%) create mode 100644 lib/silcmath/tfm.c create mode 100644 lib/silcmath/tfm.h create mode 100644 lib/silcmath/tma.c create mode 100644 lib/silcmath/tma.h diff --git a/CHANGES b/CHANGES index baa76976..c2915fcc 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,27 @@ +Sun Apr 24 12:01:37 EEST 2005 Pekka Riikonen + + * Optimized primer number generator to use simpler conversion + routines. Also assure that the prime number will have the + highest bit set after modifying it. Affected file is + lib/silcmath/silcprimegen.c. + + * Added LibTomMath to SILC Math library (SILC_DIST_TMA). + + * Added FastTomMath to SILC Math Library (SILC_DIST_TFM), + for now only for testing purposes, it will not be delivered + in any distribution for now. + + * Removed NSS MPI from the source tree. Due to upcoming + license change for the SILC Toolkit, we cannot deliver the + NSS MPI anymore. I decided to replace it in all distributions + with public domain library. + + * Removed GNU regex from lib/contrib and introduced free + GNU compatible regex, lib/contrib/regexpr.[ch]. Added + GNU regex compatible API to it (only partial). + + * Removed getopt routines from lib/contrib. + Fri Apr 22 12:21:44 EEST 2005 Pekka Riikonen * Use silc_server_send_command_reply to send replies instead diff --git a/configure.ad b/configure.ad index b7affd6f..b9ff4180 100644 --- a/configure.ad +++ b/configure.ad @@ -165,7 +165,6 @@ AC_CHECK_FUNC(getopt_long, have_getopt_long=1 ], have_getopt_long=0 ) -AM_CONDITIONAL(HAVE_GETOPT_LONG, test x$have_getopt_long = x1) ## ## Enable/disable checking @@ -282,6 +281,18 @@ AC_DEFUN([SILC_ADD_CFLAGS], unset tmp_CFLAGS ]) +# Function to check if compiler flag works, destination specifiable +# Usage: SILC_ADD_CC_FLAGS(VAR, FLAGS, [ACTION-IF-FAILED]) +AC_DEFUN([SILC_ADD_CC_FLAGS], +[ tmp_CFLAGS="$1_CFLAGS" + $1_CFLAGS="${$1_CFLAGS} $2" + AC_MSG_CHECKING(whether $CC accepts $2 flag) + AC_TRY_LINK([], [], [AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no) + $1_CFLAGS="$tmp_CFLAGS" + $3]) + unset tmp_CFLAGS +]) + if test "$GCC"; then # GCC specific options if test "x$summary_debug" = "xyes"; then @@ -647,10 +658,10 @@ AC_ARG_WITH(gmp, ) AM_CONDITIONAL(SILC_MP_GMP, test x$mp_gmp = xtrue) -AM_CONDITIONAL(SILC_MP_NSS_MPI, test x$mp_gmp = xfalse) +AM_CONDITIONAL(SILC_MP_SILCMATH, test x$mp_gmp = xfalse) if test x$mp_gmp = xfalse; then - AC_DEFINE([SILC_MP_NSS_MPI], [], [MPI]) - AC_MSG_RESULT(Using NSS MPI as a MP library.) + AC_DEFINE([SILC_MP_SILCMAP], [], [SILCMATH]) + AC_MSG_RESULT(Using SILC Math as a MP library.) fi #endif SILC_DIST_MATH @@ -1224,11 +1235,9 @@ fi echo " Assembler optimizations .......: $summary_asm" #ifdef SILC_DIST_MATH -mp="MPI" if test x$mp_gmp = xtrue; then - mp="GMP" + echo " Arithmetic library ............: GMP" fi -echo " Arithmetic library ............: $mp" #endif SILC_DIST_MATH threads="no" diff --git a/distdir/common b/distdir/common index 3fa00057..95441651 100644 --- a/distdir/common +++ b/distdir/common @@ -5,7 +5,10 @@ define SILC_DIST_LIB define SILC_DIST_INCLUDES define SILC_DIST_DOC define SILC_DIST_SIM -define SILC_DIST_MPI -define SILC_DIST_MATH define SILC_DIST_SFTP define SILC_DIST_COMPILER + +# Math library, define only TMA or TFM, not both. +define SILC_DIST_MATH +define SILC_DIST_TMA +#define SILC_DIST_TFM diff --git a/lib/configure.ad b/lib/configure.ad index ff9e2a3c..b77ba968 100644 --- a/lib/configure.ad +++ b/lib/configure.ad @@ -125,9 +125,6 @@ lib/silcutil/win32/Makefile lib/silcutil/beos/Makefile lib/silcutil/os2/Makefile lib/silcutil/epoc/Makefile -#ifdef SILC_DIST_MATH -lib/silcmath/Makefile -#endif SILC_DIST_MATH #ifdef SILC_DIST_SFTP lib/silcsftp/Makefile #endif SILC_DIST_SFTP diff --git a/lib/silcmath/Makefile.ad b/lib/silcmath/Makefile.ad index a65c9d88..d352c1f8 100644 --- a/lib/silcmath/Makefile.ad +++ b/lib/silcmath/Makefile.ad @@ -17,20 +17,35 @@ AUTOMAKE_OPTIONS = 1.0 no-dependencies foreign -if SILC_MP_NSS_MPI -SUBDIRS = mpi -else -SUBDIRS = -endif +noinst_LTLIBRARIES = libsilcmath.la -DIST_SUBDIRS = mpi +if SILC_MP_SILCMATH +MP_SOURCE = \ +#ifdef SILC_DIST_TMA + mp_tma.c \ + tma.c \ +#endif SILC_DIST_TMA +#ifdef SILC_DIST_TFM + mp_tfm.c \ + tfm.c +#endif SILC_DIST_TFM -noinst_LTLIBRARIES = libsilcmath.la +MP_HEADER = \ +#ifdef SILC_DIST_TMA + mp_tma.h \ + tma.h \ + tma_class.h \ + tma_superclass.h \ +#endif SILC_DIST_TMA +#ifdef SILC_DIST_TFM + mp_tfm.h \ + tfm.h +#endif SILC_DIST_TFM -if SILC_MP_NSS_MPI -MP_SOURCE = mp_mpi.c +AM_CFLAGS = @MATH_CFLAGS@ else MP_SOURCE = mp_gmp.c +MP_HEADER = endif libsilcmath_la_SOURCES = \ @@ -40,15 +55,13 @@ libsilcmath_la_SOURCES = \ $(MP_SOURCE) #ifdef SILC_DIST_TOOLKIT -MP_HEADER = mpi/mpi.h mpi/mplogic.h mpi/mpi-config.h include_HEADERS = \ mp_gmp.h \ - mp_mpi.h \ + $(MP_HEADER) \ silcmath.h \ - silcmp.h \ - $(MP_HEADER) + silcmp.h #endif SILC_DIST_TOOLKIT -EXTRA_DIST = mp_gmp.c mp_mpi.c *.h +EXTRA_DIST = $(MP_SOURCE) $(MP_HEADER) mp_gmp.c mp_gmp.h include $(top_srcdir)/Makefile.defines.in diff --git a/lib/silcmath/modinv.c b/lib/silcmath/modinv.c index 7863923e..5c6e0657 100644 --- a/lib/silcmath/modinv.c +++ b/lib/silcmath/modinv.c @@ -2,15 +2,14 @@ modinv.h - Author: Pekka Riikonen + Author: Pekka Riikonen - Copyright (C) 1997 - 2000 Pekka Riikonen + Copyright (C) 1997 - 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -29,21 +28,21 @@ typedef struct { #define plus1 (i == 2 ? 0 : i + 1) #define minus1 (i == 0 ? 2 : i - 1) -/* Find multiplicative inverse using Euclid's extended algorithm. - Computes inverse such that a * inv mod n = 1, where 0 < a < n. +/* Find multiplicative inverse using Euclid's extended algorithm. + Computes inverse such that a * inv mod n = 1, where 0 < a < n. Algorithm goes like this: - + g(0) = n v(0) = 0 g(1) = a v(1) = 1 - + y = g(i-1) / g(i) g(i+1) = g(i-1) - y * g(i) = g(i)-1 mod g(i) v(i+1) = v(i-1) - y * v(i) - - do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n, - is added to inverse making it positive again. (Sometimes the algorithm - has a variable u defined too and it behaves just like v, except that - initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is + + do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n, + is added to inverse making it positive again. (Sometimes the algorithm + has a variable u defined too and it behaves just like v, except that + initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is not needed by the algorithm so it does not have to be included.) */ @@ -52,10 +51,10 @@ void silc_mp_modinv(SilcMPInt *inv, SilcMPInt *a, SilcMPInt *n) int i; SilcMPInt y; SilcMPInt x; - + ModInv g[3]; ModInv v[3]; - + /* init MP vars */ silc_mp_init(&y); silc_mp_init(&x); @@ -69,7 +68,7 @@ void silc_mp_modinv(SilcMPInt *inv, SilcMPInt *a, SilcMPInt *n) silc_mp_set(&g[0].x, n); /* g(0) = n */ silc_mp_set(&g[1].x, a); /* g(1) = a */ silc_mp_init(&g[2].x); - + i = 1; while(silc_mp_cmp_ui(&g[i].x, 0) != 0) { silc_mp_div(&y, &g[minus1].x, &g[i].x); /* y = n / a */ @@ -79,14 +78,14 @@ void silc_mp_modinv(SilcMPInt *inv, SilcMPInt *a, SilcMPInt *n) silc_mp_sub(&v[plus1].x, &v[plus1].x, &x); i = plus1; } - + /* set the inverse */ silc_mp_set(inv, &v[minus1].x); - + /* if inverse is negative, add n to inverse */ if (silc_mp_cmp_ui(inv, 0) < 0) silc_mp_add(inv, inv, n); - + /* clear the vars */ memset(&g, 0, sizeof(g)); memset(&v, 0, sizeof(v)); diff --git a/lib/silcmath/mp_gmp.c b/lib/silcmath/mp_gmp.c index 9d62bac4..87629649 100644 --- a/lib/silcmath/mp_gmp.c +++ b/lib/silcmath/mp_gmp.c @@ -1,10 +1,10 @@ /* - mp_gmp.c + mp_gmp.c Author: Pekka Riikonen - Copyright (C) 2001 Pekka Riikonen + Copyright (C) 2001 - 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -121,7 +121,7 @@ void silc_mp_div_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) mpz_div_ui(dst, mp1, ui); } -void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcMPInt *mp2) { if (q && r) @@ -137,7 +137,7 @@ void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) mpz_fdiv_q_2exp(dst, mp1, exp); } -void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcUInt32 exp) { if (q) @@ -172,13 +172,13 @@ void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) mpz_pow_ui(dst, mp1, exp); } -void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, +void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, SilcMPInt *mod) { mpz_powm(dst, mp1, exp, mod); } -void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, +void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, SilcMPInt *mod) { mpz_powm_ui(dst, mp1, exp, mod); diff --git a/lib/silcmath/mp_tfm.c b/lib/silcmath/mp_tfm.c new file mode 100644 index 00000000..1abbf507 --- /dev/null +++ b/lib/silcmath/mp_tfm.c @@ -0,0 +1,246 @@ +/* + + mp_tfm.c + + Author: Pekka Riikonen + + Copyright (C) 2005 Pekka Riikonen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +*/ +/* $Id$ */ + +#include "silcincludes.h" +#include "mp_tfm.h" + +void silc_mp_init(SilcMPInt *mp) +{ + fp_init(mp); +} + +void silc_mp_uninit(SilcMPInt *mp) +{ + fp_zero(mp); +} + +size_t silc_mp_size(SilcMPInt *mp) +{ + return fp_unsigned_bin_size(mp); +} + +size_t silc_mp_sizeinbase(SilcMPInt *mp, int base) +{ + int size = 0; + fp_radix_size(mp, base, &size); + if (size > 1) + size--; + return size; +} + +void silc_mp_set(SilcMPInt *dst, SilcMPInt *src) +{ + fp_copy(src, dst); +} + +void silc_mp_set_ui(SilcMPInt *dst, SilcUInt32 ui) +{ + fp_set(dst, ui); +} + +void silc_mp_set_si(SilcMPInt *dst, SilcInt32 si) +{ + fp_set(dst, si); +} + +void silc_mp_set_str(SilcMPInt *dst, const char *str, int base) +{ + fp_read_radix(dst, str, base); +} + +SilcUInt32 silc_mp_get_ui(SilcMPInt *mp) +{ + SILC_NOT_IMPLEMENTED("silc_mp_get_ui"); + assert(FALSE); +} + +char *silc_mp_get_str(char *str, SilcMPInt *mp, int base) +{ + if (fp_toradix(mp, str, base) != MP_OKAY) + return NULL; + return str; +} + +void silc_mp_add(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_add(mp1, mp2, dst); +} + +void silc_mp_add_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + mp_add_d(mp1, (mp_digit)ui, dst); +} + +void silc_mp_sub(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_sub(mp1, mp2, dst); +} + +void silc_mp_sub_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + fp_sub_d(mp1, (mp_digit)ui, dst); +} + +void silc_mp_mul(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_mul(mp1, mp2, dst); +} + +void silc_mp_mul_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + fp_mul_d(mp1, (mp_digit)ui, dst); +} + +void silc_mp_mul_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) +{ + fp_mul_2d(mp1, exp, dst); +} + +void silc_mp_sqrt(SilcMPInt *dst, SilcMPInt *src) +{ + fp_sqrt(src, dst); +} + +void silc_mp_div(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_div(mp1, mp2, dst, NULL); +} + +void silc_mp_div_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + fp_div_d(mp1, (mp_digit)ui, dst, NULL); +} + +void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, + SilcMPInt *mp2) +{ + fp_div(mp1, mp2, q, r); +} + +void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) +{ + fp_div_2d(mp1, exp, dst, NULL); +} + +void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, + SilcUInt32 exp) +{ + fp_div_2d(mp1, exp, q, r); +} + +void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_mod(mp1, mp2, dst); +} + +void silc_mp_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + fp_digit d; + fp_mod_d(mp1, ui, &d); + silc_mp_set_ui(dst, d); +} + +void silc_mp_mod_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) +{ + fp_mod_2d(mp1, ui, dst); +} + +void silc_mp_pow(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp) +{ + SILC_NOT_IMPLEMENTED("silc_mp_pow"); + assert(FALSE); +} + +void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) +{ + SILC_NOT_IMPLEMENTED("silc_mp_pow_ui"); + assert(FALSE); +} + +void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, + SilcMPInt *mod) +{ + fp_exptmod(mp1, exp, mod, dst); +} + +void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, + SilcMPInt *mod) +{ + SilcMPInt tmp; + silc_mp_init(&tmp); + silc_mp_set_ui(&tmp, exp); + silc_mp_pow_mod(dst, mp1, &tmp, mod); + silc_mp_uninit(&tmp); +} + +void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + fp_gcd(mp1, mp2, dst); +} + +void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, SilcMPInt *mp1, + SilcMPInt *mp2) +{ + SILC_NOT_IMPLEMENTED("silc_mp_gcdext"); + assert(FALSE); +} + +int silc_mp_cmp(SilcMPInt *mp1, SilcMPInt *mp2) +{ + return fp_cmp(mp1, mp2); +} + +int silc_mp_cmp_si(SilcMPInt *mp1, SilcInt32 si) +{ + return fp_cmp_d(mp1, si); +} + +int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui) +{ + return fp_cmp_d(mp1, ui); +} + +void silc_mp_abs(SilcMPInt *dst, SilcMPInt *src) +{ + fp_abs(src, dst); +} + +void silc_mp_neg(SilcMPInt *dst, SilcMPInt *src) +{ + fp_neg(src, dst); +} + +void silc_mp_and(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + SILC_NOT_IMPLEMENTED("silc_mp_and"); + assert(FALSE); +} + +void silc_mp_or(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + SILC_NOT_IMPLEMENTED("silc_mp_or"); + assert(FALSE); +} + +void silc_mp_xor(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) +{ + SILC_NOT_IMPLEMENTED("silc_mp_xor"); + assert(FALSE); +} diff --git a/lib/silcmath/mp_tfm.h b/lib/silcmath/mp_tfm.h new file mode 100644 index 00000000..176cd1cb --- /dev/null +++ b/lib/silcmath/mp_tfm.h @@ -0,0 +1,27 @@ +/* + + mp_tfm.h + + Author: Pekka Riikonen + + Copyright (C) 2005 Pekka Riikonen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +*/ + +#ifndef MP_TFM_H +#define MP_TFM_H + +#include "tfm.h" + +#define SILC_MP_INT fp_int + +#endif /* MP_TFM_H */ diff --git a/lib/silcmath/mp_mpi.c b/lib/silcmath/mp_tma.c similarity index 74% rename from lib/silcmath/mp_mpi.c rename to lib/silcmath/mp_tma.c index 8ef0a40b..ba9f2120 100644 --- a/lib/silcmath/mp_mpi.c +++ b/lib/silcmath/mp_tma.c @@ -1,10 +1,10 @@ /* - mp_mpi.c + mp_tma.c Author: Pekka Riikonen - Copyright (C) 2001 Pekka Riikonen + Copyright (C) 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,8 +19,7 @@ /* $Id$ */ #include "silcincludes.h" -#include "mpi.h" -#include "mplogic.h" +#include "mp_tma.h" void silc_mp_init(SilcMPInt *mp) { @@ -29,22 +28,21 @@ void silc_mp_init(SilcMPInt *mp) void silc_mp_uninit(SilcMPInt *mp) { - (void)mp_clear(mp); + mp_clear(mp); } size_t silc_mp_size(SilcMPInt *mp) { - return mp_raw_size(mp); + return mp_unsigned_bin_size(mp); } size_t silc_mp_sizeinbase(SilcMPInt *mp, int base) { - size_t sib = mp_unsigned_octet_size(mp); - sib = s_mp_outlen(sib * CHAR_BIT, base); - if (sib > 2) - sib -= 2; /* Looks like MPI returns extra zero - bytes for C-strings. */ - return sib; + int size = 0; + mp_radix_size(mp, base, &size); + if (size > 1) + size--; + return size; } void silc_mp_set(SilcMPInt *dst, SilcMPInt *src) @@ -54,7 +52,7 @@ void silc_mp_set(SilcMPInt *dst, SilcMPInt *src) void silc_mp_set_ui(SilcMPInt *dst, SilcUInt32 ui) { - mp_set(dst, ui); + (void)mp_set_int(dst, ui); } void silc_mp_set_si(SilcMPInt *dst, SilcInt32 si) @@ -64,12 +62,12 @@ void silc_mp_set_si(SilcMPInt *dst, SilcInt32 si) void silc_mp_set_str(SilcMPInt *dst, const char *str, int base) { - (void)mp_read_variable_radix(dst, str, base); + (void)mp_read_radix(dst, str, base); } SilcUInt32 silc_mp_get_ui(SilcMPInt *mp) { - return (SilcUInt32)MP_DIGIT(mp, 0); + return (SilcUInt32)mp_get_int(mp); } char *silc_mp_get_str(char *str, SilcMPInt *mp, int base) @@ -86,7 +84,7 @@ void silc_mp_add(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) void silc_mp_add_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) { - mp_add_d(mp1, ui, dst); + mp_add_d(mp1, (mp_digit)ui, dst); } void silc_mp_sub(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) @@ -111,11 +109,7 @@ void silc_mp_mul_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) void silc_mp_mul_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) { - SilcMPInt tmp; - silc_mp_init(&tmp); - (void)mp_2expt(&tmp, (mp_digit)exp); - (void)mp_mul(mp1, &tmp, dst); - silc_mp_uninit(&tmp); + (void)mp_mul_2d(mp1, exp, dst); } void silc_mp_sqrt(SilcMPInt *dst, SilcMPInt *src) @@ -133,7 +127,7 @@ void silc_mp_div_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) (void)mp_div_d(mp1, (mp_digit)ui, dst, NULL); } -void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcMPInt *mp2) { (void)mp_div(mp1, mp2, q, r); @@ -141,20 +135,13 @@ void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) { - SilcMPInt tmp; - silc_mp_init(&tmp); - (void)mp_2expt(&tmp, (mp_digit)exp); - (void)mp_div(mp1, &tmp, dst, NULL); - silc_mp_uninit(&tmp); + (void)mp_div_2d(mp1, exp, dst, NULL); } -void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcUInt32 exp) { - if (q) { - (void)mp_2expt(q, (mp_digit)exp); - (void)mp_div(mp1, q, q, r); - } + (void)mp_div_2d(mp1, exp, q, r); } void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) @@ -164,23 +151,20 @@ void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) void silc_mp_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) { - mp_digit uidst; - (void)mp_mod_d(mp1, (mp_digit)ui, &uidst); - mp_set(dst, uidst); + mp_digit d; + (void)mp_mod_d(mp1, ui, &d); + silc_mp_set_ui(dst, d); } void silc_mp_mod_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui) { - SilcMPInt tmp; - silc_mp_init(&tmp); - (void)mp_2expt(&tmp, (mp_digit)ui); - (void)mp_mod(mp1, &tmp, dst); - silc_mp_uninit(&tmp); + (void)mp_mod_2d(mp1, ui, dst); } void silc_mp_pow(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp) { - (void)mp_expt(mp1, exp, dst); + SILC_NOT_IMPLEMENTED("silc_mp_pow"); + assert(FALSE); } void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) @@ -188,16 +172,20 @@ void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp) (void)mp_expt_d(mp1, (mp_digit)exp, dst); } -void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, +void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, SilcMPInt *mod) { (void)mp_exptmod(mp1, exp, mod, dst); } -void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, +void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, SilcMPInt *mod) { - (void)mp_exptmod_d(mp1, (mp_digit)exp, mod, dst); + SilcMPInt tmp; + silc_mp_init(&tmp); + silc_mp_set_ui(&tmp, exp); + silc_mp_pow_mod(dst, mp1, &tmp, mod); + silc_mp_uninit(&tmp); } void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) @@ -208,7 +196,7 @@ void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, SilcMPInt *mp1, SilcMPInt *mp2) { - (void)mp_xgcd(mp1, mp2, g, s, t); + (void)mp_exteuclid(mp1, mp2, s, t, g); } int silc_mp_cmp(SilcMPInt *mp1, SilcMPInt *mp2) @@ -218,7 +206,7 @@ int silc_mp_cmp(SilcMPInt *mp1, SilcMPInt *mp2) int silc_mp_cmp_si(SilcMPInt *mp1, SilcInt32 si) { - return mp_cmp_int(mp1, (long)si); + return mp_cmp_d(mp1, si); } int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui) @@ -228,25 +216,25 @@ int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui) void silc_mp_abs(SilcMPInt *dst, SilcMPInt *src) { - mp_abs(src, dst); + (void)mp_abs(src, dst); } void silc_mp_neg(SilcMPInt *dst, SilcMPInt *src) { - mp_neg(src, dst); + (void)mp_neg(src, dst); } void silc_mp_and(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) { - mpl_and(mp1, mp2, dst); + (void)mp_and(mp1, mp2, dst); } void silc_mp_or(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) { - mpl_or(mp1, mp2, dst); + (void)mp_or(mp1, mp2, dst); } void silc_mp_xor(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2) { - mpl_xor(mp1, mp2, dst); + (void)mp_xor(mp1, mp2, dst); } diff --git a/lib/silcmath/mp_mpi.h b/lib/silcmath/mp_tma.h similarity index 60% rename from lib/silcmath/mp_mpi.h rename to lib/silcmath/mp_tma.h index 54614a30..05d11552 100644 --- a/lib/silcmath/mp_mpi.h +++ b/lib/silcmath/mp_tma.h @@ -1,16 +1,15 @@ /* - mp_mpi.h + mp_tma.h - Author: Pekka Riikonen + Author: Pekka Riikonen - Copyright (C) 2001 Pekka Riikonen + Copyright (C) 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -18,11 +17,10 @@ */ -#ifndef MP_MPI_H -#define MP_MPI_H +#ifndef MP_TMA_H +#define MP_TMA_H -#include "mpi.h" -#include "mplogic.h" +#include "tma.h" #define SILC_MP_INT mp_int diff --git a/lib/silcmath/mpbin.c b/lib/silcmath/mpbin.c index 29d80f77..9df0dbef 100644 --- a/lib/silcmath/mpbin.c +++ b/lib/silcmath/mpbin.c @@ -2,15 +2,14 @@ mpbin.c - Author: Pekka Riikonen + Author: Pekka Riikonen - Copyright (C) 2000 - 2001 Pekka Riikonen + Copyright (C) 2000 - 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -35,7 +34,7 @@ unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len, size = (len ? len : ((silc_mp_sizeinbase(val, 2) + 7) / 8)); ret = silc_calloc(size, sizeof(*ret)); - + silc_mp_init(&tmp); silc_mp_set(&tmp, val); diff --git a/lib/silcmath/silcmp.h b/lib/silcmath/silcmp.h index ba18ae35..ccc15a67 100644 --- a/lib/silcmath/silcmp.h +++ b/lib/silcmath/silcmp.h @@ -1,21 +1,20 @@ /* silcmp.h - + Author: Pekka Riikonen - - Copyright (C) 1997 - 2001 Pekka Riikonen - + + Copyright (C) 1997 - 2005 Pekka Riikonen + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + */ /****h* silcmath/SILC MP Interface @@ -23,11 +22,10 @@ * DESCRIPTION * * SILC MP Library Interface. This interface defines the arbitrary - * precision arithmetic routines for SILC. Currently the actual routines - * are implemented separately, usually by some other MP library. The - * interface is generic but is mainly intended for crypto usage. This - * interface is used by SILC routines that needs big numbers, such as - * RSA implementation, Diffie-Hellman implementation etc. + * precision arithmetic routines for SILC. The interface is generic but + * is mainly intended for crypto usage. This interface is used by SILC + * routines that needs big numbers, such as RSA implementation, + * Diffie-Hellman implementation etc. * ***/ @@ -37,7 +35,12 @@ #if defined(SILC_MP_GMP) #include "mp_gmp.h" /* SILC_MP_GMP */ #else -#include "mp_mpi.h" /* SILC_MP_NSS_MPI */ +#ifdef SILC_DIST_TMA +#include "mp_tma.h" +#endif /* SILC_DIST_TMA */ +#ifdef SILC_DIST_TFM +#include "mp_tfm.h" +#endif /* SILC_DIST_TFM */ #endif /****d* silcmath/SilcMPAPI/SilcMPInt @@ -303,7 +306,7 @@ void silc_mp_mul_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui); * * DESCRIPTION * - * Multiply integers `mp1' with 2 ** `exp' and save the result to + * Multiply integers `mp1' with 2 ** `exp' and save the result to * `dst'. This is equivalent to dst = mp1 * (2 ^ exp). * ***/ @@ -354,18 +357,18 @@ void silc_mp_div_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui); * * SYNOPSIS * - * void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, + * void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, * SilcMPInt *mp2); * * DESCRIPTION * * Divide the `mp1' and `mp2' and save the quotient to the `q' and - * the remainder to the `r'. This is equivalent to the q = mp1 / mp2, + * the remainder to the `r'. This is equivalent to the q = mp1 / mp2, * r = mp1 mod mp2 (or mp1 = mp2 * q + r). If the `q' or `r' is NULL * then the operation is omitted. * ***/ -void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcMPInt *mp2); /****f* silcmath/SilcMPAPI/silc_mp_div_2exp @@ -386,7 +389,7 @@ void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp); * * SYNOPSIS * - * void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, + * void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, * SilcUInt32 exp); * * DESCRIPTION @@ -397,7 +400,7 @@ void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp); * is omitted. * ***/ -void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, +void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1, SilcUInt32 exp); /****f* silcmath/SilcMPAPI/silc_mp_mod @@ -424,7 +427,7 @@ void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2); * * DESCRIPTION * - * Mathematical MOD function. Produces the remainder of `mp1' and + * Mathematical MOD function. Produces the remainder of `mp1' and * unsigned word `ui' and saves the result to `dst'. This is equivalent * to dst = mp1 mod ui. * @@ -479,7 +482,7 @@ void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp); * * SYNOPSIS * - * void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, + * void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, * SilcMPInt *mod); * * DESCRIPTION @@ -488,14 +491,14 @@ void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp); * This is equivalent to dst = (mp1 ^ exp) mod mod. * ***/ -void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, +void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, SilcMPInt *mod); /****f* silcmath/SilcMPAPI/silc_mp_pow_mod_ui * * SYNOPSIS * - * void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, + * void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, * SilcMPInt *mod); * * DESCRIPTION @@ -504,7 +507,7 @@ void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp, * This is equivalent to dst = (mp1 ^ exp) mod mod. * ***/ -void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, +void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, SilcMPInt *mod); /****f* silcmath/SilcMPAPI/silc_mp_modinv @@ -515,21 +518,21 @@ void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp, * * DESCRIPTION * - * Find multiplicative inverse using Euclid's extended algorithm. - * Computes inverse such that a * inv mod n = 1, where 0 < a < n. + * Find multiplicative inverse using Euclid's extended algorithm. + * Computes inverse such that a * inv mod n = 1, where 0 < a < n. * Algorithm goes like this: - * + * * g(0) = n v(0) = 0 * g(1) = a v(1) = 1 - * + * * y = g(i-1) / g(i) * g(i+1) = g(i-1) - y * g(i) = g(i)-1 mod g(i) * v(i+1) = v(i-1) - y * v(i) - * - * do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n, - * is added to inverse making it positive again. (Sometimes the algorithm - * has a variable u defined too and it behaves just like v, except that - * initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is + * + * do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n, + * is added to inverse making it positive again. (Sometimes the algorithm + * has a variable u defined too and it behaves just like v, except that + * initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is * not needed by the algorithm so it does not have to be included.) * ***/ @@ -553,7 +556,7 @@ void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2); * * SYNOPSIS * - * void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, + * void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, * SilcMPInt *mp1, SilcMPInt *mp2); * * DESCRIPTION @@ -601,8 +604,8 @@ int silc_mp_cmp_si(SilcMPInt *mp1, SilcInt32 si); * * DESCRIPTION * - * Compare `mp1' and unsigned word `ui'. Returns posivite, zero, or - * negative if `mp1' > `ui', `mp1' == `ui', or `mp1' < `ui', + * Compare `mp1' and unsigned word `ui'. Returns posivite, zero, or + * negative if `mp1' > `ui', `mp1' == `ui', or `mp1' < `ui', * respectively. * ***/ @@ -612,7 +615,7 @@ int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui); * * SYNOPSIS * - * unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len, + * unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len, * SilcUInt32 *ret_len); * * DESCRIPTION @@ -622,7 +625,7 @@ int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui); * buffer is allocated that large. If zero then the size is approximated. * ***/ -unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len, +unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len, SilcUInt32 *ret_len); /****f* silcmath/SilcMPAPI/silc_mp_mp2bin_noalloc @@ -645,7 +648,7 @@ void silc_mp_mp2bin_noalloc(SilcMPInt *val, unsigned char *dst, * * SYNOPSIS * - * void silc_mp_bin2mp(unsigned char *data, SilcUInt32 len, + * void silc_mp_bin2mp(unsigned char *data, SilcUInt32 len, * SilcMPInt *ret); * * DESCRIPTION diff --git a/lib/silcmath/silcprimegen.c b/lib/silcmath/silcprimegen.c index 7079f7d1..80bbf54e 100644 --- a/lib/silcmath/silcprimegen.c +++ b/lib/silcmath/silcprimegen.c @@ -1,16 +1,15 @@ /* silcprimegen.c - - Author: Pekka Riikonen - Copyright (C) 1997 - 2000 Pekka Riikonen + Author: Pekka Riikonen + + Copyright (C) 1997 - 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -22,17 +21,17 @@ #include "silcincludes.h" -/* - Fixed primetable for small prime division. We use this primetable to - test if possible prime is divisible any of these. Primetable is NULL - terminated. This same primetable can be found in SSH and PGP except that - SSH don't have prime 2 in the table. This sort of prime table is easily +/* + Fixed primetable for small prime division. We use this primetable to + test if possible prime is divisible any of these. Primetable is NULL + terminated. This same primetable can be found in SSH and PGP except that + SSH don't have prime 2 in the table. This sort of prime table is easily created, for example, using sieve of Erastosthenes. - - Just to include; if somebody is interested about Erastosthenes. Creating a - small prime table using sieve of Erastosthenes would go like this: Write + + Just to include; if somebody is interested about Erastosthenes. Creating a + small prime table using sieve of Erastosthenes would go like this: Write down a list of integers in range of 2 to n. Here in example n = 20. - + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 Then mark all multiples of 2 (Note: 2 is a prime number): @@ -46,9 +45,9 @@ x x x x x x x x x x x And continue doing this, marking all multiples of the next unmarked - number until there are now new unmarked numbers. And there you have a + number until there are now new unmarked numbers. And there you have a prime table. So in this example, primes between 2 - 20 are: - + 2 3 5 7 11 13 17 19 */ @@ -187,10 +186,10 @@ static SilcUInt32 primetable[] = }; -/* Find appropriate prime. It generates a number by taking random bytes. - It then tests the number that it's not divisible by any of the small - primes and then it performs Fermat's prime test. I thank Rieks Joosten - (r.joosten@pijnenburg.nl) for such a good help with prime tests. +/* Find appropriate prime. It generates a number by taking random bytes. + It then tests the number that it's not divisible by any of the small + primes and then it performs Fermat's prime test. I thank Rieks Joosten + (r.joosten@pijnenburg.nl) for such a good help with prime tests. If argument verbose is TRUE this will display some status information about the progress of generation. */ @@ -202,6 +201,7 @@ bool silc_math_gen_prime(SilcMPInt *prime, SilcUInt32 bits, bool verbose, SilcUInt32 i, b, k; SilcUInt32 *spmods; SilcMPInt r, base, tmp, tmp2, oprime; + bool valid = FALSE; silc_mp_init(&r); silc_mp_init(&base); @@ -213,92 +213,99 @@ bool silc_math_gen_prime(SilcMPInt *prime, SilcUInt32 bits, bool verbose, SILC_LOG_DEBUG(("Generating new prime")); - /* Get random number and assure that the first digit is not zero since - our conversion routines does not like the first digit being zero. */ - do { - if (numbuf) { - memset(numbuf, 0, (bits / 8)); - silc_free(numbuf); - } + while (valid == FALSE) { + /* Get random number */ if (rng) - numbuf = silc_rng_get_rn_string(rng, (bits / 8)); + numbuf = silc_rng_get_rn_data(rng, (bits / 8)); else - numbuf = silc_rng_global_get_rn_string((bits / 8)); + numbuf = silc_rng_global_get_rn_data((bits / 8)); if (!numbuf) return FALSE; - } while (numbuf[0] == '0'); - - /* Convert into MP and set the size */ - silc_mp_set_str(prime, numbuf, 16); - silc_mp_mod_2exp(prime, prime, bits); - - /* Empty buffer */ - memset(numbuf, 0, (bits / 8)); - silc_free(numbuf); - /* Number could be even number, so we'll make it odd. */ - silc_mp_set_ui(&tmp, 1); - silc_mp_or(prime, prime, &tmp); /* OR operator */ + /* Convert into MP and set the size */ + silc_mp_bin2mp(numbuf, (bits / 8), prime); + silc_mp_mod_2exp(prime, prime, bits); + + /* Empty buffer */ + memset(numbuf, 0, (bits / 8)); + silc_free(numbuf); + + /* Set highest bit */ + silc_mp_set_ui(&tmp, 1); + silc_mp_mul_2exp(&tmp, &tmp, bits - 1); + silc_mp_or(prime, prime, &tmp); + + /* Number could be even number, so we'll make it odd. */ + silc_mp_set_ui(&tmp, 1); + silc_mp_or(prime, prime, &tmp); + + /* Init modulo table with the prime candidate and the primes + in the primetable. */ + spmods = silc_calloc(1, sizeof(primetable) * sizeof(SilcUInt32)); + for (i = 0; primetable[i] != 0; i++) { + silc_mp_mod_ui(&tmp, prime, primetable[i]); + spmods[i] = silc_mp_get_ui(&tmp); + } - /* Init modulo table with the prime candidate and the primes - in the primetable. */ - spmods = silc_calloc(1, sizeof(primetable) * sizeof(SilcUInt32)); - for (i = 0; primetable[i] != 0; i++) { - silc_mp_mod_ui(&tmp, prime, primetable[i]); - spmods[i] = silc_mp_get_ui(&tmp); - } + /* k is added by 2, this way we skip all even numbers (prime is odd). */ + silc_mp_set(&oprime, prime); + for (k = 0;; k += 2) { + silc_mp_add_ui(&oprime, prime, k); + + /* See if the prime has a divisor in primetable[]. + * If it has then it's a composite. We add k to the + * original modulo value, k is added by 2 on every roll. + * This way we don't have to re-init the whole table if + * the number is composite. + */ + for (b = 0; b < i; b++) { + silc_mp_set_ui(&tmp2, spmods[b]); + silc_mp_add_ui(&tmp2, &tmp2, k); + silc_mp_mod_ui(&tmp, &tmp2, primetable[b]); + + /* If mod is 0, the number is composite */ + if (silc_mp_cmp_ui(&tmp, 0) == 0) + break; + } + if (b < i) + continue; + + /* Passed the quick test. */ + + /* Does the prime pass the Fermat's prime test. + * r = 2 ^ p mod p, if r == 2, then p is probably a prime. + */ + silc_mp_pow_mod(&r, &base, &oprime, &oprime); + if (silc_mp_cmp_ui(&r, 2) != 0) { + if (verbose) { + printf("."); + fflush(stdout); + } + continue; + } - /* k is added by 2, this way we skip all even numbers (prime is odd). */ - silc_mp_set(&oprime, prime); - for (k = 0;; k += 2) { - silc_mp_add_ui(&oprime, prime, k); - - /* See if the prime has a divisor in primetable[]. - * If it has then it's a composite. We add k to the - * original modulo value, k is added by 2 on every roll. - * This way we don't have to re-init the whole table if - * the number is composite. - */ - for (b = 0; b < i; b++) { - silc_mp_set_ui(&tmp2, spmods[b]); - silc_mp_add_ui(&tmp2, &tmp2, k); - silc_mp_mod_ui(&tmp, &tmp2, primetable[b]); - - /* If mod is 0, the number is composite */ - if (silc_mp_cmp_ui(&tmp, 0) == 0) - break; + /* Passed the Fermat's test. And I don't think + * we have to do more tests. If anyone wants to do more + * tests, MP library has probability of prime test: + * + * if (silc_mp_probab_prime_p(prime, 25)) + * break; found a probable prime + * + * However, this is very slow. + */ + /* XXX: this could be done if some argument, say strict_primes, is + TRUE when we are willing to spend more time on the prime test and + to get, perhaps, better primes. */ + silc_mp_set(prime, &oprime); + break; } - if (b < i) - continue; - - /* Passed the quick test. */ - - /* Does the prime pass the Fermat's prime test. - * r = 2 ^ p mod p, if r == 2, then p is probably a prime. - */ - silc_mp_pow_mod(&r, &base, &oprime, &oprime); - if (silc_mp_cmp_ui(&r, 2) != 0) { - if (verbose) { - printf("."); - fflush(stdout); - } - continue; + + /* Check highest bit */ + silc_mp_div_2exp(&tmp, prime, bits - 1); + if (silc_mp_get_ui(&tmp) == 1) { + valid = TRUE; + break; } - - /* Passed the Fermat's test. And I don't think - * we have to do more tests. If anyone wants to do more - * tests, MP library has probability of prime test: - * - * if (silc_mp_probab_prime_p(prime, 25)) - * break; found a probable prime - * - * However, this is very slow. - */ - /* XXX: this could be done if some argument, say strict_primes, is - TRUE when we are willing to spend more time on the prime test and - to get, perhaps, better primes. */ - silc_mp_set(prime, &oprime); - break; } silc_free(spmods); @@ -308,34 +315,34 @@ bool silc_math_gen_prime(SilcMPInt *prime, SilcUInt32 bits, bool verbose, silc_mp_uninit(&tmp2); silc_mp_uninit(&oprime); - return TRUE; + return valid; } -/* Performs primality testings for given number. Returns TRUE if the +/* Performs primality testings for given number. Returns TRUE if the number is probably a prime. */ bool silc_math_prime_test(SilcMPInt *p) { SilcMPInt r, base, tmp; int i, ret = 0; - + silc_mp_init(&r); silc_mp_init(&tmp); silc_mp_init(&base); silc_mp_set_ui(&base, 2); - + SILC_LOG_DEBUG(("Testing probability of prime")); /* See if the number is divisible by any of the small primes in primetable[]. */ for (i = 0; primetable[i] != 0; i++) { silc_mp_mod_ui(&tmp, p, primetable[i]); - + /* If mod is 0, the number is composite */ if (silc_mp_cmp_ui(&tmp, 0) == 0) ret = -1; } - + /* Does the prime pass the Fermat's prime test. * r = 2 ^ p mod p, if r == 2, then p is probably a prime. */ @@ -346,7 +353,7 @@ bool silc_math_prime_test(SilcMPInt *p) silc_mp_uninit(&r); silc_mp_uninit(&tmp); silc_mp_uninit(&base); - + if (ret) return FALSE; diff --git a/lib/silcmath/tfm.c b/lib/silcmath/tfm.c new file mode 100644 index 00000000..b99a32f0 --- /dev/null +++ b/lib/silcmath/tfm.c @@ -0,0 +1,5481 @@ +/* Start: fp_2expt.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* computes a = 2**b */ +void fp_2expt(fp_int *a, int b) +{ + int z; + + /* zero a as per default */ + fp_zero (a); + + if (b < 0) { + return; + } + + z = b / DIGIT_BIT; + if (z >= FP_SIZE) { + return; + } + + /* set the used count of where the bit will go */ + a->used = z + 1; + + /* put the single bit in its place */ + a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT); +} + + +/* End: fp_2expt.c */ + +/* Start: fp_add.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + /* get sign of both inputs */ + sa = a->sign; + sb = b->sign; + + /* handle two cases, not four */ + if (sa == sb) { + /* both positive or both negative */ + /* add their magnitudes, copy the sign */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* one positive, the other negative */ + /* subtract the one with the greater magnitude from */ + /* the one of the lesser magnitude. The result gets */ + /* the sign of the one with the greater magnitude. */ + if (fp_cmp_mag (a, b) == FP_LT) { + c->sign = sb; + s_fp_sub (b, a, c); + } else { + c->sign = sa; + s_fp_sub (a, b, c); + } + } +} + +/* End: fp_add.c */ + +/* Start: fp_add_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_int tmp; + fp_set(&tmp, b); + fp_add(a,&tmp,c); +} + +/* End: fp_add_d.c */ + +/* Start: fp_addmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_add(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + +/* End: fp_addmod.c */ + +/* Start: fp_cmp.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp(fp_int *a, fp_int *b) +{ + if (a->sign == FP_NEG && b->sign == FP_ZPOS) { + return FP_LT; + } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) { + return FP_GT; + } else { + /* compare digits */ + if (a->sign == FP_NEG) { + /* if negative compare opposite direction */ + return fp_cmp_mag(b, a); + } else { + return fp_cmp_mag(a, b); + } + } +} + +/* End: fp_cmp.c */ + +/* Start: fp_cmp_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b) +{ + /* compare based on sign */ + if ((b && a->used == 0) || a->sign == FP_NEG) { + return FP_LT; + } + + /* compare based on magnitude */ + if (a->used > 1) { + return FP_GT; + } + + /* compare the only digit of a to b */ + if (a->dp[0] > b) { + return FP_GT; + } else if (a->dp[0] < b) { + return FP_LT; + } else { + return FP_EQ; + } + +} + +/* End: fp_cmp_d.c */ + +/* Start: fp_cmp_mag.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp_mag(fp_int *a, fp_int *b) +{ + int x; + + if (a->used > b->used) { + return FP_GT; + } else if (a->used < b->used) { + return FP_LT; + } else { + for (x = a->used - 1; x >= 0; x--) { + if (a->dp[x] > b->dp[x]) { + return FP_GT; + } else if (a->dp[x] < b->dp[x]) { + return FP_LT; + } + } + } + return FP_EQ; +} + + +/* End: fp_cmp_mag.c */ + +/* Start: fp_cnt_lsb.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +static const int lnz[16] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +/* Counts the number of lsbs which are zero before the first zero bit */ +int fp_cnt_lsb(fp_int *a) +{ + int x; + fp_digit q, qq; + + /* easy out */ + if (fp_iszero(a) == 1) { + return 0; + } + + /* scan lower digits until non-zero */ + for (x = 0; x < a->used && a->dp[x] == 0; x++); + q = a->dp[x]; + x *= DIGIT_BIT; + + /* now scan this digit until a 1 is found */ + if ((q & 1) == 0) { + do { + qq = q & 15; + x += lnz[qq]; + q >>= 4; + } while (qq == 0); + } + return x; +} + + +/* End: fp_cnt_lsb.c */ + +/* Start: fp_count_bits.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_count_bits (fp_int * a) +{ + int r; + fp_digit q; + + /* shortcut */ + if (a->used == 0) { + return 0; + } + + /* get number of digits and add that */ + r = (a->used - 1) * DIGIT_BIT; + + /* take the last digit and count the bits in it */ + q = a->dp[a->used - 1]; + while (q > ((fp_digit) 0)) { + ++r; + q >>= ((fp_digit) 1); + } + return r; +} + +/* End: fp_count_bits.c */ + +/* Start: fp_div.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* a/b => cb + d == a */ +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int q, x, y, t1, t2; + int n, t, i, norm, neg; + + /* is divisor zero ? */ + if (fp_iszero (b) == 1) { + return FP_VAL; + } + + /* if a < b then q=0, r = a */ + if (fp_cmp_mag (a, b) == FP_LT) { + if (d != NULL) { + fp_copy (a, d); + } + if (c != NULL) { + fp_zero (c); + } + return FP_OKAY; + } + + fp_init(&q); + q.used = a->used + 2; + + fp_init(&t1); + fp_init(&t2); + fp_init_copy(&x, a); + fp_init_copy(&y, b); + + /* fix the sign */ + neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG; + x.sign = y.sign = FP_ZPOS; + + /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ + norm = fp_count_bits(&y) % DIGIT_BIT; + if (norm < (int)(DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + fp_mul_2d (&x, norm, &x); + fp_mul_2d (&y, norm, &y); + } else { + norm = 0; + } + + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ + n = x.used - 1; + t = y.used - 1; + + /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ + fp_lshd (&y, n - t); /* y = y*b**{n-t} */ + + while (fp_cmp (&x, &y) != FP_LT) { + ++(q.dp[n - t]); + fp_sub (&x, &y, &x); + } + + /* reset y by shifting it back down */ + fp_rshd (&y, n - t); + + /* step 3. for i from n down to (t + 1) */ + for (i = n; i >= (t + 1); i--) { + if (i > x.used) { + continue; + } + + /* step 3.1 if xi == yt then set q{i-t-1} to b-1, + * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ + if (x.dp[i] == y.dp[t]) { + q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1); + } else { + fp_word tmp; + tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT); + tmp |= ((fp_word) x.dp[i - 1]); + tmp /= ((fp_word) y.dp[t]); + q.dp[i - t - 1] = (fp_digit) (tmp); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + q.dp[i - t - 1] = (q.dp[i - t - 1] + 1); + do { + q.dp[i - t - 1] = (q.dp[i - t - 1] - 1); + + /* find left hand */ + fp_zero (&t1); + t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; + t1.dp[1] = y.dp[t]; + t1.used = 2; + fp_mul_d (&t1, q.dp[i - t - 1], &t1); + + /* find right hand */ + t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; + t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; + t2.dp[2] = x.dp[i]; + t2.used = 3; + } while (fp_cmp_mag(&t1, &t2) == FP_GT); + + /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ + fp_mul_d (&y, q.dp[i - t - 1], &t1); + fp_lshd (&t1, i - t - 1); + fp_sub (&x, &t1, &x); + + /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ + if (x.sign == FP_NEG) { + fp_copy (&y, &t1); + fp_lshd (&t1, i - t - 1); + fp_add (&x, &t1, &x); + q.dp[i - t - 1] = q.dp[i - t - 1] - 1; + } + } + + /* now q is the quotient and x is the remainder + * [which we have to normalize] + */ + + /* get sign before writing to c */ + x.sign = x.used == 0 ? FP_ZPOS : a->sign; + + if (c != NULL) { + fp_clamp (&q); + fp_copy (&q, c); + c->sign = neg; + } + + if (d != NULL) { + fp_div_2d (&x, norm, &x, NULL); + +/* the following is a kludge, essentially we were seeing the right remainder but + with excess digits that should have been zero + */ + for (i = b->used; i < x.used; i++) { + x.dp[i] = 0; + } + fp_clamp(&x); + fp_copy (&x, d); + } + + return FP_OKAY; +} + +/* End: fp_div.c */ + +/* Start: fp_div_2.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* b = a/2 */ +void fp_div_2(fp_int * a, fp_int * b) +{ + int x, oldused; + + oldused = b->used; + b->used = a->used; + { + register fp_digit r, rr, *tmpa, *tmpb; + + /* source alias */ + tmpa = a->dp + b->used - 1; + + /* dest alias */ + tmpb = b->dp + b->used - 1; + + /* carry */ + r = 0; + for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ + rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ + *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ + r = rr; + } + + /* zero excess digits */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + fp_clamp (b); +} + +/* End: fp_div_2.c */ + +/* Start: fp_div_2d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a / 2**b */ +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d) +{ + fp_digit D, r, rr; + int x; + fp_int t; + + /* if the shift count is <= 0 then we do no work */ + if (b <= 0) { + fp_copy (a, c); + if (d != NULL) { + fp_zero (d); + } + return; + } + + fp_init(&t); + + /* get the remainder */ + if (d != NULL) { + fp_mod_2d (a, b, &t); + } + + /* copy */ + fp_copy(a, c); + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + fp_rshd (c, b / DIGIT_BIT); + } + + /* shift any bit count < DIGIT_BIT */ + D = (fp_digit) (b % DIGIT_BIT); + if (D != 0) { + register fp_digit *tmpc, mask, shift; + + /* mask */ + mask = (((fp_digit)1) << D) - 1; + + /* shift for lsb */ + shift = DIGIT_BIT - D; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ + r = 0; + for (x = c->used - 1; x >= 0; x--) { + /* get the lower bits of this word in a temp */ + rr = *tmpc & mask; + + /* shift the current word and mix in the carry bits from the previous word */ + *tmpc = (*tmpc >> D) | (r << shift); + --tmpc; + + /* set the carry to the carry bits of the current word found above */ + r = rr; + } + } + fp_clamp (c); + if (d != NULL) { + fp_copy (&t, d); + } +} + +/* End: fp_div_2d.c */ + +/* Start: fp_div_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +static int s_is_power_of_two(fp_digit b, int *p) +{ + int x; + + for (x = 1; x < DIGIT_BIT; x++) { + if (b == (((fp_digit)1)< cb + d == a */ +int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d) +{ + fp_int q; + fp_word w; + fp_digit t; + int ix; + + /* cannot divide by zero */ + if (b == 0) { + return FP_VAL; + } + + /* quick outs */ + if (b == 1 || fp_iszero(a) == 1) { + if (d != NULL) { + *d = 0; + } + if (c != NULL) { + fp_copy(a, c); + } + return FP_OKAY; + } + + /* power of two ? */ + if (s_is_power_of_two(b, &ix) == 1) { + if (d != NULL) { + *d = a->dp[0] & ((((fp_digit)1)<used; + q.sign = a->sign; + w = 0; + for (ix = a->used - 1; ix >= 0; ix--) { + w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]); + + if (w >= b) { + t = (fp_digit)(w / b); + w -= ((fp_word)t) * ((fp_word)b); + } else { + t = 0; + } + q.dp[ix] = (fp_digit)t; + } + + if (d != NULL) { + *d = (fp_digit)w; + } + + if (c != NULL) { + fp_clamp(&q); + fp_copy(&q, c); + } + + return FP_OKAY; +} + + +/* End: fp_div_d.c */ + +/* Start: fp_exptmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* y = g**x (mod b) + * Some restrictions... x must be positive and < b + */ +static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + fp_int M[64], res; + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + + /* find window size */ + x = fp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else { + winsize = 6; + } + + /* init M array */ + memset(M, 0, sizeof(M)); + + /* now setup montgomery */ + if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { + return err; + } + + /* setup result */ + fp_init(&res); + + /* create M table + * + * The M table contains powers of the input base, e.g. M[x] = G^x mod P + * + * The first half of the table is not computed though accept for M[0] and M[1] + */ + + /* now we need R mod m */ + fp_montgomery_calc_normalization (&res, P); + + /* now set M[1] to G * R mod m */ + if (fp_cmp_mag(P, G) != FP_GT) { + /* G > P so we reduce it first */ + fp_mod(G, P, &M[1]); + } else { + fp_copy(G, &M[1]); + } + fp_mulmod (&M[1], &res, P, &M[1]); + + /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ + fp_copy (&M[1], &M[1 << (winsize - 1)]); + for (x = 0; x < (winsize - 1); x++) { + fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]); + fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp); + } + + /* create upper table */ + for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { + fp_mul(&M[x - 1], &M[1], &M[x]); + fp_montgomery_reduce(&M[x], P, mp); + } + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + } + + /* then multiply */ + fp_mul(&res, &M[bitbuf], &res); + fp_montgomery_reduce(&res, P, mp); + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + + /* get next bit of the window */ + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + fp_mul(&res, &M[1], &res); + fp_montgomery_reduce(&res, P, mp); + } + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + fp_montgomery_reduce(&res, P, mp); + + /* swap res with Y */ + fp_copy (&res, Y); + return FP_OKAY; +} + + +int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + fp_int tmp; + int err; + + /* is X negative? */ + if (X->sign == FP_NEG) { + /* yes, copy G and invmod it */ + fp_copy(G, &tmp); + if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) { + return err; + } + X->sign = FP_ZPOS; + err = _fp_exptmod(&tmp, X, P, Y); + X->sign = FP_NEG; + return err; + } else { + /* Positive exponent so just exptmod */ + return _fp_exptmod(G, X, P, Y); + } +} + +/* End: fp_exptmod.c */ + +/* Start: fp_gcd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = (a, b) */ +void fp_gcd(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int u, v, r; + + /* either zero than gcd is the largest */ + if (fp_iszero (a) == 1 && fp_iszero (b) == 0) { + fp_abs (b, c); + return; + } + if (fp_iszero (a) == 0 && fp_iszero (b) == 1) { + fp_abs (a, c); + return; + } + + /* optimized. At this point if a == 0 then + * b must equal zero too + */ + if (fp_iszero (a) == 1) { + fp_zero(c); + return; + } + + /* sort inputs */ + if (fp_cmp_mag(a, b) != FP_LT) { + fp_init_copy(&u, a); + fp_init_copy(&v, b); + } else { + fp_init_copy(&u, b); + fp_init_copy(&v, a); + } + + fp_zero(&r); + while (fp_iszero(&v) == FP_NO) { + fp_mod(&u, &v, &r); + fp_copy(&v, &u); + fp_copy(&r, &v); + } + fp_copy(&u, c); +} + +/* End: fp_gcd.c */ + +/* Start: fp_ident.c */ +#include "tfm.h" + +const char *fp_ident(void) +{ + static char buf[1024]; + + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf)-1, +"TomsFastMath (%s)\n" +"\n" +"Sizeofs\n" +"\tfp_digit = %u\n" +"\tfp_word = %u\n" +"\n" +"FP_MAX_SIZE = %u\n" +"\n" +"Defines: \n" +#ifdef __i386__ +" __i386__ " +#endif +#ifdef __x86_64__ +" __x86_64__ " +#endif +#ifdef TFM_X86 +" TFM_X86 " +#endif +#ifdef TFM_X86_64 +" TFM_X86_64 " +#endif +#ifdef TFM_SSE2 +" TFM_SSE2 " +#endif +#ifdef TFM_ARM +" TFM_ARM " +#endif +#ifdef TFM_NO_ASM +" TFM_NO_ASM " +#endif +#ifdef FP_64BIT +" FP_64BIT " +#endif +#ifdef TFM_LARGE +" TFM_LARGE " +#endif +#ifdef TFM_HUGE +" TFM_HUGE " +#endif +"\n", __DATE__, sizeof(fp_digit), sizeof(fp_word), FP_MAX_SIZE); + + if (sizeof(fp_digit) == sizeof(fp_word)) { + strncat(buf, "WARNING: sizeof(fp_digit) == sizeof(fp_word), this build is likely to not work properly.\n", + sizeof(buf)-1); + } + return buf; +} + +#ifdef STANDALONE + +int main(void) +{ + printf("%s\n", fp_ident()); + return 0; +} + +#endif + + +/* End: fp_ident.c */ + +/* Start: fp_invmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = 1/a (mod b) for odd b only */ +int fp_invmod(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int x, y, u, v, B, D; + int neg; + + /* 2. [modified] b must be odd */ + if (fp_iseven (b) == FP_YES) { + return FP_VAL; + } + + /* init all our temps */ + fp_init(&x); fp_init(&y); + fp_init(&u); fp_init(&v); + fp_init(&B); fp_init(&D); + + /* x == modulus, y == value to invert */ + fp_copy(b, &x); + + /* we need y = |a| */ + fp_abs(a, &y); + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + fp_copy(&x, &u); + fp_copy(&y, &v); + fp_set (&D, 1); + +top: + /* 4. while u is even do */ + while (fp_iseven (&u) == FP_YES) { + /* 4.1 u = u/2 */ + fp_div_2 (&u, &u); + + /* 4.2 if B is odd then */ + if (fp_isodd (&B) == FP_YES) { + fp_sub (&B, &x, &B); + } + /* B = B/2 */ + fp_div_2 (&B, &B); + } + + /* 5. while v is even do */ + while (fp_iseven (&v) == FP_YES) { + /* 5.1 v = v/2 */ + fp_div_2 (&v, &v); + + /* 5.2 if D is odd then */ + if (fp_isodd (&D) == FP_YES) { + /* D = (D-x)/2 */ + fp_sub (&D, &x, &D); + } + /* D = D/2 */ + fp_div_2 (&D, &D); + } + + /* 6. if u >= v then */ + if (fp_cmp (&u, &v) != FP_LT) { + /* u = u - v, B = B - D */ + fp_sub (&u, &v, &u); + fp_sub (&B, &D, &B); + } else { + /* v - v - u, D = D - B */ + fp_sub (&v, &u, &v); + fp_sub (&D, &B, &D); + } + + /* if not zero goto step 4 */ + if (fp_iszero (&u) == FP_NO) { + goto top; + } + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (fp_cmp_d (&v, 1) != FP_EQ) { + return FP_VAL; + } + + /* b is now the inverse */ + neg = a->sign; + while (D.sign == FP_NEG) { + fp_add (&D, b, &D); + } + fp_copy (&D, c); + c->sign = neg; + return FP_OKAY; +} + +/* End: fp_invmod.c */ + +/* Start: fp_isprime.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* a few primes */ +static const fp_digit primes[256] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +}; + +int fp_isprime(fp_int *a) +{ + fp_int b; + fp_digit d; + int r, res; + + /* do trial division */ + for (r = 0; r < 256; r++) { + fp_mod_d(a, primes[r], &d); + if (d == 0) { + return FP_NO; + } + } + + /* now do 8 miller rabins */ + for (r = 0; r < 8; r++) { + fp_set(&b, primes[r]); + fp_prime_miller_rabin(a, &b, &res); + if (res == FP_NO) { + return FP_NO; + } + } + return FP_YES; +} + +/* End: fp_isprime.c */ + +/* Start: fp_lcm.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = [a, b] */ +void fp_lcm(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int t1, t2; + + fp_init(&t1); + fp_init(&t2); + fp_gcd(a, b, &t1); + if (fp_cmp_mag(a, b) == FP_GT) { + fp_div(a, &t1, &t2, NULL); + fp_mul(b, &t2, c); + } else { + fp_div(b, &t1, &t2, NULL); + fp_mul(a, &t2, c); + } +} + +/* End: fp_lcm.c */ + +/* Start: fp_lshd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_lshd(fp_int *a, int x) +{ + int y; + + /* move up and truncate as required */ + y = MIN(a->used + x - 1, (int)(FP_SIZE-1)); + + /* store new size */ + a->used = y + 1; + + /* move digits */ + for (; y >= x; y--) { + a->dp[y] = a->dp[y-x]; + } + + /* zero lower digits */ + for (; y >= 0; y--) { + a->dp[y] = 0; + } + + /* clamp digits */ + fp_clamp(a); +} + +/* End: fp_lshd.c */ + +/* Start: fp_mod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod b, 0 <= c < b */ +int fp_mod(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int t; + int err; + + fp_zero(&t); + if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) { + return err; + } + if (t.sign != b->sign) { + fp_add(&t, b, c); + } else { + fp_copy(&t, c); + } + return FP_OKAY; +} + + + +/* End: fp_mod.c */ + +/* Start: fp_mod_2d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod 2**d */ +void fp_mod_2d(fp_int *a, int b, fp_int *c) +{ + int x; + + /* zero if count less than or equal to zero */ + if (b <= 0) { + fp_zero(c); + return; + } + + /* get copy of input */ + fp_copy(a, c); + + /* if 2**d is larger than we just return */ + if (b >= (DIGIT_BIT * a->used)) { + return; + } + + /* zero digits above the last digit of the modulus */ + for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { + c->dp[x] = 0; + } + /* clear the digit that is not completely outside/inside the modulus */ + c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b); + fp_clamp (c); +} + +/* End: fp_mod_2d.c */ + +/* Start: fp_mod_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod b, 0 <= c < b */ +int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c) +{ + return fp_div_d(a, b, NULL, c); +} + +/* End: fp_mod_d.c */ + +/* Start: fp_montgomery_calc_normalization.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. + */ +void fp_montgomery_calc_normalization(fp_int *a, fp_int *b) +{ + int x, bits; + + /* how many bits of last digit does b use */ + bits = fp_count_bits (b) % DIGIT_BIT; + + /* compute A = B^(n-1) * 2^(bits-1) */ + if (b->used > 1) { + fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1); + } else { + fp_set(a, 1); + bits = 1; + } + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)DIGIT_BIT; x++) { + fp_mul_2 (a, a); + if (fp_cmp_mag (a, b) != FP_LT) { + s_fp_sub (a, b, a); + } + } +} + + +/* End: fp_montgomery_calc_normalization.c */ + +/* Start: fp_montgomery_reduce.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +#if defined(TFM_X86) + +/* x86-32 code */ + +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ +asm( \ +"movl %7,%%eax \n\t" \ +"mull %6 \n\t" \ +"addl %%eax,%0 \n\t" \ +"adcl %%edx,%1 \n\t" \ +"adcl $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(mu), "g"(*tmpm++) \ + : "%eax", "%edx", "%cc"); + +#define PROPCARRY \ +asm( \ +"movl %1,%%eax \n\t" \ +"addl %%eax,%6 \n\t" \ +"movl %2,%%eax \n\t" \ +"adcl %%eax,%7 \n\t" \ +"adcl $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \ +: "%eax", "%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 code */ + +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ +asm( \ +"movq %7,%%rax \n\t" \ +"mulq %6 \n\t" \ +"addq %%rax,%0 \n\t" \ +"adcq %%rdx,%1 \n\t" \ +"adcq $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(mu), "g"(*tmpm++) \ + : "%rax", "%rdx", "%cc"); + +#define PROPCARRY \ +asm( \ +"movq %1,%%rax \n\t" \ +"movq %2,%%rbx \n\t" \ +"addq %%rax,%6 \n\t" \ +"adcq %%rbx,%7 \n\t" \ +"adcq $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \ +: "%rax", "%rbx", "%cc"); + +#elif defined(TFM_SSE2) + +/* SSE2 code */ + +#define MONT_START \ +asm("movd %0,%%mm2"::"g"(mp)); + +#define MONT_FINI \ +asm("emms"); + +#define LOOP_START \ +asm(\ +"movd %0,%%mm1 \n\t" \ +"pmuludq %%mm2,%%mm1 \n\t" \ +:: "g"(c[x])); + +#define INNERMUL \ +asm( \ +"movd %6,%%mm0 \n\t" \ +"pmuludq %%mm1,%%mm0 \n\t" \ +"movd %%mm0,%%eax \n\t" \ +"psrlq $32, %%mm0 \n\t" \ +"addl %%eax,%0 \n\t" \ +"movd %%mm0,%%eax \n\t" \ +"adcl %%eax,%1 \n\t" \ +"adcl $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(*tmpm++) \ + : "%eax", "%cc"); + +#define PROPCARRY \ +asm( \ +"movl %1,%%eax \n\t" \ +"addl %%eax,%6 \n\t" \ +"movl %2,%%eax \n\t" \ +"adcl %%eax,%7 \n\t" \ +"adcl $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(_c[OFF0+1]), "g"(_c[OFF1+1]), "g"(_c[OFF2+1]) \ +: "%eax", "%cc"); + +#elif defined(TFM_ARM) + +/* ISO C code */ +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +/* NOTE: later write it using two regs instead of three for _c + ... */ +#define INNERMUL \ +asm( \ +"UMULL r0,r1,%0,%1 \n\t" \ +"LDR r2,[%2] \n\t" \ +"ADDS r2,r2,r0 \n\t" \ +"STR r2,[%2] \n\t" \ +"LDR r2,[%3] \n\t" \ +"ADCS r2,r2,r1 \n\t" \ +"STR r2,[%3] \n\t" \ +"LDR r2,[%4] \n\t" \ +"ADC r2,r2,#0 \n\t" \ +"STR r2,[%4] \n\t" \ +::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc"); + +#define PROPCARRY \ +asm( \ +"LDR r0,[%1] \n\t" \ +"LDR r1,[%0,#4] \n\t" \ +"ADDS r0,r0,r1 \n\t" \ +"STR r0,[%0,#4] \n\t" \ +"LDR r0,[%2] \n\t" \ +"LDR r1,[%1,#4] \n\t" \ +"ADCS r0,r0,r1 \n\t" \ +"STR r0,[%1,#4] \n\t" \ +"LDR r0,[%2,#4] \n\t" \ +"ADC r0,r0,#0 \n\t" \ +"STR r0,[%2,#4] \n\t" \ +::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc"); + +#else + +/* ISO C code */ +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ + do { fp_word t; \ + t = (fp_word)_c[OFF0] + ((fp_word)mu) * ((fp_word)*tmpm++); _c[OFF0] = t; \ + t = (fp_word)_c[OFF1] + (t >> DIGIT_BIT); _c[OFF1] = t; \ + _c[OFF2] += (t >> DIGIT_BIT); \ + } while (0); + +#define PROPCARRY \ + do { fp_word t; \ + t = (fp_word)_c[OFF0+1] + (fp_word)_c[OFF1]; _c[OFF0+1] = t; \ + t = (fp_word)_c[OFF1+1] + (t >> DIGIT_BIT) + (fp_word)_c[OFF2]; _c[OFF1+1] = t; \ + _c[OFF2+1] += (t >> DIGIT_BIT); \ + } while (0); + +#endif + + +#define OFF0 (0) +#define OFF1 (FP_SIZE) +#define OFF2 (FP_SIZE+FP_SIZE) + +/* computes x/R == x (mod N) via Montgomery Reduction */ +void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) +{ + fp_digit c[3*FP_SIZE], *_c, *tmpm, mu; + int oldused, x, y, pa; + + /* now zero the buff */ + pa = m->used; + memset(c, 0, sizeof(c)); + + /* copy the input */ + oldused = a->used; + for (x = 0; x < oldused; x++) { + c[x] = a->dp[x]; + } + + MONT_START; + + /* now let's get bizz-sy! */ + for (x = 0; x < pa; x++) { + /* get Mu for this round */ + LOOP_START; + + /* our friendly neighbourhood alias */ + _c = c + x; + tmpm = m->dp; + + for (y = 0; y < pa; y++) { + INNERMUL; + ++_c; + } + /* send carry up man... */ + _c = c + x; + PROPCARRY; + } + + /* fix the rest of the carries */ + _c = c + pa; + for (x = pa; x < pa * 2 + 2; x++) { + PROPCARRY; + ++_c; + } + + /* now copy out */ + _c = c + pa; + tmpm = a->dp; + for (x = 0; x < pa+1; x++) { + *tmpm++ = *_c++; + } + + for (; x < oldused; x++) { + *tmpm++ = 0; + } + + MONT_FINI; + + a->used = pa+1; + fp_clamp(a); + + /* if A >= m then A = A - m */ + if (fp_cmp_mag (a, m) != FP_LT) { + s_fp_sub (a, m, a); + } +} + +/* End: fp_montgomery_reduce.c */ + +/* Start: fp_montgomery_setup.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* setups the montgomery reduction */ +int fp_montgomery_setup(fp_int *a, fp_digit *rho) +{ + fp_digit x, b; + +/* fast inversion mod 2**k + * + * Based on the fact that + * + * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) + * => 2*X*A - X*X*A*A = 1 + * => 2*(1) - (1) = 1 + */ + b = a->dp[0]; + + if ((b & 1) == 0) { + return FP_VAL; + } + + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ +#ifdef FP_64BIT + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ +#endif + + /* rho = -1/m mod b */ + *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x)); + + return FP_OKAY; +} + + +/* End: fp_montgomery_setup.c */ + +/* Start: fp_mul.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * b */ +void fp_mul(fp_int *A, fp_int *B, fp_int *C) +{ + int r, y, yy, s; + fp_int ac, bd, comp, amb, cmd, t1, t2; + + y = MAX(A->used, B->used); + yy = MIN(A->used, B->used); + if (yy <= 8 || y <= 64) { + + /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size + of the largest input. We also want to avoid doing excess mults if the + inputs are not close to the next power of two. That is, for example, + if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications + */ + if (y <= 4) { + fp_mul_comba4(A,B,C); + } else if (y <= 8) { + fp_mul_comba8(A,B,C); +#if defined(TFM_LARGE) + } else if (y <= 16 && y >= 10) { + fp_mul_comba16(A,B,C); +#endif +#if defined(TFM_HUGE) + } else if (y <= 32 && y >= 24) { + fp_mul_comba32(A,B,C); +#endif + } else { + fp_mul_comba(A,B,C); + } + } else { + /* do the karatsuba action + + if A = ab and B = cd for ||a|| = r we need to solve + + ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd + + So we solve for the three products then we form the final result with careful shifting + and addition. + +Obvious points of optimization + +- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits] +- Similarly the "bd" parts can be memcpy'ed and zeroed to 8 +- + + */ + /* get our value of r */ + r = yy >> 1; + + /* now solve for ac */ +// fp_copy(A, &t1); fp_rshd(&t1, r); + for (s = 0; s < A->used - r; s++) { + t1.dp[s] = A->dp[s+r]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + } + if (A->used >= r) { + t1.used = A->used - r; + } else { + t1.used = 0; + } + t1.sign = A->sign; + +// fp_copy(B, &t2); fp_rshd(&t2, r); + for (s = 0; s < B->used - r; s++) { + t2.dp[s] = B->dp[s+r]; + } + for (; s < FP_SIZE; s++) { + t2.dp[s] = 0; + } + if (B->used >= r) { + t2.used = B->used - r; + } else { + t2.used = 0; + } + t2.sign = B->sign; + + fp_copy(&t1, &amb); fp_copy(&t2, &cmd); + fp_zero(&ac); + fp_mul(&t1, &t2, &ac); + + /* now solve for bd */ +// fp_mod_2d(A, r * DIGIT_BIT, &t1); +// fp_mod_2d(B, r * DIGIT_BIT, &t2); + for (s = 0; s < r; s++) { + t1.dp[s] = A->dp[s]; + t2.dp[s] = B->dp[s]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + t2.dp[s] = 0; + } + t1.used = r; + t2.used = r; + fp_clamp(&t1); + fp_clamp(&t2); + + fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd); + fp_zero(&bd); + fp_mul(&t1, &t2, &bd); + + /* now get the (a-b)(c-d) term */ + fp_zero(&comp); + fp_mul(&amb, &cmd, &comp); + + /* now solve the system, do the middle term first */ + comp.sign ^= 1; + fp_add(&comp, &ac, &comp); + fp_add(&comp, &bd, &comp); + fp_lshd(&comp, r); + + /* leading term */ + fp_lshd(&ac, r+r); + + /* now sum them together */ + s = A->sign ^ B->sign; + fp_zero(C); + fp_add(&ac, &comp, C); + fp_add(&bd, C, C); + C->sign = C->used ? s : FP_ZPOS; + } +} + + +/* End: fp_mul.c */ + +/* Start: fp_mul_2.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_mul_2(fp_int * a, fp_int * b) +{ + int x, oldused; + + oldused = b->used; + b->used = a->used; + + { + register fp_digit r, rr, *tmpa, *tmpb; + + /* alias for source */ + tmpa = a->dp; + + /* alias for dest */ + tmpb = b->dp; + + /* carry */ + r = 0; + for (x = 0; x < a->used; x++) { + + /* get what will be the *next* carry bit from the + * MSB of the current digit + */ + rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1)); + + /* now shift up this digit, add in the carry [from the previous] */ + *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r); + + /* copy the carry that would be from the source + * digit into the next iteration + */ + r = rr; + } + + /* new leading digit? */ + if (r != 0 && b->used != (FP_SIZE-1)) { + /* add a MSB which is always 1 at this point */ + *tmpb = 1; + ++(b->used); + } + + /* now zero any excess digits on the destination + * that we didn't write to + */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; +} + + +/* End: fp_mul_2.c */ + +/* Start: fp_mul_2d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * 2**d */ +void fp_mul_2d(fp_int *a, int b, fp_int *c) +{ + fp_digit carry, carrytmp, shift; + int x; + + /* copy it */ + fp_copy(a, c); + + /* handle whole digits */ + if (b >= DIGIT_BIT) { + fp_lshd(c, b/DIGIT_BIT); + } + b %= DIGIT_BIT; + + /* shift the digits */ + if (b != 0) { + carry = 0; + shift = DIGIT_BIT - b; + for (x = 0; x < c->used; x++) { + carrytmp = c->dp[x] >> shift; + c->dp[x] = (c->dp[x] << b) + carry; + carry = carrytmp; + } + /* store last carry if room */ + if (carry && x < FP_SIZE) { + c->dp[c->used++] = carry; + } + } + fp_clamp(c); +} + + +/* End: fp_mul_2d.c */ + +/* Start: fp_mul_comba.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ + +/* About this file... + +*/ + +#include + +/* these are the combas. Worship them. */ +#if defined(TFM_X86) +/* Generic x86 optimized code */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ +asm ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ +asm ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); + +#elif defined(TFM_SSE2) +/* use SSE2 optimizations */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI \ + asm("emms"); + +/* this should multiply i and j */ + #define MULADD(i, j) \ + asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "addl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "adcl %%eax,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc"); + +#elif defined(TFM_ARM) +/* ARM code */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +#define MULADD(i, j) \ +asm( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2, %2, #0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); + +#else +/* ISO C code */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +#define MULADD(i, j) \ + do { fp_word t; \ + t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ + t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ + } while (0); + +#endif + + +/* generic PxQ multiplier */ +void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) +{ + int ix, iy, iz, tx, ty, pa; + fp_digit c0, c1, c2, *tmpx, *tmpy; + fp_int tmp, *dst; + + COMBA_START; + COMBA_CLEAR; + + /* get size of output and trim */ + pa = A->used + B->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + if (A == C || B == C) { + fp_zero(&tmp); + dst = &tmp; + } else { + fp_zero(C); + dst = C; + } + + for (ix = 0; ix < pa; ix++) { + /* get offsets into the two bignums */ + ty = MIN(ix, B->used-1); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = A->dp + tx; + tmpy = B->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(A->used-tx, ty+1); + + /* execute loop */ + COMBA_FORWARD; + for (iz = 0; iz < iy; ++iz) { + MULADD(*tmpx++, *tmpy--); + } + + /* store term */ + COMBA_STORE(dst->dp[ix]); + } + /* store final carry */ + COMBA_STORE2(dst->dp[ix]); + COMBA_FINI; + + dst->used = pa; + fp_clamp(dst); + dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS; + fp_copy(dst, C); +} + +void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2, at[8]; + + memcpy(at, A->dp, 4 * sizeof(fp_digit)); + memcpy(at+4, B->dp, 4 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + + +void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2, at[16]; + + memcpy(at, A->dp, 8 * sizeof(fp_digit)); + memcpy(at+8, B->dp, 8 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#if defined(TFM_LARGE) + +void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2, at[32]; + + memcpy(at, A->dp, 16 * sizeof(fp_digit)); + memcpy(at+16, B->dp, 16 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[14], at[31]); MULADD(at[15], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[15], at[31]); + COMBA_STORE(C->dp[30]); + COMBA_STORE2(C->dp[31]); + C->used = 32; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#endif /* TFM_LARGE */ + +#ifdef TFM_HUGE + +void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2, at[64]; + + memcpy(at, A->dp, 32 * sizeof(fp_digit)); + memcpy(at+32, B->dp, 32 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#endif + +/* End: fp_mul_comba.c */ + +/* Start: fp_mul_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_word w; + int x, oldused; + + oldused = c->used; + c->used = a->used; + c->sign = a->sign; + w = 0; + for (x = 0; x < a->used; x++) { + w = ((fp_word)a->dp[x]) * ((fp_word)b) + w; + c->dp[x] = (fp_digit)w; + w = w >> DIGIT_BIT; + } + if (w != 0 && (a->used != FP_SIZE)) { + c->dp[c->used++] = w; + ++x; + } + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + + +/* End: fp_mul_d.c */ + +/* Start: fp_mulmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_mul(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + +/* End: fp_mulmod.c */ + +/* Start: fp_prime_miller_rabin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + */ +void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result) +{ + fp_int n1, y, r; + int s, j; + + /* default */ + *result = FP_NO; + + /* ensure b > 1 */ + if (fp_cmp_d(b, 1) != FP_GT) { + return; + } + + /* get n1 = a - 1 */ + fp_init_copy(&n1, a); + fp_sub_d(&n1, 1, &n1); + + /* set 2**s * r = n1 */ + fp_init_copy(&r, &n1); + + /* count the number of least significant bits + * which are zero + */ + s = fp_cnt_lsb(&r); + + /* now divide n - 1 by 2**s */ + fp_div_2d (&r, s, &r, NULL); + + /* compute y = b**r mod a */ + fp_init(&y); + fp_exptmod(b, &r, a, &y); + + /* if y != 1 and y != n1 do */ + if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) { + fp_sqrmod (&y, a, &y); + + /* if y == 1 then composite */ + if (fp_cmp_d (&y, 1) == FP_EQ) { + return; + } + ++j; + } + + /* if y != n1 then composite */ + if (fp_cmp (&y, &n1) != FP_EQ) { + return; + } + } + + /* probably prime now */ + *result = FP_YES; +} + +/* End: fp_prime_miller_rabin.c */ + +/* Start: fp_prime_random_ex.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* This is possibly the mother of all prime generation functions, muahahahahaha! */ +int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat) +{ + unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; + int res, err, bsize, maskOR_msb_offset; + + /* sanity check the input */ + if (size <= 1 || t <= 0) { + return FP_VAL; + } + + /* TFM_PRIME_SAFE implies TFM_PRIME_BBS */ + if (flags & TFM_PRIME_SAFE) { + flags |= TFM_PRIME_BBS; + } + + /* calc the byte size */ + bsize = (size>>3)+(size&7?1:0); + + /* we need a buffer of bsize bytes */ + tmp = malloc(bsize); + if (tmp == NULL) { + return FP_MEM; + } + + /* calc the maskAND value for the MSbyte*/ + maskAND = 0xFF >> (8 - (size & 7)); + + /* calc the maskOR_msb */ + maskOR_msb = 0; + maskOR_msb_offset = (size - 2) >> 3; + if (flags & TFM_PRIME_2MSB_ON) { + maskOR_msb |= 1 << ((size - 2) & 7); + } else if (flags & TFM_PRIME_2MSB_OFF) { + maskAND &= ~(1 << ((size - 2) & 7)); + } + + /* get the maskOR_lsb */ + maskOR_lsb = 1; + if (flags & TFM_PRIME_BBS) { + maskOR_lsb |= 3; + } + + do { + /* read the bytes */ + if (cb(tmp, bsize, dat) != bsize) { + err = FP_VAL; + goto error; + } + + /* work over the MSbyte */ + tmp[0] &= maskAND; + tmp[0] |= 1 << ((size - 1) & 7); + + /* mix in the maskORs */ + tmp[maskOR_msb_offset] |= maskOR_msb; + tmp[bsize-1] |= maskOR_lsb; + + /* read it in */ + fp_read_unsigned_bin(a, tmp, bsize); + + /* is it prime? */ + res = fp_isprime(a); + if (res == FP_NO) continue; + + if (flags & TFM_PRIME_SAFE) { + /* see if (a-1)/2 is prime */ + fp_sub_d(a, 1, a); + fp_div_2(a, a); + + /* is it prime? */ + res = fp_isprime(a); + } + } while (res == FP_NO); + + if (flags & TFM_PRIME_SAFE) { + /* restore a to the original value */ + fp_mul_2(a, a); + fp_add_d(a, 1, a); + } + + err = FP_OKAY; +error: + free(tmp); + return err; +} + +/* End: fp_prime_random_ex.c */ + +/* Start: fp_radix_size.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_radix_size(fp_int *a, int radix, int *size) +{ + int digs; + fp_int t; + fp_digit d; + + *size = 0; + + /* check range of the radix */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* quick out if its zero */ + if (fp_iszero(a) == 1) { + *size = 2; + return FP_OKAY; + } + + fp_init_copy(&t, a); + + /* if it is negative output a - */ + if (t.sign == FP_NEG) { + *size++; + t.sign = FP_ZPOS; + } + + digs = 0; + while (fp_iszero (&t) == FP_NO) { + fp_div_d (&t, (fp_digit) radix, &t, &d); + *size++; + } + + /* append a NULL so the string is properly terminated */ + *size++; + return FP_OKAY; + +} + +/* End: fp_radix_size.c */ + +/* Start: fp_read_radix.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_read_radix(fp_int *a, char *str, int radix) +{ + int y, neg; + char ch; + + /* make sure the radix is ok */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = FP_NEG; + } else { + neg = FP_ZPOS; + } + + /* set the integer to the default of zero */ + fp_zero (a); + + /* process each digit of the string */ + while (*str) { + /* if the radix < 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (char) ((radix < 36) ? toupper (*str) : *str); + for (y = 0; y < 64; y++) { + if (ch == fp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + fp_mul_d (a, (fp_digit) radix, a); + fp_add_d (a, (fp_digit) y, a); + } else { + break; + } + ++str; + } + + /* set the sign only if a != 0 */ + if (fp_iszero(a) != FP_YES) { + a->sign = neg; + } + return FP_OKAY; +} + +/* End: fp_read_radix.c */ + +/* Start: fp_read_signed_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_signed_bin(fp_int *a, unsigned char *b, int c) +{ + /* read magnitude */ + fp_read_unsigned_bin (a, b + 1, c - 1); + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = FP_ZPOS; + } else { + a->sign = FP_NEG; + } +} + +/* End: fp_read_signed_bin.c */ + +/* Start: fp_read_unsigned_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c) +{ + /* zero the int */ + fp_zero (a); + + /* read the bytes in */ + for (; c > 0; c--) { + fp_mul_2d (a, 8, a); + a->dp[0] |= *b++; + a->used += 1; + } + fp_clamp (a); +} + +/* End: fp_read_unsigned_bin.c */ + +/* Start: fp_reverse.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* reverse an array, used for radix code */ +void bn_reverse (unsigned char *s, int len) +{ + int ix, iy; + unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} + +/* End: fp_reverse.c */ + +/* Start: fp_rshd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_rshd(fp_int *a, int x) +{ + int y; + + /* too many digits just zero and return */ + if (x >= a->used) { + fp_zero(a); + return; + } + + /* shift */ + for (y = 0; y < a->used - x; y++) { + a->dp[y] = a->dp[y+x]; + } + + /* zero rest */ + for (; y < a->used; y++) { + a->dp[y] = 0; + } + + /* decrement count */ + a->used -= x; + fp_clamp(a); +} + + +/* End: fp_rshd.c */ + +/* Start: fp_s_rmap.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* chars used in radix conversions */ +const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +/* End: fp_s_rmap.c */ + +/* Start: fp_set.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_set(fp_int *a, fp_digit b) +{ + fp_zero(a); + a->dp[0] = b; + a->used = b ? 1 : 0; +} + +/* End: fp_set.c */ + +/* Start: fp_signed_bin_size.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_signed_bin_size(fp_int *a) +{ + return 1 + fp_unsigned_bin_size (a); +} + +/* End: fp_signed_bin_size.c */ + +/* Start: fp_sqr.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* b = a*a */ +void fp_sqr(fp_int *A, fp_int *B) +{ + int r, y, s; + fp_int aa, bb, comp, amb, t1; + + y = A->used; + if (y <= 64) { + if (y <= 4) { + fp_sqr_comba4(A,B); + } else if (y <= 8) { + fp_sqr_comba8(A,B); +#if defined(TFM_LARGE) + } else if (y <= 16 && y >= 12) { + fp_sqr_comba16(A,B); +#endif +#if defined(TFM_HUGE) + } else if (y <= 32 && y >= 20) { + fp_sqr_comba32(A,B); + } else if (y <= 64 && y >= 48) { + fp_sqr_comba64(A,B); +#endif + } else { + fp_sqr_comba(A, B); + } + + } else { + /* do the karatsuba action + + if A = ab ||a|| = r we need to solve + + a^2*r^2 + (-(a-b)^2 + a^2 + b^2)*r + b^2 + + So we solve for the three products then we form the final result with careful shifting + and addition. + +Obvious points of optimization + +- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits] +- Similarly the "bd" parts can be memcpy'ed and zeroed to 8 +- + + */ + /* get our value of r */ + r = y >> 1; + + /* now solve for ac */ +// fp_copy(A, &t1); fp_rshd(&t1, r); + for (s = 0; s < A->used - r; s++) { + t1.dp[s] = A->dp[s+r]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + } + if (A->used >= r) { + t1.used = A->used - r; + } else { + t1.used = 0; + } + t1.sign = A->sign; + fp_copy(&t1, &amb); + fp_zero(&aa); + fp_sqr(&t1, &aa); + + /* now solve for bd */ +// fp_mod_2d(A, r * DIGIT_BIT, &t1); + for (s = 0; s < r; s++) { + t1.dp[s] = A->dp[s]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + } + t1.used = r; + fp_clamp(&t1); + + fp_sub(&amb, &t1, &amb); + fp_zero(&bb); + fp_sqr(&t1, &bb); + + /* now get the (a-b) term */ + fp_zero(&comp); + fp_sqr(&amb, &comp); + + /* now solve the system, do the middle term first */ + comp.sign ^= 1; + fp_add(&comp, &aa, &comp); + fp_add(&comp, &bb, &comp); + fp_lshd(&comp, r); + + /* leading term */ + fp_lshd(&aa, r+r); + + /* now sum them together */ + fp_zero(B); + fp_add(&aa, &comp, B); + fp_add(&bb, B, B); + B->sign = FP_ZPOS; + } +} + + +/* End: fp_sqr.c */ + +/* Start: fp_sqr_comba.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* About this file... +*/ + +#if defined(TFM_X86) + +/* x86-32 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %%eax \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#define SQRADDSC(i, j) \ +asm ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "movl %%eax,%0 \n\t" \ + "movl %%edx,%1 \n\t" \ + "xorl %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); + +#define SQRADDAC(i, j) \ +asm ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); + +#define SQRADDDB \ +asm ( \ + "addl %3,%0 \n\t" \ + "adcl %4,%1 \n\t" \ + "adcl %5,%2 \n\t" \ + "addl %3,%0 \n\t" \ + "adcl %4,%1 \n\t" \ + "adcl %5,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2), "=g"(sc0), "=g"(sc1), "=g"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm ( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc"); + +#define SQRADD2(i, j) \ +asm ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); + +#define SQRADDSC(i, j) \ +asm ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "movq %%rax,%0 \n\t" \ + "movq %%rdx,%1 \n\t" \ + "xorq %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); + +#define SQRADDAC(i, j) \ +asm ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); + +#define SQRADDDB \ +asm ( \ + "addq %3,%0 \n\t" \ + "adcq %4,%1 \n\t" \ + "adcq %5,%2 \n\t" \ + "addq %3,%0 \n\t" \ + "adcq %4,%1 \n\t" \ + "adcq %5,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2), "=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc"); + +#elif defined(TFM_SSE2) + +/* SSE2 Optimized */ +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI \ + asm("emms"); + +#define SQRADD(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "pmuludq %%mm0,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "addl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "adcl %%eax,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#define SQRADDSC(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "pmuludq %%mm0,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "movl %%eax,%1 \n\t" \ + "xorl %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i) :"%eax","%cc"); + +#define SQRADDAC(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#define SQRADDDB \ +asm ( \ + "addl %3,%0 \n\t" \ + "adcl %4,%1 \n\t" \ + "adcl %5,%2 \n\t" \ + "addl %3,%0 \n\t" \ + "adcl %4,%1 \n\t" \ + "adcl %5,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2), "=g"(sc0), "=g"(sc1), "=g"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc"); + +#elif defined(TFM_ARM) + +/* ARM code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +asm( \ +" UMULL r0,r1,%6,%6 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); + +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ +asm( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); + +#define SQRADDSC(i, j) \ +asm( \ +" UMULL %0,%1,%6,%7 \n\t" \ +" SUB %2,%2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); + +#define SQRADDAC(i, j) \ +asm( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); + +#define SQRADDDB \ +asm( \ +" ADDS %3,%0 \n\t" \ +" ADCS %4,%1 \n\t" \ +" ADC %5,%2 \n\t" \ +" ADDS %3,%0 \n\t" \ +" ADCS %4,%1 \n\t" \ +" ADC %5,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2), "=r"(c0), "=r"(c1), "=r"(c2) : "0"(sc0), "1"(sc1), "2"(sc2), "3"(c0), "4"(c1), "5"(c2) : "%cc"); + +#else + +/* ISO C portable code */ + +#define COMBA_START \ + { fp_word tt; + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI \ + } + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ + do { fp_word t; \ + t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ + t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ + } while (0); + + +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ + do { fp_word t; \ + t = ((fp_word)i) * ((fp_word)j); \ + tt = (fp_word)c0 + t; c0 = tt; \ + tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ + tt = (fp_word)c0 + t; c0 = tt; \ + tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ + } while (0); + +#define SQRADDSC(i, j) \ + do { fp_word t; \ + t = ((fp_word)i) * ((fp_word)j); \ + sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ + } while (0); + +#define SQRADDAC(i, j) \ + do { fp_word t; \ + t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \ + t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ + } while (0); + +#define SQRADDDB \ + do { fp_word t; \ + t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ + t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ + c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ + } while (0); + +#endif + +#include "fp_sqr_comba_generic.c" +void fp_sqr_comba4(fp_int *A, fp_int *B) +{ + fp_digit *a, b[8], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + + B->used = 8; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 8 * sizeof(fp_digit)); + fp_clamp(B); +} + + +void fp_sqr_comba8(fp_int *A, fp_int *B) +{ + fp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 16 * sizeof(fp_digit)); + fp_clamp(B); +} + + +#ifdef TFM_LARGE +void fp_sqr_comba16(fp_int *A, fp_int *B) +{ + fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 32 * sizeof(fp_digit)); + fp_clamp(B); +} + + +#endif +#ifdef TFM_HUGE +void fp_sqr_comba32(fp_int *A, fp_int *B) +{ + fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + B->used = 64; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 64 * sizeof(fp_digit)); + fp_clamp(B); +} + + +#endif + +#ifdef TFM_HUGE +void fp_sqr_comba64(fp_int *A, fp_int *B) +{ + fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[48]); SQRADDAC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[49]); SQRADDAC(a[1], a[48]); SQRADDAC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[50]); SQRADDAC(a[1], a[49]); SQRADDAC(a[2], a[48]); SQRADDAC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[51]); SQRADDAC(a[1], a[50]); SQRADDAC(a[2], a[49]); SQRADDAC(a[3], a[48]); SQRADDAC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[52]); SQRADDAC(a[1], a[51]); SQRADDAC(a[2], a[50]); SQRADDAC(a[3], a[49]); SQRADDAC(a[4], a[48]); SQRADDAC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[53]); SQRADDAC(a[1], a[52]); SQRADDAC(a[2], a[51]); SQRADDAC(a[3], a[50]); SQRADDAC(a[4], a[49]); SQRADDAC(a[5], a[48]); SQRADDAC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[54]); SQRADDAC(a[1], a[53]); SQRADDAC(a[2], a[52]); SQRADDAC(a[3], a[51]); SQRADDAC(a[4], a[50]); SQRADDAC(a[5], a[49]); SQRADDAC(a[6], a[48]); SQRADDAC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[55]); SQRADDAC(a[1], a[54]); SQRADDAC(a[2], a[53]); SQRADDAC(a[3], a[52]); SQRADDAC(a[4], a[51]); SQRADDAC(a[5], a[50]); SQRADDAC(a[6], a[49]); SQRADDAC(a[7], a[48]); SQRADDAC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[56]); SQRADDAC(a[1], a[55]); SQRADDAC(a[2], a[54]); SQRADDAC(a[3], a[53]); SQRADDAC(a[4], a[52]); SQRADDAC(a[5], a[51]); SQRADDAC(a[6], a[50]); SQRADDAC(a[7], a[49]); SQRADDAC(a[8], a[48]); SQRADDAC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[57]); SQRADDAC(a[1], a[56]); SQRADDAC(a[2], a[55]); SQRADDAC(a[3], a[54]); SQRADDAC(a[4], a[53]); SQRADDAC(a[5], a[52]); SQRADDAC(a[6], a[51]); SQRADDAC(a[7], a[50]); SQRADDAC(a[8], a[49]); SQRADDAC(a[9], a[48]); SQRADDAC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[58]); SQRADDAC(a[1], a[57]); SQRADDAC(a[2], a[56]); SQRADDAC(a[3], a[55]); SQRADDAC(a[4], a[54]); SQRADDAC(a[5], a[53]); SQRADDAC(a[6], a[52]); SQRADDAC(a[7], a[51]); SQRADDAC(a[8], a[50]); SQRADDAC(a[9], a[49]); SQRADDAC(a[10], a[48]); SQRADDAC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[59]); SQRADDAC(a[1], a[58]); SQRADDAC(a[2], a[57]); SQRADDAC(a[3], a[56]); SQRADDAC(a[4], a[55]); SQRADDAC(a[5], a[54]); SQRADDAC(a[6], a[53]); SQRADDAC(a[7], a[52]); SQRADDAC(a[8], a[51]); SQRADDAC(a[9], a[50]); SQRADDAC(a[10], a[49]); SQRADDAC(a[11], a[48]); SQRADDAC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB; + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[60]); SQRADDAC(a[1], a[59]); SQRADDAC(a[2], a[58]); SQRADDAC(a[3], a[57]); SQRADDAC(a[4], a[56]); SQRADDAC(a[5], a[55]); SQRADDAC(a[6], a[54]); SQRADDAC(a[7], a[53]); SQRADDAC(a[8], a[52]); SQRADDAC(a[9], a[51]); SQRADDAC(a[10], a[50]); SQRADDAC(a[11], a[49]); SQRADDAC(a[12], a[48]); SQRADDAC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[61]); SQRADDAC(a[1], a[60]); SQRADDAC(a[2], a[59]); SQRADDAC(a[3], a[58]); SQRADDAC(a[4], a[57]); SQRADDAC(a[5], a[56]); SQRADDAC(a[6], a[55]); SQRADDAC(a[7], a[54]); SQRADDAC(a[8], a[53]); SQRADDAC(a[9], a[52]); SQRADDAC(a[10], a[51]); SQRADDAC(a[11], a[50]); SQRADDAC(a[12], a[49]); SQRADDAC(a[13], a[48]); SQRADDAC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB; + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[62]); SQRADDAC(a[1], a[61]); SQRADDAC(a[2], a[60]); SQRADDAC(a[3], a[59]); SQRADDAC(a[4], a[58]); SQRADDAC(a[5], a[57]); SQRADDAC(a[6], a[56]); SQRADDAC(a[7], a[55]); SQRADDAC(a[8], a[54]); SQRADDAC(a[9], a[53]); SQRADDAC(a[10], a[52]); SQRADDAC(a[11], a[51]); SQRADDAC(a[12], a[50]); SQRADDAC(a[13], a[49]); SQRADDAC(a[14], a[48]); SQRADDAC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + + /* output 63 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[63]); SQRADDAC(a[1], a[62]); SQRADDAC(a[2], a[61]); SQRADDAC(a[3], a[60]); SQRADDAC(a[4], a[59]); SQRADDAC(a[5], a[58]); SQRADDAC(a[6], a[57]); SQRADDAC(a[7], a[56]); SQRADDAC(a[8], a[55]); SQRADDAC(a[9], a[54]); SQRADDAC(a[10], a[53]); SQRADDAC(a[11], a[52]); SQRADDAC(a[12], a[51]); SQRADDAC(a[13], a[50]); SQRADDAC(a[14], a[49]); SQRADDAC(a[15], a[48]); SQRADDAC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB; + COMBA_STORE(b[63]); + + /* output 64 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[63]); SQRADDAC(a[2], a[62]); SQRADDAC(a[3], a[61]); SQRADDAC(a[4], a[60]); SQRADDAC(a[5], a[59]); SQRADDAC(a[6], a[58]); SQRADDAC(a[7], a[57]); SQRADDAC(a[8], a[56]); SQRADDAC(a[9], a[55]); SQRADDAC(a[10], a[54]); SQRADDAC(a[11], a[53]); SQRADDAC(a[12], a[52]); SQRADDAC(a[13], a[51]); SQRADDAC(a[14], a[50]); SQRADDAC(a[15], a[49]); SQRADDAC(a[16], a[48]); SQRADDAC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]); + COMBA_STORE(b[64]); + + /* output 65 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[63]); SQRADDAC(a[3], a[62]); SQRADDAC(a[4], a[61]); SQRADDAC(a[5], a[60]); SQRADDAC(a[6], a[59]); SQRADDAC(a[7], a[58]); SQRADDAC(a[8], a[57]); SQRADDAC(a[9], a[56]); SQRADDAC(a[10], a[55]); SQRADDAC(a[11], a[54]); SQRADDAC(a[12], a[53]); SQRADDAC(a[13], a[52]); SQRADDAC(a[14], a[51]); SQRADDAC(a[15], a[50]); SQRADDAC(a[16], a[49]); SQRADDAC(a[17], a[48]); SQRADDAC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB; + COMBA_STORE(b[65]); + + /* output 66 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[63]); SQRADDAC(a[4], a[62]); SQRADDAC(a[5], a[61]); SQRADDAC(a[6], a[60]); SQRADDAC(a[7], a[59]); SQRADDAC(a[8], a[58]); SQRADDAC(a[9], a[57]); SQRADDAC(a[10], a[56]); SQRADDAC(a[11], a[55]); SQRADDAC(a[12], a[54]); SQRADDAC(a[13], a[53]); SQRADDAC(a[14], a[52]); SQRADDAC(a[15], a[51]); SQRADDAC(a[16], a[50]); SQRADDAC(a[17], a[49]); SQRADDAC(a[18], a[48]); SQRADDAC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]); + COMBA_STORE(b[66]); + + /* output 67 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[63]); SQRADDAC(a[5], a[62]); SQRADDAC(a[6], a[61]); SQRADDAC(a[7], a[60]); SQRADDAC(a[8], a[59]); SQRADDAC(a[9], a[58]); SQRADDAC(a[10], a[57]); SQRADDAC(a[11], a[56]); SQRADDAC(a[12], a[55]); SQRADDAC(a[13], a[54]); SQRADDAC(a[14], a[53]); SQRADDAC(a[15], a[52]); SQRADDAC(a[16], a[51]); SQRADDAC(a[17], a[50]); SQRADDAC(a[18], a[49]); SQRADDAC(a[19], a[48]); SQRADDAC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB; + COMBA_STORE(b[67]); + + /* output 68 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[63]); SQRADDAC(a[6], a[62]); SQRADDAC(a[7], a[61]); SQRADDAC(a[8], a[60]); SQRADDAC(a[9], a[59]); SQRADDAC(a[10], a[58]); SQRADDAC(a[11], a[57]); SQRADDAC(a[12], a[56]); SQRADDAC(a[13], a[55]); SQRADDAC(a[14], a[54]); SQRADDAC(a[15], a[53]); SQRADDAC(a[16], a[52]); SQRADDAC(a[17], a[51]); SQRADDAC(a[18], a[50]); SQRADDAC(a[19], a[49]); SQRADDAC(a[20], a[48]); SQRADDAC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]); + COMBA_STORE(b[68]); + + /* output 69 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[63]); SQRADDAC(a[7], a[62]); SQRADDAC(a[8], a[61]); SQRADDAC(a[9], a[60]); SQRADDAC(a[10], a[59]); SQRADDAC(a[11], a[58]); SQRADDAC(a[12], a[57]); SQRADDAC(a[13], a[56]); SQRADDAC(a[14], a[55]); SQRADDAC(a[15], a[54]); SQRADDAC(a[16], a[53]); SQRADDAC(a[17], a[52]); SQRADDAC(a[18], a[51]); SQRADDAC(a[19], a[50]); SQRADDAC(a[20], a[49]); SQRADDAC(a[21], a[48]); SQRADDAC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB; + COMBA_STORE(b[69]); + + /* output 70 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[63]); SQRADDAC(a[8], a[62]); SQRADDAC(a[9], a[61]); SQRADDAC(a[10], a[60]); SQRADDAC(a[11], a[59]); SQRADDAC(a[12], a[58]); SQRADDAC(a[13], a[57]); SQRADDAC(a[14], a[56]); SQRADDAC(a[15], a[55]); SQRADDAC(a[16], a[54]); SQRADDAC(a[17], a[53]); SQRADDAC(a[18], a[52]); SQRADDAC(a[19], a[51]); SQRADDAC(a[20], a[50]); SQRADDAC(a[21], a[49]); SQRADDAC(a[22], a[48]); SQRADDAC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]); + COMBA_STORE(b[70]); + + /* output 71 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[63]); SQRADDAC(a[9], a[62]); SQRADDAC(a[10], a[61]); SQRADDAC(a[11], a[60]); SQRADDAC(a[12], a[59]); SQRADDAC(a[13], a[58]); SQRADDAC(a[14], a[57]); SQRADDAC(a[15], a[56]); SQRADDAC(a[16], a[55]); SQRADDAC(a[17], a[54]); SQRADDAC(a[18], a[53]); SQRADDAC(a[19], a[52]); SQRADDAC(a[20], a[51]); SQRADDAC(a[21], a[50]); SQRADDAC(a[22], a[49]); SQRADDAC(a[23], a[48]); SQRADDAC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB; + COMBA_STORE(b[71]); + + /* output 72 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[63]); SQRADDAC(a[10], a[62]); SQRADDAC(a[11], a[61]); SQRADDAC(a[12], a[60]); SQRADDAC(a[13], a[59]); SQRADDAC(a[14], a[58]); SQRADDAC(a[15], a[57]); SQRADDAC(a[16], a[56]); SQRADDAC(a[17], a[55]); SQRADDAC(a[18], a[54]); SQRADDAC(a[19], a[53]); SQRADDAC(a[20], a[52]); SQRADDAC(a[21], a[51]); SQRADDAC(a[22], a[50]); SQRADDAC(a[23], a[49]); SQRADDAC(a[24], a[48]); SQRADDAC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]); + COMBA_STORE(b[72]); + + /* output 73 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[63]); SQRADDAC(a[11], a[62]); SQRADDAC(a[12], a[61]); SQRADDAC(a[13], a[60]); SQRADDAC(a[14], a[59]); SQRADDAC(a[15], a[58]); SQRADDAC(a[16], a[57]); SQRADDAC(a[17], a[56]); SQRADDAC(a[18], a[55]); SQRADDAC(a[19], a[54]); SQRADDAC(a[20], a[53]); SQRADDAC(a[21], a[52]); SQRADDAC(a[22], a[51]); SQRADDAC(a[23], a[50]); SQRADDAC(a[24], a[49]); SQRADDAC(a[25], a[48]); SQRADDAC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB; + COMBA_STORE(b[73]); + + /* output 74 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[63]); SQRADDAC(a[12], a[62]); SQRADDAC(a[13], a[61]); SQRADDAC(a[14], a[60]); SQRADDAC(a[15], a[59]); SQRADDAC(a[16], a[58]); SQRADDAC(a[17], a[57]); SQRADDAC(a[18], a[56]); SQRADDAC(a[19], a[55]); SQRADDAC(a[20], a[54]); SQRADDAC(a[21], a[53]); SQRADDAC(a[22], a[52]); SQRADDAC(a[23], a[51]); SQRADDAC(a[24], a[50]); SQRADDAC(a[25], a[49]); SQRADDAC(a[26], a[48]); SQRADDAC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]); + COMBA_STORE(b[74]); + + /* output 75 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[63]); SQRADDAC(a[13], a[62]); SQRADDAC(a[14], a[61]); SQRADDAC(a[15], a[60]); SQRADDAC(a[16], a[59]); SQRADDAC(a[17], a[58]); SQRADDAC(a[18], a[57]); SQRADDAC(a[19], a[56]); SQRADDAC(a[20], a[55]); SQRADDAC(a[21], a[54]); SQRADDAC(a[22], a[53]); SQRADDAC(a[23], a[52]); SQRADDAC(a[24], a[51]); SQRADDAC(a[25], a[50]); SQRADDAC(a[26], a[49]); SQRADDAC(a[27], a[48]); SQRADDAC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB; + COMBA_STORE(b[75]); + + /* output 76 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[63]); SQRADDAC(a[14], a[62]); SQRADDAC(a[15], a[61]); SQRADDAC(a[16], a[60]); SQRADDAC(a[17], a[59]); SQRADDAC(a[18], a[58]); SQRADDAC(a[19], a[57]); SQRADDAC(a[20], a[56]); SQRADDAC(a[21], a[55]); SQRADDAC(a[22], a[54]); SQRADDAC(a[23], a[53]); SQRADDAC(a[24], a[52]); SQRADDAC(a[25], a[51]); SQRADDAC(a[26], a[50]); SQRADDAC(a[27], a[49]); SQRADDAC(a[28], a[48]); SQRADDAC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]); + COMBA_STORE(b[76]); + + /* output 77 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[63]); SQRADDAC(a[15], a[62]); SQRADDAC(a[16], a[61]); SQRADDAC(a[17], a[60]); SQRADDAC(a[18], a[59]); SQRADDAC(a[19], a[58]); SQRADDAC(a[20], a[57]); SQRADDAC(a[21], a[56]); SQRADDAC(a[22], a[55]); SQRADDAC(a[23], a[54]); SQRADDAC(a[24], a[53]); SQRADDAC(a[25], a[52]); SQRADDAC(a[26], a[51]); SQRADDAC(a[27], a[50]); SQRADDAC(a[28], a[49]); SQRADDAC(a[29], a[48]); SQRADDAC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB; + COMBA_STORE(b[77]); + + /* output 78 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[63]); SQRADDAC(a[16], a[62]); SQRADDAC(a[17], a[61]); SQRADDAC(a[18], a[60]); SQRADDAC(a[19], a[59]); SQRADDAC(a[20], a[58]); SQRADDAC(a[21], a[57]); SQRADDAC(a[22], a[56]); SQRADDAC(a[23], a[55]); SQRADDAC(a[24], a[54]); SQRADDAC(a[25], a[53]); SQRADDAC(a[26], a[52]); SQRADDAC(a[27], a[51]); SQRADDAC(a[28], a[50]); SQRADDAC(a[29], a[49]); SQRADDAC(a[30], a[48]); SQRADDAC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]); + COMBA_STORE(b[78]); + + /* output 79 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[63]); SQRADDAC(a[17], a[62]); SQRADDAC(a[18], a[61]); SQRADDAC(a[19], a[60]); SQRADDAC(a[20], a[59]); SQRADDAC(a[21], a[58]); SQRADDAC(a[22], a[57]); SQRADDAC(a[23], a[56]); SQRADDAC(a[24], a[55]); SQRADDAC(a[25], a[54]); SQRADDAC(a[26], a[53]); SQRADDAC(a[27], a[52]); SQRADDAC(a[28], a[51]); SQRADDAC(a[29], a[50]); SQRADDAC(a[30], a[49]); SQRADDAC(a[31], a[48]); SQRADDAC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB; + COMBA_STORE(b[79]); + + /* output 80 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[63]); SQRADDAC(a[18], a[62]); SQRADDAC(a[19], a[61]); SQRADDAC(a[20], a[60]); SQRADDAC(a[21], a[59]); SQRADDAC(a[22], a[58]); SQRADDAC(a[23], a[57]); SQRADDAC(a[24], a[56]); SQRADDAC(a[25], a[55]); SQRADDAC(a[26], a[54]); SQRADDAC(a[27], a[53]); SQRADDAC(a[28], a[52]); SQRADDAC(a[29], a[51]); SQRADDAC(a[30], a[50]); SQRADDAC(a[31], a[49]); SQRADDAC(a[32], a[48]); SQRADDAC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]); + COMBA_STORE(b[80]); + + /* output 81 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[63]); SQRADDAC(a[19], a[62]); SQRADDAC(a[20], a[61]); SQRADDAC(a[21], a[60]); SQRADDAC(a[22], a[59]); SQRADDAC(a[23], a[58]); SQRADDAC(a[24], a[57]); SQRADDAC(a[25], a[56]); SQRADDAC(a[26], a[55]); SQRADDAC(a[27], a[54]); SQRADDAC(a[28], a[53]); SQRADDAC(a[29], a[52]); SQRADDAC(a[30], a[51]); SQRADDAC(a[31], a[50]); SQRADDAC(a[32], a[49]); SQRADDAC(a[33], a[48]); SQRADDAC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB; + COMBA_STORE(b[81]); + + /* output 82 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[63]); SQRADDAC(a[20], a[62]); SQRADDAC(a[21], a[61]); SQRADDAC(a[22], a[60]); SQRADDAC(a[23], a[59]); SQRADDAC(a[24], a[58]); SQRADDAC(a[25], a[57]); SQRADDAC(a[26], a[56]); SQRADDAC(a[27], a[55]); SQRADDAC(a[28], a[54]); SQRADDAC(a[29], a[53]); SQRADDAC(a[30], a[52]); SQRADDAC(a[31], a[51]); SQRADDAC(a[32], a[50]); SQRADDAC(a[33], a[49]); SQRADDAC(a[34], a[48]); SQRADDAC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]); + COMBA_STORE(b[82]); + + /* output 83 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[63]); SQRADDAC(a[21], a[62]); SQRADDAC(a[22], a[61]); SQRADDAC(a[23], a[60]); SQRADDAC(a[24], a[59]); SQRADDAC(a[25], a[58]); SQRADDAC(a[26], a[57]); SQRADDAC(a[27], a[56]); SQRADDAC(a[28], a[55]); SQRADDAC(a[29], a[54]); SQRADDAC(a[30], a[53]); SQRADDAC(a[31], a[52]); SQRADDAC(a[32], a[51]); SQRADDAC(a[33], a[50]); SQRADDAC(a[34], a[49]); SQRADDAC(a[35], a[48]); SQRADDAC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB; + COMBA_STORE(b[83]); + + /* output 84 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[63]); SQRADDAC(a[22], a[62]); SQRADDAC(a[23], a[61]); SQRADDAC(a[24], a[60]); SQRADDAC(a[25], a[59]); SQRADDAC(a[26], a[58]); SQRADDAC(a[27], a[57]); SQRADDAC(a[28], a[56]); SQRADDAC(a[29], a[55]); SQRADDAC(a[30], a[54]); SQRADDAC(a[31], a[53]); SQRADDAC(a[32], a[52]); SQRADDAC(a[33], a[51]); SQRADDAC(a[34], a[50]); SQRADDAC(a[35], a[49]); SQRADDAC(a[36], a[48]); SQRADDAC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]); + COMBA_STORE(b[84]); + + /* output 85 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[63]); SQRADDAC(a[23], a[62]); SQRADDAC(a[24], a[61]); SQRADDAC(a[25], a[60]); SQRADDAC(a[26], a[59]); SQRADDAC(a[27], a[58]); SQRADDAC(a[28], a[57]); SQRADDAC(a[29], a[56]); SQRADDAC(a[30], a[55]); SQRADDAC(a[31], a[54]); SQRADDAC(a[32], a[53]); SQRADDAC(a[33], a[52]); SQRADDAC(a[34], a[51]); SQRADDAC(a[35], a[50]); SQRADDAC(a[36], a[49]); SQRADDAC(a[37], a[48]); SQRADDAC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB; + COMBA_STORE(b[85]); + + /* output 86 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[63]); SQRADDAC(a[24], a[62]); SQRADDAC(a[25], a[61]); SQRADDAC(a[26], a[60]); SQRADDAC(a[27], a[59]); SQRADDAC(a[28], a[58]); SQRADDAC(a[29], a[57]); SQRADDAC(a[30], a[56]); SQRADDAC(a[31], a[55]); SQRADDAC(a[32], a[54]); SQRADDAC(a[33], a[53]); SQRADDAC(a[34], a[52]); SQRADDAC(a[35], a[51]); SQRADDAC(a[36], a[50]); SQRADDAC(a[37], a[49]); SQRADDAC(a[38], a[48]); SQRADDAC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]); + COMBA_STORE(b[86]); + + /* output 87 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[63]); SQRADDAC(a[25], a[62]); SQRADDAC(a[26], a[61]); SQRADDAC(a[27], a[60]); SQRADDAC(a[28], a[59]); SQRADDAC(a[29], a[58]); SQRADDAC(a[30], a[57]); SQRADDAC(a[31], a[56]); SQRADDAC(a[32], a[55]); SQRADDAC(a[33], a[54]); SQRADDAC(a[34], a[53]); SQRADDAC(a[35], a[52]); SQRADDAC(a[36], a[51]); SQRADDAC(a[37], a[50]); SQRADDAC(a[38], a[49]); SQRADDAC(a[39], a[48]); SQRADDAC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB; + COMBA_STORE(b[87]); + + /* output 88 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[63]); SQRADDAC(a[26], a[62]); SQRADDAC(a[27], a[61]); SQRADDAC(a[28], a[60]); SQRADDAC(a[29], a[59]); SQRADDAC(a[30], a[58]); SQRADDAC(a[31], a[57]); SQRADDAC(a[32], a[56]); SQRADDAC(a[33], a[55]); SQRADDAC(a[34], a[54]); SQRADDAC(a[35], a[53]); SQRADDAC(a[36], a[52]); SQRADDAC(a[37], a[51]); SQRADDAC(a[38], a[50]); SQRADDAC(a[39], a[49]); SQRADDAC(a[40], a[48]); SQRADDAC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]); + COMBA_STORE(b[88]); + + /* output 89 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[63]); SQRADDAC(a[27], a[62]); SQRADDAC(a[28], a[61]); SQRADDAC(a[29], a[60]); SQRADDAC(a[30], a[59]); SQRADDAC(a[31], a[58]); SQRADDAC(a[32], a[57]); SQRADDAC(a[33], a[56]); SQRADDAC(a[34], a[55]); SQRADDAC(a[35], a[54]); SQRADDAC(a[36], a[53]); SQRADDAC(a[37], a[52]); SQRADDAC(a[38], a[51]); SQRADDAC(a[39], a[50]); SQRADDAC(a[40], a[49]); SQRADDAC(a[41], a[48]); SQRADDAC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB; + COMBA_STORE(b[89]); + + /* output 90 */ + CARRY_FORWARD; + SQRADDSC(a[27], a[63]); SQRADDAC(a[28], a[62]); SQRADDAC(a[29], a[61]); SQRADDAC(a[30], a[60]); SQRADDAC(a[31], a[59]); SQRADDAC(a[32], a[58]); SQRADDAC(a[33], a[57]); SQRADDAC(a[34], a[56]); SQRADDAC(a[35], a[55]); SQRADDAC(a[36], a[54]); SQRADDAC(a[37], a[53]); SQRADDAC(a[38], a[52]); SQRADDAC(a[39], a[51]); SQRADDAC(a[40], a[50]); SQRADDAC(a[41], a[49]); SQRADDAC(a[42], a[48]); SQRADDAC(a[43], a[47]); SQRADDAC(a[44], a[46]); SQRADDDB; SQRADD(a[45], a[45]); + COMBA_STORE(b[90]); + + /* output 91 */ + CARRY_FORWARD; + SQRADDSC(a[28], a[63]); SQRADDAC(a[29], a[62]); SQRADDAC(a[30], a[61]); SQRADDAC(a[31], a[60]); SQRADDAC(a[32], a[59]); SQRADDAC(a[33], a[58]); SQRADDAC(a[34], a[57]); SQRADDAC(a[35], a[56]); SQRADDAC(a[36], a[55]); SQRADDAC(a[37], a[54]); SQRADDAC(a[38], a[53]); SQRADDAC(a[39], a[52]); SQRADDAC(a[40], a[51]); SQRADDAC(a[41], a[50]); SQRADDAC(a[42], a[49]); SQRADDAC(a[43], a[48]); SQRADDAC(a[44], a[47]); SQRADDAC(a[45], a[46]); SQRADDDB; + COMBA_STORE(b[91]); + + /* output 92 */ + CARRY_FORWARD; + SQRADDSC(a[29], a[63]); SQRADDAC(a[30], a[62]); SQRADDAC(a[31], a[61]); SQRADDAC(a[32], a[60]); SQRADDAC(a[33], a[59]); SQRADDAC(a[34], a[58]); SQRADDAC(a[35], a[57]); SQRADDAC(a[36], a[56]); SQRADDAC(a[37], a[55]); SQRADDAC(a[38], a[54]); SQRADDAC(a[39], a[53]); SQRADDAC(a[40], a[52]); SQRADDAC(a[41], a[51]); SQRADDAC(a[42], a[50]); SQRADDAC(a[43], a[49]); SQRADDAC(a[44], a[48]); SQRADDAC(a[45], a[47]); SQRADDDB; SQRADD(a[46], a[46]); + COMBA_STORE(b[92]); + + /* output 93 */ + CARRY_FORWARD; + SQRADDSC(a[30], a[63]); SQRADDAC(a[31], a[62]); SQRADDAC(a[32], a[61]); SQRADDAC(a[33], a[60]); SQRADDAC(a[34], a[59]); SQRADDAC(a[35], a[58]); SQRADDAC(a[36], a[57]); SQRADDAC(a[37], a[56]); SQRADDAC(a[38], a[55]); SQRADDAC(a[39], a[54]); SQRADDAC(a[40], a[53]); SQRADDAC(a[41], a[52]); SQRADDAC(a[42], a[51]); SQRADDAC(a[43], a[50]); SQRADDAC(a[44], a[49]); SQRADDAC(a[45], a[48]); SQRADDAC(a[46], a[47]); SQRADDDB; + COMBA_STORE(b[93]); + + /* output 94 */ + CARRY_FORWARD; + SQRADDSC(a[31], a[63]); SQRADDAC(a[32], a[62]); SQRADDAC(a[33], a[61]); SQRADDAC(a[34], a[60]); SQRADDAC(a[35], a[59]); SQRADDAC(a[36], a[58]); SQRADDAC(a[37], a[57]); SQRADDAC(a[38], a[56]); SQRADDAC(a[39], a[55]); SQRADDAC(a[40], a[54]); SQRADDAC(a[41], a[53]); SQRADDAC(a[42], a[52]); SQRADDAC(a[43], a[51]); SQRADDAC(a[44], a[50]); SQRADDAC(a[45], a[49]); SQRADDAC(a[46], a[48]); SQRADDDB; SQRADD(a[47], a[47]); + COMBA_STORE(b[94]); + + /* output 95 */ + CARRY_FORWARD; + SQRADDSC(a[32], a[63]); SQRADDAC(a[33], a[62]); SQRADDAC(a[34], a[61]); SQRADDAC(a[35], a[60]); SQRADDAC(a[36], a[59]); SQRADDAC(a[37], a[58]); SQRADDAC(a[38], a[57]); SQRADDAC(a[39], a[56]); SQRADDAC(a[40], a[55]); SQRADDAC(a[41], a[54]); SQRADDAC(a[42], a[53]); SQRADDAC(a[43], a[52]); SQRADDAC(a[44], a[51]); SQRADDAC(a[45], a[50]); SQRADDAC(a[46], a[49]); SQRADDAC(a[47], a[48]); SQRADDDB; + COMBA_STORE(b[95]); + + /* output 96 */ + CARRY_FORWARD; + SQRADDSC(a[33], a[63]); SQRADDAC(a[34], a[62]); SQRADDAC(a[35], a[61]); SQRADDAC(a[36], a[60]); SQRADDAC(a[37], a[59]); SQRADDAC(a[38], a[58]); SQRADDAC(a[39], a[57]); SQRADDAC(a[40], a[56]); SQRADDAC(a[41], a[55]); SQRADDAC(a[42], a[54]); SQRADDAC(a[43], a[53]); SQRADDAC(a[44], a[52]); SQRADDAC(a[45], a[51]); SQRADDAC(a[46], a[50]); SQRADDAC(a[47], a[49]); SQRADDDB; SQRADD(a[48], a[48]); + COMBA_STORE(b[96]); + + /* output 97 */ + CARRY_FORWARD; + SQRADDSC(a[34], a[63]); SQRADDAC(a[35], a[62]); SQRADDAC(a[36], a[61]); SQRADDAC(a[37], a[60]); SQRADDAC(a[38], a[59]); SQRADDAC(a[39], a[58]); SQRADDAC(a[40], a[57]); SQRADDAC(a[41], a[56]); SQRADDAC(a[42], a[55]); SQRADDAC(a[43], a[54]); SQRADDAC(a[44], a[53]); SQRADDAC(a[45], a[52]); SQRADDAC(a[46], a[51]); SQRADDAC(a[47], a[50]); SQRADDAC(a[48], a[49]); SQRADDDB; + COMBA_STORE(b[97]); + + /* output 98 */ + CARRY_FORWARD; + SQRADDSC(a[35], a[63]); SQRADDAC(a[36], a[62]); SQRADDAC(a[37], a[61]); SQRADDAC(a[38], a[60]); SQRADDAC(a[39], a[59]); SQRADDAC(a[40], a[58]); SQRADDAC(a[41], a[57]); SQRADDAC(a[42], a[56]); SQRADDAC(a[43], a[55]); SQRADDAC(a[44], a[54]); SQRADDAC(a[45], a[53]); SQRADDAC(a[46], a[52]); SQRADDAC(a[47], a[51]); SQRADDAC(a[48], a[50]); SQRADDDB; SQRADD(a[49], a[49]); + COMBA_STORE(b[98]); + + /* output 99 */ + CARRY_FORWARD; + SQRADDSC(a[36], a[63]); SQRADDAC(a[37], a[62]); SQRADDAC(a[38], a[61]); SQRADDAC(a[39], a[60]); SQRADDAC(a[40], a[59]); SQRADDAC(a[41], a[58]); SQRADDAC(a[42], a[57]); SQRADDAC(a[43], a[56]); SQRADDAC(a[44], a[55]); SQRADDAC(a[45], a[54]); SQRADDAC(a[46], a[53]); SQRADDAC(a[47], a[52]); SQRADDAC(a[48], a[51]); SQRADDAC(a[49], a[50]); SQRADDDB; + COMBA_STORE(b[99]); + + /* output 100 */ + CARRY_FORWARD; + SQRADDSC(a[37], a[63]); SQRADDAC(a[38], a[62]); SQRADDAC(a[39], a[61]); SQRADDAC(a[40], a[60]); SQRADDAC(a[41], a[59]); SQRADDAC(a[42], a[58]); SQRADDAC(a[43], a[57]); SQRADDAC(a[44], a[56]); SQRADDAC(a[45], a[55]); SQRADDAC(a[46], a[54]); SQRADDAC(a[47], a[53]); SQRADDAC(a[48], a[52]); SQRADDAC(a[49], a[51]); SQRADDDB; SQRADD(a[50], a[50]); + COMBA_STORE(b[100]); + + /* output 101 */ + CARRY_FORWARD; + SQRADDSC(a[38], a[63]); SQRADDAC(a[39], a[62]); SQRADDAC(a[40], a[61]); SQRADDAC(a[41], a[60]); SQRADDAC(a[42], a[59]); SQRADDAC(a[43], a[58]); SQRADDAC(a[44], a[57]); SQRADDAC(a[45], a[56]); SQRADDAC(a[46], a[55]); SQRADDAC(a[47], a[54]); SQRADDAC(a[48], a[53]); SQRADDAC(a[49], a[52]); SQRADDAC(a[50], a[51]); SQRADDDB; + COMBA_STORE(b[101]); + + /* output 102 */ + CARRY_FORWARD; + SQRADDSC(a[39], a[63]); SQRADDAC(a[40], a[62]); SQRADDAC(a[41], a[61]); SQRADDAC(a[42], a[60]); SQRADDAC(a[43], a[59]); SQRADDAC(a[44], a[58]); SQRADDAC(a[45], a[57]); SQRADDAC(a[46], a[56]); SQRADDAC(a[47], a[55]); SQRADDAC(a[48], a[54]); SQRADDAC(a[49], a[53]); SQRADDAC(a[50], a[52]); SQRADDDB; SQRADD(a[51], a[51]); + COMBA_STORE(b[102]); + + /* output 103 */ + CARRY_FORWARD; + SQRADDSC(a[40], a[63]); SQRADDAC(a[41], a[62]); SQRADDAC(a[42], a[61]); SQRADDAC(a[43], a[60]); SQRADDAC(a[44], a[59]); SQRADDAC(a[45], a[58]); SQRADDAC(a[46], a[57]); SQRADDAC(a[47], a[56]); SQRADDAC(a[48], a[55]); SQRADDAC(a[49], a[54]); SQRADDAC(a[50], a[53]); SQRADDAC(a[51], a[52]); SQRADDDB; + COMBA_STORE(b[103]); + + /* output 104 */ + CARRY_FORWARD; + SQRADDSC(a[41], a[63]); SQRADDAC(a[42], a[62]); SQRADDAC(a[43], a[61]); SQRADDAC(a[44], a[60]); SQRADDAC(a[45], a[59]); SQRADDAC(a[46], a[58]); SQRADDAC(a[47], a[57]); SQRADDAC(a[48], a[56]); SQRADDAC(a[49], a[55]); SQRADDAC(a[50], a[54]); SQRADDAC(a[51], a[53]); SQRADDDB; SQRADD(a[52], a[52]); + COMBA_STORE(b[104]); + + /* output 105 */ + CARRY_FORWARD; + SQRADDSC(a[42], a[63]); SQRADDAC(a[43], a[62]); SQRADDAC(a[44], a[61]); SQRADDAC(a[45], a[60]); SQRADDAC(a[46], a[59]); SQRADDAC(a[47], a[58]); SQRADDAC(a[48], a[57]); SQRADDAC(a[49], a[56]); SQRADDAC(a[50], a[55]); SQRADDAC(a[51], a[54]); SQRADDAC(a[52], a[53]); SQRADDDB; + COMBA_STORE(b[105]); + + /* output 106 */ + CARRY_FORWARD; + SQRADDSC(a[43], a[63]); SQRADDAC(a[44], a[62]); SQRADDAC(a[45], a[61]); SQRADDAC(a[46], a[60]); SQRADDAC(a[47], a[59]); SQRADDAC(a[48], a[58]); SQRADDAC(a[49], a[57]); SQRADDAC(a[50], a[56]); SQRADDAC(a[51], a[55]); SQRADDAC(a[52], a[54]); SQRADDDB; SQRADD(a[53], a[53]); + COMBA_STORE(b[106]); + + /* output 107 */ + CARRY_FORWARD; + SQRADDSC(a[44], a[63]); SQRADDAC(a[45], a[62]); SQRADDAC(a[46], a[61]); SQRADDAC(a[47], a[60]); SQRADDAC(a[48], a[59]); SQRADDAC(a[49], a[58]); SQRADDAC(a[50], a[57]); SQRADDAC(a[51], a[56]); SQRADDAC(a[52], a[55]); SQRADDAC(a[53], a[54]); SQRADDDB; + COMBA_STORE(b[107]); + + /* output 108 */ + CARRY_FORWARD; + SQRADDSC(a[45], a[63]); SQRADDAC(a[46], a[62]); SQRADDAC(a[47], a[61]); SQRADDAC(a[48], a[60]); SQRADDAC(a[49], a[59]); SQRADDAC(a[50], a[58]); SQRADDAC(a[51], a[57]); SQRADDAC(a[52], a[56]); SQRADDAC(a[53], a[55]); SQRADDDB; SQRADD(a[54], a[54]); + COMBA_STORE(b[108]); + + /* output 109 */ + CARRY_FORWARD; + SQRADDSC(a[46], a[63]); SQRADDAC(a[47], a[62]); SQRADDAC(a[48], a[61]); SQRADDAC(a[49], a[60]); SQRADDAC(a[50], a[59]); SQRADDAC(a[51], a[58]); SQRADDAC(a[52], a[57]); SQRADDAC(a[53], a[56]); SQRADDAC(a[54], a[55]); SQRADDDB; + COMBA_STORE(b[109]); + + /* output 110 */ + CARRY_FORWARD; + SQRADDSC(a[47], a[63]); SQRADDAC(a[48], a[62]); SQRADDAC(a[49], a[61]); SQRADDAC(a[50], a[60]); SQRADDAC(a[51], a[59]); SQRADDAC(a[52], a[58]); SQRADDAC(a[53], a[57]); SQRADDAC(a[54], a[56]); SQRADDDB; SQRADD(a[55], a[55]); + COMBA_STORE(b[110]); + + /* output 111 */ + CARRY_FORWARD; + SQRADDSC(a[48], a[63]); SQRADDAC(a[49], a[62]); SQRADDAC(a[50], a[61]); SQRADDAC(a[51], a[60]); SQRADDAC(a[52], a[59]); SQRADDAC(a[53], a[58]); SQRADDAC(a[54], a[57]); SQRADDAC(a[55], a[56]); SQRADDDB; + COMBA_STORE(b[111]); + + /* output 112 */ + CARRY_FORWARD; + SQRADDSC(a[49], a[63]); SQRADDAC(a[50], a[62]); SQRADDAC(a[51], a[61]); SQRADDAC(a[52], a[60]); SQRADDAC(a[53], a[59]); SQRADDAC(a[54], a[58]); SQRADDAC(a[55], a[57]); SQRADDDB; SQRADD(a[56], a[56]); + COMBA_STORE(b[112]); + + /* output 113 */ + CARRY_FORWARD; + SQRADDSC(a[50], a[63]); SQRADDAC(a[51], a[62]); SQRADDAC(a[52], a[61]); SQRADDAC(a[53], a[60]); SQRADDAC(a[54], a[59]); SQRADDAC(a[55], a[58]); SQRADDAC(a[56], a[57]); SQRADDDB; + COMBA_STORE(b[113]); + + /* output 114 */ + CARRY_FORWARD; + SQRADDSC(a[51], a[63]); SQRADDAC(a[52], a[62]); SQRADDAC(a[53], a[61]); SQRADDAC(a[54], a[60]); SQRADDAC(a[55], a[59]); SQRADDAC(a[56], a[58]); SQRADDDB; SQRADD(a[57], a[57]); + COMBA_STORE(b[114]); + + /* output 115 */ + CARRY_FORWARD; + SQRADDSC(a[52], a[63]); SQRADDAC(a[53], a[62]); SQRADDAC(a[54], a[61]); SQRADDAC(a[55], a[60]); SQRADDAC(a[56], a[59]); SQRADDAC(a[57], a[58]); SQRADDDB; + COMBA_STORE(b[115]); + + /* output 116 */ + CARRY_FORWARD; + SQRADDSC(a[53], a[63]); SQRADDAC(a[54], a[62]); SQRADDAC(a[55], a[61]); SQRADDAC(a[56], a[60]); SQRADDAC(a[57], a[59]); SQRADDDB; SQRADD(a[58], a[58]); + COMBA_STORE(b[116]); + + /* output 117 */ + CARRY_FORWARD; + SQRADDSC(a[54], a[63]); SQRADDAC(a[55], a[62]); SQRADDAC(a[56], a[61]); SQRADDAC(a[57], a[60]); SQRADDAC(a[58], a[59]); SQRADDDB; + COMBA_STORE(b[117]); + + /* output 118 */ + CARRY_FORWARD; + SQRADDSC(a[55], a[63]); SQRADDAC(a[56], a[62]); SQRADDAC(a[57], a[61]); SQRADDAC(a[58], a[60]); SQRADDDB; SQRADD(a[59], a[59]); + COMBA_STORE(b[118]); + + /* output 119 */ + CARRY_FORWARD; + SQRADDSC(a[56], a[63]); SQRADDAC(a[57], a[62]); SQRADDAC(a[58], a[61]); SQRADDAC(a[59], a[60]); SQRADDDB; + COMBA_STORE(b[119]); + + /* output 120 */ + CARRY_FORWARD; + SQRADDSC(a[57], a[63]); SQRADDAC(a[58], a[62]); SQRADDAC(a[59], a[61]); SQRADDDB; SQRADD(a[60], a[60]); + COMBA_STORE(b[120]); + + /* output 121 */ + CARRY_FORWARD; + SQRADDSC(a[58], a[63]); SQRADDAC(a[59], a[62]); SQRADDAC(a[60], a[61]); SQRADDDB; + COMBA_STORE(b[121]); + + /* output 122 */ + CARRY_FORWARD; + SQRADD2(a[59], a[63]); SQRADD2(a[60], a[62]); SQRADD(a[61], a[61]); + COMBA_STORE(b[122]); + + /* output 123 */ + CARRY_FORWARD; + SQRADD2(a[60], a[63]); SQRADD2(a[61], a[62]); + COMBA_STORE(b[123]); + + /* output 124 */ + CARRY_FORWARD; + SQRADD2(a[61], a[63]); SQRADD(a[62], a[62]); + COMBA_STORE(b[124]); + + /* output 125 */ + CARRY_FORWARD; + SQRADD2(a[62], a[63]); + COMBA_STORE(b[125]); + + /* output 126 */ + CARRY_FORWARD; + SQRADD(a[63], a[63]); + COMBA_STORE(b[126]); + COMBA_STORE2(b[127]); + COMBA_FINI; + + B->used = 128; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 128 * sizeof(fp_digit)); + fp_clamp(B); +} + + +#endif + +/* End: fp_sqr_comba.c */ + +/* Start: fp_sqr_comba_generic.c */ +/* generic comba squarer */ +void fp_sqr_comba(fp_int *A, fp_int *B) +{ + int pa, ix, iz; + fp_digit c0, c1, c2; + fp_int tmp, *dst; + + /* get size of output and trim */ + pa = A->used + A->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + /* number of output digits to produce */ + COMBA_START; + CLEAR_CARRY; + + if (A == B) { + fp_zero(&tmp); + dst = &tmp; + } else { + fp_zero(B); + dst = B; + } + + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + fp_digit *tmpy, *tmpx; + + /* get offsets into the two bignums */ + ty = MIN(A->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = A->dp + tx; + tmpy = A->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(A->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach at a rate of 2x + * and we have to round because odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* forward carries */ + CARRY_FORWARD; + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + SQRADD2(*tmpx++, *tmpy--); + } + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + SQRADD(A->dp[ix>>1], A->dp[ix>>1]); + } + + /* store it */ + COMBA_STORE(dst->dp[ix]); + } + COMBA_STORE2(dst->dp[ix]); + + COMBA_FINI; + + /* setup dest */ + dst->used = pa; + fp_clamp (dst); + if (dst != B) { + fp_copy(dst, B); + } +} + +/* End: fp_sqr_comba_generic.c */ + +/* Start: fp_sqrmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * a (mod b) */ +int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int tmp; + fp_zero(&tmp); + fp_sqr(a, &tmp); + return fp_mod(&tmp, b, c); +} + +/* End: fp_sqrmod.c */ + +/* Start: fp_sub.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a - b */ +void fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + sa = a->sign; + sb = b->sign; + + if (sa != sb) { + /* subtract a negative from a positive, OR */ + /* subtract a positive from a negative. */ + /* In either case, ADD their magnitudes, */ + /* and use the sign of the first number. */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* subtract a positive from a positive, OR */ + /* subtract a negative from a negative. */ + /* First, take the difference between their */ + /* magnitudes, then... */ + if (fp_cmp_mag (a, b) != FP_LT) { + /* Copy the sign from the first */ + c->sign = sa; + /* The first has a larger or equal magnitude */ + s_fp_sub (a, b, c); + } else { + /* The result has the *opposite* sign from */ + /* the first number. */ + c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS; + /* The second has a larger magnitude */ + s_fp_sub (b, a, c); + } + } +} + + +/* End: fp_sub.c */ + +/* Start: fp_sub_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a - b */ +void fp_sub_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_int tmp; + fp_set(&tmp, b); + fp_sub(a, &tmp, c); +} + +/* End: fp_sub_d.c */ + +/* Start: fp_submod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* d = a - b (mod c) */ +int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_sub(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + + +/* End: fp_submod.c */ + +/* Start: fp_to_signed_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_to_signed_bin(fp_int *a, unsigned char *b) +{ + fp_to_unsigned_bin (a, b + 1); + b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1); +} + +/* End: fp_to_signed_bin.c */ + +/* Start: fp_to_unsigned_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_to_unsigned_bin(fp_int *a, unsigned char *b) +{ + int x; + fp_int t; + + fp_init_copy(&t, a); + + x = 0; + while (fp_iszero (&t) == FP_NO) { + b[x++] = (unsigned char) (t.dp[0] & 255); + fp_div_2d (&t, 8, &t, NULL); + } + bn_reverse (b, x); +} + +/* End: fp_to_unsigned_bin.c */ + +/* Start: fp_toradix.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_toradix(fp_int *a, char *str, int radix) +{ + int digs; + fp_int t; + fp_digit d; + char *_s = str; + + /* check range of the radix */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* quick out if its zero */ + if (fp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return FP_OKAY; + } + + fp_init_copy(&t, a); + + /* if it is negative output a - */ + if (t.sign == FP_NEG) { + ++_s; + *str++ = '-'; + t.sign = FP_ZPOS; + } + + digs = 0; + while (fp_iszero (&t) == FP_NO) { + fp_div_d (&t, (fp_digit) radix, &t, &d); + *str++ = fp_s_rmap[d]; + ++digs; + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [exluding the sign] of the number] + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NULL so the string is properly terminated */ + *str = '\0'; + return FP_OKAY; +} + +/* End: fp_toradix.c */ + +/* Start: fp_unsigned_bin_size.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_unsigned_bin_size(fp_int *a) +{ + int size = fp_count_bits (a); + return (size / 8 + ((size & 7) != 0 ? 1 : 0)); +} + +/* End: fp_unsigned_bin_size.c */ + +/* Start: s_fp_add.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* unsigned addition */ +void s_fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int x, y, oldused; + fp_word t; + + y = MAX(a->used, b->used); + oldused = c->used; + c->used = y; + + t = 0; + for (x = 0; x < y; x++) { + t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]); + c->dp[x] = (fp_digit)t; + t >>= DIGIT_BIT; + } + if (t != 0 && x != FP_SIZE) { + c->dp[c->used++] = (fp_digit)t; + ++x; + } + + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + +/* End: s_fp_add.c */ + +/* Start: s_fp_sub.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ +void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + int x, oldused; + fp_word t; + + oldused = c->used; + c->used = a->used; + t = 0; + for (x = 0; x < a->used; x++) { + t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT) & 1; + } + + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + +/* End: s_fp_sub.c */ + + +/* EOF */ diff --git a/lib/silcmath/tfm.h b/lib/silcmath/tfm.h new file mode 100644 index 00000000..677f2885 --- /dev/null +++ b/lib/silcmath/tfm.h @@ -0,0 +1,362 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#ifndef TFM_H_ +#define TFM_H_ + +#include +#include +#include +#include +#include + +/* Assure these -Pekka */ +#undef CRYPT + +#undef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#undef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) + +/* do we want large code? */ +#define TFM_LARGE + +/* do we want huge code (implies large)? The answer is, yes. */ +#define TFM_HUGE + +/* imply TFM_LARGE as required */ +#if defined(TFM_HUGE) + #if !defined(TFM_LARGE) + #define TFM_LARGE + #endif +#endif + +/* Max size of any number in bits. Basically the largest size you will be multiplying + * should be half [or smaller] of FP_MAX_SIZE-four_digit + * + * You can externally define this or it defaults to 4096-bits. + */ +#ifndef FP_MAX_SIZE +/* For SILC -Pekka */ + #define FP_MAX_SIZE (8192+(4*DIGIT_BIT)) +/* #define FP_MAX_SIZE (4096+(4*DIGIT_BIT))*/ +#endif + +/* will this lib work? */ +#if (CHAR_BIT & 7) + #error CHAR_BIT must be a multiple of eight. +#endif +#if FP_MAX_SIZE % CHAR_BIT + #error FP_MAX_SIZE must be a multiple of CHAR_BIT +#endif + +/* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */ +#if defined(__x86_64__) + #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) + #error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid! + #endif + #if !defined(TFM_X86_64) && !defined(TFM_NO_ASM) + #define TFM_X86_64 + #endif +#endif +#if defined(TFM_X86_64) + #if !defined(FP_64BIT) + #define FP_64BIT + #endif +#endif + +/* try to detect x86-32 */ +#if defined(__i386__) && !defined(TFM_SSE2) + #if defined(TFM_X86_64) || defined(TFM_ARM) + #error x86-32 detected, x86-64/ARM optimizations are not valid! + #endif + #if !defined(TFM_X86) && !defined(TFM_NO_ASM) + #define TFM_X86 + #endif +#endif + +/* make sure we're 32-bit for x86-32/sse/arm */ +#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)) && defined(FP_64BIT) + #warning x86-32, SSE2 and ARM optimizations require 32-bit digits (undefining) + #undef FP_64BIT +#endif + +/* multi asms? */ +#ifdef TFM_X86 + #define TFM_ASM +#endif +#ifdef TFM_X86_64 + #ifdef TFM_ASM + #error TFM_ASM already defined! + #endif + #define TFM_ASM +#endif +#ifdef TFM_SSE2 + #ifdef TFM_ASM + #error TFM_ASM already defined! + #endif + #define TFM_ASM +#endif +#ifdef TFM_ARM + #ifdef TFM_ASM + #error TFM_ASM already defined! + #endif + #define TFM_ASM +#endif + +/* we want no asm? */ +#ifdef TFM_NO_ASM + #undef TFM_X86 + #undef TFM_X86_64 + #undef TFM_SSE2 + #undef TFM_ARM + #undef TFM_ASM +#endif + +/* some default configurations. + */ +#if defined(FP_64BIT) + /* for GCC only on supported platforms */ +#ifndef CRYPT + typedef unsigned long ulong64; +#endif + typedef ulong64 fp_digit; + typedef unsigned long fp_word __attribute__ ((mode(TI))); +#else + /* this is to make porting into LibTomCrypt easier :-) */ +#ifndef CRYPT + #if defined(_MSC_VER) || defined(__BORLANDC__) + typedef unsigned __int64 ulong64; + typedef signed __int64 long64; + #else + typedef unsigned long long ulong64; + typedef signed long long long64; + #endif +#endif + typedef unsigned long fp_digit; + typedef ulong64 fp_word; +#endif + +/* # of digits this is */ +#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit)) +#define FP_MASK (fp_digit)(-1) +#define FP_SIZE (FP_MAX_SIZE/DIGIT_BIT) + +/* signs */ +#define FP_ZPOS 0 +#define FP_NEG 1 + +/* return codes */ +#define FP_OKAY 0 +#define FP_VAL 1 +#define FP_MEM 2 + +/* equalities */ +#define FP_LT -1 /* less than */ +#define FP_EQ 0 /* equal to */ +#define FP_GT 1 /* greater than */ + +/* replies */ +#define FP_YES 1 /* yes response */ +#define FP_NO 0 /* no response */ + +/* a FP type */ +typedef struct { + fp_digit dp[FP_SIZE]; + int used, + sign; +} fp_int; + +/* functions */ + +/* returns a TFM ident string useful for debugging... */ +const char *fp_ident(void); + +/* initialize [or zero] an fp int */ +#define fp_init(a) (void)memset((a), 0, sizeof(fp_int)) +#define fp_zero(a) fp_init(a) + +/* zero/even/odd ? */ +#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO) +#define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO) +#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO) + +/* set to a small digit */ +void fp_set(fp_int *a, fp_digit b); + +/* copy from a to b */ +#define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int))) +#define fp_init_copy(a, b) fp_copy(b, a) + +/* negate and absolute */ +#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; } +#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; } + +/* clamp digits */ +#define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; } + +/* right shift x digits */ +void fp_rshd(fp_int *a, int x); + +/* left shift x digits */ +void fp_lshd(fp_int *a, int x); + +/* signed comparison */ +int fp_cmp(fp_int *a, fp_int *b); + +/* unsigned comparison */ +int fp_cmp_mag(fp_int *a, fp_int *b); + +/* power of 2 operations */ +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d); +void fp_mod_2d(fp_int *a, int b, fp_int *c); +void fp_mul_2d(fp_int *a, int b, fp_int *c); +void fp_2expt (fp_int *a, int b); +void fp_mul_2(fp_int *a, fp_int *c); +void fp_div_2(fp_int *a, fp_int *c); + +/* Counts the number of lsbs which are zero before the first zero bit */ +int fp_cnt_lsb(fp_int *a); + +/* c = a + b */ +void fp_add(fp_int *a, fp_int *b, fp_int *c); + +/* c = a - b */ +void fp_sub(fp_int *a, fp_int *b, fp_int *c); + +/* c = a * b */ +void fp_mul(fp_int *a, fp_int *b, fp_int *c); + +/* b = a*a */ +void fp_sqr(fp_int *a, fp_int *b); + +/* a/b => cb + d == a */ +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* c = a mod b, 0 <= c < b */ +int fp_mod(fp_int *a, fp_int *b, fp_int *c); + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b); + +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c); + +/* c = a - b */ +void fp_sub_d(fp_int *a, fp_digit b, fp_int *c); + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c); + +/* a/b => cb + d == a */ +int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d); + +/* c = a mod b, 0 <= c < b */ +int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c); + +/* ---> number theory <--- */ +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* d = a - b (mod c) */ +int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* c = a * a (mod b) */ +int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c); + +/* c = 1/a (mod b) */ +int fp_invmod(fp_int *a, fp_int *b, fp_int *c); + +/* c = (a, b) */ +void fp_gcd(fp_int *a, fp_int *b, fp_int *c); + +/* c = [a, b] */ +void fp_lcm(fp_int *a, fp_int *b, fp_int *c); + +/* setups the montgomery reduction */ +int fp_montgomery_setup(fp_int *a, fp_digit *mp); + +/* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. + */ +void fp_montgomery_calc_normalization(fp_int *a, fp_int *b); + +/* computes x/R == x (mod N) via Montgomery Reduction */ +void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp); + +/* d = a**b (mod c) */ +int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* primality stuff */ + +/* perform a Miller-Rabin test of a to the base b and store result in "result" */ +void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result); + +/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime */ +int fp_isprime(fp_int *a); + +/* Primality generation flags */ +#define TFM_PRIME_BBS 0x0001 /* BBS style prime */ +#define TFM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */ +#define TFM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */ +#define TFM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */ + +/* callback for fp_prime_random, should fill dst with random bytes and return how many read [upto len] */ +typedef int tfm_prime_callback(unsigned char *dst, int len, void *dat); + +#define fp_prime_random(a, t, size, bbs, cb, dat) fp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?TFM_PRIME_BBS:0, cb, dat) + +int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat); + +/* radix conersions */ +int fp_count_bits(fp_int *a); + +int fp_unsigned_bin_size(fp_int *a); +void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c); +void fp_to_unsigned_bin(fp_int *a, unsigned char *b); + +int fp_signed_bin_size(fp_int *a); +void fp_read_signed_bin(fp_int *a, unsigned char *b, int c); +void fp_to_signed_bin(fp_int *a, unsigned char *b); + +int fp_read_radix(fp_int *a, char *str, int radix); +int fp_toradix(fp_int *a, char *str, int radix); +int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen); +int fp_radix_size(fp_int *a, int radix, int *size); + +/* VARIOUS LOW LEVEL STUFFS */ +void s_fp_add(fp_int *a, fp_int *b, fp_int *c); +void s_fp_sub(fp_int *a, fp_int *b, fp_int *c); +void bn_reverse(unsigned char *s, int len); +void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C); +#ifdef TFM_HUGE +void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C); +#endif +#ifdef TFM_LARGE +void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C); +#endif +void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C); +void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C); + +void fp_sqr_comba(fp_int *A, fp_int *B); +void fp_sqr_comba4(fp_int *A, fp_int *B); +void fp_sqr_comba8(fp_int *A, fp_int *B); +#ifdef TFM_LARGE +void fp_sqr_comba16(fp_int *A, fp_int *B); +#endif +#ifdef TFM_HUGE +void fp_sqr_comba32(fp_int *A, fp_int *B); +void fp_sqr_comba64(fp_int *A, fp_int *B); +#endif +extern const char *fp_s_rmap; + +#endif diff --git a/lib/silcmath/tma.c b/lib/silcmath/tma.c new file mode 100644 index 00000000..12f9d3ff --- /dev/null +++ b/lib/silcmath/tma.c @@ -0,0 +1,9048 @@ +/* Start: bn_error.c */ +#include "tma.h" +#ifdef BN_ERROR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +static const struct { + int code; + char *msg; +} msgs[] = { + { MP_OKAY, "Successful" }, + { MP_MEM, "Out of heap" }, + { MP_VAL, "Value out of range" } +}; + +/* return a char * string for a given code */ +char *mp_error_to_string(int code) +{ + int x; + + /* scan the lookup table for the given message */ + for (x = 0; x < (int)(sizeof(msgs) / sizeof(msgs[0])); x++) { + if (msgs[x].code == code) { + return msgs[x].msg; + } + } + + /* generic reply for invalid code */ + return "Invalid error code"; +} + +#endif + +/* End: bn_error.c */ + +/* Start: bn_fast_mp_invmod.c */ +#include "tma.h" +#ifdef BN_FAST_MP_INVMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes the modular inverse via binary extended euclidean algorithm, + * that is c = 1/a mod b + * + * Based on slow invmod except this is optimized for the case where b is + * odd as per HAC Note 14.64 on pp. 610 + */ +int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int x, y, u, v, B, D; + int res, neg; + + /* 2. [modified] b must be odd */ + if (mp_iseven (b) == 1) { + return MP_VAL; + } + + /* init all our temps */ + if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { + return res; + } + + /* x == modulus, y == value to invert */ + if ((res = mp_copy (b, &x)) != MP_OKAY) { + goto LBL_ERR; + } + + /* we need y = |a| */ + if ((res = mp_mod (a, b, &y)) != MP_OKAY) { + goto LBL_ERR; + } + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + if ((res = mp_copy (&x, &u)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_copy (&y, &v)) != MP_OKAY) { + goto LBL_ERR; + } + mp_set (&D, 1); + +top: + /* 4. while u is even do */ + while (mp_iseven (&u) == 1) { + /* 4.1 u = u/2 */ + if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { + goto LBL_ERR; + } + /* 4.2 if B is odd then */ + if (mp_isodd (&B) == 1) { + if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* B = B/2 */ + if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 5. while v is even do */ + while (mp_iseven (&v) == 1) { + /* 5.1 v = v/2 */ + if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { + goto LBL_ERR; + } + /* 5.2 if D is odd then */ + if (mp_isodd (&D) == 1) { + /* D = (D-x)/2 */ + if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* D = D/2 */ + if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 6. if u >= v then */ + if (mp_cmp (&u, &v) != MP_LT) { + /* u = u - v, B = B - D */ + if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } else { + /* v - v - u, D = D - B */ + if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* if not zero goto step 4 */ + if (mp_iszero (&u) == 0) { + goto top; + } + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (mp_cmp_d (&v, 1) != MP_EQ) { + res = MP_VAL; + goto LBL_ERR; + } + + /* b is now the inverse */ + neg = a->sign; + while (D.sign == MP_NEG) { + if ((res = mp_add (&D, b, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + mp_exch (&D, c); + c->sign = neg; + res = MP_OKAY; + +LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); + return res; +} +#endif + +/* End: bn_fast_mp_invmod.c */ + +/* Start: bn_fast_mp_montgomery_reduce.c */ +#include "tma.h" +#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes xR**-1 == x (mod N) via Montgomery Reduction + * + * This is an optimized implementation of montgomery_reduce + * which uses the comba method to quickly calculate the columns of the + * reduction. + * + * Based on Algorithm 14.32 on pp.601 of HAC. +*/ +int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) +{ + int ix, res, olduse; + mp_word W[MP_WARRAY]; + + /* get old used count */ + olduse = x->used; + + /* grow a as required */ + if (x->alloc < n->used + 1) { + if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { + return res; + } + } + + /* first we have to get the digits of the input into + * an array of double precision words W[...] + */ + { + register mp_word *_W; + register mp_digit *tmpx; + + /* alias for the W[] array */ + _W = W; + + /* alias for the digits of x*/ + tmpx = x->dp; + + /* copy the digits of a into W[0..a->used-1] */ + for (ix = 0; ix < x->used; ix++) { + *_W++ = *tmpx++; + } + + /* zero the high words of W[a->used..m->used*2] */ + for (; ix < n->used * 2 + 1; ix++) { + *_W++ = 0; + } + } + + /* now we proceed to zero successive digits + * from the least significant upwards + */ + for (ix = 0; ix < n->used; ix++) { + /* mu = ai * m' mod b + * + * We avoid a double precision multiplication (which isn't required) + * by casting the value down to a mp_digit. Note this requires + * that W[ix-1] have the carry cleared (see after the inner loop) + */ + register mp_digit mu; + mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); + + /* a = a + mu * m * b**i + * + * This is computed in place and on the fly. The multiplication + * by b**i is handled by offseting which columns the results + * are added to. + * + * Note the comba method normally doesn't handle carries in the + * inner loop In this case we fix the carry from the previous + * column since the Montgomery reduction requires digits of the + * result (so far) [see above] to work. This is + * handled by fixing up one carry after the inner loop. The + * carry fixups are done in order so after these loops the + * first m->used words of W[] have the carries fixed + */ + { + register int iy; + register mp_digit *tmpn; + register mp_word *_W; + + /* alias for the digits of the modulus */ + tmpn = n->dp; + + /* Alias for the columns set by an offset of ix */ + _W = W + ix; + + /* inner loop */ + for (iy = 0; iy < n->used; iy++) { + *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); + } + } + + /* now fix carry for next digit, W[ix+1] */ + W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); + } + + /* now we have to propagate the carries and + * shift the words downward [all those least + * significant digits we zeroed]. + */ + { + register mp_digit *tmpx; + register mp_word *_W, *_W1; + + /* nox fix rest of carries */ + + /* alias for current word */ + _W1 = W + ix; + + /* alias for next word, where the carry goes */ + _W = W + ++ix; + + for (; ix <= n->used * 2 + 1; ix++) { + *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); + } + + /* copy out, A = A/b**n + * + * The result is A/b**n but instead of converting from an + * array of mp_word to mp_digit than calling mp_rshd + * we just copy them in the right order + */ + + /* alias for destination word */ + tmpx = x->dp; + + /* alias for shifted double precision result */ + _W = W + n->used; + + for (ix = 0; ix < n->used + 1; ix++) { + *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); + } + + /* zero oldused digits, if the input a was larger than + * m->used+1 we'll have to clear the digits + */ + for (; ix < olduse; ix++) { + *tmpx++ = 0; + } + } + + /* set the max used and clamp */ + x->used = n->used + 1; + mp_clamp (x); + + /* if A >= m then A = A - m */ + if (mp_cmp_mag (x, n) != MP_LT) { + return s_mp_sub (x, n, x); + } + return MP_OKAY; +} +#endif + +/* End: bn_fast_mp_montgomery_reduce.c */ + +/* Start: bn_fast_s_mp_mul_digs.c */ +#include "tma.h" +#ifdef BN_FAST_S_MP_MUL_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Fast (comba) multiplier + * + * This is the fast column-array [comba] multiplier. It is + * designed to compute the columns of the product first + * then handle the carries afterwards. This has the effect + * of making the nested loops that compute the columns very + * simple and schedulable on super-scalar processors. + * + * This has been modified to produce a variable number of + * digits of output so if say only a half-product is required + * you don't have to compute the upper half (a feature + * required for fast Barrett reduction). + * + * Based on Algorithm 14.12 on pp.595 of HAC. + * + */ +int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + int olduse, res, pa, ix, iz; + mp_digit W[MP_WARRAY]; + register mp_word _W; + + /* grow the destination as required */ + if (c->alloc < digs) { + if ((res = mp_grow (c, digs)) != MP_OKAY) { + return res; + } + } + + /* number of output digits to produce */ + pa = MIN(digs, a->used + b->used); + + /* clear the carry */ + _W = 0; + for (ix = 0; ix < pa; ix++) { + int tx, ty; + int iy; + mp_digit *tmpx, *tmpy; + + /* get offsets into the two bignums */ + ty = MIN(b->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = b->dp + ty; + + /* this is the number of times the loop will iterrate, essentially + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* execute loop */ + for (iz = 0; iz < iy; ++iz) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* store term */ + W[ix] = ((mp_digit)_W) & MP_MASK; + + /* make next carry */ + _W = _W >> ((mp_word)DIGIT_BIT); + } + + /* store final carry */ + W[ix] = (mp_digit)(_W & MP_MASK); + + /* setup dest */ + olduse = c->used; + c->used = pa; + + { + register mp_digit *tmpc; + tmpc = c->dp; + for (ix = 0; ix < pa+1; ix++) { + /* now extract the previous digit [below the carry] */ + *tmpc++ = W[ix]; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpc++ = 0; + } + } + mp_clamp (c); + return MP_OKAY; +} +#endif + +/* End: bn_fast_s_mp_mul_digs.c */ + +/* Start: bn_fast_s_mp_mul_high_digs.c */ +#include "tma.h" +#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* this is a modified version of fast_s_mul_digs that only produces + * output digits *above* digs. See the comments for fast_s_mul_digs + * to see how it works. + * + * This is used in the Barrett reduction since for one of the multiplications + * only the higher digits were needed. This essentially halves the work. + * + * Based on Algorithm 14.12 on pp.595 of HAC. + */ +int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + int olduse, res, pa, ix, iz; + mp_digit W[MP_WARRAY]; + mp_word _W; + + /* grow the destination as required */ + pa = a->used + b->used; + if (c->alloc < pa) { + if ((res = mp_grow (c, pa)) != MP_OKAY) { + return res; + } + } + + /* number of output digits to produce */ + pa = a->used + b->used; + _W = 0; + for (ix = digs; ix < pa; ix++) { + int tx, ty, iy; + mp_digit *tmpx, *tmpy; + + /* get offsets into the two bignums */ + ty = MIN(b->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = b->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* store term */ + W[ix] = ((mp_digit)_W) & MP_MASK; + + /* make next carry */ + _W = _W >> ((mp_word)DIGIT_BIT); + } + + /* store final carry */ + W[ix] = (mp_digit)(_W & MP_MASK); + + /* setup dest */ + olduse = c->used; + c->used = pa; + + { + register mp_digit *tmpc; + + tmpc = c->dp + digs; + for (ix = digs; ix <= pa; ix++) { + /* now extract the previous digit [below the carry] */ + *tmpc++ = W[ix]; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpc++ = 0; + } + } + mp_clamp (c); + return MP_OKAY; +} +#endif + +/* End: bn_fast_s_mp_mul_high_digs.c */ + +/* Start: bn_fast_s_mp_sqr.c */ +#include "tma.h" +#ifdef BN_FAST_S_MP_SQR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* the jist of squaring... + * you do like mult except the offset of the tmpx [one that + * starts closer to zero] can't equal the offset of tmpy. + * So basically you set up iy like before then you min it with + * (ty-tx) so that it never happens. You double all those + * you add in the inner loop + +After that loop you do the squares and add them in. +*/ + +int fast_s_mp_sqr (mp_int * a, mp_int * b) +{ + int olduse, res, pa, ix, iz; + mp_digit W[MP_WARRAY], *tmpx; + mp_word W1; + + /* grow the destination as required */ + pa = a->used + a->used; + if (b->alloc < pa) { + if ((res = mp_grow (b, pa)) != MP_OKAY) { + return res; + } + } + + /* number of output digits to produce */ + W1 = 0; + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + mp_word _W; + mp_digit *tmpy; + + /* clear counter */ + _W = 0; + + /* get offsets into the two bignums */ + ty = MIN(a->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = a->dp + ty; + + /* this is the number of times the loop will iterrate, essentially + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach at a rate of 2x + * and we have to round because odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* double the inner product and add carry */ + _W = _W + _W + W1; + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); + } + + /* store it */ + W[ix] = (mp_digit)(_W & MP_MASK); + + /* make next carry */ + W1 = _W >> ((mp_word)DIGIT_BIT); + } + + /* setup dest */ + olduse = b->used; + b->used = a->used+a->used; + + { + mp_digit *tmpb; + tmpb = b->dp; + for (ix = 0; ix < pa; ix++) { + *tmpb++ = W[ix] & MP_MASK; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpb++ = 0; + } + } + mp_clamp (b); + return MP_OKAY; +} +#endif + +/* End: bn_fast_s_mp_sqr.c */ + +/* Start: bn_mp_2expt.c */ +#include "tma.h" +#ifdef BN_MP_2EXPT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes a = 2**b + * + * Simple algorithm which zeroes the int, grows it then just sets one bit + * as required. + */ +int +mp_2expt (mp_int * a, int b) +{ + int res; + + /* zero a as per default */ + mp_zero (a); + + /* grow a to accomodate the single bit */ + if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { + return res; + } + + /* set the used count of where the bit will go */ + a->used = b / DIGIT_BIT + 1; + + /* put the single bit in its place */ + a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); + + return MP_OKAY; +} +#endif + +/* End: bn_mp_2expt.c */ + +/* Start: bn_mp_abs.c */ +#include "tma.h" +#ifdef BN_MP_ABS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* b = |a| + * + * Simple function copies the input and fixes the sign to positive + */ +int +mp_abs (mp_int * a, mp_int * b) +{ + int res; + + /* copy a to b */ + if (a != b) { + if ((res = mp_copy (a, b)) != MP_OKAY) { + return res; + } + } + + /* force the sign of b to positive */ + b->sign = MP_ZPOS; + + return MP_OKAY; +} +#endif + +/* End: bn_mp_abs.c */ + +/* Start: bn_mp_add.c */ +#include "tma.h" +#ifdef BN_MP_ADD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* high level addition (handles signs) */ +int mp_add (mp_int * a, mp_int * b, mp_int * c) +{ + int sa, sb, res; + + /* get sign of both inputs */ + sa = a->sign; + sb = b->sign; + + /* handle two cases, not four */ + if (sa == sb) { + /* both positive or both negative */ + /* add their magnitudes, copy the sign */ + c->sign = sa; + res = s_mp_add (a, b, c); + } else { + /* one positive, the other negative */ + /* subtract the one with the greater magnitude from */ + /* the one of the lesser magnitude. The result gets */ + /* the sign of the one with the greater magnitude. */ + if (mp_cmp_mag (a, b) == MP_LT) { + c->sign = sb; + res = s_mp_sub (b, a, c); + } else { + c->sign = sa; + res = s_mp_sub (a, b, c); + } + } + return res; +} + +#endif + +/* End: bn_mp_add.c */ + +/* Start: bn_mp_add_d.c */ +#include "tma.h" +#ifdef BN_MP_ADD_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* single digit addition */ +int +mp_add_d (mp_int * a, mp_digit b, mp_int * c) +{ + int res, ix, oldused; + mp_digit *tmpa, *tmpc, mu; + + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } + } + + /* if a is negative and |a| >= b, call c = |a| - b */ + if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { + /* temporarily fix sign of a */ + a->sign = MP_ZPOS; + + /* c = |a| - b */ + res = mp_sub_d(a, b, c); + + /* fix sign */ + a->sign = c->sign = MP_NEG; + + return res; + } + + /* old number of used digits in c */ + oldused = c->used; + + /* sign always positive */ + c->sign = MP_ZPOS; + + /* source alias */ + tmpa = a->dp; + + /* destination alias */ + tmpc = c->dp; + + /* if a is positive */ + if (a->sign == MP_ZPOS) { + /* add digit, after this we're propagating + * the carry. + */ + *tmpc = *tmpa++ + b; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + + /* now handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ + mu; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + } + /* set final carry */ + ix++; + *tmpc++ = mu; + + /* setup size */ + c->used = a->used + 1; + } else { + /* a was negative and |a| < b */ + c->used = 1; + + /* the result is a single digit */ + if (a->used == 1) { + *tmpc++ = b - a->dp[0]; + } else { + *tmpc++ = b; + } + + /* setup count so the clearing of oldused + * can fall through correctly + */ + ix = 1; + } + + /* now zero to oldused */ + while (ix++ < oldused) { + *tmpc++ = 0; + } + mp_clamp(c); + + return MP_OKAY; +} + +#endif + +/* End: bn_mp_add_d.c */ + +/* Start: bn_mp_addmod.c */ +#include "tma.h" +#ifdef BN_MP_ADDMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* d = a + b (mod c) */ +int +mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_add (a, b, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + res = mp_mod (&t, c, d); + mp_clear (&t); + return res; +} +#endif + +/* End: bn_mp_addmod.c */ + +/* Start: bn_mp_and.c */ +#include "tma.h" +#ifdef BN_MP_AND_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* AND two ints together */ +int +mp_and (mp_int * a, mp_int * b, mp_int * c) +{ + int res, ix, px; + mp_int t, *x; + + if (a->used > b->used) { + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + px = b->used; + x = b; + } else { + if ((res = mp_init_copy (&t, b)) != MP_OKAY) { + return res; + } + px = a->used; + x = a; + } + + for (ix = 0; ix < px; ix++) { + t.dp[ix] &= x->dp[ix]; + } + + /* zero digits above the last from the smallest mp_int */ + for (; ix < t.used; ix++) { + t.dp[ix] = 0; + } + + mp_clamp (&t); + mp_exch (c, &t); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_mp_and.c */ + +/* Start: bn_mp_clamp.c */ +#include "tma.h" +#ifdef BN_MP_CLAMP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* trim unused digits + * + * This is used to ensure that leading zero digits are + * trimed and the leading "used" digit will be non-zero + * Typically very fast. Also fixes the sign if there + * are no more leading digits + */ +void +mp_clamp (mp_int * a) +{ + /* decrease used while the most significant digit is + * zero. + */ + while (a->used > 0 && a->dp[a->used - 1] == 0) { + --(a->used); + } + + /* reset the sign flag if used == 0 */ + if (a->used == 0) { + a->sign = MP_ZPOS; + } +} +#endif + +/* End: bn_mp_clamp.c */ + +/* Start: bn_mp_clear.c */ +#include "tma.h" +#ifdef BN_MP_CLEAR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* clear one (frees) */ +void +mp_clear (mp_int * a) +{ + int i; + + /* only do anything if a hasn't been freed previously */ + if (a->dp != NULL) { + /* first zero the digits */ + for (i = 0; i < a->used; i++) { + a->dp[i] = 0; + } + + /* free ram */ + XFREE(a->dp); + + /* reset members to make debugging easier */ + a->dp = NULL; + a->alloc = a->used = 0; + a->sign = MP_ZPOS; + } +} +#endif + +/* End: bn_mp_clear.c */ + +/* Start: bn_mp_clear_multi.c */ +#include "tma.h" +#ifdef BN_MP_CLEAR_MULTI_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +void mp_clear_multi(mp_int *mp, ...) +{ + mp_int* next_mp = mp; + va_list args; + va_start(args, mp); + while (next_mp != NULL) { + mp_clear(next_mp); + next_mp = va_arg(args, mp_int*); + } + va_end(args); +} +#endif + +/* End: bn_mp_clear_multi.c */ + +/* Start: bn_mp_cmp.c */ +#include "tma.h" +#ifdef BN_MP_CMP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* compare two ints (signed)*/ +int +mp_cmp (mp_int * a, mp_int * b) +{ + /* compare based on sign */ + if (a->sign != b->sign) { + if (a->sign == MP_NEG) { + return MP_LT; + } else { + return MP_GT; + } + } + + /* compare digits */ + if (a->sign == MP_NEG) { + /* if negative compare opposite direction */ + return mp_cmp_mag(b, a); + } else { + return mp_cmp_mag(a, b); + } +} +#endif + +/* End: bn_mp_cmp.c */ + +/* Start: bn_mp_cmp_d.c */ +#include "tma.h" +#ifdef BN_MP_CMP_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* compare a digit */ +int mp_cmp_d(mp_int * a, mp_digit b) +{ + /* compare based on sign */ + if (a->sign == MP_NEG) { + return MP_LT; + } + + /* compare based on magnitude */ + if (a->used > 1) { + return MP_GT; + } + + /* compare the only digit of a to b */ + if (a->dp[0] > b) { + return MP_GT; + } else if (a->dp[0] < b) { + return MP_LT; + } else { + return MP_EQ; + } +} +#endif + +/* End: bn_mp_cmp_d.c */ + +/* Start: bn_mp_cmp_mag.c */ +#include "tma.h" +#ifdef BN_MP_CMP_MAG_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* compare maginitude of two ints (unsigned) */ +int mp_cmp_mag (mp_int * a, mp_int * b) +{ + int n; + mp_digit *tmpa, *tmpb; + + /* compare based on # of non-zero digits */ + if (a->used > b->used) { + return MP_GT; + } + + if (a->used < b->used) { + return MP_LT; + } + + /* alias for a */ + tmpa = a->dp + (a->used - 1); + + /* alias for b */ + tmpb = b->dp + (a->used - 1); + + /* compare based on digits */ + for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { + if (*tmpa > *tmpb) { + return MP_GT; + } + + if (*tmpa < *tmpb) { + return MP_LT; + } + } + return MP_EQ; +} +#endif + +/* End: bn_mp_cmp_mag.c */ + +/* Start: bn_mp_cnt_lsb.c */ +#include "tma.h" +#ifdef BN_MP_CNT_LSB_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +static const int lnz[16] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +/* Counts the number of lsbs which are zero before the first zero bit */ +int mp_cnt_lsb(mp_int *a) +{ + int x; + mp_digit q, qq; + + /* easy out */ + if (mp_iszero(a) == 1) { + return 0; + } + + /* scan lower digits until non-zero */ + for (x = 0; x < a->used && a->dp[x] == 0; x++); + q = a->dp[x]; + x *= DIGIT_BIT; + + /* now scan this digit until a 1 is found */ + if ((q & 1) == 0) { + do { + qq = q & 15; + x += lnz[qq]; + q >>= 4; + } while (qq == 0); + } + return x; +} + +#endif + +/* End: bn_mp_cnt_lsb.c */ + +/* Start: bn_mp_copy.c */ +#include "tma.h" +#ifdef BN_MP_COPY_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* copy, b = a */ +int +mp_copy (mp_int * a, mp_int * b) +{ + int res, n; + + /* if dst == src do nothing */ + if (a == b) { + return MP_OKAY; + } + + /* grow dest */ + if (b->alloc < a->used) { + if ((res = mp_grow (b, a->used)) != MP_OKAY) { + return res; + } + } + + /* zero b and copy the parameters over */ + { + register mp_digit *tmpa, *tmpb; + + /* pointer aliases */ + + /* source */ + tmpa = a->dp; + + /* destination */ + tmpb = b->dp; + + /* copy all the digits */ + for (n = 0; n < a->used; n++) { + *tmpb++ = *tmpa++; + } + + /* clear high digits */ + for (; n < b->used; n++) { + *tmpb++ = 0; + } + } + + /* copy used count and sign */ + b->used = a->used; + b->sign = a->sign; + return MP_OKAY; +} +#endif + +/* End: bn_mp_copy.c */ + +/* Start: bn_mp_count_bits.c */ +#include "tma.h" +#ifdef BN_MP_COUNT_BITS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* returns the number of bits in an int */ +int +mp_count_bits (mp_int * a) +{ + int r; + mp_digit q; + + /* shortcut */ + if (a->used == 0) { + return 0; + } + + /* get number of digits and add that */ + r = (a->used - 1) * DIGIT_BIT; + + /* take the last digit and count the bits in it */ + q = a->dp[a->used - 1]; + while (q > ((mp_digit) 0)) { + ++r; + q >>= ((mp_digit) 1); + } + return r; +} +#endif + +/* End: bn_mp_count_bits.c */ + +/* Start: bn_mp_div.c */ +#include "tma.h" +#ifdef BN_MP_DIV_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +#ifdef BN_MP_DIV_SMALL + +/* slower bit-bang division... also smaller */ +int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + mp_int ta, tb, tq, q; + int res, n, n2; + + /* is divisor zero ? */ + if (mp_iszero (b) == 1) { + return MP_VAL; + } + + /* if a < b then q=0, r = a */ + if (mp_cmp_mag (a, b) == MP_LT) { + if (d != NULL) { + res = mp_copy (a, d); + } else { + res = MP_OKAY; + } + if (c != NULL) { + mp_zero (c); + } + return res; + } + + /* init our temps */ + if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) { + return res; + } + + + mp_set(&tq, 1); + n = mp_count_bits(a) - mp_count_bits(b); + if (((res = mp_abs(a, &ta)) != MP_OKAY) || + ((res = mp_abs(b, &tb)) != MP_OKAY) || + ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) || + ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) { + goto LBL_ERR; + } + + while (n-- >= 0) { + if (mp_cmp(&tb, &ta) != MP_GT) { + if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) || + ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) { + goto LBL_ERR; + } + } + if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) || + ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) { + goto LBL_ERR; + } + } + + /* now q == quotient and ta == remainder */ + n = a->sign; + n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG); + if (c != NULL) { + mp_exch(c, &q); + c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2; + } + if (d != NULL) { + mp_exch(d, &ta); + d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n; + } +LBL_ERR: + mp_clear_multi(&ta, &tb, &tq, &q, NULL); + return res; +} + +#else + +/* integer signed division. + * c*b + d == a [e.g. a/b, c=quotient, d=remainder] + * HAC pp.598 Algorithm 14.20 + * + * Note that the description in HAC is horribly + * incomplete. For example, it doesn't consider + * the case where digits are removed from 'x' in + * the inner loop. It also doesn't consider the + * case that y has fewer than three digits, etc.. + * + * The overall algorithm is as described as + * 14.20 from HAC but fixed to treat these cases. +*/ +int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + mp_int q, x, y, t1, t2; + int res, n, t, i, norm, neg; + + /* is divisor zero ? */ + if (mp_iszero (b) == 1) { + return MP_VAL; + } + + /* if a < b then q=0, r = a */ + if (mp_cmp_mag (a, b) == MP_LT) { + if (d != NULL) { + res = mp_copy (a, d); + } else { + res = MP_OKAY; + } + if (c != NULL) { + mp_zero (c); + } + return res; + } + + if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { + return res; + } + q.used = a->used + 2; + + if ((res = mp_init (&t1)) != MP_OKAY) { + goto LBL_Q; + } + + if ((res = mp_init (&t2)) != MP_OKAY) { + goto LBL_T1; + } + + if ((res = mp_init_copy (&x, a)) != MP_OKAY) { + goto LBL_T2; + } + + if ((res = mp_init_copy (&y, b)) != MP_OKAY) { + goto LBL_X; + } + + /* fix the sign */ + neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; + x.sign = y.sign = MP_ZPOS; + + /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ + norm = mp_count_bits(&y) % DIGIT_BIT; + if (norm < (int)(DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { + goto LBL_Y; + } + if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { + goto LBL_Y; + } + } else { + norm = 0; + } + + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ + n = x.used - 1; + t = y.used - 1; + + /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ + if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ + goto LBL_Y; + } + + while (mp_cmp (&x, &y) != MP_LT) { + ++(q.dp[n - t]); + if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { + goto LBL_Y; + } + } + + /* reset y by shifting it back down */ + mp_rshd (&y, n - t); + + /* step 3. for i from n down to (t + 1) */ + for (i = n; i >= (t + 1); i--) { + if (i > x.used) { + continue; + } + + /* step 3.1 if xi == yt then set q{i-t-1} to b-1, + * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ + if (x.dp[i] == y.dp[t]) { + q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); + } else { + mp_word tmp; + tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); + tmp |= ((mp_word) x.dp[i - 1]); + tmp /= ((mp_word) y.dp[t]); + if (tmp > (mp_word) MP_MASK) + tmp = MP_MASK; + q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; + do { + q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; + + /* find left hand */ + mp_zero (&t1); + t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; + t1.dp[1] = y.dp[t]; + t1.used = 2; + if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { + goto LBL_Y; + } + + /* find right hand */ + t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; + t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; + t2.dp[2] = x.dp[i]; + t2.used = 3; + } while (mp_cmp_mag(&t1, &t2) == MP_GT); + + /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ + if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { + goto LBL_Y; + } + + if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { + goto LBL_Y; + } + + if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { + goto LBL_Y; + } + + /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ + if (x.sign == MP_NEG) { + if ((res = mp_copy (&y, &t1)) != MP_OKAY) { + goto LBL_Y; + } + if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { + goto LBL_Y; + } + if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { + goto LBL_Y; + } + + q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; + } + } + + /* now q is the quotient and x is the remainder + * [which we have to normalize] + */ + + /* get sign before writing to c */ + x.sign = x.used == 0 ? MP_ZPOS : a->sign; + + if (c != NULL) { + mp_clamp (&q); + mp_exch (&q, c); + c->sign = neg; + } + + if (d != NULL) { + mp_div_2d (&x, norm, &x, NULL); + mp_exch (&x, d); + } + + res = MP_OKAY; + +LBL_Y:mp_clear (&y); +LBL_X:mp_clear (&x); +LBL_T2:mp_clear (&t2); +LBL_T1:mp_clear (&t1); +LBL_Q:mp_clear (&q); + return res; +} + +#endif + +#endif + +/* End: bn_mp_div.c */ + +/* Start: bn_mp_div_2.c */ +#include "tma.h" +#ifdef BN_MP_DIV_2_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* b = a/2 */ +int mp_div_2(mp_int * a, mp_int * b) +{ + int x, res, oldused; + + /* copy */ + if (b->alloc < a->used) { + if ((res = mp_grow (b, a->used)) != MP_OKAY) { + return res; + } + } + + oldused = b->used; + b->used = a->used; + { + register mp_digit r, rr, *tmpa, *tmpb; + + /* source alias */ + tmpa = a->dp + b->used - 1; + + /* dest alias */ + tmpb = b->dp + b->used - 1; + + /* carry */ + r = 0; + for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ + rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ + *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ + r = rr; + } + + /* zero excess digits */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + mp_clamp (b); + return MP_OKAY; +} +#endif + +/* End: bn_mp_div_2.c */ + +/* Start: bn_mp_div_2d.c */ +#include "tma.h" +#ifdef BN_MP_DIV_2D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* shift right by a certain bit count (store quotient in c, optional remainder in d) */ +int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) +{ + mp_digit D, r, rr; + int x, res; + mp_int t; + + + /* if the shift count is <= 0 then we do no work */ + if (b <= 0) { + res = mp_copy (a, c); + if (d != NULL) { + mp_zero (d); + } + return res; + } + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + /* get the remainder */ + if (d != NULL) { + if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + } + + /* copy */ + if ((res = mp_copy (a, c)) != MP_OKAY) { + mp_clear (&t); + return res; + } + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + mp_rshd (c, b / DIGIT_BIT); + } + + /* shift any bit count < DIGIT_BIT */ + D = (mp_digit) (b % DIGIT_BIT); + if (D != 0) { + register mp_digit *tmpc, mask, shift; + + /* mask */ + mask = (((mp_digit)1) << D) - 1; + + /* shift for lsb */ + shift = DIGIT_BIT - D; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ + r = 0; + for (x = c->used - 1; x >= 0; x--) { + /* get the lower bits of this word in a temp */ + rr = *tmpc & mask; + + /* shift the current word and mix in the carry bits from the previous word */ + *tmpc = (*tmpc >> D) | (r << shift); + --tmpc; + + /* set the carry to the carry bits of the current word found above */ + r = rr; + } + } + mp_clamp (c); + if (d != NULL) { + mp_exch (&t, d); + } + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_mp_div_2d.c */ + +/* Start: bn_mp_div_3.c */ +#include "tma.h" +#ifdef BN_MP_DIV_3_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* divide by three (based on routine from MPI and the GMP manual) */ +int +mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) +{ + mp_int q; + mp_word w, t; + mp_digit b; + int res, ix; + + /* b = 2**DIGIT_BIT / 3 */ + b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); + + if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { + return res; + } + + q.used = a->used; + q.sign = a->sign; + w = 0; + for (ix = a->used - 1; ix >= 0; ix--) { + w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); + + if (w >= 3) { + /* multiply w by [1/3] */ + t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); + + /* now subtract 3 * [w/3] from w, to get the remainder */ + w -= t+t+t; + + /* fixup the remainder as required since + * the optimization is not exact. + */ + while (w >= 3) { + t += 1; + w -= 3; + } + } else { + t = 0; + } + q.dp[ix] = (mp_digit)t; + } + + /* [optional] store the remainder */ + if (d != NULL) { + *d = (mp_digit)w; + } + + /* [optional] store the quotient */ + if (c != NULL) { + mp_clamp(&q); + mp_exch(&q, c); + } + mp_clear(&q); + + return res; +} + +#endif + +/* End: bn_mp_div_3.c */ + +/* Start: bn_mp_div_d.c */ +#include "tma.h" +#ifdef BN_MP_DIV_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +static int s_is_power_of_two(mp_digit b, int *p) +{ + int x; + + for (x = 1; x < DIGIT_BIT; x++) { + if (b == (((mp_digit)1)<dp[0] & ((((mp_digit)1)<used)) != MP_OKAY) { + return res; + } + + q.used = a->used; + q.sign = a->sign; + w = 0; + for (ix = a->used - 1; ix >= 0; ix--) { + w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); + + if (w >= b) { + t = (mp_digit)(w / b); + w -= ((mp_word)t) * ((mp_word)b); + } else { + t = 0; + } + q.dp[ix] = (mp_digit)t; + } + + if (d != NULL) { + *d = (mp_digit)w; + } + + if (c != NULL) { + mp_clamp(&q); + mp_exch(&q, c); + } + mp_clear(&q); + + return res; +} + +#endif + +/* End: bn_mp_div_d.c */ + +/* Start: bn_mp_dr_is_modulus.c */ +#include "tma.h" +#ifdef BN_MP_DR_IS_MODULUS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines if a number is a valid DR modulus */ +int mp_dr_is_modulus(mp_int *a) +{ + int ix; + + /* must be at least two digits */ + if (a->used < 2) { + return 0; + } + + /* must be of the form b**k - a [a <= b] so all + * but the first digit must be equal to -1 (mod b). + */ + for (ix = 1; ix < a->used; ix++) { + if (a->dp[ix] != MP_MASK) { + return 0; + } + } + return 1; +} + +#endif + +/* End: bn_mp_dr_is_modulus.c */ + +/* Start: bn_mp_dr_reduce.c */ +#include "tma.h" +#ifdef BN_MP_DR_REDUCE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reduce "x" in place modulo "n" using the Diminished Radix algorithm. + * + * Based on algorithm from the paper + * + * "Generating Efficient Primes for Discrete Log Cryptosystems" + * Chae Hoon Lim, Pil Joong Lee, + * POSTECH Information Research Laboratories + * + * The modulus must be of a special format [see manual] + * + * Has been modified to use algorithm 7.10 from the LTM book instead + * + * Input x must be in the range 0 <= x <= (n-1)**2 + */ +int +mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) +{ + int err, i, m; + mp_word r; + mp_digit mu, *tmpx1, *tmpx2; + + /* m = digits in modulus */ + m = n->used; + + /* ensure that "x" has at least 2m digits */ + if (x->alloc < m + m) { + if ((err = mp_grow (x, m + m)) != MP_OKAY) { + return err; + } + } + +/* top of loop, this is where the code resumes if + * another reduction pass is required. + */ +top: + /* aliases for digits */ + /* alias for lower half of x */ + tmpx1 = x->dp; + + /* alias for upper half of x, or x/B**m */ + tmpx2 = x->dp + m; + + /* set carry to zero */ + mu = 0; + + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ + for (i = 0; i < m; i++) { + r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; + *tmpx1++ = (mp_digit)(r & MP_MASK); + mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); + } + + /* set final carry */ + *tmpx1++ = mu; + + /* zero words above m */ + for (i = m + 1; i < x->used; i++) { + *tmpx1++ = 0; + } + + /* clamp, sub and return */ + mp_clamp (x); + + /* if x >= n then subtract and reduce again + * Each successive "recursion" makes the input smaller and smaller. + */ + if (mp_cmp_mag (x, n) != MP_LT) { + s_mp_sub(x, n, x); + goto top; + } + return MP_OKAY; +} +#endif + +/* End: bn_mp_dr_reduce.c */ + +/* Start: bn_mp_dr_setup.c */ +#include "tma.h" +#ifdef BN_MP_DR_SETUP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines the setup value */ +void mp_dr_setup(mp_int *a, mp_digit *d) +{ + /* the casts are required if DIGIT_BIT is one less than + * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] + */ + *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - + ((mp_word)a->dp[0])); +} + +#endif + +/* End: bn_mp_dr_setup.c */ + +/* Start: bn_mp_exch.c */ +#include "tma.h" +#ifdef BN_MP_EXCH_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* swap the elements of two integers, for cases where you can't simply swap the + * mp_int pointers around + */ +void +mp_exch (mp_int * a, mp_int * b) +{ + mp_int t; + + t = *a; + *a = *b; + *b = t; +} +#endif + +/* End: bn_mp_exch.c */ + +/* Start: bn_mp_expt_d.c */ +#include "tma.h" +#ifdef BN_MP_EXPT_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* calculate c = a**b using a square-multiply algorithm */ +int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) +{ + int res, x; + mp_int g; + + if ((res = mp_init_copy (&g, a)) != MP_OKAY) { + return res; + } + + /* set initial result */ + mp_set (c, 1); + + for (x = 0; x < (int) DIGIT_BIT; x++) { + /* square */ + if ((res = mp_sqr (c, c)) != MP_OKAY) { + mp_clear (&g); + return res; + } + + /* if the bit is set multiply */ + if ((b & (mp_digit) (((mp_digit)1) << (DIGIT_BIT - 1))) != 0) { + if ((res = mp_mul (c, &g, c)) != MP_OKAY) { + mp_clear (&g); + return res; + } + } + + /* shift to next bit */ + b <<= 1; + } + + mp_clear (&g); + return MP_OKAY; +} +#endif + +/* End: bn_mp_expt_d.c */ + +/* Start: bn_mp_exptmod.c */ +#include "tma.h" +#ifdef BN_MP_EXPTMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + + +/* this is a shell function that calls either the normal or Montgomery + * exptmod functions. Originally the call to the montgomery code was + * embedded in the normal function but that wasted alot of stack space + * for nothing (since 99% of the time the Montgomery code would be called) + */ +int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +{ + int dr; + + /* modulus P must be positive */ + if (P->sign == MP_NEG) { + return MP_VAL; + } + + /* if exponent X is negative we have to recurse */ + if (X->sign == MP_NEG) { +#ifdef BN_MP_INVMOD_C + mp_int tmpG, tmpX; + int err; + + /* first compute 1/G mod P */ + if ((err = mp_init(&tmpG)) != MP_OKAY) { + return err; + } + if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) { + mp_clear(&tmpG); + return err; + } + + /* now get |X| */ + if ((err = mp_init(&tmpX)) != MP_OKAY) { + mp_clear(&tmpG); + return err; + } + if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { + mp_clear_multi(&tmpG, &tmpX, NULL); + return err; + } + + /* and now compute (1/G)**|X| instead of G**X [X < 0] */ + err = mp_exptmod(&tmpG, &tmpX, P, Y); + mp_clear_multi(&tmpG, &tmpX, NULL); + return err; +#else + /* no invmod */ + return MP_VAL; +#endif + } + +/* modified diminished radix reduction */ +#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) + if (mp_reduce_is_2k_l(P) == MP_YES) { + return s_mp_exptmod(G, X, P, Y, 1); + } +#endif + +#ifdef BN_MP_DR_IS_MODULUS_C + /* is it a DR modulus? */ + dr = mp_dr_is_modulus(P); +#else + /* default to no */ + dr = 0; +#endif + +#ifdef BN_MP_REDUCE_IS_2K_C + /* if not, is it a unrestricted DR modulus? */ + if (dr == 0) { + dr = mp_reduce_is_2k(P) << 1; + } +#endif + + /* if the modulus is odd or dr != 0 use the montgomery method */ +#ifdef BN_MP_EXPTMOD_FAST_C + if (mp_isodd (P) == 1 || dr != 0) { + return mp_exptmod_fast (G, X, P, Y, dr); + } else { +#endif +#ifdef BN_S_MP_EXPTMOD_C + /* otherwise use the generic Barrett reduction technique */ + return s_mp_exptmod (G, X, P, Y, 0); +#else + /* no exptmod for evens */ + return MP_VAL; +#endif +#ifdef BN_MP_EXPTMOD_FAST_C + } +#endif +} + +#endif + +/* End: bn_mp_exptmod.c */ + +/* Start: bn_mp_exptmod_fast.c */ +#include "tma.h" +#ifdef BN_MP_EXPTMOD_FAST_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 + * + * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. + * The value of k changes based on the size of the exponent. + * + * Uses Montgomery or Diminished Radix reduction [whichever appropriate] + */ + +#ifdef MP_LOW_MEM + #define TAB_SIZE 32 +#else + #define TAB_SIZE 256 +#endif + +int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) +{ + mp_int M[TAB_SIZE], res; + mp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + + /* use a pointer to the reduction algorithm. This allows us to use + * one of many reduction algorithms without modding the guts of + * the code with if statements everywhere. + */ + int (*redux)(mp_int*,mp_int*,mp_digit); + + /* find window size */ + x = mp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else if (x <= 1303) { + winsize = 6; + } else if (x <= 3529) { + winsize = 7; + } else { + winsize = 8; + } + +#ifdef MP_LOW_MEM + if (winsize > 5) { + winsize = 5; + } +#endif + + /* init M array */ + /* init first cell */ + if ((err = mp_init(&M[1])) != MP_OKAY) { + return err; + } + + /* now init the second half of the array */ + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + if ((err = mp_init(&M[x])) != MP_OKAY) { + for (y = 1<<(winsize-1); y < x; y++) { + mp_clear (&M[y]); + } + mp_clear(&M[1]); + return err; + } + } + + /* determine and setup reduction code */ + if (redmode == 0) { +#ifdef BN_MP_MONTGOMERY_SETUP_C + /* now setup montgomery */ + if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { + goto LBL_M; + } +#else + err = MP_VAL; + goto LBL_M; +#endif + + /* automatically pick the comba one if available (saves quite a few calls/ifs) */ +#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C + if (((P->used * 2 + 1) < MP_WARRAY) && + P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + redux = fast_mp_montgomery_reduce; + } else +#endif + { +#ifdef BN_MP_MONTGOMERY_REDUCE_C + /* use slower baseline Montgomery method */ + redux = mp_montgomery_reduce; +#else + err = MP_VAL; + goto LBL_M; +#endif + } + } else if (redmode == 1) { +#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C) + /* setup DR reduction for moduli of the form B**k - b */ + mp_dr_setup(P, &mp); + redux = mp_dr_reduce; +#else + err = MP_VAL; + goto LBL_M; +#endif + } else { +#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) + /* setup DR reduction for moduli of the form 2**k - b */ + if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { + goto LBL_M; + } + redux = mp_reduce_2k; +#else + err = MP_VAL; + goto LBL_M; +#endif + } + + /* setup result */ + if ((err = mp_init (&res)) != MP_OKAY) { + goto LBL_M; + } + + /* create M table + * + + * + * The first half of the table is not computed though accept for M[0] and M[1] + */ + + if (redmode == 0) { +#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C + /* now we need R mod m */ + if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { + goto LBL_RES; + } +#else + err = MP_VAL; + goto LBL_RES; +#endif + + /* now set M[1] to G * R mod m */ + if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { + goto LBL_RES; + } + } else { + mp_set(&res, 1); + if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { + goto LBL_RES; + } + } + + /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ + if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { + goto LBL_RES; + } + + for (x = 0; x < (winsize - 1); x++) { + if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* create upper table */ + for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { + if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&M[x], P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (mp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* then multiply */ + if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + + /* get next bit of the window */ + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + } + } + + if (redmode == 0) { + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + if ((err = redux(&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* swap res with Y */ + mp_exch (&res, Y); + err = MP_OKAY; +LBL_RES:mp_clear (&res); +LBL_M: + mp_clear(&M[1]); + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + mp_clear (&M[x]); + } + return err; +} +#endif + + +/* End: bn_mp_exptmod_fast.c */ + +/* Start: bn_mp_exteuclid.c */ +#include "tma.h" +#ifdef BN_MP_EXTEUCLID_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Extended euclidean algorithm of (a, b) produces + a*u1 + b*u2 = u3 + */ +int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3) +{ + mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp; + int err; + + if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) { + return err; + } + + /* initialize, (u1,u2,u3) = (1,0,a) */ + mp_set(&u1, 1); + if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; } + + /* initialize, (v1,v2,v3) = (0,1,b) */ + mp_set(&v2, 1); + if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; } + + /* loop while v3 != 0 */ + while (mp_iszero(&v3) == MP_NO) { + /* q = u3/v3 */ + if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; } + + /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */ + if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; } + + /* (u1,u2,u3) = (v1,v2,v3) */ + if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; } + + /* (v1,v2,v3) = (t1,t2,t3) */ + if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } + } + + /* make sure U3 >= 0 */ + if (u3.sign == MP_NEG) { + mp_neg(&u1, &u1); + mp_neg(&u2, &u2); + mp_neg(&u3, &u3); + } + + /* copy result out */ + if (U1 != NULL) { mp_exch(U1, &u1); } + if (U2 != NULL) { mp_exch(U2, &u2); } + if (U3 != NULL) { mp_exch(U3, &u3); } + + err = MP_OKAY; +_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL); + return err; +} +#endif + +/* End: bn_mp_exteuclid.c */ + +/* Start: bn_mp_fread.c */ +#include "tma.h" +#ifdef BN_MP_FREAD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* read a bigint from a file stream in ASCII */ +int mp_fread(mp_int *a, int radix, FILE *stream) +{ + int err, ch, neg, y; + + /* clear a */ + mp_zero(a); + + /* if first digit is - then set negative */ + ch = fgetc(stream); + if (ch == '-') { + neg = MP_NEG; + ch = fgetc(stream); + } else { + neg = MP_ZPOS; + } + + for (;;) { + /* find y in the radix map */ + for (y = 0; y < radix; y++) { + if (mp_s_rmap[y] == ch) { + break; + } + } + if (y == radix) { + break; + } + + /* shift up and add */ + if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) { + return err; + } + if ((err = mp_add_d(a, y, a)) != MP_OKAY) { + return err; + } + + ch = fgetc(stream); + } + if (mp_cmp_d(a, 0) != MP_EQ) { + a->sign = neg; + } + + return MP_OKAY; +} + +#endif + +/* End: bn_mp_fread.c */ + +/* Start: bn_mp_fwrite.c */ +#include "tma.h" +#ifdef BN_MP_FWRITE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +int mp_fwrite(mp_int *a, int radix, FILE *stream) +{ + char *buf; + int err, len, x; + + if ((err = mp_radix_size(a, radix, &len)) != MP_OKAY) { + return err; + } + + buf = OPT_CAST(char) XMALLOC (len); + if (buf == NULL) { + return MP_MEM; + } + + if ((err = mp_toradix(a, buf, radix)) != MP_OKAY) { + XFREE (buf); + return err; + } + + for (x = 0; x < len; x++) { + if (fputc(buf[x], stream) == EOF) { + XFREE (buf); + return MP_VAL; + } + } + + XFREE (buf); + return MP_OKAY; +} + +#endif + +/* End: bn_mp_fwrite.c */ + +/* Start: bn_mp_gcd.c */ +#include "tma.h" +#ifdef BN_MP_GCD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Greatest Common Divisor using the binary method */ +int mp_gcd (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int u, v; + int k, u_lsb, v_lsb, res; + + /* either zero than gcd is the largest */ + if (mp_iszero (a) == 1 && mp_iszero (b) == 0) { + return mp_abs (b, c); + } + if (mp_iszero (a) == 0 && mp_iszero (b) == 1) { + return mp_abs (a, c); + } + + /* optimized. At this point if a == 0 then + * b must equal zero too + */ + if (mp_iszero (a) == 1) { + mp_zero(c); + return MP_OKAY; + } + + /* get copies of a and b we can modify */ + if ((res = mp_init_copy (&u, a)) != MP_OKAY) { + return res; + } + + if ((res = mp_init_copy (&v, b)) != MP_OKAY) { + goto LBL_U; + } + + /* must be positive for the remainder of the algorithm */ + u.sign = v.sign = MP_ZPOS; + + /* B1. Find the common power of two for u and v */ + u_lsb = mp_cnt_lsb(&u); + v_lsb = mp_cnt_lsb(&v); + k = MIN(u_lsb, v_lsb); + + if (k > 0) { + /* divide the power of two out */ + if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) { + goto LBL_V; + } + + if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + /* divide any remaining factors of two out */ + if (u_lsb != k) { + if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + if (v_lsb != k) { + if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + while (mp_iszero(&v) == 0) { + /* make sure v is the largest */ + if (mp_cmp_mag(&u, &v) == MP_GT) { + /* swap u and v to make sure v is >= u */ + mp_exch(&u, &v); + } + + /* subtract smallest from largest */ + if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) { + goto LBL_V; + } + + /* Divide out all factors of two */ + if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + /* multiply by 2**k which we divided out at the beginning */ + if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) { + goto LBL_V; + } + c->sign = MP_ZPOS; + res = MP_OKAY; +LBL_V:mp_clear (&u); +LBL_U:mp_clear (&v); + return res; +} +#endif + +/* End: bn_mp_gcd.c */ + +/* Start: bn_mp_get_int.c */ +#include "tma.h" +#ifdef BN_MP_GET_INT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* get the lower 32-bits of an mp_int */ +unsigned long mp_get_int(mp_int * a) +{ + int i; + unsigned long res; + + if (a->used == 0) { + return 0; + } + + /* get number of digits of the lsb we have to read */ + i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1; + + /* get most significant digit of result */ + res = DIGIT(a,i); + + while (--i >= 0) { + res = (res << DIGIT_BIT) | DIGIT(a,i); + } + + /* force result to 32-bits always so it is consistent on non 32-bit platforms */ + return res & 0xFFFFFFFFUL; +} +#endif + +/* End: bn_mp_get_int.c */ + +/* Start: bn_mp_grow.c */ +#include "tma.h" +#ifdef BN_MP_GROW_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* grow as required */ +int mp_grow (mp_int * a, int size) +{ + int i; + mp_digit *tmp; + + /* if the alloc size is smaller alloc more ram */ + if (a->alloc < size) { + /* ensure there are always at least MP_PREC digits extra on top */ + size += (MP_PREC * 2) - (size % MP_PREC); + + /* reallocate the array a->dp + * + * We store the return in a temporary variable + * in case the operation failed we don't want + * to overwrite the dp member of a. + */ + tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); + if (tmp == NULL) { + /* reallocation failed but "a" is still valid [can be freed] */ + return MP_MEM; + } + + /* reallocation succeeded so set a->dp */ + a->dp = tmp; + + /* zero excess digits */ + i = a->alloc; + a->alloc = size; + for (; i < a->alloc; i++) { + a->dp[i] = 0; + } + } + return MP_OKAY; +} +#endif + +/* End: bn_mp_grow.c */ + +/* Start: bn_mp_init.c */ +#include "tma.h" +#ifdef BN_MP_INIT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* init a new mp_int */ +int mp_init (mp_int * a) +{ + int i; + + /* allocate memory required and clear it */ + a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC); + if (a->dp == NULL) { + return MP_MEM; + } + + /* set the digits to zero */ + for (i = 0; i < MP_PREC; i++) { + a->dp[i] = 0; + } + + /* set the used to zero, allocated digits to the default precision + * and sign to positive */ + a->used = 0; + a->alloc = MP_PREC; + a->sign = MP_ZPOS; + + return MP_OKAY; +} +#endif + +/* End: bn_mp_init.c */ + +/* Start: bn_mp_init_copy.c */ +#include "tma.h" +#ifdef BN_MP_INIT_COPY_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* creates "a" then copies b into it */ +int mp_init_copy (mp_int * a, mp_int * b) +{ + int res; + + if ((res = mp_init (a)) != MP_OKAY) { + return res; + } + return mp_copy (b, a); +} +#endif + +/* End: bn_mp_init_copy.c */ + +/* Start: bn_mp_init_multi.c */ +#include "tma.h" +#ifdef BN_MP_INIT_MULTI_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +int mp_init_multi(mp_int *mp, ...) +{ + mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ + int n = 0; /* Number of ok inits */ + mp_int* cur_arg = mp; + va_list args; + + va_start(args, mp); /* init args to next argument from caller */ + while (cur_arg != NULL) { + if (mp_init(cur_arg) != MP_OKAY) { + /* Oops - error! Back-track and mp_clear what we already + succeeded in init-ing, then return error. + */ + va_list clean_args; + + /* end the current list */ + va_end(args); + + /* now start cleaning up */ + cur_arg = mp; + va_start(clean_args, mp); + while (n--) { + mp_clear(cur_arg); + cur_arg = va_arg(clean_args, mp_int*); + } + va_end(clean_args); + res = MP_MEM; + break; + } + n++; + cur_arg = va_arg(args, mp_int*); + } + va_end(args); + return res; /* Assumed ok, if error flagged above. */ +} + +#endif + +/* End: bn_mp_init_multi.c */ + +/* Start: bn_mp_init_set.c */ +#include "tma.h" +#ifdef BN_MP_INIT_SET_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* initialize and set a digit */ +int mp_init_set (mp_int * a, mp_digit b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + mp_set(a, b); + return err; +} +#endif + +/* End: bn_mp_init_set.c */ + +/* Start: bn_mp_init_set_int.c */ +#include "tma.h" +#ifdef BN_MP_INIT_SET_INT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* initialize and set a digit */ +int mp_init_set_int (mp_int * a, unsigned long b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + return mp_set_int(a, b); +} +#endif + +/* End: bn_mp_init_set_int.c */ + +/* Start: bn_mp_init_size.c */ +#include "tma.h" +#ifdef BN_MP_INIT_SIZE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* init an mp_init for a given size */ +int mp_init_size (mp_int * a, int size) +{ + int x; + + /* pad size so there are always extra digits */ + size += (MP_PREC * 2) - (size % MP_PREC); + + /* alloc mem */ + a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size); + if (a->dp == NULL) { + return MP_MEM; + } + + /* set the members */ + a->used = 0; + a->alloc = size; + a->sign = MP_ZPOS; + + /* zero the digits */ + for (x = 0; x < size; x++) { + a->dp[x] = 0; + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_init_size.c */ + +/* Start: bn_mp_invmod.c */ +#include "tma.h" +#ifdef BN_MP_INVMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* hac 14.61, pp608 */ +int mp_invmod (mp_int * a, mp_int * b, mp_int * c) +{ + /* b cannot be negative */ + if (b->sign == MP_NEG || mp_iszero(b) == 1) { + return MP_VAL; + } + +#ifdef BN_FAST_MP_INVMOD_C + /* if the modulus is odd we can use a faster routine instead */ + if (mp_isodd (b) == 1) { + return fast_mp_invmod (a, b, c); + } +#endif + +#ifdef BN_MP_INVMOD_SLOW_C + return mp_invmod_slow(a, b, c); +#endif + + return MP_VAL; +} +#endif + +/* End: bn_mp_invmod.c */ + +/* Start: bn_mp_invmod_slow.c */ +#include "tma.h" +#ifdef BN_MP_INVMOD_SLOW_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* hac 14.61, pp608 */ +int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int x, y, u, v, A, B, C, D; + int res; + + /* b cannot be negative */ + if (b->sign == MP_NEG || mp_iszero(b) == 1) { + return MP_VAL; + } + + /* init temps */ + if ((res = mp_init_multi(&x, &y, &u, &v, + &A, &B, &C, &D, NULL)) != MP_OKAY) { + return res; + } + + /* x = a, y = b */ + if ((res = mp_mod(a, b, &x)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_copy (b, &y)) != MP_OKAY) { + goto LBL_ERR; + } + + /* 2. [modified] if x,y are both even then return an error! */ + if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) { + res = MP_VAL; + goto LBL_ERR; + } + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + if ((res = mp_copy (&x, &u)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_copy (&y, &v)) != MP_OKAY) { + goto LBL_ERR; + } + mp_set (&A, 1); + mp_set (&D, 1); + +top: + /* 4. while u is even do */ + while (mp_iseven (&u) == 1) { + /* 4.1 u = u/2 */ + if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { + goto LBL_ERR; + } + /* 4.2 if A or B is odd then */ + if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) { + /* A = (A+y)/2, B = (B-x)/2 */ + if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* A = A/2, B = B/2 */ + if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 5. while v is even do */ + while (mp_iseven (&v) == 1) { + /* 5.1 v = v/2 */ + if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { + goto LBL_ERR; + } + /* 5.2 if C or D is odd then */ + if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) { + /* C = (C+y)/2, D = (D-x)/2 */ + if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* C = C/2, D = D/2 */ + if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 6. if u >= v then */ + if (mp_cmp (&u, &v) != MP_LT) { + /* u = u - v, A = A - C, B = B - D */ + if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } else { + /* v - v - u, C = C - A, D = D - B */ + if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { + goto LBL_ERR; + } + + if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* if not zero goto step 4 */ + if (mp_iszero (&u) == 0) + goto top; + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (mp_cmp_d (&v, 1) != MP_EQ) { + res = MP_VAL; + goto LBL_ERR; + } + + /* if its too low */ + while (mp_cmp_d(&C, 0) == MP_LT) { + if ((res = mp_add(&C, b, &C)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* too big */ + while (mp_cmp_mag(&C, b) != MP_LT) { + if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* C is now the inverse */ + mp_exch (&C, c); + res = MP_OKAY; +LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); + return res; +} +#endif + +/* End: bn_mp_invmod_slow.c */ + +/* Start: bn_mp_is_square.c */ +#include "tma.h" +#ifdef BN_MP_IS_SQUARE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Check if remainders are possible squares - fast exclude non-squares */ +static const char rem_128[128] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 +}; + +static const char rem_105[105] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 +}; + +/* Store non-zero to ret if arg is square, and zero if not */ +int mp_is_square(mp_int *arg,int *ret) +{ + int res; + mp_digit c; + mp_int t; + unsigned long r; + + /* Default to Non-square :) */ + *ret = MP_NO; + + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* digits used? (TSD) */ + if (arg->used == 0) { + return MP_OKAY; + } + + /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */ + if (rem_128[127 & DIGIT(arg,0)] == 1) { + return MP_OKAY; + } + + /* Next check mod 105 (3*5*7) */ + if ((res = mp_mod_d(arg,105,&c)) != MP_OKAY) { + return res; + } + if (rem_105[c] == 1) { + return MP_OKAY; + } + + + if ((res = mp_init_set_int(&t,11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) { + return res; + } + if ((res = mp_mod(arg,&t,&t)) != MP_OKAY) { + goto ERR; + } + r = mp_get_int(&t); + /* Check for other prime modules, note it's not an ERROR but we must + * free "t" so the easiest way is to goto ERR. We know that res + * is already equal to MP_OKAY from the mp_mod call + */ + if ( (1L<<(r%11)) & 0x5C4L ) goto ERR; + if ( (1L<<(r%13)) & 0x9E4L ) goto ERR; + if ( (1L<<(r%17)) & 0x5CE8L ) goto ERR; + if ( (1L<<(r%19)) & 0x4F50CL ) goto ERR; + if ( (1L<<(r%23)) & 0x7ACCA0L ) goto ERR; + if ( (1L<<(r%29)) & 0xC2EDD0CL ) goto ERR; + if ( (1L<<(r%31)) & 0x6DE2B848L ) goto ERR; + + /* Final check - is sqr(sqrt(arg)) == arg ? */ + if ((res = mp_sqrt(arg,&t)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sqr(&t,&t)) != MP_OKAY) { + goto ERR; + } + + *ret = (mp_cmp_mag(&t,arg) == MP_EQ) ? MP_YES : MP_NO; +ERR:mp_clear(&t); + return res; +} +#endif + +/* End: bn_mp_is_square.c */ + +/* Start: bn_mp_jacobi.c */ +#include "tma.h" +#ifdef BN_MP_JACOBI_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes the jacobi c = (a | n) (or Legendre if n is prime) + * HAC pp. 73 Algorithm 2.149 + */ +int mp_jacobi (mp_int * a, mp_int * p, int *c) +{ + mp_int a1, p1; + int k, s, r, res; + mp_digit residue; + + /* if p <= 0 return MP_VAL */ + if (mp_cmp_d(p, 0) != MP_GT) { + return MP_VAL; + } + + /* step 1. if a == 0, return 0 */ + if (mp_iszero (a) == 1) { + *c = 0; + return MP_OKAY; + } + + /* step 2. if a == 1, return 1 */ + if (mp_cmp_d (a, 1) == MP_EQ) { + *c = 1; + return MP_OKAY; + } + + /* default */ + s = 0; + + /* step 3. write a = a1 * 2**k */ + if ((res = mp_init_copy (&a1, a)) != MP_OKAY) { + return res; + } + + if ((res = mp_init (&p1)) != MP_OKAY) { + goto LBL_A1; + } + + /* divide out larger power of two */ + k = mp_cnt_lsb(&a1); + if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) { + goto LBL_P1; + } + + /* step 4. if e is even set s=1 */ + if ((k & 1) == 0) { + s = 1; + } else { + /* else set s=1 if p = 1/7 (mod 8) or s=-1 if p = 3/5 (mod 8) */ + residue = p->dp[0] & 7; + + if (residue == 1 || residue == 7) { + s = 1; + } else if (residue == 3 || residue == 5) { + s = -1; + } + } + + /* step 5. if p == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */ + if ( ((p->dp[0] & 3) == 3) && ((a1.dp[0] & 3) == 3)) { + s = -s; + } + + /* if a1 == 1 we're done */ + if (mp_cmp_d (&a1, 1) == MP_EQ) { + *c = s; + } else { + /* n1 = n mod a1 */ + if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) { + goto LBL_P1; + } + if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) { + goto LBL_P1; + } + *c = s * r; + } + + /* done */ + res = MP_OKAY; +LBL_P1:mp_clear (&p1); +LBL_A1:mp_clear (&a1); + return res; +} +#endif + +/* End: bn_mp_jacobi.c */ + +/* Start: bn_mp_karatsuba_mul.c */ +#include "tma.h" +#ifdef BN_MP_KARATSUBA_MUL_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* c = |a| * |b| using Karatsuba Multiplication using + * three half size multiplications + * + * Let B represent the radix [e.g. 2**DIGIT_BIT] and + * let n represent half of the number of digits in + * the min(a,b) + * + * a = a1 * B**n + a0 + * b = b1 * B**n + b0 + * + * Then, a * b => + a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 + * + * Note that a1b1 and a0b0 are used twice and only need to be + * computed once. So in total three half size (half # of + * digit) multiplications are performed, a0b0, a1b1 and + * (a1-b1)(a0-b0) + * + * Note that a multiplication of half the digits requires + * 1/4th the number of single precision multiplications so in + * total after one call 25% of the single precision multiplications + * are saved. Note also that the call to mp_mul can end up back + * in this function if the a0, a1, b0, or b1 are above the threshold. + * This is known as divide-and-conquer and leads to the famous + * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than + * the standard O(N**2) that the baseline/comba methods use. + * Generally though the overhead of this method doesn't pay off + * until a certain size (N ~ 80) is reached. + */ +int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int x0, x1, y0, y1, t1, x0y0, x1y1; + int B, err; + + /* default the return code to an error */ + err = MP_MEM; + + /* min # of digits */ + B = MIN (a->used, b->used); + + /* now divide in two */ + B = B >> 1; + + /* init copy all the temps */ + if (mp_init_size (&x0, B) != MP_OKAY) + goto ERR; + if (mp_init_size (&x1, a->used - B) != MP_OKAY) + goto X0; + if (mp_init_size (&y0, B) != MP_OKAY) + goto X1; + if (mp_init_size (&y1, b->used - B) != MP_OKAY) + goto Y0; + + /* init temps */ + if (mp_init_size (&t1, B * 2) != MP_OKAY) + goto Y1; + if (mp_init_size (&x0y0, B * 2) != MP_OKAY) + goto T1; + if (mp_init_size (&x1y1, B * 2) != MP_OKAY) + goto X0Y0; + + /* now shift the digits */ + x0.used = y0.used = B; + x1.used = a->used - B; + y1.used = b->used - B; + + { + register int x; + register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; + + /* we copy the digits directly instead of using higher level functions + * since we also need to shift the digits + */ + tmpa = a->dp; + tmpb = b->dp; + + tmpx = x0.dp; + tmpy = y0.dp; + for (x = 0; x < B; x++) { + *tmpx++ = *tmpa++; + *tmpy++ = *tmpb++; + } + + tmpx = x1.dp; + for (x = B; x < a->used; x++) { + *tmpx++ = *tmpa++; + } + + tmpy = y1.dp; + for (x = B; x < b->used; x++) { + *tmpy++ = *tmpb++; + } + } + + /* only need to clamp the lower words since by definition the + * upper words x1/y1 must have a known number of digits + */ + mp_clamp (&x0); + mp_clamp (&y0); + + /* now calc the products x0y0 and x1y1 */ + /* after this x0 is no longer required, free temp [x0==t2]! */ + if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) + goto X1Y1; /* x0y0 = x0*y0 */ + if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) + goto X1Y1; /* x1y1 = x1*y1 */ + + /* now calc x1-x0 and y1-y0 */ + if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + goto X1Y1; /* t1 = x1 - x0 */ + if (mp_sub (&y1, &y0, &x0) != MP_OKAY) + goto X1Y1; /* t2 = y1 - y0 */ + if (mp_mul (&t1, &x0, &t1) != MP_OKAY) + goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ + + /* add x0y0 */ + if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) + goto X1Y1; /* t2 = x0y0 + x1y1 */ + if (mp_sub (&x0, &t1, &t1) != MP_OKAY) + goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ + + /* shift by B */ + if (mp_lshd (&t1, B) != MP_OKAY) + goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<used; + + /* now divide in two */ + B = B >> 1; + + /* init copy all the temps */ + if (mp_init_size (&x0, B) != MP_OKAY) + goto ERR; + if (mp_init_size (&x1, a->used - B) != MP_OKAY) + goto X0; + + /* init temps */ + if (mp_init_size (&t1, a->used * 2) != MP_OKAY) + goto X1; + if (mp_init_size (&t2, a->used * 2) != MP_OKAY) + goto T1; + if (mp_init_size (&x0x0, B * 2) != MP_OKAY) + goto T2; + if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) + goto X0X0; + + { + register int x; + register mp_digit *dst, *src; + + src = a->dp; + + /* now shift the digits */ + dst = x0.dp; + for (x = 0; x < B; x++) { + *dst++ = *src++; + } + + dst = x1.dp; + for (x = B; x < a->used; x++) { + *dst++ = *src++; + } + } + + x0.used = B; + x1.used = a->used - B; + + mp_clamp (&x0); + + /* now calc the products x0*x0 and x1*x1 */ + if (mp_sqr (&x0, &x0x0) != MP_OKAY) + goto X1X1; /* x0x0 = x0*x0 */ + if (mp_sqr (&x1, &x1x1) != MP_OKAY) + goto X1X1; /* x1x1 = x1*x1 */ + + /* now calc (x1-x0)**2 */ + if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + goto X1X1; /* t1 = x1 - x0 */ + if (mp_sqr (&t1, &t1) != MP_OKAY) + goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ + + /* add x0y0 */ + if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) + goto X1X1; /* t2 = x0x0 + x1x1 */ + if (mp_sub (&t2, &t1, &t1) != MP_OKAY) + goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ + + /* shift by B */ + if (mp_lshd (&t1, B) != MP_OKAY) + goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<sign = MP_ZPOS; + +LBL_T: + mp_clear_multi (&t1, &t2, NULL); + return res; +} +#endif + +/* End: bn_mp_lcm.c */ + +/* Start: bn_mp_lshd.c */ +#include "tma.h" +#ifdef BN_MP_LSHD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* shift left a certain amount of digits */ +int mp_lshd (mp_int * a, int b) +{ + int x, res; + + /* if its less than zero return */ + if (b <= 0) { + return MP_OKAY; + } + + /* grow to fit the new digits */ + if (a->alloc < a->used + b) { + if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { + return res; + } + } + + { + register mp_digit *top, *bottom; + + /* increment the used by the shift amount then copy upwards */ + a->used += b; + + /* top */ + top = a->dp + a->used - 1; + + /* base */ + bottom = a->dp + a->used - 1 - b; + + /* much like mp_rshd this is implemented using a sliding window + * except the window goes the otherway around. Copying from + * the bottom to the top. see bn_mp_rshd.c for more info. + */ + for (x = a->used - 1; x >= b; x--) { + *top-- = *bottom--; + } + + /* zero the lower digits */ + top = a->dp; + for (x = 0; x < b; x++) { + *top++ = 0; + } + } + return MP_OKAY; +} +#endif + +/* End: bn_mp_lshd.c */ + +/* Start: bn_mp_mod.c */ +#include "tma.h" +#ifdef BN_MP_MOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* c = a mod b, 0 <= c < b */ +int +mp_mod (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int t; + int res; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + + if (t.sign != b->sign) { + res = mp_add (b, &t, c); + } else { + res = MP_OKAY; + mp_exch (&t, c); + } + + mp_clear (&t); + return res; +} +#endif + +/* End: bn_mp_mod.c */ + +/* Start: bn_mp_mod_2d.c */ +#include "tma.h" +#ifdef BN_MP_MOD_2D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* calc a value mod 2**b */ +int +mp_mod_2d (mp_int * a, int b, mp_int * c) +{ + int x, res; + + /* if b is <= 0 then zero the int */ + if (b <= 0) { + mp_zero (c); + return MP_OKAY; + } + + /* if the modulus is larger than the value than return */ + if (b >= (int) (a->used * DIGIT_BIT)) { + res = mp_copy (a, c); + return res; + } + + /* copy */ + if ((res = mp_copy (a, c)) != MP_OKAY) { + return res; + } + + /* zero digits above the last digit of the modulus */ + for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { + c->dp[x] = 0; + } + /* clear the digit that is not completely outside/inside the modulus */ + c->dp[b / DIGIT_BIT] &= + (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); + mp_clamp (c); + return MP_OKAY; +} +#endif + +/* End: bn_mp_mod_2d.c */ + +/* Start: bn_mp_mod_d.c */ +#include "tma.h" +#ifdef BN_MP_MOD_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +int +mp_mod_d (mp_int * a, mp_digit b, mp_digit * c) +{ + return mp_div_d(a, b, NULL, c); +} +#endif + +/* End: bn_mp_mod_d.c */ + +/* Start: bn_mp_montgomery_calc_normalization.c */ +#include "tma.h" +#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* + * shifts with subtractions when the result is greater than b. + * + * The method is slightly modified to shift B unconditionally upto just under + * the leading bit of b. This saves alot of multiple precision shifting. + */ +int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) +{ + int x, bits, res; + + /* how many bits of last digit does b use */ + bits = mp_count_bits (b) % DIGIT_BIT; + + if (b->used > 1) { + if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { + return res; + } + } else { + mp_set(a, 1); + bits = 1; + } + + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)DIGIT_BIT; x++) { + if ((res = mp_mul_2 (a, a)) != MP_OKAY) { + return res; + } + if (mp_cmp_mag (a, b) != MP_LT) { + if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { + return res; + } + } + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_montgomery_calc_normalization.c */ + +/* Start: bn_mp_montgomery_reduce.c */ +#include "tma.h" +#ifdef BN_MP_MONTGOMERY_REDUCE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes xR**-1 == x (mod N) via Montgomery Reduction */ +int +mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) +{ + int ix, res, digs; + mp_digit mu; + + /* can the fast reduction [comba] method be used? + * + * Note that unlike in mul you're safely allowed *less* + * than the available columns [255 per default] since carries + * are fixed up in the inner loop. + */ + digs = n->used * 2 + 1; + if ((digs < MP_WARRAY) && + n->used < + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_mp_montgomery_reduce (x, n, rho); + } + + /* grow the input as required */ + if (x->alloc < digs) { + if ((res = mp_grow (x, digs)) != MP_OKAY) { + return res; + } + } + x->used = digs; + + for (ix = 0; ix < n->used; ix++) { + /* mu = ai * rho mod b + * + * The value of rho must be precalculated via + * montgomery_setup() such that + * it equals -1/n0 mod b this allows the + * following inner loop to reduce the + * input one digit at a time + */ + mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); + + /* a = a + mu * m * b**i */ + { + register int iy; + register mp_digit *tmpn, *tmpx, u; + register mp_word r; + + /* alias for digits of the modulus */ + tmpn = n->dp; + + /* alias for the digits of x [the input] */ + tmpx = x->dp + ix; + + /* set the carry to zero */ + u = 0; + + /* Multiply and add in place */ + for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ + r = ((mp_word)mu) * ((mp_word)*tmpn++) + + ((mp_word) u) + ((mp_word) * tmpx); + + /* get carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* fix digit */ + *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); + } + /* At this point the ix'th digit of x should be zero */ + + + /* propagate carries upwards as required*/ + while (u) { + *tmpx += u; + u = *tmpx >> DIGIT_BIT; + *tmpx++ &= MP_MASK; + } + } + } + + /* at this point the n.used'th least + * significant digits of x are all zero + * which means we can shift x to the + * right by n.used digits and the + * residue is unchanged. + */ + + /* x = x/b**n.used */ + mp_clamp(x); + mp_rshd (x, n->used); + + /* if x >= n then x = x - n */ + if (mp_cmp_mag (x, n) != MP_LT) { + return s_mp_sub (x, n, x); + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_montgomery_reduce.c */ + +/* Start: bn_mp_montgomery_setup.c */ +#include "tma.h" +#ifdef BN_MP_MONTGOMERY_SETUP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* setups the montgomery reduction stuff */ +int +mp_montgomery_setup (mp_int * n, mp_digit * rho) +{ + mp_digit x, b; + +/* fast inversion mod 2**k + * + * Based on the fact that + * + * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) + * => 2*X*A - X*X*A*A = 1 + * => 2*(1) - (1) = 1 + */ + b = n->dp[0]; + + if ((b & 1) == 0) { + return MP_VAL; + } + + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ +#if !defined(MP_8BIT) + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ +#endif +#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT)) + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ +#endif +#ifdef MP_64BIT + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ +#endif + + /* rho = -1/m mod b */ + *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; + + return MP_OKAY; +} +#endif + +/* End: bn_mp_montgomery_setup.c */ + +/* Start: bn_mp_mul.c */ +#include "tma.h" +#ifdef BN_MP_MUL_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* high level multiplication (handles sign) */ +int mp_mul (mp_int * a, mp_int * b, mp_int * c) +{ + int res, neg; + neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; + + /* use Toom-Cook? */ +#ifdef BN_MP_TOOM_MUL_C + if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { + res = mp_toom_mul(a, b, c); + } else +#endif +#ifdef BN_MP_KARATSUBA_MUL_C + /* use Karatsuba? */ + if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { + res = mp_karatsuba_mul (a, b, c); + } else +#endif + { + /* can we use the fast multiplier? + * + * The fast multiplier can be used if the output will + * have less than MP_WARRAY digits and the number of + * digits won't affect carry propagation + */ + int digs = a->used + b->used + 1; + +#ifdef BN_FAST_S_MP_MUL_DIGS_C + if ((digs < MP_WARRAY) && + MIN(a->used, b->used) <= + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + res = fast_s_mp_mul_digs (a, b, c, digs); + } else +#endif +#ifdef BN_S_MP_MUL_DIGS_C + res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ +#else + res = MP_VAL; +#endif + + } + c->sign = (c->used > 0) ? neg : MP_ZPOS; + return res; +} +#endif + +/* End: bn_mp_mul.c */ + +/* Start: bn_mp_mul_2.c */ +#include "tma.h" +#ifdef BN_MP_MUL_2_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* b = a*2 */ +int mp_mul_2(mp_int * a, mp_int * b) +{ + int x, res, oldused; + + /* grow to accomodate result */ + if (b->alloc < a->used + 1) { + if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { + return res; + } + } + + oldused = b->used; + b->used = a->used; + + { + register mp_digit r, rr, *tmpa, *tmpb; + + /* alias for source */ + tmpa = a->dp; + + /* alias for dest */ + tmpb = b->dp; + + /* carry */ + r = 0; + for (x = 0; x < a->used; x++) { + + /* get what will be the *next* carry bit from the + * MSB of the current digit + */ + rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); + + /* now shift up this digit, add in the carry [from the previous] */ + *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; + + /* copy the carry that would be from the source + * digit into the next iteration + */ + r = rr; + } + + /* new leading digit? */ + if (r != 0) { + /* add a MSB which is always 1 at this point */ + *tmpb = 1; + ++(b->used); + } + + /* now zero any excess digits on the destination + * that we didn't write to + */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + return MP_OKAY; +} +#endif + +/* End: bn_mp_mul_2.c */ + +/* Start: bn_mp_mul_2d.c */ +#include "tma.h" +#ifdef BN_MP_MUL_2D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* shift left by a certain bit count */ +int mp_mul_2d (mp_int * a, int b, mp_int * c) +{ + mp_digit d; + int res; + + /* copy */ + if (a != c) { + if ((res = mp_copy (a, c)) != MP_OKAY) { + return res; + } + } + + if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { + if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { + return res; + } + } + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { + return res; + } + } + + /* shift any bit count < DIGIT_BIT */ + d = (mp_digit) (b % DIGIT_BIT); + if (d != 0) { + register mp_digit *tmpc, shift, mask, r, rr; + register int x; + + /* bitmask for carries */ + mask = (((mp_digit)1) << d) - 1; + + /* shift for msbs */ + shift = DIGIT_BIT - d; + + /* alias */ + tmpc = c->dp; + + /* carry */ + r = 0; + for (x = 0; x < c->used; x++) { + /* get the higher bits of the current word */ + rr = (*tmpc >> shift) & mask; + + /* shift the current word and OR in the carry */ + *tmpc = ((*tmpc << d) | r) & MP_MASK; + ++tmpc; + + /* set the carry to the carry bits of the current word */ + r = rr; + } + + /* set final carry */ + if (r != 0) { + c->dp[(c->used)++] = r; + } + } + mp_clamp (c); + return MP_OKAY; +} +#endif + +/* End: bn_mp_mul_2d.c */ + +/* Start: bn_mp_mul_d.c */ +#include "tma.h" +#ifdef BN_MP_MUL_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* multiply by a digit */ +int +mp_mul_d (mp_int * a, mp_digit b, mp_int * c) +{ + mp_digit u, *tmpa, *tmpc; + mp_word r; + int ix, res, olduse; + + /* make sure c is big enough to hold a*b */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { + return res; + } + } + + /* get the original destinations used count */ + olduse = c->used; + + /* set the sign */ + c->sign = a->sign; + + /* alias for a->dp [source] */ + tmpa = a->dp; + + /* alias for c->dp [dest] */ + tmpc = c->dp; + + /* zero carry */ + u = 0; + + /* compute columns */ + for (ix = 0; ix < a->used; ix++) { + /* compute product and carry sum for this term */ + r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); + + /* mask off higher bits to get a single digit */ + *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* send carry into next iteration */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + + /* store final carry [if any] and increment ix offset */ + *tmpc++ = u; + ++ix; + + /* now zero digits above the top */ + while (ix++ < olduse) { + *tmpc++ = 0; + } + + /* set used count */ + c->used = a->used + 1; + mp_clamp(c); + + return MP_OKAY; +} +#endif + +/* End: bn_mp_mul_d.c */ + +/* Start: bn_mp_mulmod.c */ +#include "tma.h" +#ifdef BN_MP_MULMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* d = a * b (mod c) */ +int +mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_mul (a, b, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + res = mp_mod (&t, c, d); + mp_clear (&t); + return res; +} +#endif + +/* End: bn_mp_mulmod.c */ + +/* Start: bn_mp_n_root.c */ +#include "tma.h" +#ifdef BN_MP_N_ROOT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* find the n'th root of an integer + * + * Result found such that (c)**b <= a and (c+1)**b > a + * + * This algorithm uses Newton's approximation + * x[i+1] = x[i] - f(x[i])/f'(x[i]) + * which will find the root in log(N) time where + * each step involves a fair bit. This is not meant to + * find huge roots [square and cube, etc]. + */ +int mp_n_root (mp_int * a, mp_digit b, mp_int * c) +{ + mp_int t1, t2, t3; + int res, neg; + + /* input must be positive if b is even */ + if ((b & 1) == 0 && a->sign == MP_NEG) { + return MP_VAL; + } + + if ((res = mp_init (&t1)) != MP_OKAY) { + return res; + } + + if ((res = mp_init (&t2)) != MP_OKAY) { + goto LBL_T1; + } + + if ((res = mp_init (&t3)) != MP_OKAY) { + goto LBL_T2; + } + + /* if a is negative fudge the sign but keep track */ + neg = a->sign; + a->sign = MP_ZPOS; + + /* t2 = 2 */ + mp_set (&t2, 2); + + do { + /* t1 = t2 */ + if ((res = mp_copy (&t2, &t1)) != MP_OKAY) { + goto LBL_T3; + } + + /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */ + + /* t3 = t1**(b-1) */ + if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) { + goto LBL_T3; + } + + /* numerator */ + /* t2 = t1**b */ + if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) { + goto LBL_T3; + } + + /* t2 = t1**b - a */ + if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) { + goto LBL_T3; + } + + /* denominator */ + /* t3 = t1**(b-1) * b */ + if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) { + goto LBL_T3; + } + + /* t3 = (t1**b - a)/(b * t1**(b-1)) */ + if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) { + goto LBL_T3; + } + + if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) { + goto LBL_T3; + } + } while (mp_cmp (&t1, &t2) != MP_EQ); + + /* result can be off by a few so check */ + for (;;) { + if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) { + goto LBL_T3; + } + + if (mp_cmp (&t2, a) == MP_GT) { + if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) { + goto LBL_T3; + } + } else { + break; + } + } + + /* reset the sign of a first */ + a->sign = neg; + + /* set the result */ + mp_exch (&t1, c); + + /* set the sign of the result */ + c->sign = neg; + + res = MP_OKAY; + +LBL_T3:mp_clear (&t3); +LBL_T2:mp_clear (&t2); +LBL_T1:mp_clear (&t1); + return res; +} +#endif + +/* End: bn_mp_n_root.c */ + +/* Start: bn_mp_neg.c */ +#include "tma.h" +#ifdef BN_MP_NEG_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* b = -a */ +int mp_neg (mp_int * a, mp_int * b) +{ + int res; + if (a != b) { + if ((res = mp_copy (a, b)) != MP_OKAY) { + return res; + } + } + + if (mp_iszero(b) != MP_YES) { + b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; + } else { + b->sign = MP_ZPOS; + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_neg.c */ + +/* Start: bn_mp_or.c */ +#include "tma.h" +#ifdef BN_MP_OR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* OR two ints together */ +int mp_or (mp_int * a, mp_int * b, mp_int * c) +{ + int res, ix, px; + mp_int t, *x; + + if (a->used > b->used) { + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + px = b->used; + x = b; + } else { + if ((res = mp_init_copy (&t, b)) != MP_OKAY) { + return res; + } + px = a->used; + x = a; + } + + for (ix = 0; ix < px; ix++) { + t.dp[ix] |= x->dp[ix]; + } + mp_clamp (&t); + mp_exch (c, &t); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_mp_or.c */ + +/* Start: bn_mp_prime_fermat.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_FERMAT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* performs one Fermat test. + * + * If "a" were prime then b**a == b (mod a) since the order of + * the multiplicative sub-group would be phi(a) = a-1. That means + * it would be the same as b**(a mod (a-1)) == b**1 == b (mod a). + * + * Sets result to 1 if the congruence holds, or zero otherwise. + */ +int mp_prime_fermat (mp_int * a, mp_int * b, int *result) +{ + mp_int t; + int err; + + /* default to composite */ + *result = MP_NO; + + /* ensure b > 1 */ + if (mp_cmp_d(b, 1) != MP_GT) { + return MP_VAL; + } + + /* init t */ + if ((err = mp_init (&t)) != MP_OKAY) { + return err; + } + + /* compute t = b**a mod a */ + if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) { + goto LBL_T; + } + + /* is it equal to b? */ + if (mp_cmp (&t, b) == MP_EQ) { + *result = MP_YES; + } + + err = MP_OKAY; +LBL_T:mp_clear (&t); + return err; +} +#endif + +/* End: bn_mp_prime_fermat.c */ + +/* Start: bn_mp_prime_is_divisible.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_IS_DIVISIBLE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines if an integers is divisible by one + * of the first PRIME_SIZE primes or not + * + * sets result to 0 if not, 1 if yes + */ +int mp_prime_is_divisible (mp_int * a, int *result) +{ + int err, ix; + mp_digit res; + + /* default to not */ + *result = MP_NO; + + for (ix = 0; ix < PRIME_SIZE; ix++) { + /* what is a mod LBL_prime_tab[ix] */ + if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { + return err; + } + + /* is the residue zero? */ + if (res == 0) { + *result = MP_YES; + return MP_OKAY; + } + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_prime_is_divisible.c */ + +/* Start: bn_mp_prime_is_prime.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_IS_PRIME_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* performs a variable number of rounds of Miller-Rabin + * + * Probability of error after t rounds is no more than + + * + * Sets result to 1 if probably prime, 0 otherwise + */ +int mp_prime_is_prime (mp_int * a, int t, int *result) +{ + mp_int b; + int ix, err, res; + + /* default to no */ + *result = MP_NO; + + /* valid value of t? */ + if (t <= 0 || t > PRIME_SIZE) { + return MP_VAL; + } + + /* is the input equal to one of the primes in the table? */ + for (ix = 0; ix < PRIME_SIZE; ix++) { + if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { + *result = 1; + return MP_OKAY; + } + } + + /* first perform trial division */ + if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { + return err; + } + + /* return if it was trivially divisible */ + if (res == MP_YES) { + return MP_OKAY; + } + + /* now perform the miller-rabin rounds */ + if ((err = mp_init (&b)) != MP_OKAY) { + return err; + } + + for (ix = 0; ix < t; ix++) { + /* set the prime */ + mp_set (&b, ltm_prime_tab[ix]); + + if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { + goto LBL_B; + } + + if (res == MP_NO) { + goto LBL_B; + } + } + + /* passed the test */ + *result = MP_YES; +LBL_B:mp_clear (&b); + return err; +} +#endif + +/* End: bn_mp_prime_is_prime.c */ + +/* Start: bn_mp_prime_miller_rabin.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_MILLER_RABIN_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + */ +int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result) +{ + mp_int n1, y, r; + int s, j, err; + + /* default */ + *result = MP_NO; + + /* ensure b > 1 */ + if (mp_cmp_d(b, 1) != MP_GT) { + return MP_VAL; + } + + /* get n1 = a - 1 */ + if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { + return err; + } + if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { + goto LBL_N1; + } + + /* set 2**s * r = n1 */ + if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { + goto LBL_N1; + } + + /* count the number of least significant bits + * which are zero + */ + s = mp_cnt_lsb(&r); + + /* now divide n - 1 by 2**s */ + if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { + goto LBL_R; + } + + /* compute y = b**r mod a */ + if ((err = mp_init (&y)) != MP_OKAY) { + goto LBL_R; + } + if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { + goto LBL_Y; + } + + /* if y != 1 and y != n1 do */ + if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { + if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { + goto LBL_Y; + } + + /* if y == 1 then composite */ + if (mp_cmp_d (&y, 1) == MP_EQ) { + goto LBL_Y; + } + + ++j; + } + + /* if y != n1 then composite */ + if (mp_cmp (&y, &n1) != MP_EQ) { + goto LBL_Y; + } + } + + /* probably prime now */ + *result = MP_YES; +LBL_Y:mp_clear (&y); +LBL_R:mp_clear (&r); +LBL_N1:mp_clear (&n1); + return err; +} +#endif + +/* End: bn_mp_prime_miller_rabin.c */ + +/* Start: bn_mp_prime_next_prime.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_NEXT_PRIME_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* finds the next prime after the number "a" using "t" trials + * of Miller-Rabin. + * + * bbs_style = 1 means the prime must be congruent to 3 mod 4 + */ +int mp_prime_next_prime(mp_int *a, int t, int bbs_style) +{ + int err, res, x, y; + mp_digit res_tab[PRIME_SIZE], step, kstep; + mp_int b; + + /* ensure t is valid */ + if (t <= 0 || t > PRIME_SIZE) { + return MP_VAL; + } + + /* force positive */ + a->sign = MP_ZPOS; + + /* simple algo if a is less than the largest prime in the table */ + if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) { + /* find which prime it is bigger than */ + for (x = PRIME_SIZE - 2; x >= 0; x--) { + if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) { + if (bbs_style == 1) { + /* ok we found a prime smaller or + * equal [so the next is larger] + * + * however, the prime must be + * congruent to 3 mod 4 + */ + if ((ltm_prime_tab[x + 1] & 3) != 3) { + /* scan upwards for a prime congruent to 3 mod 4 */ + for (y = x + 1; y < PRIME_SIZE; y++) { + if ((ltm_prime_tab[y] & 3) == 3) { + mp_set(a, ltm_prime_tab[y]); + return MP_OKAY; + } + } + } + } else { + mp_set(a, ltm_prime_tab[x + 1]); + return MP_OKAY; + } + } + } + /* at this point a maybe 1 */ + if (mp_cmp_d(a, 1) == MP_EQ) { + mp_set(a, 2); + return MP_OKAY; + } + /* fall through to the sieve */ + } + + /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */ + if (bbs_style == 1) { + kstep = 4; + } else { + kstep = 2; + } + + /* at this point we will use a combination of a sieve and Miller-Rabin */ + + if (bbs_style == 1) { + /* if a mod 4 != 3 subtract the correct value to make it so */ + if ((a->dp[0] & 3) != 3) { + if ((err = mp_sub_d(a, (a->dp[0] & 3) + 1, a)) != MP_OKAY) { return err; }; + } + } else { + if (mp_iseven(a) == 1) { + /* force odd */ + if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { + return err; + } + } + } + + /* generate the restable */ + for (x = 1; x < PRIME_SIZE; x++) { + if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) { + return err; + } + } + + /* init temp used for Miller-Rabin Testing */ + if ((err = mp_init(&b)) != MP_OKAY) { + return err; + } + + for (;;) { + /* skip to the next non-trivially divisible candidate */ + step = 0; + do { + /* y == 1 if any residue was zero [e.g. cannot be prime] */ + y = 0; + + /* increase step to next candidate */ + step += kstep; + + /* compute the new residue without using division */ + for (x = 1; x < PRIME_SIZE; x++) { + /* add the step to each residue */ + res_tab[x] += kstep; + + /* subtract the modulus [instead of using division] */ + if (res_tab[x] >= ltm_prime_tab[x]) { + res_tab[x] -= ltm_prime_tab[x]; + } + + /* set flag if zero */ + if (res_tab[x] == 0) { + y = 1; + } + } + } while (y == 1 && step < ((((mp_digit)1)<= ((((mp_digit)1)< size) { + return (x == 0) ? sizes[0].t : sizes[x - 1].t; + } + } + return sizes[x-1].t + 1; +} + + +#endif + +/* End: bn_mp_prime_rabin_miller_trials.c */ + +/* Start: bn_mp_prime_random_ex.c */ +#include "tma.h" +#ifdef BN_MP_PRIME_RANDOM_EX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* makes a truly random prime of a given size (bits), + * + * Flags are as follows: + * + * LTM_PRIME_BBS - make prime congruent to 3 mod 4 + * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) + * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero + * LTM_PRIME_2MSB_ON - make the 2nd highest bit one + * + * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can + * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself + * so it can be NULL + * + */ + +/* This is possibly the mother of all prime generation functions, muahahahahaha! */ +int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat) +{ + unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; + int res, err, bsize, maskOR_msb_offset; + + /* sanity check the input */ + if (size <= 1 || t <= 0) { + return MP_VAL; + } + + /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */ + if (flags & LTM_PRIME_SAFE) { + flags |= LTM_PRIME_BBS; + } + + /* calc the byte size */ + bsize = (size>>3) + ((size&7)?1:0); + + /* we need a buffer of bsize bytes */ + tmp = OPT_CAST(unsigned char) XMALLOC(bsize); + if (tmp == NULL) { + return MP_MEM; + } + + /* calc the maskAND value for the MSbyte*/ + maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7))); + + /* calc the maskOR_msb */ + maskOR_msb = 0; + maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0; + if (flags & LTM_PRIME_2MSB_ON) { + maskOR_msb |= 1 << ((size - 2) & 7); + } else if (flags & LTM_PRIME_2MSB_OFF) { + maskAND &= ~(1 << ((size - 2) & 7)); + } + + /* get the maskOR_lsb */ + maskOR_lsb = 1; + if (flags & LTM_PRIME_BBS) { + maskOR_lsb |= 3; + } + + do { + /* read the bytes */ + if (cb(tmp, bsize, dat) != bsize) { + err = MP_VAL; + goto error; + } + + /* work over the MSbyte */ + tmp[0] &= maskAND; + tmp[0] |= 1 << ((size - 1) & 7); + + /* mix in the maskORs */ + tmp[maskOR_msb_offset] |= maskOR_msb; + tmp[bsize-1] |= maskOR_lsb; + + /* read it in */ + if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; } + + /* is it prime? */ + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } + if (res == MP_NO) { + continue; + } + + if (flags & LTM_PRIME_SAFE) { + /* see if (a-1)/2 is prime */ + if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; } + if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; } + + /* is it prime? */ + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } + } + } while (res == MP_NO); + + if (flags & LTM_PRIME_SAFE) { + /* restore a to the original value */ + if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; } + if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; } + } + + err = MP_OKAY; +error: + XFREE(tmp); + return err; +} + + +#endif + +/* End: bn_mp_prime_random_ex.c */ + +/* Start: bn_mp_radix_size.c */ +#include "tma.h" +#ifdef BN_MP_RADIX_SIZE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* returns size of ASCII reprensentation */ +int mp_radix_size (mp_int * a, int radix, int *size) +{ + int res, digs; + mp_int t; + mp_digit d; + + *size = 0; + + /* special case for binary */ + if (radix == 2) { + *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1; + return MP_OKAY; + } + + /* make sure the radix is in range */ + if (radix < 2 || radix > 64) { + return MP_VAL; + } + + if (mp_iszero(a) == MP_YES) { + *size = 2; + return MP_OKAY; + } + + /* digs is the digit count */ + digs = 0; + + /* if it's negative add one for the sign */ + if (a->sign == MP_NEG) { + ++digs; + } + + /* init a copy of the input */ + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* force temp to positive */ + t.sign = MP_ZPOS; + + /* fetch out all of the digits */ + while (mp_iszero (&t) == MP_NO) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + ++digs; + } + mp_clear (&t); + + /* return digs + 1, the 1 is for the NULL byte that would be required. */ + *size = digs + 1; + return MP_OKAY; +} + +#endif + +/* End: bn_mp_radix_size.c */ + +/* Start: bn_mp_radix_smap.c */ +#include "tma.h" +#ifdef BN_MP_RADIX_SMAP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* chars used in radix conversions */ +const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; +#endif + +/* End: bn_mp_radix_smap.c */ + +/* Start: bn_mp_rand.c */ +#include "tma.h" +#ifdef BN_MP_RAND_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* makes a pseudo-random int of a given size */ +int +mp_rand (mp_int * a, int digits) +{ + int res; + mp_digit d; + + mp_zero (a); + if (digits <= 0) { + return MP_OKAY; + } + + /* first place a random non-zero digit */ + do { + d = ((mp_digit) abs (rand ())) & MP_MASK; + } while (d == 0); + + if ((res = mp_add_d (a, d, a)) != MP_OKAY) { + return res; + } + + while (--digits > 0) { + if ((res = mp_lshd (a, 1)) != MP_OKAY) { + return res; + } + + if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) { + return res; + } + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_rand.c */ + +/* Start: bn_mp_read_radix.c */ +#include "tma.h" +#ifdef BN_MP_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* read a string [ASCII] in a given radix */ +int mp_read_radix (mp_int * a, const char *str, int radix) +{ + int y, res, neg; + char ch; + + /* make sure the radix is ok */ + if (radix < 2 || radix > 64) { + return MP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = MP_NEG; + } else { + neg = MP_ZPOS; + } + + /* set the integer to the default of zero */ + mp_zero (a); + + /* process each digit of the string */ + while (*str) { + /* if the radix < 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (char) ((radix < 36) ? toupper (*str) : *str); + for (y = 0; y < 64; y++) { + if (ch == mp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) { + return res; + } + if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) { + return res; + } + } else { + break; + } + ++str; + } + + /* set the sign only if a != 0 */ + if (mp_iszero(a) != 1) { + a->sign = neg; + } + return MP_OKAY; +} +#endif + +/* End: bn_mp_read_radix.c */ + +/* Start: bn_mp_read_signed_bin.c */ +#include "tma.h" +#ifdef BN_MP_READ_SIGNED_BIN_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* read signed bin, big endian, first byte is 0==positive or 1==negative */ +int +mp_read_signed_bin (mp_int * a, unsigned char *b, int c) +{ + int res; + + /* read magnitude */ + if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) { + return res; + } + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = MP_ZPOS; + } else { + a->sign = MP_NEG; + } + + return MP_OKAY; +} +#endif + +/* End: bn_mp_read_signed_bin.c */ + +/* Start: bn_mp_read_unsigned_bin.c */ +#include "tma.h" +#ifdef BN_MP_READ_UNSIGNED_BIN_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reads a unsigned char array, assumes the msb is stored first [big endian] */ +int +mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) +{ + int res; + + /* make sure there are at least two digits */ + if (a->alloc < 2) { + if ((res = mp_grow(a, 2)) != MP_OKAY) { + return res; + } + } + + /* zero the int */ + mp_zero (a); + + /* read the bytes in */ + while (c-- > 0) { + if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { + return res; + } + +#ifndef MP_8BIT + a->dp[0] |= *b++; + a->used += 1; +#else + a->dp[0] = (*b & MP_MASK); + a->dp[1] |= ((*b++ >> 7U) & 1); + a->used += 2; +#endif + } + mp_clamp (a); + return MP_OKAY; +} +#endif + +/* End: bn_mp_read_unsigned_bin.c */ + +/* Start: bn_mp_reduce.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reduces x mod m, assumes 0 < x < m**2, mu is + * precomputed via mp_reduce_setup. + * From HAC pp.604 Algorithm 14.42 + */ +int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) +{ + mp_int q; + int res, um = m->used; + + /* q = x */ + if ((res = mp_init_copy (&q, x)) != MP_OKAY) { + return res; + } + + /* q1 = x / b**(k-1) */ + mp_rshd (&q, um - 1); + + /* according to HAC this optimization is ok */ + if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { + if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { + goto CLEANUP; + } + } else { +#ifdef BN_S_MP_MUL_HIGH_DIGS_C + if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { + goto CLEANUP; + } +#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) + if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { + goto CLEANUP; + } +#else + { + res = MP_VAL; + goto CLEANUP; + } +#endif + } + + /* q3 = q2 / b**(k+1) */ + mp_rshd (&q, um + 1); + + /* x = x mod b**(k+1), quick (no division) */ + if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { + goto CLEANUP; + } + + /* q = q * m mod b**(k+1), quick (no division) */ + if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { + goto CLEANUP; + } + + /* x = x - q */ + if ((res = mp_sub (x, &q, x)) != MP_OKAY) { + goto CLEANUP; + } + + /* If x < 0, add b**(k+1) to it */ + if (mp_cmp_d (x, 0) == MP_LT) { + mp_set (&q, 1); + if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_add (x, &q, x)) != MP_OKAY) + goto CLEANUP; + } + + /* Back off if it's too big */ + while (mp_cmp (x, m) != MP_LT) { + if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { + goto CLEANUP; + } + } + +CLEANUP: + mp_clear (&q); + + return res; +} +#endif + +/* End: bn_mp_reduce.c */ + +/* Start: bn_mp_reduce_2k.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_2K_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reduces a modulo n where n is of the form 2**p - d */ +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) +{ + mp_int q; + int p, res; + + if ((res = mp_init(&q)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(n); +top: + /* q = a/2**p, a = a mod 2**p */ + if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (d != 1) { + /* q = q * d */ + if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { + goto ERR; + } + } + + /* a = a + q */ + if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (mp_cmp_mag(a, n) != MP_LT) { + s_mp_sub(a, n, a); + goto top; + } + +ERR: + mp_clear(&q); + return res; +} + +#endif + +/* End: bn_mp_reduce_2k.c */ + +/* Start: bn_mp_reduce_2k_l.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_2K_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reduces a modulo n where n is of the form 2**p - d + This differs from reduce_2k since "d" can be larger + than a single digit. +*/ +int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) +{ + mp_int q; + int p, res; + + if ((res = mp_init(&q)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(n); +top: + /* q = a/2**p, a = a mod 2**p */ + if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto ERR; + } + + /* q = q * d */ + if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { + goto ERR; + } + + /* a = a + q */ + if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (mp_cmp_mag(a, n) != MP_LT) { + s_mp_sub(a, n, a); + goto top; + } + +ERR: + mp_clear(&q); + return res; +} + +#endif + +/* End: bn_mp_reduce_2k_l.c */ + +/* Start: bn_mp_reduce_2k_setup.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_2K_SETUP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines the setup value */ +int mp_reduce_2k_setup(mp_int *a, mp_digit *d) +{ + int res, p; + mp_int tmp; + + if ((res = mp_init(&tmp)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(a); + if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + *d = tmp.dp[0]; + mp_clear(&tmp); + return MP_OKAY; +} +#endif + +/* End: bn_mp_reduce_2k_setup.c */ + +/* Start: bn_mp_reduce_2k_setup_l.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_2K_SETUP_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines the setup value */ +int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) +{ + int res; + mp_int tmp; + + if ((res = mp_init(&tmp)) != MP_OKAY) { + return res; + } + + if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { + goto ERR; + } + + if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { + goto ERR; + } + +ERR: + mp_clear(&tmp); + return res; +} +#endif + +/* End: bn_mp_reduce_2k_setup_l.c */ + +/* Start: bn_mp_reduce_is_2k.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_IS_2K_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines if mp_reduce_2k can be used */ +int mp_reduce_is_2k(mp_int *a) +{ + int ix, iy, iw; + mp_digit iz; + + if (a->used == 0) { + return MP_NO; + } else if (a->used == 1) { + return MP_YES; + } else if (a->used > 1) { + iy = mp_count_bits(a); + iz = 1; + iw = 1; + + /* Test every bit from the second digit up, must be 1 */ + for (ix = DIGIT_BIT; ix < iy; ix++) { + if ((a->dp[iw] & iz) == 0) { + return MP_NO; + } + iz <<= 1; + if (iz > (mp_digit)MP_MASK) { + ++iw; + iz = 1; + } + } + } + return MP_YES; +} + +#endif + +/* End: bn_mp_reduce_is_2k.c */ + +/* Start: bn_mp_reduce_is_2k_l.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_IS_2K_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* determines if reduce_2k_l can be used */ +int mp_reduce_is_2k_l(mp_int *a) +{ + int ix, iy; + + if (a->used == 0) { + return MP_NO; + } else if (a->used == 1) { + return MP_YES; + } else if (a->used > 1) { + /* if more than half of the digits are -1 we're sold */ + for (iy = ix = 0; ix < a->used; ix++) { + if (a->dp[ix] == MP_MASK) { + ++iy; + } + } + return (iy >= (a->used/2)) ? MP_YES : MP_NO; + + } + return MP_NO; +} + +#endif + +/* End: bn_mp_reduce_is_2k_l.c */ + +/* Start: bn_mp_reduce_setup.c */ +#include "tma.h" +#ifdef BN_MP_REDUCE_SETUP_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* pre-calculate the value required for Barrett reduction + * For a given modulus "b" it calulates the value required in "a" + */ +int mp_reduce_setup (mp_int * a, mp_int * b) +{ + int res; + + if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { + return res; + } + return mp_div (a, b, a, NULL); +} +#endif + +/* End: bn_mp_reduce_setup.c */ + +/* Start: bn_mp_rshd.c */ +#include "tma.h" +#ifdef BN_MP_RSHD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* shift right a certain amount of digits */ +void mp_rshd (mp_int * a, int b) +{ + int x; + + /* if b <= 0 then ignore it */ + if (b <= 0) { + return; + } + + /* if b > used then simply zero it and return */ + if (a->used <= b) { + mp_zero (a); + return; + } + + { + register mp_digit *bottom, *top; + + /* shift the digits down */ + + /* bottom */ + bottom = a->dp; + + /* top [offset into digits] */ + top = a->dp + b; + + /* this is implemented as a sliding window where + * the window is b-digits long and digits from + * the top of the window are copied to the bottom + * + * e.g. + + b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> + /\ | ----> + \-------------------/ ----> + */ + for (x = 0; x < (a->used - b); x++) { + *bottom++ = *top++; + } + + /* zero the top digits */ + for (; x < a->used; x++) { + *bottom++ = 0; + } + } + + /* remove excess digits */ + a->used -= b; +} +#endif + +/* End: bn_mp_rshd.c */ + +/* Start: bn_mp_set.c */ +#include "tma.h" +#ifdef BN_MP_SET_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* set to a digit */ +void mp_set (mp_int * a, mp_digit b) +{ + mp_zero (a); + a->dp[0] = b & MP_MASK; + a->used = (a->dp[0] != 0) ? 1 : 0; +} +#endif + +/* End: bn_mp_set.c */ + +/* Start: bn_mp_set_int.c */ +#include "tma.h" +#ifdef BN_MP_SET_INT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* set a 32-bit const */ +int mp_set_int (mp_int * a, unsigned long b) +{ + int x, res; + + mp_zero (a); + + /* set four bits at a time */ + for (x = 0; x < 8; x++) { + /* shift the number up four bits */ + if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) { + return res; + } + + /* OR in the top four bits of the source */ + a->dp[0] |= (b >> 28) & 15; + + /* shift the source up to the next four bits */ + b <<= 4; + + /* ensure that digits are not clamped off */ + a->used += 1; + } + mp_clamp (a); + return MP_OKAY; +} +#endif + +/* End: bn_mp_set_int.c */ + +/* Start: bn_mp_shrink.c */ +#include "tma.h" +#ifdef BN_MP_SHRINK_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* shrink a bignum */ +int mp_shrink (mp_int * a) +{ + mp_digit *tmp; + if (a->alloc != a->used && a->used > 0) { + if ((tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) { + return MP_MEM; + } + a->dp = tmp; + a->alloc = a->used; + } + return MP_OKAY; +} +#endif + +/* End: bn_mp_shrink.c */ + +/* Start: bn_mp_signed_bin_size.c */ +#include "tma.h" +#ifdef BN_MP_SIGNED_BIN_SIZE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* get the size for an signed equivalent */ +int mp_signed_bin_size (mp_int * a) +{ + return 1 + mp_unsigned_bin_size (a); +} +#endif + +/* End: bn_mp_signed_bin_size.c */ + +/* Start: bn_mp_sqr.c */ +#include "tma.h" +#ifdef BN_MP_SQR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* computes b = a*a */ +int +mp_sqr (mp_int * a, mp_int * b) +{ + int res; + +#ifdef BN_MP_TOOM_SQR_C + /* use Toom-Cook? */ + if (a->used >= TOOM_SQR_CUTOFF) { + res = mp_toom_sqr(a, b); + /* Karatsuba? */ + } else +#endif +#ifdef BN_MP_KARATSUBA_SQR_C +if (a->used >= KARATSUBA_SQR_CUTOFF) { + res = mp_karatsuba_sqr (a, b); + } else +#endif + { +#ifdef BN_FAST_S_MP_SQR_C + /* can we use the fast comba multiplier? */ + if ((a->used * 2 + 1) < MP_WARRAY && + a->used < + (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { + res = fast_s_mp_sqr (a, b); + } else +#endif +#ifdef BN_S_MP_SQR_C + res = s_mp_sqr (a, b); +#else + res = MP_VAL; +#endif + } + b->sign = MP_ZPOS; + return res; +} +#endif + +/* End: bn_mp_sqr.c */ + +/* Start: bn_mp_sqrmod.c */ +#include "tma.h" +#ifdef BN_MP_SQRMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* c = a * a (mod b) */ +int +mp_sqrmod (mp_int * a, mp_int * b, mp_int * c) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_sqr (a, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + res = mp_mod (&t, b, c); + mp_clear (&t); + return res; +} +#endif + +/* End: bn_mp_sqrmod.c */ + +/* Start: bn_mp_sqrt.c */ +#include "tma.h" +#ifdef BN_MP_SQRT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* this function is less generic than mp_n_root, simpler and faster */ +int mp_sqrt(mp_int *arg, mp_int *ret) +{ + int res; + mp_int t1,t2; + + /* must be positive */ + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* easy out */ + if (mp_iszero(arg) == MP_YES) { + mp_zero(ret); + return MP_OKAY; + } + + if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) { + return res; + } + + if ((res = mp_init(&t2)) != MP_OKAY) { + goto E2; + } + + /* First approx. (not very bad for large arg) */ + mp_rshd (&t1,t1.used/2); + + /* t1 > 0 */ + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* And now t1 > sqrt(arg) */ + do { + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* t1 >= sqrt(arg) >= t2 at this point */ + } while (mp_cmp_mag(&t1,&t2) == MP_GT); + + mp_exch(&t1,ret); + +E1: mp_clear(&t2); +E2: mp_clear(&t1); + return res; +} + +#endif + +/* End: bn_mp_sqrt.c */ + +/* Start: bn_mp_sub.c */ +#include "tma.h" +#ifdef BN_MP_SUB_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* high level subtraction (handles signs) */ +int +mp_sub (mp_int * a, mp_int * b, mp_int * c) +{ + int sa, sb, res; + + sa = a->sign; + sb = b->sign; + + if (sa != sb) { + /* subtract a negative from a positive, OR */ + /* subtract a positive from a negative. */ + /* In either case, ADD their magnitudes, */ + /* and use the sign of the first number. */ + c->sign = sa; + res = s_mp_add (a, b, c); + } else { + /* subtract a positive from a positive, OR */ + /* subtract a negative from a negative. */ + /* First, take the difference between their */ + /* magnitudes, then... */ + if (mp_cmp_mag (a, b) != MP_LT) { + /* Copy the sign from the first */ + c->sign = sa; + /* The first has a larger or equal magnitude */ + res = s_mp_sub (a, b, c); + } else { + /* The result has the *opposite* sign from */ + /* the first number. */ + c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; + /* The second has a larger magnitude */ + res = s_mp_sub (b, a, c); + } + } + return res; +} + +#endif + +/* End: bn_mp_sub.c */ + +/* Start: bn_mp_sub_d.c */ +#include "tma.h" +#ifdef BN_MP_SUB_D_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* single digit subtraction */ +int +mp_sub_d (mp_int * a, mp_digit b, mp_int * c) +{ + mp_digit *tmpa, *tmpc, mu; + int res, ix, oldused; + + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } + } + + /* if a is negative just do an unsigned + * addition [with fudged signs] + */ + if (a->sign == MP_NEG) { + a->sign = MP_ZPOS; + res = mp_add_d(a, b, c); + a->sign = c->sign = MP_NEG; + return res; + } + + /* setup regs */ + oldused = c->used; + tmpa = a->dp; + tmpc = c->dp; + + /* if a <= b simply fix the single digit */ + if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { + if (a->used == 1) { + *tmpc++ = b - *tmpa; + } else { + *tmpc++ = b; + } + ix = 1; + + /* negative/1digit */ + c->sign = MP_NEG; + c->used = 1; + } else { + /* positive/size */ + c->sign = MP_ZPOS; + c->used = a->used; + + /* subtract first digit */ + *tmpc = *tmpa++ - b; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + + /* handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ - mu; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + } + } + + /* zero excess digits */ + while (ix++ < oldused) { + *tmpc++ = 0; + } + mp_clamp(c); + return MP_OKAY; +} + +#endif + +/* End: bn_mp_sub_d.c */ + +/* Start: bn_mp_submod.c */ +#include "tma.h" +#ifdef BN_MP_SUBMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* d = a - b (mod c) */ +int +mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + int res; + mp_int t; + + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_sub (a, b, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + res = mp_mod (&t, c, d); + mp_clear (&t); + return res; +} +#endif + +/* End: bn_mp_submod.c */ + +/* Start: bn_mp_to_signed_bin.c */ +#include "tma.h" +#ifdef BN_MP_TO_SIGNED_BIN_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* store in signed [big endian] format */ +int mp_to_signed_bin (mp_int * a, unsigned char *b) +{ + int res; + + if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) { + return res; + } + b[0] = (unsigned char) ((a->sign == MP_ZPOS) ? 0 : 1); + return MP_OKAY; +} +#endif + +/* End: bn_mp_to_signed_bin.c */ + +/* Start: bn_mp_to_signed_bin_n.c */ +#include "tma.h" +#ifdef BN_MP_TO_SIGNED_BIN_N_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* store in signed [big endian] format */ +int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) +{ + if (*outlen < (unsigned long)mp_signed_bin_size(a)) { + return MP_VAL; + } + *outlen = mp_signed_bin_size(a); + return mp_to_signed_bin(a, b); +} +#endif + +/* End: bn_mp_to_signed_bin_n.c */ + +/* Start: bn_mp_to_unsigned_bin.c */ +#include "tma.h" +#ifdef BN_MP_TO_UNSIGNED_BIN_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* store in unsigned [big endian] format */ +int mp_to_unsigned_bin (mp_int * a, unsigned char *b) +{ + int x, res; + mp_int t; + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + x = 0; + while (mp_iszero (&t) == 0) { +#ifndef MP_8BIT + b[x++] = (unsigned char) (t.dp[0] & 255); +#else + b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7)); +#endif + if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) { + mp_clear (&t); + return res; + } + } + bn_reverse (b, x); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_mp_to_unsigned_bin.c */ + +/* Start: bn_mp_to_unsigned_bin_n.c */ +#include "tma.h" +#ifdef BN_MP_TO_UNSIGNED_BIN_N_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* store in unsigned [big endian] format */ +int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) +{ + if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) { + return MP_VAL; + } + *outlen = mp_unsigned_bin_size(a); + return mp_to_unsigned_bin(a, b); +} +#endif + +/* End: bn_mp_to_unsigned_bin_n.c */ + +/* Start: bn_mp_toom_mul.c */ +#include "tma.h" +#ifdef BN_MP_TOOM_MUL_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* multiplication using the Toom-Cook 3-way algorithm + * + * Much more complicated than Karatsuba but has a lower + * asymptotic running time of O(N**1.464). This algorithm is + * only particularly useful on VERY large inputs + * (we're talking 1000s of digits here...). +*/ +int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; + int res, B; + + /* init temps */ + if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, + &a0, &a1, &a2, &b0, &b1, + &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { + return res; + } + + /* B */ + B = MIN(a->used, b->used) / 3; + + /* a = a2 * B**2 + a1 * B + a0 */ + if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_copy(a, &a1)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&a1, B); + mp_mod_2d(&a1, DIGIT_BIT * B, &a1); + + if ((res = mp_copy(a, &a2)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&a2, B*2); + + /* b = b2 * B**2 + b1 * B + b0 */ + if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_copy(b, &b1)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&b1, B); + mp_mod_2d(&b1, DIGIT_BIT * B, &b1); + + if ((res = mp_copy(b, &b2)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&b2, B*2); + + /* w0 = a0*b0 */ + if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { + goto ERR; + } + + /* w4 = a2 * b2 */ + if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { + goto ERR; + } + + /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ + if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { + goto ERR; + } + + /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ + if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { + goto ERR; + } + + + /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ + if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { + goto ERR; + } + + /* now solve the matrix + + 0 0 0 0 1 + 1 2 4 8 16 + 1 1 1 1 1 + 16 8 4 2 1 + 1 0 0 0 0 + + using 12 subtractions, 4 shifts, + 2 small divisions and 1 small multiplication + */ + + /* r1 - r4 */ + if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r0 */ + if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1/2 */ + if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3/2 */ + if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { + goto ERR; + } + /* r2 - r0 - r4 */ + if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { + goto ERR; + } + /* r1 - r2 */ + if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r2 */ + if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1 - 8r0 */ + if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - 8r4 */ + if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { + goto ERR; + } + /* 3r2 - r1 - r3 */ + if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { + goto ERR; + } + /* r1 - r2 */ + if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r2 */ + if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1/3 */ + if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { + goto ERR; + } + /* r3/3 */ + if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { + goto ERR; + } + + /* at this point shift W[n] by B*n */ + if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { + goto ERR; + } + +ERR: + mp_clear_multi(&w0, &w1, &w2, &w3, &w4, + &a0, &a1, &a2, &b0, &b1, + &b2, &tmp1, &tmp2, NULL); + return res; +} + +#endif + +/* End: bn_mp_toom_mul.c */ + +/* Start: bn_mp_toom_sqr.c */ +#include "tma.h" +#ifdef BN_MP_TOOM_SQR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* squaring using Toom-Cook 3-way algorithm */ +int +mp_toom_sqr(mp_int *a, mp_int *b) +{ + mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; + int res, B; + + /* init temps */ + if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { + return res; + } + + /* B */ + B = a->used / 3; + + /* a = a2 * B**2 + a1 * B + a0 */ + if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_copy(a, &a1)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&a1, B); + mp_mod_2d(&a1, DIGIT_BIT * B, &a1); + + if ((res = mp_copy(a, &a2)) != MP_OKAY) { + goto ERR; + } + mp_rshd(&a2, B*2); + + /* w0 = a0*a0 */ + if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { + goto ERR; + } + + /* w4 = a2 * a2 */ + if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { + goto ERR; + } + + /* w1 = (a2 + 2(a1 + 2a0))**2 */ + if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { + goto ERR; + } + + /* w3 = (a0 + 2(a1 + 2a2))**2 */ + if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { + goto ERR; + } + + + /* w2 = (a2 + a1 + a0)**2 */ + if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { + goto ERR; + } + + /* now solve the matrix + + 0 0 0 0 1 + 1 2 4 8 16 + 1 1 1 1 1 + 16 8 4 2 1 + 1 0 0 0 0 + + using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. + */ + + /* r1 - r4 */ + if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r0 */ + if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1/2 */ + if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3/2 */ + if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { + goto ERR; + } + /* r2 - r0 - r4 */ + if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { + goto ERR; + } + /* r1 - r2 */ + if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r2 */ + if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1 - 8r0 */ + if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - 8r4 */ + if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { + goto ERR; + } + /* 3r2 - r1 - r3 */ + if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { + goto ERR; + } + /* r1 - r2 */ + if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { + goto ERR; + } + /* r3 - r2 */ + if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { + goto ERR; + } + /* r1/3 */ + if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { + goto ERR; + } + /* r3/3 */ + if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { + goto ERR; + } + + /* at this point shift W[n] by B*n */ + if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { + goto ERR; + } + + if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { + goto ERR; + } + +ERR: + mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); + return res; +} + +#endif + +/* End: bn_mp_toom_sqr.c */ + +/* Start: bn_mp_toradix.c */ +#include "tma.h" +#ifdef BN_MP_TORADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* stores a bignum as a ASCII string in a given radix (2..64) */ +int mp_toradix (mp_int * a, char *str, int radix) +{ + int res, digs; + mp_int t; + mp_digit d; + char *_s = str; + + /* check range of the radix */ + if (radix < 2 || radix > 64) { + return MP_VAL; + } + + /* quick out if its zero */ + if (mp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return MP_OKAY; + } + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* if it is negative output a - */ + if (t.sign == MP_NEG) { + ++_s; + *str++ = '-'; + t.sign = MP_ZPOS; + } + + digs = 0; + while (mp_iszero (&t) == 0) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + *str++ = mp_s_rmap[d]; + ++digs; + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [exluding the sign] of the number] + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NULL so the string is properly terminated */ + *str = '\0'; + + mp_clear (&t); + return MP_OKAY; +} + +#endif + +/* End: bn_mp_toradix.c */ + +/* Start: bn_mp_toradix_n.c */ +#include "tma.h" +#ifdef BN_MP_TORADIX_N_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* stores a bignum as a ASCII string in a given radix (2..64) + * + * Stores upto maxlen-1 chars and always a NULL byte + */ +int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) +{ + int res, digs; + mp_int t; + mp_digit d; + char *_s = str; + + /* check range of the maxlen, radix */ + if (maxlen < 3 || radix < 2 || radix > 64) { + return MP_VAL; + } + + /* quick out if its zero */ + if (mp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return MP_OKAY; + } + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* if it is negative output a - */ + if (t.sign == MP_NEG) { + /* we have to reverse our digits later... but not the - sign!! */ + ++_s; + + /* store the flag and mark the number as positive */ + *str++ = '-'; + t.sign = MP_ZPOS; + + /* subtract a char */ + --maxlen; + } + + digs = 0; + while (mp_iszero (&t) == 0) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + *str++ = mp_s_rmap[d]; + ++digs; + + if (--maxlen == 1) { + /* no more room */ + break; + } + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [exluding the sign] of the number] + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NULL so the string is properly terminated */ + *str = '\0'; + + mp_clear (&t); + return MP_OKAY; +} + +#endif + +/* End: bn_mp_toradix_n.c */ + +/* Start: bn_mp_unsigned_bin_size.c */ +#include "tma.h" +#ifdef BN_MP_UNSIGNED_BIN_SIZE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* get the size for an unsigned equivalent */ +int mp_unsigned_bin_size (mp_int * a) +{ + int size = mp_count_bits (a); + return (size / 8 + ((size & 7) != 0 ? 1 : 0)); +} +#endif + +/* End: bn_mp_unsigned_bin_size.c */ + +/* Start: bn_mp_xor.c */ +#include "tma.h" +#ifdef BN_MP_XOR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* XOR two ints together */ +int +mp_xor (mp_int * a, mp_int * b, mp_int * c) +{ + int res, ix, px; + mp_int t, *x; + + if (a->used > b->used) { + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + px = b->used; + x = b; + } else { + if ((res = mp_init_copy (&t, b)) != MP_OKAY) { + return res; + } + px = a->used; + x = a; + } + + for (ix = 0; ix < px; ix++) { + t.dp[ix] ^= x->dp[ix]; + } + mp_clamp (&t); + mp_exch (c, &t); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_mp_xor.c */ + +/* Start: bn_mp_zero.c */ +#include "tma.h" +#ifdef BN_MP_ZERO_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* set to zero */ +void mp_zero (mp_int * a) +{ + int n; + mp_digit *tmp; + + a->sign = MP_ZPOS; + a->used = 0; + + tmp = a->dp; + for (n = 0; n < a->alloc; n++) { + *tmp++ = 0; + } +} +#endif + +/* End: bn_mp_zero.c */ + +/* Start: bn_prime_tab.c */ +#include "tma.h" +#ifdef BN_PRIME_TAB_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +const mp_digit ltm_prime_tab[] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, +#ifndef MP_8BIT + 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +#endif +}; +#endif + +/* End: bn_prime_tab.c */ + +/* Start: bn_reverse.c */ +#include "tma.h" +#ifdef BN_REVERSE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* reverse an array, used for radix code */ +void +bn_reverse (unsigned char *s, int len) +{ + int ix, iy; + unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} +#endif + +/* End: bn_reverse.c */ + +/* Start: bn_s_mp_add.c */ +#include "tma.h" +#ifdef BN_S_MP_ADD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* low level addition, based on HAC pp.594, Algorithm 14.7 */ +int +s_mp_add (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int *x; + int olduse, res, min, max; + + /* find sizes, we let |a| <= |b| which means we have to sort + * them. "x" will point to the input with the most digits + */ + if (a->used > b->used) { + min = b->used; + max = a->used; + x = a; + } else { + min = a->used; + max = b->used; + x = b; + } + + /* init result */ + if (c->alloc < max + 1) { + if ((res = mp_grow (c, max + 1)) != MP_OKAY) { + return res; + } + } + + /* get old used digit count and set new one */ + olduse = c->used; + c->used = max + 1; + + { + register mp_digit u, *tmpa, *tmpb, *tmpc; + register int i; + + /* alias for digit pointers */ + + /* first input */ + tmpa = a->dp; + + /* second input */ + tmpb = b->dp; + + /* destination */ + tmpc = c->dp; + + /* zero the carry */ + u = 0; + for (i = 0; i < min; i++) { + /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ + *tmpc = *tmpa++ + *tmpb++ + u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)DIGIT_BIT); + + /* take away carry bit from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* now copy higher words if any, that is in A+B + * if A or B has more digits add those in + */ + if (min != max) { + for (; i < max; i++) { + /* T[i] = X[i] + U */ + *tmpc = x->dp[i] + u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)DIGIT_BIT); + + /* take away carry bit from T[i] */ + *tmpc++ &= MP_MASK; + } + } + + /* add carry */ + *tmpc++ = u; + + /* clear digits above oldused */ + for (i = c->used; i < olduse; i++) { + *tmpc++ = 0; + } + } + + mp_clamp (c); + return MP_OKAY; +} +#endif + +/* End: bn_s_mp_add.c */ + +/* Start: bn_s_mp_exptmod.c */ +#include "tma.h" +#ifdef BN_S_MP_EXPTMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +#ifdef MP_LOW_MEM + #define TAB_SIZE 32 +#else + #define TAB_SIZE 256 +#endif + +int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) +{ + mp_int M[TAB_SIZE], res, mu; + mp_digit buf; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + int (*redux)(mp_int*,mp_int*,mp_int*); + + /* find window size */ + x = mp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else if (x <= 1303) { + winsize = 6; + } else if (x <= 3529) { + winsize = 7; + } else { + winsize = 8; + } + +#ifdef MP_LOW_MEM + if (winsize > 5) { + winsize = 5; + } +#endif + + /* init M array */ + /* init first cell */ + if ((err = mp_init(&M[1])) != MP_OKAY) { + return err; + } + + /* now init the second half of the array */ + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + if ((err = mp_init(&M[x])) != MP_OKAY) { + for (y = 1<<(winsize-1); y < x; y++) { + mp_clear (&M[y]); + } + mp_clear(&M[1]); + return err; + } + } + + /* create mu, used for Barrett reduction */ + if ((err = mp_init (&mu)) != MP_OKAY) { + goto LBL_M; + } + + if (redmode == 0) { + if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce; + } else { + if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce_2k_l; + } + + /* create M table + * + * The M table contains powers of the base, + * e.g. M[x] = G**x mod P + * + * The first half of the table is not + * computed though accept for M[0] and M[1] + */ + if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { + goto LBL_MU; + } + + /* compute the value at M[1<<(winsize-1)] by squaring + * M[1] (winsize-1) times + */ + if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { + goto LBL_MU; + } + + for (x = 0; x < (winsize - 1); x++) { + /* square it */ + if ((err = mp_sqr (&M[1 << (winsize - 1)], + &M[1 << (winsize - 1)])) != MP_OKAY) { + goto LBL_MU; + } + + /* reduce modulo P */ + if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { + goto LBL_MU; + } + } + + /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) + * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) + */ + for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { + if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { + goto LBL_MU; + } + if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { + goto LBL_MU; + } + } + + /* setup result */ + if ((err = mp_init (&res)) != MP_OKAY) { + goto LBL_MU; + } + mp_set (&res, 1); + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits */ + if (digidx == -1) { + break; + } + /* read next digit and reset the bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int) DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; + buf <<= (mp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* then multiply */ + if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + } + } + } + + mp_exch (&res, Y); + err = MP_OKAY; +LBL_RES:mp_clear (&res); +LBL_MU:mp_clear (&mu); +LBL_M: + mp_clear(&M[1]); + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + mp_clear (&M[x]); + } + return err; +} +#endif + +/* End: bn_s_mp_exptmod.c */ + +/* Start: bn_s_mp_mul_digs.c */ +#include "tma.h" +#ifdef BN_S_MP_MUL_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* multiplies |a| * |b| and only computes upto digs digits of result + * HAC pp. 595, Algorithm 14.12 Modified so you can control how + * many digits of output are created. + */ +int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + mp_int t; + int res, pa, pb, ix, iy; + mp_digit u; + mp_word r; + mp_digit tmpx, *tmpt, *tmpy; + + /* can we use the fast multiplier? */ + if (((digs) < MP_WARRAY) && + MIN (a->used, b->used) < + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_s_mp_mul_digs (a, b, c, digs); + } + + if ((res = mp_init_size (&t, digs)) != MP_OKAY) { + return res; + } + t.used = digs; + + /* compute the digits of the product directly */ + pa = a->used; + for (ix = 0; ix < pa; ix++) { + /* set the carry to zero */ + u = 0; + + /* limit ourselves to making digs digits of output */ + pb = MIN (b->used, digs - ix); + + /* setup some aliases */ + /* copy of the digit from a used within the nested loop */ + tmpx = a->dp[ix]; + + /* an alias for the destination shifted ix places */ + tmpt = t.dp + ix; + + /* an alias for the digits of b */ + tmpy = b->dp; + + /* compute the columns of the output and propagate the carry */ + for (iy = 0; iy < pb; iy++) { + /* compute the column as a mp_word */ + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); + + /* the new column is the lower part of the result */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get the carry word from the result */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + /* set carry if it is placed below digs */ + if (ix + iy < digs) { + *tmpt = u; + } + } + + mp_clamp (&t); + mp_exch (&t, c); + + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_s_mp_mul_digs.c */ + +/* Start: bn_s_mp_mul_high_digs.c */ +#include "tma.h" +#ifdef BN_S_MP_MUL_HIGH_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* multiplies |a| * |b| and does not compute the lower digs digits + * [meant to get the higher part of the product] + */ +int +s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + mp_int t; + int res, pa, pb, ix, iy; + mp_digit u; + mp_word r; + mp_digit tmpx, *tmpt, *tmpy; + + /* can we use the fast multiplier? */ +#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C + if (((a->used + b->used + 1) < MP_WARRAY) + && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_s_mp_mul_high_digs (a, b, c, digs); + } +#endif + + if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { + return res; + } + t.used = a->used + b->used + 1; + + pa = a->used; + pb = b->used; + for (ix = 0; ix < pa; ix++) { + /* clear the carry */ + u = 0; + + /* left hand side of A[ix] * B[iy] */ + tmpx = a->dp[ix]; + + /* alias to the address of where the digits will be stored */ + tmpt = &(t.dp[digs]); + + /* alias for where to read the right hand side from */ + tmpy = b->dp + (digs - ix); + + for (iy = digs - ix; iy < pb; iy++) { + /* calculate the double precision result */ + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); + + /* get the lower part */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* carry the carry */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + *tmpt = u; + } + mp_clamp (&t); + mp_exch (&t, c); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_s_mp_mul_high_digs.c */ + +/* Start: bn_s_mp_sqr.c */ +#include "tma.h" +#ifdef BN_S_MP_SQR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ +int s_mp_sqr (mp_int * a, mp_int * b) +{ + mp_int t; + int res, ix, iy, pa; + mp_word r; + mp_digit u, tmpx, *tmpt; + + pa = a->used; + if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { + return res; + } + + /* default used is maximum possible size */ + t.used = 2*pa + 1; + + for (ix = 0; ix < pa; ix++) { + /* first calculate the digit at 2*ix */ + /* calculate double precision result */ + r = ((mp_word) t.dp[2*ix]) + + ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); + + /* store lower part in result */ + t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get the carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* left hand side of A[ix] * A[iy] */ + tmpx = a->dp[ix]; + + /* alias for where to store the results */ + tmpt = t.dp + (2*ix + 1); + + for (iy = ix + 1; iy < pa; iy++) { + /* first calculate the product */ + r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); + + /* now calculate the double precision result, note we use + * addition instead of *2 since it's easier to optimize + */ + r = ((mp_word) *tmpt) + r + r + ((mp_word) u); + + /* store lower part */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + } + /* propagate upwards */ + while (u != ((mp_digit) 0)) { + r = ((mp_word) *tmpt) + ((mp_word) u); + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + } + } + + mp_clamp (&t); + mp_exch (&t, b); + mp_clear (&t); + return MP_OKAY; +} +#endif + +/* End: bn_s_mp_sqr.c */ + +/* Start: bn_s_mp_sub.c */ +#include "tma.h" +#ifdef BN_S_MP_SUB_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ +int +s_mp_sub (mp_int * a, mp_int * b, mp_int * c) +{ + int olduse, res, min, max; + + /* find sizes */ + min = b->used; + max = a->used; + + /* init result */ + if (c->alloc < max) { + if ((res = mp_grow (c, max)) != MP_OKAY) { + return res; + } + } + olduse = c->used; + c->used = max; + + { + register mp_digit u, *tmpa, *tmpb, *tmpc; + register int i; + + /* alias for digit pointers */ + tmpa = a->dp; + tmpb = b->dp; + tmpc = c->dp; + + /* set carry to zero */ + u = 0; + for (i = 0; i < min; i++) { + /* T[i] = A[i] - B[i] - U */ + *tmpc = *tmpa++ - *tmpb++ - u; + + /* U = carry bit of T[i] + * Note this saves performing an AND operation since + * if a carry does occur it will propagate all the way to the + * MSB. As a result a single shift is enough to get the carry + */ + u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); + + /* Clear carry from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* now copy higher words if any, e.g. if A has more digits than B */ + for (; i < max; i++) { + /* T[i] = A[i] - U */ + *tmpc = *tmpa++ - u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); + + /* Clear carry from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* clear digits above used (since we may not have grown result above) */ + for (i = c->used; i < olduse; i++) { + *tmpc++ = 0; + } + } + + mp_clamp (c); + return MP_OKAY; +} + +#endif + +/* End: bn_s_mp_sub.c */ + +/* Start: bncore.c */ +#include "tma.h" +#ifdef BNCORE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* Known optimal configurations + + CPU /Compiler /MUL CUTOFF/SQR CUTOFF +------------------------------------------------------------- + Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-) + AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34 + +*/ + +int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */ + KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */ + + TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ + TOOM_SQR_CUTOFF = 400; +#endif + +/* End: bncore.c */ + + +/* EOF */ diff --git a/lib/silcmath/tma.h b/lib/silcmath/tma.h new file mode 100644 index 00000000..02e370ab --- /dev/null +++ b/lib/silcmath/tma.h @@ -0,0 +1,582 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#ifndef TMA_H +#define TMA_H + +#include +#include +#include +#include +#include + +#include + +/* Assure these -Pekka */ +#undef MP_8BIT +#undef MP_16BIT +#undef CRYPT + +#undef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#undef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) + +#ifdef __cplusplus +extern "C" { + +/* C++ compilers don't like assigning void * to mp_digit * */ +#define OPT_CAST(x) (x *) + +#else + +/* C on the other hand doesn't care */ +#define OPT_CAST(x) + +#endif + + +/* detect 64-bit mode if possible */ +#if defined(__x86_64__) + #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT)) + #define MP_64BIT + #endif +#endif + +/* some default configurations. + * + * A "mp_digit" must be able to hold DIGIT_BIT + 1 bits + * A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits + * + * At the very least a mp_digit must be able to hold 7 bits + * [any size beyond that is ok provided it doesn't overflow the data type] + */ +#ifdef MP_8BIT + typedef unsigned char mp_digit; + typedef unsigned short mp_word; +#elif defined(MP_16BIT) + typedef unsigned short mp_digit; + typedef unsigned long mp_word; +#elif defined(MP_64BIT) + /* for GCC only on supported platforms */ +#ifndef CRYPT + typedef unsigned long long ulong64; + typedef signed long long long64; +#endif + + typedef unsigned long mp_digit; + typedef unsigned long mp_word __attribute__ ((mode(TI))); + + #define DIGIT_BIT 60 +#else + /* this is the default case, 28-bit digits */ + + /* this is to make porting into LibTomCrypt easier :-) */ +#ifndef CRYPT + #if defined(_MSC_VER) || defined(__BORLANDC__) + typedef unsigned __int64 ulong64; + typedef signed __int64 long64; + #else + typedef unsigned long long ulong64; + typedef signed long long long64; + #endif +#endif + + typedef unsigned long mp_digit; + typedef ulong64 mp_word; + +#ifdef MP_31BIT + /* this is an extension that uses 31-bit digits */ + #define DIGIT_BIT 31 +#else + /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */ + #define DIGIT_BIT 28 + #define MP_28BIT +#endif +#endif + +/* define heap macros */ +#ifndef CRYPT + /* default to libc stuff */ + #ifndef XMALLOC + #define XMALLOC malloc + #define XFREE free + #define XREALLOC realloc + #define XCALLOC calloc + #else + /* prototypes for our heap functions */ + extern void *XMALLOC(size_t n); + extern void *REALLOC(void *p, size_t n); + extern void *XCALLOC(size_t n, size_t s); + extern void XFREE(void *p); + #endif +#endif + + +/* otherwise the bits per digit is calculated automatically from the size of a mp_digit */ +#ifndef DIGIT_BIT + #define DIGIT_BIT ((int)((CHAR_BIT * sizeof(mp_digit) - 1))) /* bits per digit */ +#endif + +#define MP_DIGIT_BIT DIGIT_BIT +#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) +#define MP_DIGIT_MAX MP_MASK + +/* equalities */ +#define MP_LT -1 /* less than */ +#define MP_EQ 0 /* equal to */ +#define MP_GT 1 /* greater than */ + +#define MP_ZPOS 0 /* positive integer */ +#define MP_NEG 1 /* negative */ + +#define MP_OKAY 0 /* ok result */ +#define MP_MEM -2 /* out of mem */ +#define MP_VAL -3 /* invalid input */ +#define MP_RANGE MP_VAL + +#define MP_YES 1 /* yes response */ +#define MP_NO 0 /* no response */ + +/* Primality generation flags */ +#define LTM_PRIME_BBS 0x0001 /* BBS style prime */ +#define LTM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */ +#define LTM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */ +#define LTM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */ + +typedef int mp_err; + +/* you'll have to tune these... */ +extern int KARATSUBA_MUL_CUTOFF, + KARATSUBA_SQR_CUTOFF, + TOOM_MUL_CUTOFF, + TOOM_SQR_CUTOFF; + +/* define this to use lower memory usage routines (exptmods mostly) */ +/* #define MP_LOW_MEM */ + +/* default precision */ +#ifndef MP_PREC + #ifndef MP_LOW_MEM + #define MP_PREC 64 /* default digits of precision */ + #else + #define MP_PREC 8 /* default digits of precision */ + #endif +#endif + +/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */ +#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1)) + +/* the infamous mp_int structure */ +typedef struct { + int used, alloc, sign; + mp_digit *dp; +} mp_int; + +/* callback for mp_prime_random, should fill dst with random bytes and return how many read [upto len] */ +typedef int ltm_prime_callback(unsigned char *dst, int len, void *dat); + + +#define USED(m) ((m)->used) +#define DIGIT(m,k) ((m)->dp[(k)]) +#define SIGN(m) ((m)->sign) + +/* error code to char* string */ +char *mp_error_to_string(int code); + +/* ---> init and deinit bignum functions <--- */ +/* init a bignum */ +int mp_init(mp_int *a); + +/* free a bignum */ +void mp_clear(mp_int *a); + +/* init a null terminated series of arguments */ +int mp_init_multi(mp_int *mp, ...); + +/* clear a null terminated series of arguments */ +void mp_clear_multi(mp_int *mp, ...); + +/* exchange two ints */ +void mp_exch(mp_int *a, mp_int *b); + +/* shrink ram required for a bignum */ +int mp_shrink(mp_int *a); + +/* grow an int to a given size */ +int mp_grow(mp_int *a, int size); + +/* init to a given number of digits */ +int mp_init_size(mp_int *a, int size); + +/* ---> Basic Manipulations <--- */ +#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) +#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO) +#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO) + +/* set to zero */ +void mp_zero(mp_int *a); + +/* set to a digit */ +void mp_set(mp_int *a, mp_digit b); + +/* set a 32-bit const */ +int mp_set_int(mp_int *a, unsigned long b); + +/* get a 32-bit value */ +unsigned long mp_get_int(mp_int * a); + +/* initialize and set a digit */ +int mp_init_set (mp_int * a, mp_digit b); + +/* initialize and set 32-bit value */ +int mp_init_set_int (mp_int * a, unsigned long b); + +/* copy, b = a */ +int mp_copy(mp_int *a, mp_int *b); + +/* inits and copies, a = b */ +int mp_init_copy(mp_int *a, mp_int *b); + +/* trim unused digits */ +void mp_clamp(mp_int *a); + +/* ---> digit manipulation <--- */ + +/* right shift by "b" digits */ +void mp_rshd(mp_int *a, int b); + +/* left shift by "b" digits */ +int mp_lshd(mp_int *a, int b); + +/* c = a / 2**b */ +int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d); + +/* b = a/2 */ +int mp_div_2(mp_int *a, mp_int *b); + +/* c = a * 2**b */ +int mp_mul_2d(mp_int *a, int b, mp_int *c); + +/* b = a*2 */ +int mp_mul_2(mp_int *a, mp_int *b); + +/* c = a mod 2**d */ +int mp_mod_2d(mp_int *a, int b, mp_int *c); + +/* computes a = 2**b */ +int mp_2expt(mp_int *a, int b); + +/* Counts the number of lsbs which are zero before the first zero bit */ +int mp_cnt_lsb(mp_int *a); + +/* I Love Earth! */ + +/* makes a pseudo-random int of a given size */ +int mp_rand(mp_int *a, int digits); + +/* ---> binary operations <--- */ +/* c = a XOR b */ +int mp_xor(mp_int *a, mp_int *b, mp_int *c); + +/* c = a OR b */ +int mp_or(mp_int *a, mp_int *b, mp_int *c); + +/* c = a AND b */ +int mp_and(mp_int *a, mp_int *b, mp_int *c); + +/* ---> Basic arithmetic <--- */ + +/* b = -a */ +int mp_neg(mp_int *a, mp_int *b); + +/* b = |a| */ +int mp_abs(mp_int *a, mp_int *b); + +/* compare a to b */ +int mp_cmp(mp_int *a, mp_int *b); + +/* compare |a| to |b| */ +int mp_cmp_mag(mp_int *a, mp_int *b); + +/* c = a + b */ +int mp_add(mp_int *a, mp_int *b, mp_int *c); + +/* c = a - b */ +int mp_sub(mp_int *a, mp_int *b, mp_int *c); + +/* c = a * b */ +int mp_mul(mp_int *a, mp_int *b, mp_int *c); + +/* b = a*a */ +int mp_sqr(mp_int *a, mp_int *b); + +/* a/b => cb + d == a */ +int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d); + +/* c = a mod b, 0 <= c < b */ +int mp_mod(mp_int *a, mp_int *b, mp_int *c); + +/* ---> single digit functions <--- */ + +/* compare against a single digit */ +int mp_cmp_d(mp_int *a, mp_digit b); + +/* c = a + b */ +int mp_add_d(mp_int *a, mp_digit b, mp_int *c); + +/* c = a - b */ +int mp_sub_d(mp_int *a, mp_digit b, mp_int *c); + +/* c = a * b */ +int mp_mul_d(mp_int *a, mp_digit b, mp_int *c); + +/* a/b => cb + d == a */ +int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d); + +/* a/3 => 3c + d == a */ +int mp_div_3(mp_int *a, mp_int *c, mp_digit *d); + +/* c = a**b */ +int mp_expt_d(mp_int *a, mp_digit b, mp_int *c); + +/* c = a mod b, 0 <= c < b */ +int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c); + +/* ---> number theory <--- */ + +/* d = a + b (mod c) */ +int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); + +/* d = a - b (mod c) */ +int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); + +/* d = a * b (mod c) */ +int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); + +/* c = a * a (mod b) */ +int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c); + +/* c = 1/a (mod b) */ +int mp_invmod(mp_int *a, mp_int *b, mp_int *c); + +/* c = (a, b) */ +int mp_gcd(mp_int *a, mp_int *b, mp_int *c); + +/* produces value such that U1*a + U2*b = U3 */ +int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3); + +/* c = [a, b] or (a*b)/(a, b) */ +int mp_lcm(mp_int *a, mp_int *b, mp_int *c); + +/* finds one of the b'th root of a, such that |c|**b <= |a| + * + * returns error if a < 0 and b is even + */ +int mp_n_root(mp_int *a, mp_digit b, mp_int *c); + +/* special sqrt algo */ +int mp_sqrt(mp_int *arg, mp_int *ret); + +/* is number a square? */ +int mp_is_square(mp_int *arg, int *ret); + +/* computes the jacobi c = (a | n) (or Legendre if b is prime) */ +int mp_jacobi(mp_int *a, mp_int *n, int *c); + +/* used to setup the Barrett reduction for a given modulus b */ +int mp_reduce_setup(mp_int *a, mp_int *b); + +/* Barrett Reduction, computes a (mod b) with a precomputed value c + * + * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely + * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code]. + */ +int mp_reduce(mp_int *a, mp_int *b, mp_int *c); + +/* setups the montgomery reduction */ +int mp_montgomery_setup(mp_int *a, mp_digit *mp); + +/* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. + */ +int mp_montgomery_calc_normalization(mp_int *a, mp_int *b); + +/* computes x/R == x (mod N) via Montgomery Reduction */ +int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp); + +/* returns 1 if a is a valid DR modulus */ +int mp_dr_is_modulus(mp_int *a); + +/* sets the value of "d" required for mp_dr_reduce */ +void mp_dr_setup(mp_int *a, mp_digit *d); + +/* reduces a modulo b using the Diminished Radix method */ +int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp); + +/* returns true if a can be reduced with mp_reduce_2k */ +int mp_reduce_is_2k(mp_int *a); + +/* determines k value for 2k reduction */ +int mp_reduce_2k_setup(mp_int *a, mp_digit *d); + +/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */ +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d); + +/* returns true if a can be reduced with mp_reduce_2k_l */ +int mp_reduce_is_2k_l(mp_int *a); + +/* determines k value for 2k reduction */ +int mp_reduce_2k_setup_l(mp_int *a, mp_int *d); + +/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */ +int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d); + +/* d = a**b (mod c) */ +int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); + +/* ---> Primes <--- */ + +/* number of primes */ +#ifdef MP_8BIT + #define PRIME_SIZE 31 +#else + #define PRIME_SIZE 256 +#endif + +/* table of first PRIME_SIZE primes */ +extern const mp_digit ltm_prime_tab[]; + +/* result=1 if a is divisible by one of the first PRIME_SIZE primes */ +int mp_prime_is_divisible(mp_int *a, int *result); + +/* performs one Fermat test of "a" using base "b". + * Sets result to 0 if composite or 1 if probable prime + */ +int mp_prime_fermat(mp_int *a, mp_int *b, int *result); + +/* performs one Miller-Rabin test of "a" using base "b". + * Sets result to 0 if composite or 1 if probable prime + */ +int mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result); + +/* This gives [for a given bit size] the number of trials required + * such that Miller-Rabin gives a prob of failure lower than 2^-96 + */ +int mp_prime_rabin_miller_trials(int size); + +/* performs t rounds of Miller-Rabin on "a" using the first + * t prime bases. Also performs an initial sieve of trial + * division. Determines if "a" is prime with probability + * of error no more than (1/4)**t. + * + * Sets result to 1 if probably prime, 0 otherwise + */ +int mp_prime_is_prime(mp_int *a, int t, int *result); + +/* finds the next prime after the number "a" using "t" trials + * of Miller-Rabin. + * + * bbs_style = 1 means the prime must be congruent to 3 mod 4 + */ +int mp_prime_next_prime(mp_int *a, int t, int bbs_style); + +/* makes a truly random prime of a given size (bytes), + * call with bbs = 1 if you want it to be congruent to 3 mod 4 + * + * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can + * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself + * so it can be NULL + * + * The prime generated will be larger than 2^(8*size). + */ +#define mp_prime_random(a, t, size, bbs, cb, dat) mp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?LTM_PRIME_BBS:0, cb, dat) + +/* makes a truly random prime of a given size (bits), + * + * Flags are as follows: + * + * LTM_PRIME_BBS - make prime congruent to 3 mod 4 + * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) + * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero + * LTM_PRIME_2MSB_ON - make the 2nd highest bit one + * + * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can + * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself + * so it can be NULL + * + */ +int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat); + +/* ---> radix conversion <--- */ +int mp_count_bits(mp_int *a); + +int mp_unsigned_bin_size(mp_int *a); +int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c); +int mp_to_unsigned_bin(mp_int *a, unsigned char *b); +int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen); + +int mp_signed_bin_size(mp_int *a); +int mp_read_signed_bin(mp_int *a, unsigned char *b, int c); +int mp_to_signed_bin(mp_int *a, unsigned char *b); +int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen); + +int mp_read_radix(mp_int *a, const char *str, int radix); +int mp_toradix(mp_int *a, char *str, int radix); +int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen); +int mp_radix_size(mp_int *a, int radix, int *size); + +int mp_fread(mp_int *a, int radix, FILE *stream); +int mp_fwrite(mp_int *a, int radix, FILE *stream); + +#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len)) +#define mp_raw_size(mp) mp_signed_bin_size(mp) +#define mp_toraw(mp, str) mp_to_signed_bin((mp), (str)) +#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len)) +#define mp_mag_size(mp) mp_unsigned_bin_size(mp) +#define mp_tomag(mp, str) mp_to_unsigned_bin((mp), (str)) + +#define mp_tobinary(M, S) mp_toradix((M), (S), 2) +#define mp_tooctal(M, S) mp_toradix((M), (S), 8) +#define mp_todecimal(M, S) mp_toradix((M), (S), 10) +#define mp_tohex(M, S) mp_toradix((M), (S), 16) + +/* lowlevel functions, do not call! */ +int s_mp_add(mp_int *a, mp_int *b, mp_int *c); +int s_mp_sub(mp_int *a, mp_int *b, mp_int *c); +#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) +int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs); +int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs); +int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs); +int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs); +int fast_s_mp_sqr(mp_int *a, mp_int *b); +int s_mp_sqr(mp_int *a, mp_int *b); +int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c); +int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c); +int mp_karatsuba_sqr(mp_int *a, mp_int *b); +int mp_toom_sqr(mp_int *a, mp_int *b); +int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c); +int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c); +int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp); +int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y, int mode); +int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int mode); +void bn_reverse(unsigned char *s, int len); + +extern const char *mp_s_rmap; + +#ifdef __cplusplus + } +#endif + +#endif /* TMA_H */ diff --git a/lib/silcske/silcske.c b/lib/silcske/silcske.c index e0012121..f3db0baf 100644 --- a/lib/silcske/silcske.c +++ b/lib/silcske/silcske.c @@ -1,10 +1,10 @@ /* - silcske.c + silcske.c Author: Pekka Riikonen - Copyright (C) 2000 - 2002 Pekka Riikonen + Copyright (C) 2000 - 2005 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,10 +23,10 @@ #include "groups_internal.h" /* Static functions */ -static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n, - SilcUInt32 len, +static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n, + SilcUInt32 len, SilcMPInt *rnd); -static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, +static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, unsigned char *return_hash, SilcUInt32 *return_hash_len, int initiator); @@ -117,11 +117,11 @@ void silc_ske_free(SilcSKE ske) } } -/* Sets the callback functions for the SKE session. +/* Sets the callback functions for the SKE session. The `send_packet' callback is a function that sends the packet to network. The SKE library will call it at any time packet needs to - be sent to the remote host. + be sent to the remote host. The `payload_receive' callback is called when the remote host's Key Exchange Start Payload has been processed. The payload is saved @@ -133,8 +133,8 @@ void silc_ske_free(SilcSKE ske) That is why the application must call the completion callback when the verification process has been completed. The library then calls the user callback (`proto_continue'), if it is provided to indicate that the SKE - protocol may continue. - + protocol may continue. + The `proto_continue' callback is called to indicate that it is safe to continue the execution of the SKE protocol after executing an asynchronous operation, such as calling the `verify_key' callback @@ -145,7 +145,7 @@ void silc_ske_free(SilcSKE ske) The `check_version' callback is called to verify the remote host's version. The application may check its own version against the remote host's version and determine whether supporting the remote host - is possible. + is possible. The `context' is passed as argument to all of the above callback functions. */ @@ -203,10 +203,10 @@ SilcSKEStatus silc_ske_initiator_start(SilcSKE ske, SilcRng rng, /* Send the packet. */ if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE, + (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE, ske->callbacks->context); - /* Save the the payload buffer for future use. It is later used to + /* Save the the payload buffer for future use. It is later used to compute the HASH value. */ ske->start_payload_copy = payload_buf; ske->start_payload = start_payload; @@ -219,7 +219,7 @@ SilcSKEStatus silc_ske_initiator_start(SilcSKE ske, SilcRng rng, security properties selected by the responder from our payload sent in the silc_ske_initiator_start function. */ -SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske, +SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske, SilcBuffer start_payload) { SilcSKEStatus status = SILC_SKE_STATUS_OK; @@ -248,7 +248,7 @@ SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske, /* Check version string */ if (ske->callbacks->check_version) { - status = ske->callbacks->check_version(ske, payload->version, + status = ske->callbacks->check_version(ske, payload->version, payload->version_len, ske->callbacks->context); if (status != SILC_SKE_STATUS_OK) { @@ -262,7 +262,7 @@ SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske, silc_ske_payload_start_free(ske->start_payload); /* Take the selected security properties into use while doing - the key exchange. This is used only while doing the key + the key exchange. This is used only while doing the key exchange. The same data is returned to upper levels by calling the callback function. */ ske->prop = prop = silc_calloc(1, sizeof(*prop)); @@ -328,8 +328,8 @@ SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske, return status; } -/* This function creates random number x, such that 1 < x < q and - computes e = g ^ x mod p and sends the result to the remote end in +/* This function creates random number x, such that 1 < x < q and + computes e = g ^ x mod p and sends the result to the remote end in Key Exchange Payload. */ SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske, @@ -352,7 +352,7 @@ SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske, return ske->status; } silc_mp_init(x); - status = + status = silc_ske_create_rnd(ske, &ske->prop->group->group_order, silc_mp_sizeinbase(&ske->prop->group->group_order, 2), x); @@ -378,7 +378,7 @@ SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske, /* Do the Diffie Hellman computation, e = g ^ x mod p */ silc_mp_init(&payload->x); - silc_mp_pow_mod(&payload->x, &ske->prop->group->generator, x, + silc_mp_pow_mod(&payload->x, &ske->prop->group->generator, x, &ske->prop->group->group); /* Get public key */ @@ -410,9 +410,9 @@ SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske, silc_ske_make_hash(ske, hash, &hash_len, TRUE); SILC_LOG_DEBUG(("Signing HASH_i value")); - + /* Sign the hash value */ - silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv, + silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv, private_key->prv_len); if (silc_pkcs_get_key_len(ske->prop->pkcs) / 8 > sizeof(sign) - 1 || !silc_pkcs_sign(ske->prop->pkcs, hash, hash_len, sign, &sign_len)) { @@ -447,8 +447,8 @@ SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske, /* Send the packet. */ if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, payload_buf, - SILC_PACKET_KEY_EXCHANGE_1, + (*ske->callbacks->send_packet)(ske, payload_buf, + SILC_PACKET_KEY_EXCHANGE_1, ske->callbacks->context); silc_buffer_free(payload_buf); @@ -495,7 +495,7 @@ static void silc_ske_initiator_finish_final(SilcSKE ske, if (payload->pk_data) { /* Decode the public key */ - if (!silc_pkcs_public_key_decode(payload->pk_data, payload->pk_len, + if (!silc_pkcs_public_key_decode(payload->pk_data, payload->pk_len, &public_key)) { status = SILC_SKE_STATUS_UNSUPPORTED_PUBLIC_KEY; SILC_LOG_ERROR(("Unsupported/malformed public key received")); @@ -518,7 +518,7 @@ static void silc_ske_initiator_finish_final(SilcSKE ske, /* Verify signature */ silc_pkcs_public_key_set(ske->prop->pkcs, public_key); - if (silc_pkcs_verify(ske->prop->pkcs, payload->sign_data, + if (silc_pkcs_verify(ske->prop->pkcs, payload->sign_data, payload->sign_len, hash, hash_len) == FALSE) { SILC_LOG_ERROR(("Signature verification failed, incorrect signature")); status = SILC_SKE_STATUS_INCORRECT_SIGNATURE; @@ -526,7 +526,7 @@ static void silc_ske_initiator_finish_final(SilcSKE ske, } SILC_LOG_DEBUG(("Signature is Ok")); - + silc_pkcs_public_key_free(public_key); memset(hash, 'F', hash_len); } @@ -569,7 +569,7 @@ static void silc_ske_initiator_finish_final(SilcSKE ske, /* Receives Key Exchange Payload from responder consisting responders public key, f, and signature. This function verifies the public key, - computes the secret shared key and verifies the signature. + computes the secret shared key and verifies the signature. The `proto_continue' will be called to indicate that the caller may continue with the SKE protocol. The caller must not continue @@ -581,7 +581,7 @@ static void silc_ske_initiator_finish_final(SilcSKE ske, This calls the `verify_key' callback to verify the received public key or certificate. If the `verify_key' is provided then the remote - must send public key and it is considered to be an error if remote + must send public key and it is considered to be an error if remote does not send its public key. If caller is performing a re-key with SKE then the `verify_key' is usually not provided when it is not also required for the remote to send its public key. */ @@ -620,12 +620,12 @@ SilcSKEStatus silc_ske_initiator_finish(SilcSKE ske, if (payload->pk_data && ske->callbacks->verify_key) { SILC_LOG_DEBUG(("Verifying public key")); - + ske->users++; (*ske->callbacks->verify_key)(ske, payload->pk_data, payload->pk_len, payload->pk_type, ske->callbacks->context, silc_ske_initiator_finish_final, NULL); - + /* We will continue to the final state after the public key has been verified by the caller. */ return SILC_SKE_STATUS_PENDING; @@ -655,7 +655,7 @@ SilcSKEStatus silc_ske_initiator_finish(SilcSKE ske, /* Starts Key Exchange protocol for responder. Responder receives Key Exchange Start Payload from initiator consisting of all the security properties the initiator supports. This function decodes - the payload and parses the payload further and selects the right + the payload and parses the payload further and selects the right security properties. */ SilcSKEStatus silc_ske_responder_start(SilcSKE ske, SilcRng rng, @@ -731,7 +731,7 @@ SilcSKEStatus silc_ske_responder_start(SilcSKE ske, SilcRng rng, return status; } -/* The selected security properties from the initiator payload is now +/* The selected security properties from the initiator payload is now encoded into Key Exchange Start Payload and sent to the initiator. */ SilcSKEStatus silc_ske_responder_phase_1(SilcSKE ske) @@ -753,13 +753,13 @@ SilcSKEStatus silc_ske_responder_phase_1(SilcSKE ske) prop->group = group; - if (silc_pkcs_alloc(ske->start_payload->pkcs_alg_list, + if (silc_pkcs_alloc(ske->start_payload->pkcs_alg_list, &prop->pkcs) == FALSE) { status = SILC_SKE_STATUS_UNKNOWN_PKCS; goto err; } - if (silc_cipher_alloc(ske->start_payload->enc_alg_list, + if (silc_cipher_alloc(ske->start_payload->enc_alg_list, &prop->cipher) == FALSE) { status = SILC_SKE_STATUS_UNKNOWN_CIPHER; goto err; @@ -778,14 +778,14 @@ SilcSKEStatus silc_ske_responder_phase_1(SilcSKE ske) } /* Encode the payload */ - status = silc_ske_payload_start_encode(ske, ske->start_payload, + status = silc_ske_payload_start_encode(ske, ske->start_payload, &payload_buf); if (status != SILC_SKE_STATUS_OK) goto err; /* Send the packet. */ if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE, + (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE, ske->callbacks->context); silc_buffer_free(payload_buf); @@ -851,15 +851,15 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, /* The public key verification was performed only if the Mutual Authentication flag is set. */ - if (ske->start_payload && + if (ske->start_payload && ske->start_payload->flags & SILC_SKE_SP_FLAG_MUTUAL) { SilcPublicKey public_key = NULL; unsigned char hash[32]; SilcUInt32 hash_len; /* Decode the public key */ - if (!silc_pkcs_public_key_decode(recv_payload->pk_data, - recv_payload->pk_len, + if (!silc_pkcs_public_key_decode(recv_payload->pk_data, + recv_payload->pk_len, &public_key)) { ske->status = SILC_SKE_STATUS_UNSUPPORTED_PUBLIC_KEY; SILC_LOG_ERROR(("Unsupported/malformed public key received")); @@ -880,10 +880,10 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, } SILC_LOG_DEBUG(("Verifying signature (HASH_i)")); - + /* Verify signature */ silc_pkcs_public_key_set(ske->prop->pkcs, public_key); - if (silc_pkcs_verify(ske->prop->pkcs, recv_payload->sign_data, + if (silc_pkcs_verify(ske->prop->pkcs, recv_payload->sign_data, recv_payload->sign_len, hash, hash_len) == FALSE) { SILC_LOG_ERROR(("Signature verification failed, incorrect signature")); ske->status = SILC_SKE_STATUS_INCORRECT_SIGNATURE; @@ -891,9 +891,9 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, ske->callbacks->proto_continue(ske, ske->callbacks->context); return; } - + SILC_LOG_DEBUG(("Signature is Ok")); - + silc_pkcs_public_key_free(public_key); memset(hash, 'F', hash_len); } @@ -901,7 +901,7 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, /* Create the random number x, 1 < x < q. */ x = silc_calloc(1, sizeof(*x)); silc_mp_init(x); - status = + status = silc_ske_create_rnd(ske, &ske->prop->group->group_order, silc_mp_sizeinbase(&ske->prop->group->group_order, 2), x); @@ -923,9 +923,9 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, /* Do the Diffie Hellman computation, f = g ^ x mod p */ silc_mp_init(&send_payload->x); - silc_mp_pow_mod(&send_payload->x, &ske->prop->group->generator, x, + silc_mp_pow_mod(&send_payload->x, &ske->prop->group->generator, x, &ske->prop->group->group); - + /* Call the callback. The caller may now continue with the SKE protocol. */ ske->status = SILC_SKE_STATUS_OK; if (ske->callbacks->proto_continue) @@ -933,10 +933,10 @@ static void silc_ske_responder_phase2_final(SilcSKE ske, } /* This function receives the Key Exchange Payload from the initiator. - This also performs the mutual authentication if required. Then, this + This also performs the mutual authentication if required. Then, this function first generated a random number x, such that 1 < x < q and computes f = g ^ x mod p. This then puts the result f to a Key - Exchange Payload. + Exchange Payload. The `proto_continue' will be called to indicate that the caller may continue with the SKE protocol. The caller must not continue @@ -970,11 +970,11 @@ SilcSKEStatus silc_ske_responder_phase_2(SilcSKE ske, /* Verify the received public key and verify the signature if we are doing mutual authentication. */ - if (ske->start_payload && + if (ske->start_payload && ske->start_payload->flags & SILC_SKE_SP_FLAG_MUTUAL) { SILC_LOG_DEBUG(("We are doing mutual authentication")); - + if (!recv_payload->pk_data && ske->callbacks->verify_key) { SILC_LOG_ERROR(("Remote end did not send its public key (or " "certificate), even though we require it")); @@ -986,9 +986,9 @@ SilcSKEStatus silc_ske_responder_phase_2(SilcSKE ske, SILC_LOG_DEBUG(("Verifying public key")); ske->users++; - (*ske->callbacks->verify_key)(ske, recv_payload->pk_data, + (*ske->callbacks->verify_key)(ske, recv_payload->pk_data, recv_payload->pk_len, - recv_payload->pk_type, + recv_payload->pk_type, ske->callbacks->context, silc_ske_responder_phase2_final, NULL); @@ -1027,13 +1027,13 @@ SilcSKEStatus silc_ske_responder_finish(SilcSKE ske, /* Compute the shared secret key */ KEY = silc_calloc(1, sizeof(*KEY)); silc_mp_init(KEY); - silc_mp_pow_mod(KEY, &ske->ke1_payload->x, ske->x, + silc_mp_pow_mod(KEY, &ske->ke1_payload->x, ske->x, &ske->prop->group->group); ske->KEY = KEY; if (public_key && private_key) { SILC_LOG_DEBUG(("Getting public key")); - + /* Get the public key */ pk = silc_pkcs_public_key_encode(public_key, &pk_len); if (!pk) { @@ -1042,9 +1042,9 @@ SilcSKEStatus silc_ske_responder_finish(SilcSKE ske, } ske->ke2_payload->pk_data = pk; ske->ke2_payload->pk_len = pk_len; - + SILC_LOG_DEBUG(("Computing HASH value")); - + /* Compute the hash value */ memset(hash, 0, sizeof(hash)); status = silc_ske_make_hash(ske, hash, &hash_len, FALSE); @@ -1053,11 +1053,11 @@ SilcSKEStatus silc_ske_responder_finish(SilcSKE ske, ske->hash = silc_memdup(hash, hash_len); ske->hash_len = hash_len; - + SILC_LOG_DEBUG(("Signing HASH value")); - + /* Sign the hash value */ - silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv, + silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv, private_key->prv_len); if (silc_pkcs_get_key_len(ske->prop->pkcs) / 8 > sizeof(sign) - 1 || !silc_pkcs_sign(ske->prop->pkcs, hash, hash_len, sign, &sign_len)) { @@ -1078,7 +1078,7 @@ SilcSKEStatus silc_ske_responder_finish(SilcSKE ske, /* Send the packet. */ if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, payload_buf, + (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE_2, ske->callbacks->context); @@ -1114,7 +1114,7 @@ SilcSKEStatus silc_ske_end(SilcSKE ske) silc_buffer_set(&packet, data, 4); if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_SUCCESS, + (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_SUCCESS, ske->callbacks->context); return SILC_SKE_STATUS_OK; @@ -1138,7 +1138,7 @@ SilcSKEStatus silc_ske_abort(SilcSKE ske, SilcSKEStatus status) silc_buffer_set(&packet, data, 4); if (ske->callbacks->send_packet) - (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_FAILURE, + (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_FAILURE, ske->callbacks->context); return SILC_SKE_STATUS_OK; @@ -1151,7 +1151,7 @@ SilcSKEStatus silc_ske_abort(SilcSKE ske, SilcSKEStatus status) as, this function is called by the caller of the protocol and not by the protocol itself. */ -SilcSKEStatus +SilcSKEStatus silc_ske_assemble_security_properties(SilcSKE ske, SilcSKESecurityPropertyFlag flags, const char *version, @@ -1202,10 +1202,10 @@ silc_ske_assemble_security_properties(SilcSKE ske, rp->comp_alg_list = strdup("none"); rp->comp_alg_len = strlen("none"); - rp->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN + + rp->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN + 2 + rp->version_len + - 2 + rp->ke_grp_len + 2 + rp->pkcs_alg_len + - 2 + rp->enc_alg_len + 2 + rp->hash_alg_len + + 2 + rp->ke_grp_len + 2 + rp->pkcs_alg_len + + 2 + rp->enc_alg_len + 2 + rp->hash_alg_len + 2 + rp->hmac_alg_len + 2 + rp->comp_alg_len; *return_payload = rp; @@ -1213,10 +1213,10 @@ silc_ske_assemble_security_properties(SilcSKE ske, return SILC_SKE_STATUS_OK; } -/* Selects the supported security properties from the remote end's Key +/* Selects the supported security properties from the remote end's Key Exchange Start Payload. */ -SilcSKEStatus +SilcSKEStatus silc_ske_select_security_properties(SilcSKE ske, const char *version, SilcSKEStartPayload *payload, @@ -1233,7 +1233,7 @@ silc_ske_select_security_properties(SilcSKE ske, /* Check version string */ if (ske->callbacks->check_version) { - status = ske->callbacks->check_version(ske, rp->version, + status = ske->callbacks->check_version(ske, rp->version, rp->version_len, ske->callbacks->context); if (status != SILC_SKE_STATUS_OK) { @@ -1565,10 +1565,10 @@ silc_ske_select_security_properties(SilcSKE ske, } } - payload->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN + - 2 + payload->version_len + - 2 + payload->ke_grp_len + 2 + payload->pkcs_alg_len + - 2 + payload->enc_alg_len + 2 + payload->hash_alg_len + + payload->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN + + 2 + payload->version_len + + 2 + payload->ke_grp_len + 2 + payload->pkcs_alg_len + + 2 + payload->enc_alg_len + 2 + payload->hash_alg_len + 2 + payload->hmac_alg_len + 2 + payload->comp_alg_len; return SILC_SKE_STATUS_OK; @@ -1577,8 +1577,8 @@ silc_ske_select_security_properties(SilcSKE ske, /* Creates random number such that 1 < rnd < n and at most length of len bits. The rnd sent as argument must be initialized. */ -static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n, - SilcUInt32 len, +static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n, + SilcUInt32 len, SilcMPInt *rnd) { SilcSKEStatus status = SILC_SKE_STATUS_OK; @@ -1618,7 +1618,7 @@ static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n, hash value defined in the protocol. If it is FALSE then this is used to create the HASH value defined by the protocol. */ -static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, +static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, unsigned char *return_hash, SilcUInt32 *return_hash_len, int initiator) @@ -1635,39 +1635,39 @@ static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, e = silc_mp_mp2bin(&ske->ke1_payload->x, 0, &e_len); f = silc_mp_mp2bin(&ske->ke2_payload->x, 0, &f_len); KEY = silc_mp_mp2bin(ske->KEY, 0, &KEY_len); - + /* Format the buffer used to compute the hash value */ - buf = silc_buffer_alloc_size(ske->start_payload_copy->len + - ske->ke2_payload->pk_len + - ske->ke1_payload->pk_len + + buf = silc_buffer_alloc_size(ske->start_payload_copy->len + + ske->ke2_payload->pk_len + + ske->ke1_payload->pk_len + e_len + f_len + KEY_len); if (!buf) return SILC_SKE_STATUS_OUT_OF_MEMORY; /* Initiator is not required to send its public key */ if (!ske->ke1_payload->pk_data) { - ret = + ret = silc_buffer_format(buf, SILC_STR_UI_XNSTRING(ske->start_payload_copy-> data, ske->start_payload_copy-> len), - SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data, + SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data, ske->ke2_payload->pk_len), SILC_STR_UI_XNSTRING(e, e_len), SILC_STR_UI_XNSTRING(f, f_len), SILC_STR_UI_XNSTRING(KEY, KEY_len), SILC_STR_END); } else { - ret = + ret = silc_buffer_format(buf, SILC_STR_UI_XNSTRING(ske->start_payload_copy-> data, ske->start_payload_copy-> len), - SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data, + SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data, ske->ke2_payload->pk_len), - SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data, + SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data, ske->ke1_payload->pk_len), SILC_STR_UI_XNSTRING(e, e_len), SILC_STR_UI_XNSTRING(f, f_len), @@ -1694,17 +1694,17 @@ static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, } else { e = silc_mp_mp2bin(&ske->ke1_payload->x, 0, &e_len); - buf = silc_buffer_alloc_size(ske->start_payload_copy->len + + buf = silc_buffer_alloc_size(ske->start_payload_copy->len + ske->ke1_payload->pk_len + e_len); if (!buf) return SILC_SKE_STATUS_OUT_OF_MEMORY; - + /* Format the buffer used to compute the hash value */ - ret = + ret = silc_buffer_format(buf, SILC_STR_UI_XNSTRING(ske->start_payload_copy->data, ske->start_payload_copy->len), - SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data, + SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data, ske->ke1_payload->pk_len), SILC_STR_UI_XNSTRING(e, e_len), SILC_STR_END); @@ -1734,10 +1734,10 @@ static SilcSKEStatus silc_ske_make_hash(SilcSKE ske, return status; } -/* Processes the provided key material `data' as the SILC protocol +/* Processes the provided key material `data' as the SILC protocol specification defines. */ -SilcSKEStatus +SilcSKEStatus silc_ske_process_key_material_data(unsigned char *data, SilcUInt32 data_len, SilcUInt32 req_iv_len, @@ -1785,15 +1785,15 @@ silc_ske_process_key_material_data(unsigned char *data, SilcBuffer dist; unsigned char k1[32], k2[32], k3[32]; unsigned char *dtmp; - + /* XXX */ if (enc_key_len > (3 * hash_len)) return SILC_SKE_STATUS_ERROR; - + /* Take first round */ memset(k1, 0, sizeof(k1)); silc_hash_make(hash, buf->data, buf->len, k1); - + /* Take second round */ dist = silc_buffer_alloc_size(data_len + hash_len); if (!dist) @@ -1847,15 +1847,15 @@ silc_ske_process_key_material_data(unsigned char *data, SilcBuffer dist; unsigned char k1[32], k2[32], k3[32]; unsigned char *dtmp; - + /* XXX */ if (enc_key_len > (3 * hash_len)) return SILC_SKE_STATUS_ERROR; - + /* Take first round */ memset(k1, 0, sizeof(k1)); silc_hash_make(hash, buf->data, buf->len, k1); - + /* Take second round */ dist = silc_buffer_alloc_size(data_len + hash_len); if (!dist) @@ -1866,7 +1866,7 @@ silc_ske_process_key_material_data(unsigned char *data, SILC_STR_END); memset(k2, 0, sizeof(k2)); silc_hash_make(hash, dist->data, dist->len, k2); - + /* Take third round */ dist = silc_buffer_realloc(dist, data_len + hash_len + hash_len); silc_buffer_pull_tail(dist, hash_len); @@ -1927,7 +1927,7 @@ silc_ske_process_key_material_data(unsigned char *data, /* Processes negotiated key material as protocol specifies. This returns the actual keys to be used in the SILC. */ -SilcSKEStatus silc_ske_process_key_material(SilcSKE ske, +SilcSKEStatus silc_ske_process_key_material(SilcSKE ske, SilcUInt32 req_iv_len, SilcUInt32 req_enc_key_len, SilcUInt32 req_hmac_key_len, @@ -1952,7 +1952,7 @@ SilcSKEStatus silc_ske_process_key_material(SilcSKE ske, /* Process the key material */ status = silc_ske_process_key_material_data(buf->data, buf->len, req_iv_len, req_enc_key_len, - req_hmac_key_len, + req_hmac_key_len, ske->prop->hash, key); memset(tmpbuf, 0, klen); @@ -1993,7 +1993,7 @@ void silc_ske_free_key_material(SilcSKEKeyMaterial *key) silc_free(key); } -const char *silc_ske_status_string[] = +const char *silc_ske_status_string[] = { /* Official */ "Ok", @@ -2037,16 +2037,16 @@ const char *silc_ske_map_status(SilcSKEStatus status) /* Parses remote host's version string. */ -bool silc_ske_parse_version(SilcSKE ske, +bool silc_ske_parse_version(SilcSKE ske, SilcUInt32 *protocol_version, char **protocol_version_string, - SilcUInt32 *software_version, + SilcUInt32 *software_version, char **software_version_string, char **vendor_version) { return silc_parse_version_string(ske->remote_version, - protocol_version, - protocol_version_string, + protocol_version, + protocol_version_string, software_version, software_version_string, vendor_version); -- 2.43.0