+Sun Apr 24 12:01:37 EEST 2005 Pekka Riikonen <priikone@silcnet.org>
+
+ * Optimized primer number generator to use simpler conversion
+ routines. Also assure that the prime number will have the
+ highest bit set after modifying it. Affected file is
+ lib/silcmath/silcprimegen.c.
+
+ * Added LibTomMath to SILC Math library (SILC_DIST_TMA).
+
+ * Added FastTomMath to SILC Math Library (SILC_DIST_TFM),
+ for now only for testing purposes, it will not be delivered
+ in any distribution for now.
+
+ * Removed NSS MPI from the source tree. Due to upcoming
+ license change for the SILC Toolkit, we cannot deliver the
+ NSS MPI anymore. I decided to replace it in all distributions
+ with public domain library.
+
+ * Removed GNU regex from lib/contrib and introduced free
+ GNU compatible regex, lib/contrib/regexpr.[ch]. Added
+ GNU regex compatible API to it (only partial).
+
+ * Removed getopt routines from lib/contrib.
+
Fri Apr 22 12:21:44 EEST 2005 Pekka Riikonen <priikone@silcnet.org>
* Use silc_server_send_command_reply to send replies instead
have_getopt_long=1
], have_getopt_long=0
)
-AM_CONDITIONAL(HAVE_GETOPT_LONG, test x$have_getopt_long = x1)
##
## Enable/disable checking
unset tmp_CFLAGS
])
+# Function to check if compiler flag works, destination specifiable
+# Usage: SILC_ADD_CC_FLAGS(VAR, FLAGS, [ACTION-IF-FAILED])
+AC_DEFUN([SILC_ADD_CC_FLAGS],
+[ tmp_CFLAGS="$1_CFLAGS"
+ $1_CFLAGS="${$1_CFLAGS} $2"
+ AC_MSG_CHECKING(whether $CC accepts $2 flag)
+ AC_TRY_LINK([], [], [AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)
+ $1_CFLAGS="$tmp_CFLAGS"
+ $3])
+ unset tmp_CFLAGS
+])
+
if test "$GCC"; then
# GCC specific options
if test "x$summary_debug" = "xyes"; then
)
AM_CONDITIONAL(SILC_MP_GMP, test x$mp_gmp = xtrue)
-AM_CONDITIONAL(SILC_MP_NSS_MPI, test x$mp_gmp = xfalse)
+AM_CONDITIONAL(SILC_MP_SILCMATH, test x$mp_gmp = xfalse)
if test x$mp_gmp = xfalse; then
- AC_DEFINE([SILC_MP_NSS_MPI], [], [MPI])
- AC_MSG_RESULT(Using NSS MPI as a MP library.)
+ AC_DEFINE([SILC_MP_SILCMAP], [], [SILCMATH])
+ AC_MSG_RESULT(Using SILC Math as a MP library.)
fi
#endif SILC_DIST_MATH
echo " Assembler optimizations .......: $summary_asm"
#ifdef SILC_DIST_MATH
-mp="MPI"
if test x$mp_gmp = xtrue; then
- mp="GMP"
+ echo " Arithmetic library ............: GMP"
fi
-echo " Arithmetic library ............: $mp"
#endif SILC_DIST_MATH
threads="no"
define SILC_DIST_INCLUDES
define SILC_DIST_DOC
define SILC_DIST_SIM
-define SILC_DIST_MPI
-define SILC_DIST_MATH
define SILC_DIST_SFTP
define SILC_DIST_COMPILER
+
+# Math library, define only TMA or TFM, not both.
+define SILC_DIST_MATH
+define SILC_DIST_TMA
+#define SILC_DIST_TFM
lib/silcutil/beos/Makefile
lib/silcutil/os2/Makefile
lib/silcutil/epoc/Makefile
-#ifdef SILC_DIST_MATH
-lib/silcmath/Makefile
-#endif SILC_DIST_MATH
#ifdef SILC_DIST_SFTP
lib/silcsftp/Makefile
#endif SILC_DIST_SFTP
AUTOMAKE_OPTIONS = 1.0 no-dependencies foreign
-if SILC_MP_NSS_MPI
-SUBDIRS = mpi
-else
-SUBDIRS =
-endif
+noinst_LTLIBRARIES = libsilcmath.la
-DIST_SUBDIRS = mpi
+if SILC_MP_SILCMATH
+MP_SOURCE = \
+#ifdef SILC_DIST_TMA
+ mp_tma.c \
+ tma.c \
+#endif SILC_DIST_TMA
+#ifdef SILC_DIST_TFM
+ mp_tfm.c \
+ tfm.c
+#endif SILC_DIST_TFM
-noinst_LTLIBRARIES = libsilcmath.la
+MP_HEADER = \
+#ifdef SILC_DIST_TMA
+ mp_tma.h \
+ tma.h \
+ tma_class.h \
+ tma_superclass.h \
+#endif SILC_DIST_TMA
+#ifdef SILC_DIST_TFM
+ mp_tfm.h \
+ tfm.h
+#endif SILC_DIST_TFM
-if SILC_MP_NSS_MPI
-MP_SOURCE = mp_mpi.c
+AM_CFLAGS = @MATH_CFLAGS@
else
MP_SOURCE = mp_gmp.c
+MP_HEADER =
endif
libsilcmath_la_SOURCES = \
$(MP_SOURCE)
#ifdef SILC_DIST_TOOLKIT
-MP_HEADER = mpi/mpi.h mpi/mplogic.h mpi/mpi-config.h
include_HEADERS = \
mp_gmp.h \
- mp_mpi.h \
+ $(MP_HEADER) \
silcmath.h \
- silcmp.h \
- $(MP_HEADER)
+ silcmp.h
#endif SILC_DIST_TOOLKIT
-EXTRA_DIST = mp_gmp.c mp_mpi.c *.h
+EXTRA_DIST = $(MP_SOURCE) $(MP_HEADER) mp_gmp.c mp_gmp.h
include $(top_srcdir)/Makefile.defines.in
modinv.h
- Author: Pekka Riikonen <priikone@poseidon.pspt.fi>
+ Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 1997 - 2000 Pekka Riikonen
+ Copyright (C) 1997 - 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
+ the Free Software Foundation; version 2 of the License.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#define plus1 (i == 2 ? 0 : i + 1)
#define minus1 (i == 0 ? 2 : i - 1)
-/* Find multiplicative inverse using Euclid's extended algorithm.
- Computes inverse such that a * inv mod n = 1, where 0 < a < n.
+/* Find multiplicative inverse using Euclid's extended algorithm.
+ Computes inverse such that a * inv mod n = 1, where 0 < a < n.
Algorithm goes like this:
-
+
g(0) = n v(0) = 0
g(1) = a v(1) = 1
-
+
y = g(i-1) / g(i)
g(i+1) = g(i-1) - y * g(i) = g(i)-1 mod g(i)
v(i+1) = v(i-1) - y * v(i)
-
- do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n,
- is added to inverse making it positive again. (Sometimes the algorithm
- has a variable u defined too and it behaves just like v, except that
- initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is
+
+ do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n,
+ is added to inverse making it positive again. (Sometimes the algorithm
+ has a variable u defined too and it behaves just like v, except that
+ initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is
not needed by the algorithm so it does not have to be included.)
*/
int i;
SilcMPInt y;
SilcMPInt x;
-
+
ModInv g[3];
ModInv v[3];
-
+
/* init MP vars */
silc_mp_init(&y);
silc_mp_init(&x);
silc_mp_set(&g[0].x, n); /* g(0) = n */
silc_mp_set(&g[1].x, a); /* g(1) = a */
silc_mp_init(&g[2].x);
-
+
i = 1;
while(silc_mp_cmp_ui(&g[i].x, 0) != 0) {
silc_mp_div(&y, &g[minus1].x, &g[i].x); /* y = n / a */
silc_mp_sub(&v[plus1].x, &v[plus1].x, &x);
i = plus1;
}
-
+
/* set the inverse */
silc_mp_set(inv, &v[minus1].x);
-
+
/* if inverse is negative, add n to inverse */
if (silc_mp_cmp_ui(inv, 0) < 0)
silc_mp_add(inv, inv, n);
-
+
/* clear the vars */
memset(&g, 0, sizeof(g));
memset(&v, 0, sizeof(v));
/*
- mp_gmp.c
+ mp_gmp.c
Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2001 Pekka Riikonen
+ Copyright (C) 2001 - 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
mpz_div_ui(dst, mp1, ui);
}
-void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcMPInt *mp2)
{
if (q && r)
mpz_fdiv_q_2exp(dst, mp1, exp);
}
-void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcUInt32 exp)
{
if (q)
mpz_pow_ui(dst, mp1, exp);
}
-void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
+void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
SilcMPInt *mod)
{
mpz_powm(dst, mp1, exp, mod);
}
-void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
+void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
SilcMPInt *mod)
{
mpz_powm_ui(dst, mp1, exp, mod);
--- /dev/null
+/*
+
+ mp_tfm.c
+
+ Author: Pekka Riikonen <priikone@silcnet.org>
+
+ Copyright (C) 2005 Pekka Riikonen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+*/
+/* $Id$ */
+
+#include "silcincludes.h"
+#include "mp_tfm.h"
+
+void silc_mp_init(SilcMPInt *mp)
+{
+ fp_init(mp);
+}
+
+void silc_mp_uninit(SilcMPInt *mp)
+{
+ fp_zero(mp);
+}
+
+size_t silc_mp_size(SilcMPInt *mp)
+{
+ return fp_unsigned_bin_size(mp);
+}
+
+size_t silc_mp_sizeinbase(SilcMPInt *mp, int base)
+{
+ int size = 0;
+ fp_radix_size(mp, base, &size);
+ if (size > 1)
+ size--;
+ return size;
+}
+
+void silc_mp_set(SilcMPInt *dst, SilcMPInt *src)
+{
+ fp_copy(src, dst);
+}
+
+void silc_mp_set_ui(SilcMPInt *dst, SilcUInt32 ui)
+{
+ fp_set(dst, ui);
+}
+
+void silc_mp_set_si(SilcMPInt *dst, SilcInt32 si)
+{
+ fp_set(dst, si);
+}
+
+void silc_mp_set_str(SilcMPInt *dst, const char *str, int base)
+{
+ fp_read_radix(dst, str, base);
+}
+
+SilcUInt32 silc_mp_get_ui(SilcMPInt *mp)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_get_ui");
+ assert(FALSE);
+}
+
+char *silc_mp_get_str(char *str, SilcMPInt *mp, int base)
+{
+ if (fp_toradix(mp, str, base) != MP_OKAY)
+ return NULL;
+ return str;
+}
+
+void silc_mp_add(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_add(mp1, mp2, dst);
+}
+
+void silc_mp_add_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ mp_add_d(mp1, (mp_digit)ui, dst);
+}
+
+void silc_mp_sub(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_sub(mp1, mp2, dst);
+}
+
+void silc_mp_sub_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ fp_sub_d(mp1, (mp_digit)ui, dst);
+}
+
+void silc_mp_mul(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_mul(mp1, mp2, dst);
+}
+
+void silc_mp_mul_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ fp_mul_d(mp1, (mp_digit)ui, dst);
+}
+
+void silc_mp_mul_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
+{
+ fp_mul_2d(mp1, exp, dst);
+}
+
+void silc_mp_sqrt(SilcMPInt *dst, SilcMPInt *src)
+{
+ fp_sqrt(src, dst);
+}
+
+void silc_mp_div(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_div(mp1, mp2, dst, NULL);
+}
+
+void silc_mp_div_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ fp_div_d(mp1, (mp_digit)ui, dst, NULL);
+}
+
+void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+ SilcMPInt *mp2)
+{
+ fp_div(mp1, mp2, q, r);
+}
+
+void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
+{
+ fp_div_2d(mp1, exp, dst, NULL);
+}
+
+void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+ SilcUInt32 exp)
+{
+ fp_div_2d(mp1, exp, q, r);
+}
+
+void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_mod(mp1, mp2, dst);
+}
+
+void silc_mp_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ fp_digit d;
+ fp_mod_d(mp1, ui, &d);
+ silc_mp_set_ui(dst, d);
+}
+
+void silc_mp_mod_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
+{
+ fp_mod_2d(mp1, ui, dst);
+}
+
+void silc_mp_pow(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_pow");
+ assert(FALSE);
+}
+
+void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_pow_ui");
+ assert(FALSE);
+}
+
+void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
+ SilcMPInt *mod)
+{
+ fp_exptmod(mp1, exp, mod, dst);
+}
+
+void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
+ SilcMPInt *mod)
+{
+ SilcMPInt tmp;
+ silc_mp_init(&tmp);
+ silc_mp_set_ui(&tmp, exp);
+ silc_mp_pow_mod(dst, mp1, &tmp, mod);
+ silc_mp_uninit(&tmp);
+}
+
+void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ fp_gcd(mp1, mp2, dst);
+}
+
+void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, SilcMPInt *mp1,
+ SilcMPInt *mp2)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_gcdext");
+ assert(FALSE);
+}
+
+int silc_mp_cmp(SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ return fp_cmp(mp1, mp2);
+}
+
+int silc_mp_cmp_si(SilcMPInt *mp1, SilcInt32 si)
+{
+ return fp_cmp_d(mp1, si);
+}
+
+int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui)
+{
+ return fp_cmp_d(mp1, ui);
+}
+
+void silc_mp_abs(SilcMPInt *dst, SilcMPInt *src)
+{
+ fp_abs(src, dst);
+}
+
+void silc_mp_neg(SilcMPInt *dst, SilcMPInt *src)
+{
+ fp_neg(src, dst);
+}
+
+void silc_mp_and(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_and");
+ assert(FALSE);
+}
+
+void silc_mp_or(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_or");
+ assert(FALSE);
+}
+
+void silc_mp_xor(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
+{
+ SILC_NOT_IMPLEMENTED("silc_mp_xor");
+ assert(FALSE);
+}
--- /dev/null
+/*
+
+ mp_tfm.h
+
+ Author: Pekka Riikonen <priikone@silcnet.org>
+
+ Copyright (C) 2005 Pekka Riikonen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+*/
+
+#ifndef MP_TFM_H
+#define MP_TFM_H
+
+#include "tfm.h"
+
+#define SILC_MP_INT fp_int
+
+#endif /* MP_TFM_H */
/*
- mp_mpi.c
+ mp_tma.c
Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2001 Pekka Riikonen
+ Copyright (C) 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* $Id$ */
#include "silcincludes.h"
-#include "mpi.h"
-#include "mplogic.h"
+#include "mp_tma.h"
void silc_mp_init(SilcMPInt *mp)
{
void silc_mp_uninit(SilcMPInt *mp)
{
- (void)mp_clear(mp);
+ mp_clear(mp);
}
size_t silc_mp_size(SilcMPInt *mp)
{
- return mp_raw_size(mp);
+ return mp_unsigned_bin_size(mp);
}
size_t silc_mp_sizeinbase(SilcMPInt *mp, int base)
{
- size_t sib = mp_unsigned_octet_size(mp);
- sib = s_mp_outlen(sib * CHAR_BIT, base);
- if (sib > 2)
- sib -= 2; /* Looks like MPI returns extra zero
- bytes for C-strings. */
- return sib;
+ int size = 0;
+ mp_radix_size(mp, base, &size);
+ if (size > 1)
+ size--;
+ return size;
}
void silc_mp_set(SilcMPInt *dst, SilcMPInt *src)
void silc_mp_set_ui(SilcMPInt *dst, SilcUInt32 ui)
{
- mp_set(dst, ui);
+ (void)mp_set_int(dst, ui);
}
void silc_mp_set_si(SilcMPInt *dst, SilcInt32 si)
void silc_mp_set_str(SilcMPInt *dst, const char *str, int base)
{
- (void)mp_read_variable_radix(dst, str, base);
+ (void)mp_read_radix(dst, str, base);
}
SilcUInt32 silc_mp_get_ui(SilcMPInt *mp)
{
- return (SilcUInt32)MP_DIGIT(mp, 0);
+ return (SilcUInt32)mp_get_int(mp);
}
char *silc_mp_get_str(char *str, SilcMPInt *mp, int base)
void silc_mp_add_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
{
- mp_add_d(mp1, ui, dst);
+ mp_add_d(mp1, (mp_digit)ui, dst);
}
void silc_mp_sub(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
void silc_mp_mul_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
{
- SilcMPInt tmp;
- silc_mp_init(&tmp);
- (void)mp_2expt(&tmp, (mp_digit)exp);
- (void)mp_mul(mp1, &tmp, dst);
- silc_mp_uninit(&tmp);
+ (void)mp_mul_2d(mp1, exp, dst);
}
void silc_mp_sqrt(SilcMPInt *dst, SilcMPInt *src)
(void)mp_div_d(mp1, (mp_digit)ui, dst, NULL);
}
-void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcMPInt *mp2)
{
(void)mp_div(mp1, mp2, q, r);
void silc_mp_div_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
{
- SilcMPInt tmp;
- silc_mp_init(&tmp);
- (void)mp_2expt(&tmp, (mp_digit)exp);
- (void)mp_div(mp1, &tmp, dst, NULL);
- silc_mp_uninit(&tmp);
+ (void)mp_div_2d(mp1, exp, dst, NULL);
}
-void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcUInt32 exp)
{
- if (q) {
- (void)mp_2expt(q, (mp_digit)exp);
- (void)mp_div(mp1, q, q, r);
- }
+ (void)mp_div_2d(mp1, exp, q, r);
}
void silc_mp_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
void silc_mp_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
{
- mp_digit uidst;
- (void)mp_mod_d(mp1, (mp_digit)ui, &uidst);
- mp_set(dst, uidst);
+ mp_digit d;
+ (void)mp_mod_d(mp1, ui, &d);
+ silc_mp_set_ui(dst, d);
}
void silc_mp_mod_2exp(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 ui)
{
- SilcMPInt tmp;
- silc_mp_init(&tmp);
- (void)mp_2expt(&tmp, (mp_digit)ui);
- (void)mp_mod(mp1, &tmp, dst);
- silc_mp_uninit(&tmp);
+ (void)mp_mod_2d(mp1, ui, dst);
}
void silc_mp_pow(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp)
{
- (void)mp_expt(mp1, exp, dst);
+ SILC_NOT_IMPLEMENTED("silc_mp_pow");
+ assert(FALSE);
}
void silc_mp_pow_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp)
(void)mp_expt_d(mp1, (mp_digit)exp, dst);
}
-void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
+void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
SilcMPInt *mod)
{
(void)mp_exptmod(mp1, exp, mod, dst);
}
-void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
+void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
SilcMPInt *mod)
{
- (void)mp_exptmod_d(mp1, (mp_digit)exp, mod, dst);
+ SilcMPInt tmp;
+ silc_mp_init(&tmp);
+ silc_mp_set_ui(&tmp, exp);
+ silc_mp_pow_mod(dst, mp1, &tmp, mod);
+ silc_mp_uninit(&tmp);
}
void silc_mp_gcd(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t, SilcMPInt *mp1,
SilcMPInt *mp2)
{
- (void)mp_xgcd(mp1, mp2, g, s, t);
+ (void)mp_exteuclid(mp1, mp2, s, t, g);
}
int silc_mp_cmp(SilcMPInt *mp1, SilcMPInt *mp2)
int silc_mp_cmp_si(SilcMPInt *mp1, SilcInt32 si)
{
- return mp_cmp_int(mp1, (long)si);
+ return mp_cmp_d(mp1, si);
}
int silc_mp_cmp_ui(SilcMPInt *mp1, SilcUInt32 ui)
void silc_mp_abs(SilcMPInt *dst, SilcMPInt *src)
{
- mp_abs(src, dst);
+ (void)mp_abs(src, dst);
}
void silc_mp_neg(SilcMPInt *dst, SilcMPInt *src)
{
- mp_neg(src, dst);
+ (void)mp_neg(src, dst);
}
void silc_mp_and(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
{
- mpl_and(mp1, mp2, dst);
+ (void)mp_and(mp1, mp2, dst);
}
void silc_mp_or(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
{
- mpl_or(mp1, mp2, dst);
+ (void)mp_or(mp1, mp2, dst);
}
void silc_mp_xor(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *mp2)
{
- mpl_xor(mp1, mp2, dst);
+ (void)mp_xor(mp1, mp2, dst);
}
/*
- mp_mpi.h
+ mp_tma.h
- Author: Pekka Riikonen <priikone@poseidon.pspt.fi>
+ Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2001 Pekka Riikonen
+ Copyright (C) 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
+ the Free Software Foundation; version 2 of the License.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
*/
-#ifndef MP_MPI_H
-#define MP_MPI_H
+#ifndef MP_TMA_H
+#define MP_TMA_H
-#include "mpi.h"
-#include "mplogic.h"
+#include "tma.h"
#define SILC_MP_INT mp_int
mpbin.c
- Author: Pekka Riikonen <priikone@poseidon.pspt.fi>
+ Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2000 - 2001 Pekka Riikonen
+ Copyright (C) 2000 - 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
+ the Free Software Foundation; version 2 of the License.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
size = (len ? len : ((silc_mp_sizeinbase(val, 2) + 7) / 8));
ret = silc_calloc(size, sizeof(*ret));
-
+
silc_mp_init(&tmp);
silc_mp_set(&tmp, val);
/*
silcmp.h
-
+
Author: Pekka Riikonen <priikone@silcnet.org>
-
- Copyright (C) 1997 - 2001 Pekka Riikonen
-
+
+ Copyright (C) 1997 - 2005 Pekka Riikonen
+
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
+ the Free Software Foundation; version 2 of the License.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
*/
/****h* silcmath/SILC MP Interface
* DESCRIPTION
*
* SILC MP Library Interface. This interface defines the arbitrary
- * precision arithmetic routines for SILC. Currently the actual routines
- * are implemented separately, usually by some other MP library. The
- * interface is generic but is mainly intended for crypto usage. This
- * interface is used by SILC routines that needs big numbers, such as
- * RSA implementation, Diffie-Hellman implementation etc.
+ * precision arithmetic routines for SILC. The interface is generic but
+ * is mainly intended for crypto usage. This interface is used by SILC
+ * routines that needs big numbers, such as RSA implementation,
+ * Diffie-Hellman implementation etc.
*
***/
#if defined(SILC_MP_GMP)
#include "mp_gmp.h" /* SILC_MP_GMP */
#else
-#include "mp_mpi.h" /* SILC_MP_NSS_MPI */
+#ifdef SILC_DIST_TMA
+#include "mp_tma.h"
+#endif /* SILC_DIST_TMA */
+#ifdef SILC_DIST_TFM
+#include "mp_tfm.h"
+#endif /* SILC_DIST_TFM */
#endif
/****d* silcmath/SilcMPAPI/SilcMPInt
*
* DESCRIPTION
*
- * Multiply integers `mp1' with 2 ** `exp' and save the result to
+ * Multiply integers `mp1' with 2 ** `exp' and save the result to
* `dst'. This is equivalent to dst = mp1 * (2 ^ exp).
*
***/
*
* SYNOPSIS
*
- * void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+ * void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
* SilcMPInt *mp2);
*
* DESCRIPTION
*
* Divide the `mp1' and `mp2' and save the quotient to the `q' and
- * the remainder to the `r'. This is equivalent to the q = mp1 / mp2,
+ * the remainder to the `r'. This is equivalent to the q = mp1 / mp2,
* r = mp1 mod mp2 (or mp1 = mp2 * q + r). If the `q' or `r' is NULL
* then the operation is omitted.
*
***/
-void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcMPInt *mp2);
/****f* silcmath/SilcMPAPI/silc_mp_div_2exp
*
* SYNOPSIS
*
- * void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+ * void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
* SilcUInt32 exp);
*
* DESCRIPTION
* is omitted.
*
***/
-void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
+void silc_mp_div_2exp_qr(SilcMPInt *q, SilcMPInt *r, SilcMPInt *mp1,
SilcUInt32 exp);
/****f* silcmath/SilcMPAPI/silc_mp_mod
*
* DESCRIPTION
*
- * Mathematical MOD function. Produces the remainder of `mp1' and
+ * Mathematical MOD function. Produces the remainder of `mp1' and
* unsigned word `ui' and saves the result to `dst'. This is equivalent
* to dst = mp1 mod ui.
*
*
* SYNOPSIS
*
- * void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
+ * void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
* SilcMPInt *mod);
*
* DESCRIPTION
* This is equivalent to dst = (mp1 ^ exp) mod mod.
*
***/
-void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
+void silc_mp_pow_mod(SilcMPInt *dst, SilcMPInt *mp1, SilcMPInt *exp,
SilcMPInt *mod);
/****f* silcmath/SilcMPAPI/silc_mp_pow_mod_ui
*
* SYNOPSIS
*
- * void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
+ * void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
* SilcMPInt *mod);
*
* DESCRIPTION
* This is equivalent to dst = (mp1 ^ exp) mod mod.
*
***/
-void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
+void silc_mp_pow_mod_ui(SilcMPInt *dst, SilcMPInt *mp1, SilcUInt32 exp,
SilcMPInt *mod);
/****f* silcmath/SilcMPAPI/silc_mp_modinv
*
* DESCRIPTION
*
- * Find multiplicative inverse using Euclid's extended algorithm.
- * Computes inverse such that a * inv mod n = 1, where 0 < a < n.
+ * Find multiplicative inverse using Euclid's extended algorithm.
+ * Computes inverse such that a * inv mod n = 1, where 0 < a < n.
* Algorithm goes like this:
- *
+ *
* g(0) = n v(0) = 0
* g(1) = a v(1) = 1
- *
+ *
* y = g(i-1) / g(i)
* g(i+1) = g(i-1) - y * g(i) = g(i)-1 mod g(i)
* v(i+1) = v(i-1) - y * v(i)
- *
- * do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n,
- * is added to inverse making it positive again. (Sometimes the algorithm
- * has a variable u defined too and it behaves just like v, except that
- * initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is
+ *
+ * do until g(i) = 0, then inverse = v(i-1). If inverse is negative then n,
+ * is added to inverse making it positive again. (Sometimes the algorithm
+ * has a variable u defined too and it behaves just like v, except that
+ * initalize values are swapped (i.e. u(0) = 1, u(1) = 0). However, u is
* not needed by the algorithm so it does not have to be included.)
*
***/
*
* SYNOPSIS
*
- * void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t,
+ * void silc_mp_gcdext(SilcMPInt *g, SilcMPInt *s, SilcMPInt *t,
* SilcMPInt *mp1, SilcMPInt *mp2);
*
* DESCRIPTION
*
* DESCRIPTION
*
- * Compare `mp1' and unsigned word `ui'. Returns posivite, zero, or
- * negative if `mp1' > `ui', `mp1' == `ui', or `mp1' < `ui',
+ * Compare `mp1' and unsigned word `ui'. Returns posivite, zero, or
+ * negative if `mp1' > `ui', `mp1' == `ui', or `mp1' < `ui',
* respectively.
*
***/
*
* SYNOPSIS
*
- * unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len,
+ * unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len,
* SilcUInt32 *ret_len);
*
* DESCRIPTION
* buffer is allocated that large. If zero then the size is approximated.
*
***/
-unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len,
+unsigned char *silc_mp_mp2bin(SilcMPInt *val, SilcUInt32 len,
SilcUInt32 *ret_len);
/****f* silcmath/SilcMPAPI/silc_mp_mp2bin_noalloc
*
* SYNOPSIS
*
- * void silc_mp_bin2mp(unsigned char *data, SilcUInt32 len,
+ * void silc_mp_bin2mp(unsigned char *data, SilcUInt32 len,
* SilcMPInt *ret);
*
* DESCRIPTION
/*
silcprimegen.c
-
- Author: Pekka Riikonen <priikone@poseidon.pspt.fi>
- Copyright (C) 1997 - 2000 Pekka Riikonen
+ Author: Pekka Riikonen <priikone@silcnet.org>
+
+ Copyright (C) 1997 - 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
+ the Free Software Foundation; version 2 of the License.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#include "silcincludes.h"
-/*
- Fixed primetable for small prime division. We use this primetable to
- test if possible prime is divisible any of these. Primetable is NULL
- terminated. This same primetable can be found in SSH and PGP except that
- SSH don't have prime 2 in the table. This sort of prime table is easily
+/*
+ Fixed primetable for small prime division. We use this primetable to
+ test if possible prime is divisible any of these. Primetable is NULL
+ terminated. This same primetable can be found in SSH and PGP except that
+ SSH don't have prime 2 in the table. This sort of prime table is easily
created, for example, using sieve of Erastosthenes.
-
- Just to include; if somebody is interested about Erastosthenes. Creating a
- small prime table using sieve of Erastosthenes would go like this: Write
+
+ Just to include; if somebody is interested about Erastosthenes. Creating a
+ small prime table using sieve of Erastosthenes would go like this: Write
down a list of integers in range of 2 to n. Here in example n = 20.
-
+
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
Then mark all multiples of 2 (Note: 2 is a prime number):
x x x x x x x x x x x
And continue doing this, marking all multiples of the next unmarked
- number until there are now new unmarked numbers. And there you have a
+ number until there are now new unmarked numbers. And there you have a
prime table. So in this example, primes between 2 - 20 are:
-
+
2 3 5 7 11 13 17 19
*/
};
-/* Find appropriate prime. It generates a number by taking random bytes.
- It then tests the number that it's not divisible by any of the small
- primes and then it performs Fermat's prime test. I thank Rieks Joosten
- (r.joosten@pijnenburg.nl) for such a good help with prime tests.
+/* Find appropriate prime. It generates a number by taking random bytes.
+ It then tests the number that it's not divisible by any of the small
+ primes and then it performs Fermat's prime test. I thank Rieks Joosten
+ (r.joosten@pijnenburg.nl) for such a good help with prime tests.
If argument verbose is TRUE this will display some status information
about the progress of generation. */
SilcUInt32 i, b, k;
SilcUInt32 *spmods;
SilcMPInt r, base, tmp, tmp2, oprime;
+ bool valid = FALSE;
silc_mp_init(&r);
silc_mp_init(&base);
SILC_LOG_DEBUG(("Generating new prime"));
- /* Get random number and assure that the first digit is not zero since
- our conversion routines does not like the first digit being zero. */
- do {
- if (numbuf) {
- memset(numbuf, 0, (bits / 8));
- silc_free(numbuf);
- }
+ while (valid == FALSE) {
+ /* Get random number */
if (rng)
- numbuf = silc_rng_get_rn_string(rng, (bits / 8));
+ numbuf = silc_rng_get_rn_data(rng, (bits / 8));
else
- numbuf = silc_rng_global_get_rn_string((bits / 8));
+ numbuf = silc_rng_global_get_rn_data((bits / 8));
if (!numbuf)
return FALSE;
- } while (numbuf[0] == '0');
-
- /* Convert into MP and set the size */
- silc_mp_set_str(prime, numbuf, 16);
- silc_mp_mod_2exp(prime, prime, bits);
-
- /* Empty buffer */
- memset(numbuf, 0, (bits / 8));
- silc_free(numbuf);
- /* Number could be even number, so we'll make it odd. */
- silc_mp_set_ui(&tmp, 1);
- silc_mp_or(prime, prime, &tmp); /* OR operator */
+ /* Convert into MP and set the size */
+ silc_mp_bin2mp(numbuf, (bits / 8), prime);
+ silc_mp_mod_2exp(prime, prime, bits);
+
+ /* Empty buffer */
+ memset(numbuf, 0, (bits / 8));
+ silc_free(numbuf);
+
+ /* Set highest bit */
+ silc_mp_set_ui(&tmp, 1);
+ silc_mp_mul_2exp(&tmp, &tmp, bits - 1);
+ silc_mp_or(prime, prime, &tmp);
+
+ /* Number could be even number, so we'll make it odd. */
+ silc_mp_set_ui(&tmp, 1);
+ silc_mp_or(prime, prime, &tmp);
+
+ /* Init modulo table with the prime candidate and the primes
+ in the primetable. */
+ spmods = silc_calloc(1, sizeof(primetable) * sizeof(SilcUInt32));
+ for (i = 0; primetable[i] != 0; i++) {
+ silc_mp_mod_ui(&tmp, prime, primetable[i]);
+ spmods[i] = silc_mp_get_ui(&tmp);
+ }
- /* Init modulo table with the prime candidate and the primes
- in the primetable. */
- spmods = silc_calloc(1, sizeof(primetable) * sizeof(SilcUInt32));
- for (i = 0; primetable[i] != 0; i++) {
- silc_mp_mod_ui(&tmp, prime, primetable[i]);
- spmods[i] = silc_mp_get_ui(&tmp);
- }
+ /* k is added by 2, this way we skip all even numbers (prime is odd). */
+ silc_mp_set(&oprime, prime);
+ for (k = 0;; k += 2) {
+ silc_mp_add_ui(&oprime, prime, k);
+
+ /* See if the prime has a divisor in primetable[].
+ * If it has then it's a composite. We add k to the
+ * original modulo value, k is added by 2 on every roll.
+ * This way we don't have to re-init the whole table if
+ * the number is composite.
+ */
+ for (b = 0; b < i; b++) {
+ silc_mp_set_ui(&tmp2, spmods[b]);
+ silc_mp_add_ui(&tmp2, &tmp2, k);
+ silc_mp_mod_ui(&tmp, &tmp2, primetable[b]);
+
+ /* If mod is 0, the number is composite */
+ if (silc_mp_cmp_ui(&tmp, 0) == 0)
+ break;
+ }
+ if (b < i)
+ continue;
+
+ /* Passed the quick test. */
+
+ /* Does the prime pass the Fermat's prime test.
+ * r = 2 ^ p mod p, if r == 2, then p is probably a prime.
+ */
+ silc_mp_pow_mod(&r, &base, &oprime, &oprime);
+ if (silc_mp_cmp_ui(&r, 2) != 0) {
+ if (verbose) {
+ printf(".");
+ fflush(stdout);
+ }
+ continue;
+ }
- /* k is added by 2, this way we skip all even numbers (prime is odd). */
- silc_mp_set(&oprime, prime);
- for (k = 0;; k += 2) {
- silc_mp_add_ui(&oprime, prime, k);
-
- /* See if the prime has a divisor in primetable[].
- * If it has then it's a composite. We add k to the
- * original modulo value, k is added by 2 on every roll.
- * This way we don't have to re-init the whole table if
- * the number is composite.
- */
- for (b = 0; b < i; b++) {
- silc_mp_set_ui(&tmp2, spmods[b]);
- silc_mp_add_ui(&tmp2, &tmp2, k);
- silc_mp_mod_ui(&tmp, &tmp2, primetable[b]);
-
- /* If mod is 0, the number is composite */
- if (silc_mp_cmp_ui(&tmp, 0) == 0)
- break;
+ /* Passed the Fermat's test. And I don't think
+ * we have to do more tests. If anyone wants to do more
+ * tests, MP library has probability of prime test:
+ *
+ * if (silc_mp_probab_prime_p(prime, 25))
+ * break; found a probable prime
+ *
+ * However, this is very slow.
+ */
+ /* XXX: this could be done if some argument, say strict_primes, is
+ TRUE when we are willing to spend more time on the prime test and
+ to get, perhaps, better primes. */
+ silc_mp_set(prime, &oprime);
+ break;
}
- if (b < i)
- continue;
-
- /* Passed the quick test. */
-
- /* Does the prime pass the Fermat's prime test.
- * r = 2 ^ p mod p, if r == 2, then p is probably a prime.
- */
- silc_mp_pow_mod(&r, &base, &oprime, &oprime);
- if (silc_mp_cmp_ui(&r, 2) != 0) {
- if (verbose) {
- printf(".");
- fflush(stdout);
- }
- continue;
+
+ /* Check highest bit */
+ silc_mp_div_2exp(&tmp, prime, bits - 1);
+ if (silc_mp_get_ui(&tmp) == 1) {
+ valid = TRUE;
+ break;
}
-
- /* Passed the Fermat's test. And I don't think
- * we have to do more tests. If anyone wants to do more
- * tests, MP library has probability of prime test:
- *
- * if (silc_mp_probab_prime_p(prime, 25))
- * break; found a probable prime
- *
- * However, this is very slow.
- */
- /* XXX: this could be done if some argument, say strict_primes, is
- TRUE when we are willing to spend more time on the prime test and
- to get, perhaps, better primes. */
- silc_mp_set(prime, &oprime);
- break;
}
silc_free(spmods);
silc_mp_uninit(&tmp2);
silc_mp_uninit(&oprime);
- return TRUE;
+ return valid;
}
-/* Performs primality testings for given number. Returns TRUE if the
+/* Performs primality testings for given number. Returns TRUE if the
number is probably a prime. */
bool silc_math_prime_test(SilcMPInt *p)
{
SilcMPInt r, base, tmp;
int i, ret = 0;
-
+
silc_mp_init(&r);
silc_mp_init(&tmp);
silc_mp_init(&base);
silc_mp_set_ui(&base, 2);
-
+
SILC_LOG_DEBUG(("Testing probability of prime"));
/* See if the number is divisible by any of the
small primes in primetable[]. */
for (i = 0; primetable[i] != 0; i++) {
silc_mp_mod_ui(&tmp, p, primetable[i]);
-
+
/* If mod is 0, the number is composite */
if (silc_mp_cmp_ui(&tmp, 0) == 0)
ret = -1;
}
-
+
/* Does the prime pass the Fermat's prime test.
* r = 2 ^ p mod p, if r == 2, then p is probably a prime.
*/
silc_mp_uninit(&r);
silc_mp_uninit(&tmp);
silc_mp_uninit(&base);
-
+
if (ret)
return FALSE;
--- /dev/null
+/* Start: fp_2expt.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* computes a = 2**b */
+void fp_2expt(fp_int *a, int b)
+{
+ int z;
+
+ /* zero a as per default */
+ fp_zero (a);
+
+ if (b < 0) {
+ return;
+ }
+
+ z = b / DIGIT_BIT;
+ if (z >= FP_SIZE) {
+ return;
+ }
+
+ /* set the used count of where the bit will go */
+ a->used = z + 1;
+
+ /* put the single bit in its place */
+ a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
+}
+
+
+/* End: fp_2expt.c */
+
+/* Start: fp_add.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_add(fp_int *a, fp_int *b, fp_int *c)
+{
+ int sa, sb;
+
+ /* get sign of both inputs */
+ sa = a->sign;
+ sb = b->sign;
+
+ /* handle two cases, not four */
+ if (sa == sb) {
+ /* both positive or both negative */
+ /* add their magnitudes, copy the sign */
+ c->sign = sa;
+ s_fp_add (a, b, c);
+ } else {
+ /* one positive, the other negative */
+ /* subtract the one with the greater magnitude from */
+ /* the one of the lesser magnitude. The result gets */
+ /* the sign of the one with the greater magnitude. */
+ if (fp_cmp_mag (a, b) == FP_LT) {
+ c->sign = sb;
+ s_fp_sub (b, a, c);
+ } else {
+ c->sign = sa;
+ s_fp_sub (a, b, c);
+ }
+ }
+}
+
+/* End: fp_add.c */
+
+/* Start: fp_add_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a + b */
+void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
+{
+ fp_int tmp;
+ fp_set(&tmp, b);
+ fp_add(a,&tmp,c);
+}
+
+/* End: fp_add_d.c */
+
+/* Start: fp_addmod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* d = a + b (mod c) */
+int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ fp_int tmp;
+ fp_zero(&tmp);
+ fp_add(a, b, &tmp);
+ return fp_mod(&tmp, c, d);
+}
+
+/* End: fp_addmod.c */
+
+/* Start: fp_cmp.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_cmp(fp_int *a, fp_int *b)
+{
+ if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
+ return FP_LT;
+ } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
+ return FP_GT;
+ } else {
+ /* compare digits */
+ if (a->sign == FP_NEG) {
+ /* if negative compare opposite direction */
+ return fp_cmp_mag(b, a);
+ } else {
+ return fp_cmp_mag(a, b);
+ }
+ }
+}
+
+/* End: fp_cmp.c */
+
+/* Start: fp_cmp_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* compare against a single digit */
+int fp_cmp_d(fp_int *a, fp_digit b)
+{
+ /* compare based on sign */
+ if ((b && a->used == 0) || a->sign == FP_NEG) {
+ return FP_LT;
+ }
+
+ /* compare based on magnitude */
+ if (a->used > 1) {
+ return FP_GT;
+ }
+
+ /* compare the only digit of a to b */
+ if (a->dp[0] > b) {
+ return FP_GT;
+ } else if (a->dp[0] < b) {
+ return FP_LT;
+ } else {
+ return FP_EQ;
+ }
+
+}
+
+/* End: fp_cmp_d.c */
+
+/* Start: fp_cmp_mag.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_cmp_mag(fp_int *a, fp_int *b)
+{
+ int x;
+
+ if (a->used > b->used) {
+ return FP_GT;
+ } else if (a->used < b->used) {
+ return FP_LT;
+ } else {
+ for (x = a->used - 1; x >= 0; x--) {
+ if (a->dp[x] > b->dp[x]) {
+ return FP_GT;
+ } else if (a->dp[x] < b->dp[x]) {
+ return FP_LT;
+ }
+ }
+ }
+ return FP_EQ;
+}
+
+
+/* End: fp_cmp_mag.c */
+
+/* Start: fp_cnt_lsb.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+static const int lnz[16] = {
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Counts the number of lsbs which are zero before the first zero bit */
+int fp_cnt_lsb(fp_int *a)
+{
+ int x;
+ fp_digit q, qq;
+
+ /* easy out */
+ if (fp_iszero(a) == 1) {
+ return 0;
+ }
+
+ /* scan lower digits until non-zero */
+ for (x = 0; x < a->used && a->dp[x] == 0; x++);
+ q = a->dp[x];
+ x *= DIGIT_BIT;
+
+ /* now scan this digit until a 1 is found */
+ if ((q & 1) == 0) {
+ do {
+ qq = q & 15;
+ x += lnz[qq];
+ q >>= 4;
+ } while (qq == 0);
+ }
+ return x;
+}
+
+
+/* End: fp_cnt_lsb.c */
+
+/* Start: fp_count_bits.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_count_bits (fp_int * a)
+{
+ int r;
+ fp_digit q;
+
+ /* shortcut */
+ if (a->used == 0) {
+ return 0;
+ }
+
+ /* get number of digits and add that */
+ r = (a->used - 1) * DIGIT_BIT;
+
+ /* take the last digit and count the bits in it */
+ q = a->dp[a->used - 1];
+ while (q > ((fp_digit) 0)) {
+ ++r;
+ q >>= ((fp_digit) 1);
+ }
+ return r;
+}
+
+/* End: fp_count_bits.c */
+
+/* Start: fp_div.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* a/b => cb + d == a */
+int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ fp_int q, x, y, t1, t2;
+ int n, t, i, norm, neg;
+
+ /* is divisor zero ? */
+ if (fp_iszero (b) == 1) {
+ return FP_VAL;
+ }
+
+ /* if a < b then q=0, r = a */
+ if (fp_cmp_mag (a, b) == FP_LT) {
+ if (d != NULL) {
+ fp_copy (a, d);
+ }
+ if (c != NULL) {
+ fp_zero (c);
+ }
+ return FP_OKAY;
+ }
+
+ fp_init(&q);
+ q.used = a->used + 2;
+
+ fp_init(&t1);
+ fp_init(&t2);
+ fp_init_copy(&x, a);
+ fp_init_copy(&y, b);
+
+ /* fix the sign */
+ neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
+ x.sign = y.sign = FP_ZPOS;
+
+ /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
+ norm = fp_count_bits(&y) % DIGIT_BIT;
+ if (norm < (int)(DIGIT_BIT-1)) {
+ norm = (DIGIT_BIT-1) - norm;
+ fp_mul_2d (&x, norm, &x);
+ fp_mul_2d (&y, norm, &y);
+ } else {
+ norm = 0;
+ }
+
+ /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
+ n = x.used - 1;
+ t = y.used - 1;
+
+ /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
+ fp_lshd (&y, n - t); /* y = y*b**{n-t} */
+
+ while (fp_cmp (&x, &y) != FP_LT) {
+ ++(q.dp[n - t]);
+ fp_sub (&x, &y, &x);
+ }
+
+ /* reset y by shifting it back down */
+ fp_rshd (&y, n - t);
+
+ /* step 3. for i from n down to (t + 1) */
+ for (i = n; i >= (t + 1); i--) {
+ if (i > x.used) {
+ continue;
+ }
+
+ /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
+ * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
+ if (x.dp[i] == y.dp[t]) {
+ q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
+ } else {
+ fp_word tmp;
+ tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
+ tmp |= ((fp_word) x.dp[i - 1]);
+ tmp /= ((fp_word) y.dp[t]);
+ q.dp[i - t - 1] = (fp_digit) (tmp);
+ }
+
+ /* while (q{i-t-1} * (yt * b + y{t-1})) >
+ xi * b**2 + xi-1 * b + xi-2
+
+ do q{i-t-1} -= 1;
+ */
+ q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
+ do {
+ q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
+
+ /* find left hand */
+ fp_zero (&t1);
+ t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
+ t1.dp[1] = y.dp[t];
+ t1.used = 2;
+ fp_mul_d (&t1, q.dp[i - t - 1], &t1);
+
+ /* find right hand */
+ t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
+ t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
+ t2.dp[2] = x.dp[i];
+ t2.used = 3;
+ } while (fp_cmp_mag(&t1, &t2) == FP_GT);
+
+ /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
+ fp_mul_d (&y, q.dp[i - t - 1], &t1);
+ fp_lshd (&t1, i - t - 1);
+ fp_sub (&x, &t1, &x);
+
+ /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
+ if (x.sign == FP_NEG) {
+ fp_copy (&y, &t1);
+ fp_lshd (&t1, i - t - 1);
+ fp_add (&x, &t1, &x);
+ q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
+ }
+ }
+
+ /* now q is the quotient and x is the remainder
+ * [which we have to normalize]
+ */
+
+ /* get sign before writing to c */
+ x.sign = x.used == 0 ? FP_ZPOS : a->sign;
+
+ if (c != NULL) {
+ fp_clamp (&q);
+ fp_copy (&q, c);
+ c->sign = neg;
+ }
+
+ if (d != NULL) {
+ fp_div_2d (&x, norm, &x, NULL);
+
+/* the following is a kludge, essentially we were seeing the right remainder but
+ with excess digits that should have been zero
+ */
+ for (i = b->used; i < x.used; i++) {
+ x.dp[i] = 0;
+ }
+ fp_clamp(&x);
+ fp_copy (&x, d);
+ }
+
+ return FP_OKAY;
+}
+
+/* End: fp_div.c */
+
+/* Start: fp_div_2.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* b = a/2 */
+void fp_div_2(fp_int * a, fp_int * b)
+{
+ int x, oldused;
+
+ oldused = b->used;
+ b->used = a->used;
+ {
+ register fp_digit r, rr, *tmpa, *tmpb;
+
+ /* source alias */
+ tmpa = a->dp + b->used - 1;
+
+ /* dest alias */
+ tmpb = b->dp + b->used - 1;
+
+ /* carry */
+ r = 0;
+ for (x = b->used - 1; x >= 0; x--) {
+ /* get the carry for the next iteration */
+ rr = *tmpa & 1;
+
+ /* shift the current digit, add in carry and store */
+ *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
+
+ /* forward carry to next iteration */
+ r = rr;
+ }
+
+ /* zero excess digits */
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
+ }
+ b->sign = a->sign;
+ fp_clamp (b);
+}
+
+/* End: fp_div_2.c */
+
+/* Start: fp_div_2d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a / 2**b */
+void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
+{
+ fp_digit D, r, rr;
+ int x;
+ fp_int t;
+
+ /* if the shift count is <= 0 then we do no work */
+ if (b <= 0) {
+ fp_copy (a, c);
+ if (d != NULL) {
+ fp_zero (d);
+ }
+ return;
+ }
+
+ fp_init(&t);
+
+ /* get the remainder */
+ if (d != NULL) {
+ fp_mod_2d (a, b, &t);
+ }
+
+ /* copy */
+ fp_copy(a, c);
+
+ /* shift by as many digits in the bit count */
+ if (b >= (int)DIGIT_BIT) {
+ fp_rshd (c, b / DIGIT_BIT);
+ }
+
+ /* shift any bit count < DIGIT_BIT */
+ D = (fp_digit) (b % DIGIT_BIT);
+ if (D != 0) {
+ register fp_digit *tmpc, mask, shift;
+
+ /* mask */
+ mask = (((fp_digit)1) << D) - 1;
+
+ /* shift for lsb */
+ shift = DIGIT_BIT - D;
+
+ /* alias */
+ tmpc = c->dp + (c->used - 1);
+
+ /* carry */
+ r = 0;
+ for (x = c->used - 1; x >= 0; x--) {
+ /* get the lower bits of this word in a temp */
+ rr = *tmpc & mask;
+
+ /* shift the current word and mix in the carry bits from the previous word */
+ *tmpc = (*tmpc >> D) | (r << shift);
+ --tmpc;
+
+ /* set the carry to the carry bits of the current word found above */
+ r = rr;
+ }
+ }
+ fp_clamp (c);
+ if (d != NULL) {
+ fp_copy (&t, d);
+ }
+}
+
+/* End: fp_div_2d.c */
+
+/* Start: fp_div_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+static int s_is_power_of_two(fp_digit b, int *p)
+{
+ int x;
+
+ for (x = 1; x < DIGIT_BIT; x++) {
+ if (b == (((fp_digit)1)<<x)) {
+ *p = x;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* a/b => cb + d == a */
+int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
+{
+ fp_int q;
+ fp_word w;
+ fp_digit t;
+ int ix;
+
+ /* cannot divide by zero */
+ if (b == 0) {
+ return FP_VAL;
+ }
+
+ /* quick outs */
+ if (b == 1 || fp_iszero(a) == 1) {
+ if (d != NULL) {
+ *d = 0;
+ }
+ if (c != NULL) {
+ fp_copy(a, c);
+ }
+ return FP_OKAY;
+ }
+
+ /* power of two ? */
+ if (s_is_power_of_two(b, &ix) == 1) {
+ if (d != NULL) {
+ *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
+ }
+ if (c != NULL) {
+ fp_div_2d(a, ix, c, NULL);
+ }
+ return FP_OKAY;
+ }
+
+ /* no easy answer [c'est la vie]. Just division */
+ fp_init(&q);
+
+ q.used = a->used;
+ q.sign = a->sign;
+ w = 0;
+ for (ix = a->used - 1; ix >= 0; ix--) {
+ w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
+
+ if (w >= b) {
+ t = (fp_digit)(w / b);
+ w -= ((fp_word)t) * ((fp_word)b);
+ } else {
+ t = 0;
+ }
+ q.dp[ix] = (fp_digit)t;
+ }
+
+ if (d != NULL) {
+ *d = (fp_digit)w;
+ }
+
+ if (c != NULL) {
+ fp_clamp(&q);
+ fp_copy(&q, c);
+ }
+
+ return FP_OKAY;
+}
+
+
+/* End: fp_div_d.c */
+
+/* Start: fp_exptmod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* y = g**x (mod b)
+ * Some restrictions... x must be positive and < b
+ */
+static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+{
+ fp_int M[64], res;
+ fp_digit buf, mp;
+ int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+
+ /* find window size */
+ x = fp_count_bits (X);
+ if (x <= 7) {
+ winsize = 2;
+ } else if (x <= 36) {
+ winsize = 3;
+ } else if (x <= 140) {
+ winsize = 4;
+ } else if (x <= 450) {
+ winsize = 5;
+ } else {
+ winsize = 6;
+ }
+
+ /* init M array */
+ memset(M, 0, sizeof(M));
+
+ /* now setup montgomery */
+ if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
+ return err;
+ }
+
+ /* setup result */
+ fp_init(&res);
+
+ /* create M table
+ *
+ * The M table contains powers of the input base, e.g. M[x] = G^x mod P
+ *
+ * The first half of the table is not computed though accept for M[0] and M[1]
+ */
+
+ /* now we need R mod m */
+ fp_montgomery_calc_normalization (&res, P);
+
+ /* now set M[1] to G * R mod m */
+ if (fp_cmp_mag(P, G) != FP_GT) {
+ /* G > P so we reduce it first */
+ fp_mod(G, P, &M[1]);
+ } else {
+ fp_copy(G, &M[1]);
+ }
+ fp_mulmod (&M[1], &res, P, &M[1]);
+
+ /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+ fp_copy (&M[1], &M[1 << (winsize - 1)]);
+ for (x = 0; x < (winsize - 1); x++) {
+ fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
+ fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+ }
+
+ /* create upper table */
+ for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+ fp_mul(&M[x - 1], &M[1], &M[x]);
+ fp_montgomery_reduce(&M[x], P, mp);
+ }
+
+ /* set initial mode and bit cnt */
+ mode = 0;
+ bitcnt = 1;
+ buf = 0;
+ digidx = X->used - 1;
+ bitcpy = 0;
+ bitbuf = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits so break */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
+ buf <<= (fp_digit)1;
+
+ /* if the bit is zero and mode == 0 then we ignore it
+ * These represent the leading zero bits before the first 1 bit
+ * in the exponent. Technically this opt is not required but it
+ * does lower the # of trivial squaring/reductions used
+ */
+ if (mode == 0 && y == 0) {
+ continue;
+ }
+
+ /* if the bit is zero and mode == 1 then we square */
+ if (mode == 1 && y == 0) {
+ fp_sqr(&res, &res);
+ fp_montgomery_reduce(&res, P, mp);
+ continue;
+ }
+
+ /* else we add it to the window */
+ bitbuf |= (y << (winsize - ++bitcpy));
+ mode = 2;
+
+ if (bitcpy == winsize) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < winsize; x++) {
+ fp_sqr(&res, &res);
+ fp_montgomery_reduce(&res, P, mp);
+ }
+
+ /* then multiply */
+ fp_mul(&res, &M[bitbuf], &res);
+ fp_montgomery_reduce(&res, P, mp);
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ mode = 1;
+ }
+ }
+
+ /* if bits remain then square/multiply */
+ if (mode == 2 && bitcpy > 0) {
+ /* square then multiply if the bit is set */
+ for (x = 0; x < bitcpy; x++) {
+ fp_sqr(&res, &res);
+ fp_montgomery_reduce(&res, P, mp);
+
+ /* get next bit of the window */
+ bitbuf <<= 1;
+ if ((bitbuf & (1 << winsize)) != 0) {
+ /* then multiply */
+ fp_mul(&res, &M[1], &res);
+ fp_montgomery_reduce(&res, P, mp);
+ }
+ }
+ }
+
+ /* fixup result if Montgomery reduction is used
+ * recall that any value in a Montgomery system is
+ * actually multiplied by R mod n. So we have
+ * to reduce one more time to cancel out the factor
+ * of R.
+ */
+ fp_montgomery_reduce(&res, P, mp);
+
+ /* swap res with Y */
+ fp_copy (&res, Y);
+ return FP_OKAY;
+}
+
+
+int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+{
+ fp_int tmp;
+ int err;
+
+ /* is X negative? */
+ if (X->sign == FP_NEG) {
+ /* yes, copy G and invmod it */
+ fp_copy(G, &tmp);
+ if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
+ return err;
+ }
+ X->sign = FP_ZPOS;
+ err = _fp_exptmod(&tmp, X, P, Y);
+ X->sign = FP_NEG;
+ return err;
+ } else {
+ /* Positive exponent so just exptmod */
+ return _fp_exptmod(G, X, P, Y);
+ }
+}
+
+/* End: fp_exptmod.c */
+
+/* Start: fp_gcd.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = (a, b) */
+void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
+{
+ fp_int u, v, r;
+
+ /* either zero than gcd is the largest */
+ if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
+ fp_abs (b, c);
+ return;
+ }
+ if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
+ fp_abs (a, c);
+ return;
+ }
+
+ /* optimized. At this point if a == 0 then
+ * b must equal zero too
+ */
+ if (fp_iszero (a) == 1) {
+ fp_zero(c);
+ return;
+ }
+
+ /* sort inputs */
+ if (fp_cmp_mag(a, b) != FP_LT) {
+ fp_init_copy(&u, a);
+ fp_init_copy(&v, b);
+ } else {
+ fp_init_copy(&u, b);
+ fp_init_copy(&v, a);
+ }
+
+ fp_zero(&r);
+ while (fp_iszero(&v) == FP_NO) {
+ fp_mod(&u, &v, &r);
+ fp_copy(&v, &u);
+ fp_copy(&r, &v);
+ }
+ fp_copy(&u, c);
+}
+
+/* End: fp_gcd.c */
+
+/* Start: fp_ident.c */
+#include "tfm.h"
+
+const char *fp_ident(void)
+{
+ static char buf[1024];
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, sizeof(buf)-1,
+"TomsFastMath (%s)\n"
+"\n"
+"Sizeofs\n"
+"\tfp_digit = %u\n"
+"\tfp_word = %u\n"
+"\n"
+"FP_MAX_SIZE = %u\n"
+"\n"
+"Defines: \n"
+#ifdef __i386__
+" __i386__ "
+#endif
+#ifdef __x86_64__
+" __x86_64__ "
+#endif
+#ifdef TFM_X86
+" TFM_X86 "
+#endif
+#ifdef TFM_X86_64
+" TFM_X86_64 "
+#endif
+#ifdef TFM_SSE2
+" TFM_SSE2 "
+#endif
+#ifdef TFM_ARM
+" TFM_ARM "
+#endif
+#ifdef TFM_NO_ASM
+" TFM_NO_ASM "
+#endif
+#ifdef FP_64BIT
+" FP_64BIT "
+#endif
+#ifdef TFM_LARGE
+" TFM_LARGE "
+#endif
+#ifdef TFM_HUGE
+" TFM_HUGE "
+#endif
+"\n", __DATE__, sizeof(fp_digit), sizeof(fp_word), FP_MAX_SIZE);
+
+ if (sizeof(fp_digit) == sizeof(fp_word)) {
+ strncat(buf, "WARNING: sizeof(fp_digit) == sizeof(fp_word), this build is likely to not work properly.\n",
+ sizeof(buf)-1);
+ }
+ return buf;
+}
+
+#ifdef STANDALONE
+
+int main(void)
+{
+ printf("%s\n", fp_ident());
+ return 0;
+}
+
+#endif
+
+
+/* End: fp_ident.c */
+
+/* Start: fp_invmod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = 1/a (mod b) for odd b only */
+int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
+{
+ fp_int x, y, u, v, B, D;
+ int neg;
+
+ /* 2. [modified] b must be odd */
+ if (fp_iseven (b) == FP_YES) {
+ return FP_VAL;
+ }
+
+ /* init all our temps */
+ fp_init(&x); fp_init(&y);
+ fp_init(&u); fp_init(&v);
+ fp_init(&B); fp_init(&D);
+
+ /* x == modulus, y == value to invert */
+ fp_copy(b, &x);
+
+ /* we need y = |a| */
+ fp_abs(a, &y);
+
+ /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+ fp_copy(&x, &u);
+ fp_copy(&y, &v);
+ fp_set (&D, 1);
+
+top:
+ /* 4. while u is even do */
+ while (fp_iseven (&u) == FP_YES) {
+ /* 4.1 u = u/2 */
+ fp_div_2 (&u, &u);
+
+ /* 4.2 if B is odd then */
+ if (fp_isodd (&B) == FP_YES) {
+ fp_sub (&B, &x, &B);
+ }
+ /* B = B/2 */
+ fp_div_2 (&B, &B);
+ }
+
+ /* 5. while v is even do */
+ while (fp_iseven (&v) == FP_YES) {
+ /* 5.1 v = v/2 */
+ fp_div_2 (&v, &v);
+
+ /* 5.2 if D is odd then */
+ if (fp_isodd (&D) == FP_YES) {
+ /* D = (D-x)/2 */
+ fp_sub (&D, &x, &D);
+ }
+ /* D = D/2 */
+ fp_div_2 (&D, &D);
+ }
+
+ /* 6. if u >= v then */
+ if (fp_cmp (&u, &v) != FP_LT) {
+ /* u = u - v, B = B - D */
+ fp_sub (&u, &v, &u);
+ fp_sub (&B, &D, &B);
+ } else {
+ /* v - v - u, D = D - B */
+ fp_sub (&v, &u, &v);
+ fp_sub (&D, &B, &D);
+ }
+
+ /* if not zero goto step 4 */
+ if (fp_iszero (&u) == FP_NO) {
+ goto top;
+ }
+
+ /* now a = C, b = D, gcd == g*v */
+
+ /* if v != 1 then there is no inverse */
+ if (fp_cmp_d (&v, 1) != FP_EQ) {
+ return FP_VAL;
+ }
+
+ /* b is now the inverse */
+ neg = a->sign;
+ while (D.sign == FP_NEG) {
+ fp_add (&D, b, &D);
+ }
+ fp_copy (&D, c);
+ c->sign = neg;
+ return FP_OKAY;
+}
+
+/* End: fp_invmod.c */
+
+/* Start: fp_isprime.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* a few primes */
+static const fp_digit primes[256] = {
+ 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+ 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+ 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+ 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+ 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+ 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+ 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+ 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+ 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+ 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+ 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+ 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+ 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+ 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+ 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+ 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+ 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+ 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+ 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+ 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+ 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+ 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+ 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+ 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+ 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+ 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+ 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+ 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+ 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+ 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+ 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+ 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
+};
+
+int fp_isprime(fp_int *a)
+{
+ fp_int b;
+ fp_digit d;
+ int r, res;
+
+ /* do trial division */
+ for (r = 0; r < 256; r++) {
+ fp_mod_d(a, primes[r], &d);
+ if (d == 0) {
+ return FP_NO;
+ }
+ }
+
+ /* now do 8 miller rabins */
+ for (r = 0; r < 8; r++) {
+ fp_set(&b, primes[r]);
+ fp_prime_miller_rabin(a, &b, &res);
+ if (res == FP_NO) {
+ return FP_NO;
+ }
+ }
+ return FP_YES;
+}
+
+/* End: fp_isprime.c */
+
+/* Start: fp_lcm.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = [a, b] */
+void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
+{
+ fp_int t1, t2;
+
+ fp_init(&t1);
+ fp_init(&t2);
+ fp_gcd(a, b, &t1);
+ if (fp_cmp_mag(a, b) == FP_GT) {
+ fp_div(a, &t1, &t2, NULL);
+ fp_mul(b, &t2, c);
+ } else {
+ fp_div(b, &t1, &t2, NULL);
+ fp_mul(a, &t2, c);
+ }
+}
+
+/* End: fp_lcm.c */
+
+/* Start: fp_lshd.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_lshd(fp_int *a, int x)
+{
+ int y;
+
+ /* move up and truncate as required */
+ y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
+
+ /* store new size */
+ a->used = y + 1;
+
+ /* move digits */
+ for (; y >= x; y--) {
+ a->dp[y] = a->dp[y-x];
+ }
+
+ /* zero lower digits */
+ for (; y >= 0; y--) {
+ a->dp[y] = 0;
+ }
+
+ /* clamp digits */
+ fp_clamp(a);
+}
+
+/* End: fp_lshd.c */
+
+/* Start: fp_mod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod b, 0 <= c < b */
+int fp_mod(fp_int *a, fp_int *b, fp_int *c)
+{
+ fp_int t;
+ int err;
+
+ fp_zero(&t);
+ if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
+ return err;
+ }
+ if (t.sign != b->sign) {
+ fp_add(&t, b, c);
+ } else {
+ fp_copy(&t, c);
+ }
+ return FP_OKAY;
+}
+
+
+
+/* End: fp_mod.c */
+
+/* Start: fp_mod_2d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod 2**d */
+void fp_mod_2d(fp_int *a, int b, fp_int *c)
+{
+ int x;
+
+ /* zero if count less than or equal to zero */
+ if (b <= 0) {
+ fp_zero(c);
+ return;
+ }
+
+ /* get copy of input */
+ fp_copy(a, c);
+
+ /* if 2**d is larger than we just return */
+ if (b >= (DIGIT_BIT * a->used)) {
+ return;
+ }
+
+ /* zero digits above the last digit of the modulus */
+ for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
+ c->dp[x] = 0;
+ }
+ /* clear the digit that is not completely outside/inside the modulus */
+ c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
+ fp_clamp (c);
+}
+
+/* End: fp_mod_2d.c */
+
+/* Start: fp_mod_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod b, 0 <= c < b */
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
+{
+ return fp_div_d(a, b, NULL, c);
+}
+
+/* End: fp_mod_d.c */
+
+/* Start: fp_montgomery_calc_normalization.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* computes a = B**n mod b without division or multiplication useful for
+ * normalizing numbers in a Montgomery system.
+ */
+void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
+{
+ int x, bits;
+
+ /* how many bits of last digit does b use */
+ bits = fp_count_bits (b) % DIGIT_BIT;
+
+ /* compute A = B^(n-1) * 2^(bits-1) */
+ if (b->used > 1) {
+ fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
+ } else {
+ fp_set(a, 1);
+ bits = 1;
+ }
+
+ /* now compute C = A * B mod b */
+ for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
+ fp_mul_2 (a, a);
+ if (fp_cmp_mag (a, b) != FP_LT) {
+ s_fp_sub (a, b, a);
+ }
+ }
+}
+
+
+/* End: fp_montgomery_calc_normalization.c */
+
+/* Start: fp_montgomery_reduce.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+#if defined(TFM_X86)
+
+/* x86-32 code */
+
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+ mu = c[x] * mp;
+
+#define INNERMUL \
+asm( \
+"movl %7,%%eax \n\t" \
+"mull %6 \n\t" \
+"addl %%eax,%0 \n\t" \
+"adcl %%edx,%1 \n\t" \
+"adcl $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(mu), "g"(*tmpm++) \
+ : "%eax", "%edx", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movl %1,%%eax \n\t" \
+"addl %%eax,%6 \n\t" \
+"movl %2,%%eax \n\t" \
+"adcl %%eax,%7 \n\t" \
+"adcl $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
+: "%eax", "%cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 code */
+
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+ mu = c[x] * mp;
+
+#define INNERMUL \
+asm( \
+"movq %7,%%rax \n\t" \
+"mulq %6 \n\t" \
+"addq %%rax,%0 \n\t" \
+"adcq %%rdx,%1 \n\t" \
+"adcq $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(mu), "g"(*tmpm++) \
+ : "%rax", "%rdx", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movq %1,%%rax \n\t" \
+"movq %2,%%rbx \n\t" \
+"addq %%rax,%6 \n\t" \
+"adcq %%rbx,%7 \n\t" \
+"adcq $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
+: "%rax", "%rbx", "%cc");
+
+#elif defined(TFM_SSE2)
+
+/* SSE2 code */
+
+#define MONT_START \
+asm("movd %0,%%mm2"::"g"(mp));
+
+#define MONT_FINI \
+asm("emms");
+
+#define LOOP_START \
+asm(\
+"movd %0,%%mm1 \n\t" \
+"pmuludq %%mm2,%%mm1 \n\t" \
+:: "g"(c[x]));
+
+#define INNERMUL \
+asm( \
+"movd %6,%%mm0 \n\t" \
+"pmuludq %%mm1,%%mm0 \n\t" \
+"movd %%mm0,%%eax \n\t" \
+"psrlq $32, %%mm0 \n\t" \
+"addl %%eax,%0 \n\t" \
+"movd %%mm0,%%eax \n\t" \
+"adcl %%eax,%1 \n\t" \
+"adcl $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(*tmpm++) \
+ : "%eax", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movl %1,%%eax \n\t" \
+"addl %%eax,%6 \n\t" \
+"movl %2,%%eax \n\t" \
+"adcl %%eax,%7 \n\t" \
+"adcl $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(_c[OFF0+1]), "g"(_c[OFF1+1]), "g"(_c[OFF2+1]) \
+: "%eax", "%cc");
+
+#elif defined(TFM_ARM)
+
+/* ISO C code */
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+ mu = c[x] * mp;
+
+/* NOTE: later write it using two regs instead of three for _c + ... */
+#define INNERMUL \
+asm( \
+"UMULL r0,r1,%0,%1 \n\t" \
+"LDR r2,[%2] \n\t" \
+"ADDS r2,r2,r0 \n\t" \
+"STR r2,[%2] \n\t" \
+"LDR r2,[%3] \n\t" \
+"ADCS r2,r2,r1 \n\t" \
+"STR r2,[%3] \n\t" \
+"LDR r2,[%4] \n\t" \
+"ADC r2,r2,#0 \n\t" \
+"STR r2,[%4] \n\t" \
+::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc");
+
+#define PROPCARRY \
+asm( \
+"LDR r0,[%1] \n\t" \
+"LDR r1,[%0,#4] \n\t" \
+"ADDS r0,r0,r1 \n\t" \
+"STR r0,[%0,#4] \n\t" \
+"LDR r0,[%2] \n\t" \
+"LDR r1,[%1,#4] \n\t" \
+"ADCS r0,r0,r1 \n\t" \
+"STR r0,[%1,#4] \n\t" \
+"LDR r0,[%2,#4] \n\t" \
+"ADC r0,r0,#0 \n\t" \
+"STR r0,[%2,#4] \n\t" \
+::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc");
+
+#else
+
+/* ISO C code */
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+ mu = c[x] * mp;
+
+#define INNERMUL \
+ do { fp_word t; \
+ t = (fp_word)_c[OFF0] + ((fp_word)mu) * ((fp_word)*tmpm++); _c[OFF0] = t; \
+ t = (fp_word)_c[OFF1] + (t >> DIGIT_BIT); _c[OFF1] = t; \
+ _c[OFF2] += (t >> DIGIT_BIT); \
+ } while (0);
+
+#define PROPCARRY \
+ do { fp_word t; \
+ t = (fp_word)_c[OFF0+1] + (fp_word)_c[OFF1]; _c[OFF0+1] = t; \
+ t = (fp_word)_c[OFF1+1] + (t >> DIGIT_BIT) + (fp_word)_c[OFF2]; _c[OFF1+1] = t; \
+ _c[OFF2+1] += (t >> DIGIT_BIT); \
+ } while (0);
+
+#endif
+
+
+#define OFF0 (0)
+#define OFF1 (FP_SIZE)
+#define OFF2 (FP_SIZE+FP_SIZE)
+
+/* computes x/R == x (mod N) via Montgomery Reduction */
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
+{
+ fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;
+ int oldused, x, y, pa;
+
+ /* now zero the buff */
+ pa = m->used;
+ memset(c, 0, sizeof(c));
+
+ /* copy the input */
+ oldused = a->used;
+ for (x = 0; x < oldused; x++) {
+ c[x] = a->dp[x];
+ }
+
+ MONT_START;
+
+ /* now let's get bizz-sy! */
+ for (x = 0; x < pa; x++) {
+ /* get Mu for this round */
+ LOOP_START;
+
+ /* our friendly neighbourhood alias */
+ _c = c + x;
+ tmpm = m->dp;
+
+ for (y = 0; y < pa; y++) {
+ INNERMUL;
+ ++_c;
+ }
+ /* send carry up man... */
+ _c = c + x;
+ PROPCARRY;
+ }
+
+ /* fix the rest of the carries */
+ _c = c + pa;
+ for (x = pa; x < pa * 2 + 2; x++) {
+ PROPCARRY;
+ ++_c;
+ }
+
+ /* now copy out */
+ _c = c + pa;
+ tmpm = a->dp;
+ for (x = 0; x < pa+1; x++) {
+ *tmpm++ = *_c++;
+ }
+
+ for (; x < oldused; x++) {
+ *tmpm++ = 0;
+ }
+
+ MONT_FINI;
+
+ a->used = pa+1;
+ fp_clamp(a);
+
+ /* if A >= m then A = A - m */
+ if (fp_cmp_mag (a, m) != FP_LT) {
+ s_fp_sub (a, m, a);
+ }
+}
+
+/* End: fp_montgomery_reduce.c */
+
+/* Start: fp_montgomery_setup.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* setups the montgomery reduction */
+int fp_montgomery_setup(fp_int *a, fp_digit *rho)
+{
+ fp_digit x, b;
+
+/* fast inversion mod 2**k
+ *
+ * Based on the fact that
+ *
+ * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
+ * => 2*X*A - X*X*A*A = 1
+ * => 2*(1) - (1) = 1
+ */
+ b = a->dp[0];
+
+ if ((b & 1) == 0) {
+ return FP_VAL;
+ }
+
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+#ifdef FP_64BIT
+ x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+#endif
+
+ /* rho = -1/m mod b */
+ *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
+
+ return FP_OKAY;
+}
+
+
+/* End: fp_montgomery_setup.c */
+
+/* Start: fp_mul.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * b */
+void fp_mul(fp_int *A, fp_int *B, fp_int *C)
+{
+ int r, y, yy, s;
+ fp_int ac, bd, comp, amb, cmd, t1, t2;
+
+ y = MAX(A->used, B->used);
+ yy = MIN(A->used, B->used);
+ if (yy <= 8 || y <= 64) {
+
+ /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
+ of the largest input. We also want to avoid doing excess mults if the
+ inputs are not close to the next power of two. That is, for example,
+ if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
+ */
+ if (y <= 4) {
+ fp_mul_comba4(A,B,C);
+ } else if (y <= 8) {
+ fp_mul_comba8(A,B,C);
+#if defined(TFM_LARGE)
+ } else if (y <= 16 && y >= 10) {
+ fp_mul_comba16(A,B,C);
+#endif
+#if defined(TFM_HUGE)
+ } else if (y <= 32 && y >= 24) {
+ fp_mul_comba32(A,B,C);
+#endif
+ } else {
+ fp_mul_comba(A,B,C);
+ }
+ } else {
+ /* do the karatsuba action
+
+ if A = ab and B = cd for ||a|| = r we need to solve
+
+ ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd
+
+ So we solve for the three products then we form the final result with careful shifting
+ and addition.
+
+Obvious points of optimization
+
+- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits]
+- Similarly the "bd" parts can be memcpy'ed and zeroed to 8
+-
+
+ */
+ /* get our value of r */
+ r = yy >> 1;
+
+ /* now solve for ac */
+// fp_copy(A, &t1); fp_rshd(&t1, r);
+ for (s = 0; s < A->used - r; s++) {
+ t1.dp[s] = A->dp[s+r];
+ }
+ for (; s < FP_SIZE; s++) {
+ t1.dp[s] = 0;
+ }
+ if (A->used >= r) {
+ t1.used = A->used - r;
+ } else {
+ t1.used = 0;
+ }
+ t1.sign = A->sign;
+
+// fp_copy(B, &t2); fp_rshd(&t2, r);
+ for (s = 0; s < B->used - r; s++) {
+ t2.dp[s] = B->dp[s+r];
+ }
+ for (; s < FP_SIZE; s++) {
+ t2.dp[s] = 0;
+ }
+ if (B->used >= r) {
+ t2.used = B->used - r;
+ } else {
+ t2.used = 0;
+ }
+ t2.sign = B->sign;
+
+ fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
+ fp_zero(&ac);
+ fp_mul(&t1, &t2, &ac);
+
+ /* now solve for bd */
+// fp_mod_2d(A, r * DIGIT_BIT, &t1);
+// fp_mod_2d(B, r * DIGIT_BIT, &t2);
+ for (s = 0; s < r; s++) {
+ t1.dp[s] = A->dp[s];
+ t2.dp[s] = B->dp[s];
+ }
+ for (; s < FP_SIZE; s++) {
+ t1.dp[s] = 0;
+ t2.dp[s] = 0;
+ }
+ t1.used = r;
+ t2.used = r;
+ fp_clamp(&t1);
+ fp_clamp(&t2);
+
+ fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd);
+ fp_zero(&bd);
+ fp_mul(&t1, &t2, &bd);
+
+ /* now get the (a-b)(c-d) term */
+ fp_zero(&comp);
+ fp_mul(&amb, &cmd, &comp);
+
+ /* now solve the system, do the middle term first */
+ comp.sign ^= 1;
+ fp_add(&comp, &ac, &comp);
+ fp_add(&comp, &bd, &comp);
+ fp_lshd(&comp, r);
+
+ /* leading term */
+ fp_lshd(&ac, r+r);
+
+ /* now sum them together */
+ s = A->sign ^ B->sign;
+ fp_zero(C);
+ fp_add(&ac, &comp, C);
+ fp_add(&bd, C, C);
+ C->sign = C->used ? s : FP_ZPOS;
+ }
+}
+
+
+/* End: fp_mul.c */
+
+/* Start: fp_mul_2.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_mul_2(fp_int * a, fp_int * b)
+{
+ int x, oldused;
+
+ oldused = b->used;
+ b->used = a->used;
+
+ {
+ register fp_digit r, rr, *tmpa, *tmpb;
+
+ /* alias for source */
+ tmpa = a->dp;
+
+ /* alias for dest */
+ tmpb = b->dp;
+
+ /* carry */
+ r = 0;
+ for (x = 0; x < a->used; x++) {
+
+ /* get what will be the *next* carry bit from the
+ * MSB of the current digit
+ */
+ rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
+
+ /* now shift up this digit, add in the carry [from the previous] */
+ *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
+
+ /* copy the carry that would be from the source
+ * digit into the next iteration
+ */
+ r = rr;
+ }
+
+ /* new leading digit? */
+ if (r != 0 && b->used != (FP_SIZE-1)) {
+ /* add a MSB which is always 1 at this point */
+ *tmpb = 1;
+ ++(b->used);
+ }
+
+ /* now zero any excess digits on the destination
+ * that we didn't write to
+ */
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
+ }
+ b->sign = a->sign;
+}
+
+
+/* End: fp_mul_2.c */
+
+/* Start: fp_mul_2d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * 2**d */
+void fp_mul_2d(fp_int *a, int b, fp_int *c)
+{
+ fp_digit carry, carrytmp, shift;
+ int x;
+
+ /* copy it */
+ fp_copy(a, c);
+
+ /* handle whole digits */
+ if (b >= DIGIT_BIT) {
+ fp_lshd(c, b/DIGIT_BIT);
+ }
+ b %= DIGIT_BIT;
+
+ /* shift the digits */
+ if (b != 0) {
+ carry = 0;
+ shift = DIGIT_BIT - b;
+ for (x = 0; x < c->used; x++) {
+ carrytmp = c->dp[x] >> shift;
+ c->dp[x] = (c->dp[x] << b) + carry;
+ carry = carrytmp;
+ }
+ /* store last carry if room */
+ if (carry && x < FP_SIZE) {
+ c->dp[c->used++] = carry;
+ }
+ }
+ fp_clamp(c);
+}
+
+
+/* End: fp_mul_2d.c */
+
+/* Start: fp_mul_comba.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+/* About this file...
+
+*/
+
+#include <tfm.h>
+
+/* these are the combas. Worship them. */
+#if defined(TFM_X86)
+/* Generic x86 optimized code */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+ x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+ x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+asm ( \
+ "movl %6,%%eax \n\t" \
+ "mull %7 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 optimized */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+ x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+ x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+asm ( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
+
+#elif defined(TFM_SSE2)
+/* use SSE2 optimizations */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+ x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+ x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI \
+ asm("emms");
+
+/* this should multiply i and j */
+ #define MULADD(i, j) \
+ asm volatile ( \
+ "movd %6,%%mm0 \n\t" \
+ "movd %7,%%mm1 \n\t" \
+ "pmuludq %%mm1,%%mm0\n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "psrlq $32,%%mm0 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "adcl %%eax,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc");
+
+#elif defined(TFM_ARM)
+/* ARM code */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+asm( \
+" UMULL r0,r1,%6,%7 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2, %2, #0 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
+
+#else
+/* ISO C code */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+ do { fp_word t; \
+ t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \
+ t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
+ } while (0);
+
+#endif
+
+
+/* generic PxQ multiplier */
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
+{
+ int ix, iy, iz, tx, ty, pa;
+ fp_digit c0, c1, c2, *tmpx, *tmpy;
+ fp_int tmp, *dst;
+
+ COMBA_START;
+ COMBA_CLEAR;
+
+ /* get size of output and trim */
+ pa = A->used + B->used;
+ if (pa >= FP_SIZE) {
+ pa = FP_SIZE-1;
+ }
+
+ if (A == C || B == C) {
+ fp_zero(&tmp);
+ dst = &tmp;
+ } else {
+ fp_zero(C);
+ dst = C;
+ }
+
+ for (ix = 0; ix < pa; ix++) {
+ /* get offsets into the two bignums */
+ ty = MIN(ix, B->used-1);
+ tx = ix - ty;
+
+ /* setup temp aliases */
+ tmpx = A->dp + tx;
+ tmpy = B->dp + ty;
+
+ /* this is the number of times the loop will iterrate, essentially its
+ while (tx++ < a->used && ty-- >= 0) { ... }
+ */
+ iy = MIN(A->used-tx, ty+1);
+
+ /* execute loop */
+ COMBA_FORWARD;
+ for (iz = 0; iz < iy; ++iz) {
+ MULADD(*tmpx++, *tmpy--);
+ }
+
+ /* store term */
+ COMBA_STORE(dst->dp[ix]);
+ }
+ /* store final carry */
+ COMBA_STORE2(dst->dp[ix]);
+ COMBA_FINI;
+
+ dst->used = pa;
+ fp_clamp(dst);
+ dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS;
+ fp_copy(dst, C);
+}
+
+void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2, at[8];
+
+ memcpy(at, A->dp, 4 * sizeof(fp_digit));
+ memcpy(at+4, B->dp, 4 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[4]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[5]); MULADD(at[1], at[4]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[7]); MULADD(at[3], at[6]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[7]);
+ COMBA_STORE(C->dp[6]);
+ COMBA_STORE2(C->dp[7]);
+ C->used = 8;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+}
+
+
+void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2, at[16];
+
+ memcpy(at, A->dp, 8 * sizeof(fp_digit));
+ memcpy(at+8, B->dp, 8 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[8]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[9]); MULADD(at[1], at[8]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[15]); MULADD(at[7], at[14]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[15]);
+ COMBA_STORE(C->dp[14]);
+ COMBA_STORE2(C->dp[15]);
+ C->used = 16;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+}
+
+#if defined(TFM_LARGE)
+
+void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2, at[32];
+
+ memcpy(at, A->dp, 16 * sizeof(fp_digit));
+ memcpy(at+16, B->dp, 16 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[16]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[17]); MULADD(at[1], at[16]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[31]); MULADD(at[15], at[30]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[31]);
+ COMBA_STORE(C->dp[30]);
+ COMBA_STORE2(C->dp[31]);
+ C->used = 32;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+}
+
+#endif /* TFM_LARGE */
+
+#ifdef TFM_HUGE
+
+void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2, at[64];
+
+ memcpy(at, A->dp, 32 * sizeof(fp_digit));
+ memcpy(at+32, B->dp, 32 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[32]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[33]); MULADD(at[1], at[32]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]);
+ COMBA_STORE(C->dp[30]);
+ /* 31 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]);
+ COMBA_STORE(C->dp[31]);
+ /* 32 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]);
+ COMBA_STORE(C->dp[32]);
+ /* 33 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]);
+ COMBA_STORE(C->dp[33]);
+ /* 34 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]);
+ COMBA_STORE(C->dp[34]);
+ /* 35 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]);
+ COMBA_STORE(C->dp[35]);
+ /* 36 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]);
+ COMBA_STORE(C->dp[36]);
+ /* 37 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]);
+ COMBA_STORE(C->dp[37]);
+ /* 38 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]);
+ COMBA_STORE(C->dp[38]);
+ /* 39 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]);
+ COMBA_STORE(C->dp[39]);
+ /* 40 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]);
+ COMBA_STORE(C->dp[40]);
+ /* 41 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]);
+ COMBA_STORE(C->dp[41]);
+ /* 42 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]);
+ COMBA_STORE(C->dp[42]);
+ /* 43 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]);
+ COMBA_STORE(C->dp[43]);
+ /* 44 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]);
+ COMBA_STORE(C->dp[44]);
+ /* 45 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]);
+ COMBA_STORE(C->dp[45]);
+ /* 46 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]);
+ COMBA_STORE(C->dp[46]);
+ /* 47 */
+ COMBA_FORWARD;
+ MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]);
+ COMBA_STORE(C->dp[47]);
+ /* 48 */
+ COMBA_FORWARD;
+ MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]);
+ COMBA_STORE(C->dp[48]);
+ /* 49 */
+ COMBA_FORWARD;
+ MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]);
+ COMBA_STORE(C->dp[49]);
+ /* 50 */
+ COMBA_FORWARD;
+ MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]);
+ COMBA_STORE(C->dp[50]);
+ /* 51 */
+ COMBA_FORWARD;
+ MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]);
+ COMBA_STORE(C->dp[51]);
+ /* 52 */
+ COMBA_FORWARD;
+ MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]);
+ COMBA_STORE(C->dp[52]);
+ /* 53 */
+ COMBA_FORWARD;
+ MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]);
+ COMBA_STORE(C->dp[53]);
+ /* 54 */
+ COMBA_FORWARD;
+ MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]);
+ COMBA_STORE(C->dp[54]);
+ /* 55 */
+ COMBA_FORWARD;
+ MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]);
+ COMBA_STORE(C->dp[55]);
+ /* 56 */
+ COMBA_FORWARD;
+ MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]);
+ COMBA_STORE(C->dp[56]);
+ /* 57 */
+ COMBA_FORWARD;
+ MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]);
+ COMBA_STORE(C->dp[57]);
+ /* 58 */
+ COMBA_FORWARD;
+ MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]);
+ COMBA_STORE(C->dp[58]);
+ /* 59 */
+ COMBA_FORWARD;
+ MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]);
+ COMBA_STORE(C->dp[59]);
+ /* 60 */
+ COMBA_FORWARD;
+ MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]);
+ COMBA_STORE(C->dp[60]);
+ /* 61 */
+ COMBA_FORWARD;
+ MULADD(at[30], at[63]); MULADD(at[31], at[62]);
+ COMBA_STORE(C->dp[61]);
+ /* 62 */
+ COMBA_FORWARD;
+ MULADD(at[31], at[63]);
+ COMBA_STORE(C->dp[62]);
+ COMBA_STORE2(C->dp[63]);
+ C->used = 64;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+}
+
+#endif
+
+/* End: fp_mul_comba.c */
+
+/* Start: fp_mul_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * b */
+void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
+{
+ fp_word w;
+ int x, oldused;
+
+ oldused = c->used;
+ c->used = a->used;
+ c->sign = a->sign;
+ w = 0;
+ for (x = 0; x < a->used; x++) {
+ w = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
+ c->dp[x] = (fp_digit)w;
+ w = w >> DIGIT_BIT;
+ }
+ if (w != 0 && (a->used != FP_SIZE)) {
+ c->dp[c->used++] = w;
+ ++x;
+ }
+ for (; x < oldused; x++) {
+ c->dp[x] = 0;
+ }
+ fp_clamp(c);
+}
+
+
+/* End: fp_mul_d.c */
+
+/* Start: fp_mulmod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* d = a * b (mod c) */
+int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ fp_int tmp;
+ fp_zero(&tmp);
+ fp_mul(a, b, &tmp);
+ return fp_mod(&tmp, c, d);
+}
+
+/* End: fp_mulmod.c */
+
+/* Start: fp_prime_miller_rabin.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ */
+void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
+{
+ fp_int n1, y, r;
+ int s, j;
+
+ /* default */
+ *result = FP_NO;
+
+ /* ensure b > 1 */
+ if (fp_cmp_d(b, 1) != FP_GT) {
+ return;
+ }
+
+ /* get n1 = a - 1 */
+ fp_init_copy(&n1, a);
+ fp_sub_d(&n1, 1, &n1);
+
+ /* set 2**s * r = n1 */
+ fp_init_copy(&r, &n1);
+
+ /* count the number of least significant bits
+ * which are zero
+ */
+ s = fp_cnt_lsb(&r);
+
+ /* now divide n - 1 by 2**s */
+ fp_div_2d (&r, s, &r, NULL);
+
+ /* compute y = b**r mod a */
+ fp_init(&y);
+ fp_exptmod(b, &r, a, &y);
+
+ /* if y != 1 and y != n1 do */
+ if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
+ j = 1;
+ /* while j <= s-1 and y != n1 */
+ while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
+ fp_sqrmod (&y, a, &y);
+
+ /* if y == 1 then composite */
+ if (fp_cmp_d (&y, 1) == FP_EQ) {
+ return;
+ }
+ ++j;
+ }
+
+ /* if y != n1 then composite */
+ if (fp_cmp (&y, &n1) != FP_EQ) {
+ return;
+ }
+ }
+
+ /* probably prime now */
+ *result = FP_YES;
+}
+
+/* End: fp_prime_miller_rabin.c */
+
+/* Start: fp_prime_random_ex.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* This is possibly the mother of all prime generation functions, muahahahahaha! */
+int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat)
+{
+ unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
+ int res, err, bsize, maskOR_msb_offset;
+
+ /* sanity check the input */
+ if (size <= 1 || t <= 0) {
+ return FP_VAL;
+ }
+
+ /* TFM_PRIME_SAFE implies TFM_PRIME_BBS */
+ if (flags & TFM_PRIME_SAFE) {
+ flags |= TFM_PRIME_BBS;
+ }
+
+ /* calc the byte size */
+ bsize = (size>>3)+(size&7?1:0);
+
+ /* we need a buffer of bsize bytes */
+ tmp = malloc(bsize);
+ if (tmp == NULL) {
+ return FP_MEM;
+ }
+
+ /* calc the maskAND value for the MSbyte*/
+ maskAND = 0xFF >> (8 - (size & 7));
+
+ /* calc the maskOR_msb */
+ maskOR_msb = 0;
+ maskOR_msb_offset = (size - 2) >> 3;
+ if (flags & TFM_PRIME_2MSB_ON) {
+ maskOR_msb |= 1 << ((size - 2) & 7);
+ } else if (flags & TFM_PRIME_2MSB_OFF) {
+ maskAND &= ~(1 << ((size - 2) & 7));
+ }
+
+ /* get the maskOR_lsb */
+ maskOR_lsb = 1;
+ if (flags & TFM_PRIME_BBS) {
+ maskOR_lsb |= 3;
+ }
+
+ do {
+ /* read the bytes */
+ if (cb(tmp, bsize, dat) != bsize) {
+ err = FP_VAL;
+ goto error;
+ }
+
+ /* work over the MSbyte */
+ tmp[0] &= maskAND;
+ tmp[0] |= 1 << ((size - 1) & 7);
+
+ /* mix in the maskORs */
+ tmp[maskOR_msb_offset] |= maskOR_msb;
+ tmp[bsize-1] |= maskOR_lsb;
+
+ /* read it in */
+ fp_read_unsigned_bin(a, tmp, bsize);
+
+ /* is it prime? */
+ res = fp_isprime(a);
+ if (res == FP_NO) continue;
+
+ if (flags & TFM_PRIME_SAFE) {
+ /* see if (a-1)/2 is prime */
+ fp_sub_d(a, 1, a);
+ fp_div_2(a, a);
+
+ /* is it prime? */
+ res = fp_isprime(a);
+ }
+ } while (res == FP_NO);
+
+ if (flags & TFM_PRIME_SAFE) {
+ /* restore a to the original value */
+ fp_mul_2(a, a);
+ fp_add_d(a, 1, a);
+ }
+
+ err = FP_OKAY;
+error:
+ free(tmp);
+ return err;
+}
+
+/* End: fp_prime_random_ex.c */
+
+/* Start: fp_radix_size.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_radix_size(fp_int *a, int radix, int *size)
+{
+ int digs;
+ fp_int t;
+ fp_digit d;
+
+ *size = 0;
+
+ /* check range of the radix */
+ if (radix < 2 || radix > 64) {
+ return FP_VAL;
+ }
+
+ /* quick out if its zero */
+ if (fp_iszero(a) == 1) {
+ *size = 2;
+ return FP_OKAY;
+ }
+
+ fp_init_copy(&t, a);
+
+ /* if it is negative output a - */
+ if (t.sign == FP_NEG) {
+ *size++;
+ t.sign = FP_ZPOS;
+ }
+
+ digs = 0;
+ while (fp_iszero (&t) == FP_NO) {
+ fp_div_d (&t, (fp_digit) radix, &t, &d);
+ *size++;
+ }
+
+ /* append a NULL so the string is properly terminated */
+ *size++;
+ return FP_OKAY;
+
+}
+
+/* End: fp_radix_size.c */
+
+/* Start: fp_read_radix.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_read_radix(fp_int *a, char *str, int radix)
+{
+ int y, neg;
+ char ch;
+
+ /* make sure the radix is ok */
+ if (radix < 2 || radix > 64) {
+ return FP_VAL;
+ }
+
+ /* if the leading digit is a
+ * minus set the sign to negative.
+ */
+ if (*str == '-') {
+ ++str;
+ neg = FP_NEG;
+ } else {
+ neg = FP_ZPOS;
+ }
+
+ /* set the integer to the default of zero */
+ fp_zero (a);
+
+ /* process each digit of the string */
+ while (*str) {
+ /* if the radix < 36 the conversion is case insensitive
+ * this allows numbers like 1AB and 1ab to represent the same value
+ * [e.g. in hex]
+ */
+ ch = (char) ((radix < 36) ? toupper (*str) : *str);
+ for (y = 0; y < 64; y++) {
+ if (ch == fp_s_rmap[y]) {
+ break;
+ }
+ }
+
+ /* if the char was found in the map
+ * and is less than the given radix add it
+ * to the number, otherwise exit the loop.
+ */
+ if (y < radix) {
+ fp_mul_d (a, (fp_digit) radix, a);
+ fp_add_d (a, (fp_digit) y, a);
+ } else {
+ break;
+ }
+ ++str;
+ }
+
+ /* set the sign only if a != 0 */
+ if (fp_iszero(a) != FP_YES) {
+ a->sign = neg;
+ }
+ return FP_OKAY;
+}
+
+/* End: fp_read_radix.c */
+
+/* Start: fp_read_signed_bin.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_read_signed_bin(fp_int *a, unsigned char *b, int c)
+{
+ /* read magnitude */
+ fp_read_unsigned_bin (a, b + 1, c - 1);
+
+ /* first byte is 0 for positive, non-zero for negative */
+ if (b[0] == 0) {
+ a->sign = FP_ZPOS;
+ } else {
+ a->sign = FP_NEG;
+ }
+}
+
+/* End: fp_read_signed_bin.c */
+
+/* Start: fp_read_unsigned_bin.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
+{
+ /* zero the int */
+ fp_zero (a);
+
+ /* read the bytes in */
+ for (; c > 0; c--) {
+ fp_mul_2d (a, 8, a);
+ a->dp[0] |= *b++;
+ a->used += 1;
+ }
+ fp_clamp (a);
+}
+
+/* End: fp_read_unsigned_bin.c */
+
+/* Start: fp_reverse.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* reverse an array, used for radix code */
+void bn_reverse (unsigned char *s, int len)
+{
+ int ix, iy;
+ unsigned char t;
+
+ ix = 0;
+ iy = len - 1;
+ while (ix < iy) {
+ t = s[ix];
+ s[ix] = s[iy];
+ s[iy] = t;
+ ++ix;
+ --iy;
+ }
+}
+
+/* End: fp_reverse.c */
+
+/* Start: fp_rshd.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_rshd(fp_int *a, int x)
+{
+ int y;
+
+ /* too many digits just zero and return */
+ if (x >= a->used) {
+ fp_zero(a);
+ return;
+ }
+
+ /* shift */
+ for (y = 0; y < a->used - x; y++) {
+ a->dp[y] = a->dp[y+x];
+ }
+
+ /* zero rest */
+ for (; y < a->used; y++) {
+ a->dp[y] = 0;
+ }
+
+ /* decrement count */
+ a->used -= x;
+ fp_clamp(a);
+}
+
+
+/* End: fp_rshd.c */
+
+/* Start: fp_s_rmap.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* chars used in radix conversions */
+const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* End: fp_s_rmap.c */
+
+/* Start: fp_set.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_set(fp_int *a, fp_digit b)
+{
+ fp_zero(a);
+ a->dp[0] = b;
+ a->used = b ? 1 : 0;
+}
+
+/* End: fp_set.c */
+
+/* Start: fp_signed_bin_size.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_signed_bin_size(fp_int *a)
+{
+ return 1 + fp_unsigned_bin_size (a);
+}
+
+/* End: fp_signed_bin_size.c */
+
+/* Start: fp_sqr.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* b = a*a */
+void fp_sqr(fp_int *A, fp_int *B)
+{
+ int r, y, s;
+ fp_int aa, bb, comp, amb, t1;
+
+ y = A->used;
+ if (y <= 64) {
+ if (y <= 4) {
+ fp_sqr_comba4(A,B);
+ } else if (y <= 8) {
+ fp_sqr_comba8(A,B);
+#if defined(TFM_LARGE)
+ } else if (y <= 16 && y >= 12) {
+ fp_sqr_comba16(A,B);
+#endif
+#if defined(TFM_HUGE)
+ } else if (y <= 32 && y >= 20) {
+ fp_sqr_comba32(A,B);
+ } else if (y <= 64 && y >= 48) {
+ fp_sqr_comba64(A,B);
+#endif
+ } else {
+ fp_sqr_comba(A, B);
+ }
+
+ } else {
+ /* do the karatsuba action
+
+ if A = ab ||a|| = r we need to solve
+
+ a^2*r^2 + (-(a-b)^2 + a^2 + b^2)*r + b^2
+
+ So we solve for the three products then we form the final result with careful shifting
+ and addition.
+
+Obvious points of optimization
+
+- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits]
+- Similarly the "bd" parts can be memcpy'ed and zeroed to 8
+-
+
+ */
+ /* get our value of r */
+ r = y >> 1;
+
+ /* now solve for ac */
+// fp_copy(A, &t1); fp_rshd(&t1, r);
+ for (s = 0; s < A->used - r; s++) {
+ t1.dp[s] = A->dp[s+r];
+ }
+ for (; s < FP_SIZE; s++) {
+ t1.dp[s] = 0;
+ }
+ if (A->used >= r) {
+ t1.used = A->used - r;
+ } else {
+ t1.used = 0;
+ }
+ t1.sign = A->sign;
+ fp_copy(&t1, &amb);
+ fp_zero(&aa);
+ fp_sqr(&t1, &aa);
+
+ /* now solve for bd */
+// fp_mod_2d(A, r * DIGIT_BIT, &t1);
+ for (s = 0; s < r; s++) {
+ t1.dp[s] = A->dp[s];
+ }
+ for (; s < FP_SIZE; s++) {
+ t1.dp[s] = 0;
+ }
+ t1.used = r;
+ fp_clamp(&t1);
+
+ fp_sub(&amb, &t1, &amb);
+ fp_zero(&bb);
+ fp_sqr(&t1, &bb);
+
+ /* now get the (a-b) term */
+ fp_zero(&comp);
+ fp_sqr(&amb, &comp);
+
+ /* now solve the system, do the middle term first */
+ comp.sign ^= 1;
+ fp_add(&comp, &aa, &comp);
+ fp_add(&comp, &bb, &comp);
+ fp_lshd(&comp, r);
+
+ /* leading term */
+ fp_lshd(&aa, r+r);
+
+ /* now sum them together */
+ fp_zero(B);
+ fp_add(&aa, &comp, B);
+ fp_add(&bb, B, B);
+ B->sign = FP_ZPOS;
+ }
+}
+
+
+/* End: fp_sqr.c */
+
+/* Start: fp_sqr_comba.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* About this file...
+*/
+
+#if defined(TFM_X86)
+
+/* x86-32 optimized */
+
+#define COMBA_START
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI
+
+#define SQRADD(i, j) \
+asm volatile ( \
+ "movl %6,%%eax \n\t" \
+ "mull %%eax \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
+
+#define SQRADD2(i, j) \
+asm volatile ( \
+ "movl %6,%%eax \n\t" \
+ "mull %7 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
+
+#define SQRADDSC(i, j) \
+asm ( \
+ "movl %6,%%eax \n\t" \
+ "mull %7 \n\t" \
+ "movl %%eax,%0 \n\t" \
+ "movl %%edx,%1 \n\t" \
+ "xorl %2,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
+
+#define SQRADDAC(i, j) \
+asm ( \
+ "movl %6,%%eax \n\t" \
+ "mull %7 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
+
+#define SQRADDDB \
+asm ( \
+ "addl %3,%0 \n\t" \
+ "adcl %4,%1 \n\t" \
+ "adcl %5,%2 \n\t" \
+ "addl %3,%0 \n\t" \
+ "adcl %4,%1 \n\t" \
+ "adcl %5,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2), "=g"(sc0), "=g"(sc1), "=g"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 optimized */
+
+#define COMBA_START
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI
+
+#define SQRADD(i, j) \
+asm ( \
+ "movq %6,%%rax \n\t" \
+ "mulq %%rax \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
+
+#define SQRADD2(i, j) \
+asm ( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
+
+#define SQRADDSC(i, j) \
+asm ( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "movq %%rax,%0 \n\t" \
+ "movq %%rdx,%1 \n\t" \
+ "xorq %2,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
+
+#define SQRADDAC(i, j) \
+asm ( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
+
+#define SQRADDDB \
+asm ( \
+ "addq %3,%0 \n\t" \
+ "adcq %4,%1 \n\t" \
+ "adcq %5,%2 \n\t" \
+ "addq %3,%0 \n\t" \
+ "adcq %4,%1 \n\t" \
+ "adcq %5,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2), "=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc");
+
+#elif defined(TFM_SSE2)
+
+/* SSE2 Optimized */
+#define COMBA_START
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI \
+ asm("emms");
+
+#define SQRADD(i, j) \
+asm volatile ( \
+ "movd %6,%%mm0 \n\t" \
+ "pmuludq %%mm0,%%mm0\n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "psrlq $32,%%mm0 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "adcl %%eax,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
+
+#define SQRADD2(i, j) \
+asm volatile ( \
+ "movd %6,%%mm0 \n\t" \
+ "movd %7,%%mm1 \n\t" \
+ "pmuludq %%mm1,%%mm0\n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "psrlq $32,%%mm0 \n\t" \
+ "movd %%mm0,%%edx \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
+
+#define SQRADDSC(i, j) \
+asm volatile ( \
+ "movd %6,%%mm0 \n\t" \
+ "pmuludq %%mm0,%%mm0\n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "psrlq $32,%%mm0 \n\t" \
+ "movl %%eax,%0 \n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "movl %%eax,%1 \n\t" \
+ "xorl %2,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i) :"%eax","%cc");
+
+#define SQRADDAC(i, j) \
+asm volatile ( \
+ "movd %6,%%mm0 \n\t" \
+ "movd %7,%%mm1 \n\t" \
+ "pmuludq %%mm1,%%mm0\n\t" \
+ "movd %%mm0,%%eax \n\t" \
+ "psrlq $32,%%mm0 \n\t" \
+ "movd %%mm0,%%edx \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ "addl %%eax,%0 \n\t" \
+ "adcl %%edx,%1 \n\t" \
+ "adcl $0,%2 \n\t" \
+ :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
+
+#define SQRADDDB \
+asm ( \
+ "addl %3,%0 \n\t" \
+ "adcl %4,%1 \n\t" \
+ "adcl %5,%2 \n\t" \
+ "addl %3,%0 \n\t" \
+ "adcl %4,%1 \n\t" \
+ "adcl %5,%2 \n\t" \
+ :"=r"(c0), "=r"(c1), "=r"(c2), "=g"(sc0), "=g"(sc1), "=g"(sc2) : "0"(c0), "1"(c1), "2"(c2), "3"(sc0), "4"(sc1), "5"(sc2) : "%cc");
+
+#elif defined(TFM_ARM)
+
+/* ARM code */
+
+#define COMBA_START
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI
+
+/* multiplies point i and j, updates carry "c1" and digit c2 */
+#define SQRADD(i, j) \
+asm( \
+" UMULL r0,r1,%6,%6 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2,%2,#0 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
+
+/* for squaring some of the terms are doubled... */
+#define SQRADD2(i, j) \
+asm( \
+" UMULL r0,r1,%6,%7 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2,%2,#0 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2,%2,#0 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
+
+#define SQRADDSC(i, j) \
+asm( \
+" UMULL %0,%1,%6,%7 \n\t" \
+" SUB %2,%2,%2 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
+
+#define SQRADDAC(i, j) \
+asm( \
+" UMULL r0,r1,%6,%7 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2,%2,#0 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
+
+#define SQRADDDB \
+asm( \
+" ADDS %3,%0 \n\t" \
+" ADCS %4,%1 \n\t" \
+" ADC %5,%2 \n\t" \
+" ADDS %3,%0 \n\t" \
+" ADCS %4,%1 \n\t" \
+" ADC %5,%2 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2), "=r"(c0), "=r"(c1), "=r"(c2) : "0"(sc0), "1"(sc1), "2"(sc2), "3"(c0), "4"(c1), "5"(c2) : "%cc");
+
+#else
+
+/* ISO C portable code */
+
+#define COMBA_START \
+ { fp_word tt;
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI \
+ }
+
+/* multiplies point i and j, updates carry "c1" and digit c2 */
+#define SQRADD(i, j) \
+ do { fp_word t; \
+ t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \
+ t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
+ } while (0);
+
+
+/* for squaring some of the terms are doubled... */
+#define SQRADD2(i, j) \
+ do { fp_word t; \
+ t = ((fp_word)i) * ((fp_word)j); \
+ tt = (fp_word)c0 + t; c0 = tt; \
+ tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
+ tt = (fp_word)c0 + t; c0 = tt; \
+ tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
+ } while (0);
+
+#define SQRADDSC(i, j) \
+ do { fp_word t; \
+ t = ((fp_word)i) * ((fp_word)j); \
+ sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
+ } while (0);
+
+#define SQRADDAC(i, j) \
+ do { fp_word t; \
+ t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \
+ t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \
+ } while (0);
+
+#define SQRADDDB \
+ do { fp_word t; \
+ t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \
+ t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \
+ c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \
+ } while (0);
+
+#endif
+
+#include "fp_sqr_comba_generic.c"
+void fp_sqr_comba4(fp_int *A, fp_int *B)
+{
+ fp_digit *a, b[8], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADD2(a[2], a[3]);
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+ COMBA_STORE2(b[7]);
+ COMBA_FINI;
+
+ B->used = 8;
+ B->sign = FP_ZPOS;
+ memcpy(B->dp, b, 8 * sizeof(fp_digit));
+ fp_clamp(B);
+}
+
+
+void fp_sqr_comba8(fp_int *A, fp_int *B)
+{
+ fp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]);
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADD2(a[6], a[7]);
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+ COMBA_STORE2(b[15]);
+ COMBA_FINI;
+
+ B->used = 16;
+ B->sign = FP_ZPOS;
+ memcpy(B->dp, b, 16 * sizeof(fp_digit));
+ fp_clamp(B);
+}
+
+
+#ifdef TFM_LARGE
+void fp_sqr_comba16(fp_int *A, fp_int *B)
+{
+ fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]);
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADD2(a[14], a[15]);
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+ COMBA_STORE2(b[31]);
+ COMBA_FINI;
+
+ B->used = 32;
+ B->sign = FP_ZPOS;
+ memcpy(B->dp, b, 32 * sizeof(fp_digit));
+ fp_clamp(B);
+}
+
+
+#endif
+#ifdef TFM_HUGE
+void fp_sqr_comba32(fp_int *A, fp_int *B)
+{
+ fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+
+ /* output 31 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
+ COMBA_STORE(b[31]);
+
+ /* output 32 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
+ COMBA_STORE(b[32]);
+
+ /* output 33 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
+ COMBA_STORE(b[33]);
+
+ /* output 34 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
+ COMBA_STORE(b[34]);
+
+ /* output 35 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
+ COMBA_STORE(b[35]);
+
+ /* output 36 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
+ COMBA_STORE(b[36]);
+
+ /* output 37 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
+ COMBA_STORE(b[37]);
+
+ /* output 38 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
+ COMBA_STORE(b[38]);
+
+ /* output 39 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
+ COMBA_STORE(b[39]);
+
+ /* output 40 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
+ COMBA_STORE(b[40]);
+
+ /* output 41 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
+ COMBA_STORE(b[41]);
+
+ /* output 42 */
+ CARRY_FORWARD;
+ SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
+ COMBA_STORE(b[42]);
+
+ /* output 43 */
+ CARRY_FORWARD;
+ SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
+ COMBA_STORE(b[43]);
+
+ /* output 44 */
+ CARRY_FORWARD;
+ SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
+ COMBA_STORE(b[44]);
+
+ /* output 45 */
+ CARRY_FORWARD;
+ SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
+ COMBA_STORE(b[45]);
+
+ /* output 46 */
+ CARRY_FORWARD;
+ SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
+ COMBA_STORE(b[46]);
+
+ /* output 47 */
+ CARRY_FORWARD;
+ SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
+ COMBA_STORE(b[47]);
+
+ /* output 48 */
+ CARRY_FORWARD;
+ SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
+ COMBA_STORE(b[48]);
+
+ /* output 49 */
+ CARRY_FORWARD;
+ SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
+ COMBA_STORE(b[49]);
+
+ /* output 50 */
+ CARRY_FORWARD;
+ SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
+ COMBA_STORE(b[50]);
+
+ /* output 51 */
+ CARRY_FORWARD;
+ SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
+ COMBA_STORE(b[51]);
+
+ /* output 52 */
+ CARRY_FORWARD;
+ SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
+ COMBA_STORE(b[52]);
+
+ /* output 53 */
+ CARRY_FORWARD;
+ SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
+ COMBA_STORE(b[53]);
+
+ /* output 54 */
+ CARRY_FORWARD;
+ SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
+ COMBA_STORE(b[54]);
+
+ /* output 55 */
+ CARRY_FORWARD;
+ SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
+ COMBA_STORE(b[55]);
+
+ /* output 56 */
+ CARRY_FORWARD;
+ SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
+ COMBA_STORE(b[56]);
+
+ /* output 57 */
+ CARRY_FORWARD;
+ SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
+ COMBA_STORE(b[57]);
+
+ /* output 58 */
+ CARRY_FORWARD;
+ SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
+ COMBA_STORE(b[58]);
+
+ /* output 59 */
+ CARRY_FORWARD;
+ SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
+ COMBA_STORE(b[59]);
+
+ /* output 60 */
+ CARRY_FORWARD;
+ SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
+ COMBA_STORE(b[60]);
+
+ /* output 61 */
+ CARRY_FORWARD;
+ SQRADD2(a[30], a[31]);
+ COMBA_STORE(b[61]);
+
+ /* output 62 */
+ CARRY_FORWARD;
+ SQRADD(a[31], a[31]);
+ COMBA_STORE(b[62]);
+ COMBA_STORE2(b[63]);
+ COMBA_FINI;
+
+ B->used = 64;
+ B->sign = FP_ZPOS;
+ memcpy(B->dp, b, 64 * sizeof(fp_digit));
+ fp_clamp(B);
+}
+
+
+#endif
+
+#ifdef TFM_HUGE
+void fp_sqr_comba64(fp_int *A, fp_int *B)
+{
+ fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+
+ /* output 31 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
+ COMBA_STORE(b[31]);
+
+ /* output 32 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
+ COMBA_STORE(b[32]);
+
+ /* output 33 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
+ COMBA_STORE(b[33]);
+
+ /* output 34 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
+ COMBA_STORE(b[34]);
+
+ /* output 35 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
+ COMBA_STORE(b[35]);
+
+ /* output 36 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
+ COMBA_STORE(b[36]);
+
+ /* output 37 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
+ COMBA_STORE(b[37]);
+
+ /* output 38 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
+ COMBA_STORE(b[38]);
+
+ /* output 39 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
+ COMBA_STORE(b[39]);
+
+ /* output 40 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
+ COMBA_STORE(b[40]);
+
+ /* output 41 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
+ COMBA_STORE(b[41]);
+
+ /* output 42 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
+ COMBA_STORE(b[42]);
+
+ /* output 43 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
+ COMBA_STORE(b[43]);
+
+ /* output 44 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
+ COMBA_STORE(b[44]);
+
+ /* output 45 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
+ COMBA_STORE(b[45]);
+
+ /* output 46 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
+ COMBA_STORE(b[46]);
+
+ /* output 47 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
+ COMBA_STORE(b[47]);
+
+ /* output 48 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[48]); SQRADDAC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
+ COMBA_STORE(b[48]);
+
+ /* output 49 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[49]); SQRADDAC(a[1], a[48]); SQRADDAC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
+ COMBA_STORE(b[49]);
+
+ /* output 50 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[50]); SQRADDAC(a[1], a[49]); SQRADDAC(a[2], a[48]); SQRADDAC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
+ COMBA_STORE(b[50]);
+
+ /* output 51 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[51]); SQRADDAC(a[1], a[50]); SQRADDAC(a[2], a[49]); SQRADDAC(a[3], a[48]); SQRADDAC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
+ COMBA_STORE(b[51]);
+
+ /* output 52 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[52]); SQRADDAC(a[1], a[51]); SQRADDAC(a[2], a[50]); SQRADDAC(a[3], a[49]); SQRADDAC(a[4], a[48]); SQRADDAC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
+ COMBA_STORE(b[52]);
+
+ /* output 53 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[53]); SQRADDAC(a[1], a[52]); SQRADDAC(a[2], a[51]); SQRADDAC(a[3], a[50]); SQRADDAC(a[4], a[49]); SQRADDAC(a[5], a[48]); SQRADDAC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
+ COMBA_STORE(b[53]);
+
+ /* output 54 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[54]); SQRADDAC(a[1], a[53]); SQRADDAC(a[2], a[52]); SQRADDAC(a[3], a[51]); SQRADDAC(a[4], a[50]); SQRADDAC(a[5], a[49]); SQRADDAC(a[6], a[48]); SQRADDAC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
+ COMBA_STORE(b[54]);
+
+ /* output 55 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[55]); SQRADDAC(a[1], a[54]); SQRADDAC(a[2], a[53]); SQRADDAC(a[3], a[52]); SQRADDAC(a[4], a[51]); SQRADDAC(a[5], a[50]); SQRADDAC(a[6], a[49]); SQRADDAC(a[7], a[48]); SQRADDAC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
+ COMBA_STORE(b[55]);
+
+ /* output 56 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[56]); SQRADDAC(a[1], a[55]); SQRADDAC(a[2], a[54]); SQRADDAC(a[3], a[53]); SQRADDAC(a[4], a[52]); SQRADDAC(a[5], a[51]); SQRADDAC(a[6], a[50]); SQRADDAC(a[7], a[49]); SQRADDAC(a[8], a[48]); SQRADDAC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
+ COMBA_STORE(b[56]);
+
+ /* output 57 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[57]); SQRADDAC(a[1], a[56]); SQRADDAC(a[2], a[55]); SQRADDAC(a[3], a[54]); SQRADDAC(a[4], a[53]); SQRADDAC(a[5], a[52]); SQRADDAC(a[6], a[51]); SQRADDAC(a[7], a[50]); SQRADDAC(a[8], a[49]); SQRADDAC(a[9], a[48]); SQRADDAC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
+ COMBA_STORE(b[57]);
+
+ /* output 58 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[58]); SQRADDAC(a[1], a[57]); SQRADDAC(a[2], a[56]); SQRADDAC(a[3], a[55]); SQRADDAC(a[4], a[54]); SQRADDAC(a[5], a[53]); SQRADDAC(a[6], a[52]); SQRADDAC(a[7], a[51]); SQRADDAC(a[8], a[50]); SQRADDAC(a[9], a[49]); SQRADDAC(a[10], a[48]); SQRADDAC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]);
+ COMBA_STORE(b[58]);
+
+ /* output 59 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[59]); SQRADDAC(a[1], a[58]); SQRADDAC(a[2], a[57]); SQRADDAC(a[3], a[56]); SQRADDAC(a[4], a[55]); SQRADDAC(a[5], a[54]); SQRADDAC(a[6], a[53]); SQRADDAC(a[7], a[52]); SQRADDAC(a[8], a[51]); SQRADDAC(a[9], a[50]); SQRADDAC(a[10], a[49]); SQRADDAC(a[11], a[48]); SQRADDAC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB;
+ COMBA_STORE(b[59]);
+
+ /* output 60 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[60]); SQRADDAC(a[1], a[59]); SQRADDAC(a[2], a[58]); SQRADDAC(a[3], a[57]); SQRADDAC(a[4], a[56]); SQRADDAC(a[5], a[55]); SQRADDAC(a[6], a[54]); SQRADDAC(a[7], a[53]); SQRADDAC(a[8], a[52]); SQRADDAC(a[9], a[51]); SQRADDAC(a[10], a[50]); SQRADDAC(a[11], a[49]); SQRADDAC(a[12], a[48]); SQRADDAC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]);
+ COMBA_STORE(b[60]);
+
+ /* output 61 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[61]); SQRADDAC(a[1], a[60]); SQRADDAC(a[2], a[59]); SQRADDAC(a[3], a[58]); SQRADDAC(a[4], a[57]); SQRADDAC(a[5], a[56]); SQRADDAC(a[6], a[55]); SQRADDAC(a[7], a[54]); SQRADDAC(a[8], a[53]); SQRADDAC(a[9], a[52]); SQRADDAC(a[10], a[51]); SQRADDAC(a[11], a[50]); SQRADDAC(a[12], a[49]); SQRADDAC(a[13], a[48]); SQRADDAC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB;
+ COMBA_STORE(b[61]);
+
+ /* output 62 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[62]); SQRADDAC(a[1], a[61]); SQRADDAC(a[2], a[60]); SQRADDAC(a[3], a[59]); SQRADDAC(a[4], a[58]); SQRADDAC(a[5], a[57]); SQRADDAC(a[6], a[56]); SQRADDAC(a[7], a[55]); SQRADDAC(a[8], a[54]); SQRADDAC(a[9], a[53]); SQRADDAC(a[10], a[52]); SQRADDAC(a[11], a[51]); SQRADDAC(a[12], a[50]); SQRADDAC(a[13], a[49]); SQRADDAC(a[14], a[48]); SQRADDAC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]);
+ COMBA_STORE(b[62]);
+
+ /* output 63 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[63]); SQRADDAC(a[1], a[62]); SQRADDAC(a[2], a[61]); SQRADDAC(a[3], a[60]); SQRADDAC(a[4], a[59]); SQRADDAC(a[5], a[58]); SQRADDAC(a[6], a[57]); SQRADDAC(a[7], a[56]); SQRADDAC(a[8], a[55]); SQRADDAC(a[9], a[54]); SQRADDAC(a[10], a[53]); SQRADDAC(a[11], a[52]); SQRADDAC(a[12], a[51]); SQRADDAC(a[13], a[50]); SQRADDAC(a[14], a[49]); SQRADDAC(a[15], a[48]); SQRADDAC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB;
+ COMBA_STORE(b[63]);
+
+ /* output 64 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[63]); SQRADDAC(a[2], a[62]); SQRADDAC(a[3], a[61]); SQRADDAC(a[4], a[60]); SQRADDAC(a[5], a[59]); SQRADDAC(a[6], a[58]); SQRADDAC(a[7], a[57]); SQRADDAC(a[8], a[56]); SQRADDAC(a[9], a[55]); SQRADDAC(a[10], a[54]); SQRADDAC(a[11], a[53]); SQRADDAC(a[12], a[52]); SQRADDAC(a[13], a[51]); SQRADDAC(a[14], a[50]); SQRADDAC(a[15], a[49]); SQRADDAC(a[16], a[48]); SQRADDAC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]);
+ COMBA_STORE(b[64]);
+
+ /* output 65 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[63]); SQRADDAC(a[3], a[62]); SQRADDAC(a[4], a[61]); SQRADDAC(a[5], a[60]); SQRADDAC(a[6], a[59]); SQRADDAC(a[7], a[58]); SQRADDAC(a[8], a[57]); SQRADDAC(a[9], a[56]); SQRADDAC(a[10], a[55]); SQRADDAC(a[11], a[54]); SQRADDAC(a[12], a[53]); SQRADDAC(a[13], a[52]); SQRADDAC(a[14], a[51]); SQRADDAC(a[15], a[50]); SQRADDAC(a[16], a[49]); SQRADDAC(a[17], a[48]); SQRADDAC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB;
+ COMBA_STORE(b[65]);
+
+ /* output 66 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[63]); SQRADDAC(a[4], a[62]); SQRADDAC(a[5], a[61]); SQRADDAC(a[6], a[60]); SQRADDAC(a[7], a[59]); SQRADDAC(a[8], a[58]); SQRADDAC(a[9], a[57]); SQRADDAC(a[10], a[56]); SQRADDAC(a[11], a[55]); SQRADDAC(a[12], a[54]); SQRADDAC(a[13], a[53]); SQRADDAC(a[14], a[52]); SQRADDAC(a[15], a[51]); SQRADDAC(a[16], a[50]); SQRADDAC(a[17], a[49]); SQRADDAC(a[18], a[48]); SQRADDAC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]);
+ COMBA_STORE(b[66]);
+
+ /* output 67 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[63]); SQRADDAC(a[5], a[62]); SQRADDAC(a[6], a[61]); SQRADDAC(a[7], a[60]); SQRADDAC(a[8], a[59]); SQRADDAC(a[9], a[58]); SQRADDAC(a[10], a[57]); SQRADDAC(a[11], a[56]); SQRADDAC(a[12], a[55]); SQRADDAC(a[13], a[54]); SQRADDAC(a[14], a[53]); SQRADDAC(a[15], a[52]); SQRADDAC(a[16], a[51]); SQRADDAC(a[17], a[50]); SQRADDAC(a[18], a[49]); SQRADDAC(a[19], a[48]); SQRADDAC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB;
+ COMBA_STORE(b[67]);
+
+ /* output 68 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[63]); SQRADDAC(a[6], a[62]); SQRADDAC(a[7], a[61]); SQRADDAC(a[8], a[60]); SQRADDAC(a[9], a[59]); SQRADDAC(a[10], a[58]); SQRADDAC(a[11], a[57]); SQRADDAC(a[12], a[56]); SQRADDAC(a[13], a[55]); SQRADDAC(a[14], a[54]); SQRADDAC(a[15], a[53]); SQRADDAC(a[16], a[52]); SQRADDAC(a[17], a[51]); SQRADDAC(a[18], a[50]); SQRADDAC(a[19], a[49]); SQRADDAC(a[20], a[48]); SQRADDAC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]);
+ COMBA_STORE(b[68]);
+
+ /* output 69 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[63]); SQRADDAC(a[7], a[62]); SQRADDAC(a[8], a[61]); SQRADDAC(a[9], a[60]); SQRADDAC(a[10], a[59]); SQRADDAC(a[11], a[58]); SQRADDAC(a[12], a[57]); SQRADDAC(a[13], a[56]); SQRADDAC(a[14], a[55]); SQRADDAC(a[15], a[54]); SQRADDAC(a[16], a[53]); SQRADDAC(a[17], a[52]); SQRADDAC(a[18], a[51]); SQRADDAC(a[19], a[50]); SQRADDAC(a[20], a[49]); SQRADDAC(a[21], a[48]); SQRADDAC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB;
+ COMBA_STORE(b[69]);
+
+ /* output 70 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[63]); SQRADDAC(a[8], a[62]); SQRADDAC(a[9], a[61]); SQRADDAC(a[10], a[60]); SQRADDAC(a[11], a[59]); SQRADDAC(a[12], a[58]); SQRADDAC(a[13], a[57]); SQRADDAC(a[14], a[56]); SQRADDAC(a[15], a[55]); SQRADDAC(a[16], a[54]); SQRADDAC(a[17], a[53]); SQRADDAC(a[18], a[52]); SQRADDAC(a[19], a[51]); SQRADDAC(a[20], a[50]); SQRADDAC(a[21], a[49]); SQRADDAC(a[22], a[48]); SQRADDAC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]);
+ COMBA_STORE(b[70]);
+
+ /* output 71 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[63]); SQRADDAC(a[9], a[62]); SQRADDAC(a[10], a[61]); SQRADDAC(a[11], a[60]); SQRADDAC(a[12], a[59]); SQRADDAC(a[13], a[58]); SQRADDAC(a[14], a[57]); SQRADDAC(a[15], a[56]); SQRADDAC(a[16], a[55]); SQRADDAC(a[17], a[54]); SQRADDAC(a[18], a[53]); SQRADDAC(a[19], a[52]); SQRADDAC(a[20], a[51]); SQRADDAC(a[21], a[50]); SQRADDAC(a[22], a[49]); SQRADDAC(a[23], a[48]); SQRADDAC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB;
+ COMBA_STORE(b[71]);
+
+ /* output 72 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[63]); SQRADDAC(a[10], a[62]); SQRADDAC(a[11], a[61]); SQRADDAC(a[12], a[60]); SQRADDAC(a[13], a[59]); SQRADDAC(a[14], a[58]); SQRADDAC(a[15], a[57]); SQRADDAC(a[16], a[56]); SQRADDAC(a[17], a[55]); SQRADDAC(a[18], a[54]); SQRADDAC(a[19], a[53]); SQRADDAC(a[20], a[52]); SQRADDAC(a[21], a[51]); SQRADDAC(a[22], a[50]); SQRADDAC(a[23], a[49]); SQRADDAC(a[24], a[48]); SQRADDAC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]);
+ COMBA_STORE(b[72]);
+
+ /* output 73 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[63]); SQRADDAC(a[11], a[62]); SQRADDAC(a[12], a[61]); SQRADDAC(a[13], a[60]); SQRADDAC(a[14], a[59]); SQRADDAC(a[15], a[58]); SQRADDAC(a[16], a[57]); SQRADDAC(a[17], a[56]); SQRADDAC(a[18], a[55]); SQRADDAC(a[19], a[54]); SQRADDAC(a[20], a[53]); SQRADDAC(a[21], a[52]); SQRADDAC(a[22], a[51]); SQRADDAC(a[23], a[50]); SQRADDAC(a[24], a[49]); SQRADDAC(a[25], a[48]); SQRADDAC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB;
+ COMBA_STORE(b[73]);
+
+ /* output 74 */
+ CARRY_FORWARD;
+ SQRADDSC(a[11], a[63]); SQRADDAC(a[12], a[62]); SQRADDAC(a[13], a[61]); SQRADDAC(a[14], a[60]); SQRADDAC(a[15], a[59]); SQRADDAC(a[16], a[58]); SQRADDAC(a[17], a[57]); SQRADDAC(a[18], a[56]); SQRADDAC(a[19], a[55]); SQRADDAC(a[20], a[54]); SQRADDAC(a[21], a[53]); SQRADDAC(a[22], a[52]); SQRADDAC(a[23], a[51]); SQRADDAC(a[24], a[50]); SQRADDAC(a[25], a[49]); SQRADDAC(a[26], a[48]); SQRADDAC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]);
+ COMBA_STORE(b[74]);
+
+ /* output 75 */
+ CARRY_FORWARD;
+ SQRADDSC(a[12], a[63]); SQRADDAC(a[13], a[62]); SQRADDAC(a[14], a[61]); SQRADDAC(a[15], a[60]); SQRADDAC(a[16], a[59]); SQRADDAC(a[17], a[58]); SQRADDAC(a[18], a[57]); SQRADDAC(a[19], a[56]); SQRADDAC(a[20], a[55]); SQRADDAC(a[21], a[54]); SQRADDAC(a[22], a[53]); SQRADDAC(a[23], a[52]); SQRADDAC(a[24], a[51]); SQRADDAC(a[25], a[50]); SQRADDAC(a[26], a[49]); SQRADDAC(a[27], a[48]); SQRADDAC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB;
+ COMBA_STORE(b[75]);
+
+ /* output 76 */
+ CARRY_FORWARD;
+ SQRADDSC(a[13], a[63]); SQRADDAC(a[14], a[62]); SQRADDAC(a[15], a[61]); SQRADDAC(a[16], a[60]); SQRADDAC(a[17], a[59]); SQRADDAC(a[18], a[58]); SQRADDAC(a[19], a[57]); SQRADDAC(a[20], a[56]); SQRADDAC(a[21], a[55]); SQRADDAC(a[22], a[54]); SQRADDAC(a[23], a[53]); SQRADDAC(a[24], a[52]); SQRADDAC(a[25], a[51]); SQRADDAC(a[26], a[50]); SQRADDAC(a[27], a[49]); SQRADDAC(a[28], a[48]); SQRADDAC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]);
+ COMBA_STORE(b[76]);
+
+ /* output 77 */
+ CARRY_FORWARD;
+ SQRADDSC(a[14], a[63]); SQRADDAC(a[15], a[62]); SQRADDAC(a[16], a[61]); SQRADDAC(a[17], a[60]); SQRADDAC(a[18], a[59]); SQRADDAC(a[19], a[58]); SQRADDAC(a[20], a[57]); SQRADDAC(a[21], a[56]); SQRADDAC(a[22], a[55]); SQRADDAC(a[23], a[54]); SQRADDAC(a[24], a[53]); SQRADDAC(a[25], a[52]); SQRADDAC(a[26], a[51]); SQRADDAC(a[27], a[50]); SQRADDAC(a[28], a[49]); SQRADDAC(a[29], a[48]); SQRADDAC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB;
+ COMBA_STORE(b[77]);
+
+ /* output 78 */
+ CARRY_FORWARD;
+ SQRADDSC(a[15], a[63]); SQRADDAC(a[16], a[62]); SQRADDAC(a[17], a[61]); SQRADDAC(a[18], a[60]); SQRADDAC(a[19], a[59]); SQRADDAC(a[20], a[58]); SQRADDAC(a[21], a[57]); SQRADDAC(a[22], a[56]); SQRADDAC(a[23], a[55]); SQRADDAC(a[24], a[54]); SQRADDAC(a[25], a[53]); SQRADDAC(a[26], a[52]); SQRADDAC(a[27], a[51]); SQRADDAC(a[28], a[50]); SQRADDAC(a[29], a[49]); SQRADDAC(a[30], a[48]); SQRADDAC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]);
+ COMBA_STORE(b[78]);
+
+ /* output 79 */
+ CARRY_FORWARD;
+ SQRADDSC(a[16], a[63]); SQRADDAC(a[17], a[62]); SQRADDAC(a[18], a[61]); SQRADDAC(a[19], a[60]); SQRADDAC(a[20], a[59]); SQRADDAC(a[21], a[58]); SQRADDAC(a[22], a[57]); SQRADDAC(a[23], a[56]); SQRADDAC(a[24], a[55]); SQRADDAC(a[25], a[54]); SQRADDAC(a[26], a[53]); SQRADDAC(a[27], a[52]); SQRADDAC(a[28], a[51]); SQRADDAC(a[29], a[50]); SQRADDAC(a[30], a[49]); SQRADDAC(a[31], a[48]); SQRADDAC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB;
+ COMBA_STORE(b[79]);
+
+ /* output 80 */
+ CARRY_FORWARD;
+ SQRADDSC(a[17], a[63]); SQRADDAC(a[18], a[62]); SQRADDAC(a[19], a[61]); SQRADDAC(a[20], a[60]); SQRADDAC(a[21], a[59]); SQRADDAC(a[22], a[58]); SQRADDAC(a[23], a[57]); SQRADDAC(a[24], a[56]); SQRADDAC(a[25], a[55]); SQRADDAC(a[26], a[54]); SQRADDAC(a[27], a[53]); SQRADDAC(a[28], a[52]); SQRADDAC(a[29], a[51]); SQRADDAC(a[30], a[50]); SQRADDAC(a[31], a[49]); SQRADDAC(a[32], a[48]); SQRADDAC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]);
+ COMBA_STORE(b[80]);
+
+ /* output 81 */
+ CARRY_FORWARD;
+ SQRADDSC(a[18], a[63]); SQRADDAC(a[19], a[62]); SQRADDAC(a[20], a[61]); SQRADDAC(a[21], a[60]); SQRADDAC(a[22], a[59]); SQRADDAC(a[23], a[58]); SQRADDAC(a[24], a[57]); SQRADDAC(a[25], a[56]); SQRADDAC(a[26], a[55]); SQRADDAC(a[27], a[54]); SQRADDAC(a[28], a[53]); SQRADDAC(a[29], a[52]); SQRADDAC(a[30], a[51]); SQRADDAC(a[31], a[50]); SQRADDAC(a[32], a[49]); SQRADDAC(a[33], a[48]); SQRADDAC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB;
+ COMBA_STORE(b[81]);
+
+ /* output 82 */
+ CARRY_FORWARD;
+ SQRADDSC(a[19], a[63]); SQRADDAC(a[20], a[62]); SQRADDAC(a[21], a[61]); SQRADDAC(a[22], a[60]); SQRADDAC(a[23], a[59]); SQRADDAC(a[24], a[58]); SQRADDAC(a[25], a[57]); SQRADDAC(a[26], a[56]); SQRADDAC(a[27], a[55]); SQRADDAC(a[28], a[54]); SQRADDAC(a[29], a[53]); SQRADDAC(a[30], a[52]); SQRADDAC(a[31], a[51]); SQRADDAC(a[32], a[50]); SQRADDAC(a[33], a[49]); SQRADDAC(a[34], a[48]); SQRADDAC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]);
+ COMBA_STORE(b[82]);
+
+ /* output 83 */
+ CARRY_FORWARD;
+ SQRADDSC(a[20], a[63]); SQRADDAC(a[21], a[62]); SQRADDAC(a[22], a[61]); SQRADDAC(a[23], a[60]); SQRADDAC(a[24], a[59]); SQRADDAC(a[25], a[58]); SQRADDAC(a[26], a[57]); SQRADDAC(a[27], a[56]); SQRADDAC(a[28], a[55]); SQRADDAC(a[29], a[54]); SQRADDAC(a[30], a[53]); SQRADDAC(a[31], a[52]); SQRADDAC(a[32], a[51]); SQRADDAC(a[33], a[50]); SQRADDAC(a[34], a[49]); SQRADDAC(a[35], a[48]); SQRADDAC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB;
+ COMBA_STORE(b[83]);
+
+ /* output 84 */
+ CARRY_FORWARD;
+ SQRADDSC(a[21], a[63]); SQRADDAC(a[22], a[62]); SQRADDAC(a[23], a[61]); SQRADDAC(a[24], a[60]); SQRADDAC(a[25], a[59]); SQRADDAC(a[26], a[58]); SQRADDAC(a[27], a[57]); SQRADDAC(a[28], a[56]); SQRADDAC(a[29], a[55]); SQRADDAC(a[30], a[54]); SQRADDAC(a[31], a[53]); SQRADDAC(a[32], a[52]); SQRADDAC(a[33], a[51]); SQRADDAC(a[34], a[50]); SQRADDAC(a[35], a[49]); SQRADDAC(a[36], a[48]); SQRADDAC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]);
+ COMBA_STORE(b[84]);
+
+ /* output 85 */
+ CARRY_FORWARD;
+ SQRADDSC(a[22], a[63]); SQRADDAC(a[23], a[62]); SQRADDAC(a[24], a[61]); SQRADDAC(a[25], a[60]); SQRADDAC(a[26], a[59]); SQRADDAC(a[27], a[58]); SQRADDAC(a[28], a[57]); SQRADDAC(a[29], a[56]); SQRADDAC(a[30], a[55]); SQRADDAC(a[31], a[54]); SQRADDAC(a[32], a[53]); SQRADDAC(a[33], a[52]); SQRADDAC(a[34], a[51]); SQRADDAC(a[35], a[50]); SQRADDAC(a[36], a[49]); SQRADDAC(a[37], a[48]); SQRADDAC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB;
+ COMBA_STORE(b[85]);
+
+ /* output 86 */
+ CARRY_FORWARD;
+ SQRADDSC(a[23], a[63]); SQRADDAC(a[24], a[62]); SQRADDAC(a[25], a[61]); SQRADDAC(a[26], a[60]); SQRADDAC(a[27], a[59]); SQRADDAC(a[28], a[58]); SQRADDAC(a[29], a[57]); SQRADDAC(a[30], a[56]); SQRADDAC(a[31], a[55]); SQRADDAC(a[32], a[54]); SQRADDAC(a[33], a[53]); SQRADDAC(a[34], a[52]); SQRADDAC(a[35], a[51]); SQRADDAC(a[36], a[50]); SQRADDAC(a[37], a[49]); SQRADDAC(a[38], a[48]); SQRADDAC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]);
+ COMBA_STORE(b[86]);
+
+ /* output 87 */
+ CARRY_FORWARD;
+ SQRADDSC(a[24], a[63]); SQRADDAC(a[25], a[62]); SQRADDAC(a[26], a[61]); SQRADDAC(a[27], a[60]); SQRADDAC(a[28], a[59]); SQRADDAC(a[29], a[58]); SQRADDAC(a[30], a[57]); SQRADDAC(a[31], a[56]); SQRADDAC(a[32], a[55]); SQRADDAC(a[33], a[54]); SQRADDAC(a[34], a[53]); SQRADDAC(a[35], a[52]); SQRADDAC(a[36], a[51]); SQRADDAC(a[37], a[50]); SQRADDAC(a[38], a[49]); SQRADDAC(a[39], a[48]); SQRADDAC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB;
+ COMBA_STORE(b[87]);
+
+ /* output 88 */
+ CARRY_FORWARD;
+ SQRADDSC(a[25], a[63]); SQRADDAC(a[26], a[62]); SQRADDAC(a[27], a[61]); SQRADDAC(a[28], a[60]); SQRADDAC(a[29], a[59]); SQRADDAC(a[30], a[58]); SQRADDAC(a[31], a[57]); SQRADDAC(a[32], a[56]); SQRADDAC(a[33], a[55]); SQRADDAC(a[34], a[54]); SQRADDAC(a[35], a[53]); SQRADDAC(a[36], a[52]); SQRADDAC(a[37], a[51]); SQRADDAC(a[38], a[50]); SQRADDAC(a[39], a[49]); SQRADDAC(a[40], a[48]); SQRADDAC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]);
+ COMBA_STORE(b[88]);
+
+ /* output 89 */
+ CARRY_FORWARD;
+ SQRADDSC(a[26], a[63]); SQRADDAC(a[27], a[62]); SQRADDAC(a[28], a[61]); SQRADDAC(a[29], a[60]); SQRADDAC(a[30], a[59]); SQRADDAC(a[31], a[58]); SQRADDAC(a[32], a[57]); SQRADDAC(a[33], a[56]); SQRADDAC(a[34], a[55]); SQRADDAC(a[35], a[54]); SQRADDAC(a[36], a[53]); SQRADDAC(a[37], a[52]); SQRADDAC(a[38], a[51]); SQRADDAC(a[39], a[50]); SQRADDAC(a[40], a[49]); SQRADDAC(a[41], a[48]); SQRADDAC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB;
+ COMBA_STORE(b[89]);
+
+ /* output 90 */
+ CARRY_FORWARD;
+ SQRADDSC(a[27], a[63]); SQRADDAC(a[28], a[62]); SQRADDAC(a[29], a[61]); SQRADDAC(a[30], a[60]); SQRADDAC(a[31], a[59]); SQRADDAC(a[32], a[58]); SQRADDAC(a[33], a[57]); SQRADDAC(a[34], a[56]); SQRADDAC(a[35], a[55]); SQRADDAC(a[36], a[54]); SQRADDAC(a[37], a[53]); SQRADDAC(a[38], a[52]); SQRADDAC(a[39], a[51]); SQRADDAC(a[40], a[50]); SQRADDAC(a[41], a[49]); SQRADDAC(a[42], a[48]); SQRADDAC(a[43], a[47]); SQRADDAC(a[44], a[46]); SQRADDDB; SQRADD(a[45], a[45]);
+ COMBA_STORE(b[90]);
+
+ /* output 91 */
+ CARRY_FORWARD;
+ SQRADDSC(a[28], a[63]); SQRADDAC(a[29], a[62]); SQRADDAC(a[30], a[61]); SQRADDAC(a[31], a[60]); SQRADDAC(a[32], a[59]); SQRADDAC(a[33], a[58]); SQRADDAC(a[34], a[57]); SQRADDAC(a[35], a[56]); SQRADDAC(a[36], a[55]); SQRADDAC(a[37], a[54]); SQRADDAC(a[38], a[53]); SQRADDAC(a[39], a[52]); SQRADDAC(a[40], a[51]); SQRADDAC(a[41], a[50]); SQRADDAC(a[42], a[49]); SQRADDAC(a[43], a[48]); SQRADDAC(a[44], a[47]); SQRADDAC(a[45], a[46]); SQRADDDB;
+ COMBA_STORE(b[91]);
+
+ /* output 92 */
+ CARRY_FORWARD;
+ SQRADDSC(a[29], a[63]); SQRADDAC(a[30], a[62]); SQRADDAC(a[31], a[61]); SQRADDAC(a[32], a[60]); SQRADDAC(a[33], a[59]); SQRADDAC(a[34], a[58]); SQRADDAC(a[35], a[57]); SQRADDAC(a[36], a[56]); SQRADDAC(a[37], a[55]); SQRADDAC(a[38], a[54]); SQRADDAC(a[39], a[53]); SQRADDAC(a[40], a[52]); SQRADDAC(a[41], a[51]); SQRADDAC(a[42], a[50]); SQRADDAC(a[43], a[49]); SQRADDAC(a[44], a[48]); SQRADDAC(a[45], a[47]); SQRADDDB; SQRADD(a[46], a[46]);
+ COMBA_STORE(b[92]);
+
+ /* output 93 */
+ CARRY_FORWARD;
+ SQRADDSC(a[30], a[63]); SQRADDAC(a[31], a[62]); SQRADDAC(a[32], a[61]); SQRADDAC(a[33], a[60]); SQRADDAC(a[34], a[59]); SQRADDAC(a[35], a[58]); SQRADDAC(a[36], a[57]); SQRADDAC(a[37], a[56]); SQRADDAC(a[38], a[55]); SQRADDAC(a[39], a[54]); SQRADDAC(a[40], a[53]); SQRADDAC(a[41], a[52]); SQRADDAC(a[42], a[51]); SQRADDAC(a[43], a[50]); SQRADDAC(a[44], a[49]); SQRADDAC(a[45], a[48]); SQRADDAC(a[46], a[47]); SQRADDDB;
+ COMBA_STORE(b[93]);
+
+ /* output 94 */
+ CARRY_FORWARD;
+ SQRADDSC(a[31], a[63]); SQRADDAC(a[32], a[62]); SQRADDAC(a[33], a[61]); SQRADDAC(a[34], a[60]); SQRADDAC(a[35], a[59]); SQRADDAC(a[36], a[58]); SQRADDAC(a[37], a[57]); SQRADDAC(a[38], a[56]); SQRADDAC(a[39], a[55]); SQRADDAC(a[40], a[54]); SQRADDAC(a[41], a[53]); SQRADDAC(a[42], a[52]); SQRADDAC(a[43], a[51]); SQRADDAC(a[44], a[50]); SQRADDAC(a[45], a[49]); SQRADDAC(a[46], a[48]); SQRADDDB; SQRADD(a[47], a[47]);
+ COMBA_STORE(b[94]);
+
+ /* output 95 */
+ CARRY_FORWARD;
+ SQRADDSC(a[32], a[63]); SQRADDAC(a[33], a[62]); SQRADDAC(a[34], a[61]); SQRADDAC(a[35], a[60]); SQRADDAC(a[36], a[59]); SQRADDAC(a[37], a[58]); SQRADDAC(a[38], a[57]); SQRADDAC(a[39], a[56]); SQRADDAC(a[40], a[55]); SQRADDAC(a[41], a[54]); SQRADDAC(a[42], a[53]); SQRADDAC(a[43], a[52]); SQRADDAC(a[44], a[51]); SQRADDAC(a[45], a[50]); SQRADDAC(a[46], a[49]); SQRADDAC(a[47], a[48]); SQRADDDB;
+ COMBA_STORE(b[95]);
+
+ /* output 96 */
+ CARRY_FORWARD;
+ SQRADDSC(a[33], a[63]); SQRADDAC(a[34], a[62]); SQRADDAC(a[35], a[61]); SQRADDAC(a[36], a[60]); SQRADDAC(a[37], a[59]); SQRADDAC(a[38], a[58]); SQRADDAC(a[39], a[57]); SQRADDAC(a[40], a[56]); SQRADDAC(a[41], a[55]); SQRADDAC(a[42], a[54]); SQRADDAC(a[43], a[53]); SQRADDAC(a[44], a[52]); SQRADDAC(a[45], a[51]); SQRADDAC(a[46], a[50]); SQRADDAC(a[47], a[49]); SQRADDDB; SQRADD(a[48], a[48]);
+ COMBA_STORE(b[96]);
+
+ /* output 97 */
+ CARRY_FORWARD;
+ SQRADDSC(a[34], a[63]); SQRADDAC(a[35], a[62]); SQRADDAC(a[36], a[61]); SQRADDAC(a[37], a[60]); SQRADDAC(a[38], a[59]); SQRADDAC(a[39], a[58]); SQRADDAC(a[40], a[57]); SQRADDAC(a[41], a[56]); SQRADDAC(a[42], a[55]); SQRADDAC(a[43], a[54]); SQRADDAC(a[44], a[53]); SQRADDAC(a[45], a[52]); SQRADDAC(a[46], a[51]); SQRADDAC(a[47], a[50]); SQRADDAC(a[48], a[49]); SQRADDDB;
+ COMBA_STORE(b[97]);
+
+ /* output 98 */
+ CARRY_FORWARD;
+ SQRADDSC(a[35], a[63]); SQRADDAC(a[36], a[62]); SQRADDAC(a[37], a[61]); SQRADDAC(a[38], a[60]); SQRADDAC(a[39], a[59]); SQRADDAC(a[40], a[58]); SQRADDAC(a[41], a[57]); SQRADDAC(a[42], a[56]); SQRADDAC(a[43], a[55]); SQRADDAC(a[44], a[54]); SQRADDAC(a[45], a[53]); SQRADDAC(a[46], a[52]); SQRADDAC(a[47], a[51]); SQRADDAC(a[48], a[50]); SQRADDDB; SQRADD(a[49], a[49]);
+ COMBA_STORE(b[98]);
+
+ /* output 99 */
+ CARRY_FORWARD;
+ SQRADDSC(a[36], a[63]); SQRADDAC(a[37], a[62]); SQRADDAC(a[38], a[61]); SQRADDAC(a[39], a[60]); SQRADDAC(a[40], a[59]); SQRADDAC(a[41], a[58]); SQRADDAC(a[42], a[57]); SQRADDAC(a[43], a[56]); SQRADDAC(a[44], a[55]); SQRADDAC(a[45], a[54]); SQRADDAC(a[46], a[53]); SQRADDAC(a[47], a[52]); SQRADDAC(a[48], a[51]); SQRADDAC(a[49], a[50]); SQRADDDB;
+ COMBA_STORE(b[99]);
+
+ /* output 100 */
+ CARRY_FORWARD;
+ SQRADDSC(a[37], a[63]); SQRADDAC(a[38], a[62]); SQRADDAC(a[39], a[61]); SQRADDAC(a[40], a[60]); SQRADDAC(a[41], a[59]); SQRADDAC(a[42], a[58]); SQRADDAC(a[43], a[57]); SQRADDAC(a[44], a[56]); SQRADDAC(a[45], a[55]); SQRADDAC(a[46], a[54]); SQRADDAC(a[47], a[53]); SQRADDAC(a[48], a[52]); SQRADDAC(a[49], a[51]); SQRADDDB; SQRADD(a[50], a[50]);
+ COMBA_STORE(b[100]);
+
+ /* output 101 */
+ CARRY_FORWARD;
+ SQRADDSC(a[38], a[63]); SQRADDAC(a[39], a[62]); SQRADDAC(a[40], a[61]); SQRADDAC(a[41], a[60]); SQRADDAC(a[42], a[59]); SQRADDAC(a[43], a[58]); SQRADDAC(a[44], a[57]); SQRADDAC(a[45], a[56]); SQRADDAC(a[46], a[55]); SQRADDAC(a[47], a[54]); SQRADDAC(a[48], a[53]); SQRADDAC(a[49], a[52]); SQRADDAC(a[50], a[51]); SQRADDDB;
+ COMBA_STORE(b[101]);
+
+ /* output 102 */
+ CARRY_FORWARD;
+ SQRADDSC(a[39], a[63]); SQRADDAC(a[40], a[62]); SQRADDAC(a[41], a[61]); SQRADDAC(a[42], a[60]); SQRADDAC(a[43], a[59]); SQRADDAC(a[44], a[58]); SQRADDAC(a[45], a[57]); SQRADDAC(a[46], a[56]); SQRADDAC(a[47], a[55]); SQRADDAC(a[48], a[54]); SQRADDAC(a[49], a[53]); SQRADDAC(a[50], a[52]); SQRADDDB; SQRADD(a[51], a[51]);
+ COMBA_STORE(b[102]);
+
+ /* output 103 */
+ CARRY_FORWARD;
+ SQRADDSC(a[40], a[63]); SQRADDAC(a[41], a[62]); SQRADDAC(a[42], a[61]); SQRADDAC(a[43], a[60]); SQRADDAC(a[44], a[59]); SQRADDAC(a[45], a[58]); SQRADDAC(a[46], a[57]); SQRADDAC(a[47], a[56]); SQRADDAC(a[48], a[55]); SQRADDAC(a[49], a[54]); SQRADDAC(a[50], a[53]); SQRADDAC(a[51], a[52]); SQRADDDB;
+ COMBA_STORE(b[103]);
+
+ /* output 104 */
+ CARRY_FORWARD;
+ SQRADDSC(a[41], a[63]); SQRADDAC(a[42], a[62]); SQRADDAC(a[43], a[61]); SQRADDAC(a[44], a[60]); SQRADDAC(a[45], a[59]); SQRADDAC(a[46], a[58]); SQRADDAC(a[47], a[57]); SQRADDAC(a[48], a[56]); SQRADDAC(a[49], a[55]); SQRADDAC(a[50], a[54]); SQRADDAC(a[51], a[53]); SQRADDDB; SQRADD(a[52], a[52]);
+ COMBA_STORE(b[104]);
+
+ /* output 105 */
+ CARRY_FORWARD;
+ SQRADDSC(a[42], a[63]); SQRADDAC(a[43], a[62]); SQRADDAC(a[44], a[61]); SQRADDAC(a[45], a[60]); SQRADDAC(a[46], a[59]); SQRADDAC(a[47], a[58]); SQRADDAC(a[48], a[57]); SQRADDAC(a[49], a[56]); SQRADDAC(a[50], a[55]); SQRADDAC(a[51], a[54]); SQRADDAC(a[52], a[53]); SQRADDDB;
+ COMBA_STORE(b[105]);
+
+ /* output 106 */
+ CARRY_FORWARD;
+ SQRADDSC(a[43], a[63]); SQRADDAC(a[44], a[62]); SQRADDAC(a[45], a[61]); SQRADDAC(a[46], a[60]); SQRADDAC(a[47], a[59]); SQRADDAC(a[48], a[58]); SQRADDAC(a[49], a[57]); SQRADDAC(a[50], a[56]); SQRADDAC(a[51], a[55]); SQRADDAC(a[52], a[54]); SQRADDDB; SQRADD(a[53], a[53]);
+ COMBA_STORE(b[106]);
+
+ /* output 107 */
+ CARRY_FORWARD;
+ SQRADDSC(a[44], a[63]); SQRADDAC(a[45], a[62]); SQRADDAC(a[46], a[61]); SQRADDAC(a[47], a[60]); SQRADDAC(a[48], a[59]); SQRADDAC(a[49], a[58]); SQRADDAC(a[50], a[57]); SQRADDAC(a[51], a[56]); SQRADDAC(a[52], a[55]); SQRADDAC(a[53], a[54]); SQRADDDB;
+ COMBA_STORE(b[107]);
+
+ /* output 108 */
+ CARRY_FORWARD;
+ SQRADDSC(a[45], a[63]); SQRADDAC(a[46], a[62]); SQRADDAC(a[47], a[61]); SQRADDAC(a[48], a[60]); SQRADDAC(a[49], a[59]); SQRADDAC(a[50], a[58]); SQRADDAC(a[51], a[57]); SQRADDAC(a[52], a[56]); SQRADDAC(a[53], a[55]); SQRADDDB; SQRADD(a[54], a[54]);
+ COMBA_STORE(b[108]);
+
+ /* output 109 */
+ CARRY_FORWARD;
+ SQRADDSC(a[46], a[63]); SQRADDAC(a[47], a[62]); SQRADDAC(a[48], a[61]); SQRADDAC(a[49], a[60]); SQRADDAC(a[50], a[59]); SQRADDAC(a[51], a[58]); SQRADDAC(a[52], a[57]); SQRADDAC(a[53], a[56]); SQRADDAC(a[54], a[55]); SQRADDDB;
+ COMBA_STORE(b[109]);
+
+ /* output 110 */
+ CARRY_FORWARD;
+ SQRADDSC(a[47], a[63]); SQRADDAC(a[48], a[62]); SQRADDAC(a[49], a[61]); SQRADDAC(a[50], a[60]); SQRADDAC(a[51], a[59]); SQRADDAC(a[52], a[58]); SQRADDAC(a[53], a[57]); SQRADDAC(a[54], a[56]); SQRADDDB; SQRADD(a[55], a[55]);
+ COMBA_STORE(b[110]);
+
+ /* output 111 */
+ CARRY_FORWARD;
+ SQRADDSC(a[48], a[63]); SQRADDAC(a[49], a[62]); SQRADDAC(a[50], a[61]); SQRADDAC(a[51], a[60]); SQRADDAC(a[52], a[59]); SQRADDAC(a[53], a[58]); SQRADDAC(a[54], a[57]); SQRADDAC(a[55], a[56]); SQRADDDB;
+ COMBA_STORE(b[111]);
+
+ /* output 112 */
+ CARRY_FORWARD;
+ SQRADDSC(a[49], a[63]); SQRADDAC(a[50], a[62]); SQRADDAC(a[51], a[61]); SQRADDAC(a[52], a[60]); SQRADDAC(a[53], a[59]); SQRADDAC(a[54], a[58]); SQRADDAC(a[55], a[57]); SQRADDDB; SQRADD(a[56], a[56]);
+ COMBA_STORE(b[112]);
+
+ /* output 113 */
+ CARRY_FORWARD;
+ SQRADDSC(a[50], a[63]); SQRADDAC(a[51], a[62]); SQRADDAC(a[52], a[61]); SQRADDAC(a[53], a[60]); SQRADDAC(a[54], a[59]); SQRADDAC(a[55], a[58]); SQRADDAC(a[56], a[57]); SQRADDDB;
+ COMBA_STORE(b[113]);
+
+ /* output 114 */
+ CARRY_FORWARD;
+ SQRADDSC(a[51], a[63]); SQRADDAC(a[52], a[62]); SQRADDAC(a[53], a[61]); SQRADDAC(a[54], a[60]); SQRADDAC(a[55], a[59]); SQRADDAC(a[56], a[58]); SQRADDDB; SQRADD(a[57], a[57]);
+ COMBA_STORE(b[114]);
+
+ /* output 115 */
+ CARRY_FORWARD;
+ SQRADDSC(a[52], a[63]); SQRADDAC(a[53], a[62]); SQRADDAC(a[54], a[61]); SQRADDAC(a[55], a[60]); SQRADDAC(a[56], a[59]); SQRADDAC(a[57], a[58]); SQRADDDB;
+ COMBA_STORE(b[115]);
+
+ /* output 116 */
+ CARRY_FORWARD;
+ SQRADDSC(a[53], a[63]); SQRADDAC(a[54], a[62]); SQRADDAC(a[55], a[61]); SQRADDAC(a[56], a[60]); SQRADDAC(a[57], a[59]); SQRADDDB; SQRADD(a[58], a[58]);
+ COMBA_STORE(b[116]);
+
+ /* output 117 */
+ CARRY_FORWARD;
+ SQRADDSC(a[54], a[63]); SQRADDAC(a[55], a[62]); SQRADDAC(a[56], a[61]); SQRADDAC(a[57], a[60]); SQRADDAC(a[58], a[59]); SQRADDDB;
+ COMBA_STORE(b[117]);
+
+ /* output 118 */
+ CARRY_FORWARD;
+ SQRADDSC(a[55], a[63]); SQRADDAC(a[56], a[62]); SQRADDAC(a[57], a[61]); SQRADDAC(a[58], a[60]); SQRADDDB; SQRADD(a[59], a[59]);
+ COMBA_STORE(b[118]);
+
+ /* output 119 */
+ CARRY_FORWARD;
+ SQRADDSC(a[56], a[63]); SQRADDAC(a[57], a[62]); SQRADDAC(a[58], a[61]); SQRADDAC(a[59], a[60]); SQRADDDB;
+ COMBA_STORE(b[119]);
+
+ /* output 120 */
+ CARRY_FORWARD;
+ SQRADDSC(a[57], a[63]); SQRADDAC(a[58], a[62]); SQRADDAC(a[59], a[61]); SQRADDDB; SQRADD(a[60], a[60]);
+ COMBA_STORE(b[120]);
+
+ /* output 121 */
+ CARRY_FORWARD;
+ SQRADDSC(a[58], a[63]); SQRADDAC(a[59], a[62]); SQRADDAC(a[60], a[61]); SQRADDDB;
+ COMBA_STORE(b[121]);
+
+ /* output 122 */
+ CARRY_FORWARD;
+ SQRADD2(a[59], a[63]); SQRADD2(a[60], a[62]); SQRADD(a[61], a[61]);
+ COMBA_STORE(b[122]);
+
+ /* output 123 */
+ CARRY_FORWARD;
+ SQRADD2(a[60], a[63]); SQRADD2(a[61], a[62]);
+ COMBA_STORE(b[123]);
+
+ /* output 124 */
+ CARRY_FORWARD;
+ SQRADD2(a[61], a[63]); SQRADD(a[62], a[62]);
+ COMBA_STORE(b[124]);
+
+ /* output 125 */
+ CARRY_FORWARD;
+ SQRADD2(a[62], a[63]);
+ COMBA_STORE(b[125]);
+
+ /* output 126 */
+ CARRY_FORWARD;
+ SQRADD(a[63], a[63]);
+ COMBA_STORE(b[126]);
+ COMBA_STORE2(b[127]);
+ COMBA_FINI;
+
+ B->used = 128;
+ B->sign = FP_ZPOS;
+ memcpy(B->dp, b, 128 * sizeof(fp_digit));
+ fp_clamp(B);
+}
+
+
+#endif
+
+/* End: fp_sqr_comba.c */
+
+/* Start: fp_sqr_comba_generic.c */
+/* generic comba squarer */
+void fp_sqr_comba(fp_int *A, fp_int *B)
+{
+ int pa, ix, iz;
+ fp_digit c0, c1, c2;
+ fp_int tmp, *dst;
+
+ /* get size of output and trim */
+ pa = A->used + A->used;
+ if (pa >= FP_SIZE) {
+ pa = FP_SIZE-1;
+ }
+
+ /* number of output digits to produce */
+ COMBA_START;
+ CLEAR_CARRY;
+
+ if (A == B) {
+ fp_zero(&tmp);
+ dst = &tmp;
+ } else {
+ fp_zero(B);
+ dst = B;
+ }
+
+ for (ix = 0; ix < pa; ix++) {
+ int tx, ty, iy;
+ fp_digit *tmpy, *tmpx;
+
+ /* get offsets into the two bignums */
+ ty = MIN(A->used-1, ix);
+ tx = ix - ty;
+
+ /* setup temp aliases */
+ tmpx = A->dp + tx;
+ tmpy = A->dp + ty;
+
+ /* this is the number of times the loop will iterrate, essentially its
+ while (tx++ < a->used && ty-- >= 0) { ... }
+ */
+ iy = MIN(A->used-tx, ty+1);
+
+ /* now for squaring tx can never equal ty
+ * we halve the distance since they approach at a rate of 2x
+ * and we have to round because odd cases need to be executed
+ */
+ iy = MIN(iy, (ty-tx+1)>>1);
+
+ /* forward carries */
+ CARRY_FORWARD;
+
+ /* execute loop */
+ for (iz = 0; iz < iy; iz++) {
+ SQRADD2(*tmpx++, *tmpy--);
+ }
+
+ /* even columns have the square term in them */
+ if ((ix&1) == 0) {
+ SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
+ }
+
+ /* store it */
+ COMBA_STORE(dst->dp[ix]);
+ }
+ COMBA_STORE2(dst->dp[ix]);
+
+ COMBA_FINI;
+
+ /* setup dest */
+ dst->used = pa;
+ fp_clamp (dst);
+ if (dst != B) {
+ fp_copy(dst, B);
+ }
+}
+
+/* End: fp_sqr_comba_generic.c */
+
+/* Start: fp_sqrmod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * a (mod b) */
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
+{
+ fp_int tmp;
+ fp_zero(&tmp);
+ fp_sqr(a, &tmp);
+ return fp_mod(&tmp, b, c);
+}
+
+/* End: fp_sqrmod.c */
+
+/* Start: fp_sub.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a - b */
+void fp_sub(fp_int *a, fp_int *b, fp_int *c)
+{
+ int sa, sb;
+
+ sa = a->sign;
+ sb = b->sign;
+
+ if (sa != sb) {
+ /* subtract a negative from a positive, OR */
+ /* subtract a positive from a negative. */
+ /* In either case, ADD their magnitudes, */
+ /* and use the sign of the first number. */
+ c->sign = sa;
+ s_fp_add (a, b, c);
+ } else {
+ /* subtract a positive from a positive, OR */
+ /* subtract a negative from a negative. */
+ /* First, take the difference between their */
+ /* magnitudes, then... */
+ if (fp_cmp_mag (a, b) != FP_LT) {
+ /* Copy the sign from the first */
+ c->sign = sa;
+ /* The first has a larger or equal magnitude */
+ s_fp_sub (a, b, c);
+ } else {
+ /* The result has the *opposite* sign from */
+ /* the first number. */
+ c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
+ /* The second has a larger magnitude */
+ s_fp_sub (b, a, c);
+ }
+ }
+}
+
+
+/* End: fp_sub.c */
+
+/* Start: fp_sub_d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a - b */
+void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
+{
+ fp_int tmp;
+ fp_set(&tmp, b);
+ fp_sub(a, &tmp, c);
+}
+
+/* End: fp_sub_d.c */
+
+/* Start: fp_submod.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* d = a - b (mod c) */
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ fp_int tmp;
+ fp_zero(&tmp);
+ fp_sub(a, b, &tmp);
+ return fp_mod(&tmp, c, d);
+}
+
+
+/* End: fp_submod.c */
+
+/* Start: fp_to_signed_bin.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_to_signed_bin(fp_int *a, unsigned char *b)
+{
+ fp_to_unsigned_bin (a, b + 1);
+ b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1);
+}
+
+/* End: fp_to_signed_bin.c */
+
+/* Start: fp_to_unsigned_bin.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+{
+ int x;
+ fp_int t;
+
+ fp_init_copy(&t, a);
+
+ x = 0;
+ while (fp_iszero (&t) == FP_NO) {
+ b[x++] = (unsigned char) (t.dp[0] & 255);
+ fp_div_2d (&t, 8, &t, NULL);
+ }
+ bn_reverse (b, x);
+}
+
+/* End: fp_to_unsigned_bin.c */
+
+/* Start: fp_toradix.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_toradix(fp_int *a, char *str, int radix)
+{
+ int digs;
+ fp_int t;
+ fp_digit d;
+ char *_s = str;
+
+ /* check range of the radix */
+ if (radix < 2 || radix > 64) {
+ return FP_VAL;
+ }
+
+ /* quick out if its zero */
+ if (fp_iszero(a) == 1) {
+ *str++ = '0';
+ *str = '\0';
+ return FP_OKAY;
+ }
+
+ fp_init_copy(&t, a);
+
+ /* if it is negative output a - */
+ if (t.sign == FP_NEG) {
+ ++_s;
+ *str++ = '-';
+ t.sign = FP_ZPOS;
+ }
+
+ digs = 0;
+ while (fp_iszero (&t) == FP_NO) {
+ fp_div_d (&t, (fp_digit) radix, &t, &d);
+ *str++ = fp_s_rmap[d];
+ ++digs;
+ }
+
+ /* reverse the digits of the string. In this case _s points
+ * to the first digit [exluding the sign] of the number]
+ */
+ bn_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+ return FP_OKAY;
+}
+
+/* End: fp_toradix.c */
+
+/* Start: fp_unsigned_bin_size.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+int fp_unsigned_bin_size(fp_int *a)
+{
+ int size = fp_count_bits (a);
+ return (size / 8 + ((size & 7) != 0 ? 1 : 0));
+}
+
+/* End: fp_unsigned_bin_size.c */
+
+/* Start: s_fp_add.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* unsigned addition */
+void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
+{
+ int x, y, oldused;
+ fp_word t;
+
+ y = MAX(a->used, b->used);
+ oldused = c->used;
+ c->used = y;
+
+ t = 0;
+ for (x = 0; x < y; x++) {
+ t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
+ c->dp[x] = (fp_digit)t;
+ t >>= DIGIT_BIT;
+ }
+ if (t != 0 && x != FP_SIZE) {
+ c->dp[c->used++] = (fp_digit)t;
+ ++x;
+ }
+
+ for (; x < oldused; x++) {
+ c->dp[x] = 0;
+ }
+ fp_clamp(c);
+}
+
+/* End: s_fp_add.c */
+
+/* Start: s_fp_sub.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
+void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
+{
+ int x, oldused;
+ fp_word t;
+
+ oldused = c->used;
+ c->used = a->used;
+ t = 0;
+ for (x = 0; x < a->used; x++) {
+ t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
+ c->dp[x] = (fp_digit)t;
+ t = (t >> DIGIT_BIT) & 1;
+ }
+
+ for (; x < oldused; x++) {
+ c->dp[x] = 0;
+ }
+ fp_clamp(c);
+}
+
+/* End: s_fp_sub.c */
+
+
+/* EOF */
--- /dev/null
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#ifndef TFM_H_
+#define TFM_H_
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+
+/* Assure these -Pekka */
+#undef CRYPT
+
+#undef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#undef MAX
+#define MAX(x,y) ((x)>(y)?(x):(y))
+
+/* do we want large code? */
+#define TFM_LARGE
+
+/* do we want huge code (implies large)? The answer is, yes. */
+#define TFM_HUGE
+
+/* imply TFM_LARGE as required */
+#if defined(TFM_HUGE)
+ #if !defined(TFM_LARGE)
+ #define TFM_LARGE
+ #endif
+#endif
+
+/* Max size of any number in bits. Basically the largest size you will be multiplying
+ * should be half [or smaller] of FP_MAX_SIZE-four_digit
+ *
+ * You can externally define this or it defaults to 4096-bits.
+ */
+#ifndef FP_MAX_SIZE
+/* For SILC -Pekka */
+ #define FP_MAX_SIZE (8192+(4*DIGIT_BIT))
+/* #define FP_MAX_SIZE (4096+(4*DIGIT_BIT))*/
+#endif
+
+/* will this lib work? */
+#if (CHAR_BIT & 7)
+ #error CHAR_BIT must be a multiple of eight.
+#endif
+#if FP_MAX_SIZE % CHAR_BIT
+ #error FP_MAX_SIZE must be a multiple of CHAR_BIT
+#endif
+
+/* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
+#if defined(__x86_64__)
+ #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)
+ #error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid!
+ #endif
+ #if !defined(TFM_X86_64) && !defined(TFM_NO_ASM)
+ #define TFM_X86_64
+ #endif
+#endif
+#if defined(TFM_X86_64)
+ #if !defined(FP_64BIT)
+ #define FP_64BIT
+ #endif
+#endif
+
+/* try to detect x86-32 */
+#if defined(__i386__) && !defined(TFM_SSE2)
+ #if defined(TFM_X86_64) || defined(TFM_ARM)
+ #error x86-32 detected, x86-64/ARM optimizations are not valid!
+ #endif
+ #if !defined(TFM_X86) && !defined(TFM_NO_ASM)
+ #define TFM_X86
+ #endif
+#endif
+
+/* make sure we're 32-bit for x86-32/sse/arm */
+#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)) && defined(FP_64BIT)
+ #warning x86-32, SSE2 and ARM optimizations require 32-bit digits (undefining)
+ #undef FP_64BIT
+#endif
+
+/* multi asms? */
+#ifdef TFM_X86
+ #define TFM_ASM
+#endif
+#ifdef TFM_X86_64
+ #ifdef TFM_ASM
+ #error TFM_ASM already defined!
+ #endif
+ #define TFM_ASM
+#endif
+#ifdef TFM_SSE2
+ #ifdef TFM_ASM
+ #error TFM_ASM already defined!
+ #endif
+ #define TFM_ASM
+#endif
+#ifdef TFM_ARM
+ #ifdef TFM_ASM
+ #error TFM_ASM already defined!
+ #endif
+ #define TFM_ASM
+#endif
+
+/* we want no asm? */
+#ifdef TFM_NO_ASM
+ #undef TFM_X86
+ #undef TFM_X86_64
+ #undef TFM_SSE2
+ #undef TFM_ARM
+ #undef TFM_ASM
+#endif
+
+/* some default configurations.
+ */
+#if defined(FP_64BIT)
+ /* for GCC only on supported platforms */
+#ifndef CRYPT
+ typedef unsigned long ulong64;
+#endif
+ typedef ulong64 fp_digit;
+ typedef unsigned long fp_word __attribute__ ((mode(TI)));
+#else
+ /* this is to make porting into LibTomCrypt easier :-) */
+#ifndef CRYPT
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
+ typedef unsigned __int64 ulong64;
+ typedef signed __int64 long64;
+ #else
+ typedef unsigned long long ulong64;
+ typedef signed long long long64;
+ #endif
+#endif
+ typedef unsigned long fp_digit;
+ typedef ulong64 fp_word;
+#endif
+
+/* # of digits this is */
+#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit))
+#define FP_MASK (fp_digit)(-1)
+#define FP_SIZE (FP_MAX_SIZE/DIGIT_BIT)
+
+/* signs */
+#define FP_ZPOS 0
+#define FP_NEG 1
+
+/* return codes */
+#define FP_OKAY 0
+#define FP_VAL 1
+#define FP_MEM 2
+
+/* equalities */
+#define FP_LT -1 /* less than */
+#define FP_EQ 0 /* equal to */
+#define FP_GT 1 /* greater than */
+
+/* replies */
+#define FP_YES 1 /* yes response */
+#define FP_NO 0 /* no response */
+
+/* a FP type */
+typedef struct {
+ fp_digit dp[FP_SIZE];
+ int used,
+ sign;
+} fp_int;
+
+/* functions */
+
+/* returns a TFM ident string useful for debugging... */
+const char *fp_ident(void);
+
+/* initialize [or zero] an fp int */
+#define fp_init(a) (void)memset((a), 0, sizeof(fp_int))
+#define fp_zero(a) fp_init(a)
+
+/* zero/even/odd ? */
+#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
+#define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
+#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
+
+/* set to a small digit */
+void fp_set(fp_int *a, fp_digit b);
+
+/* copy from a to b */
+#define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int)))
+#define fp_init_copy(a, b) fp_copy(b, a)
+
+/* negate and absolute */
+#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; }
+#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; }
+
+/* clamp digits */
+#define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; }
+
+/* right shift x digits */
+void fp_rshd(fp_int *a, int x);
+
+/* left shift x digits */
+void fp_lshd(fp_int *a, int x);
+
+/* signed comparison */
+int fp_cmp(fp_int *a, fp_int *b);
+
+/* unsigned comparison */
+int fp_cmp_mag(fp_int *a, fp_int *b);
+
+/* power of 2 operations */
+void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d);
+void fp_mod_2d(fp_int *a, int b, fp_int *c);
+void fp_mul_2d(fp_int *a, int b, fp_int *c);
+void fp_2expt (fp_int *a, int b);
+void fp_mul_2(fp_int *a, fp_int *c);
+void fp_div_2(fp_int *a, fp_int *c);
+
+/* Counts the number of lsbs which are zero before the first zero bit */
+int fp_cnt_lsb(fp_int *a);
+
+/* c = a + b */
+void fp_add(fp_int *a, fp_int *b, fp_int *c);
+
+/* c = a - b */
+void fp_sub(fp_int *a, fp_int *b, fp_int *c);
+
+/* c = a * b */
+void fp_mul(fp_int *a, fp_int *b, fp_int *c);
+
+/* b = a*a */
+void fp_sqr(fp_int *a, fp_int *b);
+
+/* a/b => cb + d == a */
+int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+/* c = a mod b, 0 <= c < b */
+int fp_mod(fp_int *a, fp_int *b, fp_int *c);
+
+/* compare against a single digit */
+int fp_cmp_d(fp_int *a, fp_digit b);
+
+/* c = a + b */
+void fp_add_d(fp_int *a, fp_digit b, fp_int *c);
+
+/* c = a - b */
+void fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
+
+/* c = a * b */
+void fp_mul_d(fp_int *a, fp_digit b, fp_int *c);
+
+/* a/b => cb + d == a */
+int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d);
+
+/* c = a mod b, 0 <= c < b */
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c);
+
+/* ---> number theory <--- */
+/* d = a + b (mod c) */
+int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+/* d = a - b (mod c) */
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+/* d = a * b (mod c) */
+int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+/* c = a * a (mod b) */
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c);
+
+/* c = 1/a (mod b) */
+int fp_invmod(fp_int *a, fp_int *b, fp_int *c);
+
+/* c = (a, b) */
+void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
+
+/* c = [a, b] */
+void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
+
+/* setups the montgomery reduction */
+int fp_montgomery_setup(fp_int *a, fp_digit *mp);
+
+/* computes a = B**n mod b without division or multiplication useful for
+ * normalizing numbers in a Montgomery system.
+ */
+void fp_montgomery_calc_normalization(fp_int *a, fp_int *b);
+
+/* computes x/R == x (mod N) via Montgomery Reduction */
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp);
+
+/* d = a**b (mod c) */
+int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+/* primality stuff */
+
+/* perform a Miller-Rabin test of a to the base b and store result in "result" */
+void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result);
+
+/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime */
+int fp_isprime(fp_int *a);
+
+/* Primality generation flags */
+#define TFM_PRIME_BBS 0x0001 /* BBS style prime */
+#define TFM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */
+#define TFM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */
+#define TFM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */
+
+/* callback for fp_prime_random, should fill dst with random bytes and return how many read [upto len] */
+typedef int tfm_prime_callback(unsigned char *dst, int len, void *dat);
+
+#define fp_prime_random(a, t, size, bbs, cb, dat) fp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?TFM_PRIME_BBS:0, cb, dat)
+
+int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat);
+
+/* radix conersions */
+int fp_count_bits(fp_int *a);
+
+int fp_unsigned_bin_size(fp_int *a);
+void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c);
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b);
+
+int fp_signed_bin_size(fp_int *a);
+void fp_read_signed_bin(fp_int *a, unsigned char *b, int c);
+void fp_to_signed_bin(fp_int *a, unsigned char *b);
+
+int fp_read_radix(fp_int *a, char *str, int radix);
+int fp_toradix(fp_int *a, char *str, int radix);
+int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
+int fp_radix_size(fp_int *a, int radix, int *size);
+
+/* VARIOUS LOW LEVEL STUFFS */
+void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
+void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
+void bn_reverse(unsigned char *s, int len);
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
+#ifdef TFM_HUGE
+void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
+#endif
+#ifdef TFM_LARGE
+void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C);
+#endif
+void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
+void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
+
+void fp_sqr_comba(fp_int *A, fp_int *B);
+void fp_sqr_comba4(fp_int *A, fp_int *B);
+void fp_sqr_comba8(fp_int *A, fp_int *B);
+#ifdef TFM_LARGE
+void fp_sqr_comba16(fp_int *A, fp_int *B);
+#endif
+#ifdef TFM_HUGE
+void fp_sqr_comba32(fp_int *A, fp_int *B);
+void fp_sqr_comba64(fp_int *A, fp_int *B);
+#endif
+extern const char *fp_s_rmap;
+
+#endif
--- /dev/null
+/* Start: bn_error.c */
+#include "tma.h"
+#ifdef BN_ERROR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+static const struct {
+ int code;
+ char *msg;
+} msgs[] = {
+ { MP_OKAY, "Successful" },
+ { MP_MEM, "Out of heap" },
+ { MP_VAL, "Value out of range" }
+};
+
+/* return a char * string for a given code */
+char *mp_error_to_string(int code)
+{
+ int x;
+
+ /* scan the lookup table for the given message */
+ for (x = 0; x < (int)(sizeof(msgs) / sizeof(msgs[0])); x++) {
+ if (msgs[x].code == code) {
+ return msgs[x].msg;
+ }
+ }
+
+ /* generic reply for invalid code */
+ return "Invalid error code";
+}
+
+#endif
+
+/* End: bn_error.c */
+
+/* Start: bn_fast_mp_invmod.c */
+#include "tma.h"
+#ifdef BN_FAST_MP_INVMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes the modular inverse via binary extended euclidean algorithm,
+ * that is c = 1/a mod b
+ *
+ * Based on slow invmod except this is optimized for the case where b is
+ * odd as per HAC Note 14.64 on pp. 610
+ */
+int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int x, y, u, v, B, D;
+ int res, neg;
+
+ /* 2. [modified] b must be odd */
+ if (mp_iseven (b) == 1) {
+ return MP_VAL;
+ }
+
+ /* init all our temps */
+ if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ /* x == modulus, y == value to invert */
+ if ((res = mp_copy (b, &x)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ /* we need y = |a| */
+ if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+ if ((res = mp_copy (&x, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_copy (&y, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ mp_set (&D, 1);
+
+top:
+ /* 4. while u is even do */
+ while (mp_iseven (&u) == 1) {
+ /* 4.1 u = u/2 */
+ if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ /* 4.2 if B is odd then */
+ if (mp_isodd (&B) == 1) {
+ if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+ /* B = B/2 */
+ if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* 5. while v is even do */
+ while (mp_iseven (&v) == 1) {
+ /* 5.1 v = v/2 */
+ if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ /* 5.2 if D is odd then */
+ if (mp_isodd (&D) == 1) {
+ /* D = (D-x)/2 */
+ if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+ /* D = D/2 */
+ if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* 6. if u >= v then */
+ if (mp_cmp (&u, &v) != MP_LT) {
+ /* u = u - v, B = B - D */
+ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ } else {
+ /* v - v - u, D = D - B */
+ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* if not zero goto step 4 */
+ if (mp_iszero (&u) == 0) {
+ goto top;
+ }
+
+ /* now a = C, b = D, gcd == g*v */
+
+ /* if v != 1 then there is no inverse */
+ if (mp_cmp_d (&v, 1) != MP_EQ) {
+ res = MP_VAL;
+ goto LBL_ERR;
+ }
+
+ /* b is now the inverse */
+ neg = a->sign;
+ while (D.sign == MP_NEG) {
+ if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+ mp_exch (&D, c);
+ c->sign = neg;
+ res = MP_OKAY;
+
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
+ return res;
+}
+#endif
+
+/* End: bn_fast_mp_invmod.c */
+
+/* Start: bn_fast_mp_montgomery_reduce.c */
+#include "tma.h"
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes xR**-1 == x (mod N) via Montgomery Reduction
+ *
+ * This is an optimized implementation of montgomery_reduce
+ * which uses the comba method to quickly calculate the columns of the
+ * reduction.
+ *
+ * Based on Algorithm 14.32 on pp.601 of HAC.
+*/
+int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+{
+ int ix, res, olduse;
+ mp_word W[MP_WARRAY];
+
+ /* get old used count */
+ olduse = x->used;
+
+ /* grow a as required */
+ if (x->alloc < n->used + 1) {
+ if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* first we have to get the digits of the input into
+ * an array of double precision words W[...]
+ */
+ {
+ register mp_word *_W;
+ register mp_digit *tmpx;
+
+ /* alias for the W[] array */
+ _W = W;
+
+ /* alias for the digits of x*/
+ tmpx = x->dp;
+
+ /* copy the digits of a into W[0..a->used-1] */
+ for (ix = 0; ix < x->used; ix++) {
+ *_W++ = *tmpx++;
+ }
+
+ /* zero the high words of W[a->used..m->used*2] */
+ for (; ix < n->used * 2 + 1; ix++) {
+ *_W++ = 0;
+ }
+ }
+
+ /* now we proceed to zero successive digits
+ * from the least significant upwards
+ */
+ for (ix = 0; ix < n->used; ix++) {
+ /* mu = ai * m' mod b
+ *
+ * We avoid a double precision multiplication (which isn't required)
+ * by casting the value down to a mp_digit. Note this requires
+ * that W[ix-1] have the carry cleared (see after the inner loop)
+ */
+ register mp_digit mu;
+ mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
+
+ /* a = a + mu * m * b**i
+ *
+ * This is computed in place and on the fly. The multiplication
+ * by b**i is handled by offseting which columns the results
+ * are added to.
+ *
+ * Note the comba method normally doesn't handle carries in the
+ * inner loop In this case we fix the carry from the previous
+ * column since the Montgomery reduction requires digits of the
+ * result (so far) [see above] to work. This is
+ * handled by fixing up one carry after the inner loop. The
+ * carry fixups are done in order so after these loops the
+ * first m->used words of W[] have the carries fixed
+ */
+ {
+ register int iy;
+ register mp_digit *tmpn;
+ register mp_word *_W;
+
+ /* alias for the digits of the modulus */
+ tmpn = n->dp;
+
+ /* Alias for the columns set by an offset of ix */
+ _W = W + ix;
+
+ /* inner loop */
+ for (iy = 0; iy < n->used; iy++) {
+ *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
+ }
+ }
+
+ /* now fix carry for next digit, W[ix+1] */
+ W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
+ }
+
+ /* now we have to propagate the carries and
+ * shift the words downward [all those least
+ * significant digits we zeroed].
+ */
+ {
+ register mp_digit *tmpx;
+ register mp_word *_W, *_W1;
+
+ /* nox fix rest of carries */
+
+ /* alias for current word */
+ _W1 = W + ix;
+
+ /* alias for next word, where the carry goes */
+ _W = W + ++ix;
+
+ for (; ix <= n->used * 2 + 1; ix++) {
+ *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
+ }
+
+ /* copy out, A = A/b**n
+ *
+ * The result is A/b**n but instead of converting from an
+ * array of mp_word to mp_digit than calling mp_rshd
+ * we just copy them in the right order
+ */
+
+ /* alias for destination word */
+ tmpx = x->dp;
+
+ /* alias for shifted double precision result */
+ _W = W + n->used;
+
+ for (ix = 0; ix < n->used + 1; ix++) {
+ *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
+ }
+
+ /* zero oldused digits, if the input a was larger than
+ * m->used+1 we'll have to clear the digits
+ */
+ for (; ix < olduse; ix++) {
+ *tmpx++ = 0;
+ }
+ }
+
+ /* set the max used and clamp */
+ x->used = n->used + 1;
+ mp_clamp (x);
+
+ /* if A >= m then A = A - m */
+ if (mp_cmp_mag (x, n) != MP_LT) {
+ return s_mp_sub (x, n, x);
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_fast_mp_montgomery_reduce.c */
+
+/* Start: bn_fast_s_mp_mul_digs.c */
+#include "tma.h"
+#ifdef BN_FAST_S_MP_MUL_DIGS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Fast (comba) multiplier
+ *
+ * This is the fast column-array [comba] multiplier. It is
+ * designed to compute the columns of the product first
+ * then handle the carries afterwards. This has the effect
+ * of making the nested loops that compute the columns very
+ * simple and schedulable on super-scalar processors.
+ *
+ * This has been modified to produce a variable number of
+ * digits of output so if say only a half-product is required
+ * you don't have to compute the upper half (a feature
+ * required for fast Barrett reduction).
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
+ *
+ */
+int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+{
+ int olduse, res, pa, ix, iz;
+ mp_digit W[MP_WARRAY];
+ register mp_word _W;
+
+ /* grow the destination as required */
+ if (c->alloc < digs) {
+ if ((res = mp_grow (c, digs)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* number of output digits to produce */
+ pa = MIN(digs, a->used + b->used);
+
+ /* clear the carry */
+ _W = 0;
+ for (ix = 0; ix < pa; ix++) {
+ int tx, ty;
+ int iy;
+ mp_digit *tmpx, *tmpy;
+
+ /* get offsets into the two bignums */
+ ty = MIN(b->used-1, ix);
+ tx = ix - ty;
+
+ /* setup temp aliases */
+ tmpx = a->dp + tx;
+ tmpy = b->dp + ty;
+
+ /* this is the number of times the loop will iterrate, essentially
+ while (tx++ < a->used && ty-- >= 0) { ... }
+ */
+ iy = MIN(a->used-tx, ty+1);
+
+ /* execute loop */
+ for (iz = 0; iz < iy; ++iz) {
+ _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
+ }
+
+ /* store term */
+ W[ix] = ((mp_digit)_W) & MP_MASK;
+
+ /* make next carry */
+ _W = _W >> ((mp_word)DIGIT_BIT);
+ }
+
+ /* store final carry */
+ W[ix] = (mp_digit)(_W & MP_MASK);
+
+ /* setup dest */
+ olduse = c->used;
+ c->used = pa;
+
+ {
+ register mp_digit *tmpc;
+ tmpc = c->dp;
+ for (ix = 0; ix < pa+1; ix++) {
+ /* now extract the previous digit [below the carry] */
+ *tmpc++ = W[ix];
+ }
+
+ /* clear unused digits [that existed in the old copy of c] */
+ for (; ix < olduse; ix++) {
+ *tmpc++ = 0;
+ }
+ }
+ mp_clamp (c);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_fast_s_mp_mul_digs.c */
+
+/* Start: bn_fast_s_mp_mul_high_digs.c */
+#include "tma.h"
+#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* this is a modified version of fast_s_mul_digs that only produces
+ * output digits *above* digs. See the comments for fast_s_mul_digs
+ * to see how it works.
+ *
+ * This is used in the Barrett reduction since for one of the multiplications
+ * only the higher digits were needed. This essentially halves the work.
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
+ */
+int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+{
+ int olduse, res, pa, ix, iz;
+ mp_digit W[MP_WARRAY];
+ mp_word _W;
+
+ /* grow the destination as required */
+ pa = a->used + b->used;
+ if (c->alloc < pa) {
+ if ((res = mp_grow (c, pa)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* number of output digits to produce */
+ pa = a->used + b->used;
+ _W = 0;
+ for (ix = digs; ix < pa; ix++) {
+ int tx, ty, iy;
+ mp_digit *tmpx, *tmpy;
+
+ /* get offsets into the two bignums */
+ ty = MIN(b->used-1, ix);
+ tx = ix - ty;
+
+ /* setup temp aliases */
+ tmpx = a->dp + tx;
+ tmpy = b->dp + ty;
+
+ /* this is the number of times the loop will iterrate, essentially its
+ while (tx++ < a->used && ty-- >= 0) { ... }
+ */
+ iy = MIN(a->used-tx, ty+1);
+
+ /* execute loop */
+ for (iz = 0; iz < iy; iz++) {
+ _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
+ }
+
+ /* store term */
+ W[ix] = ((mp_digit)_W) & MP_MASK;
+
+ /* make next carry */
+ _W = _W >> ((mp_word)DIGIT_BIT);
+ }
+
+ /* store final carry */
+ W[ix] = (mp_digit)(_W & MP_MASK);
+
+ /* setup dest */
+ olduse = c->used;
+ c->used = pa;
+
+ {
+ register mp_digit *tmpc;
+
+ tmpc = c->dp + digs;
+ for (ix = digs; ix <= pa; ix++) {
+ /* now extract the previous digit [below the carry] */
+ *tmpc++ = W[ix];
+ }
+
+ /* clear unused digits [that existed in the old copy of c] */
+ for (; ix < olduse; ix++) {
+ *tmpc++ = 0;
+ }
+ }
+ mp_clamp (c);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_fast_s_mp_mul_high_digs.c */
+
+/* Start: bn_fast_s_mp_sqr.c */
+#include "tma.h"
+#ifdef BN_FAST_S_MP_SQR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* the jist of squaring...
+ * you do like mult except the offset of the tmpx [one that
+ * starts closer to zero] can't equal the offset of tmpy.
+ * So basically you set up iy like before then you min it with
+ * (ty-tx) so that it never happens. You double all those
+ * you add in the inner loop
+
+After that loop you do the squares and add them in.
+*/
+
+int fast_s_mp_sqr (mp_int * a, mp_int * b)
+{
+ int olduse, res, pa, ix, iz;
+ mp_digit W[MP_WARRAY], *tmpx;
+ mp_word W1;
+
+ /* grow the destination as required */
+ pa = a->used + a->used;
+ if (b->alloc < pa) {
+ if ((res = mp_grow (b, pa)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* number of output digits to produce */
+ W1 = 0;
+ for (ix = 0; ix < pa; ix++) {
+ int tx, ty, iy;
+ mp_word _W;
+ mp_digit *tmpy;
+
+ /* clear counter */
+ _W = 0;
+
+ /* get offsets into the two bignums */
+ ty = MIN(a->used-1, ix);
+ tx = ix - ty;
+
+ /* setup temp aliases */
+ tmpx = a->dp + tx;
+ tmpy = a->dp + ty;
+
+ /* this is the number of times the loop will iterrate, essentially
+ while (tx++ < a->used && ty-- >= 0) { ... }
+ */
+ iy = MIN(a->used-tx, ty+1);
+
+ /* now for squaring tx can never equal ty
+ * we halve the distance since they approach at a rate of 2x
+ * and we have to round because odd cases need to be executed
+ */
+ iy = MIN(iy, (ty-tx+1)>>1);
+
+ /* execute loop */
+ for (iz = 0; iz < iy; iz++) {
+ _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
+ }
+
+ /* double the inner product and add carry */
+ _W = _W + _W + W1;
+
+ /* even columns have the square term in them */
+ if ((ix&1) == 0) {
+ _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
+ }
+
+ /* store it */
+ W[ix] = (mp_digit)(_W & MP_MASK);
+
+ /* make next carry */
+ W1 = _W >> ((mp_word)DIGIT_BIT);
+ }
+
+ /* setup dest */
+ olduse = b->used;
+ b->used = a->used+a->used;
+
+ {
+ mp_digit *tmpb;
+ tmpb = b->dp;
+ for (ix = 0; ix < pa; ix++) {
+ *tmpb++ = W[ix] & MP_MASK;
+ }
+
+ /* clear unused digits [that existed in the old copy of c] */
+ for (; ix < olduse; ix++) {
+ *tmpb++ = 0;
+ }
+ }
+ mp_clamp (b);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_fast_s_mp_sqr.c */
+
+/* Start: bn_mp_2expt.c */
+#include "tma.h"
+#ifdef BN_MP_2EXPT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes a = 2**b
+ *
+ * Simple algorithm which zeroes the int, grows it then just sets one bit
+ * as required.
+ */
+int
+mp_2expt (mp_int * a, int b)
+{
+ int res;
+
+ /* zero a as per default */
+ mp_zero (a);
+
+ /* grow a to accomodate the single bit */
+ if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
+ return res;
+ }
+
+ /* set the used count of where the bit will go */
+ a->used = b / DIGIT_BIT + 1;
+
+ /* put the single bit in its place */
+ a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_2expt.c */
+
+/* Start: bn_mp_abs.c */
+#include "tma.h"
+#ifdef BN_MP_ABS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* b = |a|
+ *
+ * Simple function copies the input and fixes the sign to positive
+ */
+int
+mp_abs (mp_int * a, mp_int * b)
+{
+ int res;
+
+ /* copy a to b */
+ if (a != b) {
+ if ((res = mp_copy (a, b)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* force the sign of b to positive */
+ b->sign = MP_ZPOS;
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_abs.c */
+
+/* Start: bn_mp_add.c */
+#include "tma.h"
+#ifdef BN_MP_ADD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* high level addition (handles signs) */
+int mp_add (mp_int * a, mp_int * b, mp_int * c)
+{
+ int sa, sb, res;
+
+ /* get sign of both inputs */
+ sa = a->sign;
+ sb = b->sign;
+
+ /* handle two cases, not four */
+ if (sa == sb) {
+ /* both positive or both negative */
+ /* add their magnitudes, copy the sign */
+ c->sign = sa;
+ res = s_mp_add (a, b, c);
+ } else {
+ /* one positive, the other negative */
+ /* subtract the one with the greater magnitude from */
+ /* the one of the lesser magnitude. The result gets */
+ /* the sign of the one with the greater magnitude. */
+ if (mp_cmp_mag (a, b) == MP_LT) {
+ c->sign = sb;
+ res = s_mp_sub (b, a, c);
+ } else {
+ c->sign = sa;
+ res = s_mp_sub (a, b, c);
+ }
+ }
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_add.c */
+
+/* Start: bn_mp_add_d.c */
+#include "tma.h"
+#ifdef BN_MP_ADD_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* single digit addition */
+int
+mp_add_d (mp_int * a, mp_digit b, mp_int * c)
+{
+ int res, ix, oldused;
+ mp_digit *tmpa, *tmpc, mu;
+
+ /* grow c as required */
+ if (c->alloc < a->used + 1) {
+ if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* if a is negative and |a| >= b, call c = |a| - b */
+ if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
+ /* temporarily fix sign of a */
+ a->sign = MP_ZPOS;
+
+ /* c = |a| - b */
+ res = mp_sub_d(a, b, c);
+
+ /* fix sign */
+ a->sign = c->sign = MP_NEG;
+
+ return res;
+ }
+
+ /* old number of used digits in c */
+ oldused = c->used;
+
+ /* sign always positive */
+ c->sign = MP_ZPOS;
+
+ /* source alias */
+ tmpa = a->dp;
+
+ /* destination alias */
+ tmpc = c->dp;
+
+ /* if a is positive */
+ if (a->sign == MP_ZPOS) {
+ /* add digit, after this we're propagating
+ * the carry.
+ */
+ *tmpc = *tmpa++ + b;
+ mu = *tmpc >> DIGIT_BIT;
+ *tmpc++ &= MP_MASK;
+
+ /* now handle rest of the digits */
+ for (ix = 1; ix < a->used; ix++) {
+ *tmpc = *tmpa++ + mu;
+ mu = *tmpc >> DIGIT_BIT;
+ *tmpc++ &= MP_MASK;
+ }
+ /* set final carry */
+ ix++;
+ *tmpc++ = mu;
+
+ /* setup size */
+ c->used = a->used + 1;
+ } else {
+ /* a was negative and |a| < b */
+ c->used = 1;
+
+ /* the result is a single digit */
+ if (a->used == 1) {
+ *tmpc++ = b - a->dp[0];
+ } else {
+ *tmpc++ = b;
+ }
+
+ /* setup count so the clearing of oldused
+ * can fall through correctly
+ */
+ ix = 1;
+ }
+
+ /* now zero to oldused */
+ while (ix++ < oldused) {
+ *tmpc++ = 0;
+ }
+ mp_clamp(c);
+
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_add_d.c */
+
+/* Start: bn_mp_addmod.c */
+#include "tma.h"
+#ifdef BN_MP_ADDMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* d = a + b (mod c) */
+int
+mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+ int res;
+ mp_int t;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_add (a, b, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ res = mp_mod (&t, c, d);
+ mp_clear (&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_addmod.c */
+
+/* Start: bn_mp_and.c */
+#include "tma.h"
+#ifdef BN_MP_AND_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* AND two ints together */
+int
+mp_and (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res, ix, px;
+ mp_int t, *x;
+
+ if (a->used > b->used) {
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+ px = b->used;
+ x = b;
+ } else {
+ if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
+ return res;
+ }
+ px = a->used;
+ x = a;
+ }
+
+ for (ix = 0; ix < px; ix++) {
+ t.dp[ix] &= x->dp[ix];
+ }
+
+ /* zero digits above the last from the smallest mp_int */
+ for (; ix < t.used; ix++) {
+ t.dp[ix] = 0;
+ }
+
+ mp_clamp (&t);
+ mp_exch (c, &t);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_and.c */
+
+/* Start: bn_mp_clamp.c */
+#include "tma.h"
+#ifdef BN_MP_CLAMP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* trim unused digits
+ *
+ * This is used to ensure that leading zero digits are
+ * trimed and the leading "used" digit will be non-zero
+ * Typically very fast. Also fixes the sign if there
+ * are no more leading digits
+ */
+void
+mp_clamp (mp_int * a)
+{
+ /* decrease used while the most significant digit is
+ * zero.
+ */
+ while (a->used > 0 && a->dp[a->used - 1] == 0) {
+ --(a->used);
+ }
+
+ /* reset the sign flag if used == 0 */
+ if (a->used == 0) {
+ a->sign = MP_ZPOS;
+ }
+}
+#endif
+
+/* End: bn_mp_clamp.c */
+
+/* Start: bn_mp_clear.c */
+#include "tma.h"
+#ifdef BN_MP_CLEAR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* clear one (frees) */
+void
+mp_clear (mp_int * a)
+{
+ int i;
+
+ /* only do anything if a hasn't been freed previously */
+ if (a->dp != NULL) {
+ /* first zero the digits */
+ for (i = 0; i < a->used; i++) {
+ a->dp[i] = 0;
+ }
+
+ /* free ram */
+ XFREE(a->dp);
+
+ /* reset members to make debugging easier */
+ a->dp = NULL;
+ a->alloc = a->used = 0;
+ a->sign = MP_ZPOS;
+ }
+}
+#endif
+
+/* End: bn_mp_clear.c */
+
+/* Start: bn_mp_clear_multi.c */
+#include "tma.h"
+#ifdef BN_MP_CLEAR_MULTI_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+#include <stdarg.h>
+
+void mp_clear_multi(mp_int *mp, ...)
+{
+ mp_int* next_mp = mp;
+ va_list args;
+ va_start(args, mp);
+ while (next_mp != NULL) {
+ mp_clear(next_mp);
+ next_mp = va_arg(args, mp_int*);
+ }
+ va_end(args);
+}
+#endif
+
+/* End: bn_mp_clear_multi.c */
+
+/* Start: bn_mp_cmp.c */
+#include "tma.h"
+#ifdef BN_MP_CMP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* compare two ints (signed)*/
+int
+mp_cmp (mp_int * a, mp_int * b)
+{
+ /* compare based on sign */
+ if (a->sign != b->sign) {
+ if (a->sign == MP_NEG) {
+ return MP_LT;
+ } else {
+ return MP_GT;
+ }
+ }
+
+ /* compare digits */
+ if (a->sign == MP_NEG) {
+ /* if negative compare opposite direction */
+ return mp_cmp_mag(b, a);
+ } else {
+ return mp_cmp_mag(a, b);
+ }
+}
+#endif
+
+/* End: bn_mp_cmp.c */
+
+/* Start: bn_mp_cmp_d.c */
+#include "tma.h"
+#ifdef BN_MP_CMP_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* compare a digit */
+int mp_cmp_d(mp_int * a, mp_digit b)
+{
+ /* compare based on sign */
+ if (a->sign == MP_NEG) {
+ return MP_LT;
+ }
+
+ /* compare based on magnitude */
+ if (a->used > 1) {
+ return MP_GT;
+ }
+
+ /* compare the only digit of a to b */
+ if (a->dp[0] > b) {
+ return MP_GT;
+ } else if (a->dp[0] < b) {
+ return MP_LT;
+ } else {
+ return MP_EQ;
+ }
+}
+#endif
+
+/* End: bn_mp_cmp_d.c */
+
+/* Start: bn_mp_cmp_mag.c */
+#include "tma.h"
+#ifdef BN_MP_CMP_MAG_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* compare maginitude of two ints (unsigned) */
+int mp_cmp_mag (mp_int * a, mp_int * b)
+{
+ int n;
+ mp_digit *tmpa, *tmpb;
+
+ /* compare based on # of non-zero digits */
+ if (a->used > b->used) {
+ return MP_GT;
+ }
+
+ if (a->used < b->used) {
+ return MP_LT;
+ }
+
+ /* alias for a */
+ tmpa = a->dp + (a->used - 1);
+
+ /* alias for b */
+ tmpb = b->dp + (a->used - 1);
+
+ /* compare based on digits */
+ for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
+ if (*tmpa > *tmpb) {
+ return MP_GT;
+ }
+
+ if (*tmpa < *tmpb) {
+ return MP_LT;
+ }
+ }
+ return MP_EQ;
+}
+#endif
+
+/* End: bn_mp_cmp_mag.c */
+
+/* Start: bn_mp_cnt_lsb.c */
+#include "tma.h"
+#ifdef BN_MP_CNT_LSB_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+static const int lnz[16] = {
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Counts the number of lsbs which are zero before the first zero bit */
+int mp_cnt_lsb(mp_int *a)
+{
+ int x;
+ mp_digit q, qq;
+
+ /* easy out */
+ if (mp_iszero(a) == 1) {
+ return 0;
+ }
+
+ /* scan lower digits until non-zero */
+ for (x = 0; x < a->used && a->dp[x] == 0; x++);
+ q = a->dp[x];
+ x *= DIGIT_BIT;
+
+ /* now scan this digit until a 1 is found */
+ if ((q & 1) == 0) {
+ do {
+ qq = q & 15;
+ x += lnz[qq];
+ q >>= 4;
+ } while (qq == 0);
+ }
+ return x;
+}
+
+#endif
+
+/* End: bn_mp_cnt_lsb.c */
+
+/* Start: bn_mp_copy.c */
+#include "tma.h"
+#ifdef BN_MP_COPY_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* copy, b = a */
+int
+mp_copy (mp_int * a, mp_int * b)
+{
+ int res, n;
+
+ /* if dst == src do nothing */
+ if (a == b) {
+ return MP_OKAY;
+ }
+
+ /* grow dest */
+ if (b->alloc < a->used) {
+ if ((res = mp_grow (b, a->used)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* zero b and copy the parameters over */
+ {
+ register mp_digit *tmpa, *tmpb;
+
+ /* pointer aliases */
+
+ /* source */
+ tmpa = a->dp;
+
+ /* destination */
+ tmpb = b->dp;
+
+ /* copy all the digits */
+ for (n = 0; n < a->used; n++) {
+ *tmpb++ = *tmpa++;
+ }
+
+ /* clear high digits */
+ for (; n < b->used; n++) {
+ *tmpb++ = 0;
+ }
+ }
+
+ /* copy used count and sign */
+ b->used = a->used;
+ b->sign = a->sign;
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_copy.c */
+
+/* Start: bn_mp_count_bits.c */
+#include "tma.h"
+#ifdef BN_MP_COUNT_BITS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* returns the number of bits in an int */
+int
+mp_count_bits (mp_int * a)
+{
+ int r;
+ mp_digit q;
+
+ /* shortcut */
+ if (a->used == 0) {
+ return 0;
+ }
+
+ /* get number of digits and add that */
+ r = (a->used - 1) * DIGIT_BIT;
+
+ /* take the last digit and count the bits in it */
+ q = a->dp[a->used - 1];
+ while (q > ((mp_digit) 0)) {
+ ++r;
+ q >>= ((mp_digit) 1);
+ }
+ return r;
+}
+#endif
+
+/* End: bn_mp_count_bits.c */
+
+/* Start: bn_mp_div.c */
+#include "tma.h"
+#ifdef BN_MP_DIV_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+#ifdef BN_MP_DIV_SMALL
+
+/* slower bit-bang division... also smaller */
+int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+ mp_int ta, tb, tq, q;
+ int res, n, n2;
+
+ /* is divisor zero ? */
+ if (mp_iszero (b) == 1) {
+ return MP_VAL;
+ }
+
+ /* if a < b then q=0, r = a */
+ if (mp_cmp_mag (a, b) == MP_LT) {
+ if (d != NULL) {
+ res = mp_copy (a, d);
+ } else {
+ res = MP_OKAY;
+ }
+ if (c != NULL) {
+ mp_zero (c);
+ }
+ return res;
+ }
+
+ /* init our temps */
+ if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
+ return res;
+ }
+
+
+ mp_set(&tq, 1);
+ n = mp_count_bits(a) - mp_count_bits(b);
+ if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
+ ((res = mp_abs(b, &tb)) != MP_OKAY) ||
+ ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
+ ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
+ goto LBL_ERR;
+ }
+
+ while (n-- >= 0) {
+ if (mp_cmp(&tb, &ta) != MP_GT) {
+ if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
+ ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
+ goto LBL_ERR;
+ }
+ }
+ if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
+ ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* now q == quotient and ta == remainder */
+ n = a->sign;
+ n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
+ if (c != NULL) {
+ mp_exch(c, &q);
+ c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
+ }
+ if (d != NULL) {
+ mp_exch(d, &ta);
+ d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
+ }
+LBL_ERR:
+ mp_clear_multi(&ta, &tb, &tq, &q, NULL);
+ return res;
+}
+
+#else
+
+/* integer signed division.
+ * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
+ * HAC pp.598 Algorithm 14.20
+ *
+ * Note that the description in HAC is horribly
+ * incomplete. For example, it doesn't consider
+ * the case where digits are removed from 'x' in
+ * the inner loop. It also doesn't consider the
+ * case that y has fewer than three digits, etc..
+ *
+ * The overall algorithm is as described as
+ * 14.20 from HAC but fixed to treat these cases.
+*/
+int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+ mp_int q, x, y, t1, t2;
+ int res, n, t, i, norm, neg;
+
+ /* is divisor zero ? */
+ if (mp_iszero (b) == 1) {
+ return MP_VAL;
+ }
+
+ /* if a < b then q=0, r = a */
+ if (mp_cmp_mag (a, b) == MP_LT) {
+ if (d != NULL) {
+ res = mp_copy (a, d);
+ } else {
+ res = MP_OKAY;
+ }
+ if (c != NULL) {
+ mp_zero (c);
+ }
+ return res;
+ }
+
+ if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
+ return res;
+ }
+ q.used = a->used + 2;
+
+ if ((res = mp_init (&t1)) != MP_OKAY) {
+ goto LBL_Q;
+ }
+
+ if ((res = mp_init (&t2)) != MP_OKAY) {
+ goto LBL_T1;
+ }
+
+ if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
+ goto LBL_T2;
+ }
+
+ if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
+ goto LBL_X;
+ }
+
+ /* fix the sign */
+ neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
+ x.sign = y.sign = MP_ZPOS;
+
+ /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
+ norm = mp_count_bits(&y) % DIGIT_BIT;
+ if (norm < (int)(DIGIT_BIT-1)) {
+ norm = (DIGIT_BIT-1) - norm;
+ if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+ if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+ } else {
+ norm = 0;
+ }
+
+ /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
+ n = x.used - 1;
+ t = y.used - 1;
+
+ /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
+ if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
+ goto LBL_Y;
+ }
+
+ while (mp_cmp (&x, &y) != MP_LT) {
+ ++(q.dp[n - t]);
+ if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+ }
+
+ /* reset y by shifting it back down */
+ mp_rshd (&y, n - t);
+
+ /* step 3. for i from n down to (t + 1) */
+ for (i = n; i >= (t + 1); i--) {
+ if (i > x.used) {
+ continue;
+ }
+
+ /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
+ * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
+ if (x.dp[i] == y.dp[t]) {
+ q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
+ } else {
+ mp_word tmp;
+ tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
+ tmp |= ((mp_word) x.dp[i - 1]);
+ tmp /= ((mp_word) y.dp[t]);
+ if (tmp > (mp_word) MP_MASK)
+ tmp = MP_MASK;
+ q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
+ }
+
+ /* while (q{i-t-1} * (yt * b + y{t-1})) >
+ xi * b**2 + xi-1 * b + xi-2
+
+ do q{i-t-1} -= 1;
+ */
+ q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
+ do {
+ q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
+
+ /* find left hand */
+ mp_zero (&t1);
+ t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
+ t1.dp[1] = y.dp[t];
+ t1.used = 2;
+ if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ /* find right hand */
+ t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
+ t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
+ t2.dp[2] = x.dp[i];
+ t2.used = 3;
+ } while (mp_cmp_mag(&t1, &t2) == MP_GT);
+
+ /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
+ if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
+ if (x.sign == MP_NEG) {
+ if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+ if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+ if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
+ }
+ }
+
+ /* now q is the quotient and x is the remainder
+ * [which we have to normalize]
+ */
+
+ /* get sign before writing to c */
+ x.sign = x.used == 0 ? MP_ZPOS : a->sign;
+
+ if (c != NULL) {
+ mp_clamp (&q);
+ mp_exch (&q, c);
+ c->sign = neg;
+ }
+
+ if (d != NULL) {
+ mp_div_2d (&x, norm, &x, NULL);
+ mp_exch (&x, d);
+ }
+
+ res = MP_OKAY;
+
+LBL_Y:mp_clear (&y);
+LBL_X:mp_clear (&x);
+LBL_T2:mp_clear (&t2);
+LBL_T1:mp_clear (&t1);
+LBL_Q:mp_clear (&q);
+ return res;
+}
+
+#endif
+
+#endif
+
+/* End: bn_mp_div.c */
+
+/* Start: bn_mp_div_2.c */
+#include "tma.h"
+#ifdef BN_MP_DIV_2_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* b = a/2 */
+int mp_div_2(mp_int * a, mp_int * b)
+{
+ int x, res, oldused;
+
+ /* copy */
+ if (b->alloc < a->used) {
+ if ((res = mp_grow (b, a->used)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ oldused = b->used;
+ b->used = a->used;
+ {
+ register mp_digit r, rr, *tmpa, *tmpb;
+
+ /* source alias */
+ tmpa = a->dp + b->used - 1;
+
+ /* dest alias */
+ tmpb = b->dp + b->used - 1;
+
+ /* carry */
+ r = 0;
+ for (x = b->used - 1; x >= 0; x--) {
+ /* get the carry for the next iteration */
+ rr = *tmpa & 1;
+
+ /* shift the current digit, add in carry and store */
+ *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
+
+ /* forward carry to next iteration */
+ r = rr;
+ }
+
+ /* zero excess digits */
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
+ }
+ b->sign = a->sign;
+ mp_clamp (b);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_div_2.c */
+
+/* Start: bn_mp_div_2d.c */
+#include "tma.h"
+#ifdef BN_MP_DIV_2D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
+int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
+{
+ mp_digit D, r, rr;
+ int x, res;
+ mp_int t;
+
+
+ /* if the shift count is <= 0 then we do no work */
+ if (b <= 0) {
+ res = mp_copy (a, c);
+ if (d != NULL) {
+ mp_zero (d);
+ }
+ return res;
+ }
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ /* get the remainder */
+ if (d != NULL) {
+ if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ }
+
+ /* copy */
+ if ((res = mp_copy (a, c)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+
+ /* shift by as many digits in the bit count */
+ if (b >= (int)DIGIT_BIT) {
+ mp_rshd (c, b / DIGIT_BIT);
+ }
+
+ /* shift any bit count < DIGIT_BIT */
+ D = (mp_digit) (b % DIGIT_BIT);
+ if (D != 0) {
+ register mp_digit *tmpc, mask, shift;
+
+ /* mask */
+ mask = (((mp_digit)1) << D) - 1;
+
+ /* shift for lsb */
+ shift = DIGIT_BIT - D;
+
+ /* alias */
+ tmpc = c->dp + (c->used - 1);
+
+ /* carry */
+ r = 0;
+ for (x = c->used - 1; x >= 0; x--) {
+ /* get the lower bits of this word in a temp */
+ rr = *tmpc & mask;
+
+ /* shift the current word and mix in the carry bits from the previous word */
+ *tmpc = (*tmpc >> D) | (r << shift);
+ --tmpc;
+
+ /* set the carry to the carry bits of the current word found above */
+ r = rr;
+ }
+ }
+ mp_clamp (c);
+ if (d != NULL) {
+ mp_exch (&t, d);
+ }
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_div_2d.c */
+
+/* Start: bn_mp_div_3.c */
+#include "tma.h"
+#ifdef BN_MP_DIV_3_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* divide by three (based on routine from MPI and the GMP manual) */
+int
+mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
+{
+ mp_int q;
+ mp_word w, t;
+ mp_digit b;
+ int res, ix;
+
+ /* b = 2**DIGIT_BIT / 3 */
+ b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
+
+ if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+ return res;
+ }
+
+ q.used = a->used;
+ q.sign = a->sign;
+ w = 0;
+ for (ix = a->used - 1; ix >= 0; ix--) {
+ w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
+
+ if (w >= 3) {
+ /* multiply w by [1/3] */
+ t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
+
+ /* now subtract 3 * [w/3] from w, to get the remainder */
+ w -= t+t+t;
+
+ /* fixup the remainder as required since
+ * the optimization is not exact.
+ */
+ while (w >= 3) {
+ t += 1;
+ w -= 3;
+ }
+ } else {
+ t = 0;
+ }
+ q.dp[ix] = (mp_digit)t;
+ }
+
+ /* [optional] store the remainder */
+ if (d != NULL) {
+ *d = (mp_digit)w;
+ }
+
+ /* [optional] store the quotient */
+ if (c != NULL) {
+ mp_clamp(&q);
+ mp_exch(&q, c);
+ }
+ mp_clear(&q);
+
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_div_3.c */
+
+/* Start: bn_mp_div_d.c */
+#include "tma.h"
+#ifdef BN_MP_DIV_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+static int s_is_power_of_two(mp_digit b, int *p)
+{
+ int x;
+
+ for (x = 1; x < DIGIT_BIT; x++) {
+ if (b == (((mp_digit)1)<<x)) {
+ *p = x;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* single digit division (based on routine from MPI) */
+int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
+{
+ mp_int q;
+ mp_word w;
+ mp_digit t;
+ int res, ix;
+
+ /* cannot divide by zero */
+ if (b == 0) {
+ return MP_VAL;
+ }
+
+ /* quick outs */
+ if (b == 1 || mp_iszero(a) == 1) {
+ if (d != NULL) {
+ *d = 0;
+ }
+ if (c != NULL) {
+ return mp_copy(a, c);
+ }
+ return MP_OKAY;
+ }
+
+ /* power of two ? */
+ if (s_is_power_of_two(b, &ix) == 1) {
+ if (d != NULL) {
+ *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
+ }
+ if (c != NULL) {
+ return mp_div_2d(a, ix, c, NULL);
+ }
+ return MP_OKAY;
+ }
+
+#ifdef BN_MP_DIV_3_C
+ /* three? */
+ if (b == 3) {
+ return mp_div_3(a, c, d);
+ }
+#endif
+
+ /* no easy answer [c'est la vie]. Just division */
+ if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+ return res;
+ }
+
+ q.used = a->used;
+ q.sign = a->sign;
+ w = 0;
+ for (ix = a->used - 1; ix >= 0; ix--) {
+ w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
+
+ if (w >= b) {
+ t = (mp_digit)(w / b);
+ w -= ((mp_word)t) * ((mp_word)b);
+ } else {
+ t = 0;
+ }
+ q.dp[ix] = (mp_digit)t;
+ }
+
+ if (d != NULL) {
+ *d = (mp_digit)w;
+ }
+
+ if (c != NULL) {
+ mp_clamp(&q);
+ mp_exch(&q, c);
+ }
+ mp_clear(&q);
+
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_div_d.c */
+
+/* Start: bn_mp_dr_is_modulus.c */
+#include "tma.h"
+#ifdef BN_MP_DR_IS_MODULUS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines if a number is a valid DR modulus */
+int mp_dr_is_modulus(mp_int *a)
+{
+ int ix;
+
+ /* must be at least two digits */
+ if (a->used < 2) {
+ return 0;
+ }
+
+ /* must be of the form b**k - a [a <= b] so all
+ * but the first digit must be equal to -1 (mod b).
+ */
+ for (ix = 1; ix < a->used; ix++) {
+ if (a->dp[ix] != MP_MASK) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+#endif
+
+/* End: bn_mp_dr_is_modulus.c */
+
+/* Start: bn_mp_dr_reduce.c */
+#include "tma.h"
+#ifdef BN_MP_DR_REDUCE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
+ *
+ * Based on algorithm from the paper
+ *
+ * "Generating Efficient Primes for Discrete Log Cryptosystems"
+ * Chae Hoon Lim, Pil Joong Lee,
+ * POSTECH Information Research Laboratories
+ *
+ * The modulus must be of a special format [see manual]
+ *
+ * Has been modified to use algorithm 7.10 from the LTM book instead
+ *
+ * Input x must be in the range 0 <= x <= (n-1)**2
+ */
+int
+mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
+{
+ int err, i, m;
+ mp_word r;
+ mp_digit mu, *tmpx1, *tmpx2;
+
+ /* m = digits in modulus */
+ m = n->used;
+
+ /* ensure that "x" has at least 2m digits */
+ if (x->alloc < m + m) {
+ if ((err = mp_grow (x, m + m)) != MP_OKAY) {
+ return err;
+ }
+ }
+
+/* top of loop, this is where the code resumes if
+ * another reduction pass is required.
+ */
+top:
+ /* aliases for digits */
+ /* alias for lower half of x */
+ tmpx1 = x->dp;
+
+ /* alias for upper half of x, or x/B**m */
+ tmpx2 = x->dp + m;
+
+ /* set carry to zero */
+ mu = 0;
+
+ /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
+ for (i = 0; i < m; i++) {
+ r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
+ *tmpx1++ = (mp_digit)(r & MP_MASK);
+ mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
+ }
+
+ /* set final carry */
+ *tmpx1++ = mu;
+
+ /* zero words above m */
+ for (i = m + 1; i < x->used; i++) {
+ *tmpx1++ = 0;
+ }
+
+ /* clamp, sub and return */
+ mp_clamp (x);
+
+ /* if x >= n then subtract and reduce again
+ * Each successive "recursion" makes the input smaller and smaller.
+ */
+ if (mp_cmp_mag (x, n) != MP_LT) {
+ s_mp_sub(x, n, x);
+ goto top;
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_dr_reduce.c */
+
+/* Start: bn_mp_dr_setup.c */
+#include "tma.h"
+#ifdef BN_MP_DR_SETUP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines the setup value */
+void mp_dr_setup(mp_int *a, mp_digit *d)
+{
+ /* the casts are required if DIGIT_BIT is one less than
+ * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
+ */
+ *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
+ ((mp_word)a->dp[0]));
+}
+
+#endif
+
+/* End: bn_mp_dr_setup.c */
+
+/* Start: bn_mp_exch.c */
+#include "tma.h"
+#ifdef BN_MP_EXCH_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* swap the elements of two integers, for cases where you can't simply swap the
+ * mp_int pointers around
+ */
+void
+mp_exch (mp_int * a, mp_int * b)
+{
+ mp_int t;
+
+ t = *a;
+ *a = *b;
+ *b = t;
+}
+#endif
+
+/* End: bn_mp_exch.c */
+
+/* Start: bn_mp_expt_d.c */
+#include "tma.h"
+#ifdef BN_MP_EXPT_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* calculate c = a**b using a square-multiply algorithm */
+int mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
+{
+ int res, x;
+ mp_int g;
+
+ if ((res = mp_init_copy (&g, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* set initial result */
+ mp_set (c, 1);
+
+ for (x = 0; x < (int) DIGIT_BIT; x++) {
+ /* square */
+ if ((res = mp_sqr (c, c)) != MP_OKAY) {
+ mp_clear (&g);
+ return res;
+ }
+
+ /* if the bit is set multiply */
+ if ((b & (mp_digit) (((mp_digit)1) << (DIGIT_BIT - 1))) != 0) {
+ if ((res = mp_mul (c, &g, c)) != MP_OKAY) {
+ mp_clear (&g);
+ return res;
+ }
+ }
+
+ /* shift to next bit */
+ b <<= 1;
+ }
+
+ mp_clear (&g);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_expt_d.c */
+
+/* Start: bn_mp_exptmod.c */
+#include "tma.h"
+#ifdef BN_MP_EXPTMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+
+/* this is a shell function that calls either the normal or Montgomery
+ * exptmod functions. Originally the call to the montgomery code was
+ * embedded in the normal function but that wasted alot of stack space
+ * for nothing (since 99% of the time the Montgomery code would be called)
+ */
+int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+{
+ int dr;
+
+ /* modulus P must be positive */
+ if (P->sign == MP_NEG) {
+ return MP_VAL;
+ }
+
+ /* if exponent X is negative we have to recurse */
+ if (X->sign == MP_NEG) {
+#ifdef BN_MP_INVMOD_C
+ mp_int tmpG, tmpX;
+ int err;
+
+ /* first compute 1/G mod P */
+ if ((err = mp_init(&tmpG)) != MP_OKAY) {
+ return err;
+ }
+ if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
+ mp_clear(&tmpG);
+ return err;
+ }
+
+ /* now get |X| */
+ if ((err = mp_init(&tmpX)) != MP_OKAY) {
+ mp_clear(&tmpG);
+ return err;
+ }
+ if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
+ mp_clear_multi(&tmpG, &tmpX, NULL);
+ return err;
+ }
+
+ /* and now compute (1/G)**|X| instead of G**X [X < 0] */
+ err = mp_exptmod(&tmpG, &tmpX, P, Y);
+ mp_clear_multi(&tmpG, &tmpX, NULL);
+ return err;
+#else
+ /* no invmod */
+ return MP_VAL;
+#endif
+ }
+
+/* modified diminished radix reduction */
+#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C)
+ if (mp_reduce_is_2k_l(P) == MP_YES) {
+ return s_mp_exptmod(G, X, P, Y, 1);
+ }
+#endif
+
+#ifdef BN_MP_DR_IS_MODULUS_C
+ /* is it a DR modulus? */
+ dr = mp_dr_is_modulus(P);
+#else
+ /* default to no */
+ dr = 0;
+#endif
+
+#ifdef BN_MP_REDUCE_IS_2K_C
+ /* if not, is it a unrestricted DR modulus? */
+ if (dr == 0) {
+ dr = mp_reduce_is_2k(P) << 1;
+ }
+#endif
+
+ /* if the modulus is odd or dr != 0 use the montgomery method */
+#ifdef BN_MP_EXPTMOD_FAST_C
+ if (mp_isodd (P) == 1 || dr != 0) {
+ return mp_exptmod_fast (G, X, P, Y, dr);
+ } else {
+#endif
+#ifdef BN_S_MP_EXPTMOD_C
+ /* otherwise use the generic Barrett reduction technique */
+ return s_mp_exptmod (G, X, P, Y, 0);
+#else
+ /* no exptmod for evens */
+ return MP_VAL;
+#endif
+#ifdef BN_MP_EXPTMOD_FAST_C
+ }
+#endif
+}
+
+#endif
+
+/* End: bn_mp_exptmod.c */
+
+/* Start: bn_mp_exptmod_fast.c */
+#include "tma.h"
+#ifdef BN_MP_EXPTMOD_FAST_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
+ *
+ * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
+ * The value of k changes based on the size of the exponent.
+ *
+ * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
+ */
+
+#ifdef MP_LOW_MEM
+ #define TAB_SIZE 32
+#else
+ #define TAB_SIZE 256
+#endif
+
+int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
+{
+ mp_int M[TAB_SIZE], res;
+ mp_digit buf, mp;
+ int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+
+ /* use a pointer to the reduction algorithm. This allows us to use
+ * one of many reduction algorithms without modding the guts of
+ * the code with if statements everywhere.
+ */
+ int (*redux)(mp_int*,mp_int*,mp_digit);
+
+ /* find window size */
+ x = mp_count_bits (X);
+ if (x <= 7) {
+ winsize = 2;
+ } else if (x <= 36) {
+ winsize = 3;
+ } else if (x <= 140) {
+ winsize = 4;
+ } else if (x <= 450) {
+ winsize = 5;
+ } else if (x <= 1303) {
+ winsize = 6;
+ } else if (x <= 3529) {
+ winsize = 7;
+ } else {
+ winsize = 8;
+ }
+
+#ifdef MP_LOW_MEM
+ if (winsize > 5) {
+ winsize = 5;
+ }
+#endif
+
+ /* init M array */
+ /* init first cell */
+ if ((err = mp_init(&M[1])) != MP_OKAY) {
+ return err;
+ }
+
+ /* now init the second half of the array */
+ for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+ if ((err = mp_init(&M[x])) != MP_OKAY) {
+ for (y = 1<<(winsize-1); y < x; y++) {
+ mp_clear (&M[y]);
+ }
+ mp_clear(&M[1]);
+ return err;
+ }
+ }
+
+ /* determine and setup reduction code */
+ if (redmode == 0) {
+#ifdef BN_MP_MONTGOMERY_SETUP_C
+ /* now setup montgomery */
+ if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
+ goto LBL_M;
+ }
+#else
+ err = MP_VAL;
+ goto LBL_M;
+#endif
+
+ /* automatically pick the comba one if available (saves quite a few calls/ifs) */
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+ if (((P->used * 2 + 1) < MP_WARRAY) &&
+ P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ redux = fast_mp_montgomery_reduce;
+ } else
+#endif
+ {
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+ /* use slower baseline Montgomery method */
+ redux = mp_montgomery_reduce;
+#else
+ err = MP_VAL;
+ goto LBL_M;
+#endif
+ }
+ } else if (redmode == 1) {
+#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
+ /* setup DR reduction for moduli of the form B**k - b */
+ mp_dr_setup(P, &mp);
+ redux = mp_dr_reduce;
+#else
+ err = MP_VAL;
+ goto LBL_M;
+#endif
+ } else {
+#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
+ /* setup DR reduction for moduli of the form 2**k - b */
+ if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
+ goto LBL_M;
+ }
+ redux = mp_reduce_2k;
+#else
+ err = MP_VAL;
+ goto LBL_M;
+#endif
+ }
+
+ /* setup result */
+ if ((err = mp_init (&res)) != MP_OKAY) {
+ goto LBL_M;
+ }
+
+ /* create M table
+ *
+
+ *
+ * The first half of the table is not computed though accept for M[0] and M[1]
+ */
+
+ if (redmode == 0) {
+#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
+ /* now we need R mod m */
+ if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+#else
+ err = MP_VAL;
+ goto LBL_RES;
+#endif
+
+ /* now set M[1] to G * R mod m */
+ if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ } else {
+ mp_set(&res, 1);
+ if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+ if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ for (x = 0; x < (winsize - 1); x++) {
+ if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* create upper table */
+ for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+ if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* set initial mode and bit cnt */
+ mode = 0;
+ bitcnt = 1;
+ buf = 0;
+ digidx = X->used - 1;
+ bitcpy = 0;
+ bitbuf = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits so break */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
+ buf <<= (mp_digit)1;
+
+ /* if the bit is zero and mode == 0 then we ignore it
+ * These represent the leading zero bits before the first 1 bit
+ * in the exponent. Technically this opt is not required but it
+ * does lower the # of trivial squaring/reductions used
+ */
+ if (mode == 0 && y == 0) {
+ continue;
+ }
+
+ /* if the bit is zero and mode == 1 then we square */
+ if (mode == 1 && y == 0) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ continue;
+ }
+
+ /* else we add it to the window */
+ bitbuf |= (y << (winsize - ++bitcpy));
+ mode = 2;
+
+ if (bitcpy == winsize) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < winsize; x++) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* then multiply */
+ if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ mode = 1;
+ }
+ }
+
+ /* if bits remain then square/multiply */
+ if (mode == 2 && bitcpy > 0) {
+ /* square then multiply if the bit is set */
+ for (x = 0; x < bitcpy; x++) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* get next bit of the window */
+ bitbuf <<= 1;
+ if ((bitbuf & (1 << winsize)) != 0) {
+ /* then multiply */
+ if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+ }
+ }
+
+ if (redmode == 0) {
+ /* fixup result if Montgomery reduction is used
+ * recall that any value in a Montgomery system is
+ * actually multiplied by R mod n. So we have
+ * to reduce one more time to cancel out the factor
+ * of R.
+ */
+ if ((err = redux(&res, P, mp)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* swap res with Y */
+ mp_exch (&res, Y);
+ err = MP_OKAY;
+LBL_RES:mp_clear (&res);
+LBL_M:
+ mp_clear(&M[1]);
+ for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+ mp_clear (&M[x]);
+ }
+ return err;
+}
+#endif
+
+
+/* End: bn_mp_exptmod_fast.c */
+
+/* Start: bn_mp_exteuclid.c */
+#include "tma.h"
+#ifdef BN_MP_EXTEUCLID_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Extended euclidean algorithm of (a, b) produces
+ a*u1 + b*u2 = u3
+ */
+int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
+{
+ mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp;
+ int err;
+
+ if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) {
+ return err;
+ }
+
+ /* initialize, (u1,u2,u3) = (1,0,a) */
+ mp_set(&u1, 1);
+ if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; }
+
+ /* initialize, (v1,v2,v3) = (0,1,b) */
+ mp_set(&v2, 1);
+ if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; }
+
+ /* loop while v3 != 0 */
+ while (mp_iszero(&v3) == MP_NO) {
+ /* q = u3/v3 */
+ if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; }
+
+ /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */
+ if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; }
+
+ /* (u1,u2,u3) = (v1,v2,v3) */
+ if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; }
+
+ /* (v1,v2,v3) = (t1,t2,t3) */
+ if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; }
+ if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; }
+ }
+
+ /* make sure U3 >= 0 */
+ if (u3.sign == MP_NEG) {
+ mp_neg(&u1, &u1);
+ mp_neg(&u2, &u2);
+ mp_neg(&u3, &u3);
+ }
+
+ /* copy result out */
+ if (U1 != NULL) { mp_exch(U1, &u1); }
+ if (U2 != NULL) { mp_exch(U2, &u2); }
+ if (U3 != NULL) { mp_exch(U3, &u3); }
+
+ err = MP_OKAY;
+_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL);
+ return err;
+}
+#endif
+
+/* End: bn_mp_exteuclid.c */
+
+/* Start: bn_mp_fread.c */
+#include "tma.h"
+#ifdef BN_MP_FREAD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* read a bigint from a file stream in ASCII */
+int mp_fread(mp_int *a, int radix, FILE *stream)
+{
+ int err, ch, neg, y;
+
+ /* clear a */
+ mp_zero(a);
+
+ /* if first digit is - then set negative */
+ ch = fgetc(stream);
+ if (ch == '-') {
+ neg = MP_NEG;
+ ch = fgetc(stream);
+ } else {
+ neg = MP_ZPOS;
+ }
+
+ for (;;) {
+ /* find y in the radix map */
+ for (y = 0; y < radix; y++) {
+ if (mp_s_rmap[y] == ch) {
+ break;
+ }
+ }
+ if (y == radix) {
+ break;
+ }
+
+ /* shift up and add */
+ if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
+ return err;
+ }
+ if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
+ return err;
+ }
+
+ ch = fgetc(stream);
+ }
+ if (mp_cmp_d(a, 0) != MP_EQ) {
+ a->sign = neg;
+ }
+
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_fread.c */
+
+/* Start: bn_mp_fwrite.c */
+#include "tma.h"
+#ifdef BN_MP_FWRITE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+int mp_fwrite(mp_int *a, int radix, FILE *stream)
+{
+ char *buf;
+ int err, len, x;
+
+ if ((err = mp_radix_size(a, radix, &len)) != MP_OKAY) {
+ return err;
+ }
+
+ buf = OPT_CAST(char) XMALLOC (len);
+ if (buf == NULL) {
+ return MP_MEM;
+ }
+
+ if ((err = mp_toradix(a, buf, radix)) != MP_OKAY) {
+ XFREE (buf);
+ return err;
+ }
+
+ for (x = 0; x < len; x++) {
+ if (fputc(buf[x], stream) == EOF) {
+ XFREE (buf);
+ return MP_VAL;
+ }
+ }
+
+ XFREE (buf);
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_fwrite.c */
+
+/* Start: bn_mp_gcd.c */
+#include "tma.h"
+#ifdef BN_MP_GCD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Greatest Common Divisor using the binary method */
+int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int u, v;
+ int k, u_lsb, v_lsb, res;
+
+ /* either zero than gcd is the largest */
+ if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
+ return mp_abs (b, c);
+ }
+ if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
+ return mp_abs (a, c);
+ }
+
+ /* optimized. At this point if a == 0 then
+ * b must equal zero too
+ */
+ if (mp_iszero (a) == 1) {
+ mp_zero(c);
+ return MP_OKAY;
+ }
+
+ /* get copies of a and b we can modify */
+ if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
+ goto LBL_U;
+ }
+
+ /* must be positive for the remainder of the algorithm */
+ u.sign = v.sign = MP_ZPOS;
+
+ /* B1. Find the common power of two for u and v */
+ u_lsb = mp_cnt_lsb(&u);
+ v_lsb = mp_cnt_lsb(&v);
+ k = MIN(u_lsb, v_lsb);
+
+ if (k > 0) {
+ /* divide the power of two out */
+ if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
+ goto LBL_V;
+ }
+
+ if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
+ goto LBL_V;
+ }
+ }
+
+ /* divide any remaining factors of two out */
+ if (u_lsb != k) {
+ if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
+ goto LBL_V;
+ }
+ }
+
+ if (v_lsb != k) {
+ if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
+ goto LBL_V;
+ }
+ }
+
+ while (mp_iszero(&v) == 0) {
+ /* make sure v is the largest */
+ if (mp_cmp_mag(&u, &v) == MP_GT) {
+ /* swap u and v to make sure v is >= u */
+ mp_exch(&u, &v);
+ }
+
+ /* subtract smallest from largest */
+ if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
+ goto LBL_V;
+ }
+
+ /* Divide out all factors of two */
+ if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
+ goto LBL_V;
+ }
+ }
+
+ /* multiply by 2**k which we divided out at the beginning */
+ if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
+ goto LBL_V;
+ }
+ c->sign = MP_ZPOS;
+ res = MP_OKAY;
+LBL_V:mp_clear (&u);
+LBL_U:mp_clear (&v);
+ return res;
+}
+#endif
+
+/* End: bn_mp_gcd.c */
+
+/* Start: bn_mp_get_int.c */
+#include "tma.h"
+#ifdef BN_MP_GET_INT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* get the lower 32-bits of an mp_int */
+unsigned long mp_get_int(mp_int * a)
+{
+ int i;
+ unsigned long res;
+
+ if (a->used == 0) {
+ return 0;
+ }
+
+ /* get number of digits of the lsb we have to read */
+ i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
+
+ /* get most significant digit of result */
+ res = DIGIT(a,i);
+
+ while (--i >= 0) {
+ res = (res << DIGIT_BIT) | DIGIT(a,i);
+ }
+
+ /* force result to 32-bits always so it is consistent on non 32-bit platforms */
+ return res & 0xFFFFFFFFUL;
+}
+#endif
+
+/* End: bn_mp_get_int.c */
+
+/* Start: bn_mp_grow.c */
+#include "tma.h"
+#ifdef BN_MP_GROW_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* grow as required */
+int mp_grow (mp_int * a, int size)
+{
+ int i;
+ mp_digit *tmp;
+
+ /* if the alloc size is smaller alloc more ram */
+ if (a->alloc < size) {
+ /* ensure there are always at least MP_PREC digits extra on top */
+ size += (MP_PREC * 2) - (size % MP_PREC);
+
+ /* reallocate the array a->dp
+ *
+ * We store the return in a temporary variable
+ * in case the operation failed we don't want
+ * to overwrite the dp member of a.
+ */
+ tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
+ if (tmp == NULL) {
+ /* reallocation failed but "a" is still valid [can be freed] */
+ return MP_MEM;
+ }
+
+ /* reallocation succeeded so set a->dp */
+ a->dp = tmp;
+
+ /* zero excess digits */
+ i = a->alloc;
+ a->alloc = size;
+ for (; i < a->alloc; i++) {
+ a->dp[i] = 0;
+ }
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_grow.c */
+
+/* Start: bn_mp_init.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* init a new mp_int */
+int mp_init (mp_int * a)
+{
+ int i;
+
+ /* allocate memory required and clear it */
+ a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
+ if (a->dp == NULL) {
+ return MP_MEM;
+ }
+
+ /* set the digits to zero */
+ for (i = 0; i < MP_PREC; i++) {
+ a->dp[i] = 0;
+ }
+
+ /* set the used to zero, allocated digits to the default precision
+ * and sign to positive */
+ a->used = 0;
+ a->alloc = MP_PREC;
+ a->sign = MP_ZPOS;
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_init.c */
+
+/* Start: bn_mp_init_copy.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_COPY_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* creates "a" then copies b into it */
+int mp_init_copy (mp_int * a, mp_int * b)
+{
+ int res;
+
+ if ((res = mp_init (a)) != MP_OKAY) {
+ return res;
+ }
+ return mp_copy (b, a);
+}
+#endif
+
+/* End: bn_mp_init_copy.c */
+
+/* Start: bn_mp_init_multi.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_MULTI_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+#include <stdarg.h>
+
+int mp_init_multi(mp_int *mp, ...)
+{
+ mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
+ int n = 0; /* Number of ok inits */
+ mp_int* cur_arg = mp;
+ va_list args;
+
+ va_start(args, mp); /* init args to next argument from caller */
+ while (cur_arg != NULL) {
+ if (mp_init(cur_arg) != MP_OKAY) {
+ /* Oops - error! Back-track and mp_clear what we already
+ succeeded in init-ing, then return error.
+ */
+ va_list clean_args;
+
+ /* end the current list */
+ va_end(args);
+
+ /* now start cleaning up */
+ cur_arg = mp;
+ va_start(clean_args, mp);
+ while (n--) {
+ mp_clear(cur_arg);
+ cur_arg = va_arg(clean_args, mp_int*);
+ }
+ va_end(clean_args);
+ res = MP_MEM;
+ break;
+ }
+ n++;
+ cur_arg = va_arg(args, mp_int*);
+ }
+ va_end(args);
+ return res; /* Assumed ok, if error flagged above. */
+}
+
+#endif
+
+/* End: bn_mp_init_multi.c */
+
+/* Start: bn_mp_init_set.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_SET_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* initialize and set a digit */
+int mp_init_set (mp_int * a, mp_digit b)
+{
+ int err;
+ if ((err = mp_init(a)) != MP_OKAY) {
+ return err;
+ }
+ mp_set(a, b);
+ return err;
+}
+#endif
+
+/* End: bn_mp_init_set.c */
+
+/* Start: bn_mp_init_set_int.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_SET_INT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* initialize and set a digit */
+int mp_init_set_int (mp_int * a, unsigned long b)
+{
+ int err;
+ if ((err = mp_init(a)) != MP_OKAY) {
+ return err;
+ }
+ return mp_set_int(a, b);
+}
+#endif
+
+/* End: bn_mp_init_set_int.c */
+
+/* Start: bn_mp_init_size.c */
+#include "tma.h"
+#ifdef BN_MP_INIT_SIZE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* init an mp_init for a given size */
+int mp_init_size (mp_int * a, int size)
+{
+ int x;
+
+ /* pad size so there are always extra digits */
+ size += (MP_PREC * 2) - (size % MP_PREC);
+
+ /* alloc mem */
+ a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
+ if (a->dp == NULL) {
+ return MP_MEM;
+ }
+
+ /* set the members */
+ a->used = 0;
+ a->alloc = size;
+ a->sign = MP_ZPOS;
+
+ /* zero the digits */
+ for (x = 0; x < size; x++) {
+ a->dp[x] = 0;
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_init_size.c */
+
+/* Start: bn_mp_invmod.c */
+#include "tma.h"
+#ifdef BN_MP_INVMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* hac 14.61, pp608 */
+int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+{
+ /* b cannot be negative */
+ if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+ return MP_VAL;
+ }
+
+#ifdef BN_FAST_MP_INVMOD_C
+ /* if the modulus is odd we can use a faster routine instead */
+ if (mp_isodd (b) == 1) {
+ return fast_mp_invmod (a, b, c);
+ }
+#endif
+
+#ifdef BN_MP_INVMOD_SLOW_C
+ return mp_invmod_slow(a, b, c);
+#endif
+
+ return MP_VAL;
+}
+#endif
+
+/* End: bn_mp_invmod.c */
+
+/* Start: bn_mp_invmod_slow.c */
+#include "tma.h"
+#ifdef BN_MP_INVMOD_SLOW_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* hac 14.61, pp608 */
+int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int x, y, u, v, A, B, C, D;
+ int res;
+
+ /* b cannot be negative */
+ if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+ return MP_VAL;
+ }
+
+ /* init temps */
+ if ((res = mp_init_multi(&x, &y, &u, &v,
+ &A, &B, &C, &D, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ /* x = a, y = b */
+ if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_copy (b, &y)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ /* 2. [modified] if x,y are both even then return an error! */
+ if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
+ res = MP_VAL;
+ goto LBL_ERR;
+ }
+
+ /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+ if ((res = mp_copy (&x, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_copy (&y, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ mp_set (&A, 1);
+ mp_set (&D, 1);
+
+top:
+ /* 4. while u is even do */
+ while (mp_iseven (&u) == 1) {
+ /* 4.1 u = u/2 */
+ if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ /* 4.2 if A or B is odd then */
+ if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
+ /* A = (A+y)/2, B = (B-x)/2 */
+ if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+ /* A = A/2, B = B/2 */
+ if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* 5. while v is even do */
+ while (mp_iseven (&v) == 1) {
+ /* 5.1 v = v/2 */
+ if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ /* 5.2 if C or D is odd then */
+ if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
+ /* C = (C+y)/2, D = (D-x)/2 */
+ if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+ /* C = C/2, D = D/2 */
+ if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* 6. if u >= v then */
+ if (mp_cmp (&u, &v) != MP_LT) {
+ /* u = u - v, A = A - C, B = B - D */
+ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ } else {
+ /* v - v - u, C = C - A, D = D - B */
+ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* if not zero goto step 4 */
+ if (mp_iszero (&u) == 0)
+ goto top;
+
+ /* now a = C, b = D, gcd == g*v */
+
+ /* if v != 1 then there is no inverse */
+ if (mp_cmp_d (&v, 1) != MP_EQ) {
+ res = MP_VAL;
+ goto LBL_ERR;
+ }
+
+ /* if its too low */
+ while (mp_cmp_d(&C, 0) == MP_LT) {
+ if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* too big */
+ while (mp_cmp_mag(&C, b) != MP_LT) {
+ if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
+
+ /* C is now the inverse */
+ mp_exch (&C, c);
+ res = MP_OKAY;
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
+ return res;
+}
+#endif
+
+/* End: bn_mp_invmod_slow.c */
+
+/* Start: bn_mp_is_square.c */
+#include "tma.h"
+#ifdef BN_MP_IS_SQUARE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Check if remainders are possible squares - fast exclude non-squares */
+static const char rem_128[128] = {
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
+};
+
+static const char rem_105[105] = {
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
+ 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
+};
+
+/* Store non-zero to ret if arg is square, and zero if not */
+int mp_is_square(mp_int *arg,int *ret)
+{
+ int res;
+ mp_digit c;
+ mp_int t;
+ unsigned long r;
+
+ /* Default to Non-square :) */
+ *ret = MP_NO;
+
+ if (arg->sign == MP_NEG) {
+ return MP_VAL;
+ }
+
+ /* digits used? (TSD) */
+ if (arg->used == 0) {
+ return MP_OKAY;
+ }
+
+ /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */
+ if (rem_128[127 & DIGIT(arg,0)] == 1) {
+ return MP_OKAY;
+ }
+
+ /* Next check mod 105 (3*5*7) */
+ if ((res = mp_mod_d(arg,105,&c)) != MP_OKAY) {
+ return res;
+ }
+ if (rem_105[c] == 1) {
+ return MP_OKAY;
+ }
+
+
+ if ((res = mp_init_set_int(&t,11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) {
+ return res;
+ }
+ if ((res = mp_mod(arg,&t,&t)) != MP_OKAY) {
+ goto ERR;
+ }
+ r = mp_get_int(&t);
+ /* Check for other prime modules, note it's not an ERROR but we must
+ * free "t" so the easiest way is to goto ERR. We know that res
+ * is already equal to MP_OKAY from the mp_mod call
+ */
+ if ( (1L<<(r%11)) & 0x5C4L ) goto ERR;
+ if ( (1L<<(r%13)) & 0x9E4L ) goto ERR;
+ if ( (1L<<(r%17)) & 0x5CE8L ) goto ERR;
+ if ( (1L<<(r%19)) & 0x4F50CL ) goto ERR;
+ if ( (1L<<(r%23)) & 0x7ACCA0L ) goto ERR;
+ if ( (1L<<(r%29)) & 0xC2EDD0CL ) goto ERR;
+ if ( (1L<<(r%31)) & 0x6DE2B848L ) goto ERR;
+
+ /* Final check - is sqr(sqrt(arg)) == arg ? */
+ if ((res = mp_sqrt(arg,&t)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sqr(&t,&t)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ *ret = (mp_cmp_mag(&t,arg) == MP_EQ) ? MP_YES : MP_NO;
+ERR:mp_clear(&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_is_square.c */
+
+/* Start: bn_mp_jacobi.c */
+#include "tma.h"
+#ifdef BN_MP_JACOBI_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes the jacobi c = (a | n) (or Legendre if n is prime)
+ * HAC pp. 73 Algorithm 2.149
+ */
+int mp_jacobi (mp_int * a, mp_int * p, int *c)
+{
+ mp_int a1, p1;
+ int k, s, r, res;
+ mp_digit residue;
+
+ /* if p <= 0 return MP_VAL */
+ if (mp_cmp_d(p, 0) != MP_GT) {
+ return MP_VAL;
+ }
+
+ /* step 1. if a == 0, return 0 */
+ if (mp_iszero (a) == 1) {
+ *c = 0;
+ return MP_OKAY;
+ }
+
+ /* step 2. if a == 1, return 1 */
+ if (mp_cmp_d (a, 1) == MP_EQ) {
+ *c = 1;
+ return MP_OKAY;
+ }
+
+ /* default */
+ s = 0;
+
+ /* step 3. write a = a1 * 2**k */
+ if ((res = mp_init_copy (&a1, a)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_init (&p1)) != MP_OKAY) {
+ goto LBL_A1;
+ }
+
+ /* divide out larger power of two */
+ k = mp_cnt_lsb(&a1);
+ if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
+ goto LBL_P1;
+ }
+
+ /* step 4. if e is even set s=1 */
+ if ((k & 1) == 0) {
+ s = 1;
+ } else {
+ /* else set s=1 if p = 1/7 (mod 8) or s=-1 if p = 3/5 (mod 8) */
+ residue = p->dp[0] & 7;
+
+ if (residue == 1 || residue == 7) {
+ s = 1;
+ } else if (residue == 3 || residue == 5) {
+ s = -1;
+ }
+ }
+
+ /* step 5. if p == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */
+ if ( ((p->dp[0] & 3) == 3) && ((a1.dp[0] & 3) == 3)) {
+ s = -s;
+ }
+
+ /* if a1 == 1 we're done */
+ if (mp_cmp_d (&a1, 1) == MP_EQ) {
+ *c = s;
+ } else {
+ /* n1 = n mod a1 */
+ if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
+ goto LBL_P1;
+ }
+ if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
+ goto LBL_P1;
+ }
+ *c = s * r;
+ }
+
+ /* done */
+ res = MP_OKAY;
+LBL_P1:mp_clear (&p1);
+LBL_A1:mp_clear (&a1);
+ return res;
+}
+#endif
+
+/* End: bn_mp_jacobi.c */
+
+/* Start: bn_mp_karatsuba_mul.c */
+#include "tma.h"
+#ifdef BN_MP_KARATSUBA_MUL_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* c = |a| * |b| using Karatsuba Multiplication using
+ * three half size multiplications
+ *
+ * Let B represent the radix [e.g. 2**DIGIT_BIT] and
+ * let n represent half of the number of digits in
+ * the min(a,b)
+ *
+ * a = a1 * B**n + a0
+ * b = b1 * B**n + b0
+ *
+ * Then, a * b =>
+ a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
+ *
+ * Note that a1b1 and a0b0 are used twice and only need to be
+ * computed once. So in total three half size (half # of
+ * digit) multiplications are performed, a0b0, a1b1 and
+ * (a1-b1)(a0-b0)
+ *
+ * Note that a multiplication of half the digits requires
+ * 1/4th the number of single precision multiplications so in
+ * total after one call 25% of the single precision multiplications
+ * are saved. Note also that the call to mp_mul can end up back
+ * in this function if the a0, a1, b0, or b1 are above the threshold.
+ * This is known as divide-and-conquer and leads to the famous
+ * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
+ * the standard O(N**2) that the baseline/comba methods use.
+ * Generally though the overhead of this method doesn't pay off
+ * until a certain size (N ~ 80) is reached.
+ */
+int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
+ int B, err;
+
+ /* default the return code to an error */
+ err = MP_MEM;
+
+ /* min # of digits */
+ B = MIN (a->used, b->used);
+
+ /* now divide in two */
+ B = B >> 1;
+
+ /* init copy all the temps */
+ if (mp_init_size (&x0, B) != MP_OKAY)
+ goto ERR;
+ if (mp_init_size (&x1, a->used - B) != MP_OKAY)
+ goto X0;
+ if (mp_init_size (&y0, B) != MP_OKAY)
+ goto X1;
+ if (mp_init_size (&y1, b->used - B) != MP_OKAY)
+ goto Y0;
+
+ /* init temps */
+ if (mp_init_size (&t1, B * 2) != MP_OKAY)
+ goto Y1;
+ if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
+ goto T1;
+ if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
+ goto X0Y0;
+
+ /* now shift the digits */
+ x0.used = y0.used = B;
+ x1.used = a->used - B;
+ y1.used = b->used - B;
+
+ {
+ register int x;
+ register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
+
+ /* we copy the digits directly instead of using higher level functions
+ * since we also need to shift the digits
+ */
+ tmpa = a->dp;
+ tmpb = b->dp;
+
+ tmpx = x0.dp;
+ tmpy = y0.dp;
+ for (x = 0; x < B; x++) {
+ *tmpx++ = *tmpa++;
+ *tmpy++ = *tmpb++;
+ }
+
+ tmpx = x1.dp;
+ for (x = B; x < a->used; x++) {
+ *tmpx++ = *tmpa++;
+ }
+
+ tmpy = y1.dp;
+ for (x = B; x < b->used; x++) {
+ *tmpy++ = *tmpb++;
+ }
+ }
+
+ /* only need to clamp the lower words since by definition the
+ * upper words x1/y1 must have a known number of digits
+ */
+ mp_clamp (&x0);
+ mp_clamp (&y0);
+
+ /* now calc the products x0y0 and x1y1 */
+ /* after this x0 is no longer required, free temp [x0==t2]! */
+ if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
+ goto X1Y1; /* x0y0 = x0*y0 */
+ if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
+ goto X1Y1; /* x1y1 = x1*y1 */
+
+ /* now calc x1-x0 and y1-y0 */
+ if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
+ goto X1Y1; /* t1 = x1 - x0 */
+ if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
+ goto X1Y1; /* t2 = y1 - y0 */
+ if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
+ goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
+
+ /* add x0y0 */
+ if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
+ goto X1Y1; /* t2 = x0y0 + x1y1 */
+ if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
+ goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
+
+ /* shift by B */
+ if (mp_lshd (&t1, B) != MP_OKAY)
+ goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
+ if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
+ goto X1Y1; /* x1y1 = x1y1 << 2*B */
+
+ if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
+ goto X1Y1; /* t1 = x0y0 + t1 */
+ if (mp_add (&t1, &x1y1, c) != MP_OKAY)
+ goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
+
+ /* Algorithm succeeded set the return code to MP_OKAY */
+ err = MP_OKAY;
+
+X1Y1:mp_clear (&x1y1);
+X0Y0:mp_clear (&x0y0);
+T1:mp_clear (&t1);
+Y1:mp_clear (&y1);
+Y0:mp_clear (&y0);
+X1:mp_clear (&x1);
+X0:mp_clear (&x0);
+ERR:
+ return err;
+}
+#endif
+
+/* End: bn_mp_karatsuba_mul.c */
+
+/* Start: bn_mp_karatsuba_sqr.c */
+#include "tma.h"
+#ifdef BN_MP_KARATSUBA_SQR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Karatsuba squaring, computes b = a*a using three
+ * half size squarings
+ *
+ * See comments of karatsuba_mul for details. It
+ * is essentially the same algorithm but merely
+ * tuned to perform recursive squarings.
+ */
+int mp_karatsuba_sqr (mp_int * a, mp_int * b)
+{
+ mp_int x0, x1, t1, t2, x0x0, x1x1;
+ int B, err;
+
+ err = MP_MEM;
+
+ /* min # of digits */
+ B = a->used;
+
+ /* now divide in two */
+ B = B >> 1;
+
+ /* init copy all the temps */
+ if (mp_init_size (&x0, B) != MP_OKAY)
+ goto ERR;
+ if (mp_init_size (&x1, a->used - B) != MP_OKAY)
+ goto X0;
+
+ /* init temps */
+ if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
+ goto X1;
+ if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
+ goto T1;
+ if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
+ goto T2;
+ if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
+ goto X0X0;
+
+ {
+ register int x;
+ register mp_digit *dst, *src;
+
+ src = a->dp;
+
+ /* now shift the digits */
+ dst = x0.dp;
+ for (x = 0; x < B; x++) {
+ *dst++ = *src++;
+ }
+
+ dst = x1.dp;
+ for (x = B; x < a->used; x++) {
+ *dst++ = *src++;
+ }
+ }
+
+ x0.used = B;
+ x1.used = a->used - B;
+
+ mp_clamp (&x0);
+
+ /* now calc the products x0*x0 and x1*x1 */
+ if (mp_sqr (&x0, &x0x0) != MP_OKAY)
+ goto X1X1; /* x0x0 = x0*x0 */
+ if (mp_sqr (&x1, &x1x1) != MP_OKAY)
+ goto X1X1; /* x1x1 = x1*x1 */
+
+ /* now calc (x1-x0)**2 */
+ if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
+ goto X1X1; /* t1 = x1 - x0 */
+ if (mp_sqr (&t1, &t1) != MP_OKAY)
+ goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
+
+ /* add x0y0 */
+ if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
+ goto X1X1; /* t2 = x0x0 + x1x1 */
+ if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
+ goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
+
+ /* shift by B */
+ if (mp_lshd (&t1, B) != MP_OKAY)
+ goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
+ if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
+ goto X1X1; /* x1x1 = x1x1 << 2*B */
+
+ if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
+ goto X1X1; /* t1 = x0x0 + t1 */
+ if (mp_add (&t1, &x1x1, b) != MP_OKAY)
+ goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
+
+ err = MP_OKAY;
+
+X1X1:mp_clear (&x1x1);
+X0X0:mp_clear (&x0x0);
+T2:mp_clear (&t2);
+T1:mp_clear (&t1);
+X1:mp_clear (&x1);
+X0:mp_clear (&x0);
+ERR:
+ return err;
+}
+#endif
+
+/* End: bn_mp_karatsuba_sqr.c */
+
+/* Start: bn_mp_lcm.c */
+#include "tma.h"
+#ifdef BN_MP_LCM_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes least common multiple as |a*b|/(a, b) */
+int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res;
+ mp_int t1, t2;
+
+
+ if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ /* t1 = get the GCD of the two inputs */
+ if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
+ goto LBL_T;
+ }
+
+ /* divide the smallest by the GCD */
+ if (mp_cmp_mag(a, b) == MP_LT) {
+ /* store quotient in t2 such that t2 * b is the LCM */
+ if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
+ goto LBL_T;
+ }
+ res = mp_mul(b, &t2, c);
+ } else {
+ /* store quotient in t2 such that t2 * a is the LCM */
+ if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
+ goto LBL_T;
+ }
+ res = mp_mul(a, &t2, c);
+ }
+
+ /* fix the sign to positive */
+ c->sign = MP_ZPOS;
+
+LBL_T:
+ mp_clear_multi (&t1, &t2, NULL);
+ return res;
+}
+#endif
+
+/* End: bn_mp_lcm.c */
+
+/* Start: bn_mp_lshd.c */
+#include "tma.h"
+#ifdef BN_MP_LSHD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* shift left a certain amount of digits */
+int mp_lshd (mp_int * a, int b)
+{
+ int x, res;
+
+ /* if its less than zero return */
+ if (b <= 0) {
+ return MP_OKAY;
+ }
+
+ /* grow to fit the new digits */
+ if (a->alloc < a->used + b) {
+ if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ {
+ register mp_digit *top, *bottom;
+
+ /* increment the used by the shift amount then copy upwards */
+ a->used += b;
+
+ /* top */
+ top = a->dp + a->used - 1;
+
+ /* base */
+ bottom = a->dp + a->used - 1 - b;
+
+ /* much like mp_rshd this is implemented using a sliding window
+ * except the window goes the otherway around. Copying from
+ * the bottom to the top. see bn_mp_rshd.c for more info.
+ */
+ for (x = a->used - 1; x >= b; x--) {
+ *top-- = *bottom--;
+ }
+
+ /* zero the lower digits */
+ top = a->dp;
+ for (x = 0; x < b; x++) {
+ *top++ = 0;
+ }
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_lshd.c */
+
+/* Start: bn_mp_mod.c */
+#include "tma.h"
+#ifdef BN_MP_MOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* c = a mod b, 0 <= c < b */
+int
+mp_mod (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int t;
+ int res;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+
+ if (t.sign != b->sign) {
+ res = mp_add (b, &t, c);
+ } else {
+ res = MP_OKAY;
+ mp_exch (&t, c);
+ }
+
+ mp_clear (&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_mod.c */
+
+/* Start: bn_mp_mod_2d.c */
+#include "tma.h"
+#ifdef BN_MP_MOD_2D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* calc a value mod 2**b */
+int
+mp_mod_2d (mp_int * a, int b, mp_int * c)
+{
+ int x, res;
+
+ /* if b is <= 0 then zero the int */
+ if (b <= 0) {
+ mp_zero (c);
+ return MP_OKAY;
+ }
+
+ /* if the modulus is larger than the value than return */
+ if (b >= (int) (a->used * DIGIT_BIT)) {
+ res = mp_copy (a, c);
+ return res;
+ }
+
+ /* copy */
+ if ((res = mp_copy (a, c)) != MP_OKAY) {
+ return res;
+ }
+
+ /* zero digits above the last digit of the modulus */
+ for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
+ c->dp[x] = 0;
+ }
+ /* clear the digit that is not completely outside/inside the modulus */
+ c->dp[b / DIGIT_BIT] &=
+ (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
+ mp_clamp (c);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_mod_2d.c */
+
+/* Start: bn_mp_mod_d.c */
+#include "tma.h"
+#ifdef BN_MP_MOD_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+int
+mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
+{
+ return mp_div_d(a, b, NULL, c);
+}
+#endif
+
+/* End: bn_mp_mod_d.c */
+
+/* Start: bn_mp_montgomery_calc_normalization.c */
+#include "tma.h"
+#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/*
+ * shifts with subtractions when the result is greater than b.
+ *
+ * The method is slightly modified to shift B unconditionally upto just under
+ * the leading bit of b. This saves alot of multiple precision shifting.
+ */
+int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
+{
+ int x, bits, res;
+
+ /* how many bits of last digit does b use */
+ bits = mp_count_bits (b) % DIGIT_BIT;
+
+ if (b->used > 1) {
+ if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
+ return res;
+ }
+ } else {
+ mp_set(a, 1);
+ bits = 1;
+ }
+
+
+ /* now compute C = A * B mod b */
+ for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
+ if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
+ return res;
+ }
+ if (mp_cmp_mag (a, b) != MP_LT) {
+ if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
+ return res;
+ }
+ }
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_montgomery_calc_normalization.c */
+
+/* Start: bn_mp_montgomery_reduce.c */
+#include "tma.h"
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes xR**-1 == x (mod N) via Montgomery Reduction */
+int
+mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+{
+ int ix, res, digs;
+ mp_digit mu;
+
+ /* can the fast reduction [comba] method be used?
+ *
+ * Note that unlike in mul you're safely allowed *less*
+ * than the available columns [255 per default] since carries
+ * are fixed up in the inner loop.
+ */
+ digs = n->used * 2 + 1;
+ if ((digs < MP_WARRAY) &&
+ n->used <
+ (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ return fast_mp_montgomery_reduce (x, n, rho);
+ }
+
+ /* grow the input as required */
+ if (x->alloc < digs) {
+ if ((res = mp_grow (x, digs)) != MP_OKAY) {
+ return res;
+ }
+ }
+ x->used = digs;
+
+ for (ix = 0; ix < n->used; ix++) {
+ /* mu = ai * rho mod b
+ *
+ * The value of rho must be precalculated via
+ * montgomery_setup() such that
+ * it equals -1/n0 mod b this allows the
+ * following inner loop to reduce the
+ * input one digit at a time
+ */
+ mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
+
+ /* a = a + mu * m * b**i */
+ {
+ register int iy;
+ register mp_digit *tmpn, *tmpx, u;
+ register mp_word r;
+
+ /* alias for digits of the modulus */
+ tmpn = n->dp;
+
+ /* alias for the digits of x [the input] */
+ tmpx = x->dp + ix;
+
+ /* set the carry to zero */
+ u = 0;
+
+ /* Multiply and add in place */
+ for (iy = 0; iy < n->used; iy++) {
+ /* compute product and sum */
+ r = ((mp_word)mu) * ((mp_word)*tmpn++) +
+ ((mp_word) u) + ((mp_word) * tmpx);
+
+ /* get carry */
+ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
+
+ /* fix digit */
+ *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
+ }
+ /* At this point the ix'th digit of x should be zero */
+
+
+ /* propagate carries upwards as required*/
+ while (u) {
+ *tmpx += u;
+ u = *tmpx >> DIGIT_BIT;
+ *tmpx++ &= MP_MASK;
+ }
+ }
+ }
+
+ /* at this point the n.used'th least
+ * significant digits of x are all zero
+ * which means we can shift x to the
+ * right by n.used digits and the
+ * residue is unchanged.
+ */
+
+ /* x = x/b**n.used */
+ mp_clamp(x);
+ mp_rshd (x, n->used);
+
+ /* if x >= n then x = x - n */
+ if (mp_cmp_mag (x, n) != MP_LT) {
+ return s_mp_sub (x, n, x);
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_montgomery_reduce.c */
+
+/* Start: bn_mp_montgomery_setup.c */
+#include "tma.h"
+#ifdef BN_MP_MONTGOMERY_SETUP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* setups the montgomery reduction stuff */
+int
+mp_montgomery_setup (mp_int * n, mp_digit * rho)
+{
+ mp_digit x, b;
+
+/* fast inversion mod 2**k
+ *
+ * Based on the fact that
+ *
+ * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
+ * => 2*X*A - X*X*A*A = 1
+ * => 2*(1) - (1) = 1
+ */
+ b = n->dp[0];
+
+ if ((b & 1) == 0) {
+ return MP_VAL;
+ }
+
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+#if !defined(MP_8BIT)
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+#endif
+#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+#endif
+#ifdef MP_64BIT
+ x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+#endif
+
+ /* rho = -1/m mod b */
+ *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_montgomery_setup.c */
+
+/* Start: bn_mp_mul.c */
+#include "tma.h"
+#ifdef BN_MP_MUL_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* high level multiplication (handles sign) */
+int mp_mul (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res, neg;
+ neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
+
+ /* use Toom-Cook? */
+#ifdef BN_MP_TOOM_MUL_C
+ if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
+ res = mp_toom_mul(a, b, c);
+ } else
+#endif
+#ifdef BN_MP_KARATSUBA_MUL_C
+ /* use Karatsuba? */
+ if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
+ res = mp_karatsuba_mul (a, b, c);
+ } else
+#endif
+ {
+ /* can we use the fast multiplier?
+ *
+ * The fast multiplier can be used if the output will
+ * have less than MP_WARRAY digits and the number of
+ * digits won't affect carry propagation
+ */
+ int digs = a->used + b->used + 1;
+
+#ifdef BN_FAST_S_MP_MUL_DIGS_C
+ if ((digs < MP_WARRAY) &&
+ MIN(a->used, b->used) <=
+ (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ res = fast_s_mp_mul_digs (a, b, c, digs);
+ } else
+#endif
+#ifdef BN_S_MP_MUL_DIGS_C
+ res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
+#else
+ res = MP_VAL;
+#endif
+
+ }
+ c->sign = (c->used > 0) ? neg : MP_ZPOS;
+ return res;
+}
+#endif
+
+/* End: bn_mp_mul.c */
+
+/* Start: bn_mp_mul_2.c */
+#include "tma.h"
+#ifdef BN_MP_MUL_2_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* b = a*2 */
+int mp_mul_2(mp_int * a, mp_int * b)
+{
+ int x, res, oldused;
+
+ /* grow to accomodate result */
+ if (b->alloc < a->used + 1) {
+ if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ oldused = b->used;
+ b->used = a->used;
+
+ {
+ register mp_digit r, rr, *tmpa, *tmpb;
+
+ /* alias for source */
+ tmpa = a->dp;
+
+ /* alias for dest */
+ tmpb = b->dp;
+
+ /* carry */
+ r = 0;
+ for (x = 0; x < a->used; x++) {
+
+ /* get what will be the *next* carry bit from the
+ * MSB of the current digit
+ */
+ rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
+
+ /* now shift up this digit, add in the carry [from the previous] */
+ *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
+
+ /* copy the carry that would be from the source
+ * digit into the next iteration
+ */
+ r = rr;
+ }
+
+ /* new leading digit? */
+ if (r != 0) {
+ /* add a MSB which is always 1 at this point */
+ *tmpb = 1;
+ ++(b->used);
+ }
+
+ /* now zero any excess digits on the destination
+ * that we didn't write to
+ */
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
+ }
+ b->sign = a->sign;
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_mul_2.c */
+
+/* Start: bn_mp_mul_2d.c */
+#include "tma.h"
+#ifdef BN_MP_MUL_2D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* shift left by a certain bit count */
+int mp_mul_2d (mp_int * a, int b, mp_int * c)
+{
+ mp_digit d;
+ int res;
+
+ /* copy */
+ if (a != c) {
+ if ((res = mp_copy (a, c)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
+ if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* shift by as many digits in the bit count */
+ if (b >= (int)DIGIT_BIT) {
+ if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* shift any bit count < DIGIT_BIT */
+ d = (mp_digit) (b % DIGIT_BIT);
+ if (d != 0) {
+ register mp_digit *tmpc, shift, mask, r, rr;
+ register int x;
+
+ /* bitmask for carries */
+ mask = (((mp_digit)1) << d) - 1;
+
+ /* shift for msbs */
+ shift = DIGIT_BIT - d;
+
+ /* alias */
+ tmpc = c->dp;
+
+ /* carry */
+ r = 0;
+ for (x = 0; x < c->used; x++) {
+ /* get the higher bits of the current word */
+ rr = (*tmpc >> shift) & mask;
+
+ /* shift the current word and OR in the carry */
+ *tmpc = ((*tmpc << d) | r) & MP_MASK;
+ ++tmpc;
+
+ /* set the carry to the carry bits of the current word */
+ r = rr;
+ }
+
+ /* set final carry */
+ if (r != 0) {
+ c->dp[(c->used)++] = r;
+ }
+ }
+ mp_clamp (c);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_mul_2d.c */
+
+/* Start: bn_mp_mul_d.c */
+#include "tma.h"
+#ifdef BN_MP_MUL_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* multiply by a digit */
+int
+mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
+{
+ mp_digit u, *tmpa, *tmpc;
+ mp_word r;
+ int ix, res, olduse;
+
+ /* make sure c is big enough to hold a*b */
+ if (c->alloc < a->used + 1) {
+ if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* get the original destinations used count */
+ olduse = c->used;
+
+ /* set the sign */
+ c->sign = a->sign;
+
+ /* alias for a->dp [source] */
+ tmpa = a->dp;
+
+ /* alias for c->dp [dest] */
+ tmpc = c->dp;
+
+ /* zero carry */
+ u = 0;
+
+ /* compute columns */
+ for (ix = 0; ix < a->used; ix++) {
+ /* compute product and carry sum for this term */
+ r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
+
+ /* mask off higher bits to get a single digit */
+ *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
+
+ /* send carry into next iteration */
+ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+ }
+
+ /* store final carry [if any] and increment ix offset */
+ *tmpc++ = u;
+ ++ix;
+
+ /* now zero digits above the top */
+ while (ix++ < olduse) {
+ *tmpc++ = 0;
+ }
+
+ /* set used count */
+ c->used = a->used + 1;
+ mp_clamp(c);
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_mul_d.c */
+
+/* Start: bn_mp_mulmod.c */
+#include "tma.h"
+#ifdef BN_MP_MULMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* d = a * b (mod c) */
+int
+mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+ int res;
+ mp_int t;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ res = mp_mod (&t, c, d);
+ mp_clear (&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_mulmod.c */
+
+/* Start: bn_mp_n_root.c */
+#include "tma.h"
+#ifdef BN_MP_N_ROOT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* find the n'th root of an integer
+ *
+ * Result found such that (c)**b <= a and (c+1)**b > a
+ *
+ * This algorithm uses Newton's approximation
+ * x[i+1] = x[i] - f(x[i])/f'(x[i])
+ * which will find the root in log(N) time where
+ * each step involves a fair bit. This is not meant to
+ * find huge roots [square and cube, etc].
+ */
+int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
+{
+ mp_int t1, t2, t3;
+ int res, neg;
+
+ /* input must be positive if b is even */
+ if ((b & 1) == 0 && a->sign == MP_NEG) {
+ return MP_VAL;
+ }
+
+ if ((res = mp_init (&t1)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_init (&t2)) != MP_OKAY) {
+ goto LBL_T1;
+ }
+
+ if ((res = mp_init (&t3)) != MP_OKAY) {
+ goto LBL_T2;
+ }
+
+ /* if a is negative fudge the sign but keep track */
+ neg = a->sign;
+ a->sign = MP_ZPOS;
+
+ /* t2 = 2 */
+ mp_set (&t2, 2);
+
+ do {
+ /* t1 = t2 */
+ if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
+
+ /* t3 = t1**(b-1) */
+ if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ /* numerator */
+ /* t2 = t1**b */
+ if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ /* t2 = t1**b - a */
+ if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ /* denominator */
+ /* t3 = t1**(b-1) * b */
+ if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ /* t3 = (t1**b - a)/(b * t1**(b-1)) */
+ if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+ } while (mp_cmp (&t1, &t2) != MP_EQ);
+
+ /* result can be off by a few so check */
+ for (;;) {
+ if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+
+ if (mp_cmp (&t2, a) == MP_GT) {
+ if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
+ goto LBL_T3;
+ }
+ } else {
+ break;
+ }
+ }
+
+ /* reset the sign of a first */
+ a->sign = neg;
+
+ /* set the result */
+ mp_exch (&t1, c);
+
+ /* set the sign of the result */
+ c->sign = neg;
+
+ res = MP_OKAY;
+
+LBL_T3:mp_clear (&t3);
+LBL_T2:mp_clear (&t2);
+LBL_T1:mp_clear (&t1);
+ return res;
+}
+#endif
+
+/* End: bn_mp_n_root.c */
+
+/* Start: bn_mp_neg.c */
+#include "tma.h"
+#ifdef BN_MP_NEG_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* b = -a */
+int mp_neg (mp_int * a, mp_int * b)
+{
+ int res;
+ if (a != b) {
+ if ((res = mp_copy (a, b)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ if (mp_iszero(b) != MP_YES) {
+ b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+ } else {
+ b->sign = MP_ZPOS;
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_neg.c */
+
+/* Start: bn_mp_or.c */
+#include "tma.h"
+#ifdef BN_MP_OR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* OR two ints together */
+int mp_or (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res, ix, px;
+ mp_int t, *x;
+
+ if (a->used > b->used) {
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+ px = b->used;
+ x = b;
+ } else {
+ if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
+ return res;
+ }
+ px = a->used;
+ x = a;
+ }
+
+ for (ix = 0; ix < px; ix++) {
+ t.dp[ix] |= x->dp[ix];
+ }
+ mp_clamp (&t);
+ mp_exch (c, &t);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_or.c */
+
+/* Start: bn_mp_prime_fermat.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_FERMAT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* performs one Fermat test.
+ *
+ * If "a" were prime then b**a == b (mod a) since the order of
+ * the multiplicative sub-group would be phi(a) = a-1. That means
+ * it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).
+ *
+ * Sets result to 1 if the congruence holds, or zero otherwise.
+ */
+int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
+{
+ mp_int t;
+ int err;
+
+ /* default to composite */
+ *result = MP_NO;
+
+ /* ensure b > 1 */
+ if (mp_cmp_d(b, 1) != MP_GT) {
+ return MP_VAL;
+ }
+
+ /* init t */
+ if ((err = mp_init (&t)) != MP_OKAY) {
+ return err;
+ }
+
+ /* compute t = b**a mod a */
+ if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
+ goto LBL_T;
+ }
+
+ /* is it equal to b? */
+ if (mp_cmp (&t, b) == MP_EQ) {
+ *result = MP_YES;
+ }
+
+ err = MP_OKAY;
+LBL_T:mp_clear (&t);
+ return err;
+}
+#endif
+
+/* End: bn_mp_prime_fermat.c */
+
+/* Start: bn_mp_prime_is_divisible.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_IS_DIVISIBLE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines if an integers is divisible by one
+ * of the first PRIME_SIZE primes or not
+ *
+ * sets result to 0 if not, 1 if yes
+ */
+int mp_prime_is_divisible (mp_int * a, int *result)
+{
+ int err, ix;
+ mp_digit res;
+
+ /* default to not */
+ *result = MP_NO;
+
+ for (ix = 0; ix < PRIME_SIZE; ix++) {
+ /* what is a mod LBL_prime_tab[ix] */
+ if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
+ return err;
+ }
+
+ /* is the residue zero? */
+ if (res == 0) {
+ *result = MP_YES;
+ return MP_OKAY;
+ }
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_prime_is_divisible.c */
+
+/* Start: bn_mp_prime_is_prime.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_IS_PRIME_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* performs a variable number of rounds of Miller-Rabin
+ *
+ * Probability of error after t rounds is no more than
+
+ *
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime (mp_int * a, int t, int *result)
+{
+ mp_int b;
+ int ix, err, res;
+
+ /* default to no */
+ *result = MP_NO;
+
+ /* valid value of t? */
+ if (t <= 0 || t > PRIME_SIZE) {
+ return MP_VAL;
+ }
+
+ /* is the input equal to one of the primes in the table? */
+ for (ix = 0; ix < PRIME_SIZE; ix++) {
+ if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
+ *result = 1;
+ return MP_OKAY;
+ }
+ }
+
+ /* first perform trial division */
+ if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
+ return err;
+ }
+
+ /* return if it was trivially divisible */
+ if (res == MP_YES) {
+ return MP_OKAY;
+ }
+
+ /* now perform the miller-rabin rounds */
+ if ((err = mp_init (&b)) != MP_OKAY) {
+ return err;
+ }
+
+ for (ix = 0; ix < t; ix++) {
+ /* set the prime */
+ mp_set (&b, ltm_prime_tab[ix]);
+
+ if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
+ goto LBL_B;
+ }
+
+ if (res == MP_NO) {
+ goto LBL_B;
+ }
+ }
+
+ /* passed the test */
+ *result = MP_YES;
+LBL_B:mp_clear (&b);
+ return err;
+}
+#endif
+
+/* End: bn_mp_prime_is_prime.c */
+
+/* Start: bn_mp_prime_miller_rabin.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_MILLER_RABIN_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ */
+int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
+{
+ mp_int n1, y, r;
+ int s, j, err;
+
+ /* default */
+ *result = MP_NO;
+
+ /* ensure b > 1 */
+ if (mp_cmp_d(b, 1) != MP_GT) {
+ return MP_VAL;
+ }
+
+ /* get n1 = a - 1 */
+ if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
+ return err;
+ }
+ if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
+ goto LBL_N1;
+ }
+
+ /* set 2**s * r = n1 */
+ if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
+ goto LBL_N1;
+ }
+
+ /* count the number of least significant bits
+ * which are zero
+ */
+ s = mp_cnt_lsb(&r);
+
+ /* now divide n - 1 by 2**s */
+ if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
+ goto LBL_R;
+ }
+
+ /* compute y = b**r mod a */
+ if ((err = mp_init (&y)) != MP_OKAY) {
+ goto LBL_R;
+ }
+ if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ /* if y != 1 and y != n1 do */
+ if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
+ j = 1;
+ /* while j <= s-1 and y != n1 */
+ while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
+ if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
+ goto LBL_Y;
+ }
+
+ /* if y == 1 then composite */
+ if (mp_cmp_d (&y, 1) == MP_EQ) {
+ goto LBL_Y;
+ }
+
+ ++j;
+ }
+
+ /* if y != n1 then composite */
+ if (mp_cmp (&y, &n1) != MP_EQ) {
+ goto LBL_Y;
+ }
+ }
+
+ /* probably prime now */
+ *result = MP_YES;
+LBL_Y:mp_clear (&y);
+LBL_R:mp_clear (&r);
+LBL_N1:mp_clear (&n1);
+ return err;
+}
+#endif
+
+/* End: bn_mp_prime_miller_rabin.c */
+
+/* Start: bn_mp_prime_next_prime.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_NEXT_PRIME_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* finds the next prime after the number "a" using "t" trials
+ * of Miller-Rabin.
+ *
+ * bbs_style = 1 means the prime must be congruent to 3 mod 4
+ */
+int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
+{
+ int err, res, x, y;
+ mp_digit res_tab[PRIME_SIZE], step, kstep;
+ mp_int b;
+
+ /* ensure t is valid */
+ if (t <= 0 || t > PRIME_SIZE) {
+ return MP_VAL;
+ }
+
+ /* force positive */
+ a->sign = MP_ZPOS;
+
+ /* simple algo if a is less than the largest prime in the table */
+ if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
+ /* find which prime it is bigger than */
+ for (x = PRIME_SIZE - 2; x >= 0; x--) {
+ if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
+ if (bbs_style == 1) {
+ /* ok we found a prime smaller or
+ * equal [so the next is larger]
+ *
+ * however, the prime must be
+ * congruent to 3 mod 4
+ */
+ if ((ltm_prime_tab[x + 1] & 3) != 3) {
+ /* scan upwards for a prime congruent to 3 mod 4 */
+ for (y = x + 1; y < PRIME_SIZE; y++) {
+ if ((ltm_prime_tab[y] & 3) == 3) {
+ mp_set(a, ltm_prime_tab[y]);
+ return MP_OKAY;
+ }
+ }
+ }
+ } else {
+ mp_set(a, ltm_prime_tab[x + 1]);
+ return MP_OKAY;
+ }
+ }
+ }
+ /* at this point a maybe 1 */
+ if (mp_cmp_d(a, 1) == MP_EQ) {
+ mp_set(a, 2);
+ return MP_OKAY;
+ }
+ /* fall through to the sieve */
+ }
+
+ /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */
+ if (bbs_style == 1) {
+ kstep = 4;
+ } else {
+ kstep = 2;
+ }
+
+ /* at this point we will use a combination of a sieve and Miller-Rabin */
+
+ if (bbs_style == 1) {
+ /* if a mod 4 != 3 subtract the correct value to make it so */
+ if ((a->dp[0] & 3) != 3) {
+ if ((err = mp_sub_d(a, (a->dp[0] & 3) + 1, a)) != MP_OKAY) { return err; };
+ }
+ } else {
+ if (mp_iseven(a) == 1) {
+ /* force odd */
+ if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) {
+ return err;
+ }
+ }
+ }
+
+ /* generate the restable */
+ for (x = 1; x < PRIME_SIZE; x++) {
+ if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
+ return err;
+ }
+ }
+
+ /* init temp used for Miller-Rabin Testing */
+ if ((err = mp_init(&b)) != MP_OKAY) {
+ return err;
+ }
+
+ for (;;) {
+ /* skip to the next non-trivially divisible candidate */
+ step = 0;
+ do {
+ /* y == 1 if any residue was zero [e.g. cannot be prime] */
+ y = 0;
+
+ /* increase step to next candidate */
+ step += kstep;
+
+ /* compute the new residue without using division */
+ for (x = 1; x < PRIME_SIZE; x++) {
+ /* add the step to each residue */
+ res_tab[x] += kstep;
+
+ /* subtract the modulus [instead of using division] */
+ if (res_tab[x] >= ltm_prime_tab[x]) {
+ res_tab[x] -= ltm_prime_tab[x];
+ }
+
+ /* set flag if zero */
+ if (res_tab[x] == 0) {
+ y = 1;
+ }
+ }
+ } while (y == 1 && step < ((((mp_digit)1)<<DIGIT_BIT) - kstep));
+
+ /* add the step */
+ if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+
+ /* if didn't pass sieve and step == MAX then skip test */
+ if (y == 1 && step >= ((((mp_digit)1)<<DIGIT_BIT) - kstep)) {
+ continue;
+ }
+
+ /* is this prime? */
+ for (x = 0; x < t; x++) {
+ mp_set(&b, ltm_prime_tab[t]);
+ if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if (res == MP_NO) {
+ break;
+ }
+ }
+
+ if (res == MP_YES) {
+ break;
+ }
+ }
+
+ err = MP_OKAY;
+LBL_ERR:
+ mp_clear(&b);
+ return err;
+}
+
+#endif
+
+/* End: bn_mp_prime_next_prime.c */
+
+/* Start: bn_mp_prime_rabin_miller_trials.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_RABIN_MILLER_TRIALS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+
+static const struct {
+ int k, t;
+} sizes[] = {
+{ 128, 28 },
+{ 256, 16 },
+{ 384, 10 },
+{ 512, 7 },
+{ 640, 6 },
+{ 768, 5 },
+{ 896, 4 },
+{ 1024, 4 }
+};
+
+/* returns # of RM trials required for a given bit size */
+int mp_prime_rabin_miller_trials(int size)
+{
+ int x;
+
+ for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
+ if (sizes[x].k == size) {
+ return sizes[x].t;
+ } else if (sizes[x].k > size) {
+ return (x == 0) ? sizes[0].t : sizes[x - 1].t;
+ }
+ }
+ return sizes[x-1].t + 1;
+}
+
+
+#endif
+
+/* End: bn_mp_prime_rabin_miller_trials.c */
+
+/* Start: bn_mp_prime_random_ex.c */
+#include "tma.h"
+#ifdef BN_MP_PRIME_RANDOM_EX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* makes a truly random prime of a given size (bits),
+ *
+ * Flags are as follows:
+ *
+ * LTM_PRIME_BBS - make prime congruent to 3 mod 4
+ * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
+ * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
+ * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
+ *
+ * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
+ * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
+ * so it can be NULL
+ *
+ */
+
+/* This is possibly the mother of all prime generation functions, muahahahahaha! */
+int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
+{
+ unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
+ int res, err, bsize, maskOR_msb_offset;
+
+ /* sanity check the input */
+ if (size <= 1 || t <= 0) {
+ return MP_VAL;
+ }
+
+ /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
+ if (flags & LTM_PRIME_SAFE) {
+ flags |= LTM_PRIME_BBS;
+ }
+
+ /* calc the byte size */
+ bsize = (size>>3) + ((size&7)?1:0);
+
+ /* we need a buffer of bsize bytes */
+ tmp = OPT_CAST(unsigned char) XMALLOC(bsize);
+ if (tmp == NULL) {
+ return MP_MEM;
+ }
+
+ /* calc the maskAND value for the MSbyte*/
+ maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
+
+ /* calc the maskOR_msb */
+ maskOR_msb = 0;
+ maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
+ if (flags & LTM_PRIME_2MSB_ON) {
+ maskOR_msb |= 1 << ((size - 2) & 7);
+ } else if (flags & LTM_PRIME_2MSB_OFF) {
+ maskAND &= ~(1 << ((size - 2) & 7));
+ }
+
+ /* get the maskOR_lsb */
+ maskOR_lsb = 1;
+ if (flags & LTM_PRIME_BBS) {
+ maskOR_lsb |= 3;
+ }
+
+ do {
+ /* read the bytes */
+ if (cb(tmp, bsize, dat) != bsize) {
+ err = MP_VAL;
+ goto error;
+ }
+
+ /* work over the MSbyte */
+ tmp[0] &= maskAND;
+ tmp[0] |= 1 << ((size - 1) & 7);
+
+ /* mix in the maskORs */
+ tmp[maskOR_msb_offset] |= maskOR_msb;
+ tmp[bsize-1] |= maskOR_lsb;
+
+ /* read it in */
+ if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
+
+ /* is it prime? */
+ if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
+ if (res == MP_NO) {
+ continue;
+ }
+
+ if (flags & LTM_PRIME_SAFE) {
+ /* see if (a-1)/2 is prime */
+ if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
+ if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
+
+ /* is it prime? */
+ if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
+ }
+ } while (res == MP_NO);
+
+ if (flags & LTM_PRIME_SAFE) {
+ /* restore a to the original value */
+ if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
+ if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
+ }
+
+ err = MP_OKAY;
+error:
+ XFREE(tmp);
+ return err;
+}
+
+
+#endif
+
+/* End: bn_mp_prime_random_ex.c */
+
+/* Start: bn_mp_radix_size.c */
+#include "tma.h"
+#ifdef BN_MP_RADIX_SIZE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* returns size of ASCII reprensentation */
+int mp_radix_size (mp_int * a, int radix, int *size)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+
+ *size = 0;
+
+ /* special case for binary */
+ if (radix == 2) {
+ *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
+ return MP_OKAY;
+ }
+
+ /* make sure the radix is in range */
+ if (radix < 2 || radix > 64) {
+ return MP_VAL;
+ }
+
+ if (mp_iszero(a) == MP_YES) {
+ *size = 2;
+ return MP_OKAY;
+ }
+
+ /* digs is the digit count */
+ digs = 0;
+
+ /* if it's negative add one for the sign */
+ if (a->sign == MP_NEG) {
+ ++digs;
+ }
+
+ /* init a copy of the input */
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* force temp to positive */
+ t.sign = MP_ZPOS;
+
+ /* fetch out all of the digits */
+ while (mp_iszero (&t) == MP_NO) {
+ if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ ++digs;
+ }
+ mp_clear (&t);
+
+ /* return digs + 1, the 1 is for the NULL byte that would be required. */
+ *size = digs + 1;
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_radix_size.c */
+
+/* Start: bn_mp_radix_smap.c */
+#include "tma.h"
+#ifdef BN_MP_RADIX_SMAP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* chars used in radix conversions */
+const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+#endif
+
+/* End: bn_mp_radix_smap.c */
+
+/* Start: bn_mp_rand.c */
+#include "tma.h"
+#ifdef BN_MP_RAND_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* makes a pseudo-random int of a given size */
+int
+mp_rand (mp_int * a, int digits)
+{
+ int res;
+ mp_digit d;
+
+ mp_zero (a);
+ if (digits <= 0) {
+ return MP_OKAY;
+ }
+
+ /* first place a random non-zero digit */
+ do {
+ d = ((mp_digit) abs (rand ())) & MP_MASK;
+ } while (d == 0);
+
+ if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
+ return res;
+ }
+
+ while (--digits > 0) {
+ if ((res = mp_lshd (a, 1)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_rand.c */
+
+/* Start: bn_mp_read_radix.c */
+#include "tma.h"
+#ifdef BN_MP_READ_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* read a string [ASCII] in a given radix */
+int mp_read_radix (mp_int * a, const char *str, int radix)
+{
+ int y, res, neg;
+ char ch;
+
+ /* make sure the radix is ok */
+ if (radix < 2 || radix > 64) {
+ return MP_VAL;
+ }
+
+ /* if the leading digit is a
+ * minus set the sign to negative.
+ */
+ if (*str == '-') {
+ ++str;
+ neg = MP_NEG;
+ } else {
+ neg = MP_ZPOS;
+ }
+
+ /* set the integer to the default of zero */
+ mp_zero (a);
+
+ /* process each digit of the string */
+ while (*str) {
+ /* if the radix < 36 the conversion is case insensitive
+ * this allows numbers like 1AB and 1ab to represent the same value
+ * [e.g. in hex]
+ */
+ ch = (char) ((radix < 36) ? toupper (*str) : *str);
+ for (y = 0; y < 64; y++) {
+ if (ch == mp_s_rmap[y]) {
+ break;
+ }
+ }
+
+ /* if the char was found in the map
+ * and is less than the given radix add it
+ * to the number, otherwise exit the loop.
+ */
+ if (y < radix) {
+ if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
+ return res;
+ }
+ if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
+ return res;
+ }
+ } else {
+ break;
+ }
+ ++str;
+ }
+
+ /* set the sign only if a != 0 */
+ if (mp_iszero(a) != 1) {
+ a->sign = neg;
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_read_radix.c */
+
+/* Start: bn_mp_read_signed_bin.c */
+#include "tma.h"
+#ifdef BN_MP_READ_SIGNED_BIN_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* read signed bin, big endian, first byte is 0==positive or 1==negative */
+int
+mp_read_signed_bin (mp_int * a, unsigned char *b, int c)
+{
+ int res;
+
+ /* read magnitude */
+ if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) {
+ return res;
+ }
+
+ /* first byte is 0 for positive, non-zero for negative */
+ if (b[0] == 0) {
+ a->sign = MP_ZPOS;
+ } else {
+ a->sign = MP_NEG;
+ }
+
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_read_signed_bin.c */
+
+/* Start: bn_mp_read_unsigned_bin.c */
+#include "tma.h"
+#ifdef BN_MP_READ_UNSIGNED_BIN_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reads a unsigned char array, assumes the msb is stored first [big endian] */
+int
+mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c)
+{
+ int res;
+
+ /* make sure there are at least two digits */
+ if (a->alloc < 2) {
+ if ((res = mp_grow(a, 2)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* zero the int */
+ mp_zero (a);
+
+ /* read the bytes in */
+ while (c-- > 0) {
+ if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
+ return res;
+ }
+
+#ifndef MP_8BIT
+ a->dp[0] |= *b++;
+ a->used += 1;
+#else
+ a->dp[0] = (*b & MP_MASK);
+ a->dp[1] |= ((*b++ >> 7U) & 1);
+ a->used += 2;
+#endif
+ }
+ mp_clamp (a);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_read_unsigned_bin.c */
+
+/* Start: bn_mp_reduce.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reduces x mod m, assumes 0 < x < m**2, mu is
+ * precomputed via mp_reduce_setup.
+ * From HAC pp.604 Algorithm 14.42
+ */
+int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
+{
+ mp_int q;
+ int res, um = m->used;
+
+ /* q = x */
+ if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
+ return res;
+ }
+
+ /* q1 = x / b**(k-1) */
+ mp_rshd (&q, um - 1);
+
+ /* according to HAC this optimization is ok */
+ if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
+ if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+ } else {
+#ifdef BN_S_MP_MUL_HIGH_DIGS_C
+ if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
+ if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+#else
+ {
+ res = MP_VAL;
+ goto CLEANUP;
+ }
+#endif
+ }
+
+ /* q3 = q2 / b**(k+1) */
+ mp_rshd (&q, um + 1);
+
+ /* x = x mod b**(k+1), quick (no division) */
+ if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+
+ /* q = q * m mod b**(k+1), quick (no division) */
+ if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+
+ /* x = x - q */
+ if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+
+ /* If x < 0, add b**(k+1) to it */
+ if (mp_cmp_d (x, 0) == MP_LT) {
+ mp_set (&q, 1);
+ if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_add (x, &q, x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ /* Back off if it's too big */
+ while (mp_cmp (x, m) != MP_LT) {
+ if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
+ goto CLEANUP;
+ }
+ }
+
+CLEANUP:
+ mp_clear (&q);
+
+ return res;
+}
+#endif
+
+/* End: bn_mp_reduce.c */
+
+/* Start: bn_mp_reduce_2k.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_2K_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reduces a modulo n where n is of the form 2**p - d */
+int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
+{
+ mp_int q;
+ int p, res;
+
+ if ((res = mp_init(&q)) != MP_OKAY) {
+ return res;
+ }
+
+ p = mp_count_bits(n);
+top:
+ /* q = a/2**p, a = a mod 2**p */
+ if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if (d != 1) {
+ /* q = q * d */
+ if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
+ goto ERR;
+ }
+ }
+
+ /* a = a + q */
+ if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if (mp_cmp_mag(a, n) != MP_LT) {
+ s_mp_sub(a, n, a);
+ goto top;
+ }
+
+ERR:
+ mp_clear(&q);
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_reduce_2k.c */
+
+/* Start: bn_mp_reduce_2k_l.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reduces a modulo n where n is of the form 2**p - d
+ This differs from reduce_2k since "d" can be larger
+ than a single digit.
+*/
+int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
+{
+ mp_int q;
+ int p, res;
+
+ if ((res = mp_init(&q)) != MP_OKAY) {
+ return res;
+ }
+
+ p = mp_count_bits(n);
+top:
+ /* q = a/2**p, a = a mod 2**p */
+ if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* q = q * d */
+ if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* a = a + q */
+ if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if (mp_cmp_mag(a, n) != MP_LT) {
+ s_mp_sub(a, n, a);
+ goto top;
+ }
+
+ERR:
+ mp_clear(&q);
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_reduce_2k_l.c */
+
+/* Start: bn_mp_reduce_2k_setup.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_2K_SETUP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines the setup value */
+int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
+{
+ int res, p;
+ mp_int tmp;
+
+ if ((res = mp_init(&tmp)) != MP_OKAY) {
+ return res;
+ }
+
+ p = mp_count_bits(a);
+ if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
+ mp_clear(&tmp);
+ return res;
+ }
+
+ if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
+ mp_clear(&tmp);
+ return res;
+ }
+
+ *d = tmp.dp[0];
+ mp_clear(&tmp);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_reduce_2k_setup.c */
+
+/* Start: bn_mp_reduce_2k_setup_l.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_2K_SETUP_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines the setup value */
+int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
+{
+ int res;
+ mp_int tmp;
+
+ if ((res = mp_init(&tmp)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ERR:
+ mp_clear(&tmp);
+ return res;
+}
+#endif
+
+/* End: bn_mp_reduce_2k_setup_l.c */
+
+/* Start: bn_mp_reduce_is_2k.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_IS_2K_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines if mp_reduce_2k can be used */
+int mp_reduce_is_2k(mp_int *a)
+{
+ int ix, iy, iw;
+ mp_digit iz;
+
+ if (a->used == 0) {
+ return MP_NO;
+ } else if (a->used == 1) {
+ return MP_YES;
+ } else if (a->used > 1) {
+ iy = mp_count_bits(a);
+ iz = 1;
+ iw = 1;
+
+ /* Test every bit from the second digit up, must be 1 */
+ for (ix = DIGIT_BIT; ix < iy; ix++) {
+ if ((a->dp[iw] & iz) == 0) {
+ return MP_NO;
+ }
+ iz <<= 1;
+ if (iz > (mp_digit)MP_MASK) {
+ ++iw;
+ iz = 1;
+ }
+ }
+ }
+ return MP_YES;
+}
+
+#endif
+
+/* End: bn_mp_reduce_is_2k.c */
+
+/* Start: bn_mp_reduce_is_2k_l.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_IS_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines if reduce_2k_l can be used */
+int mp_reduce_is_2k_l(mp_int *a)
+{
+ int ix, iy;
+
+ if (a->used == 0) {
+ return MP_NO;
+ } else if (a->used == 1) {
+ return MP_YES;
+ } else if (a->used > 1) {
+ /* if more than half of the digits are -1 we're sold */
+ for (iy = ix = 0; ix < a->used; ix++) {
+ if (a->dp[ix] == MP_MASK) {
+ ++iy;
+ }
+ }
+ return (iy >= (a->used/2)) ? MP_YES : MP_NO;
+
+ }
+ return MP_NO;
+}
+
+#endif
+
+/* End: bn_mp_reduce_is_2k_l.c */
+
+/* Start: bn_mp_reduce_setup.c */
+#include "tma.h"
+#ifdef BN_MP_REDUCE_SETUP_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* pre-calculate the value required for Barrett reduction
+ * For a given modulus "b" it calulates the value required in "a"
+ */
+int mp_reduce_setup (mp_int * a, mp_int * b)
+{
+ int res;
+
+ if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
+ return res;
+ }
+ return mp_div (a, b, a, NULL);
+}
+#endif
+
+/* End: bn_mp_reduce_setup.c */
+
+/* Start: bn_mp_rshd.c */
+#include "tma.h"
+#ifdef BN_MP_RSHD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* shift right a certain amount of digits */
+void mp_rshd (mp_int * a, int b)
+{
+ int x;
+
+ /* if b <= 0 then ignore it */
+ if (b <= 0) {
+ return;
+ }
+
+ /* if b > used then simply zero it and return */
+ if (a->used <= b) {
+ mp_zero (a);
+ return;
+ }
+
+ {
+ register mp_digit *bottom, *top;
+
+ /* shift the digits down */
+
+ /* bottom */
+ bottom = a->dp;
+
+ /* top [offset into digits] */
+ top = a->dp + b;
+
+ /* this is implemented as a sliding window where
+ * the window is b-digits long and digits from
+ * the top of the window are copied to the bottom
+ *
+ * e.g.
+
+ b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
+ /\ | ---->
+ \-------------------/ ---->
+ */
+ for (x = 0; x < (a->used - b); x++) {
+ *bottom++ = *top++;
+ }
+
+ /* zero the top digits */
+ for (; x < a->used; x++) {
+ *bottom++ = 0;
+ }
+ }
+
+ /* remove excess digits */
+ a->used -= b;
+}
+#endif
+
+/* End: bn_mp_rshd.c */
+
+/* Start: bn_mp_set.c */
+#include "tma.h"
+#ifdef BN_MP_SET_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* set to a digit */
+void mp_set (mp_int * a, mp_digit b)
+{
+ mp_zero (a);
+ a->dp[0] = b & MP_MASK;
+ a->used = (a->dp[0] != 0) ? 1 : 0;
+}
+#endif
+
+/* End: bn_mp_set.c */
+
+/* Start: bn_mp_set_int.c */
+#include "tma.h"
+#ifdef BN_MP_SET_INT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* set a 32-bit const */
+int mp_set_int (mp_int * a, unsigned long b)
+{
+ int x, res;
+
+ mp_zero (a);
+
+ /* set four bits at a time */
+ for (x = 0; x < 8; x++) {
+ /* shift the number up four bits */
+ if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* OR in the top four bits of the source */
+ a->dp[0] |= (b >> 28) & 15;
+
+ /* shift the source up to the next four bits */
+ b <<= 4;
+
+ /* ensure that digits are not clamped off */
+ a->used += 1;
+ }
+ mp_clamp (a);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_set_int.c */
+
+/* Start: bn_mp_shrink.c */
+#include "tma.h"
+#ifdef BN_MP_SHRINK_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* shrink a bignum */
+int mp_shrink (mp_int * a)
+{
+ mp_digit *tmp;
+ if (a->alloc != a->used && a->used > 0) {
+ if ((tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
+ return MP_MEM;
+ }
+ a->dp = tmp;
+ a->alloc = a->used;
+ }
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_shrink.c */
+
+/* Start: bn_mp_signed_bin_size.c */
+#include "tma.h"
+#ifdef BN_MP_SIGNED_BIN_SIZE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* get the size for an signed equivalent */
+int mp_signed_bin_size (mp_int * a)
+{
+ return 1 + mp_unsigned_bin_size (a);
+}
+#endif
+
+/* End: bn_mp_signed_bin_size.c */
+
+/* Start: bn_mp_sqr.c */
+#include "tma.h"
+#ifdef BN_MP_SQR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* computes b = a*a */
+int
+mp_sqr (mp_int * a, mp_int * b)
+{
+ int res;
+
+#ifdef BN_MP_TOOM_SQR_C
+ /* use Toom-Cook? */
+ if (a->used >= TOOM_SQR_CUTOFF) {
+ res = mp_toom_sqr(a, b);
+ /* Karatsuba? */
+ } else
+#endif
+#ifdef BN_MP_KARATSUBA_SQR_C
+if (a->used >= KARATSUBA_SQR_CUTOFF) {
+ res = mp_karatsuba_sqr (a, b);
+ } else
+#endif
+ {
+#ifdef BN_FAST_S_MP_SQR_C
+ /* can we use the fast comba multiplier? */
+ if ((a->used * 2 + 1) < MP_WARRAY &&
+ a->used <
+ (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
+ res = fast_s_mp_sqr (a, b);
+ } else
+#endif
+#ifdef BN_S_MP_SQR_C
+ res = s_mp_sqr (a, b);
+#else
+ res = MP_VAL;
+#endif
+ }
+ b->sign = MP_ZPOS;
+ return res;
+}
+#endif
+
+/* End: bn_mp_sqr.c */
+
+/* Start: bn_mp_sqrmod.c */
+#include "tma.h"
+#ifdef BN_MP_SQRMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* c = a * a (mod b) */
+int
+mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res;
+ mp_int t;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_sqr (a, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ res = mp_mod (&t, b, c);
+ mp_clear (&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_sqrmod.c */
+
+/* Start: bn_mp_sqrt.c */
+#include "tma.h"
+#ifdef BN_MP_SQRT_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* this function is less generic than mp_n_root, simpler and faster */
+int mp_sqrt(mp_int *arg, mp_int *ret)
+{
+ int res;
+ mp_int t1,t2;
+
+ /* must be positive */
+ if (arg->sign == MP_NEG) {
+ return MP_VAL;
+ }
+
+ /* easy out */
+ if (mp_iszero(arg) == MP_YES) {
+ mp_zero(ret);
+ return MP_OKAY;
+ }
+
+ if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_init(&t2)) != MP_OKAY) {
+ goto E2;
+ }
+
+ /* First approx. (not very bad for large arg) */
+ mp_rshd (&t1,t1.used/2);
+
+ /* t1 > 0 */
+ if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
+ goto E1;
+ }
+ if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
+ goto E1;
+ }
+ if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
+ goto E1;
+ }
+ /* And now t1 > sqrt(arg) */
+ do {
+ if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
+ goto E1;
+ }
+ if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
+ goto E1;
+ }
+ if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
+ goto E1;
+ }
+ /* t1 >= sqrt(arg) >= t2 at this point */
+ } while (mp_cmp_mag(&t1,&t2) == MP_GT);
+
+ mp_exch(&t1,ret);
+
+E1: mp_clear(&t2);
+E2: mp_clear(&t1);
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_sqrt.c */
+
+/* Start: bn_mp_sub.c */
+#include "tma.h"
+#ifdef BN_MP_SUB_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* high level subtraction (handles signs) */
+int
+mp_sub (mp_int * a, mp_int * b, mp_int * c)
+{
+ int sa, sb, res;
+
+ sa = a->sign;
+ sb = b->sign;
+
+ if (sa != sb) {
+ /* subtract a negative from a positive, OR */
+ /* subtract a positive from a negative. */
+ /* In either case, ADD their magnitudes, */
+ /* and use the sign of the first number. */
+ c->sign = sa;
+ res = s_mp_add (a, b, c);
+ } else {
+ /* subtract a positive from a positive, OR */
+ /* subtract a negative from a negative. */
+ /* First, take the difference between their */
+ /* magnitudes, then... */
+ if (mp_cmp_mag (a, b) != MP_LT) {
+ /* Copy the sign from the first */
+ c->sign = sa;
+ /* The first has a larger or equal magnitude */
+ res = s_mp_sub (a, b, c);
+ } else {
+ /* The result has the *opposite* sign from */
+ /* the first number. */
+ c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+ /* The second has a larger magnitude */
+ res = s_mp_sub (b, a, c);
+ }
+ }
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_sub.c */
+
+/* Start: bn_mp_sub_d.c */
+#include "tma.h"
+#ifdef BN_MP_SUB_D_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* single digit subtraction */
+int
+mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
+{
+ mp_digit *tmpa, *tmpc, mu;
+ int res, ix, oldused;
+
+ /* grow c as required */
+ if (c->alloc < a->used + 1) {
+ if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* if a is negative just do an unsigned
+ * addition [with fudged signs]
+ */
+ if (a->sign == MP_NEG) {
+ a->sign = MP_ZPOS;
+ res = mp_add_d(a, b, c);
+ a->sign = c->sign = MP_NEG;
+ return res;
+ }
+
+ /* setup regs */
+ oldused = c->used;
+ tmpa = a->dp;
+ tmpc = c->dp;
+
+ /* if a <= b simply fix the single digit */
+ if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
+ if (a->used == 1) {
+ *tmpc++ = b - *tmpa;
+ } else {
+ *tmpc++ = b;
+ }
+ ix = 1;
+
+ /* negative/1digit */
+ c->sign = MP_NEG;
+ c->used = 1;
+ } else {
+ /* positive/size */
+ c->sign = MP_ZPOS;
+ c->used = a->used;
+
+ /* subtract first digit */
+ *tmpc = *tmpa++ - b;
+ mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
+ *tmpc++ &= MP_MASK;
+
+ /* handle rest of the digits */
+ for (ix = 1; ix < a->used; ix++) {
+ *tmpc = *tmpa++ - mu;
+ mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
+ *tmpc++ &= MP_MASK;
+ }
+ }
+
+ /* zero excess digits */
+ while (ix++ < oldused) {
+ *tmpc++ = 0;
+ }
+ mp_clamp(c);
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_sub_d.c */
+
+/* Start: bn_mp_submod.c */
+#include "tma.h"
+#ifdef BN_MP_SUBMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* d = a - b (mod c) */
+int
+mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+ int res;
+ mp_int t;
+
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ res = mp_mod (&t, c, d);
+ mp_clear (&t);
+ return res;
+}
+#endif
+
+/* End: bn_mp_submod.c */
+
+/* Start: bn_mp_to_signed_bin.c */
+#include "tma.h"
+#ifdef BN_MP_TO_SIGNED_BIN_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in signed [big endian] format */
+int mp_to_signed_bin (mp_int * a, unsigned char *b)
+{
+ int res;
+
+ if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) {
+ return res;
+ }
+ b[0] = (unsigned char) ((a->sign == MP_ZPOS) ? 0 : 1);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_to_signed_bin.c */
+
+/* Start: bn_mp_to_signed_bin_n.c */
+#include "tma.h"
+#ifdef BN_MP_TO_SIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in signed [big endian] format */
+int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+ if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
+ return MP_VAL;
+ }
+ *outlen = mp_signed_bin_size(a);
+ return mp_to_signed_bin(a, b);
+}
+#endif
+
+/* End: bn_mp_to_signed_bin_n.c */
+
+/* Start: bn_mp_to_unsigned_bin.c */
+#include "tma.h"
+#ifdef BN_MP_TO_UNSIGNED_BIN_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in unsigned [big endian] format */
+int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
+{
+ int x, res;
+ mp_int t;
+
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ x = 0;
+ while (mp_iszero (&t) == 0) {
+#ifndef MP_8BIT
+ b[x++] = (unsigned char) (t.dp[0] & 255);
+#else
+ b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
+#endif
+ if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ }
+ bn_reverse (b, x);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_to_unsigned_bin.c */
+
+/* Start: bn_mp_to_unsigned_bin_n.c */
+#include "tma.h"
+#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in unsigned [big endian] format */
+int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+ if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
+ return MP_VAL;
+ }
+ *outlen = mp_unsigned_bin_size(a);
+ return mp_to_unsigned_bin(a, b);
+}
+#endif
+
+/* End: bn_mp_to_unsigned_bin_n.c */
+
+/* Start: bn_mp_toom_mul.c */
+#include "tma.h"
+#ifdef BN_MP_TOOM_MUL_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* multiplication using the Toom-Cook 3-way algorithm
+ *
+ * Much more complicated than Karatsuba but has a lower
+ * asymptotic running time of O(N**1.464). This algorithm is
+ * only particularly useful on VERY large inputs
+ * (we're talking 1000s of digits here...).
+*/
+int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
+ int res, B;
+
+ /* init temps */
+ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
+ &a0, &a1, &a2, &b0, &b1,
+ &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ /* B */
+ B = MIN(a->used, b->used) / 3;
+
+ /* a = a2 * B**2 + a1 * B + a0 */
+ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_copy(a, &a1)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&a1, B);
+ mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
+
+ if ((res = mp_copy(a, &a2)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&a2, B*2);
+
+ /* b = b2 * B**2 + b1 * B + b0 */
+ if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_copy(b, &b1)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&b1, B);
+ mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
+
+ if ((res = mp_copy(b, &b2)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&b2, B*2);
+
+ /* w0 = a0*b0 */
+ if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w4 = a2 * b2 */
+ if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
+ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
+ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+
+
+ /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
+ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* now solve the matrix
+
+ 0 0 0 0 1
+ 1 2 4 8 16
+ 1 1 1 1 1
+ 16 8 4 2 1
+ 1 0 0 0 0
+
+ using 12 subtractions, 4 shifts,
+ 2 small divisions and 1 small multiplication
+ */
+
+ /* r1 - r4 */
+ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r0 */
+ if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1/2 */
+ if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3/2 */
+ if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r2 - r0 - r4 */
+ if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - r2 */
+ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r2 */
+ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - 8r0 */
+ if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - 8r4 */
+ if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* 3r2 - r1 - r3 */
+ if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - r2 */
+ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r2 */
+ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1/3 */
+ if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3/3 */
+ if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* at this point shift W[n] by B*n */
+ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ERR:
+ mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
+ &a0, &a1, &a2, &b0, &b1,
+ &b2, &tmp1, &tmp2, NULL);
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_toom_mul.c */
+
+/* Start: bn_mp_toom_sqr.c */
+#include "tma.h"
+#ifdef BN_MP_TOOM_SQR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* squaring using Toom-Cook 3-way algorithm */
+int
+mp_toom_sqr(mp_int *a, mp_int *b)
+{
+ mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
+ int res, B;
+
+ /* init temps */
+ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ /* B */
+ B = a->used / 3;
+
+ /* a = a2 * B**2 + a1 * B + a0 */
+ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_copy(a, &a1)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&a1, B);
+ mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
+
+ if ((res = mp_copy(a, &a2)) != MP_OKAY) {
+ goto ERR;
+ }
+ mp_rshd(&a2, B*2);
+
+ /* w0 = a0*a0 */
+ if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w4 = a2 * a2 */
+ if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w1 = (a2 + 2(a1 + 2a0))**2 */
+ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* w3 = (a0 + 2(a1 + 2a2))**2 */
+ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+
+
+ /* w2 = (a2 + a1 + a0)**2 */
+ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* now solve the matrix
+
+ 0 0 0 0 1
+ 1 2 4 8 16
+ 1 1 1 1 1
+ 16 8 4 2 1
+ 1 0 0 0 0
+
+ using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
+ */
+
+ /* r1 - r4 */
+ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r0 */
+ if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1/2 */
+ if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3/2 */
+ if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r2 - r0 - r4 */
+ if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - r2 */
+ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r2 */
+ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - 8r0 */
+ if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - 8r4 */
+ if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* 3r2 - r1 - r3 */
+ if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1 - r2 */
+ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3 - r2 */
+ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r1/3 */
+ if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
+ goto ERR;
+ }
+ /* r3/3 */
+ if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ /* at this point shift W[n] by B*n */
+ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
+ goto ERR;
+ }
+ if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
+ goto ERR;
+ }
+
+ERR:
+ mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
+ return res;
+}
+
+#endif
+
+/* End: bn_mp_toom_sqr.c */
+
+/* Start: bn_mp_toradix.c */
+#include "tma.h"
+#ifdef BN_MP_TORADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* stores a bignum as a ASCII string in a given radix (2..64) */
+int mp_toradix (mp_int * a, char *str, int radix)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+ char *_s = str;
+
+ /* check range of the radix */
+ if (radix < 2 || radix > 64) {
+ return MP_VAL;
+ }
+
+ /* quick out if its zero */
+ if (mp_iszero(a) == 1) {
+ *str++ = '0';
+ *str = '\0';
+ return MP_OKAY;
+ }
+
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* if it is negative output a - */
+ if (t.sign == MP_NEG) {
+ ++_s;
+ *str++ = '-';
+ t.sign = MP_ZPOS;
+ }
+
+ digs = 0;
+ while (mp_iszero (&t) == 0) {
+ if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ *str++ = mp_s_rmap[d];
+ ++digs;
+ }
+
+ /* reverse the digits of the string. In this case _s points
+ * to the first digit [exluding the sign] of the number]
+ */
+ bn_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+
+ mp_clear (&t);
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_toradix.c */
+
+/* Start: bn_mp_toradix_n.c */
+#include "tma.h"
+#ifdef BN_MP_TORADIX_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* stores a bignum as a ASCII string in a given radix (2..64)
+ *
+ * Stores upto maxlen-1 chars and always a NULL byte
+ */
+int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+ char *_s = str;
+
+ /* check range of the maxlen, radix */
+ if (maxlen < 3 || radix < 2 || radix > 64) {
+ return MP_VAL;
+ }
+
+ /* quick out if its zero */
+ if (mp_iszero(a) == 1) {
+ *str++ = '0';
+ *str = '\0';
+ return MP_OKAY;
+ }
+
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* if it is negative output a - */
+ if (t.sign == MP_NEG) {
+ /* we have to reverse our digits later... but not the - sign!! */
+ ++_s;
+
+ /* store the flag and mark the number as positive */
+ *str++ = '-';
+ t.sign = MP_ZPOS;
+
+ /* subtract a char */
+ --maxlen;
+ }
+
+ digs = 0;
+ while (mp_iszero (&t) == 0) {
+ if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ *str++ = mp_s_rmap[d];
+ ++digs;
+
+ if (--maxlen == 1) {
+ /* no more room */
+ break;
+ }
+ }
+
+ /* reverse the digits of the string. In this case _s points
+ * to the first digit [exluding the sign] of the number]
+ */
+ bn_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+
+ mp_clear (&t);
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_mp_toradix_n.c */
+
+/* Start: bn_mp_unsigned_bin_size.c */
+#include "tma.h"
+#ifdef BN_MP_UNSIGNED_BIN_SIZE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* get the size for an unsigned equivalent */
+int mp_unsigned_bin_size (mp_int * a)
+{
+ int size = mp_count_bits (a);
+ return (size / 8 + ((size & 7) != 0 ? 1 : 0));
+}
+#endif
+
+/* End: bn_mp_unsigned_bin_size.c */
+
+/* Start: bn_mp_xor.c */
+#include "tma.h"
+#ifdef BN_MP_XOR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* XOR two ints together */
+int
+mp_xor (mp_int * a, mp_int * b, mp_int * c)
+{
+ int res, ix, px;
+ mp_int t, *x;
+
+ if (a->used > b->used) {
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+ px = b->used;
+ x = b;
+ } else {
+ if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
+ return res;
+ }
+ px = a->used;
+ x = a;
+ }
+
+ for (ix = 0; ix < px; ix++) {
+ t.dp[ix] ^= x->dp[ix];
+ }
+ mp_clamp (&t);
+ mp_exch (c, &t);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_mp_xor.c */
+
+/* Start: bn_mp_zero.c */
+#include "tma.h"
+#ifdef BN_MP_ZERO_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* set to zero */
+void mp_zero (mp_int * a)
+{
+ int n;
+ mp_digit *tmp;
+
+ a->sign = MP_ZPOS;
+ a->used = 0;
+
+ tmp = a->dp;
+ for (n = 0; n < a->alloc; n++) {
+ *tmp++ = 0;
+ }
+}
+#endif
+
+/* End: bn_mp_zero.c */
+
+/* Start: bn_prime_tab.c */
+#include "tma.h"
+#ifdef BN_PRIME_TAB_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+const mp_digit ltm_prime_tab[] = {
+ 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+ 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+ 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+ 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
+#ifndef MP_8BIT
+ 0x0083,
+ 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+ 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+ 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+ 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+ 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+ 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+ 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+ 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+ 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+ 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+ 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+ 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+ 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+ 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+ 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+ 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+ 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+ 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+ 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+ 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+ 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+ 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+ 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+ 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+ 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+ 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+ 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+ 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
+#endif
+};
+#endif
+
+/* End: bn_prime_tab.c */
+
+/* Start: bn_reverse.c */
+#include "tma.h"
+#ifdef BN_REVERSE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reverse an array, used for radix code */
+void
+bn_reverse (unsigned char *s, int len)
+{
+ int ix, iy;
+ unsigned char t;
+
+ ix = 0;
+ iy = len - 1;
+ while (ix < iy) {
+ t = s[ix];
+ s[ix] = s[iy];
+ s[iy] = t;
+ ++ix;
+ --iy;
+ }
+}
+#endif
+
+/* End: bn_reverse.c */
+
+/* Start: bn_s_mp_add.c */
+#include "tma.h"
+#ifdef BN_S_MP_ADD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* low level addition, based on HAC pp.594, Algorithm 14.7 */
+int
+s_mp_add (mp_int * a, mp_int * b, mp_int * c)
+{
+ mp_int *x;
+ int olduse, res, min, max;
+
+ /* find sizes, we let |a| <= |b| which means we have to sort
+ * them. "x" will point to the input with the most digits
+ */
+ if (a->used > b->used) {
+ min = b->used;
+ max = a->used;
+ x = a;
+ } else {
+ min = a->used;
+ max = b->used;
+ x = b;
+ }
+
+ /* init result */
+ if (c->alloc < max + 1) {
+ if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+
+ /* get old used digit count and set new one */
+ olduse = c->used;
+ c->used = max + 1;
+
+ {
+ register mp_digit u, *tmpa, *tmpb, *tmpc;
+ register int i;
+
+ /* alias for digit pointers */
+
+ /* first input */
+ tmpa = a->dp;
+
+ /* second input */
+ tmpb = b->dp;
+
+ /* destination */
+ tmpc = c->dp;
+
+ /* zero the carry */
+ u = 0;
+ for (i = 0; i < min; i++) {
+ /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
+ *tmpc = *tmpa++ + *tmpb++ + u;
+
+ /* U = carry bit of T[i] */
+ u = *tmpc >> ((mp_digit)DIGIT_BIT);
+
+ /* take away carry bit from T[i] */
+ *tmpc++ &= MP_MASK;
+ }
+
+ /* now copy higher words if any, that is in A+B
+ * if A or B has more digits add those in
+ */
+ if (min != max) {
+ for (; i < max; i++) {
+ /* T[i] = X[i] + U */
+ *tmpc = x->dp[i] + u;
+
+ /* U = carry bit of T[i] */
+ u = *tmpc >> ((mp_digit)DIGIT_BIT);
+
+ /* take away carry bit from T[i] */
+ *tmpc++ &= MP_MASK;
+ }
+ }
+
+ /* add carry */
+ *tmpc++ = u;
+
+ /* clear digits above oldused */
+ for (i = c->used; i < olduse; i++) {
+ *tmpc++ = 0;
+ }
+ }
+
+ mp_clamp (c);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_s_mp_add.c */
+
+/* Start: bn_s_mp_exptmod.c */
+#include "tma.h"
+#ifdef BN_S_MP_EXPTMOD_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+#ifdef MP_LOW_MEM
+ #define TAB_SIZE 32
+#else
+ #define TAB_SIZE 256
+#endif
+
+int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
+{
+ mp_int M[TAB_SIZE], res, mu;
+ mp_digit buf;
+ int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+ int (*redux)(mp_int*,mp_int*,mp_int*);
+
+ /* find window size */
+ x = mp_count_bits (X);
+ if (x <= 7) {
+ winsize = 2;
+ } else if (x <= 36) {
+ winsize = 3;
+ } else if (x <= 140) {
+ winsize = 4;
+ } else if (x <= 450) {
+ winsize = 5;
+ } else if (x <= 1303) {
+ winsize = 6;
+ } else if (x <= 3529) {
+ winsize = 7;
+ } else {
+ winsize = 8;
+ }
+
+#ifdef MP_LOW_MEM
+ if (winsize > 5) {
+ winsize = 5;
+ }
+#endif
+
+ /* init M array */
+ /* init first cell */
+ if ((err = mp_init(&M[1])) != MP_OKAY) {
+ return err;
+ }
+
+ /* now init the second half of the array */
+ for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+ if ((err = mp_init(&M[x])) != MP_OKAY) {
+ for (y = 1<<(winsize-1); y < x; y++) {
+ mp_clear (&M[y]);
+ }
+ mp_clear(&M[1]);
+ return err;
+ }
+ }
+
+ /* create mu, used for Barrett reduction */
+ if ((err = mp_init (&mu)) != MP_OKAY) {
+ goto LBL_M;
+ }
+
+ if (redmode == 0) {
+ if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ redux = mp_reduce;
+ } else {
+ if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ redux = mp_reduce_2k_l;
+ }
+
+ /* create M table
+ *
+ * The M table contains powers of the base,
+ * e.g. M[x] = G**x mod P
+ *
+ * The first half of the table is not
+ * computed though accept for M[0] and M[1]
+ */
+ if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
+ goto LBL_MU;
+ }
+
+ /* compute the value at M[1<<(winsize-1)] by squaring
+ * M[1] (winsize-1) times
+ */
+ if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
+ goto LBL_MU;
+ }
+
+ for (x = 0; x < (winsize - 1); x++) {
+ /* square it */
+ if ((err = mp_sqr (&M[1 << (winsize - 1)],
+ &M[1 << (winsize - 1)])) != MP_OKAY) {
+ goto LBL_MU;
+ }
+
+ /* reduce modulo P */
+ if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ }
+
+ /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
+ * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
+ */
+ for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+ if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ }
+
+ /* setup result */
+ if ((err = mp_init (&res)) != MP_OKAY) {
+ goto LBL_MU;
+ }
+ mp_set (&res, 1);
+
+ /* set initial mode and bit cnt */
+ mode = 0;
+ bitcnt = 1;
+ buf = 0;
+ digidx = X->used - 1;
+ bitcpy = 0;
+ bitbuf = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset the bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int) DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
+ buf <<= (mp_digit)1;
+
+ /* if the bit is zero and mode == 0 then we ignore it
+ * These represent the leading zero bits before the first 1 bit
+ * in the exponent. Technically this opt is not required but it
+ * does lower the # of trivial squaring/reductions used
+ */
+ if (mode == 0 && y == 0) {
+ continue;
+ }
+
+ /* if the bit is zero and mode == 1 then we square */
+ if (mode == 1 && y == 0) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ continue;
+ }
+
+ /* else we add it to the window */
+ bitbuf |= (y << (winsize - ++bitcpy));
+ mode = 2;
+
+ if (bitcpy == winsize) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < winsize; x++) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* then multiply */
+ if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ mode = 1;
+ }
+ }
+
+ /* if bits remain then square/multiply */
+ if (mode == 2 && bitcpy > 0) {
+ /* square then multiply if the bit is set */
+ for (x = 0; x < bitcpy; x++) {
+ if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ bitbuf <<= 1;
+ if ((bitbuf & (1 << winsize)) != 0) {
+ /* then multiply */
+ if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+ }
+ }
+
+ mp_exch (&res, Y);
+ err = MP_OKAY;
+LBL_RES:mp_clear (&res);
+LBL_MU:mp_clear (&mu);
+LBL_M:
+ mp_clear(&M[1]);
+ for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+ mp_clear (&M[x]);
+ }
+ return err;
+}
+#endif
+
+/* End: bn_s_mp_exptmod.c */
+
+/* Start: bn_s_mp_mul_digs.c */
+#include "tma.h"
+#ifdef BN_S_MP_MUL_DIGS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* multiplies |a| * |b| and only computes upto digs digits of result
+ * HAC pp. 595, Algorithm 14.12 Modified so you can control how
+ * many digits of output are created.
+ */
+int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+{
+ mp_int t;
+ int res, pa, pb, ix, iy;
+ mp_digit u;
+ mp_word r;
+ mp_digit tmpx, *tmpt, *tmpy;
+
+ /* can we use the fast multiplier? */
+ if (((digs) < MP_WARRAY) &&
+ MIN (a->used, b->used) <
+ (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ return fast_s_mp_mul_digs (a, b, c, digs);
+ }
+
+ if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
+ return res;
+ }
+ t.used = digs;
+
+ /* compute the digits of the product directly */
+ pa = a->used;
+ for (ix = 0; ix < pa; ix++) {
+ /* set the carry to zero */
+ u = 0;
+
+ /* limit ourselves to making digs digits of output */
+ pb = MIN (b->used, digs - ix);
+
+ /* setup some aliases */
+ /* copy of the digit from a used within the nested loop */
+ tmpx = a->dp[ix];
+
+ /* an alias for the destination shifted ix places */
+ tmpt = t.dp + ix;
+
+ /* an alias for the digits of b */
+ tmpy = b->dp;
+
+ /* compute the columns of the output and propagate the carry */
+ for (iy = 0; iy < pb; iy++) {
+ /* compute the column as a mp_word */
+ r = ((mp_word)*tmpt) +
+ ((mp_word)tmpx) * ((mp_word)*tmpy++) +
+ ((mp_word) u);
+
+ /* the new column is the lower part of the result */
+ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+
+ /* get the carry word from the result */
+ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+ }
+ /* set carry if it is placed below digs */
+ if (ix + iy < digs) {
+ *tmpt = u;
+ }
+ }
+
+ mp_clamp (&t);
+ mp_exch (&t, c);
+
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_s_mp_mul_digs.c */
+
+/* Start: bn_s_mp_mul_high_digs.c */
+#include "tma.h"
+#ifdef BN_S_MP_MUL_HIGH_DIGS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* multiplies |a| * |b| and does not compute the lower digs digits
+ * [meant to get the higher part of the product]
+ */
+int
+s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+{
+ mp_int t;
+ int res, pa, pb, ix, iy;
+ mp_digit u;
+ mp_word r;
+ mp_digit tmpx, *tmpt, *tmpy;
+
+ /* can we use the fast multiplier? */
+#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
+ if (((a->used + b->used + 1) < MP_WARRAY)
+ && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ return fast_s_mp_mul_high_digs (a, b, c, digs);
+ }
+#endif
+
+ if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ t.used = a->used + b->used + 1;
+
+ pa = a->used;
+ pb = b->used;
+ for (ix = 0; ix < pa; ix++) {
+ /* clear the carry */
+ u = 0;
+
+ /* left hand side of A[ix] * B[iy] */
+ tmpx = a->dp[ix];
+
+ /* alias to the address of where the digits will be stored */
+ tmpt = &(t.dp[digs]);
+
+ /* alias for where to read the right hand side from */
+ tmpy = b->dp + (digs - ix);
+
+ for (iy = digs - ix; iy < pb; iy++) {
+ /* calculate the double precision result */
+ r = ((mp_word)*tmpt) +
+ ((mp_word)tmpx) * ((mp_word)*tmpy++) +
+ ((mp_word) u);
+
+ /* get the lower part */
+ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+
+ /* carry the carry */
+ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+ }
+ *tmpt = u;
+ }
+ mp_clamp (&t);
+ mp_exch (&t, c);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_s_mp_mul_high_digs.c */
+
+/* Start: bn_s_mp_sqr.c */
+#include "tma.h"
+#ifdef BN_S_MP_SQR_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
+int s_mp_sqr (mp_int * a, mp_int * b)
+{
+ mp_int t;
+ int res, ix, iy, pa;
+ mp_word r;
+ mp_digit u, tmpx, *tmpt;
+
+ pa = a->used;
+ if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
+ return res;
+ }
+
+ /* default used is maximum possible size */
+ t.used = 2*pa + 1;
+
+ for (ix = 0; ix < pa; ix++) {
+ /* first calculate the digit at 2*ix */
+ /* calculate double precision result */
+ r = ((mp_word) t.dp[2*ix]) +
+ ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
+
+ /* store lower part in result */
+ t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
+
+ /* get the carry */
+ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
+
+ /* left hand side of A[ix] * A[iy] */
+ tmpx = a->dp[ix];
+
+ /* alias for where to store the results */
+ tmpt = t.dp + (2*ix + 1);
+
+ for (iy = ix + 1; iy < pa; iy++) {
+ /* first calculate the product */
+ r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
+
+ /* now calculate the double precision result, note we use
+ * addition instead of *2 since it's easier to optimize
+ */
+ r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
+
+ /* store lower part */
+ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+
+ /* get carry */
+ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
+ }
+ /* propagate upwards */
+ while (u != ((mp_digit) 0)) {
+ r = ((mp_word) *tmpt) + ((mp_word) u);
+ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
+ }
+ }
+
+ mp_clamp (&t);
+ mp_exch (&t, b);
+ mp_clear (&t);
+ return MP_OKAY;
+}
+#endif
+
+/* End: bn_s_mp_sqr.c */
+
+/* Start: bn_s_mp_sub.c */
+#include "tma.h"
+#ifdef BN_S_MP_SUB_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
+int
+s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
+{
+ int olduse, res, min, max;
+
+ /* find sizes */
+ min = b->used;
+ max = a->used;
+
+ /* init result */
+ if (c->alloc < max) {
+ if ((res = mp_grow (c, max)) != MP_OKAY) {
+ return res;
+ }
+ }
+ olduse = c->used;
+ c->used = max;
+
+ {
+ register mp_digit u, *tmpa, *tmpb, *tmpc;
+ register int i;
+
+ /* alias for digit pointers */
+ tmpa = a->dp;
+ tmpb = b->dp;
+ tmpc = c->dp;
+
+ /* set carry to zero */
+ u = 0;
+ for (i = 0; i < min; i++) {
+ /* T[i] = A[i] - B[i] - U */
+ *tmpc = *tmpa++ - *tmpb++ - u;
+
+ /* U = carry bit of T[i]
+ * Note this saves performing an AND operation since
+ * if a carry does occur it will propagate all the way to the
+ * MSB. As a result a single shift is enough to get the carry
+ */
+ u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
+
+ /* Clear carry from T[i] */
+ *tmpc++ &= MP_MASK;
+ }
+
+ /* now copy higher words if any, e.g. if A has more digits than B */
+ for (; i < max; i++) {
+ /* T[i] = A[i] - U */
+ *tmpc = *tmpa++ - u;
+
+ /* U = carry bit of T[i] */
+ u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
+
+ /* Clear carry from T[i] */
+ *tmpc++ &= MP_MASK;
+ }
+
+ /* clear digits above used (since we may not have grown result above) */
+ for (i = c->used; i < olduse; i++) {
+ *tmpc++ = 0;
+ }
+ }
+
+ mp_clamp (c);
+ return MP_OKAY;
+}
+
+#endif
+
+/* End: bn_s_mp_sub.c */
+
+/* Start: bncore.c */
+#include "tma.h"
+#ifdef BNCORE_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* Known optimal configurations
+
+ CPU /Compiler /MUL CUTOFF/SQR CUTOFF
+-------------------------------------------------------------
+ Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
+ AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34
+
+*/
+
+int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */
+ KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */
+
+ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
+ TOOM_SQR_CUTOFF = 400;
+#endif
+
+/* End: bncore.c */
+
+
+/* EOF */
--- /dev/null
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+#ifndef TMA_H
+#define TMA_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include <tma_class.h>
+
+/* Assure these -Pekka */
+#undef MP_8BIT
+#undef MP_16BIT
+#undef CRYPT
+
+#undef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#undef MAX
+#define MAX(x,y) ((x)>(y)?(x):(y))
+
+#ifdef __cplusplus
+extern "C" {
+
+/* C++ compilers don't like assigning void * to mp_digit * */
+#define OPT_CAST(x) (x *)
+
+#else
+
+/* C on the other hand doesn't care */
+#define OPT_CAST(x)
+
+#endif
+
+
+/* detect 64-bit mode if possible */
+#if defined(__x86_64__)
+ #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT))
+ #define MP_64BIT
+ #endif
+#endif
+
+/* some default configurations.
+ *
+ * A "mp_digit" must be able to hold DIGIT_BIT + 1 bits
+ * A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits
+ *
+ * At the very least a mp_digit must be able to hold 7 bits
+ * [any size beyond that is ok provided it doesn't overflow the data type]
+ */
+#ifdef MP_8BIT
+ typedef unsigned char mp_digit;
+ typedef unsigned short mp_word;
+#elif defined(MP_16BIT)
+ typedef unsigned short mp_digit;
+ typedef unsigned long mp_word;
+#elif defined(MP_64BIT)
+ /* for GCC only on supported platforms */
+#ifndef CRYPT
+ typedef unsigned long long ulong64;
+ typedef signed long long long64;
+#endif
+
+ typedef unsigned long mp_digit;
+ typedef unsigned long mp_word __attribute__ ((mode(TI)));
+
+ #define DIGIT_BIT 60
+#else
+ /* this is the default case, 28-bit digits */
+
+ /* this is to make porting into LibTomCrypt easier :-) */
+#ifndef CRYPT
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
+ typedef unsigned __int64 ulong64;
+ typedef signed __int64 long64;
+ #else
+ typedef unsigned long long ulong64;
+ typedef signed long long long64;
+ #endif
+#endif
+
+ typedef unsigned long mp_digit;
+ typedef ulong64 mp_word;
+
+#ifdef MP_31BIT
+ /* this is an extension that uses 31-bit digits */
+ #define DIGIT_BIT 31
+#else
+ /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */
+ #define DIGIT_BIT 28
+ #define MP_28BIT
+#endif
+#endif
+
+/* define heap macros */
+#ifndef CRYPT
+ /* default to libc stuff */
+ #ifndef XMALLOC
+ #define XMALLOC malloc
+ #define XFREE free
+ #define XREALLOC realloc
+ #define XCALLOC calloc
+ #else
+ /* prototypes for our heap functions */
+ extern void *XMALLOC(size_t n);
+ extern void *REALLOC(void *p, size_t n);
+ extern void *XCALLOC(size_t n, size_t s);
+ extern void XFREE(void *p);
+ #endif
+#endif
+
+
+/* otherwise the bits per digit is calculated automatically from the size of a mp_digit */
+#ifndef DIGIT_BIT
+ #define DIGIT_BIT ((int)((CHAR_BIT * sizeof(mp_digit) - 1))) /* bits per digit */
+#endif
+
+#define MP_DIGIT_BIT DIGIT_BIT
+#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
+#define MP_DIGIT_MAX MP_MASK
+
+/* equalities */
+#define MP_LT -1 /* less than */
+#define MP_EQ 0 /* equal to */
+#define MP_GT 1 /* greater than */
+
+#define MP_ZPOS 0 /* positive integer */
+#define MP_NEG 1 /* negative */
+
+#define MP_OKAY 0 /* ok result */
+#define MP_MEM -2 /* out of mem */
+#define MP_VAL -3 /* invalid input */
+#define MP_RANGE MP_VAL
+
+#define MP_YES 1 /* yes response */
+#define MP_NO 0 /* no response */
+
+/* Primality generation flags */
+#define LTM_PRIME_BBS 0x0001 /* BBS style prime */
+#define LTM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */
+#define LTM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */
+#define LTM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */
+
+typedef int mp_err;
+
+/* you'll have to tune these... */
+extern int KARATSUBA_MUL_CUTOFF,
+ KARATSUBA_SQR_CUTOFF,
+ TOOM_MUL_CUTOFF,
+ TOOM_SQR_CUTOFF;
+
+/* define this to use lower memory usage routines (exptmods mostly) */
+/* #define MP_LOW_MEM */
+
+/* default precision */
+#ifndef MP_PREC
+ #ifndef MP_LOW_MEM
+ #define MP_PREC 64 /* default digits of precision */
+ #else
+ #define MP_PREC 8 /* default digits of precision */
+ #endif
+#endif
+
+/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
+#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
+
+/* the infamous mp_int structure */
+typedef struct {
+ int used, alloc, sign;
+ mp_digit *dp;
+} mp_int;
+
+/* callback for mp_prime_random, should fill dst with random bytes and return how many read [upto len] */
+typedef int ltm_prime_callback(unsigned char *dst, int len, void *dat);
+
+
+#define USED(m) ((m)->used)
+#define DIGIT(m,k) ((m)->dp[(k)])
+#define SIGN(m) ((m)->sign)
+
+/* error code to char* string */
+char *mp_error_to_string(int code);
+
+/* ---> init and deinit bignum functions <--- */
+/* init a bignum */
+int mp_init(mp_int *a);
+
+/* free a bignum */
+void mp_clear(mp_int *a);
+
+/* init a null terminated series of arguments */
+int mp_init_multi(mp_int *mp, ...);
+
+/* clear a null terminated series of arguments */
+void mp_clear_multi(mp_int *mp, ...);
+
+/* exchange two ints */
+void mp_exch(mp_int *a, mp_int *b);
+
+/* shrink ram required for a bignum */
+int mp_shrink(mp_int *a);
+
+/* grow an int to a given size */
+int mp_grow(mp_int *a, int size);
+
+/* init to a given number of digits */
+int mp_init_size(mp_int *a, int size);
+
+/* ---> Basic Manipulations <--- */
+#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
+#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
+#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
+
+/* set to zero */
+void mp_zero(mp_int *a);
+
+/* set to a digit */
+void mp_set(mp_int *a, mp_digit b);
+
+/* set a 32-bit const */
+int mp_set_int(mp_int *a, unsigned long b);
+
+/* get a 32-bit value */
+unsigned long mp_get_int(mp_int * a);
+
+/* initialize and set a digit */
+int mp_init_set (mp_int * a, mp_digit b);
+
+/* initialize and set 32-bit value */
+int mp_init_set_int (mp_int * a, unsigned long b);
+
+/* copy, b = a */
+int mp_copy(mp_int *a, mp_int *b);
+
+/* inits and copies, a = b */
+int mp_init_copy(mp_int *a, mp_int *b);
+
+/* trim unused digits */
+void mp_clamp(mp_int *a);
+
+/* ---> digit manipulation <--- */
+
+/* right shift by "b" digits */
+void mp_rshd(mp_int *a, int b);
+
+/* left shift by "b" digits */
+int mp_lshd(mp_int *a, int b);
+
+/* c = a / 2**b */
+int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d);
+
+/* b = a/2 */
+int mp_div_2(mp_int *a, mp_int *b);
+
+/* c = a * 2**b */
+int mp_mul_2d(mp_int *a, int b, mp_int *c);
+
+/* b = a*2 */
+int mp_mul_2(mp_int *a, mp_int *b);
+
+/* c = a mod 2**d */
+int mp_mod_2d(mp_int *a, int b, mp_int *c);
+
+/* computes a = 2**b */
+int mp_2expt(mp_int *a, int b);
+
+/* Counts the number of lsbs which are zero before the first zero bit */
+int mp_cnt_lsb(mp_int *a);
+
+/* I Love Earth! */
+
+/* makes a pseudo-random int of a given size */
+int mp_rand(mp_int *a, int digits);
+
+/* ---> binary operations <--- */
+/* c = a XOR b */
+int mp_xor(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a OR b */
+int mp_or(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a AND b */
+int mp_and(mp_int *a, mp_int *b, mp_int *c);
+
+/* ---> Basic arithmetic <--- */
+
+/* b = -a */
+int mp_neg(mp_int *a, mp_int *b);
+
+/* b = |a| */
+int mp_abs(mp_int *a, mp_int *b);
+
+/* compare a to b */
+int mp_cmp(mp_int *a, mp_int *b);
+
+/* compare |a| to |b| */
+int mp_cmp_mag(mp_int *a, mp_int *b);
+
+/* c = a + b */
+int mp_add(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a - b */
+int mp_sub(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a * b */
+int mp_mul(mp_int *a, mp_int *b, mp_int *c);
+
+/* b = a*a */
+int mp_sqr(mp_int *a, mp_int *b);
+
+/* a/b => cb + d == a */
+int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* c = a mod b, 0 <= c < b */
+int mp_mod(mp_int *a, mp_int *b, mp_int *c);
+
+/* ---> single digit functions <--- */
+
+/* compare against a single digit */
+int mp_cmp_d(mp_int *a, mp_digit b);
+
+/* c = a + b */
+int mp_add_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a - b */
+int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a * b */
+int mp_mul_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* a/b => cb + d == a */
+int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d);
+
+/* a/3 => 3c + d == a */
+int mp_div_3(mp_int *a, mp_int *c, mp_digit *d);
+
+/* c = a**b */
+int mp_expt_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a mod b, 0 <= c < b */
+int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c);
+
+/* ---> number theory <--- */
+
+/* d = a + b (mod c) */
+int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* d = a - b (mod c) */
+int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* d = a * b (mod c) */
+int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* c = a * a (mod b) */
+int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = 1/a (mod b) */
+int mp_invmod(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = (a, b) */
+int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+
+/* produces value such that U1*a + U2*b = U3 */
+int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3);
+
+/* c = [a, b] or (a*b)/(a, b) */
+int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+
+/* finds one of the b'th root of a, such that |c|**b <= |a|
+ *
+ * returns error if a < 0 and b is even
+ */
+int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
+
+/* special sqrt algo */
+int mp_sqrt(mp_int *arg, mp_int *ret);
+
+/* is number a square? */
+int mp_is_square(mp_int *arg, int *ret);
+
+/* computes the jacobi c = (a | n) (or Legendre if b is prime) */
+int mp_jacobi(mp_int *a, mp_int *n, int *c);
+
+/* used to setup the Barrett reduction for a given modulus b */
+int mp_reduce_setup(mp_int *a, mp_int *b);
+
+/* Barrett Reduction, computes a (mod b) with a precomputed value c
+ *
+ * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
+ * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
+ */
+int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
+
+/* setups the montgomery reduction */
+int mp_montgomery_setup(mp_int *a, mp_digit *mp);
+
+/* computes a = B**n mod b without division or multiplication useful for
+ * normalizing numbers in a Montgomery system.
+ */
+int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
+
+/* computes x/R == x (mod N) via Montgomery Reduction */
+int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+
+/* returns 1 if a is a valid DR modulus */
+int mp_dr_is_modulus(mp_int *a);
+
+/* sets the value of "d" required for mp_dr_reduce */
+void mp_dr_setup(mp_int *a, mp_digit *d);
+
+/* reduces a modulo b using the Diminished Radix method */
+int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp);
+
+/* returns true if a can be reduced with mp_reduce_2k */
+int mp_reduce_is_2k(mp_int *a);
+
+/* determines k value for 2k reduction */
+int mp_reduce_2k_setup(mp_int *a, mp_digit *d);
+
+/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
+int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d);
+
+/* returns true if a can be reduced with mp_reduce_2k_l */
+int mp_reduce_is_2k_l(mp_int *a);
+
+/* determines k value for 2k reduction */
+int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
+
+/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
+int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
+
+/* d = a**b (mod c) */
+int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* ---> Primes <--- */
+
+/* number of primes */
+#ifdef MP_8BIT
+ #define PRIME_SIZE 31
+#else
+ #define PRIME_SIZE 256
+#endif
+
+/* table of first PRIME_SIZE primes */
+extern const mp_digit ltm_prime_tab[];
+
+/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
+int mp_prime_is_divisible(mp_int *a, int *result);
+
+/* performs one Fermat test of "a" using base "b".
+ * Sets result to 0 if composite or 1 if probable prime
+ */
+int mp_prime_fermat(mp_int *a, mp_int *b, int *result);
+
+/* performs one Miller-Rabin test of "a" using base "b".
+ * Sets result to 0 if composite or 1 if probable prime
+ */
+int mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result);
+
+/* This gives [for a given bit size] the number of trials required
+ * such that Miller-Rabin gives a prob of failure lower than 2^-96
+ */
+int mp_prime_rabin_miller_trials(int size);
+
+/* performs t rounds of Miller-Rabin on "a" using the first
+ * t prime bases. Also performs an initial sieve of trial
+ * division. Determines if "a" is prime with probability
+ * of error no more than (1/4)**t.
+ *
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime(mp_int *a, int t, int *result);
+
+/* finds the next prime after the number "a" using "t" trials
+ * of Miller-Rabin.
+ *
+ * bbs_style = 1 means the prime must be congruent to 3 mod 4
+ */
+int mp_prime_next_prime(mp_int *a, int t, int bbs_style);
+
+/* makes a truly random prime of a given size (bytes),
+ * call with bbs = 1 if you want it to be congruent to 3 mod 4
+ *
+ * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
+ * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
+ * so it can be NULL
+ *
+ * The prime generated will be larger than 2^(8*size).
+ */
+#define mp_prime_random(a, t, size, bbs, cb, dat) mp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?LTM_PRIME_BBS:0, cb, dat)
+
+/* makes a truly random prime of a given size (bits),
+ *
+ * Flags are as follows:
+ *
+ * LTM_PRIME_BBS - make prime congruent to 3 mod 4
+ * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
+ * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
+ * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
+ *
+ * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
+ * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
+ * so it can be NULL
+ *
+ */
+int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat);
+
+/* ---> radix conversion <--- */
+int mp_count_bits(mp_int *a);
+
+int mp_unsigned_bin_size(mp_int *a);
+int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c);
+int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
+int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
+
+int mp_signed_bin_size(mp_int *a);
+int mp_read_signed_bin(mp_int *a, unsigned char *b, int c);
+int mp_to_signed_bin(mp_int *a, unsigned char *b);
+int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
+
+int mp_read_radix(mp_int *a, const char *str, int radix);
+int mp_toradix(mp_int *a, char *str, int radix);
+int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen);
+int mp_radix_size(mp_int *a, int radix, int *size);
+
+int mp_fread(mp_int *a, int radix, FILE *stream);
+int mp_fwrite(mp_int *a, int radix, FILE *stream);
+
+#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
+#define mp_raw_size(mp) mp_signed_bin_size(mp)
+#define mp_toraw(mp, str) mp_to_signed_bin((mp), (str))
+#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len))
+#define mp_mag_size(mp) mp_unsigned_bin_size(mp)
+#define mp_tomag(mp, str) mp_to_unsigned_bin((mp), (str))
+
+#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S) mp_toradix((M), (S), 16)
+
+/* lowlevel functions, do not call! */
+int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
+int s_mp_sub(mp_int *a, mp_int *b, mp_int *c);
+#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
+int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int fast_s_mp_sqr(mp_int *a, mp_int *b);
+int s_mp_sqr(mp_int *a, mp_int *b);
+int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c);
+int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c);
+int mp_karatsuba_sqr(mp_int *a, mp_int *b);
+int mp_toom_sqr(mp_int *a, mp_int *b);
+int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c);
+int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c);
+int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y, int mode);
+int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int mode);
+void bn_reverse(unsigned char *s, int len);
+
+extern const char *mp_s_rmap;
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* TMA_H */
/*
- silcske.c
+ silcske.c
Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2000 - 2002 Pekka Riikonen
+ Copyright (C) 2000 - 2005 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "groups_internal.h"
/* Static functions */
-static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n,
- SilcUInt32 len,
+static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n,
+ SilcUInt32 len,
SilcMPInt *rnd);
-static SilcSKEStatus silc_ske_make_hash(SilcSKE ske,
+static SilcSKEStatus silc_ske_make_hash(SilcSKE ske,
unsigned char *return_hash,
SilcUInt32 *return_hash_len,
int initiator);
}
}
-/* Sets the callback functions for the SKE session.
+/* Sets the callback functions for the SKE session.
The `send_packet' callback is a function that sends the packet to
network. The SKE library will call it at any time packet needs to
- be sent to the remote host.
+ be sent to the remote host.
The `payload_receive' callback is called when the remote host's Key
Exchange Start Payload has been processed. The payload is saved
That is why the application must call the completion callback when the
verification process has been completed. The library then calls the user
callback (`proto_continue'), if it is provided to indicate that the SKE
- protocol may continue.
-
+ protocol may continue.
+
The `proto_continue' callback is called to indicate that it is
safe to continue the execution of the SKE protocol after executing
an asynchronous operation, such as calling the `verify_key' callback
The `check_version' callback is called to verify the remote host's
version. The application may check its own version against the remote
host's version and determine whether supporting the remote host
- is possible.
+ is possible.
The `context' is passed as argument to all of the above callback
functions. */
/* Send the packet. */
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE,
+ (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE,
ske->callbacks->context);
- /* Save the the payload buffer for future use. It is later used to
+ /* Save the the payload buffer for future use. It is later used to
compute the HASH value. */
ske->start_payload_copy = payload_buf;
ske->start_payload = start_payload;
security properties selected by the responder from our payload
sent in the silc_ske_initiator_start function. */
-SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske,
+SilcSKEStatus silc_ske_initiator_phase_1(SilcSKE ske,
SilcBuffer start_payload)
{
SilcSKEStatus status = SILC_SKE_STATUS_OK;
/* Check version string */
if (ske->callbacks->check_version) {
- status = ske->callbacks->check_version(ske, payload->version,
+ status = ske->callbacks->check_version(ske, payload->version,
payload->version_len,
ske->callbacks->context);
if (status != SILC_SKE_STATUS_OK) {
silc_ske_payload_start_free(ske->start_payload);
/* Take the selected security properties into use while doing
- the key exchange. This is used only while doing the key
+ the key exchange. This is used only while doing the key
exchange. The same data is returned to upper levels by calling
the callback function. */
ske->prop = prop = silc_calloc(1, sizeof(*prop));
return status;
}
-/* This function creates random number x, such that 1 < x < q and
- computes e = g ^ x mod p and sends the result to the remote end in
+/* This function creates random number x, such that 1 < x < q and
+ computes e = g ^ x mod p and sends the result to the remote end in
Key Exchange Payload. */
SilcSKEStatus silc_ske_initiator_phase_2(SilcSKE ske,
return ske->status;
}
silc_mp_init(x);
- status =
+ status =
silc_ske_create_rnd(ske, &ske->prop->group->group_order,
silc_mp_sizeinbase(&ske->prop->group->group_order, 2),
x);
/* Do the Diffie Hellman computation, e = g ^ x mod p */
silc_mp_init(&payload->x);
- silc_mp_pow_mod(&payload->x, &ske->prop->group->generator, x,
+ silc_mp_pow_mod(&payload->x, &ske->prop->group->generator, x,
&ske->prop->group->group);
/* Get public key */
silc_ske_make_hash(ske, hash, &hash_len, TRUE);
SILC_LOG_DEBUG(("Signing HASH_i value"));
-
+
/* Sign the hash value */
- silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv,
+ silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv,
private_key->prv_len);
if (silc_pkcs_get_key_len(ske->prop->pkcs) / 8 > sizeof(sign) - 1 ||
!silc_pkcs_sign(ske->prop->pkcs, hash, hash_len, sign, &sign_len)) {
/* Send the packet. */
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, payload_buf,
- SILC_PACKET_KEY_EXCHANGE_1,
+ (*ske->callbacks->send_packet)(ske, payload_buf,
+ SILC_PACKET_KEY_EXCHANGE_1,
ske->callbacks->context);
silc_buffer_free(payload_buf);
if (payload->pk_data) {
/* Decode the public key */
- if (!silc_pkcs_public_key_decode(payload->pk_data, payload->pk_len,
+ if (!silc_pkcs_public_key_decode(payload->pk_data, payload->pk_len,
&public_key)) {
status = SILC_SKE_STATUS_UNSUPPORTED_PUBLIC_KEY;
SILC_LOG_ERROR(("Unsupported/malformed public key received"));
/* Verify signature */
silc_pkcs_public_key_set(ske->prop->pkcs, public_key);
- if (silc_pkcs_verify(ske->prop->pkcs, payload->sign_data,
+ if (silc_pkcs_verify(ske->prop->pkcs, payload->sign_data,
payload->sign_len, hash, hash_len) == FALSE) {
SILC_LOG_ERROR(("Signature verification failed, incorrect signature"));
status = SILC_SKE_STATUS_INCORRECT_SIGNATURE;
}
SILC_LOG_DEBUG(("Signature is Ok"));
-
+
silc_pkcs_public_key_free(public_key);
memset(hash, 'F', hash_len);
}
/* Receives Key Exchange Payload from responder consisting responders
public key, f, and signature. This function verifies the public key,
- computes the secret shared key and verifies the signature.
+ computes the secret shared key and verifies the signature.
The `proto_continue' will be called to indicate that the caller may
continue with the SKE protocol. The caller must not continue
This calls the `verify_key' callback to verify the received public
key or certificate. If the `verify_key' is provided then the remote
- must send public key and it is considered to be an error if remote
+ must send public key and it is considered to be an error if remote
does not send its public key. If caller is performing a re-key with
SKE then the `verify_key' is usually not provided when it is not also
required for the remote to send its public key. */
if (payload->pk_data && ske->callbacks->verify_key) {
SILC_LOG_DEBUG(("Verifying public key"));
-
+
ske->users++;
(*ske->callbacks->verify_key)(ske, payload->pk_data, payload->pk_len,
payload->pk_type, ske->callbacks->context,
silc_ske_initiator_finish_final, NULL);
-
+
/* We will continue to the final state after the public key has
been verified by the caller. */
return SILC_SKE_STATUS_PENDING;
/* Starts Key Exchange protocol for responder. Responder receives
Key Exchange Start Payload from initiator consisting of all the
security properties the initiator supports. This function decodes
- the payload and parses the payload further and selects the right
+ the payload and parses the payload further and selects the right
security properties. */
SilcSKEStatus silc_ske_responder_start(SilcSKE ske, SilcRng rng,
return status;
}
-/* The selected security properties from the initiator payload is now
+/* The selected security properties from the initiator payload is now
encoded into Key Exchange Start Payload and sent to the initiator. */
SilcSKEStatus silc_ske_responder_phase_1(SilcSKE ske)
prop->group = group;
- if (silc_pkcs_alloc(ske->start_payload->pkcs_alg_list,
+ if (silc_pkcs_alloc(ske->start_payload->pkcs_alg_list,
&prop->pkcs) == FALSE) {
status = SILC_SKE_STATUS_UNKNOWN_PKCS;
goto err;
}
- if (silc_cipher_alloc(ske->start_payload->enc_alg_list,
+ if (silc_cipher_alloc(ske->start_payload->enc_alg_list,
&prop->cipher) == FALSE) {
status = SILC_SKE_STATUS_UNKNOWN_CIPHER;
goto err;
}
/* Encode the payload */
- status = silc_ske_payload_start_encode(ske, ske->start_payload,
+ status = silc_ske_payload_start_encode(ske, ske->start_payload,
&payload_buf);
if (status != SILC_SKE_STATUS_OK)
goto err;
/* Send the packet. */
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE,
+ (*ske->callbacks->send_packet)(ske, payload_buf, SILC_PACKET_KEY_EXCHANGE,
ske->callbacks->context);
silc_buffer_free(payload_buf);
/* The public key verification was performed only if the Mutual
Authentication flag is set. */
- if (ske->start_payload &&
+ if (ske->start_payload &&
ske->start_payload->flags & SILC_SKE_SP_FLAG_MUTUAL) {
SilcPublicKey public_key = NULL;
unsigned char hash[32];
SilcUInt32 hash_len;
/* Decode the public key */
- if (!silc_pkcs_public_key_decode(recv_payload->pk_data,
- recv_payload->pk_len,
+ if (!silc_pkcs_public_key_decode(recv_payload->pk_data,
+ recv_payload->pk_len,
&public_key)) {
ske->status = SILC_SKE_STATUS_UNSUPPORTED_PUBLIC_KEY;
SILC_LOG_ERROR(("Unsupported/malformed public key received"));
}
SILC_LOG_DEBUG(("Verifying signature (HASH_i)"));
-
+
/* Verify signature */
silc_pkcs_public_key_set(ske->prop->pkcs, public_key);
- if (silc_pkcs_verify(ske->prop->pkcs, recv_payload->sign_data,
+ if (silc_pkcs_verify(ske->prop->pkcs, recv_payload->sign_data,
recv_payload->sign_len, hash, hash_len) == FALSE) {
SILC_LOG_ERROR(("Signature verification failed, incorrect signature"));
ske->status = SILC_SKE_STATUS_INCORRECT_SIGNATURE;
ske->callbacks->proto_continue(ske, ske->callbacks->context);
return;
}
-
+
SILC_LOG_DEBUG(("Signature is Ok"));
-
+
silc_pkcs_public_key_free(public_key);
memset(hash, 'F', hash_len);
}
/* Create the random number x, 1 < x < q. */
x = silc_calloc(1, sizeof(*x));
silc_mp_init(x);
- status =
+ status =
silc_ske_create_rnd(ske, &ske->prop->group->group_order,
silc_mp_sizeinbase(&ske->prop->group->group_order, 2),
x);
/* Do the Diffie Hellman computation, f = g ^ x mod p */
silc_mp_init(&send_payload->x);
- silc_mp_pow_mod(&send_payload->x, &ske->prop->group->generator, x,
+ silc_mp_pow_mod(&send_payload->x, &ske->prop->group->generator, x,
&ske->prop->group->group);
-
+
/* Call the callback. The caller may now continue with the SKE protocol. */
ske->status = SILC_SKE_STATUS_OK;
if (ske->callbacks->proto_continue)
}
/* This function receives the Key Exchange Payload from the initiator.
- This also performs the mutual authentication if required. Then, this
+ This also performs the mutual authentication if required. Then, this
function first generated a random number x, such that 1 < x < q
and computes f = g ^ x mod p. This then puts the result f to a Key
- Exchange Payload.
+ Exchange Payload.
The `proto_continue' will be called to indicate that the caller may
continue with the SKE protocol. The caller must not continue
/* Verify the received public key and verify the signature if we are
doing mutual authentication. */
- if (ske->start_payload &&
+ if (ske->start_payload &&
ske->start_payload->flags & SILC_SKE_SP_FLAG_MUTUAL) {
SILC_LOG_DEBUG(("We are doing mutual authentication"));
-
+
if (!recv_payload->pk_data && ske->callbacks->verify_key) {
SILC_LOG_ERROR(("Remote end did not send its public key (or "
"certificate), even though we require it"));
SILC_LOG_DEBUG(("Verifying public key"));
ske->users++;
- (*ske->callbacks->verify_key)(ske, recv_payload->pk_data,
+ (*ske->callbacks->verify_key)(ske, recv_payload->pk_data,
recv_payload->pk_len,
- recv_payload->pk_type,
+ recv_payload->pk_type,
ske->callbacks->context,
silc_ske_responder_phase2_final, NULL);
/* Compute the shared secret key */
KEY = silc_calloc(1, sizeof(*KEY));
silc_mp_init(KEY);
- silc_mp_pow_mod(KEY, &ske->ke1_payload->x, ske->x,
+ silc_mp_pow_mod(KEY, &ske->ke1_payload->x, ske->x,
&ske->prop->group->group);
ske->KEY = KEY;
if (public_key && private_key) {
SILC_LOG_DEBUG(("Getting public key"));
-
+
/* Get the public key */
pk = silc_pkcs_public_key_encode(public_key, &pk_len);
if (!pk) {
}
ske->ke2_payload->pk_data = pk;
ske->ke2_payload->pk_len = pk_len;
-
+
SILC_LOG_DEBUG(("Computing HASH value"));
-
+
/* Compute the hash value */
memset(hash, 0, sizeof(hash));
status = silc_ske_make_hash(ske, hash, &hash_len, FALSE);
ske->hash = silc_memdup(hash, hash_len);
ske->hash_len = hash_len;
-
+
SILC_LOG_DEBUG(("Signing HASH value"));
-
+
/* Sign the hash value */
- silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv,
+ silc_pkcs_private_key_data_set(ske->prop->pkcs, private_key->prv,
private_key->prv_len);
if (silc_pkcs_get_key_len(ske->prop->pkcs) / 8 > sizeof(sign) - 1 ||
!silc_pkcs_sign(ske->prop->pkcs, hash, hash_len, sign, &sign_len)) {
/* Send the packet. */
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, payload_buf,
+ (*ske->callbacks->send_packet)(ske, payload_buf,
SILC_PACKET_KEY_EXCHANGE_2,
ske->callbacks->context);
silc_buffer_set(&packet, data, 4);
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_SUCCESS,
+ (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_SUCCESS,
ske->callbacks->context);
return SILC_SKE_STATUS_OK;
silc_buffer_set(&packet, data, 4);
if (ske->callbacks->send_packet)
- (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_FAILURE,
+ (*ske->callbacks->send_packet)(ske, &packet, SILC_PACKET_FAILURE,
ske->callbacks->context);
return SILC_SKE_STATUS_OK;
as, this function is called by the caller of the protocol and not
by the protocol itself. */
-SilcSKEStatus
+SilcSKEStatus
silc_ske_assemble_security_properties(SilcSKE ske,
SilcSKESecurityPropertyFlag flags,
const char *version,
rp->comp_alg_list = strdup("none");
rp->comp_alg_len = strlen("none");
- rp->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN +
+ rp->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN +
2 + rp->version_len +
- 2 + rp->ke_grp_len + 2 + rp->pkcs_alg_len +
- 2 + rp->enc_alg_len + 2 + rp->hash_alg_len +
+ 2 + rp->ke_grp_len + 2 + rp->pkcs_alg_len +
+ 2 + rp->enc_alg_len + 2 + rp->hash_alg_len +
2 + rp->hmac_alg_len + 2 + rp->comp_alg_len;
*return_payload = rp;
return SILC_SKE_STATUS_OK;
}
-/* Selects the supported security properties from the remote end's Key
+/* Selects the supported security properties from the remote end's Key
Exchange Start Payload. */
-SilcSKEStatus
+SilcSKEStatus
silc_ske_select_security_properties(SilcSKE ske,
const char *version,
SilcSKEStartPayload *payload,
/* Check version string */
if (ske->callbacks->check_version) {
- status = ske->callbacks->check_version(ske, rp->version,
+ status = ske->callbacks->check_version(ske, rp->version,
rp->version_len,
ske->callbacks->context);
if (status != SILC_SKE_STATUS_OK) {
}
}
- payload->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN +
- 2 + payload->version_len +
- 2 + payload->ke_grp_len + 2 + payload->pkcs_alg_len +
- 2 + payload->enc_alg_len + 2 + payload->hash_alg_len +
+ payload->len = 1 + 1 + 2 + SILC_SKE_COOKIE_LEN +
+ 2 + payload->version_len +
+ 2 + payload->ke_grp_len + 2 + payload->pkcs_alg_len +
+ 2 + payload->enc_alg_len + 2 + payload->hash_alg_len +
2 + payload->hmac_alg_len + 2 + payload->comp_alg_len;
return SILC_SKE_STATUS_OK;
/* Creates random number such that 1 < rnd < n and at most length
of len bits. The rnd sent as argument must be initialized. */
-static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n,
- SilcUInt32 len,
+static SilcSKEStatus silc_ske_create_rnd(SilcSKE ske, SilcMPInt *n,
+ SilcUInt32 len,
SilcMPInt *rnd)
{
SilcSKEStatus status = SILC_SKE_STATUS_OK;
hash value defined in the protocol. If it is FALSE then this is used
to create the HASH value defined by the protocol. */
-static SilcSKEStatus silc_ske_make_hash(SilcSKE ske,
+static SilcSKEStatus silc_ske_make_hash(SilcSKE ske,
unsigned char *return_hash,
SilcUInt32 *return_hash_len,
int initiator)
e = silc_mp_mp2bin(&ske->ke1_payload->x, 0, &e_len);
f = silc_mp_mp2bin(&ske->ke2_payload->x, 0, &f_len);
KEY = silc_mp_mp2bin(ske->KEY, 0, &KEY_len);
-
+
/* Format the buffer used to compute the hash value */
- buf = silc_buffer_alloc_size(ske->start_payload_copy->len +
- ske->ke2_payload->pk_len +
- ske->ke1_payload->pk_len +
+ buf = silc_buffer_alloc_size(ske->start_payload_copy->len +
+ ske->ke2_payload->pk_len +
+ ske->ke1_payload->pk_len +
e_len + f_len + KEY_len);
if (!buf)
return SILC_SKE_STATUS_OUT_OF_MEMORY;
/* Initiator is not required to send its public key */
if (!ske->ke1_payload->pk_data) {
- ret =
+ ret =
silc_buffer_format(buf,
SILC_STR_UI_XNSTRING(ske->start_payload_copy->
data,
ske->start_payload_copy->
len),
- SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data,
+ SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data,
ske->ke2_payload->pk_len),
SILC_STR_UI_XNSTRING(e, e_len),
SILC_STR_UI_XNSTRING(f, f_len),
SILC_STR_UI_XNSTRING(KEY, KEY_len),
SILC_STR_END);
} else {
- ret =
+ ret =
silc_buffer_format(buf,
SILC_STR_UI_XNSTRING(ske->start_payload_copy->
data,
ske->start_payload_copy->
len),
- SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data,
+ SILC_STR_UI_XNSTRING(ske->ke2_payload->pk_data,
ske->ke2_payload->pk_len),
- SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data,
+ SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data,
ske->ke1_payload->pk_len),
SILC_STR_UI_XNSTRING(e, e_len),
SILC_STR_UI_XNSTRING(f, f_len),
} else {
e = silc_mp_mp2bin(&ske->ke1_payload->x, 0, &e_len);
- buf = silc_buffer_alloc_size(ske->start_payload_copy->len +
+ buf = silc_buffer_alloc_size(ske->start_payload_copy->len +
ske->ke1_payload->pk_len + e_len);
if (!buf)
return SILC_SKE_STATUS_OUT_OF_MEMORY;
-
+
/* Format the buffer used to compute the hash value */
- ret =
+ ret =
silc_buffer_format(buf,
SILC_STR_UI_XNSTRING(ske->start_payload_copy->data,
ske->start_payload_copy->len),
- SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data,
+ SILC_STR_UI_XNSTRING(ske->ke1_payload->pk_data,
ske->ke1_payload->pk_len),
SILC_STR_UI_XNSTRING(e, e_len),
SILC_STR_END);
return status;
}
-/* Processes the provided key material `data' as the SILC protocol
+/* Processes the provided key material `data' as the SILC protocol
specification defines. */
-SilcSKEStatus
+SilcSKEStatus
silc_ske_process_key_material_data(unsigned char *data,
SilcUInt32 data_len,
SilcUInt32 req_iv_len,
SilcBuffer dist;
unsigned char k1[32], k2[32], k3[32];
unsigned char *dtmp;
-
+
/* XXX */
if (enc_key_len > (3 * hash_len))
return SILC_SKE_STATUS_ERROR;
-
+
/* Take first round */
memset(k1, 0, sizeof(k1));
silc_hash_make(hash, buf->data, buf->len, k1);
-
+
/* Take second round */
dist = silc_buffer_alloc_size(data_len + hash_len);
if (!dist)
SilcBuffer dist;
unsigned char k1[32], k2[32], k3[32];
unsigned char *dtmp;
-
+
/* XXX */
if (enc_key_len > (3 * hash_len))
return SILC_SKE_STATUS_ERROR;
-
+
/* Take first round */
memset(k1, 0, sizeof(k1));
silc_hash_make(hash, buf->data, buf->len, k1);
-
+
/* Take second round */
dist = silc_buffer_alloc_size(data_len + hash_len);
if (!dist)
SILC_STR_END);
memset(k2, 0, sizeof(k2));
silc_hash_make(hash, dist->data, dist->len, k2);
-
+
/* Take third round */
dist = silc_buffer_realloc(dist, data_len + hash_len + hash_len);
silc_buffer_pull_tail(dist, hash_len);
/* Processes negotiated key material as protocol specifies. This returns
the actual keys to be used in the SILC. */
-SilcSKEStatus silc_ske_process_key_material(SilcSKE ske,
+SilcSKEStatus silc_ske_process_key_material(SilcSKE ske,
SilcUInt32 req_iv_len,
SilcUInt32 req_enc_key_len,
SilcUInt32 req_hmac_key_len,
/* Process the key material */
status = silc_ske_process_key_material_data(buf->data, buf->len,
req_iv_len, req_enc_key_len,
- req_hmac_key_len,
+ req_hmac_key_len,
ske->prop->hash, key);
memset(tmpbuf, 0, klen);
silc_free(key);
}
-const char *silc_ske_status_string[] =
+const char *silc_ske_status_string[] =
{
/* Official */
"Ok",
/* Parses remote host's version string. */
-bool silc_ske_parse_version(SilcSKE ske,
+bool silc_ske_parse_version(SilcSKE ske,
SilcUInt32 *protocol_version,
char **protocol_version_string,
- SilcUInt32 *software_version,
+ SilcUInt32 *software_version,
char **software_version_string,
char **vendor_version)
{
return silc_parse_version_string(ske->remote_version,
- protocol_version,
- protocol_version_string,
+ protocol_version,
+ protocol_version_string,
software_version,
software_version_string,
vendor_version);