diff options
author | Malfurious <m@lfurio.us> | 2024-10-24 06:44:24 -0400 |
---|---|---|
committer | Malfurious <m@lfurio.us> | 2024-10-24 06:44:24 -0400 |
commit | 512aa4c77b3dc0d72db713a9215ff65a98a99ec3 (patch) | |
tree | 6db82e0109dc987b5b021f81d4e8a0926eb75ff7 /cryptopp562/integer.cpp | |
parent | 428471d39fb8c205a9fad899c88c30a2cb7df685 (diff) | |
parent | 10affea371406c0ae4c080e5a19390a8e9bd154b (diff) | |
download | compass-512aa4c77b3dc0d72db713a9215ff65a98a99ec3.tar.gz compass-512aa4c77b3dc0d72db713a9215ff65a98a99ec3.zip |
Merge branch 'mbedtls'
Replace Crypto++ 5.6.2 with Mbed TLS 3.6.0
Newer compilers are starting to show the age of the crypto library we've
been using, as it is sometimes a pain to recompile compass lately. So,
the tracked version of Crypto++ was at least due for an upgrade.
However, I plan to soon begin reimplementing compass in C. So, I'm
taking this opportunity to first just migrate the cryptography library
to a newer C alternative. This branch does so, and integrates its use
into the current C++ version of compass.
* mbedtls:
Remove unnecessary exception handler catch block
Refactor random password generation to use mbedtls entropy source
Refactor SHA256 function to use mbedtls
Refactor AES functions to use mbedtls
Add Mbedtls library
Remove Crypto++ library
Diffstat (limited to 'cryptopp562/integer.cpp')
-rw-r--r-- | cryptopp562/integer.cpp | 4235 |
1 files changed, 0 insertions, 4235 deletions
diff --git a/cryptopp562/integer.cpp b/cryptopp562/integer.cpp deleted file mode 100644 index f07cce8..0000000 --- a/cryptopp562/integer.cpp +++ /dev/null @@ -1,4235 +0,0 @@ -// integer.cpp - written and placed in the public domain by Wei Dai -// contains public domain code contributed by Alister Lee and Leonard Janke - -#include "pch.h" - -#ifndef CRYPTOPP_IMPORTS - -#include "integer.h" -#include "modarith.h" -#include "nbtheory.h" -#include "asn.h" -#include "oids.h" -#include "words.h" -#include "algparam.h" -#include "pubkey.h" // for P1363_KDF2 -#include "sha.h" -#include "cpu.h" - -#include <iostream> - -#if _MSC_VER >= 1400 - #include <intrin.h> -#endif - -#ifdef __DECCXX - #include <c_asm.h> -#endif - -#ifdef CRYPTOPP_MSVC6_NO_PP - #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.") -#endif - -#define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86) - -NAMESPACE_BEGIN(CryptoPP) - -bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) -{ - if (valueType != typeid(Integer)) - return false; - *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); - return true; -} - -inline static int Compare(const word *A, const word *B, size_t N) -{ - while (N--) - if (A[N] > B[N]) - return 1; - else if (A[N] < B[N]) - return -1; - - return 0; -} - -inline static int Increment(word *A, size_t N, word B=1) -{ - assert(N); - word t = A[0]; - A[0] = t+B; - if (A[0] >= t) - return 0; - for (unsigned i=1; i<N; i++) - if (++A[i]) - return 0; - return 1; -} - -inline static int Decrement(word *A, size_t N, word B=1) -{ - assert(N); - word t = A[0]; - A[0] = t-B; - if (A[0] <= t) - return 0; - for (unsigned i=1; i<N; i++) - if (A[i]--) - return 0; - return 1; -} - -static void TwosComplement(word *A, size_t N) -{ - Decrement(A, N); - for (unsigned i=0; i<N; i++) - A[i] = ~A[i]; -} - -static word AtomicInverseModPower2(word A) -{ - assert(A%2==1); - - word R=A%8; - - for (unsigned i=3; i<WORD_BITS; i*=2) - R = R*(2-R*A); - - assert(R*A==1); - return R; -} - -// ******************************************************** - -#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE)) - #define Declare2Words(x) word x##0, x##1; - #define AssignWord(a, b) a##0 = b; a##1 = 0; - #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); - #define LowWord(a) a##0 - #define HighWord(a) a##1 - #ifdef _MSC_VER - #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1); - #ifndef __INTEL_COMPILER - #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; - #endif - #elif defined(__DECCXX) - #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b); - #elif defined(__x86_64__) - #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100 - // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works - #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc"); - #else - #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); - #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); - #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); - #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); - #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); - #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); - #endif - #endif - #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b) - #ifndef Double3Words - #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; - #endif - #ifndef Acc2WordsBy2 - #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; - #endif - #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);} - #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);} - #define GetCarry(u) u##1 - #define GetBorrow(u) u##1 -#else - #define Declare2Words(x) dword x; - #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) - #define MultiplyWords(p, a, b) p = __emulu(a, b); - #else - #define MultiplyWords(p, a, b) p = (dword)a*b; - #endif - #define AssignWord(a, b) a = b; - #define Add2WordsBy1(a, b, c) a = b + c; - #define Acc2WordsBy2(a, b) a += b; - #define LowWord(a) word(a) - #define HighWord(a) word(a>>WORD_BITS) - #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2; - #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u); - #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); - #define GetCarry(u) HighWord(u) - #define GetBorrow(u) word(u>>(WORD_BITS*2-1)) -#endif -#ifndef MulAcc - #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); -#endif -#ifndef Acc2WordsBy1 - #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b) -#endif -#ifndef Acc3WordsBy2 - #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); -#endif - -class DWord -{ -public: - DWord() {} - -#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - explicit DWord(word low) - { - m_whole = low; - } -#else - explicit DWord(word low) - { - m_halfs.low = low; - m_halfs.high = 0; - } -#endif - - DWord(word low, word high) - { - m_halfs.low = low; - m_halfs.high = high; - } - - static DWord Multiply(word a, word b) - { - DWord r; - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - r.m_whole = (dword)a * b; - #elif defined(MultiplyWordsLoHi) - MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b); - #endif - return r; - } - - static DWord MultiplyAndAdd(word a, word b, word c) - { - DWord r = Multiply(a, b); - return r += c; - } - - DWord & operator+=(word a) - { - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - m_whole = m_whole + a; - #else - m_halfs.low += a; - m_halfs.high += (m_halfs.low < a); - #endif - return *this; - } - - DWord operator+(word a) - { - DWord r; - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - r.m_whole = m_whole + a; - #else - r.m_halfs.low = m_halfs.low + a; - r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); - #endif - return r; - } - - DWord operator-(DWord a) - { - DWord r; - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - r.m_whole = m_whole - a.m_whole; - #else - r.m_halfs.low = m_halfs.low - a.m_halfs.low; - r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); - #endif - return r; - } - - DWord operator-(word a) - { - DWord r; - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - r.m_whole = m_whole - a; - #else - r.m_halfs.low = m_halfs.low - a; - r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); - #endif - return r; - } - - // returns quotient, which must fit in a word - word operator/(word divisor); - - word operator%(word a); - - bool operator!() const - { - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - return !m_whole; - #else - return !m_halfs.high && !m_halfs.low; - #endif - } - - word GetLowHalf() const {return m_halfs.low;} - word GetHighHalf() const {return m_halfs.high;} - word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} - -private: - union - { - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - dword m_whole; - #endif - struct - { - #ifdef IS_LITTLE_ENDIAN - word low; - word high; - #else - word high; - word low; - #endif - } m_halfs; - }; -}; - -class Word -{ -public: - Word() {} - - Word(word value) - { - m_whole = value; - } - - Word(hword low, hword high) - { - m_whole = low | (word(high) << (WORD_BITS/2)); - } - - static Word Multiply(hword a, hword b) - { - Word r; - r.m_whole = (word)a * b; - return r; - } - - Word operator-(Word a) - { - Word r; - r.m_whole = m_whole - a.m_whole; - return r; - } - - Word operator-(hword a) - { - Word r; - r.m_whole = m_whole - a; - return r; - } - - // returns quotient, which must fit in a word - hword operator/(hword divisor) - { - return hword(m_whole / divisor); - } - - bool operator!() const - { - return !m_whole; - } - - word GetWhole() const {return m_whole;} - hword GetLowHalf() const {return hword(m_whole);} - hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} - hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} - -private: - word m_whole; -}; - -// do a 3 word by 2 word divide, returns quotient and leaves remainder in A -template <class S, class D> -S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) -{ - // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S - assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); - - // estimate the quotient: do a 2 S by 1 S divide - S Q; - if (S(B1+1) == 0) - Q = A[2]; - else if (B1 > 0) - Q = D(A[1], A[2]) / S(B1+1); - else - Q = D(A[0], A[1]) / B0; - - // now subtract Q*B from A - D p = D::Multiply(B0, Q); - D u = (D) A[0] - p.GetLowHalf(); - A[0] = u.GetLowHalf(); - u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); - A[1] = u.GetLowHalf(); - A[2] += u.GetHighHalf(); - - // Q <= actual quotient, so fix it - while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) - { - u = (D) A[0] - B0; - A[0] = u.GetLowHalf(); - u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); - A[1] = u.GetLowHalf(); - A[2] += u.GetHighHalf(); - Q++; - assert(Q); // shouldn't overflow - } - - return Q; -} - -// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 -template <class S, class D> -inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) -{ - if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) - return D(Ah.GetLowHalf(), Ah.GetHighHalf()); - else - { - S Q[2]; - T[0] = Al.GetLowHalf(); - T[1] = Al.GetHighHalf(); - T[2] = Ah.GetLowHalf(); - T[3] = Ah.GetHighHalf(); - Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); - Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); - return D(Q[0], Q[1]); - } -} - -// returns quotient, which must fit in a word -inline word DWord::operator/(word a) -{ - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - return word(m_whole / a); - #else - hword r[4]; - return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); - #endif -} - -inline word DWord::operator%(word a) -{ - #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE - return word(m_whole % a); - #else - if (a < (word(1) << (WORD_BITS/2))) - { - hword h = hword(a); - word r = m_halfs.high % h; - r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; - return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); - } - else - { - hword r[4]; - DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); - return Word(r[0], r[1]).GetWhole(); - } - #endif -} - -// ******************************************************** - -// use some tricks to share assembly code between MSVC and GCC -#if defined(__GNUC__) - #define AddPrologue \ - int result; \ - __asm__ __volatile__ \ - ( \ - ".intel_syntax noprefix;" - #define AddEpilogue \ - ".att_syntax prefix;" \ - : "=a" (result)\ - : "d" (C), "a" (A), "D" (B), "c" (N) \ - : "%esi", "memory", "cc" \ - );\ - return result; - #define MulPrologue \ - __asm__ __volatile__ \ - ( \ - ".intel_syntax noprefix;" \ - AS1( push ebx) \ - AS2( mov ebx, edx) - #define MulEpilogue \ - AS1( pop ebx) \ - ".att_syntax prefix;" \ - : \ - : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ - : "%esi", "memory", "cc" \ - ); - #define SquPrologue MulPrologue - #define SquEpilogue \ - AS1( pop ebx) \ - ".att_syntax prefix;" \ - : \ - : "d" (s_maskLow16), "c" (C), "a" (A) \ - : "%esi", "%edi", "memory", "cc" \ - ); - #define TopPrologue MulPrologue - #define TopEpilogue \ - AS1( pop ebx) \ - ".att_syntax prefix;" \ - : \ - : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ - : "memory", "cc" \ - ); -#else - #define AddPrologue \ - __asm push edi \ - __asm push esi \ - __asm mov eax, [esp+12] \ - __asm mov edi, [esp+16] - #define AddEpilogue \ - __asm pop esi \ - __asm pop edi \ - __asm ret 8 -#if _MSC_VER < 1300 - #define SaveEBX __asm push ebx - #define RestoreEBX __asm pop ebx -#else - #define SaveEBX - #define RestoreEBX -#endif - #define SquPrologue \ - AS2( mov eax, A) \ - AS2( mov ecx, C) \ - SaveEBX \ - AS2( lea ebx, s_maskLow16) - #define MulPrologue \ - AS2( mov eax, A) \ - AS2( mov edi, B) \ - AS2( mov ecx, C) \ - SaveEBX \ - AS2( lea ebx, s_maskLow16) - #define TopPrologue \ - AS2( mov eax, A) \ - AS2( mov edi, B) \ - AS2( mov ecx, C) \ - AS2( mov esi, L) \ - SaveEBX \ - AS2( lea ebx, s_maskLow16) - #define SquEpilogue RestoreEBX - #define MulEpilogue RestoreEBX - #define TopEpilogue RestoreEBX -#endif - -#ifdef CRYPTOPP_X64_MASM_AVAILABLE -extern "C" { -int Baseline_Add(size_t N, word *C, const word *A, const word *B); -int Baseline_Sub(size_t N, word *C, const word *A, const word *B); -} -#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE) -int Baseline_Add(size_t N, word *C, const word *A, const word *B) -{ - word result; - __asm__ __volatile__ - ( - ".intel_syntax;" - AS1( neg %1) - ASJ( jz, 1, f) - AS2( mov %0,[%3+8*%1]) - AS2( add %0,[%4+8*%1]) - AS2( mov [%2+8*%1],%0) - ASL(0) - AS2( mov %0,[%3+8*%1+8]) - AS2( adc %0,[%4+8*%1+8]) - AS2( mov [%2+8*%1+8],%0) - AS2( lea %1,[%1+2]) - ASJ( jrcxz, 1, f) - AS2( mov %0,[%3+8*%1]) - AS2( adc %0,[%4+8*%1]) - AS2( mov [%2+8*%1],%0) - ASJ( jmp, 0, b) - ASL(1) - AS2( mov %0, 0) - AS2( adc %0, %0) - ".att_syntax;" - : "=&r" (result), "+c" (N) - : "r" (C+N), "r" (A+N), "r" (B+N) - : "memory", "cc" - ); - return (int)result; -} - -int Baseline_Sub(size_t N, word *C, const word *A, const word *B) -{ - word result; - __asm__ __volatile__ - ( - ".intel_syntax;" - AS1( neg %1) - ASJ( jz, 1, f) - AS2( mov %0,[%3+8*%1]) - AS2( sub %0,[%4+8*%1]) - AS2( mov [%2+8*%1],%0) - ASL(0) - AS2( mov %0,[%3+8*%1+8]) - AS2( sbb %0,[%4+8*%1+8]) - AS2( mov [%2+8*%1+8],%0) - AS2( lea %1,[%1+2]) - ASJ( jrcxz, 1, f) - AS2( mov %0,[%3+8*%1]) - AS2( sbb %0,[%4+8*%1]) - AS2( mov [%2+8*%1],%0) - ASJ( jmp, 0, b) - ASL(1) - AS2( mov %0, 0) - AS2( adc %0, %0) - ".att_syntax;" - : "=&r" (result), "+c" (N) - : "r" (C+N), "r" (A+N), "r" (B+N) - : "memory", "cc" - ); - return (int)result; -} -#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) -{ - AddPrologue - - // now: eax = A, edi = B, edx = C, ecx = N - AS2( lea eax, [eax+4*ecx]) - AS2( lea edi, [edi+4*ecx]) - AS2( lea edx, [edx+4*ecx]) - - AS1( neg ecx) // ecx is negative index - AS2( test ecx, 2) // this clears carry flag - ASJ( jz, 0, f) - AS2( sub ecx, 2) - ASJ( jmp, 1, f) - - ASL(0) - ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero - AS2( mov esi,[eax+4*ecx]) - AS2( adc esi,[edi+4*ecx]) - AS2( mov [edx+4*ecx],esi) - AS2( mov esi,[eax+4*ecx+4]) - AS2( adc esi,[edi+4*ecx+4]) - AS2( mov [edx+4*ecx+4],esi) - ASL(1) - AS2( mov esi,[eax+4*ecx+8]) - AS2( adc esi,[edi+4*ecx+8]) - AS2( mov [edx+4*ecx+8],esi) - AS2( mov esi,[eax+4*ecx+12]) - AS2( adc esi,[edi+4*ecx+12]) - AS2( mov [edx+4*ecx+12],esi) - - AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 - ASJ( jmp, 0, b) - - ASL(2) - AS2( mov eax, 0) - AS1( setc al) // store carry into eax (return result register) - - AddEpilogue -} - -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) -{ - AddPrologue - - // now: eax = A, edi = B, edx = C, ecx = N - AS2( lea eax, [eax+4*ecx]) - AS2( lea edi, [edi+4*ecx]) - AS2( lea edx, [edx+4*ecx]) - - AS1( neg ecx) // ecx is negative index - AS2( test ecx, 2) // this clears carry flag - ASJ( jz, 0, f) - AS2( sub ecx, 2) - ASJ( jmp, 1, f) - - ASL(0) - ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero - AS2( mov esi,[eax+4*ecx]) - AS2( sbb esi,[edi+4*ecx]) - AS2( mov [edx+4*ecx],esi) - AS2( mov esi,[eax+4*ecx+4]) - AS2( sbb esi,[edi+4*ecx+4]) - AS2( mov [edx+4*ecx+4],esi) - ASL(1) - AS2( mov esi,[eax+4*ecx+8]) - AS2( sbb esi,[edi+4*ecx+8]) - AS2( mov [edx+4*ecx+8],esi) - AS2( mov esi,[eax+4*ecx+12]) - AS2( sbb esi,[edi+4*ecx+12]) - AS2( mov [edx+4*ecx+12],esi) - - AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 - ASJ( jmp, 0, b) - - ASL(2) - AS2( mov eax, 0) - AS1( setc al) // store carry into eax (return result register) - - AddEpilogue -} - -#if CRYPTOPP_INTEGER_SSE2 -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) -{ - AddPrologue - - // now: eax = A, edi = B, edx = C, ecx = N - AS2( lea eax, [eax+4*ecx]) - AS2( lea edi, [edi+4*ecx]) - AS2( lea edx, [edx+4*ecx]) - - AS1( neg ecx) // ecx is negative index - AS2( pxor mm2, mm2) - ASJ( jz, 2, f) - AS2( test ecx, 2) // this clears carry flag - ASJ( jz, 0, f) - AS2( sub ecx, 2) - ASJ( jmp, 1, f) - - ASL(0) - AS2( movd mm0, DWORD PTR [eax+4*ecx]) - AS2( movd mm1, DWORD PTR [edi+4*ecx]) - AS2( paddq mm0, mm1) - AS2( paddq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx], mm2) - AS2( psrlq mm2, 32) - - AS2( movd mm0, DWORD PTR [eax+4*ecx+4]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) - AS2( paddq mm0, mm1) - AS2( paddq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx+4], mm2) - AS2( psrlq mm2, 32) - - ASL(1) - AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) - AS2( paddq mm0, mm1) - AS2( paddq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx+8], mm2) - AS2( psrlq mm2, 32) - - AS2( movd mm0, DWORD PTR [eax+4*ecx+12]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) - AS2( paddq mm0, mm1) - AS2( paddq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx+12], mm2) - AS2( psrlq mm2, 32) - - AS2( add ecx, 4) - ASJ( jnz, 0, b) - - ASL(2) - AS2( movd eax, mm2) - AS1( emms) - - AddEpilogue -} -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) -{ - AddPrologue - - // now: eax = A, edi = B, edx = C, ecx = N - AS2( lea eax, [eax+4*ecx]) - AS2( lea edi, [edi+4*ecx]) - AS2( lea edx, [edx+4*ecx]) - - AS1( neg ecx) // ecx is negative index - AS2( pxor mm2, mm2) - ASJ( jz, 2, f) - AS2( test ecx, 2) // this clears carry flag - ASJ( jz, 0, f) - AS2( sub ecx, 2) - ASJ( jmp, 1, f) - - ASL(0) - AS2( movd mm0, DWORD PTR [eax+4*ecx]) - AS2( movd mm1, DWORD PTR [edi+4*ecx]) - AS2( psubq mm0, mm1) - AS2( psubq mm0, mm2) - AS2( movd DWORD PTR [edx+4*ecx], mm0) - AS2( psrlq mm0, 63) - - AS2( movd mm2, DWORD PTR [eax+4*ecx+4]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) - AS2( psubq mm2, mm1) - AS2( psubq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx+4], mm2) - AS2( psrlq mm2, 63) - - ASL(1) - AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) - AS2( psubq mm0, mm1) - AS2( psubq mm0, mm2) - AS2( movd DWORD PTR [edx+4*ecx+8], mm0) - AS2( psrlq mm0, 63) - - AS2( movd mm2, DWORD PTR [eax+4*ecx+12]) - AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) - AS2( psubq mm2, mm1) - AS2( psubq mm2, mm0) - AS2( movd DWORD PTR [edx+4*ecx+12], mm2) - AS2( psrlq mm2, 63) - - AS2( add ecx, 4) - ASJ( jnz, 0, b) - - ASL(2) - AS2( movd eax, mm2) - AS1( emms) - - AddEpilogue -} -#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -#else -int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) -{ - assert (N%2 == 0); - - Declare2Words(u); - AssignWord(u, 0); - for (size_t i=0; i<N; i+=2) - { - AddWithCarry(u, A[i], B[i]); - C[i] = LowWord(u); - AddWithCarry(u, A[i+1], B[i+1]); - C[i+1] = LowWord(u); - } - return int(GetCarry(u)); -} - -int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) -{ - assert (N%2 == 0); - - Declare2Words(u); - AssignWord(u, 0); - for (size_t i=0; i<N; i+=2) - { - SubtractWithBorrow(u, A[i], B[i]); - C[i] = LowWord(u); - SubtractWithBorrow(u, A[i+1], B[i+1]); - C[i+1] = LowWord(u); - } - return int(GetBorrow(u)); -} -#endif - -static word LinearMultiply(word *C, const word *A, word B, size_t N) -{ - word carry=0; - for(unsigned i=0; i<N; i++) - { - Declare2Words(p); - MultiplyWords(p, A[i], B); - Acc2WordsBy1(p, carry); - C[i] = LowWord(p); - carry = HighWord(p); - } - return carry; -} - -#ifndef CRYPTOPP_DOXYGEN_PROCESSING - -#define Mul_2 \ - Mul_Begin(2) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_End(1, 1) - -#define Mul_4 \ - Mul_Begin(4) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ - Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ - Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ - Mul_End(5, 3) - -#define Mul_8 \ - Mul_Begin(8) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ - Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ - Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ - Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ - Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ - Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ - Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ - Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ - Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ - Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ - Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ - Mul_End(13, 7) - -#define Mul_16 \ - Mul_Begin(16) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ - Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ - Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ - Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ - Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ - Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ - Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ - Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ - Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ - Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ - Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ - Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ - Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ - Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ - Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ - Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ - Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ - Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ - Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ - Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ - Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ - Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ - Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ - Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ - Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ - Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ - Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ - Mul_End(29, 15) - -#define Squ_2 \ - Squ_Begin(2) \ - Squ_End(2) - -#define Squ_4 \ - Squ_Begin(4) \ - Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ - Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ - Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ - Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ - Squ_End(4) - -#define Squ_8 \ - Squ_Begin(8) \ - Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ - Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ - Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ - Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ - Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ - Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ - Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ - Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ - Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ - Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ - Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ - Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ - Squ_End(8) - -#define Squ_16 \ - Squ_Begin(16) \ - Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ - Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ - Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ - Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ - Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ - Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ - Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ - Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ - Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ - Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ - Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ - Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ - Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ - Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ - Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ - Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ - Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ - Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ - Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ - Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ - Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ - Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ - Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ - Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ - Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ - Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ - Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ - Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ - Squ_End(16) - -#define Bot_2 \ - Mul_Begin(2) \ - Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ - Bot_End(2) - -#define Bot_4 \ - Mul_Begin(4) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ - Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ - Bot_End(4) - -#define Bot_8 \ - Mul_Begin(8) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ - Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ - Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ - Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ - Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ - Bot_End(8) - -#define Bot_16 \ - Mul_Begin(16) \ - Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ - Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ - Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ - Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ - Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ - Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ - Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ - Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ - Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ - Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ - Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ - Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ - Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ - Bot_End(16) - -#endif - -#if 0 -#define Mul_Begin(n) \ - Declare2Words(p) \ - Declare2Words(c) \ - Declare2Words(d) \ - MultiplyWords(p, A[0], B[0]) \ - AssignWord(c, LowWord(p)) \ - AssignWord(d, HighWord(p)) - -#define Mul_Acc(i, j) \ - MultiplyWords(p, A[i], B[j]) \ - Acc2WordsBy1(c, LowWord(p)) \ - Acc2WordsBy1(d, HighWord(p)) - -#define Mul_SaveAcc(k, i, j) \ - R[k] = LowWord(c); \ - Add2WordsBy1(c, d, HighWord(c)) \ - MultiplyWords(p, A[i], B[j]) \ - AssignWord(d, HighWord(p)) \ - Acc2WordsBy1(c, LowWord(p)) - -#define Mul_End(n) \ - R[2*n-3] = LowWord(c); \ - Acc2WordsBy1(d, HighWord(c)) \ - MultiplyWords(p, A[n-1], B[n-1])\ - Acc2WordsBy2(d, p) \ - R[2*n-2] = LowWord(d); \ - R[2*n-1] = HighWord(d); - -#define Bot_SaveAcc(k, i, j) \ - R[k] = LowWord(c); \ - word e = LowWord(d) + HighWord(c); \ - e += A[i] * B[j]; - -#define Bot_Acc(i, j) \ - e += A[i] * B[j]; - -#define Bot_End(n) \ - R[n-1] = e; -#else -#define Mul_Begin(n) \ - Declare2Words(p) \ - word c; \ - Declare2Words(d) \ - MultiplyWords(p, A[0], B[0]) \ - c = LowWord(p); \ - AssignWord(d, HighWord(p)) - -#define Mul_Acc(i, j) \ - MulAcc(c, d, A[i], B[j]) - -#define Mul_SaveAcc(k, i, j) \ - R[k] = c; \ - c = LowWord(d); \ - AssignWord(d, HighWord(d)) \ - MulAcc(c, d, A[i], B[j]) - -#define Mul_End(k, i) \ - R[k] = c; \ - MultiplyWords(p, A[i], B[i]) \ - Acc2WordsBy2(p, d) \ - R[k+1] = LowWord(p); \ - R[k+2] = HighWord(p); - -#define Bot_SaveAcc(k, i, j) \ - R[k] = c; \ - c = LowWord(d); \ - c += A[i] * B[j]; - -#define Bot_Acc(i, j) \ - c += A[i] * B[j]; - -#define Bot_End(n) \ - R[n-1] = c; -#endif - -#define Squ_Begin(n) \ - Declare2Words(p) \ - word c; \ - Declare2Words(d) \ - Declare2Words(e) \ - MultiplyWords(p, A[0], A[0]) \ - R[0] = LowWord(p); \ - AssignWord(e, HighWord(p)) \ - MultiplyWords(p, A[0], A[1]) \ - c = LowWord(p); \ - AssignWord(d, HighWord(p)) \ - Squ_NonDiag \ - -#define Squ_NonDiag \ - Double3Words(c, d) - -#define Squ_SaveAcc(k, i, j) \ - Acc3WordsBy2(c, d, e) \ - R[k] = c; \ - MultiplyWords(p, A[i], A[j]) \ - c = LowWord(p); \ - AssignWord(d, HighWord(p)) \ - -#define Squ_Acc(i, j) \ - MulAcc(c, d, A[i], A[j]) - -#define Squ_Diag(i) \ - Squ_NonDiag \ - MulAcc(c, d, A[i], A[i]) - -#define Squ_End(n) \ - Acc3WordsBy2(c, d, e) \ - R[2*n-3] = c; \ - MultiplyWords(p, A[n-1], A[n-1])\ - Acc2WordsBy2(p, e) \ - R[2*n-2] = LowWord(p); \ - R[2*n-1] = HighWord(p); - -void Baseline_Multiply2(word *R, const word *A, const word *B) -{ - Mul_2 -} - -void Baseline_Multiply4(word *R, const word *A, const word *B) -{ - Mul_4 -} - -void Baseline_Multiply8(word *R, const word *A, const word *B) -{ - Mul_8 -} - -void Baseline_Square2(word *R, const word *A) -{ - Squ_2 -} - -void Baseline_Square4(word *R, const word *A) -{ - Squ_4 -} - -void Baseline_Square8(word *R, const word *A) -{ - Squ_8 -} - -void Baseline_MultiplyBottom2(word *R, const word *A, const word *B) -{ - Bot_2 -} - -void Baseline_MultiplyBottom4(word *R, const word *A, const word *B) -{ - Bot_4 -} - -void Baseline_MultiplyBottom8(word *R, const word *A, const word *B) -{ - Bot_8 -} - -#define Top_Begin(n) \ - Declare2Words(p) \ - word c; \ - Declare2Words(d) \ - MultiplyWords(p, A[0], B[n-2]);\ - AssignWord(d, HighWord(p)); - -#define Top_Acc(i, j) \ - MultiplyWords(p, A[i], B[j]);\ - Acc2WordsBy1(d, HighWord(p)); - -#define Top_SaveAcc0(i, j) \ - c = LowWord(d); \ - AssignWord(d, HighWord(d)) \ - MulAcc(c, d, A[i], B[j]) - -#define Top_SaveAcc1(i, j) \ - c = L<c; \ - Acc2WordsBy1(d, c); \ - c = LowWord(d); \ - AssignWord(d, HighWord(d)) \ - MulAcc(c, d, A[i], B[j]) - -void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L) -{ - word T[4]; - Baseline_Multiply2(T, A, B); - R[0] = T[2]; - R[1] = T[3]; -} - -void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L) -{ - Top_Begin(4) - Top_Acc(1, 1) Top_Acc(2, 0) \ - Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ - Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ - Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ - Mul_End(1, 3) -} - -void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L) -{ - Top_Begin(8) - Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ - Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ - Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ - Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ - Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ - Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ - Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ - Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ - Mul_End(5, 7) -} - -#if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available -void Baseline_Multiply16(word *R, const word *A, const word *B) -{ - Mul_16 -} - -void Baseline_Square16(word *R, const word *A) -{ - Squ_16 -} - -void Baseline_MultiplyBottom16(word *R, const word *A, const word *B) -{ - Bot_16 -} - -void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L) -{ - Top_Begin(16) - Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ - Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ - Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ - Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ - Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ - Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ - Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ - Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ - Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ - Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ - Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ - Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ - Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ - Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ - Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ - Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ - Mul_End(13, 15) -} -#endif - -// ******************************************************** - -#if CRYPTOPP_INTEGER_SSE2 - -CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff}; - -#undef Mul_Begin -#undef Mul_Acc -#undef Top_Begin -#undef Top_Acc -#undef Squ_Acc -#undef Squ_NonDiag -#undef Squ_Diag -#undef Squ_SaveAcc -#undef Squ_Begin -#undef Mul_SaveAcc -#undef Bot_Acc -#undef Bot_SaveAcc -#undef Bot_End -#undef Squ_End -#undef Mul_End - -#define SSE2_FinalSave(k) \ - AS2( psllq xmm5, 16) \ - AS2( paddq xmm4, xmm5) \ - AS2( movq QWORD PTR [ecx+8*(k)], xmm4) - -#define SSE2_SaveShift(k) \ - AS2( movq xmm0, xmm6) \ - AS2( punpckhqdq xmm6, xmm0) \ - AS2( movq xmm1, xmm7) \ - AS2( punpckhqdq xmm7, xmm1) \ - AS2( paddd xmm6, xmm0) \ - AS2( pslldq xmm6, 4) \ - AS2( paddd xmm7, xmm1) \ - AS2( paddd xmm4, xmm6) \ - AS2( pslldq xmm7, 4) \ - AS2( movq xmm6, xmm4) \ - AS2( paddd xmm5, xmm7) \ - AS2( movq xmm7, xmm5) \ - AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ - AS2( psrlq xmm6, 16) \ - AS2( paddq xmm6, xmm7) \ - AS2( punpckhqdq xmm4, xmm0) \ - AS2( punpckhqdq xmm5, xmm0) \ - AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ - AS2( psrlq xmm6, 3*16) \ - AS2( paddd xmm4, xmm6) \ - -#define Squ_SSE2_SaveShift(k) \ - AS2( movq xmm0, xmm6) \ - AS2( punpckhqdq xmm6, xmm0) \ - AS2( movq xmm1, xmm7) \ - AS2( punpckhqdq xmm7, xmm1) \ - AS2( paddd xmm6, xmm0) \ - AS2( pslldq xmm6, 4) \ - AS2( paddd xmm7, xmm1) \ - AS2( paddd xmm4, xmm6) \ - AS2( pslldq xmm7, 4) \ - AS2( movhlps xmm6, xmm4) \ - AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ - AS2( paddd xmm5, xmm7) \ - AS2( movhps QWORD PTR [esp+12], xmm5)\ - AS2( psrlq xmm4, 16) \ - AS2( paddq xmm4, xmm5) \ - AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ - AS2( psrlq xmm4, 3*16) \ - AS2( paddd xmm4, xmm6) \ - AS2( movq QWORD PTR [esp+4], xmm4)\ - -#define SSE2_FirstMultiply(i) \ - AS2( movdqa xmm7, [esi+(i)*16])\ - AS2( movdqa xmm5, [edi-(i)*16])\ - AS2( pmuludq xmm5, xmm7) \ - AS2( movdqa xmm4, [ebx])\ - AS2( movdqa xmm6, xmm4) \ - AS2( pand xmm4, xmm5) \ - AS2( psrld xmm5, 16) \ - AS2( pmuludq xmm7, [edx-(i)*16])\ - AS2( pand xmm6, xmm7) \ - AS2( psrld xmm7, 16) - -#define Squ_Begin(n) \ - SquPrologue \ - AS2( mov esi, esp)\ - AS2( and esp, 0xfffffff0)\ - AS2( lea edi, [esp-32*n])\ - AS2( sub esp, 32*n+16)\ - AS1( push esi)\ - AS2( mov esi, edi) \ - AS2( xor edx, edx) \ - ASL(1) \ - ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ - ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ - AS2( movdqa [edi+2*edx], xmm0) \ - AS2( psrlq xmm0, 32) \ - AS2( movdqa [edi+2*edx+16], xmm0) \ - AS2( movdqa [edi+16*n+2*edx], xmm1) \ - AS2( psrlq xmm1, 32) \ - AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ - AS2( add edx, 16) \ - AS2( cmp edx, 8*(n)) \ - ASJ( jne, 1, b) \ - AS2( lea edx, [edi+16*n])\ - SSE2_FirstMultiply(0) \ - -#define Squ_Acc(i) \ - ASL(LSqu##i) \ - AS2( movdqa xmm1, [esi+(i)*16]) \ - AS2( movdqa xmm0, [edi-(i)*16]) \ - AS2( movdqa xmm2, [ebx]) \ - AS2( pmuludq xmm0, xmm1) \ - AS2( pmuludq xmm1, [edx-(i)*16]) \ - AS2( movdqa xmm3, xmm2) \ - AS2( pand xmm2, xmm0) \ - AS2( psrld xmm0, 16) \ - AS2( paddd xmm4, xmm2) \ - AS2( paddd xmm5, xmm0) \ - AS2( pand xmm3, xmm1) \ - AS2( psrld xmm1, 16) \ - AS2( paddd xmm6, xmm3) \ - AS2( paddd xmm7, xmm1) \ - -#define Squ_Acc1(i) -#define Squ_Acc2(i) ASC(call, LSqu##i) -#define Squ_Acc3(i) Squ_Acc2(i) -#define Squ_Acc4(i) Squ_Acc2(i) -#define Squ_Acc5(i) Squ_Acc2(i) -#define Squ_Acc6(i) Squ_Acc2(i) -#define Squ_Acc7(i) Squ_Acc2(i) -#define Squ_Acc8(i) Squ_Acc2(i) - -#define SSE2_End(E, n) \ - SSE2_SaveShift(2*(n)-3) \ - AS2( movdqa xmm7, [esi+16]) \ - AS2( movdqa xmm0, [edi]) \ - AS2( pmuludq xmm0, xmm7) \ - AS2( movdqa xmm2, [ebx]) \ - AS2( pmuludq xmm7, [edx]) \ - AS2( movdqa xmm6, xmm2) \ - AS2( pand xmm2, xmm0) \ - AS2( psrld xmm0, 16) \ - AS2( paddd xmm4, xmm2) \ - AS2( paddd xmm5, xmm0) \ - AS2( pand xmm6, xmm7) \ - AS2( psrld xmm7, 16) \ - SSE2_SaveShift(2*(n)-2) \ - SSE2_FinalSave(2*(n)-1) \ - AS1( pop esp)\ - E - -#define Squ_End(n) SSE2_End(SquEpilogue, n) -#define Mul_End(n) SSE2_End(MulEpilogue, n) -#define Top_End(n) SSE2_End(TopEpilogue, n) - -#define Squ_Column1(k, i) \ - Squ_SSE2_SaveShift(k) \ - AS2( add esi, 16) \ - SSE2_FirstMultiply(1)\ - Squ_Acc##i(i) \ - AS2( paddd xmm4, xmm4) \ - AS2( paddd xmm5, xmm5) \ - AS2( movdqa xmm3, [esi]) \ - AS2( movq xmm1, QWORD PTR [esi+8]) \ - AS2( pmuludq xmm1, xmm3) \ - AS2( pmuludq xmm3, xmm3) \ - AS2( movdqa xmm0, [ebx])\ - AS2( movdqa xmm2, xmm0) \ - AS2( pand xmm0, xmm1) \ - AS2( psrld xmm1, 16) \ - AS2( paddd xmm6, xmm0) \ - AS2( paddd xmm7, xmm1) \ - AS2( pand xmm2, xmm3) \ - AS2( psrld xmm3, 16) \ - AS2( paddd xmm6, xmm6) \ - AS2( paddd xmm7, xmm7) \ - AS2( paddd xmm4, xmm2) \ - AS2( paddd xmm5, xmm3) \ - AS2( movq xmm0, QWORD PTR [esp+4])\ - AS2( movq xmm1, QWORD PTR [esp+12])\ - AS2( paddd xmm4, xmm0)\ - AS2( paddd xmm5, xmm1)\ - -#define Squ_Column0(k, i) \ - Squ_SSE2_SaveShift(k) \ - AS2( add edi, 16) \ - AS2( add edx, 16) \ - SSE2_FirstMultiply(1)\ - Squ_Acc##i(i) \ - AS2( paddd xmm6, xmm6) \ - AS2( paddd xmm7, xmm7) \ - AS2( paddd xmm4, xmm4) \ - AS2( paddd xmm5, xmm5) \ - AS2( movq xmm0, QWORD PTR [esp+4])\ - AS2( movq xmm1, QWORD PTR [esp+12])\ - AS2( paddd xmm4, xmm0)\ - AS2( paddd xmm5, xmm1)\ - -#define SSE2_MulAdd45 \ - AS2( movdqa xmm7, [esi]) \ - AS2( movdqa xmm0, [edi]) \ - AS2( pmuludq xmm0, xmm7) \ - AS2( movdqa xmm2, [ebx]) \ - AS2( pmuludq xmm7, [edx]) \ - AS2( movdqa xmm6, xmm2) \ - AS2( pand xmm2, xmm0) \ - AS2( psrld xmm0, 16) \ - AS2( paddd xmm4, xmm2) \ - AS2( paddd xmm5, xmm0) \ - AS2( pand xmm6, xmm7) \ - AS2( psrld xmm7, 16) - -#define Mul_Begin(n) \ - MulPrologue \ - AS2( mov esi, esp)\ - AS2( and esp, 0xfffffff0)\ - AS2( sub esp, 48*n+16)\ - AS1( push esi)\ - AS2( xor edx, edx) \ - ASL(1) \ - ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ - ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ - ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ - AS2( movdqa [esp+20+2*edx], xmm0) \ - AS2( psrlq xmm0, 32) \ - AS2( movdqa [esp+20+2*edx+16], xmm0) \ - AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ - AS2( psrlq xmm1, 32) \ - AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ - AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ - AS2( psrlq xmm2, 32) \ - AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ - AS2( add edx, 16) \ - AS2( cmp edx, 8*(n)) \ - ASJ( jne, 1, b) \ - AS2( lea edi, [esp+20])\ - AS2( lea edx, [esp+20+16*n])\ - AS2( lea esi, [esp+20+32*n])\ - SSE2_FirstMultiply(0) \ - -#define Mul_Acc(i) \ - ASL(LMul##i) \ - AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( movdqa xmm2, [ebx]) \ - AS2( pmuludq xmm0, xmm1) \ - AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( movdqa xmm3, xmm2) \ - AS2( pand xmm2, xmm0) \ - AS2( psrld xmm0, 16) \ - AS2( paddd xmm4, xmm2) \ - AS2( paddd xmm5, xmm0) \ - AS2( pand xmm3, xmm1) \ - AS2( psrld xmm1, 16) \ - AS2( paddd xmm6, xmm3) \ - AS2( paddd xmm7, xmm1) \ - -#define Mul_Acc1(i) -#define Mul_Acc2(i) ASC(call, LMul##i) -#define Mul_Acc3(i) Mul_Acc2(i) -#define Mul_Acc4(i) Mul_Acc2(i) -#define Mul_Acc5(i) Mul_Acc2(i) -#define Mul_Acc6(i) Mul_Acc2(i) -#define Mul_Acc7(i) Mul_Acc2(i) -#define Mul_Acc8(i) Mul_Acc2(i) -#define Mul_Acc9(i) Mul_Acc2(i) -#define Mul_Acc10(i) Mul_Acc2(i) -#define Mul_Acc11(i) Mul_Acc2(i) -#define Mul_Acc12(i) Mul_Acc2(i) -#define Mul_Acc13(i) Mul_Acc2(i) -#define Mul_Acc14(i) Mul_Acc2(i) -#define Mul_Acc15(i) Mul_Acc2(i) -#define Mul_Acc16(i) Mul_Acc2(i) - -#define Mul_Column1(k, i) \ - SSE2_SaveShift(k) \ - AS2( add esi, 16) \ - SSE2_MulAdd45\ - Mul_Acc##i(i) \ - -#define Mul_Column0(k, i) \ - SSE2_SaveShift(k) \ - AS2( add edi, 16) \ - AS2( add edx, 16) \ - SSE2_MulAdd45\ - Mul_Acc##i(i) \ - -#define Bot_Acc(i) \ - AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( pmuludq xmm0, xmm1) \ - AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( paddq xmm4, xmm0) \ - AS2( paddd xmm6, xmm1) - -#define Bot_SaveAcc(k) \ - SSE2_SaveShift(k) \ - AS2( add edi, 16) \ - AS2( add edx, 16) \ - AS2( movdqa xmm6, [esi]) \ - AS2( movdqa xmm0, [edi]) \ - AS2( pmuludq xmm0, xmm6) \ - AS2( paddq xmm4, xmm0) \ - AS2( psllq xmm5, 16) \ - AS2( paddq xmm4, xmm5) \ - AS2( pmuludq xmm6, [edx]) - -#define Bot_End(n) \ - AS2( movhlps xmm7, xmm6) \ - AS2( paddd xmm6, xmm7) \ - AS2( psllq xmm6, 32) \ - AS2( paddd xmm4, xmm6) \ - AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \ - AS1( pop esp)\ - MulEpilogue - -#define Top_Begin(n) \ - TopPrologue \ - AS2( mov edx, esp)\ - AS2( and esp, 0xfffffff0)\ - AS2( sub esp, 48*n+16)\ - AS1( push edx)\ - AS2( xor edx, edx) \ - ASL(1) \ - ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ - ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ - ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ - AS2( movdqa [esp+20+2*edx], xmm0) \ - AS2( psrlq xmm0, 32) \ - AS2( movdqa [esp+20+2*edx+16], xmm0) \ - AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ - AS2( psrlq xmm1, 32) \ - AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ - AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ - AS2( psrlq xmm2, 32) \ - AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ - AS2( add edx, 16) \ - AS2( cmp edx, 8*(n)) \ - ASJ( jne, 1, b) \ - AS2( mov eax, esi) \ - AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\ - AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\ - AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\ - AS2( pxor xmm4, xmm4)\ - AS2( pxor xmm5, xmm5) - -#define Top_Acc(i) \ - AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \ - AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ - AS2( psrlq xmm0, 48) \ - AS2( paddd xmm5, xmm0)\ - -#define Top_Column0(i) \ - AS2( psllq xmm5, 32) \ - AS2( add edi, 16) \ - AS2( add edx, 16) \ - SSE2_MulAdd45\ - Mul_Acc##i(i) \ - -#define Top_Column1(i) \ - SSE2_SaveShift(0) \ - AS2( add esi, 16) \ - SSE2_MulAdd45\ - Mul_Acc##i(i) \ - AS2( shr eax, 16) \ - AS2( movd xmm0, eax)\ - AS2( movd xmm1, [ecx+4])\ - AS2( psrld xmm1, 16)\ - AS2( pcmpgtd xmm1, xmm0)\ - AS2( psrld xmm1, 31)\ - AS2( paddd xmm4, xmm1)\ - -void SSE2_Square4(word *C, const word *A) -{ - Squ_Begin(2) - Squ_Column0(0, 1) - Squ_End(2) -} - -void SSE2_Square8(word *C, const word *A) -{ - Squ_Begin(4) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Squ_Acc(2) - AS1( ret) ASL(0) -#endif - Squ_Column0(0, 1) - Squ_Column1(1, 1) - Squ_Column0(2, 2) - Squ_Column1(3, 1) - Squ_Column0(4, 1) - Squ_End(4) -} - -void SSE2_Square16(word *C, const word *A) -{ - Squ_Begin(8) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) - AS1( ret) ASL(0) -#endif - Squ_Column0(0, 1) - Squ_Column1(1, 1) - Squ_Column0(2, 2) - Squ_Column1(3, 2) - Squ_Column0(4, 3) - Squ_Column1(5, 3) - Squ_Column0(6, 4) - Squ_Column1(7, 3) - Squ_Column0(8, 3) - Squ_Column1(9, 2) - Squ_Column0(10, 2) - Squ_Column1(11, 1) - Squ_Column0(12, 1) - Squ_End(8) -} - -void SSE2_Square32(word *C, const word *A) -{ - Squ_Begin(16) - ASJ( jmp, 0, f) - Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) - AS1( ret) ASL(0) - Squ_Column0(0, 1) - Squ_Column1(1, 1) - Squ_Column0(2, 2) - Squ_Column1(3, 2) - Squ_Column0(4, 3) - Squ_Column1(5, 3) - Squ_Column0(6, 4) - Squ_Column1(7, 4) - Squ_Column0(8, 5) - Squ_Column1(9, 5) - Squ_Column0(10, 6) - Squ_Column1(11, 6) - Squ_Column0(12, 7) - Squ_Column1(13, 7) - Squ_Column0(14, 8) - Squ_Column1(15, 7) - Squ_Column0(16, 7) - Squ_Column1(17, 6) - Squ_Column0(18, 6) - Squ_Column1(19, 5) - Squ_Column0(20, 5) - Squ_Column1(21, 4) - Squ_Column0(22, 4) - Squ_Column1(23, 3) - Squ_Column0(24, 3) - Squ_Column1(25, 2) - Squ_Column0(26, 2) - Squ_Column1(27, 1) - Squ_Column0(28, 1) - Squ_End(16) -} - -void SSE2_Multiply4(word *C, const word *A, const word *B) -{ - Mul_Begin(2) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_End(2) -} - -void SSE2_Multiply8(word *C, const word *A, const word *B) -{ - Mul_Begin(4) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Mul_Column0(2, 4) - Mul_Column1(3, 3) - Mul_Column0(4, 2) - Mul_End(4) -} - -void SSE2_Multiply16(word *C, const word *A, const word *B) -{ - Mul_Begin(8) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Mul_Column0(2, 4) - Mul_Column1(3, 5) - Mul_Column0(4, 6) - Mul_Column1(5, 7) - Mul_Column0(6, 8) - Mul_Column1(7, 7) - Mul_Column0(8, 6) - Mul_Column1(9, 5) - Mul_Column0(10, 4) - Mul_Column1(11, 3) - Mul_Column0(12, 2) - Mul_End(8) -} - -void SSE2_Multiply32(word *C, const word *A, const word *B) -{ - Mul_Begin(16) - ASJ( jmp, 0, f) - Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Mul_Column0(2, 4) - Mul_Column1(3, 5) - Mul_Column0(4, 6) - Mul_Column1(5, 7) - Mul_Column0(6, 8) - Mul_Column1(7, 9) - Mul_Column0(8, 10) - Mul_Column1(9, 11) - Mul_Column0(10, 12) - Mul_Column1(11, 13) - Mul_Column0(12, 14) - Mul_Column1(13, 15) - Mul_Column0(14, 16) - Mul_Column1(15, 15) - Mul_Column0(16, 14) - Mul_Column1(17, 13) - Mul_Column0(18, 12) - Mul_Column1(19, 11) - Mul_Column0(20, 10) - Mul_Column1(21, 9) - Mul_Column0(22, 8) - Mul_Column1(23, 7) - Mul_Column0(24, 6) - Mul_Column1(25, 5) - Mul_Column0(26, 4) - Mul_Column1(27, 3) - Mul_Column0(28, 2) - Mul_End(16) -} - -void SSE2_MultiplyBottom4(word *C, const word *A, const word *B) -{ - Mul_Begin(2) - Bot_SaveAcc(0) Bot_Acc(2) - Bot_End(2) -} - -void SSE2_MultiplyBottom8(word *C, const word *A, const word *B) -{ - Mul_Begin(4) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) - Bot_End(4) -} - -void SSE2_MultiplyBottom16(word *C, const word *A, const word *B) -{ - Mul_Begin(8) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Mul_Column0(2, 4) - Mul_Column1(3, 5) - Mul_Column0(4, 6) - Mul_Column1(5, 7) - Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) - Bot_End(8) -} - -void SSE2_MultiplyBottom32(word *C, const word *A, const word *B) -{ - Mul_Begin(16) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Mul_Column0(0, 2) - Mul_Column1(1, 3) - Mul_Column0(2, 4) - Mul_Column1(3, 5) - Mul_Column0(4, 6) - Mul_Column1(5, 7) - Mul_Column0(6, 8) - Mul_Column1(7, 9) - Mul_Column0(8, 10) - Mul_Column1(9, 11) - Mul_Column0(10, 12) - Mul_Column1(11, 13) - Mul_Column0(12, 14) - Mul_Column1(13, 15) - Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) - Bot_End(16) -} - -void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L) -{ - Top_Begin(4) - Top_Acc(3) Top_Acc(2) Top_Acc(1) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Top_Column0(4) - Top_Column1(3) - Mul_Column0(0, 2) - Top_End(2) -} - -void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L) -{ - Top_Begin(8) - Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Top_Column0(8) - Top_Column1(7) - Mul_Column0(0, 6) - Mul_Column1(1, 5) - Mul_Column0(2, 4) - Mul_Column1(3, 3) - Mul_Column0(4, 2) - Top_End(4) -} - -void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L) -{ - Top_Begin(16) - Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) -#ifndef __GNUC__ - ASJ( jmp, 0, f) - Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) - AS1( ret) ASL(0) -#endif - Top_Column0(16) - Top_Column1(15) - Mul_Column0(0, 14) - Mul_Column1(1, 13) - Mul_Column0(2, 12) - Mul_Column1(3, 11) - Mul_Column0(4, 10) - Mul_Column1(5, 9) - Mul_Column0(6, 8) - Mul_Column1(7, 7) - Mul_Column0(8, 6) - Mul_Column1(9, 5) - Mul_Column0(10, 4) - Mul_Column1(11, 3) - Mul_Column0(12, 2) - Top_End(8) -} - -#endif // #if CRYPTOPP_INTEGER_SSE2 - -// ******************************************************** - -typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B); -typedef void (* PMul)(word *C, const word *A, const word *B); -typedef void (* PSqu)(word *C, const word *A); -typedef void (* PMulTop)(word *C, const word *A, const word *B, word L); - -#if CRYPTOPP_INTEGER_SSE2 -static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub; -static size_t s_recursionLimit = 8; -#else -static const size_t s_recursionLimit = 16; -#endif - -static PMul s_pMul[9], s_pBot[9]; -static PSqu s_pSqu[9]; -static PMulTop s_pTop[9]; - -static void SetFunctionPointers() -{ - s_pMul[0] = &Baseline_Multiply2; - s_pBot[0] = &Baseline_MultiplyBottom2; - s_pSqu[0] = &Baseline_Square2; - s_pTop[0] = &Baseline_MultiplyTop2; - s_pTop[1] = &Baseline_MultiplyTop4; - -#if CRYPTOPP_INTEGER_SSE2 - if (HasSSE2()) - { -#if _MSC_VER != 1200 || defined(NDEBUG) - if (IsP4()) - { - s_pAdd = &SSE2_Add; - s_pSub = &SSE2_Sub; - } -#endif - - s_recursionLimit = 32; - - s_pMul[1] = &SSE2_Multiply4; - s_pMul[2] = &SSE2_Multiply8; - s_pMul[4] = &SSE2_Multiply16; - s_pMul[8] = &SSE2_Multiply32; - - s_pBot[1] = &SSE2_MultiplyBottom4; - s_pBot[2] = &SSE2_MultiplyBottom8; - s_pBot[4] = &SSE2_MultiplyBottom16; - s_pBot[8] = &SSE2_MultiplyBottom32; - - s_pSqu[1] = &SSE2_Square4; - s_pSqu[2] = &SSE2_Square8; - s_pSqu[4] = &SSE2_Square16; - s_pSqu[8] = &SSE2_Square32; - - s_pTop[2] = &SSE2_MultiplyTop8; - s_pTop[4] = &SSE2_MultiplyTop16; - s_pTop[8] = &SSE2_MultiplyTop32; - } - else -#endif - { - s_pMul[1] = &Baseline_Multiply4; - s_pMul[2] = &Baseline_Multiply8; - - s_pBot[1] = &Baseline_MultiplyBottom4; - s_pBot[2] = &Baseline_MultiplyBottom8; - - s_pSqu[1] = &Baseline_Square4; - s_pSqu[2] = &Baseline_Square8; - - s_pTop[2] = &Baseline_MultiplyTop8; - -#if !CRYPTOPP_INTEGER_SSE2 - s_pMul[4] = &Baseline_Multiply16; - s_pBot[4] = &Baseline_MultiplyBottom16; - s_pSqu[4] = &Baseline_Square16; - s_pTop[4] = &Baseline_MultiplyTop16; -#endif - } -} - -inline int Add(word *C, const word *A, const word *B, size_t N) -{ -#if CRYPTOPP_INTEGER_SSE2 - return s_pAdd(N, C, A, B); -#else - return Baseline_Add(N, C, A, B); -#endif -} - -inline int Subtract(word *C, const word *A, const word *B, size_t N) -{ -#if CRYPTOPP_INTEGER_SSE2 - return s_pSub(N, C, A, B); -#else - return Baseline_Sub(N, C, A, B); -#endif -} - -// ******************************************************** - - -#define A0 A -#define A1 (A+N2) -#define B0 B -#define B1 (B+N2) - -#define T0 T -#define T1 (T+N2) -#define T2 (T+N) -#define T3 (T+N+N2) - -#define R0 R -#define R1 (R+N2) -#define R2 (R+N) -#define R3 (R+N+N2) - -// R[2*N] - result = A*B -// T[2*N] - temporary work space -// A[N] --- multiplier -// B[N] --- multiplicant - -void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N) -{ - assert(N>=2 && N%2==0); - - if (N <= s_recursionLimit) - s_pMul[N/4](R, A, B); - else - { - const size_t N2 = N/2; - - size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; - Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); - - size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; - Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); - - RecursiveMultiply(R2, T2, A1, B1, N2); - RecursiveMultiply(T0, T2, R0, R1, N2); - RecursiveMultiply(R0, T2, A0, B0, N2); - - // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1 - - int c2 = Add(R2, R2, R1, N2); - int c3 = c2; - c2 += Add(R1, R2, R0, N2); - c3 += Add(R2, R2, R3, N2); - - if (AN2 == BN2) - c3 -= Subtract(R1, R1, T0, N); - else - c3 += Add(R1, R1, T0, N); - - c3 += Increment(R2, N2, c2); - assert (c3 >= 0 && c3 <= 2); - Increment(R3, N2, c3); - } -} - -// R[2*N] - result = A*A -// T[2*N] - temporary work space -// A[N] --- number to be squared - -void RecursiveSquare(word *R, word *T, const word *A, size_t N) -{ - assert(N && N%2==0); - - if (N <= s_recursionLimit) - s_pSqu[N/4](R, A); - else - { - const size_t N2 = N/2; - - RecursiveSquare(R0, T2, A0, N2); - RecursiveSquare(R2, T2, A1, N2); - RecursiveMultiply(T0, T2, A0, A1, N2); - - int carry = Add(R1, R1, T0, N); - carry += Add(R1, R1, T0, N); - Increment(R3, N2, carry); - } -} - -// R[N] - bottom half of A*B -// T[3*N/2] - temporary work space -// A[N] - multiplier -// B[N] - multiplicant - -void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) -{ - assert(N>=2 && N%2==0); - - if (N <= s_recursionLimit) - s_pBot[N/4](R, A, B); - else - { - const size_t N2 = N/2; - - RecursiveMultiply(R, T, A0, B0, N2); - RecursiveMultiplyBottom(T0, T1, A1, B0, N2); - Add(R1, R1, T0, N2); - RecursiveMultiplyBottom(T0, T1, A0, B1, N2); - Add(R1, R1, T0, N2); - } -} - -// R[N] --- upper half of A*B -// T[2*N] - temporary work space -// L[N] --- lower half of A*B -// A[N] --- multiplier -// B[N] --- multiplicant - -void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N) -{ - assert(N>=2 && N%2==0); - - if (N <= s_recursionLimit) - s_pTop[N/4](R, A, B, L[N-1]); - else - { - const size_t N2 = N/2; - - size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; - Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); - - size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; - Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); - - RecursiveMultiply(T0, T2, R0, R1, N2); - RecursiveMultiply(R0, T2, A1, B1, N2); - - // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1 - - int t, c3; - int c2 = Subtract(T2, L+N2, L, N2); - - if (AN2 == BN2) - { - c2 -= Add(T2, T2, T0, N2); - t = (Compare(T2, R0, N2) == -1); - c3 = t - Subtract(T2, T2, T1, N2); - } - else - { - c2 += Subtract(T2, T2, T0, N2); - t = (Compare(T2, R0, N2) == -1); - c3 = t + Add(T2, T2, T1, N2); - } - - c2 += t; - if (c2 >= 0) - c3 += Increment(T2, N2, c2); - else - c3 -= Decrement(T2, N2, -c2); - c3 += Add(R0, T2, R1, N2); - - assert (c3 >= 0 && c3 <= 2); - Increment(R1, N2, c3); - } -} - -inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N) -{ - RecursiveMultiply(R, T, A, B, N); -} - -inline void Square(word *R, word *T, const word *A, size_t N) -{ - RecursiveSquare(R, T, A, N); -} - -inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) -{ - RecursiveMultiplyBottom(R, T, A, B, N); -} - -// R[NA+NB] - result = A*B -// T[NA+NB] - temporary work space -// A[NA] ---- multiplier -// B[NB] ---- multiplicant - -void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB) -{ - if (NA == NB) - { - if (A == B) - Square(R, T, A, NA); - else - Multiply(R, T, A, B, NA); - - return; - } - - if (NA > NB) - { - std::swap(A, B); - std::swap(NA, NB); - } - - assert(NB % NA == 0); - - if (NA==2 && !A[1]) - { - switch (A[0]) - { - case 0: - SetWords(R, 0, NB+2); - return; - case 1: - CopyWords(R, B, NB); - R[NB] = R[NB+1] = 0; - return; - default: - R[NB] = LinearMultiply(R, B, A[0], NB); - R[NB+1] = 0; - return; - } - } - - size_t i; - if ((NB/NA)%2 == 0) - { - Multiply(R, T, A, B, NA); - CopyWords(T+2*NA, R+NA, NA); - - for (i=2*NA; i<NB; i+=2*NA) - Multiply(T+NA+i, T, A, B+i, NA); - for (i=NA; i<NB; i+=2*NA) - Multiply(R+i, T, A, B+i, NA); - } - else - { - for (i=0; i<NB; i+=2*NA) - Multiply(R+i, T, A, B+i, NA); - for (i=NA; i<NB; i+=2*NA) - Multiply(T+NA+i, T, A, B+i, NA); - } - - if (Add(R+NA, R+NA, T+2*NA, NB-NA)) - Increment(R+NB, NA); -} - -// R[N] ----- result = A inverse mod 2**(WORD_BITS*N) -// T[3*N/2] - temporary work space -// A[N] ----- an odd number as input - -void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N) -{ - if (N==2) - { - T[0] = AtomicInverseModPower2(A[0]); - T[1] = 0; - s_pBot[0](T+2, T, A); - TwosComplement(T+2, 2); - Increment(T+2, 2, 2); - s_pBot[0](R, T, T+2); - } - else - { - const size_t N2 = N/2; - RecursiveInverseModPower2(R0, T0, A0, N2); - T0[0] = 1; - SetWords(T0+1, 0, N2-1); - MultiplyTop(R1, T1, T0, R0, A0, N2); - MultiplyBottom(T0, T1, R0, A1, N2); - Add(T0, R1, T0, N2); - TwosComplement(T0, N2); - MultiplyBottom(R1, T1, R0, T0, N2); - } -} - -// R[N] --- result = X/(2**(WORD_BITS*N)) mod M -// T[3*N] - temporary work space -// X[2*N] - number to be reduced -// M[N] --- modulus -// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N) - -void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N) -{ -#if 1 - MultiplyBottom(R, T, X, U, N); - MultiplyTop(T, T+N, X, R, M, N); - word borrow = Subtract(T, X+N, T, N); - // defend against timing attack by doing this Add even when not needed - word carry = Add(T+N, T, M, N); - assert(carry | !borrow); - CopyWords(R, T + ((0-borrow) & N), N); -#elif 0 - const word u = 0-U[0]; - Declare2Words(p) - for (size_t i=0; i<N; i++) - { - const word t = u * X[i]; - word c = 0; - for (size_t j=0; j<N; j+=2) - { - MultiplyWords(p, t, M[j]); - Acc2WordsBy1(p, X[i+j]); - Acc2WordsBy1(p, c); - X[i+j] = LowWord(p); - c = HighWord(p); - MultiplyWords(p, t, M[j+1]); - Acc2WordsBy1(p, X[i+j+1]); - Acc2WordsBy1(p, c); - X[i+j+1] = LowWord(p); - c = HighWord(p); - } - - if (Increment(X+N+i, N-i, c)) - while (!Subtract(X+N, X+N, M, N)) {} - } - - memcpy(R, X+N, N*WORD_SIZE); -#else - __m64 u = _mm_cvtsi32_si64(0-U[0]), p; - for (size_t i=0; i<N; i++) - { - __m64 t = _mm_cvtsi32_si64(X[i]); - t = _mm_mul_su32(t, u); - __m64 c = _mm_setzero_si64(); - for (size_t j=0; j<N; j+=2) - { - p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j])); - p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j])); - c = _mm_add_si64(c, p); - X[i+j] = _mm_cvtsi64_si32(c); - c = _mm_srli_si64(c, 32); - p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1])); - p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1])); - c = _mm_add_si64(c, p); - X[i+j+1] = _mm_cvtsi64_si32(c); - c = _mm_srli_si64(c, 32); - } - - if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c))) - while (!Subtract(X+N, X+N, M, N)) {} - } - - memcpy(R, X+N, N*WORD_SIZE); - _mm_empty(); -#endif -} - -// R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M -// T[2*N] - temporary work space -// X[2*N] - number to be reduced -// M[N] --- modulus -// U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2) -// V[N] --- 2**(WORD_BITS*3*N/2) mod M - -void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N) -{ - assert(N%2==0 && N>=4); - -#define M0 M -#define M1 (M+N2) -#define V0 V -#define V1 (V+N2) - -#define X0 X -#define X1 (X+N2) -#define X2 (X+N) -#define X3 (X+N+N2) - - const size_t N2 = N/2; - Multiply(T0, T2, V0, X3, N2); - int c2 = Add(T0, T0, X0, N); - MultiplyBottom(T3, T2, T0, U, N2); - MultiplyTop(T2, R, T0, T3, M0, N2); - c2 -= Subtract(T2, T1, T2, N2); - Multiply(T0, R, T3, M1, N2); - c2 -= Subtract(T0, T2, T0, N2); - int c3 = -(int)Subtract(T1, X2, T1, N2); - Multiply(R0, T2, V1, X3, N2); - c3 += Add(R, R, T, N); - - if (c2>0) - c3 += Increment(R1, N2); - else if (c2<0) - c3 -= Decrement(R1, N2, -c2); - - assert(c3>=-1 && c3<=1); - if (c3>0) - Subtract(R, R, M, N); - else if (c3<0) - Add(R, R, M, N); - -#undef M0 -#undef M1 -#undef V0 -#undef V1 - -#undef X0 -#undef X1 -#undef X2 -#undef X3 -} - -#undef A0 -#undef A1 -#undef B0 -#undef B1 - -#undef T0 -#undef T1 -#undef T2 -#undef T3 - -#undef R0 -#undef R1 -#undef R2 -#undef R3 - -/* -// do a 3 word by 2 word divide, returns quotient and leaves remainder in A -static word SubatomicDivide(word *A, word B0, word B1) -{ - // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word - assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); - - // estimate the quotient: do a 2 word by 1 word divide - word Q; - if (B1+1 == 0) - Q = A[2]; - else - Q = DWord(A[1], A[2]).DividedBy(B1+1); - - // now subtract Q*B from A - DWord p = DWord::Multiply(B0, Q); - DWord u = (DWord) A[0] - p.GetLowHalf(); - A[0] = u.GetLowHalf(); - u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); - A[1] = u.GetLowHalf(); - A[2] += u.GetHighHalf(); - - // Q <= actual quotient, so fix it - while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) - { - u = (DWord) A[0] - B0; - A[0] = u.GetLowHalf(); - u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); - A[1] = u.GetLowHalf(); - A[2] += u.GetHighHalf(); - Q++; - assert(Q); // shouldn't overflow - } - - return Q; -} - -// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 -static inline void AtomicDivide(word *Q, const word *A, const word *B) -{ - if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) - { - Q[0] = A[2]; - Q[1] = A[3]; - } - else - { - word T[4]; - T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3]; - Q[1] = SubatomicDivide(T+1, B[0], B[1]); - Q[0] = SubatomicDivide(T, B[0], B[1]); - -#ifndef NDEBUG - // multiply quotient and divisor and add remainder, make sure it equals dividend - assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); - word P[4]; - LowLevel::Multiply2(P, Q, B); - Add(P, P, T, 4); - assert(memcmp(P, A, 4*WORD_SIZE)==0); -#endif - } -} -*/ - -static inline void AtomicDivide(word *Q, const word *A, const word *B) -{ - word T[4]; - DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); - Q[0] = q.GetLowHalf(); - Q[1] = q.GetHighHalf(); - -#ifndef NDEBUG - if (B[0] || B[1]) - { - // multiply quotient and divisor and add remainder, make sure it equals dividend - assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); - word P[4]; - s_pMul[0](P, Q, B); - Add(P, P, T, 4); - assert(memcmp(P, A, 4*WORD_SIZE)==0); - } -#endif -} - -// for use by Divide(), corrects the underestimated quotient {Q1,Q0} -static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N) -{ - assert(N && N%2==0); - - AsymmetricMultiply(T, T+N+2, Q, 2, B, N); - - word borrow = Subtract(R, R, T, N+2); - assert(!borrow && !R[N+1]); - - while (R[N] || Compare(R, B, N) >= 0) - { - R[N] -= Subtract(R, R, B, N); - Q[1] += (++Q[0]==0); - assert(Q[0] || Q[1]); // no overflow - } -} - -// R[NB] -------- remainder = A%B -// Q[NA-NB+2] --- quotient = A/B -// T[NA+3*(NB+2)] - temp work space -// A[NA] -------- dividend -// B[NB] -------- divisor - -void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB) -{ - assert(NA && NB && NA%2==0 && NB%2==0); - assert(B[NB-1] || B[NB-2]); - assert(NB <= NA); - - // set up temporary work space - word *const TA=T; - word *const TB=T+NA+2; - word *const TP=T+NA+2+NB; - - // copy B into TB and normalize it so that TB has highest bit set to 1 - unsigned shiftWords = (B[NB-1]==0); - TB[0] = TB[NB-1] = 0; - CopyWords(TB+shiftWords, B, NB-shiftWords); - unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); - assert(shiftBits < WORD_BITS); - ShiftWordsLeftByBits(TB, NB, shiftBits); - - // copy A into TA and normalize it - TA[0] = TA[NA] = TA[NA+1] = 0; - CopyWords(TA+shiftWords, A, NA); - ShiftWordsLeftByBits(TA, NA+2, shiftBits); - - if (TA[NA+1]==0 && TA[NA] <= 1) - { - Q[NA-NB+1] = Q[NA-NB] = 0; - while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) - { - TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); - ++Q[NA-NB]; - } - } - else - { - NA+=2; - assert(Compare(TA+NA-NB, TB, NB) < 0); - } - - word BT[2]; - BT[0] = TB[NB-2] + 1; - BT[1] = TB[NB-1] + (BT[0]==0); - - // start reducing TA mod TB, 2 words at a time - for (size_t i=NA-2; i>=NB; i-=2) - { - AtomicDivide(Q+i-NB, TA+i-2, BT); - CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); - } - - // copy TA into R, and denormalize it - CopyWords(R, TA+shiftWords, NB); - ShiftWordsRightByBits(R, NB, shiftBits); -} - -static inline size_t EvenWordCount(const word *X, size_t N) -{ - while (N && X[N-2]==0 && X[N-1]==0) - N-=2; - return N; -} - -// return k -// R[N] --- result = A^(-1) * 2^k mod M -// T[4*N] - temporary work space -// A[NA] -- number to take inverse of -// M[N] --- modulus - -unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N) -{ - assert(NA<=N && N && N%2==0); - - word *b = T; - word *c = T+N; - word *f = T+2*N; - word *g = T+3*N; - size_t bcLen=2, fgLen=EvenWordCount(M, N); - unsigned int k=0; - bool s=false; - - SetWords(T, 0, 3*N); - b[0]=1; - CopyWords(f, A, NA); - CopyWords(g, M, N); - - while (1) - { - word t=f[0]; - while (!t) - { - if (EvenWordCount(f, fgLen)==0) - { - SetWords(R, 0, N); - return 0; - } - - ShiftWordsRightByWords(f, fgLen, 1); - bcLen += 2 * (c[bcLen-1] != 0); - assert(bcLen <= N); - ShiftWordsLeftByWords(c, bcLen, 1); - k+=WORD_BITS; - t=f[0]; - } - - unsigned int i = TrailingZeros(t); - t >>= i; - k += i; - - if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0) - { - if (s) - Subtract(R, M, b, N); - else - CopyWords(R, b, N); - return k; - } - - ShiftWordsRightByBits(f, fgLen, i); - t = ShiftWordsLeftByBits(c, bcLen, i); - c[bcLen] += t; - bcLen += 2 * (t!=0); - assert(bcLen <= N); - - bool swap = Compare(f, g, fgLen)==-1; - ConditionalSwapPointers(swap, f, g); - ConditionalSwapPointers(swap, b, c); - s ^= swap; - - fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]); - - Subtract(f, f, g, fgLen); - t = Add(b, b, c, bcLen); - b[bcLen] += t; - bcLen += 2*t; - assert(bcLen <= N); - } -} - -// R[N] - result = A/(2^k) mod M -// A[N] - input -// M[N] - modulus - -void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) -{ - CopyWords(R, A, N); - - while (k--) - { - if (R[0]%2==0) - ShiftWordsRightByBits(R, N, 1); - else - { - word carry = Add(R, R, M, N); - ShiftWordsRightByBits(R, N, 1); - R[N-1] += carry<<(WORD_BITS-1); - } - } -} - -// R[N] - result = A*(2^k) mod M -// A[N] - input -// M[N] - modulus - -void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) -{ - CopyWords(R, A, N); - - while (k--) - if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) - Subtract(R, R, M, N); -} - -// ****************************************************************** - -InitializeInteger::InitializeInteger() -{ - if (!g_pAssignIntToInteger) - { - SetFunctionPointers(); - g_pAssignIntToInteger = AssignIntToInteger; - } -} - -static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; - -static inline size_t RoundupSize(size_t n) -{ - if (n<=8) - return RoundupSizeTable[n]; - else if (n<=16) - return 16; - else if (n<=32) - return 32; - else if (n<=64) - return 64; - else return size_t(1) << BitPrecision(n-1); -} - -Integer::Integer() - : reg(2), sign(POSITIVE) -{ - reg[0] = reg[1] = 0; -} - -Integer::Integer(const Integer& t) - : reg(RoundupSize(t.WordCount())), sign(t.sign) -{ - CopyWords(reg, t.reg, reg.size()); -} - -Integer::Integer(Sign s, lword value) - : reg(2), sign(s) -{ - reg[0] = word(value); - reg[1] = word(SafeRightShift<WORD_BITS>(value)); -} - -Integer::Integer(signed long value) - : reg(2) -{ - if (value >= 0) - sign = POSITIVE; - else - { - sign = NEGATIVE; - value = -value; - } - reg[0] = word(value); - reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); -} - -Integer::Integer(Sign s, word high, word low) - : reg(2), sign(s) -{ - reg[0] = low; - reg[1] = high; -} - -bool Integer::IsConvertableToLong() const -{ - if (ByteCount() > sizeof(long)) - return false; - - unsigned long value = (unsigned long)reg[0]; - value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); - - if (sign==POSITIVE) - return (signed long)value >= 0; - else - return -(signed long)value < 0; -} - -signed long Integer::ConvertToLong() const -{ - assert(IsConvertableToLong()); - - unsigned long value = (unsigned long)reg[0]; - value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); - return sign==POSITIVE ? value : -(signed long)value; -} - -Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s) -{ - Decode(encodedInteger, byteCount, s); -} - -Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s) -{ - Decode(encodedInteger, byteCount, s); -} - -Integer::Integer(BufferedTransformation &bt) -{ - BERDecode(bt); -} - -Integer::Integer(RandomNumberGenerator &rng, size_t bitcount) -{ - Randomize(rng, bitcount); -} - -Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) -{ - if (!Randomize(rng, min, max, rnType, equiv, mod)) - throw Integer::RandomNumberNotFound(); -} - -Integer Integer::Power2(size_t e) -{ - Integer r((word)0, BitsToWords(e+1)); - r.SetBit(e); - return r; -} - -template <long i> -struct NewInteger -{ - Integer * operator()() const - { - return new Integer(i); - } -}; - -const Integer &Integer::Zero() -{ - return Singleton<Integer>().Ref(); -} - -const Integer &Integer::One() -{ - return Singleton<Integer, NewInteger<1> >().Ref(); -} - -const Integer &Integer::Two() -{ - return Singleton<Integer, NewInteger<2> >().Ref(); -} - -bool Integer::operator!() const -{ - return IsNegative() ? false : (reg[0]==0 && WordCount()==0); -} - -Integer& Integer::operator=(const Integer& t) -{ - if (this != &t) - { - if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0) - reg.New(RoundupSize(t.WordCount())); - CopyWords(reg, t.reg, reg.size()); - sign = t.sign; - } - return *this; -} - -bool Integer::GetBit(size_t n) const -{ - if (n/WORD_BITS >= reg.size()) - return 0; - else - return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); -} - -void Integer::SetBit(size_t n, bool value) -{ - if (value) - { - reg.CleanGrow(RoundupSize(BitsToWords(n+1))); - reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); - } - else - { - if (n/WORD_BITS < reg.size()) - reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); - } -} - -byte Integer::GetByte(size_t n) const -{ - if (n/WORD_SIZE >= reg.size()) - return 0; - else - return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); -} - -void Integer::SetByte(size_t n, byte value) -{ - reg.CleanGrow(RoundupSize(BytesToWords(n+1))); - reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); - reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); -} - -lword Integer::GetBits(size_t i, size_t n) const -{ - lword v = 0; - assert(n <= sizeof(v)*8); - for (unsigned int j=0; j<n; j++) - v |= lword(GetBit(i+j)) << j; - return v; -} - -Integer Integer::operator-() const -{ - Integer result(*this); - result.Negate(); - return result; -} - -Integer Integer::AbsoluteValue() const -{ - Integer result(*this); - result.sign = POSITIVE; - return result; -} - -void Integer::swap(Integer &a) -{ - reg.swap(a.reg); - std::swap(sign, a.sign); -} - -Integer::Integer(word value, size_t length) - : reg(RoundupSize(length)), sign(POSITIVE) -{ - reg[0] = value; - SetWords(reg+1, 0, reg.size()-1); -} - -template <class T> -static Integer StringToInteger(const T *str) -{ - int radix; - // GCC workaround - // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3 - unsigned int length; - for (length = 0; str[length] != 0; length++) {} - - Integer v; - - if (length == 0) - return v; - - switch (str[length-1]) - { - case 'h': - case 'H': - radix=16; - break; - case 'o': - case 'O': - radix=8; - break; - case 'b': - case 'B': - radix=2; - break; - default: - radix=10; - } - - if (length > 2 && str[0] == '0' && str[1] == 'x') - radix = 16; - - for (unsigned i=0; i<length; i++) - { - int digit; - - if (str[i] >= '0' && str[i] <= '9') - digit = str[i] - '0'; - else if (str[i] >= 'A' && str[i] <= 'F') - digit = str[i] - 'A' + 10; - else if (str[i] >= 'a' && str[i] <= 'f') - digit = str[i] - 'a' + 10; - else - digit = radix; - - if (digit < radix) - { - v *= radix; - v += digit; - } - } - - if (str[0] == '-') - v.Negate(); - - return v; -} - -Integer::Integer(const char *str) - : reg(2), sign(POSITIVE) -{ - *this = StringToInteger(str); -} - -Integer::Integer(const wchar_t *str) - : reg(2), sign(POSITIVE) -{ - *this = StringToInteger(str); -} - -unsigned int Integer::WordCount() const -{ - return (unsigned int)CountWords(reg, reg.size()); -} - -unsigned int Integer::ByteCount() const -{ - unsigned wordCount = WordCount(); - if (wordCount) - return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); - else - return 0; -} - -unsigned int Integer::BitCount() const -{ - unsigned wordCount = WordCount(); - if (wordCount) - return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); - else - return 0; -} - -void Integer::Decode(const byte *input, size_t inputLen, Signedness s) -{ - StringStore store(input, inputLen); - Decode(store, inputLen, s); -} - -void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s) -{ - assert(bt.MaxRetrievable() >= inputLen); - - byte b; - bt.Peek(b); - sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; - - while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) - { - bt.Skip(1); - inputLen--; - bt.Peek(b); - } - - reg.CleanNew(RoundupSize(BytesToWords(inputLen))); - - for (size_t i=inputLen; i > 0; i--) - { - bt.Get(b); - reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; - } - - if (sign == NEGATIVE) - { - for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++) - reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; - TwosComplement(reg, reg.size()); - } -} - -size_t Integer::MinEncodedSize(Signedness signedness) const -{ - unsigned int outputLen = STDMAX(1U, ByteCount()); - if (signedness == UNSIGNED) - return outputLen; - if (NotNegative() && (GetByte(outputLen-1) & 0x80)) - outputLen++; - if (IsNegative() && *this < -Power2(outputLen*8-1)) - outputLen++; - return outputLen; -} - -void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const -{ - ArraySink sink(output, outputLen); - Encode(sink, outputLen, signedness); -} - -void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const -{ - if (signedness == UNSIGNED || NotNegative()) - { - for (size_t i=outputLen; i > 0; i--) - bt.Put(GetByte(i-1)); - } - else - { - // take two's complement of *this - Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this; - temp.Encode(bt, outputLen, UNSIGNED); - } -} - -void Integer::DEREncode(BufferedTransformation &bt) const -{ - DERGeneralEncoder enc(bt, INTEGER); - Encode(enc, MinEncodedSize(SIGNED), SIGNED); - enc.MessageEnd(); -} - -void Integer::BERDecode(const byte *input, size_t len) -{ - StringStore store(input, len); - BERDecode(store); -} - -void Integer::BERDecode(BufferedTransformation &bt) -{ - BERGeneralDecoder dec(bt, INTEGER); - if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) - BERDecodeError(); - Decode(dec, (size_t)dec.RemainingLength(), SIGNED); - dec.MessageEnd(); -} - -void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const -{ - DERGeneralEncoder enc(bt, OCTET_STRING); - Encode(enc, length); - enc.MessageEnd(); -} - -void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length) -{ - BERGeneralDecoder dec(bt, OCTET_STRING); - if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) - BERDecodeError(); - Decode(dec, length); - dec.MessageEnd(); -} - -size_t Integer::OpenPGPEncode(byte *output, size_t len) const -{ - ArraySink sink(output, len); - return OpenPGPEncode(sink); -} - -size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const -{ - word16 bitCount = BitCount(); - bt.PutWord16(bitCount); - size_t byteCount = BitsToBytes(bitCount); - Encode(bt, byteCount); - return 2 + byteCount; -} - -void Integer::OpenPGPDecode(const byte *input, size_t len) -{ - StringStore store(input, len); - OpenPGPDecode(store); -} - -void Integer::OpenPGPDecode(BufferedTransformation &bt) -{ - word16 bitCount; - if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) - throw OpenPGPDecodeErr(); - Decode(bt, BitsToBytes(bitCount)); -} - -void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits) -{ - const size_t nbytes = nbits/8 + 1; - SecByteBlock buf(nbytes); - rng.GenerateBlock(buf, nbytes); - if (nbytes) - buf[0] = (byte)Crop(buf[0], nbits % 8); - Decode(buf, nbytes, UNSIGNED); -} - -void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) -{ - if (min > max) - throw InvalidArgument("Integer: Min must be no greater than Max"); - - Integer range = max - min; - const unsigned int nbits = range.BitCount(); - - do - { - Randomize(rng, nbits); - } - while (*this > range); - - *this += min; -} - -bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) -{ - return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); -} - -class KDF2_RNG : public RandomNumberGenerator -{ -public: - KDF2_RNG(const byte *seed, size_t seedSize) - : m_counter(0), m_counterAndSeed(seedSize + 4) - { - memcpy(m_counterAndSeed + 4, seed, seedSize); - } - - void GenerateBlock(byte *output, size_t size) - { - PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); - ++m_counter; - P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0); - } - -private: - word32 m_counter; - SecByteBlock m_counterAndSeed; -}; - -bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms) -{ - Integer min = params.GetValueWithDefault("Min", Integer::Zero()); - Integer max; - if (!params.GetValue("Max", max)) - { - int bitLength; - if (params.GetIntValue("BitLength", bitLength)) - max = Integer::Power2(bitLength); - else - throw InvalidArgument("Integer: missing Max argument"); - } - if (min > max) - throw InvalidArgument("Integer: Min must be no greater than Max"); - - Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); - Integer mod = params.GetValueWithDefault("Mod", Integer::One()); - - if (equiv.IsNegative() || equiv >= mod) - throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument"); - - Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY); - - member_ptr<KDF2_RNG> kdf2Rng; - ConstByteArrayParameter seed; - if (params.GetValue(Name::Seed(), seed)) - { - ByteQueue bq; - DERSequenceEncoder seq(bq); - min.DEREncode(seq); - max.DEREncode(seq); - equiv.DEREncode(seq); - mod.DEREncode(seq); - DEREncodeUnsigned(seq, rnType); - DEREncodeOctetString(seq, seed.begin(), seed.size()); - seq.MessageEnd(); - - SecByteBlock finalSeed((size_t)bq.MaxRetrievable()); - bq.Get(finalSeed, finalSeed.size()); - kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); - } - RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng; - - switch (rnType) - { - case ANY: - if (mod == One()) - Randomize(rng, min, max); - else - { - Integer min1 = min + (equiv-min)%mod; - if (max < min1) - return false; - Randomize(rng, Zero(), (max - min1) / mod); - *this *= mod; - *this += min1; - } - return true; - - case PRIME: - { - const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL); - - int i; - i = 0; - while (1) - { - if (++i==16) - { - // check if there are any suitable primes in [min, max] - Integer first = min; - if (FirstPrime(first, max, equiv, mod, pSelector)) - { - // if there is only one suitable prime, we're done - *this = first; - if (!FirstPrime(first, max, equiv, mod, pSelector)) - return true; - } - else - return false; - } - - Randomize(rng, min, max); - if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) - return true; - } - } - - default: - throw InvalidArgument("Integer: invalid RandomNumberType argument"); - } -} - -std::istream& operator>>(std::istream& in, Integer &a) -{ - char c; - unsigned int length = 0; - SecBlock<char> str(length + 16); - - std::ws(in); - - do - { - in.read(&c, 1); - str[length++] = c; - if (length >= str.size()) - str.Grow(length + 16); - } - while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.')); - - if (in.gcount()) - in.putback(c); - str[length-1] = '\0'; - a = Integer(str); - - return in; -} - -std::ostream& operator<<(std::ostream& out, const Integer &a) -{ - // Get relevant conversion specifications from ostream. - long f = out.flags() & std::ios::basefield; // Get base digits. - int base, block; - char suffix; - switch(f) - { - case std::ios::oct : - base = 8; - block = 8; - suffix = 'o'; - break; - case std::ios::hex : - base = 16; - block = 4; - suffix = 'h'; - break; - default : - base = 10; - block = 3; - suffix = '.'; - } - - Integer temp1=a, temp2; - - if (a.IsNegative()) - { - out << '-'; - temp1.Negate(); - } - - if (!a) - out << '0'; - - static const char upper[]="0123456789ABCDEF"; - static const char lower[]="0123456789abcdef"; - - const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower; - unsigned i=0; - SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1); - - while (!!temp1) - { - word digit; - Integer::Divide(digit, temp2, temp1, base); - s[i++]=vec[digit]; - temp1.swap(temp2); - } - - while (i--) - { - out << s[i]; -// if (i && !(i%block)) -// out << ","; - } - return out << suffix; -} - -Integer& Integer::operator++() -{ - if (NotNegative()) - { - if (Increment(reg, reg.size())) - { - reg.CleanGrow(2*reg.size()); - reg[reg.size()/2]=1; - } - } - else - { - word borrow = Decrement(reg, reg.size()); - assert(!borrow); - if (WordCount()==0) - *this = Zero(); - } - return *this; -} - -Integer& Integer::operator--() -{ - if (IsNegative()) - { - if (Increment(reg, reg.size())) - { - reg.CleanGrow(2*reg.size()); - reg[reg.size()/2]=1; - } - } - else - { - if (Decrement(reg, reg.size())) - *this = -One(); - } - return *this; -} - -void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) -{ - int carry; - if (a.reg.size() == b.reg.size()) - carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); - else if (a.reg.size() > b.reg.size()) - { - carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); - CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); - carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); - } - else - { - carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); - CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); - carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); - } - - if (carry) - { - sum.reg.CleanGrow(2*sum.reg.size()); - sum.reg[sum.reg.size()/2] = 1; - } - sum.sign = Integer::POSITIVE; -} - -void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) -{ - unsigned aSize = a.WordCount(); - aSize += aSize%2; - unsigned bSize = b.WordCount(); - bSize += bSize%2; - - if (aSize == bSize) - { - if (Compare(a.reg, b.reg, aSize) >= 0) - { - Subtract(diff.reg, a.reg, b.reg, aSize); - diff.sign = Integer::POSITIVE; - } - else - { - Subtract(diff.reg, b.reg, a.reg, aSize); - diff.sign = Integer::NEGATIVE; - } - } - else if (aSize > bSize) - { - word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); - CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); - borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); - assert(!borrow); - diff.sign = Integer::POSITIVE; - } - else - { - word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); - CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); - borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); - assert(!borrow); - diff.sign = Integer::NEGATIVE; - } -} - -// MSVC .NET 2003 workaround -template <class T> inline const T& STDMAX2(const T& a, const T& b) -{ - return a < b ? b : a; -} - -Integer Integer::Plus(const Integer& b) const -{ - Integer sum((word)0, STDMAX2(reg.size(), b.reg.size())); - if (NotNegative()) - { - if (b.NotNegative()) - PositiveAdd(sum, *this, b); - else - PositiveSubtract(sum, *this, b); - } - else - { - if (b.NotNegative()) - PositiveSubtract(sum, b, *this); - else - { - PositiveAdd(sum, *this, b); - sum.sign = Integer::NEGATIVE; - } - } - return sum; -} - -Integer& Integer::operator+=(const Integer& t) -{ - reg.CleanGrow(t.reg.size()); - if (NotNegative()) - { - if (t.NotNegative()) - PositiveAdd(*this, *this, t); - else - PositiveSubtract(*this, *this, t); - } - else - { - if (t.NotNegative()) - PositiveSubtract(*this, t, *this); - else - { - PositiveAdd(*this, *this, t); - sign = Integer::NEGATIVE; - } - } - return *this; -} - -Integer Integer::Minus(const Integer& b) const -{ - Integer diff((word)0, STDMAX2(reg.size(), b.reg.size())); - if (NotNegative()) - { - if (b.NotNegative()) - PositiveSubtract(diff, *this, b); - else - PositiveAdd(diff, *this, b); - } - else - { - if (b.NotNegative()) - { - PositiveAdd(diff, *this, b); - diff.sign = Integer::NEGATIVE; - } - else - PositiveSubtract(diff, b, *this); - } - return diff; -} - -Integer& Integer::operator-=(const Integer& t) -{ - reg.CleanGrow(t.reg.size()); - if (NotNegative()) - { - if (t.NotNegative()) - PositiveSubtract(*this, *this, t); - else - PositiveAdd(*this, *this, t); - } - else - { - if (t.NotNegative()) - { - PositiveAdd(*this, *this, t); - sign = Integer::NEGATIVE; - } - else - PositiveSubtract(*this, t, *this); - } - return *this; -} - -Integer& Integer::operator<<=(size_t n) -{ - const size_t wordCount = WordCount(); - const size_t shiftWords = n / WORD_BITS; - const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); - - reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); - ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); - ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); - return *this; -} - -Integer& Integer::operator>>=(size_t n) -{ - const size_t wordCount = WordCount(); - const size_t shiftWords = n / WORD_BITS; - const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); - - ShiftWordsRightByWords(reg, wordCount, shiftWords); - if (wordCount > shiftWords) - ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); - if (IsNegative() && WordCount()==0) // avoid -0 - *this = Zero(); - return *this; -} - -void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) -{ - size_t aSize = RoundupSize(a.WordCount()); - size_t bSize = RoundupSize(b.WordCount()); - - product.reg.CleanNew(RoundupSize(aSize+bSize)); - product.sign = Integer::POSITIVE; - - IntegerSecBlock workspace(aSize + bSize); - AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); -} - -void Multiply(Integer &product, const Integer &a, const Integer &b) -{ - PositiveMultiply(product, a, b); - - if (a.NotNegative() != b.NotNegative()) - product.Negate(); -} - -Integer Integer::Times(const Integer &b) const -{ - Integer product; - Multiply(product, *this, b); - return product; -} - -/* -void PositiveDivide(Integer &remainder, Integer "ient, - const Integer ÷nd, const Integer &divisor) -{ - remainder.reg.CleanNew(divisor.reg.size()); - remainder.sign = Integer::POSITIVE; - quotient.reg.New(0); - quotient.sign = Integer::POSITIVE; - unsigned i=dividend.BitCount(); - while (i--) - { - word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1); - remainder.reg[0] |= dividend[i]; - if (overflow || remainder >= divisor) - { - Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size()); - quotient.SetBit(i); - } - } -} -*/ - -void PositiveDivide(Integer &remainder, Integer "ient, - const Integer &a, const Integer &b) -{ - unsigned aSize = a.WordCount(); - unsigned bSize = b.WordCount(); - - if (!bSize) - throw Integer::DivideByZero(); - - if (aSize < bSize) - { - remainder = a; - remainder.sign = Integer::POSITIVE; - quotient = Integer::Zero(); - return; - } - - aSize += aSize%2; // round up to next even number - bSize += bSize%2; - - remainder.reg.CleanNew(RoundupSize(bSize)); - remainder.sign = Integer::POSITIVE; - quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); - quotient.sign = Integer::POSITIVE; - - IntegerSecBlock T(aSize+3*(bSize+2)); - Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); -} - -void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) -{ - PositiveDivide(remainder, quotient, dividend, divisor); - - if (dividend.IsNegative()) - { - quotient.Negate(); - if (remainder.NotZero()) - { - --quotient; - remainder = divisor.AbsoluteValue() - remainder; - } - } - - if (divisor.IsNegative()) - quotient.Negate(); -} - -void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) -{ - q = a; - q >>= n; - - const size_t wordCount = BitsToWords(n); - if (wordCount <= a.WordCount()) - { - r.reg.resize(RoundupSize(wordCount)); - CopyWords(r.reg, a.reg, wordCount); - SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); - if (n % WORD_BITS != 0) - r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS)); - } - else - { - r.reg.resize(RoundupSize(a.WordCount())); - CopyWords(r.reg, a.reg, r.reg.size()); - } - r.sign = POSITIVE; - - if (a.IsNegative() && r.NotZero()) - { - --q; - r = Power2(n) - r; - } -} - -Integer Integer::DividedBy(const Integer &b) const -{ - Integer remainder, quotient; - Integer::Divide(remainder, quotient, *this, b); - return quotient; -} - -Integer Integer::Modulo(const Integer &b) const -{ - Integer remainder, quotient; - Integer::Divide(remainder, quotient, *this, b); - return remainder; -} - -void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor) -{ - if (!divisor) - throw Integer::DivideByZero(); - - assert(divisor); - - if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 - { - quotient = dividend >> (BitPrecision(divisor)-1); - remainder = dividend.reg[0] & (divisor-1); - return; - } - - unsigned int i = dividend.WordCount(); - quotient.reg.CleanNew(RoundupSize(i)); - remainder = 0; - while (i--) - { - quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; - remainder = DWord(dividend.reg[i], remainder) % divisor; - } - - if (dividend.NotNegative()) - quotient.sign = POSITIVE; - else - { - quotient.sign = NEGATIVE; - if (remainder) - { - --quotient; - remainder = divisor - remainder; - } - } -} - -Integer Integer::DividedBy(word b) const -{ - word remainder; - Integer quotient; - Integer::Divide(remainder, quotient, *this, b); - return quotient; -} - -word Integer::Modulo(word divisor) const -{ - if (!divisor) - throw Integer::DivideByZero(); - - assert(divisor); - - word remainder; - - if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 - remainder = reg[0] & (divisor-1); - else - { - unsigned int i = WordCount(); - - if (divisor <= 5) - { - DWord sum(0, 0); - while (i--) - sum += reg[i]; - remainder = sum % divisor; - } - else - { - remainder = 0; - while (i--) - remainder = DWord(reg[i], remainder) % divisor; - } - } - - if (IsNegative() && remainder) - remainder = divisor - remainder; - - return remainder; -} - -void Integer::Negate() -{ - if (!!(*this)) // don't flip sign if *this==0 - sign = Sign(1-sign); -} - -int Integer::PositiveCompare(const Integer& t) const -{ - unsigned size = WordCount(), tSize = t.WordCount(); - - if (size == tSize) - return CryptoPP::Compare(reg, t.reg, size); - else - return size > tSize ? 1 : -1; -} - -int Integer::Compare(const Integer& t) const -{ - if (NotNegative()) - { - if (t.NotNegative()) - return PositiveCompare(t); - else - return 1; - } - else - { - if (t.NotNegative()) - return -1; - else - return -PositiveCompare(t); - } -} - -Integer Integer::SquareRoot() const -{ - if (!IsPositive()) - return Zero(); - - // overestimate square root - Integer x, y = Power2((BitCount()+1)/2); - assert(y*y >= *this); - - do - { - x = y; - y = (x + *this/x) >> 1; - } while (y<x); - - return x; -} - -bool Integer::IsSquare() const -{ - Integer r = SquareRoot(); - return *this == r.Squared(); -} - -bool Integer::IsUnit() const -{ - return (WordCount() == 1) && (reg[0] == 1); -} - -Integer Integer::MultiplicativeInverse() const -{ - return IsUnit() ? *this : Zero(); -} - -Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) -{ - return x*y%m; -} - -Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) -{ - ModularArithmetic mr(m); - return mr.Exponentiate(x, e); -} - -Integer Integer::Gcd(const Integer &a, const Integer &b) -{ - return EuclideanDomainOf<Integer>().Gcd(a, b); -} - -Integer Integer::InverseMod(const Integer &m) const -{ - assert(m.NotNegative()); - - if (IsNegative()) - return Modulo(m).InverseMod(m); - - if (m.IsEven()) - { - if (!m || IsEven()) - return Zero(); // no inverse - if (*this == One()) - return One(); - - Integer u = m.Modulo(*this).InverseMod(*this); - return !u ? Zero() : (m*(*this-u)+1)/(*this); - } - - SecBlock<word> T(m.reg.size() * 4); - Integer r((word)0, m.reg.size()); - unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); - DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); - return r; -} - -word Integer::InverseMod(word mod) const -{ - word g0 = mod, g1 = *this % mod; - word v0 = 0, v1 = 1; - word y; - - while (g1) - { - if (g1 == 1) - return v1; - y = g0 / g1; - g0 = g0 % g1; - v0 += y * v1; - - if (!g0) - break; - if (g0 == 1) - return mod-v0; - y = g1 / g0; - g1 = g1 % g0; - v1 += y * v0; - } - return 0; -} - -// ******************************************************** - -ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) -{ - BERSequenceDecoder seq(bt); - OID oid(seq); - if (oid != ASN1::prime_field()) - BERDecodeError(); - m_modulus.BERDecode(seq); - seq.MessageEnd(); - m_result.reg.resize(m_modulus.reg.size()); -} - -void ModularArithmetic::DEREncode(BufferedTransformation &bt) const -{ - DERSequenceEncoder seq(bt); - ASN1::prime_field().DEREncode(seq); - m_modulus.DEREncode(seq); - seq.MessageEnd(); -} - -void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const -{ - a.DEREncodeAsOctetString(out, MaxElementByteLength()); -} - -void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const -{ - a.BERDecodeAsOctetString(in, MaxElementByteLength()); -} - -const Integer& ModularArithmetic::Half(const Integer &a) const -{ - if (a.reg.size()==m_modulus.reg.size()) - { - CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size()); - return m_result; - } - else - return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1)); -} - -const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const -{ - if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) - { - if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size()) - || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0) - { - CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); - } - return m_result; - } - else - { - m_result1 = a+b; - if (m_result1 >= m_modulus) - m_result1 -= m_modulus; - return m_result1; - } -} - -Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const -{ - if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) - { - if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) - || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0) - { - CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size()); - } - } - else - { - a+=b; - if (a>=m_modulus) - a-=m_modulus; - } - - return a; -} - -const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const -{ - if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) - { - if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size())) - CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); - return m_result; - } - else - { - m_result1 = a-b; - if (m_result1.IsNegative()) - m_result1 += m_modulus; - return m_result1; - } -} - -Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const -{ - if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) - { - if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) - CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size()); - } - else - { - a-=b; - if (a.IsNegative()) - a+=m_modulus; - } - - return a; -} - -const Integer& ModularArithmetic::Inverse(const Integer &a) const -{ - if (!a) - return a; - - CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size()); - if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size())) - Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size()); - - return m_result; -} - -Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const -{ - if (m_modulus.IsOdd()) - { - MontgomeryRepresentation dr(m_modulus); - return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); - } - else - return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); -} - -void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const -{ - if (m_modulus.IsOdd()) - { - MontgomeryRepresentation dr(m_modulus); - dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); - for (unsigned int i=0; i<exponentsCount; i++) - results[i] = dr.ConvertOut(results[i]); - } - else - AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); -} - -MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd - : ModularArithmetic(m), - m_u((word)0, m_modulus.reg.size()), - m_workspace(5*m_modulus.reg.size()) -{ - if (!m_modulus.IsOdd()) - throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus"); - - RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size()); -} - -const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const -{ - word *const T = m_workspace.begin(); - word *const R = m_result.reg.begin(); - const size_t N = m_modulus.reg.size(); - assert(a.reg.size()<=N && b.reg.size()<=N); - - AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); - SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); - MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); - return m_result; -} - -const Integer& MontgomeryRepresentation::Square(const Integer &a) const -{ - word *const T = m_workspace.begin(); - word *const R = m_result.reg.begin(); - const size_t N = m_modulus.reg.size(); - assert(a.reg.size()<=N); - - CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); - SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); - MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); - return m_result; -} - -Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const -{ - word *const T = m_workspace.begin(); - word *const R = m_result.reg.begin(); - const size_t N = m_modulus.reg.size(); - assert(a.reg.size()<=N); - - CopyWords(T, a.reg, a.reg.size()); - SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); - MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); - return m_result; -} - -const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const -{ -// return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus; - word *const T = m_workspace.begin(); - word *const R = m_result.reg.begin(); - const size_t N = m_modulus.reg.size(); - assert(a.reg.size()<=N); - - CopyWords(T, a.reg, a.reg.size()); - SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); - MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); - unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N); - -// cout << "k=" << k << " N*32=" << 32*N << endl; - - if (k>N*WORD_BITS) - DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N); - else - MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N); - - return m_result; -} - -NAMESPACE_END - -#endif |