Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

integer.cpp

00001 // integer.cpp - written and placed in the public domain by Wei Dai
00002 // contains public domain code contributed by Alister Lee and Leonard Janke
00003 
00004 #include "pch.h"
00005 
00006 #ifndef CRYPTOPP_IMPORTS
00007 
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"             // for P1363_KDF2
00016 #include "sha.h"
00017 
00018 #include <iostream>
00019 
00020 #ifdef SSE2_INTRINSICS_AVAILABLE
00021         #ifdef __GNUC__
00022                 #include <xmmintrin.h>
00023                 #include <signal.h>
00024                 #include <setjmp.h>
00025                 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00026                         #include <malloc.h>
00027                 #else
00028                         #include <stdlib.h>
00029                 #endif
00030         #else
00031                 #include <emmintrin.h>
00032         #endif
00033 #elif defined(_MSC_VER) && defined(_M_IX86)
00034         #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00035 #elif defined(__GNUC__) && defined(__i386__)
00036         #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00037 #endif
00038 
00039 NAMESPACE_BEGIN(CryptoPP)
00040 
00041 bool FunctionAssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00042 {
00043         if (valueType != typeid(Integer))
00044                 return false;
00045         *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00046         return true;
00047 }
00048 
00049 static const char s_RunAtStartup = (AssignIntToInteger = FunctionAssignIntToInteger, 0);
00050 
00051 #ifdef SSE2_INTRINSICS_AVAILABLE
00052 template <class T>
00053 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00054 {
00055         CheckSize(n);
00056         if (n == 0)
00057                 return NULL;
00058         if (n >= 4)
00059         {
00060                 void *p;
00061         #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00062                 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00063         #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00064                 while (!(p = memalign(16, sizeof(T)*n)))
00065         #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00066                 while (!(p = malloc(sizeof(T)*n)))
00067         #else
00068                 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))  // assume malloc alignment is at least 8
00069         #endif
00070                         CallNewHandler();
00071 
00072         #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00073                 assert(m_pBlock == NULL);
00074                 m_pBlock = p;
00075                 if (!IsAlignedOn(p, 16))
00076                 {
00077                         assert(IsAlignedOn(p, 8));
00078                         p = (byte *)p + 8;
00079                 }
00080         #endif
00081 
00082                 assert(IsAlignedOn(p, 16));
00083                 return (T*)p;
00084         }
00085         return new T[n];
00086 }
00087 
00088 template <class T>
00089 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00090 {
00091         memset(p, 0, n*sizeof(T));
00092         if (n >= 4)
00093         {
00094                 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00095                         _mm_free(p);
00096                 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00097                         assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00098                         free(m_pBlock);
00099                         m_pBlock = NULL;
00100                 #else
00101                         free(p);
00102                 #endif
00103         }
00104         else
00105                 delete [] (T *)p;
00106 }
00107 #endif
00108 
00109 static int Compare(const word *A, const word *B, unsigned int N)
00110 {
00111         while (N--)
00112                 if (A[N] > B[N])
00113                         return 1;
00114                 else if (A[N] < B[N])
00115                         return -1;
00116 
00117         return 0;
00118 }
00119 
00120 static word Increment(word *A, unsigned int N, word B=1)
00121 {
00122         assert(N);
00123         word t = A[0];
00124         A[0] = t+B;
00125         if (A[0] >= t)
00126                 return 0;
00127         for (unsigned i=1; i<N; i++)
00128                 if (++A[i])
00129                         return 0;
00130         return 1;
00131 }
00132 
00133 static word Decrement(word *A, unsigned int N, word B=1)
00134 {
00135         assert(N);
00136         word t = A[0];
00137         A[0] = t-B;
00138         if (A[0] <= t)
00139                 return 0;
00140         for (unsigned i=1; i<N; i++)
00141                 if (A[i]--)
00142                         return 0;
00143         return 1;
00144 }
00145 
00146 static void TwosComplement(word *A, unsigned int N)
00147 {
00148         Decrement(A, N);
00149         for (unsigned i=0; i<N; i++)
00150                 A[i] = ~A[i];
00151 }
00152 
00153 static word AtomicInverseModPower2(word A)
00154 {
00155         assert(A%2==1);
00156 
00157         word R=A%8;
00158 
00159         for (unsigned i=3; i<WORD_BITS; i*=2)
00160                 R = R*(2-R*A);
00161 
00162         assert(R*A==1);
00163         return R;
00164 }
00165 
00166 // ********************************************************
00167 
00168 class DWord
00169 {
00170 public:
00171         DWord() {}
00172 
00173 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00174         explicit DWord(word low)
00175         {
00176                 m_whole = low;
00177         }
00178 #else
00179         explicit DWord(word low)
00180         {
00181                 m_halfs.low = low;
00182                 m_halfs.high = 0;
00183         }
00184 #endif
00185 
00186         DWord(word low, word high)
00187         {
00188                 m_halfs.low = low;
00189                 m_halfs.high = high;
00190         }
00191 
00192         static DWord Multiply(word a, word b)
00193         {
00194                 DWord r;
00195                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00196                         r.m_whole = (dword)a * b;
00197                 #elif defined(__alpha__)
00198                         r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00199                 #elif defined(__ia64__)
00200                         r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00201                 #elif defined(_ARCH_PPC64)
00202                         r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00203                 #elif defined(__x86_64__)
00204                         __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00205                 #elif defined(__mips64)
00206                         __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00207                 #elif defined(_M_IX86)
00208                         // for testing
00209                         word64 t = (word64)a * b;
00210                         r.m_halfs.high = ((word32 *)(&t))[1];
00211                         r.m_halfs.low = (word32)t;
00212                 #else
00213                         #error can not implement DWord
00214                 #endif
00215                 return r;
00216         }
00217 
00218         static DWord MultiplyAndAdd(word a, word b, word c)
00219         {
00220                 DWord r = Multiply(a, b);
00221                 return r += c;
00222         }
00223 
00224         DWord & operator+=(word a)
00225         {
00226                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00227                         m_whole = m_whole + a;
00228                 #else
00229                         m_halfs.low += a;
00230                         m_halfs.high += (m_halfs.low < a);
00231                 #endif
00232                 return *this;
00233         }
00234 
00235         DWord operator+(word a)
00236         {
00237                 DWord r;
00238                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00239                         r.m_whole = m_whole + a;
00240                 #else
00241                         r.m_halfs.low = m_halfs.low + a;
00242                         r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00243                 #endif
00244                 return r;
00245         }
00246 
00247         DWord operator-(DWord a)
00248         {
00249                 DWord r;
00250                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00251                         r.m_whole = m_whole - a.m_whole;
00252                 #else
00253                         r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00254                         r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00255                 #endif
00256                 return r;
00257         }
00258 
00259         DWord operator-(word a)
00260         {
00261                 DWord r;
00262                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00263                         r.m_whole = m_whole - a;
00264                 #else
00265                         r.m_halfs.low = m_halfs.low - a;
00266                         r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00267                 #endif
00268                 return r;
00269         }
00270 
00271         // returns quotient, which must fit in a word
00272         word operator/(word divisor);
00273 
00274         word operator%(word a);
00275 
00276         bool operator!() const
00277         {
00278         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279                 return !m_whole;
00280         #else
00281                 return !m_halfs.high && !m_halfs.low;
00282         #endif
00283         }
00284 
00285         word GetLowHalf() const {return m_halfs.low;}
00286         word GetHighHalf() const {return m_halfs.high;}
00287         word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00288 
00289 private:
00290         union
00291         {
00292         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00293                 dword m_whole;
00294         #endif
00295                 struct
00296                 {
00297                 #ifdef IS_LITTLE_ENDIAN
00298                         word low;
00299                         word high;
00300                 #else
00301                         word high;
00302                         word low;
00303                 #endif
00304                 } m_halfs;
00305         };
00306 };
00307 
00308 class Word
00309 {
00310 public:
00311         Word() {}
00312 
00313         Word(word value)
00314         {
00315                 m_whole = value;
00316         }
00317 
00318         Word(hword low, hword high)
00319         {
00320                 m_whole = low | (word(high) << (WORD_BITS/2));
00321         }
00322 
00323         static Word Multiply(hword a, hword b)
00324         {
00325                 Word r;
00326                 r.m_whole = (word)a * b;
00327                 return r;
00328         }
00329 
00330         Word operator-(Word a)
00331         {
00332                 Word r;
00333                 r.m_whole = m_whole - a.m_whole;
00334                 return r;
00335         }
00336 
00337         Word operator-(hword a)
00338         {
00339                 Word r;
00340                 r.m_whole = m_whole - a;
00341                 return r;
00342         }
00343 
00344         // returns quotient, which must fit in a word
00345         hword operator/(hword divisor)
00346         {
00347                 return hword(m_whole / divisor);
00348         }
00349 
00350         bool operator!() const
00351         {
00352                 return !m_whole;
00353         }
00354 
00355         word GetWhole() const {return m_whole;}
00356         hword GetLowHalf() const {return hword(m_whole);}
00357         hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00358         hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00359         
00360 private:
00361         word m_whole;
00362 };
00363 
00364 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
00365 template <class S, class D>
00366 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00367 {
00368         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
00369         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00370 
00371         // estimate the quotient: do a 2 S by 1 S divide
00372         S Q;
00373         if (S(B1+1) == 0)
00374                 Q = A[2];
00375         else
00376                 Q = D(A[1], A[2]) / S(B1+1);
00377 
00378         // now subtract Q*B from A
00379         D p = D::Multiply(B0, Q);
00380         D u = (D) A[0] - p.GetLowHalf();
00381         A[0] = u.GetLowHalf();
00382         u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00383         A[1] = u.GetLowHalf();
00384         A[2] += u.GetHighHalf();
00385 
00386         // Q <= actual quotient, so fix it
00387         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00388         {
00389                 u = (D) A[0] - B0;
00390                 A[0] = u.GetLowHalf();
00391                 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00392                 A[1] = u.GetLowHalf();
00393                 A[2] += u.GetHighHalf();
00394                 Q++;
00395                 assert(Q);      // shouldn't overflow
00396         }
00397 
00398         return Q;
00399 }
00400 
00401 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
00402 template <class S, class D>
00403 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00404 {
00405         if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
00406                 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00407         else
00408         {
00409                 S Q[2];
00410                 T[0] = Al.GetLowHalf();
00411                 T[1] = Al.GetHighHalf(); 
00412                 T[2] = Ah.GetLowHalf();
00413                 T[3] = Ah.GetHighHalf();
00414                 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00415                 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00416                 return D(Q[0], Q[1]);
00417         }
00418 }
00419 
00420 // returns quotient, which must fit in a word
00421 inline word DWord::operator/(word a)
00422 {
00423         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00424                 return word(m_whole / a);
00425         #else
00426                 hword r[4];
00427                 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00428         #endif
00429 }
00430 
00431 inline word DWord::operator%(word a)
00432 {
00433         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00434                 return word(m_whole % a);
00435         #else
00436                 if (a < (word(1) << (WORD_BITS/2)))
00437                 {
00438                         hword h = hword(a);
00439                         word r = m_halfs.high % h;
00440                         r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00441                         return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00442                 }
00443                 else
00444                 {
00445                         hword r[4];
00446                         DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00447                         return Word(r[0], r[1]).GetWhole();
00448                 }
00449         #endif
00450 }
00451 
00452 // ********************************************************
00453 
00454 class Portable
00455 {
00456 public:
00457         static word Add(word *C, const word *A, const word *B, unsigned int N);
00458         static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00459 
00460         static inline void Multiply2(word *C, const word *A, const word *B);
00461         static inline word Multiply2Add(word *C, const word *A, const word *B);
00462         static void Multiply4(word *C, const word *A, const word *B);
00463         static void Multiply8(word *C, const word *A, const word *B);
00464         static inline unsigned int MultiplyRecursionLimit() {return 8;}
00465 
00466         static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00467         static void Multiply4Bottom(word *C, const word *A, const word *B);
00468         static void Multiply8Bottom(word *C, const word *A, const word *B);
00469         static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00470 
00471         static void Square2(word *R, const word *A);
00472         static void Square4(word *R, const word *A);
00473         static void Square8(word *R, const word *A) {assert(false);}
00474         static inline unsigned int SquareRecursionLimit() {return 4;}
00475 };
00476 
00477 word Portable::Add(word *C, const word *A, const word *B, unsigned int N)
00478 {
00479         assert (N%2 == 0);
00480 
00481         DWord u(0, 0);
00482         for (unsigned int i = 0; i < N; i+=2)
00483         {
00484                 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00485                 C[i] = u.GetLowHalf();
00486                 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00487                 C[i+1] = u.GetLowHalf();
00488         }
00489         return u.GetHighHalf();
00490 }
00491 
00492 word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N)
00493 {
00494         assert (N%2 == 0);
00495 
00496         DWord u(0, 0);
00497         for (unsigned int i = 0; i < N; i+=2)
00498         {
00499                 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00500                 C[i] = u.GetLowHalf();
00501                 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00502                 C[i+1] = u.GetLowHalf();
00503         }
00504         return 0-u.GetHighHalf();
00505 }
00506 
00507 void Portable::Multiply2(word *C, const word *A, const word *B)
00508 {
00509 /*
00510         word s;
00511         dword d;
00512 
00513         if (A1 >= A0)
00514                 if (B0 >= B1)
00515                 {
00516                         s = 0;
00517                         d = (dword)(A1-A0)*(B0-B1);
00518                 }
00519                 else
00520                 {
00521                         s = (A1-A0);
00522                         d = (dword)s*(word)(B0-B1);
00523                 }
00524         else
00525                 if (B0 > B1)
00526                 {
00527                         s = (B0-B1);
00528                         d = (word)(A1-A0)*(dword)s;
00529                 }
00530                 else
00531                 {
00532                         s = 0;
00533                         d = (dword)(A0-A1)*(B1-B0);
00534                 }
00535 */
00536         // this segment is the branchless equivalent of above
00537         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00538         unsigned int ai = A[1] < A[0];
00539         unsigned int bi = B[0] < B[1];
00540         unsigned int di = ai & bi;
00541         DWord d = DWord::Multiply(D[di], D[di+2]);
00542         D[1] = D[3] = 0;
00543         unsigned int si = ai + !bi;
00544         word s = D[si];
00545 
00546         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00547         C[0] = A0B0.GetLowHalf();
00548 
00549         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00550         DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00551         C[1] = t.GetLowHalf();
00552 
00553         t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00554         C[2] = t.GetLowHalf();
00555         C[3] = t.GetHighHalf();
00556 }
00557 
00558 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00559 {
00560         DWord t = DWord::Multiply(A[0], B[0]);
00561         C[0] = t.GetLowHalf();
00562         C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00563 }
00564 
00565 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00566 {
00567         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00568         unsigned int ai = A[1] < A[0];
00569         unsigned int bi = B[0] < B[1];
00570         unsigned int di = ai & bi;
00571         DWord d = DWord::Multiply(D[di], D[di+2]);
00572         D[1] = D[3] = 0;
00573         unsigned int si = ai + !bi;
00574         word s = D[si];
00575 
00576         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00577         DWord t = A0B0 + C[0];
00578         C[0] = t.GetLowHalf();
00579 
00580         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00581         t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00582         C[1] = t.GetLowHalf();
00583 
00584         t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00585         C[2] = t.GetLowHalf();
00586 
00587         t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00588         C[3] = t.GetLowHalf();
00589         return t.GetHighHalf();
00590 }
00591 
00592 #define MulAcc(x, y)                                                            \
00593         p = DWord::MultiplyAndAdd(A[x], B[y], c);               \
00594         c = p.GetLowHalf();                                                             \
00595         p = (DWord) d + p.GetHighHalf();                                        \
00596         d = p.GetLowHalf();                                                             \
00597         e += p.GetHighHalf();
00598 
00599 #define SaveMulAcc(s, x, y)                                             \
00600         R[s] = c;                                                                               \
00601         p = DWord::MultiplyAndAdd(A[x], B[y], d);                               \
00602         c = p.GetLowHalf();                                                             \
00603         p = (DWord) e + p.GetHighHalf();                                        \
00604         d = p.GetLowHalf();                                                             \
00605         e = p.GetHighHalf();
00606 
00607 #define SquAcc(x, y)                                                            \
00608         q = DWord::Multiply(A[x], A[y]);        \
00609         p = q + c;                                      \
00610         c = p.GetLowHalf();                                                             \
00611         p = (DWord) d + p.GetHighHalf();                                        \
00612         d = p.GetLowHalf();                                                             \
00613         e += p.GetHighHalf();                   \
00614         p = q + c;                                      \
00615         c = p.GetLowHalf();                                                             \
00616         p = (DWord) d + p.GetHighHalf();                                        \
00617         d = p.GetLowHalf();                                                             \
00618         e += p.GetHighHalf();
00619 
00620 #define SaveSquAcc(s, x, y)                                             \
00621         R[s] = c;                                                                               \
00622         q = DWord::Multiply(A[x], A[y]);        \
00623         p = q + d;                                      \
00624         c = p.GetLowHalf();                                                             \
00625         p = (DWord) e + p.GetHighHalf();                                        \
00626         d = p.GetLowHalf();                                                             \
00627         e = p.GetHighHalf();                    \
00628         p = q + c;                                      \
00629         c = p.GetLowHalf();                                                             \
00630         p = (DWord) d + p.GetHighHalf();                                        \
00631         d = p.GetLowHalf();                                                             \
00632         e += p.GetHighHalf();
00633 
00634 void Portable::Multiply4(word *R, const word *A, const word *B)
00635 {
00636         DWord p;
00637         word c, d, e;
00638 
00639         p = DWord::Multiply(A[0], B[0]);
00640         R[0] = p.GetLowHalf();
00641         c = p.GetHighHalf();
00642         d = e = 0;
00643 
00644         MulAcc(0, 1);
00645         MulAcc(1, 0);
00646 
00647         SaveMulAcc(1, 2, 0);
00648         MulAcc(1, 1);
00649         MulAcc(0, 2);
00650 
00651         SaveMulAcc(2, 0, 3);
00652         MulAcc(1, 2);
00653         MulAcc(2, 1);
00654         MulAcc(3, 0);
00655 
00656         SaveMulAcc(3, 3, 1);
00657         MulAcc(2, 2);
00658         MulAcc(1, 3);
00659 
00660         SaveMulAcc(4, 2, 3);
00661         MulAcc(3, 2);
00662 
00663         R[5] = c;
00664         p = DWord::MultiplyAndAdd(A[3], B[3], d);
00665         R[6] = p.GetLowHalf();
00666         R[7] = e + p.GetHighHalf();
00667 }
00668 
00669 void Portable::Square2(word *R, const word *A)
00670 {
00671         DWord p, q;
00672         word c, d, e;
00673 
00674         p = DWord::Multiply(A[0], A[0]);
00675         R[0] = p.GetLowHalf();
00676         c = p.GetHighHalf();
00677         d = e = 0;
00678 
00679         SquAcc(0, 1);
00680 
00681         R[1] = c;
00682         p = DWord::MultiplyAndAdd(A[1], A[1], d);
00683         R[2] = p.GetLowHalf();
00684         R[3] = e + p.GetHighHalf();
00685 }
00686 
00687 void Portable::Square4(word *R, const word *A)
00688 {
00689 #ifdef _MSC_VER
00690         // VC60 workaround: MSVC 6.0 has an optimization bug that makes
00691         // (dword)A*B where either A or B has been cast to a dword before
00692         // very expensive. Revisit this function when this
00693         // bug is fixed.
00694         Multiply4(R, A, A);
00695 #else
00696         const word *B = A;
00697         DWord p, q;
00698         word c, d, e;
00699 
00700         p = DWord::Multiply(A[0], A[0]);
00701         R[0] = p.GetLowHalf();
00702         c = p.GetHighHalf();
00703         d = e = 0;
00704 
00705         SquAcc(0, 1);
00706 
00707         SaveSquAcc(1, 2, 0);
00708         MulAcc(1, 1);
00709 
00710         SaveSquAcc(2, 0, 3);
00711         SquAcc(1, 2);
00712 
00713         SaveSquAcc(3, 3, 1);
00714         MulAcc(2, 2);
00715 
00716         SaveSquAcc(4, 2, 3);
00717 
00718         R[5] = c;
00719         p = DWord::MultiplyAndAdd(A[3], A[3], d);
00720         R[6] = p.GetLowHalf();
00721         R[7] = e + p.GetHighHalf();
00722 #endif
00723 }
00724 
00725 void Portable::Multiply8(word *R, const word *A, const word *B)
00726 {
00727         DWord p;
00728         word c, d, e;
00729 
00730         p = DWord::Multiply(A[0], B[0]);
00731         R[0] = p.GetLowHalf();
00732         c = p.GetHighHalf();
00733         d = e = 0;
00734 
00735         MulAcc(0, 1);
00736         MulAcc(1, 0);
00737 
00738         SaveMulAcc(1, 2, 0);
00739         MulAcc(1, 1);
00740         MulAcc(0, 2);
00741 
00742         SaveMulAcc(2, 0, 3);
00743         MulAcc(1, 2);
00744         MulAcc(2, 1);
00745         MulAcc(3, 0);
00746 
00747         SaveMulAcc(3, 0, 4);
00748         MulAcc(1, 3);
00749         MulAcc(2, 2);
00750         MulAcc(3, 1);
00751         MulAcc(4, 0);
00752 
00753         SaveMulAcc(4, 0, 5);
00754         MulAcc(1, 4);
00755         MulAcc(2, 3);
00756         MulAcc(3, 2);
00757         MulAcc(4, 1);
00758         MulAcc(5, 0);
00759 
00760         SaveMulAcc(5, 0, 6);
00761         MulAcc(1, 5);
00762         MulAcc(2, 4);
00763         MulAcc(3, 3);
00764         MulAcc(4, 2);
00765         MulAcc(5, 1);
00766         MulAcc(6, 0);
00767 
00768         SaveMulAcc(6, 0, 7);
00769         MulAcc(1, 6);
00770         MulAcc(2, 5);
00771         MulAcc(3, 4);
00772         MulAcc(4, 3);
00773         MulAcc(5, 2);
00774         MulAcc(6, 1);
00775         MulAcc(7, 0);
00776 
00777         SaveMulAcc(7, 1, 7);
00778         MulAcc(2, 6);
00779         MulAcc(3, 5);
00780         MulAcc(4, 4);
00781         MulAcc(5, 3);
00782         MulAcc(6, 2);
00783         MulAcc(7, 1);
00784 
00785         SaveMulAcc(8, 2, 7);
00786         MulAcc(3, 6);
00787         MulAcc(4, 5);
00788         MulAcc(5, 4);
00789         MulAcc(6, 3);
00790         MulAcc(7, 2);
00791 
00792         SaveMulAcc(9, 3, 7);
00793         MulAcc(4, 6);
00794         MulAcc(5, 5);
00795         MulAcc(6, 4);
00796         MulAcc(7, 3);
00797 
00798         SaveMulAcc(10, 4, 7);
00799         MulAcc(5, 6);
00800         MulAcc(6, 5);
00801         MulAcc(7, 4);
00802 
00803         SaveMulAcc(11, 5, 7);
00804         MulAcc(6, 6);
00805         MulAcc(7, 5);
00806 
00807         SaveMulAcc(12, 6, 7);
00808         MulAcc(7, 6);
00809 
00810         R[13] = c;
00811         p = DWord::MultiplyAndAdd(A[7], B[7], d);
00812         R[14] = p.GetLowHalf();
00813         R[15] = e + p.GetHighHalf();
00814 }
00815 
00816 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00817 {
00818         DWord p;
00819         word c, d, e;
00820 
00821         p = DWord::Multiply(A[0], B[0]);
00822         R[0] = p.GetLowHalf();
00823         c = p.GetHighHalf();
00824         d = e = 0;
00825 
00826         MulAcc(0, 1);
00827         MulAcc(1, 0);
00828 
00829         SaveMulAcc(1, 2, 0);
00830         MulAcc(1, 1);
00831         MulAcc(0, 2);
00832 
00833         R[2] = c;
00834         R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00835 }
00836 
00837 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00838 {
00839         DWord p;
00840         word c, d, e;
00841 
00842         p = DWord::Multiply(A[0], B[0]);
00843         R[0] = p.GetLowHalf();
00844         c = p.GetHighHalf();
00845         d = e = 0;
00846 
00847         MulAcc(0, 1);
00848         MulAcc(1, 0);
00849 
00850         SaveMulAcc(1, 2, 0);
00851         MulAcc(1, 1);
00852         MulAcc(0, 2);
00853 
00854         SaveMulAcc(2, 0, 3);
00855         MulAcc(1, 2);
00856         MulAcc(2, 1);
00857         MulAcc(3, 0);
00858 
00859         SaveMulAcc(3, 0, 4);
00860         MulAcc(1, 3);
00861         MulAcc(2, 2);
00862         MulAcc(3, 1);
00863         MulAcc(4, 0);
00864 
00865         SaveMulAcc(4, 0, 5);
00866         MulAcc(1, 4);
00867         MulAcc(2, 3);
00868         MulAcc(3, 2);
00869         MulAcc(4, 1);
00870         MulAcc(5, 0);
00871 
00872         SaveMulAcc(5, 0, 6);
00873         MulAcc(1, 5);
00874         MulAcc(2, 4);
00875         MulAcc(3, 3);
00876         MulAcc(4, 2);
00877         MulAcc(5, 1);
00878         MulAcc(6, 0);
00879 
00880         R[6] = c;
00881         R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00882                                 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00883 }
00884 
00885 #undef MulAcc
00886 #undef SaveMulAcc
00887 #undef SquAcc
00888 #undef SaveSquAcc
00889 
00890 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00891 
00892 // ************** x86 feature detection ***************
00893 
00894 static bool s_sse2Enabled = true;
00895 
00896 static void CpuId(word32 input, word32 *output)
00897 {
00898 #ifdef __GNUC__
00899         __asm__
00900         (
00901                 // save ebx in case -fPIC is being used
00902                 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00903                 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00904                 : "a" (input)
00905         );
00906 #else
00907         __asm
00908         {
00909                 mov eax, input
00910                 cpuid
00911                 mov edi, output
00912                 mov [edi], eax
00913                 mov [edi+4], ebx
00914                 mov [edi+8], ecx
00915                 mov [edi+12], edx
00916         }
00917 #endif
00918 }
00919 
00920 #ifdef SSE2_INTRINSICS_AVAILABLE
00921 #ifndef _MSC_VER
00922 static jmp_buf s_env;
00923 static void SigIllHandler(int)
00924 {
00925         longjmp(s_env, 1);
00926 }
00927 #endif
00928 
00929 static bool HasSSE2()
00930 {
00931         if (!s_sse2Enabled)
00932                 return false;
00933 
00934         word32 cpuid[4];
00935         CpuId(1, cpuid);
00936         if ((cpuid[3] & (1 << 26)) == 0)
00937                 return false;
00938 
00939 #ifdef _MSC_VER
00940     __try
00941         {
00942         __asm xorpd xmm0, xmm0        // executing SSE2 instruction
00943         }
00944     __except (1)
00945         {
00946                 return false;
00947     }
00948         return true;
00949 #else
00950         typedef void (*SigHandler)(int);
00951 
00952         SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00953         if (oldHandler == SIG_ERR)
00954                 return false;
00955 
00956         bool result = true;
00957         if (setjmp(s_env))
00958                 result = false;
00959         else
00960                 __asm __volatile ("xorps %xmm0, %xmm0");
00961 
00962         signal(SIGILL, oldHandler);
00963         return result;
00964 #endif
00965 }
00966 #endif
00967 
00968 static bool IsP4()
00969 {
00970         word32 cpuid[4];
00971 
00972         CpuId(0, cpuid);
00973         std::swap(cpuid[2], cpuid[3]);
00974         if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00975                 return false;
00976 
00977         CpuId(1, cpuid);
00978         return ((cpuid[0] >> 8) & 0xf) == 0xf;
00979 }
00980 
00981 // ************** Pentium/P4 optimizations ***************
00982 
00983 class PentiumOptimized : public Portable
00984 {
00985 public:
00986         static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
00987         static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
00988         static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
00989         static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
00990         static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
00991 };
00992 
00993 class P4Optimized
00994 {
00995 public:
00996         static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
00997         static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
00998 #ifdef SSE2_INTRINSICS_AVAILABLE
00999         static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
01000         static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
01001         static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
01002 #endif
01003 };
01004 
01005 typedef word (CRYPTOPP_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N);
01006 typedef void (CRYPTOPP_CDECL * PMul)(word *C, const word *A, const word *B);
01007 
01008 static PAddSub s_pAdd, s_pSub;
01009 #ifdef SSE2_INTRINSICS_AVAILABLE
01010 static PMul s_pMul4, s_pMul8, s_pMul8B;
01011 #endif
01012 
01013 static void SetPentiumFunctionPointers()
01014 {
01015         if (IsP4())
01016         {
01017                 s_pAdd = &P4Optimized::Add;
01018                 s_pSub = &P4Optimized::Subtract;
01019         }
01020         else
01021         {
01022                 s_pAdd = &PentiumOptimized::Add;
01023                 s_pSub = &PentiumOptimized::Subtract;
01024         }
01025 
01026 #ifdef SSE2_INTRINSICS_AVAILABLE
01027         if (HasSSE2())
01028         {
01029                 s_pMul4 = &P4Optimized::Multiply4;
01030                 s_pMul8 = &P4Optimized::Multiply8;
01031                 s_pMul8B = &P4Optimized::Multiply8Bottom;
01032         }
01033         else
01034         {
01035                 s_pMul4 = &PentiumOptimized::Multiply4;
01036                 s_pMul8 = &PentiumOptimized::Multiply8;
01037                 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01038         }
01039 #endif
01040 }
01041 
01042 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01043 
01044 void DisableSSE2()
01045 {
01046         s_sse2Enabled = false;
01047         SetPentiumFunctionPointers();
01048 }
01049 
01050 class LowLevel : public PentiumOptimized
01051 {
01052 public:
01053         inline static word Add(word *C, const word *A, const word *B, unsigned int N)
01054                 {return s_pAdd(C, A, B, N);}
01055         inline static word Subtract(word *C, const word *A, const word *B, unsigned int N)
01056                 {return s_pSub(C, A, B, N);}
01057         inline static void Square4(word *R, const word *A)
01058                 {Multiply4(R, A, A);}
01059 #ifdef SSE2_INTRINSICS_AVAILABLE
01060         inline static void Multiply4(word *C, const word *A, const word *B)
01061                 {s_pMul4(C, A, B);}
01062         inline static void Multiply8(word *C, const word *A, const word *B)
01063                 {s_pMul8(C, A, B);}
01064         inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01065                 {s_pMul8B(C, A, B);}
01066 #endif
01067 };
01068 
01069 // use some tricks to share assembly code between MSVC and GCC
01070 #ifdef _MSC_VER
01071         #define CRYPTOPP_NAKED __declspec(naked)
01072         #define AS1(x) __asm x
01073         #define AS2(x, y) __asm x, y
01074         #define AddPrologue \
01075                 __asm   push ebp \
01076                 __asm   push ebx \
01077                 __asm   push esi \
01078                 __asm   push edi \
01079                 __asm   mov             ecx, [esp+20] \
01080                 __asm   mov             edx, [esp+24] \
01081                 __asm   mov             ebx, [esp+28] \
01082                 __asm   mov             esi, [esp+32]
01083         #define AddEpilogue \
01084                 __asm   pop edi \
01085                 __asm   pop esi \
01086                 __asm   pop ebx \
01087                 __asm   pop ebp \
01088                 __asm   ret
01089         #define MulPrologue \
01090                 __asm   push ebp \
01091                 __asm   push ebx \
01092                 __asm   push esi \
01093                 __asm   push edi \
01094                 __asm   mov ecx, [esp+28] \
01095                 __asm   mov esi, [esp+24] \
01096                 __asm   push [esp+20]
01097         #define MulEpilogue \
01098                 __asm   add esp, 4 \
01099                 __asm   pop edi \
01100                 __asm   pop esi \
01101                 __asm   pop ebx \
01102                 __asm   pop ebp \
01103                 __asm   ret
01104 #else
01105         #define CRYPTOPP_NAKED
01106         #define AS1(x) #x ";"
01107         #define AS2(x, y) #x ", " #y ";"
01108         #define AddPrologue \
01109                 __asm__ __volatile__ \
01110                 ( \
01111                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01112                         "mov %2, %%ebx;" \
01113                         ".intel_syntax noprefix;" \
01114                         "push ebp;"
01115         #define AddEpilogue \
01116                         "pop ebp;" \
01117                         ".att_syntax prefix;" \
01118                         "pop %%ebx;" \
01119                                         : \
01120                                         : "c" (C), "d" (A), "m" (B), "S" (N) \
01121                                         : "%edi", "memory", "cc" \
01122                 );
01123         #define MulPrologue \
01124                 __asm__ __volatile__ \
01125                 ( \
01126                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01127                         "push %%ebp;" \
01128                         "push %0;" \
01129                         ".intel_syntax noprefix;"
01130         #define MulEpilogue \
01131                         "add esp, 4;" \
01132                         "pop ebp;" \
01133                         "pop ebx;" \
01134                         ".att_syntax prefix;" \
01135                         : \
01136                         : "rm" (Z), "S" (X), "c" (Y) \
01137                         : "%eax", "%edx", "%edi", "memory", "cc" \
01138                 );
01139 #endif
01140 
01141 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
01142 {
01143         AddPrologue
01144 
01145         // now: ebx = B, ecx = C, edx = A, esi = N
01146         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01147         AS2(    xor eax, eax)   // clear eax
01148 
01149         AS2(    sub eax, esi)   // eax is a negative index from end of B
01150         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01151 
01152         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01153         AS1(    jz      loopendAdd)             // if no dwords then nothing to do
01154 
01155         AS1(loopstartAdd:)
01156         AS2(    mov    esi,[edx])                       // load lower word of A
01157         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01158 
01159         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01160         AS2(    lea    edx,[edx+8])                     // advance A and C
01161 
01162         AS2(    adc    esi,edi)                         // add lower words
01163         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01164 
01165         AS2(    adc    ebp,edi)                         // add higher words
01166         AS1(    inc    eax)                                     // advance B
01167 
01168         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01169         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01170 
01171         AS1(    jnz    loopstartAdd)                    // loop until eax overflows and becomes zero
01172 
01173         AS1(loopendAdd:)
01174         AS2(    adc eax, 0)             // store carry into eax (return result register)
01175 
01176         AddEpilogue
01177 }
01178 
01179 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01180 {
01181         AddPrologue
01182 
01183         // now: ebx = B, ecx = C, edx = A, esi = N
01184         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01185         AS2(    xor eax, eax)   // clear eax
01186 
01187         AS2(    sub eax, esi)   // eax is a negative index from end of B
01188         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01189 
01190         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01191         AS1(    jz      loopendSub)             // if no dwords then nothing to do
01192 
01193         AS1(loopstartSub:)
01194         AS2(    mov    esi,[edx])                       // load lower word of A
01195         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01196 
01197         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01198         AS2(    lea    edx,[edx+8])                     // advance A and C
01199 
01200         AS2(    sbb    esi,edi)                         // subtract lower words
01201         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01202 
01203         AS2(    sbb    ebp,edi)                         // subtract higher words
01204         AS1(    inc    eax)                                     // advance B
01205 
01206         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01207         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01208 
01209         AS1(    jnz    loopstartSub)                    // loop until eax overflows and becomes zero
01210 
01211         AS1(loopendSub:)
01212         AS2(    adc eax, 0)             // store carry into eax (return result register)
01213 
01214         AddEpilogue
01215 }
01216 
01217 // On Pentium 4, the adc and sbb instructions are very expensive, so avoid them.
01218 
01219 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N)
01220 {
01221         AddPrologue
01222 
01223         // now: ebx = B, ecx = C, edx = A, esi = N
01224         AS2(    xor             eax, eax)
01225         AS1(    neg             esi)
01226         AS1(    jz              loopendAddP4)           // if no dwords then nothing to do
01227 
01228         AS2(    mov             edi, [edx])
01229         AS2(    mov             ebp, [ebx])
01230         AS1(    jmp             carry1AddP4)
01231 
01232         AS1(loopstartAddP4:)
01233         AS2(    mov             edi, [edx+8])
01234         AS2(    add             ecx, 8)
01235         AS2(    add             edx, 8)
01236         AS2(    mov             ebp, [ebx])
01237         AS2(    add             edi, eax)
01238         AS1(    jc              carry1AddP4)
01239         AS2(    xor             eax, eax)
01240 
01241         AS1(carry1AddP4:)
01242         AS2(    add             edi, ebp)
01243         AS2(    mov             ebp, 1)
01244         AS2(    mov             [ecx], edi)
01245         AS2(    mov             edi, [edx+4])
01246         AS2(    cmovc   eax, ebp)
01247         AS2(    mov             ebp, [ebx+4])
01248         AS2(    add             ebx, 8)
01249         AS2(    add             edi, eax)
01250         AS1(    jc              carry2AddP4)
01251         AS2(    xor             eax, eax)
01252 
01253         AS1(carry2AddP4:)
01254         AS2(    add             edi, ebp)
01255         AS2(    mov             ebp, 1)
01256         AS2(    cmovc   eax, ebp)
01257         AS2(    mov             [ecx+4], edi)
01258         AS2(    add             esi, 2)
01259         AS1(    jnz             loopstartAddP4)
01260 
01261         AS1(loopendAddP4:)
01262 
01263         AddEpilogue
01264 }
01265 
01266 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01267 {
01268         AddPrologue
01269 
01270         // now: ebx = B, ecx = C, edx = A, esi = N
01271         AS2(    xor             eax, eax)
01272         AS1(    neg             esi)
01273         AS1(    jz              loopendSubP4)           // if no dwords then nothing to do
01274 
01275         AS2(    mov             edi, [edx])
01276         AS2(    mov             ebp, [ebx])
01277         AS1(    jmp             carry1SubP4)
01278 
01279         AS1(loopstartSubP4:)
01280         AS2(    mov             edi, [edx+8])
01281         AS2(    add             edx, 8)
01282         AS2(    add             ecx, 8)
01283         AS2(    mov             ebp, [ebx])
01284         AS2(    sub             edi, eax)
01285         AS1(    jc              carry1SubP4)
01286         AS2(    xor             eax, eax)
01287 
01288         AS1(carry1SubP4:)
01289         AS2(    sub             edi, ebp)
01290         AS2(    mov             ebp, 1)
01291         AS2(    mov             [ecx], edi)
01292         AS2(    mov             edi, [edx+4])
01293         AS2(    cmovc   eax, ebp)
01294         AS2(    mov             ebp, [ebx+4])
01295         AS2(    add             ebx, 8)
01296         AS2(    sub             edi, eax)
01297         AS1(    jc              carry2SubP4)
01298         AS2(    xor             eax, eax)
01299 
01300         AS1(carry2SubP4:)
01301         AS2(    sub             edi, ebp)
01302         AS2(    mov             ebp, 1)
01303         AS2(    cmovc   eax, ebp)
01304         AS2(    mov             [ecx+4], edi)
01305         AS2(    add             esi, 2)
01306         AS1(    jnz             loopstartSubP4)
01307 
01308         AS1(loopendSubP4:)
01309 
01310         AddEpilogue
01311 }
01312 
01313 // multiply assembly code originally contributed by Leonard Janke
01314 
01315 #define MulStartup \
01316         AS2(xor ebp, ebp) \
01317         AS2(xor edi, edi) \
01318         AS2(xor ebx, ebx) 
01319 
01320 #define MulShiftCarry \
01321         AS2(mov ebp, edx) \
01322         AS2(mov edi, ebx) \
01323         AS2(xor ebx, ebx)
01324 
01325 #define MulAccumulateBottom(i,j) \
01326         AS2(mov eax, [ecx+4*j]) \
01327         AS2(imul eax, dword ptr [esi+4*i]) \
01328         AS2(add ebp, eax)
01329 
01330 #define MulAccumulate(i,j) \
01331         AS2(mov eax, [ecx+4*j]) \
01332         AS1(mul dword ptr [esi+4*i]) \
01333         AS2(add ebp, eax) \
01334         AS2(adc edi, edx) \
01335         AS2(adc bl, bh)
01336 
01337 #define MulStoreDigit(i)  \
01338         AS2(mov edx, edi) \
01339         AS2(mov edi, [esp]) \
01340         AS2(mov [edi+4*i], ebp)
01341 
01342 #define MulLastDiagonal(digits) \
01343         AS2(mov eax, [ecx+4*(digits-1)]) \
01344         AS1(mul dword ptr [esi+4*(digits-1)]) \
01345         AS2(add ebp, eax) \
01346         AS2(adc edx, edi) \
01347         AS2(mov edi, [esp]) \
01348         AS2(mov [edi+4*(2*digits-2)], ebp) \
01349         AS2(mov [edi+4*(2*digits-1)], edx)
01350 
01351 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01352 {
01353         MulPrologue
01354         // now: [esp] = Z, esi = X, ecx = Y
01355         MulStartup
01356         MulAccumulate(0,0)
01357         MulStoreDigit(0)
01358         MulShiftCarry
01359 
01360         MulAccumulate(1,0)
01361         MulAccumulate(0,1)
01362         MulStoreDigit(1)
01363         MulShiftCarry
01364 
01365         MulAccumulate(2,0)
01366         MulAccumulate(1,1)
01367         MulAccumulate(0,2)
01368         MulStoreDigit(2)
01369         MulShiftCarry
01370 
01371         MulAccumulate(3,0)
01372         MulAccumulate(2,1)
01373         MulAccumulate(1,2)
01374         MulAccumulate(0,3)
01375         MulStoreDigit(3)
01376         MulShiftCarry
01377 
01378         MulAccumulate(3,1)
01379         MulAccumulate(2,2)
01380         MulAccumulate(1,3)
01381         MulStoreDigit(4)
01382         MulShiftCarry
01383 
01384         MulAccumulate(3,2)
01385         MulAccumulate(2,3)
01386         MulStoreDigit(5)
01387         MulShiftCarry
01388 
01389         MulLastDiagonal(4)
01390         MulEpilogue
01391 }
01392 
01393 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01394 {
01395         MulPrologue
01396         // now: [esp] = Z, esi = X, ecx = Y
01397         MulStartup
01398         MulAccumulate(0,0)
01399         MulStoreDigit(0)
01400         MulShiftCarry
01401 
01402         MulAccumulate(1,0)
01403         MulAccumulate(0,1)
01404         MulStoreDigit(1)
01405         MulShiftCarry
01406 
01407         MulAccumulate(2,0)
01408         MulAccumulate(1,1)
01409         MulAccumulate(0,2)
01410         MulStoreDigit(2)
01411         MulShiftCarry
01412 
01413         MulAccumulate(3,0)
01414         MulAccumulate(2,1)
01415         MulAccumulate(1,2)
01416         MulAccumulate(0,3)
01417         MulStoreDigit(3)
01418         MulShiftCarry
01419 
01420         MulAccumulate(4,0)
01421         MulAccumulate(3,1)
01422         MulAccumulate(2,2)
01423         MulAccumulate(1,3)
01424         MulAccumulate(0,4)
01425         MulStoreDigit(4)
01426         MulShiftCarry
01427 
01428         MulAccumulate(5,0)
01429         MulAccumulate(4,1)
01430         MulAccumulate(3,2)
01431         MulAccumulate(2,3)
01432         MulAccumulate(1,4)
01433         MulAccumulate(0,5)
01434         MulStoreDigit(5)
01435         MulShiftCarry
01436 
01437         MulAccumulate(6,0)
01438         MulAccumulate(5,1)
01439         MulAccumulate(4,2)
01440         MulAccumulate(3,3)
01441         MulAccumulate(2,4)
01442         MulAccumulate(1,5)
01443         MulAccumulate(0,6)
01444         MulStoreDigit(6)
01445         MulShiftCarry
01446 
01447         MulAccumulate(7,0)
01448         MulAccumulate(6,1)
01449         MulAccumulate(5,2)
01450         MulAccumulate(4,3)
01451         MulAccumulate(3,4)
01452         MulAccumulate(2,5)
01453         MulAccumulate(1,6)
01454         MulAccumulate(0,7)
01455         MulStoreDigit(7)
01456         MulShiftCarry
01457 
01458         MulAccumulate(7,1)
01459         MulAccumulate(6,2)
01460         MulAccumulate(5,3)
01461         MulAccumulate(4,4)
01462         MulAccumulate(3,5)
01463         MulAccumulate(2,6)
01464         MulAccumulate(1,7)
01465         MulStoreDigit(8)
01466         MulShiftCarry
01467 
01468         MulAccumulate(7,2)
01469         MulAccumulate(6,3)
01470         MulAccumulate(5,4)
01471         MulAccumulate(4,5)
01472         MulAccumulate(3,6)
01473         MulAccumulate(2,7)
01474         MulStoreDigit(9)
01475         MulShiftCarry
01476 
01477         MulAccumulate(7,3)
01478         MulAccumulate(6,4)
01479         MulAccumulate(5,5)
01480         MulAccumulate(4,6)
01481         MulAccumulate(3,7)
01482         MulStoreDigit(10)
01483         MulShiftCarry
01484 
01485         MulAccumulate(7,4)
01486         MulAccumulate(6,5)
01487         MulAccumulate(5,6)
01488         MulAccumulate(4,7)
01489         MulStoreDigit(11)
01490         MulShiftCarry
01491 
01492         MulAccumulate(7,5)
01493         MulAccumulate(6,6)
01494         MulAccumulate(5,7)
01495         MulStoreDigit(12)
01496         MulShiftCarry
01497 
01498         MulAccumulate(7,6)
01499         MulAccumulate(6,7)
01500         MulStoreDigit(13)
01501         MulShiftCarry
01502 
01503         MulLastDiagonal(8)
01504         MulEpilogue
01505 }
01506 
01507 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01508 {
01509         MulPrologue
01510         // now: [esp] = Z, esi = X, ecx = Y
01511         MulStartup
01512         MulAccumulate(0,0)
01513         MulStoreDigit(0)
01514         MulShiftCarry
01515 
01516         MulAccumulate(1,0)
01517         MulAccumulate(0,1)
01518         MulStoreDigit(1)
01519         MulShiftCarry
01520 
01521         MulAccumulate(2,0)
01522         MulAccumulate(1,1)
01523         MulAccumulate(0,2)
01524         MulStoreDigit(2)
01525         MulShiftCarry
01526 
01527         MulAccumulate(3,0)
01528         MulAccumulate(2,1)
01529         MulAccumulate(1,2)
01530         MulAccumulate(0,3)
01531         MulStoreDigit(3)
01532         MulShiftCarry
01533 
01534         MulAccumulate(4,0)
01535         MulAccumulate(3,1)
01536         MulAccumulate(2,2)
01537         MulAccumulate(1,3)
01538         MulAccumulate(0,4)
01539         MulStoreDigit(4)
01540         MulShiftCarry
01541 
01542         MulAccumulate(5,0)
01543         MulAccumulate(4,1)
01544         MulAccumulate(3,2)
01545         MulAccumulate(2,3)
01546         MulAccumulate(1,4)
01547         MulAccumulate(0,5)
01548         MulStoreDigit(5)
01549         MulShiftCarry
01550 
01551         MulAccumulate(6,0)
01552         MulAccumulate(5,1)
01553         MulAccumulate(4,2)
01554         MulAccumulate(3,3)
01555         MulAccumulate(2,4)
01556         MulAccumulate(1,5)
01557         MulAccumulate(0,6)
01558         MulStoreDigit(6)
01559         MulShiftCarry
01560 
01561         MulAccumulateBottom(7,0)
01562         MulAccumulateBottom(6,1)
01563         MulAccumulateBottom(5,2)
01564         MulAccumulateBottom(4,3)
01565         MulAccumulateBottom(3,4)
01566         MulAccumulateBottom(2,5)
01567         MulAccumulateBottom(1,6)
01568         MulAccumulateBottom(0,7)
01569         MulStoreDigit(7)
01570         MulEpilogue
01571 }
01572 
01573 #undef AS1
01574 #undef AS2
01575 
01576 #else   // not x86 - no processor specific code at this layer
01577 
01578 typedef Portable LowLevel;
01579 
01580 #endif
01581 
01582 #ifdef SSE2_INTRINSICS_AVAILABLE
01583 
01584 #ifdef __GNUC__
01585 #define CRYPTOPP_FASTCALL
01586 #else
01587 #define CRYPTOPP_FASTCALL __fastcall
01588 #endif
01589 
01590 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01591 {
01592         __m128i a3210 = _mm_load_si128(A);
01593         __m128i b3210 = _mm_load_si128(B);
01594 
01595         __m128i sum;
01596 
01597         __m128i z = _mm_setzero_si128();
01598         __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01599         C[0] = a2b2_a0b0;
01600 
01601         __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01602         __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01603         __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01604         __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01605         __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01606         C[1] = _mm_add_epi64(a1b0, a0b1);
01607 
01608         __m128i a31 = _mm_srli_epi64(a3210, 32);
01609         __m128i b31 = _mm_srli_epi64(b3210, 32);
01610         __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01611         C[6] = a3b3_a1b1;
01612 
01613         __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01614         __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01615         __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01616         __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01617         __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01618         sum = _mm_add_epi64(a1b1, a0b2);
01619         C[2] = _mm_add_epi64(sum, a2b0);
01620 
01621         __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01622         __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01623         __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01624         __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01625         __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01626         __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01627         __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01628         __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01629         __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01630         sum = _mm_add_epi64(a2b1, a0b3);
01631         C[3] = _mm_add_epi64(sum, sum1);
01632 
01633         __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01634         __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01635         __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01636         __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01637         sum = _mm_add_epi64(a2b2, a3b1);
01638         C[4] = _mm_add_epi64(sum, a1b3);
01639 
01640         __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01641         __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01642         __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01643         __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01644         __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01645         C[5] = _mm_add_epi64(a3b2, a2b3);
01646 }
01647 
01648 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01649 {
01650         __m128i temp[7];
01651         const word *w = (word *)temp;
01652         const __m64 *mw = (__m64 *)w;
01653 
01654         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01655 
01656         C[0] = w[0];
01657 
01658         __m64 s1, s2;
01659 
01660         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01661         __m64 w4 = mw[2];
01662         __m64 w6 = mw[3];
01663         __m64 w8 = mw[4];
01664         __m64 w10 = mw[5];
01665         __m64 w12 = mw[6];
01666         __m64 w14 = mw[7];
01667         __m64 w16 = mw[8];
01668         __m64 w18 = mw[9];
01669         __m64 w20 = mw[10];
01670         __m64 w22 = mw[11];
01671         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01672 
01673         s1 = _mm_add_si64(w1, w4);
01674         C[1] = _mm_cvtsi64_si32(s1);
01675         s1 = _mm_srli_si64(s1, 32);
01676 
01677         s2 = _mm_add_si64(w6, w8);
01678         s1 = _mm_add_si64(s1, s2);
01679         C[2] = _mm_cvtsi64_si32(s1);
01680         s1 = _mm_srli_si64(s1, 32);
01681 
01682         s2 = _mm_add_si64(w10, w12);
01683         s1 = _mm_add_si64(s1, s2);
01684         C[3] = _mm_cvtsi64_si32(s1);
01685         s1 = _mm_srli_si64(s1, 32);
01686 
01687         s2 = _mm_add_si64(w14, w16);
01688         s1 = _mm_add_si64(s1, s2);
01689         C[4] = _mm_cvtsi64_si32(s1);
01690         s1 = _mm_srli_si64(s1, 32);
01691 
01692         s2 = _mm_add_si64(w18, w20);
01693         s1 = _mm_add_si64(s1, s2);
01694         C[5] = _mm_cvtsi64_si32(s1);
01695         s1 = _mm_srli_si64(s1, 32);
01696 
01697         s2 = _mm_add_si64(w22, w26);
01698         s1 = _mm_add_si64(s1, s2);
01699         C[6] = _mm_cvtsi64_si32(s1);
01700         s1 = _mm_srli_si64(s1, 32);
01701 
01702         C[7] = _mm_cvtsi64_si32(s1) + w[27];
01703         _mm_empty();
01704 }
01705 
01706 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01707 {
01708         __m128i temp[28];
01709         const word *w = (word *)temp;
01710         const __m64 *mw = (__m64 *)w;
01711         const word *x = (word *)temp+7*4;
01712         const __m64 *mx = (__m64 *)x;
01713         const word *y = (word *)temp+7*4*2;
01714         const __m64 *my = (__m64 *)y;
01715         const word *z = (word *)temp+7*4*3;
01716         const __m64 *mz = (__m64 *)z;
01717 
01718         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01719 
01720         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01721 
01722         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01723 
01724         P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01725 
01726         C[0] = w[0];
01727 
01728         __m64 s1, s2, s3, s4;
01729 
01730         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01731         __m64 w4 = mw[2];
01732         __m64 w6 = mw[3];
01733         __m64 w8 = mw[4];
01734         __m64 w10 = mw[5];
01735         __m64 w12 = mw[6];
01736         __m64 w14 = mw[7];
01737         __m64 w16 = mw[8];
01738         __m64 w18 = mw[9];
01739         __m64 w20 = mw[10];
01740         __m64 w22 = mw[11];
01741         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01742         __m64 w27 = _mm_cvtsi32_si64(w[27]);
01743 
01744         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01745         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01746         __m64 x4 = mx[2];
01747         __m64 x6 = mx[3];
01748         __m64 x8 = mx[4];
01749         __m64 x10 = mx[5];
01750         __m64 x12 = mx[6];
01751         __m64 x14 = mx[7];
01752         __m64 x16 = mx[8];
01753         __m64 x18 = mx[9];
01754         __m64 x20 = mx[10];
01755         __m64 x22 = mx[11];
01756         __m64 x26 = _mm_cvtsi32_si64(x[26]);
01757         __m64 x27 = _mm_cvtsi32_si64(x[27]);
01758 
01759         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01760         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01761         __m64 y4 = my[2];
01762         __m64 y6 = my[3];
01763         __m64 y8 = my[4];
01764         __m64 y10 = my[5];
01765         __m64 y12 = my[6];
01766         __m64 y14 = my[7];
01767         __m64 y16 = my[8];
01768         __m64 y18 = my[9];
01769         __m64 y20 = my[10];
01770         __m64 y22 = my[11];
01771         __m64 y26 = _mm_cvtsi32_si64(y[26]);
01772         __m64 y27 = _mm_cvtsi32_si64(y[27]);
01773 
01774         __m64 z0 = _mm_cvtsi32_si64(z[0]);
01775         __m64 z1 = _mm_cvtsi32_si64(z[1]);
01776         __m64 z4 = mz[2];
01777         __m64 z6 = mz[3];
01778         __m64 z8 = mz[4];
01779         __m64 z10 = mz[5];
01780         __m64 z12 = mz[6];
01781         __m64 z14 = mz[7];
01782         __m64 z16 = mz[8];
01783         __m64 z18 = mz[9];
01784         __m64 z20 = mz[10];
01785         __m64 z22 = mz[11];
01786         __m64 z26 = _mm_cvtsi32_si64(z[26]);
01787 
01788         s1 = _mm_add_si64(w1, w4);
01789         C[1] = _mm_cvtsi64_si32(s1);
01790         s1 = _mm_srli_si64(s1, 32);
01791 
01792         s2 = _mm_add_si64(w6, w8);
01793         s1 = _mm_add_si64(s1, s2);
01794         C[2] = _mm_cvtsi64_si32(s1);
01795         s1 = _mm_srli_si64(s1, 32);
01796 
01797         s2 = _mm_add_si64(w10, w12);
01798         s1 = _mm_add_si64(s1, s2);
01799         C[3] = _mm_cvtsi64_si32(s1);
01800         s1 = _mm_srli_si64(s1, 32);
01801 
01802         s3 = _mm_add_si64(x0, y0);
01803         s2 = _mm_add_si64(w14, w16);
01804         s1 = _mm_add_si64(s1, s3);
01805         s1 = _mm_add_si64(s1, s2);
01806         C[4] = _mm_cvtsi64_si32(s1);
01807         s1 = _mm_srli_si64(s1, 32);
01808 
01809         s3 = _mm_add_si64(x1, y1);
01810         s4 = _mm_add_si64(x4, y4);
01811         s1 = _mm_add_si64(s1, w18);
01812         s3 = _mm_add_si64(s3, s4);
01813         s1 = _mm_add_si64(s1, w20);
01814         s1 = _mm_add_si64(s1, s3);
01815         C[5] = _mm_cvtsi64_si32(s1);
01816         s1 = _mm_srli_si64(s1, 32);
01817 
01818         s3 = _mm_add_si64(x6, y6);
01819         s4 = _mm_add_si64(x8, y8);
01820         s1 = _mm_add_si64(s1, w22);
01821         s3 = _mm_add_si64(s3, s4);
01822         s1 = _mm_add_si64(s1, w26);
01823         s1 = _mm_add_si64(s1, s3);
01824         C[6] = _mm_cvtsi64_si32(s1);
01825         s1 = _mm_srli_si64(s1, 32);
01826 
01827         s3 = _mm_add_si64(x10, y10);
01828         s4 = _mm_add_si64(x12, y12);
01829         s1 = _mm_add_si64(s1, w27);
01830         s3 = _mm_add_si64(s3, s4);
01831         s1 = _mm_add_si64(s1, s3);
01832         C[7] = _mm_cvtsi64_si32(s1);
01833         s1 = _mm_srli_si64(s1, 32);
01834 
01835         s3 = _mm_add_si64(x14, y14);
01836         s4 = _mm_add_si64(x16, y16);
01837         s1 = _mm_add_si64(s1, z0);
01838         s3 = _mm_add_si64(s3, s4);
01839         s1 = _mm_add_si64(s1, s3);
01840         C[8] = _mm_cvtsi64_si32(s1);
01841         s1 = _mm_srli_si64(s1, 32);
01842 
01843         s3 = _mm_add_si64(x18, y18);
01844         s4 = _mm_add_si64(x20, y20);
01845         s1 = _mm_add_si64(s1, z1);
01846         s3 = _mm_add_si64(s3, s4);
01847         s1 = _mm_add_si64(s1, z4);
01848         s1 = _mm_add_si64(s1, s3);
01849         C[9] = _mm_cvtsi64_si32(s1);
01850         s1 = _mm_srli_si64(s1, 32);
01851 
01852         s3 = _mm_add_si64(x22, y22);
01853         s4 = _mm_add_si64(x26, y26);
01854         s1 = _mm_add_si64(s1, z6);
01855         s3 = _mm_add_si64(s3, s4);
01856         s1 = _mm_add_si64(s1, z8);
01857         s1 = _mm_add_si64(s1, s3);
01858         C[10] = _mm_cvtsi64_si32(s1);
01859         s1 = _mm_srli_si64(s1, 32);
01860 
01861         s3 = _mm_add_si64(x27, y27);
01862         s1 = _mm_add_si64(s1, z10);
01863         s1 = _mm_add_si64(s1, z12);
01864         s1 = _mm_add_si64(s1, s3);
01865         C[11] = _mm_cvtsi64_si32(s1);
01866         s1 = _mm_srli_si64(s1, 32);
01867 
01868         s3 = _mm_add_si64(z14, z16);
01869         s1 = _mm_add_si64(s1, s3);
01870         C[12] = _mm_cvtsi64_si32(s1);
01871         s1 = _mm_srli_si64(s1, 32);
01872 
01873         s3 = _mm_add_si64(z18, z20);
01874         s1 = _mm_add_si64(s1, s3);
01875         C[13] = _mm_cvtsi64_si32(s1);
01876         s1 = _mm_srli_si64(s1, 32);
01877 
01878         s3 = _mm_add_si64(z22, z26);
01879         s1 = _mm_add_si64(s1, s3);
01880         C[14] = _mm_cvtsi64_si32(s1);
01881         s1 = _mm_srli_si64(s1, 32);
01882 
01883         C[15] = z[27] + _mm_cvtsi64_si32(s1);
01884         _mm_empty();
01885 }
01886 
01887 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01888 {
01889         __m128i temp[21];
01890         const word *w = (word *)temp;
01891         const __m64 *mw = (__m64 *)w;
01892         const word *x = (word *)temp+7*4;
01893         const __m64 *mx = (__m64 *)x;
01894         const word *y = (word *)temp+7*4*2;
01895         const __m64 *my = (__m64 *)y;
01896 
01897         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01898 
01899         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01900 
01901         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01902 
01903         C[0] = w[0];
01904 
01905         __m64 s1, s2, s3, s4;
01906 
01907         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01908         __m64 w4 = mw[2];
01909         __m64 w6 = mw[3];
01910         __m64 w8 = mw[4];
01911         __m64 w10 = mw[5];
01912         __m64 w12 = mw[6];
01913         __m64 w14 = mw[7];
01914         __m64 w16 = mw[8];
01915         __m64 w18 = mw[9];
01916         __m64 w20 = mw[10];
01917         __m64 w22 = mw[11];
01918         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01919 
01920         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01921         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01922         __m64 x4 = mx[2];
01923         __m64 x6 = mx[3];
01924         __m64 x8 = mx[4];
01925 
01926         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01927         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01928         __m64 y4 = my[2];
01929         __m64 y6 = my[3];
01930         __m64 y8 = my[4];
01931 
01932         s1 = _mm_add_si64(w1, w4);
01933         C[1] = _mm_cvtsi64_si32(s1);
01934         s1 = _mm_srli_si64(s1, 32);
01935 
01936         s2 = _mm_add_si64(w6, w8);
01937         s1 = _mm_add_si64(s1, s2);
01938         C[2] = _mm_cvtsi64_si32(s1);
01939         s1 = _mm_srli_si64(s1, 32);
01940 
01941         s2 = _mm_add_si64(w10, w12);
01942         s1 = _mm_add_si64(s1, s2);
01943         C[3] = _mm_cvtsi64_si32(s1);
01944         s1 = _mm_srli_si64(s1, 32);
01945 
01946         s3 = _mm_add_si64(x0, y0);
01947         s2 = _mm_add_si64(w14, w16);
01948         s1 = _mm_add_si64(s1, s3);
01949         s1 = _mm_add_si64(s1, s2);
01950         C[4] = _mm_cvtsi64_si32(s1);
01951         s1 = _mm_srli_si64(s1, 32);
01952 
01953         s3 = _mm_add_si64(x1, y1);
01954         s4 = _mm_add_si64(x4, y4);
01955         s1 = _mm_add_si64(s1, w18);
01956         s3 = _mm_add_si64(s3, s4);
01957         s1 = _mm_add_si64(s1, w20);
01958         s1 = _mm_add_si64(s1, s3);
01959         C[5] = _mm_cvtsi64_si32(s1);
01960         s1 = _mm_srli_si64(s1, 32);
01961 
01962         s3 = _mm_add_si64(x6, y6);
01963         s4 = _mm_add_si64(x8, y8);
01964         s1 = _mm_add_si64(s1, w22);
01965         s3 = _mm_add_si64(s3, s4);
01966         s1 = _mm_add_si64(s1, w26);
01967         s1 = _mm_add_si64(s1, s3);
01968         C[6] = _mm_cvtsi64_si32(s1);
01969         s1 = _mm_srli_si64(s1, 32);
01970 
01971         C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01972         _mm_empty();
01973 }
01974 
01975 #endif  // #ifdef SSE2_INTRINSICS_AVAILABLE
01976 
01977 // ********************************************************
01978 
01979 #define A0              A
01980 #define A1              (A+N2)
01981 #define B0              B
01982 #define B1              (B+N2)
01983 
01984 #define T0              T
01985 #define T1              (T+N2)
01986 #define T2              (T+N)
01987 #define T3              (T+N+N2)
01988 
01989 #define R0              R
01990 #define R1              (R+N2)
01991 #define R2              (R+N)
01992 #define R3              (R+N+N2)
01993 
01994 // R[2*N] - result = A*B
01995 // T[2*N] - temporary work space
01996 // A[N] --- multiplier
01997 // B[N] --- multiplicant
01998 
01999 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02000 {
02001         assert(N>=2 && N%2==0);
02002 
02003         if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02004                 LowLevel::Multiply8(R, A, B);
02005         else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02006                 LowLevel::Multiply4(R, A, B);
02007         else if (N==2)
02008                 LowLevel::Multiply2(R, A, B);
02009         else
02010         {
02011                 const unsigned int N2 = N/2;
02012                 int carry;
02013 
02014                 int aComp = Compare(A0, A1, N2);
02015                 int bComp = Compare(B0, B1, N2);
02016 
02017                 switch (2*aComp + aComp + bComp)
02018                 {
02019                 case -4:
02020                         LowLevel::Subtract(R0, A1, A0, N2);
02021                         LowLevel::Subtract(R1, B0, B1, N2);
02022                         RecursiveMultiply(T0, T2, R0, R1, N2);
02023                         LowLevel::Subtract(T1, T1, R0, N2);
02024                         carry = -1;
02025                         break;
02026                 case -2:
02027                         LowLevel::Subtract(R0, A1, A0, N2);
02028                         LowLevel::Subtract(R1, B0, B1, N2);
02029                         RecursiveMultiply(T0, T2, R0, R1, N2);
02030                         carry = 0;
02031                         break;
02032                 case 2:
02033                         LowLevel::Subtract(R0, A0, A1, N2);
02034                         LowLevel::Subtract(R1, B1, B0, N2);
02035                         RecursiveMultiply(T0, T2, R0, R1, N2);
02036                         carry = 0;
02037                         break;
02038                 case 4:
02039                         LowLevel::Subtract(R0, A1, A0, N2);
02040                         LowLevel::Subtract(R1, B0, B1, N2);
02041                         RecursiveMultiply(T0, T2, R0, R1, N2);
02042                         LowLevel::Subtract(T1, T1, R1, N2);
02043                         carry = -1;
02044                         break;
02045                 default:
02046                         SetWords(T0, 0, N);
02047                         carry = 0;
02048                 }
02049 
02050                 RecursiveMultiply(R0, T2, A0, B0, N2);
02051                 RecursiveMultiply(R2, T2, A1, B1, N2);
02052 
02053                 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
02054 
02055                 carry += LowLevel::Add(T0, T0, R0, N);
02056                 carry += LowLevel::Add(T0, T0, R2, N);
02057                 carry += LowLevel::Add(R1, R1, T0, N);
02058 
02059                 assert (carry >= 0 && carry <= 2);
02060                 Increment(R3, N2, carry);
02061         }
02062 }
02063 
02064 // R[2*N] - result = A*A
02065 // T[2*N] - temporary work space
02066 // A[N] --- number to be squared
02067 
02068 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
02069 {
02070         assert(N && N%2==0);
02071         if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02072                 LowLevel::Square8(R, A);
02073         if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02074                 LowLevel::Square4(R, A);
02075         else if (N==2)
02076                 LowLevel::Square2(R, A);
02077         else
02078         {
02079                 const unsigned int N2 = N/2;
02080 
02081                 RecursiveSquare(R0, T2, A0, N2);
02082                 RecursiveSquare(R2, T2, A1, N2);
02083                 RecursiveMultiply(T0, T2, A0, A1, N2);
02084 
02085                 word carry = LowLevel::Add(R1, R1, T0, N);
02086                 carry += LowLevel::Add(R1, R1, T0, N);
02087                 Increment(R3, N2, carry);
02088         }
02089 }
02090 
02091 // R[N] - bottom half of A*B
02092 // T[N] - temporary work space
02093 // A[N] - multiplier
02094 // B[N] - multiplicant
02095 
02096 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02097 {
02098         assert(N>=2 && N%2==0);
02099         if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02100                 LowLevel::Multiply8Bottom(R, A, B);
02101         else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02102                 LowLevel::Multiply4Bottom(R, A, B);
02103         else if (N==2)
02104                 LowLevel::Multiply2Bottom(R, A, B);
02105         else
02106         {
02107                 const unsigned int N2 = N/2;
02108 
02109                 RecursiveMultiply(R, T, A0, B0, N2);
02110                 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02111                 LowLevel::Add(R1, R1, T0, N2);
02112                 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02113                 LowLevel::Add(R1, R1, T0, N2);
02114         }
02115 }
02116 
02117 // R[N] --- upper half of A*B
02118 // T[2*N] - temporary work space
02119 // L[N] --- lower half of A*B
02120 // A[N] --- multiplier
02121 // B[N] --- multiplicant
02122 
02123 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02124 {
02125         assert(N>=2 && N%2==0);
02126 
02127         if (N==4)
02128         {
02129                 LowLevel::Multiply4(T, A, B);
02130                 memcpy(R, T+4, 4*WORD_SIZE);
02131         }
02132         else if (N==2)
02133         {
02134                 LowLevel::Multiply2(T, A, B);
02135                 memcpy(R, T+2, 2*WORD_SIZE);
02136         }
02137         else
02138         {
02139                 const unsigned int N2 = N/2;
02140                 int carry;
02141 
02142                 int aComp = Compare(A0, A1, N2);
02143                 int bComp = Compare(B0, B1, N2);
02144 
02145                 switch (2*aComp + aComp + bComp)
02146                 {
02147                 case -4:
02148                         LowLevel::Subtract(R0, A1, A0, N2);
02149                         LowLevel::Subtract(R1, B0, B1, N2);
02150                         RecursiveMultiply(T0, T2, R0, R1, N2);
02151                         LowLevel::Subtract(T1, T1, R0, N2);
02152                         carry = -1;
02153                         break;
02154                 case -2:
02155                         LowLevel::Subtract(R0, A1, A0, N2);
02156                         LowLevel::Subtract(R1, B0, B1, N2);
02157                         RecursiveMultiply(T0, T2, R0, R1, N2);
02158                         carry = 0;
02159                         break;
02160                 case 2:
02161                         LowLevel::Subtract(R0, A0, A1, N2);
02162                         LowLevel::Subtract(R1, B1, B0, N2);
02163                         RecursiveMultiply(T0, T2, R0, R1, N2);
02164                         carry = 0;
02165                         break;
02166                 case 4:
02167                         LowLevel::Subtract(R0, A1, A0, N2);
02168                         LowLevel::Subtract(R1, B0, B1, N2);
02169                         RecursiveMultiply(T0, T2, R0, R1, N2);
02170                         LowLevel::Subtract(T1, T1, R1, N2);
02171                         carry = -1;
02172                         break;
02173                 default:
02174                         SetWords(T0, 0, N);
02175                         carry = 0;
02176                 }
02177 
02178                 RecursiveMultiply(T2, R0, A1, B1, N2);
02179 
02180                 // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
02181 
02182                 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02183                 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02184                 word t = (Compare(R0, T2, N2) == -1);
02185 
02186                 carry += t;
02187                 carry += Increment(R0, N2, c2+t);
02188                 carry += LowLevel::Add(R0, R0, T1, N2);
02189                 carry += LowLevel::Add(R0, R0, T3, N2);
02190                 assert (carry >= 0 && carry <= 2);
02191 
02192                 CopyWords(R1, T3, N2);
02193                 Increment(R1, N2, carry);
02194         }
02195 }
02196 
02197 inline word Add(word *C, const word *A, const word *B, unsigned int N)
02198 {
02199         return LowLevel::Add(C, A, B, N);
02200 }
02201 
02202 inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
02203 {
02204         return LowLevel::Subtract(C, A, B, N);
02205 }
02206 
02207 inline void Multiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02208 {
02209         RecursiveMultiply(R, T, A, B, N);
02210 }
02211 
02212 inline void Square(word *R, word *T, const word *A, unsigned int N)
02213 {
02214         RecursiveSquare(R, T, A, N);
02215 }
02216 
02217 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02218 {
02219         RecursiveMultiplyBottom(R, T, A, B, N);
02220 }
02221 
02222 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02223 {
02224         RecursiveMultiplyTop(R, T, L, A, B, N);
02225 }
02226 
02227 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
02228 {
02229         word carry=0;
02230         for(unsigned i=0; i<N; i++)
02231         {
02232                 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02233                 C[i] = p.GetLowHalf();
02234                 carry = p.GetHighHalf();
02235         }
02236         return carry;
02237 }
02238 
02239 // R[NA+NB] - result = A*B
02240 // T[NA+NB] - temporary work space
02241 // A[NA] ---- multiplier
02242 // B[NB] ---- multiplicant
02243 
02244 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02245 {
02246         if (NA == NB)
02247         {
02248                 if (A == B)
02249                         Square(R, T, A, NA);
02250                 else
02251                         Multiply(R, T, A, B, NA);
02252 
02253                 return;
02254         }
02255 
02256         if (NA > NB)
02257         {
02258                 std::swap(A, B);
02259                 std::swap(NA, NB);
02260         }
02261 
02262         assert(NB % NA == 0);
02263         assert((NB/NA)%2 == 0);         // NB is an even multiple of NA
02264 
02265         if (NA==2 && !A[1])
02266         {
02267                 switch (A[0])
02268                 {
02269                 case 0:
02270                         SetWords(R, 0, NB+2);
02271                         return;
02272                 case 1:
02273                         CopyWords(R, B, NB);
02274                         R[NB] = R[NB+1] = 0;
02275                         return;
02276                 default:
02277                         R[NB] = LinearMultiply(R, B, A[0], NB);
02278                         R[NB+1] = 0;
02279                         return;
02280                 }
02281         }
02282 
02283         Multiply(R, T, A, B, NA);
02284         CopyWords(T+2*NA, R+NA, NA);
02285 
02286         unsigned i;
02287 
02288         for (i=2*NA; i<NB; i+=2*NA)
02289                 Multiply(T+NA+i, T, A, B+i, NA);
02290         for (i=NA; i<NB; i+=2*NA)
02291                 Multiply(R+i, T, A, B+i, NA);
02292 
02293         if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02294                 Increment(R+NB, NA);
02295 }
02296 
02297 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
02298 // T[3*N/2] - temporary work space
02299 // A[N] ----- an odd number as input
02300 
02301 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
02302 {
02303         if (N==2)
02304         {
02305                 T[0] = AtomicInverseModPower2(A[0]);
02306                 T[1] = 0;
02307                 LowLevel::Multiply2Bottom(T+2, T, A);
02308                 TwosComplement(T+2, 2);
02309                 Increment(T+2, 2, 2);
02310                 LowLevel::Multiply2Bottom(R, T, T+2);
02311         }
02312         else
02313         {
02314                 const unsigned int N2 = N/2;
02315                 RecursiveInverseModPower2(R0, T0, A0, N2);
02316                 T0[0] = 1;
02317                 SetWords(T0+1, 0, N2-1);
02318                 MultiplyTop(R1, T1, T0, R0, A0, N2);
02319                 MultiplyBottom(T0, T1, R0, A1, N2);
02320                 Add(T0, R1, T0, N2);
02321                 TwosComplement(T0, N2);
02322                 MultiplyBottom(R1, T1, R0, T0, N2);
02323         }
02324 }
02325 
02326 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
02327 // T[3*N] - temporary work space
02328 // X[2*N] - number to be reduced
02329 // M[N] --- modulus
02330 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
02331 
02332 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
02333 {
02334         MultiplyBottom(R, T, X, U, N);
02335         MultiplyTop(T, T+N, X, R, M, N);
02336         word borrow = Subtract(T, X+N, T, N);
02337         // defend against timing attack by doing this Add even when not needed
02338         word carry = Add(T+N, T, M, N);
02339         assert(carry || !borrow);
02340         CopyWords(R, T + (borrow ? N : 0), N);
02341 }
02342 
02343 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
02344 // T[2*N] - temporary work space
02345 // X[2*N] - number to be reduced
02346 // M[N] --- modulus
02347 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
02348 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
02349 
02350 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, unsigned int N)
02351 {
02352         assert(N%2==0 && N>=4);
02353 
02354 #define M0              M
02355 #define M1              (M+N2)
02356 #define V0              V
02357 #define V1              (V+N2)
02358 
02359 #define X0              X
02360 #define X1              (X+N2)
02361 #define X2              (X+N)
02362 #define X3              (X+N+N2)
02363 
02364         const unsigned int N2 = N/2;
02365         Multiply(T0, T2, V0, X3, N2);
02366         int c2 = Add(T0, T0, X0, N);
02367         MultiplyBottom(T3, T2, T0, U, N2);
02368         MultiplyTop(T2, R, T0, T3, M0, N2);
02369         c2 -= Subtract(T2, T1, T2, N2);
02370         Multiply(T0, R, T3, M1, N2);
02371         c2 -= Subtract(T0, T2, T0, N2);
02372         int c3 = -(int)Subtract(T1, X2, T1, N2);
02373         Multiply(R0, T2, V1, X3, N2);
02374         c3 += Add(R, R, T, N);
02375 
02376         if (c2>0)
02377                 c3 += Increment(R1, N2);
02378         else if (c2<0)
02379                 c3 -= Decrement(R1, N2, -c2);
02380 
02381         assert(c3>=-1 && c3<=1);
02382         if (c3>0)
02383                 Subtract(R, R, M, N);
02384         else if (c3<0)
02385                 Add(R, R, M, N);
02386 
02387 #undef M0
02388 #undef M1
02389 #undef V0
02390 #undef V1
02391 
02392 #undef X0
02393 #undef X1
02394 #undef X2
02395 #undef X3
02396 }
02397 
02398 #undef A0
02399 #undef A1
02400 #undef B0
02401 #undef B1
02402 
02403 #undef T0
02404 #undef T1
02405 #undef T2
02406 #undef T3
02407 
02408 #undef R0
02409 #undef R1
02410 #undef R2
02411 #undef R3
02412 
02413 /*
02414 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
02415 static word SubatomicDivide(word *A, word B0, word B1)
02416 {
02417         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
02418         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02419 
02420         // estimate the quotient: do a 2 word by 1 word divide
02421         word Q;
02422         if (B1+1 == 0)
02423                 Q = A[2];
02424         else
02425                 Q = DWord(A[1], A[2]).DividedBy(B1+1);
02426 
02427         // now subtract Q*B from A
02428         DWord p = DWord::Multiply(B0, Q);
02429         DWord u = (DWord) A[0] - p.GetLowHalf();
02430         A[0] = u.GetLowHalf();
02431         u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
02432         A[1] = u.GetLowHalf();
02433         A[2] += u.GetHighHalf();
02434 
02435         // Q <= actual quotient, so fix it
02436         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02437         {
02438                 u = (DWord) A[0] - B0;
02439                 A[0] = u.GetLowHalf();
02440                 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
02441                 A[1] = u.GetLowHalf();
02442                 A[2] += u.GetHighHalf();
02443                 Q++;
02444                 assert(Q);      // shouldn't overflow
02445         }
02446 
02447         return Q;
02448 }
02449 
02450 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
02451 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02452 {
02453         if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
02454         {
02455                 Q[0] = A[2];
02456                 Q[1] = A[3];
02457         }
02458         else
02459         {
02460                 word T[4];
02461                 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02462                 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02463                 Q[0] = SubatomicDivide(T, B[0], B[1]);
02464 
02465 #ifndef NDEBUG
02466                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02467                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02468                 word P[4];
02469                 LowLevel::Multiply2(P, Q, B);
02470                 Add(P, P, T, 4);
02471                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02472 #endif
02473         }
02474 }
02475 */
02476 
02477 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02478 {
02479         word T[4];
02480         DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02481         Q[0] = q.GetLowHalf();
02482         Q[1] = q.GetHighHalf();
02483 
02484 #ifndef NDEBUG
02485         if (B[0] || B[1])
02486         {
02487                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02488                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02489                 word P[4];
02490                 Portable::Multiply2(P, Q, B);
02491                 Add(P, P, T, 4);
02492                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02493         }
02494 #endif
02495 }
02496 
02497 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
02498 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
02499 {
02500         assert(N && N%2==0);
02501 
02502         if (Q[1])
02503         {
02504                 T[N] = T[N+1] = 0;
02505                 unsigned i;
02506                 for (i=0; i<N; i+=4)
02507                         LowLevel::Multiply2(T+i, Q, B+i);
02508                 for (i=2; i<N; i+=4)
02509                         if (LowLevel::Multiply2Add(T+i, Q, B+i))
02510                                 T[i+5] += (++T[i+4]==0);
02511         }
02512         else
02513         {
02514                 T[N] = LinearMultiply(T, B, Q[0], N);
02515                 T[N+1] = 0;
02516         }
02517 
02518         word borrow = Subtract(R, R, T, N+2);
02519         assert(!borrow && !R[N+1]);
02520 
02521         while (R[N] || Compare(R, B, N) >= 0)
02522         {
02523                 R[N] -= Subtract(R, R, B, N);
02524                 Q[1] += (++Q[0]==0);
02525                 assert(Q[0] || Q[1]); // no overflow
02526         }
02527 }
02528 
02529 // R[NB] -------- remainder = A%B
02530 // Q[NA-NB+2] --- quotient      = A/B
02531 // T[NA+2*NB+4] - temp work space
02532 // A[NA] -------- dividend
02533 // B[NB] -------- divisor
02534 
02535 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02536 {
02537         assert(NA && NB && NA%2==0 && NB%2==0);
02538         assert(B[NB-1] || B[NB-2]);
02539         assert(NB <= NA);
02540 
02541         // set up temporary work space
02542         word *const TA=T;
02543         word *const TB=T+NA+2;
02544         word *const TP=T+NA+2+NB;
02545 
02546         // copy B into TB and normalize it so that TB has highest bit set to 1
02547         unsigned shiftWords = (B[NB-1]==0);
02548         TB[0] = TB[NB-1] = 0;
02549         CopyWords(TB+shiftWords, B, NB-shiftWords);
02550         unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02551         assert(shiftBits < WORD_BITS);
02552         ShiftWordsLeftByBits(TB, NB, shiftBits);
02553 
02554         // copy A into TA and normalize it
02555         TA[0] = TA[NA] = TA[NA+1] = 0;
02556         CopyWords(TA+shiftWords, A, NA);
02557         ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02558 
02559         if (TA[NA+1]==0 && TA[NA] <= 1)
02560         {
02561                 Q[NA-NB+1] = Q[NA-NB] = 0;
02562                 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02563                 {
02564                         TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02565                         ++Q[NA-NB];
02566                 }
02567         }
02568         else
02569         {
02570                 NA+=2;
02571                 assert(Compare(TA+NA-NB, TB, NB) < 0);
02572         }
02573 
02574         word BT[2];
02575         BT[0] = TB[NB-2] + 1;
02576         BT[1] = TB[NB-1] + (BT[0]==0);
02577 
02578         // start reducing TA mod TB, 2 words at a time
02579         for (unsigned i=NA-2; i>=NB; i-=2)
02580         {
02581                 AtomicDivide(Q+i-NB, TA+i-2, BT);
02582                 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02583         }
02584 
02585         // copy TA into R, and denormalize it
02586         CopyWords(R, TA+shiftWords, NB);
02587         ShiftWordsRightByBits(R, NB, shiftBits);
02588 }
02589 
02590 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
02591 {
02592         while (N && X[N-2]==0 && X[N-1]==0)
02593                 N-=2;
02594         return N;
02595 }
02596 
02597 // return k
02598 // R[N] --- result = A^(-1) * 2^k mod M
02599 // T[4*N] - temporary work space
02600 // A[NA] -- number to take inverse of
02601 // M[N] --- modulus
02602 
02603 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
02604 {
02605         assert(NA<=N && N && N%2==0);
02606 
02607         word *b = T;
02608         word *c = T+N;
02609         word *f = T+2*N;
02610         word *g = T+3*N;
02611         unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02612         unsigned int k=0, s=0;
02613 
02614         SetWords(T, 0, 3*N);
02615         b[0]=1;
02616         CopyWords(f, A, NA);
02617         CopyWords(g, M, N);
02618 
02619         while (1)
02620         {
02621                 word t=f[0];
02622                 while (!t)
02623                 {
02624                         if (EvenWordCount(f, fgLen)==0)
02625                         {
02626                                 SetWords(R, 0, N);
02627                                 return 0;
02628                         }
02629 
02630                         ShiftWordsRightByWords(f, fgLen, 1);
02631                         if (c[bcLen-1]) bcLen+=2;
02632                         assert(bcLen <= N);
02633                         ShiftWordsLeftByWords(c, bcLen, 1);
02634                         k+=WORD_BITS;
02635                         t=f[0];
02636                 }
02637 
02638                 unsigned int i=0;
02639                 while (t%2 == 0)
02640                 {
02641                         t>>=1;
02642                         i++;
02643                 }
02644                 k+=i;
02645 
02646                 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02647                 {
02648                         if (s%2==0)
02649                                 CopyWords(R, b, N);
02650                         else
02651                                 Subtract(R, M, b, N);
02652                         return k;
02653                 }
02654 
02655                 ShiftWordsRightByBits(f, fgLen, i);
02656                 t=ShiftWordsLeftByBits(c, bcLen, i);
02657                 if (t)
02658                 {
02659                         c[bcLen] = t;
02660                         bcLen+=2;
02661                         assert(bcLen <= N);
02662                 }
02663 
02664                 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02665                         fgLen-=2;
02666 
02667                 if (Compare(f, g, fgLen)==-1)
02668                 {
02669                         std::swap(f, g);
02670                         std::swap(b, c);
02671                         s++;
02672                 }
02673 
02674                 Subtract(f, f, g, fgLen);
02675 
02676                 if (Add(b, b, c, bcLen))
02677                 {
02678                         b[bcLen] = 1;
02679                         bcLen+=2;
02680                         assert(bcLen <= N);
02681                 }
02682         }
02683 }
02684 
02685 // R[N] - result = A/(2^k) mod M
02686 // A[N] - input
02687 // M[N] - modulus
02688 
02689 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02690 {
02691         CopyWords(R, A, N);
02692 
02693         while (k--)
02694         {
02695                 if (R[0]%2==0)
02696                         ShiftWordsRightByBits(R, N, 1);
02697                 else
02698                 {
02699                         word carry = Add(R, R, M, N);
02700                         ShiftWordsRightByBits(R, N, 1);
02701                         R[N-1] += carry<<(WORD_BITS-1);
02702                 }
02703         }
02704 }
02705 
02706 // R[N] - result = A*(2^k) mod M
02707 // A[N] - input
02708 // M[N] - modulus
02709 
02710 void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02711 {
02712         CopyWords(R, A, N);
02713 
02714         while (k--)
02715                 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02716                         Subtract(R, R, M, N);
02717 }
02718 
02719 // ******************************************************************
02720 
02721 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02722 
02723 static inline unsigned int RoundupSize(unsigned int n)
02724 {
02725         if (n<=8)
02726                 return RoundupSizeTable[n];
02727         else if (n<=16)
02728                 return 16;
02729         else if (n<=32)
02730                 return 32;
02731         else if (n<=64)
02732                 return 64;
02733         else return 1U << BitPrecision(n-1);
02734 }
02735 
02736 Integer::Integer()
02737         : reg(2), sign(POSITIVE)
02738 {
02739         reg[0] = reg[1] = 0;
02740 }
02741 
02742 Integer::Integer(const Integer& t)
02743         : reg(RoundupSize(t.WordCount())), sign(t.sign)
02744 {
02745         CopyWords(reg, t.reg, reg.size());
02746 }
02747 
02748 Integer::Integer(Sign s, lword value)
02749         : reg(2), sign(s)
02750 {
02751         reg[0] = word(value);
02752         reg[1] = word(SafeRightShift<WORD_BITS>(value));
02753 }
02754 
02755 Integer::Integer(signed long value)
02756         : reg(2)
02757 {
02758         if (value >= 0)
02759                 sign = POSITIVE;
02760         else
02761         {
02762                 sign = NEGATIVE;
02763                 value = -value;
02764         }
02765         reg[0] = word(value);
02766         reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02767 }
02768 
02769 Integer::Integer(Sign s, word high, word low)
02770         : reg(2), sign(s)
02771 {
02772         reg[0] = low;
02773         reg[1] = high;
02774 }
02775 
02776 bool Integer::IsConvertableToLong() const
02777 {
02778         if (ByteCount() > sizeof(long))
02779                 return false;
02780 
02781         unsigned long value = reg[0];
02782         value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02783 
02784         if (sign==POSITIVE)
02785                 return (signed long)value >= 0;
02786         else
02787                 return -(signed long)value < 0;
02788 }
02789 
02790 signed long Integer::ConvertToLong() const
02791 {
02792         assert(IsConvertableToLong());
02793 
02794         unsigned long value = reg[0];
02795         value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02796         return sign==POSITIVE ? value : -(signed long)value;
02797 }
02798 
02799 Integer::Integer(BufferedTransformation &encodedInteger, unsigned int byteCount, Signedness s)
02800 {
02801         Decode(encodedInteger, byteCount, s);
02802 }
02803 
02804 Integer::Integer(const byte *encodedInteger, unsigned int byteCount, Signedness s)
02805 {
02806         Decode(encodedInteger, byteCount, s);
02807 }
02808 
02809 Integer::Integer(BufferedTransformation &bt)
02810 {
02811         BERDecode(bt);
02812 }
02813 
02814 Integer::Integer(RandomNumberGenerator &rng, unsigned int bitcount)
02815 {
02816         Randomize(rng, bitcount);
02817 }
02818 
02819 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02820 {
02821         if (!Randomize(rng, min, max, rnType, equiv, mod))
02822                 throw Integer::RandomNumberNotFound();
02823 }
02824 
02825 Integer Integer::Power2(unsigned int e)
02826 {
02827         Integer r((word)0, BitsToWords(e+1));
02828         r.SetBit(e);
02829         return r;
02830 }
02831 
02832 template <long i>
02833 struct NewInteger
02834 {
02835         Integer * operator()() const
02836         {
02837                 return new Integer(i);
02838         }
02839 };
02840 
02841 const Integer &Integer::Zero()
02842 {
02843         return Singleton<Integer>().Ref();
02844 }
02845 
02846 const Integer &Integer::One()
02847 {
02848         return Singleton<Integer, NewInteger<1> >().Ref();
02849 }
02850 
02851 const Integer &Integer::Two()
02852 {
02853         return Singleton<Integer, NewInteger<2> >().Ref();
02854 }
02855 
02856 bool Integer::operator!() const
02857 {
02858         return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02859 }
02860 
02861 Integer& Integer::operator=(const Integer& t)
02862 {
02863         if (this != &t)
02864         {
02865                 reg.New(RoundupSize(t.WordCount()));
02866                 CopyWords(reg, t.reg, reg.size());
02867                 sign = t.sign;
02868         }
02869         return *this;
02870 }
02871 
02872 bool Integer::GetBit(unsigned int n) const
02873 {
02874         if (n/WORD_BITS >= reg.size())
02875                 return 0;
02876         else
02877                 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02878 }
02879 
02880 void Integer::SetBit(unsigned int n, bool value)
02881 {
02882         if (value)
02883         {
02884                 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02885                 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02886         }
02887         else
02888         {
02889                 if (n/WORD_BITS < reg.size())
02890                         reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02891         }
02892 }
02893 
02894 byte Integer::GetByte(unsigned int n) const
02895 {
02896         if (n/WORD_SIZE >= reg.size())
02897                 return 0;
02898         else
02899                 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02900 }
02901 
02902 void Integer::SetByte(unsigned int n, byte value)
02903 {
02904         reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02905         reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02906         reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02907 }
02908 
02909 unsigned long Integer::GetBits(unsigned int i, unsigned int n) const
02910 {
02911         assert(n <= sizeof(unsigned long)*8);
02912         unsigned long v = 0;
02913         for (unsigned int j=0; j<n; j++)
02914                 v |= GetBit(i+j) << j;
02915         return v;
02916 }
02917 
02918 Integer Integer::operator-() const
02919 {
02920         Integer result(*this);
02921         result.Negate();
02922         return result;
02923 }
02924 
02925 Integer Integer::AbsoluteValue() const
02926 {
02927         Integer result(*this);
02928         result.sign = POSITIVE;
02929         return result;
02930 }
02931 
02932 void Integer::swap(Integer &a)
02933 {
02934         reg.swap(a.reg);
02935         std::swap(sign, a.sign);
02936 }
02937 
02938 Integer::Integer(word value, unsigned int length)
02939         : reg(RoundupSize(length)), sign(POSITIVE)
02940 {
02941         reg[0] = value;
02942         SetWords(reg+1, 0, reg.size()-1);
02943 }
02944 
02945 template <class T>
02946 static Integer StringToInteger(const T *str)
02947 {
02948         word radix;
02949         // GCC workaround
02950         // std::char_traits doesn't exist in GCC 2.x
02951         // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
02952         unsigned int length;
02953         for (length = 0; str[length] != 0; length++) {}
02954 
02955         Integer v;
02956 
02957         if (length == 0)
02958                 return v;
02959 
02960         switch (str[length-1])
02961         {
02962         case 'h':
02963         case 'H':
02964                 radix=16;
02965                 break;
02966         case 'o':
02967         case 'O':
02968                 radix=8;
02969                 break;
02970         case 'b':
02971         case 'B':
02972                 radix=2;
02973                 break;
02974         default:
02975                 radix=10;
02976         }
02977 
02978         if (length > 2 && str[0] == '0' && str[1] == 'x')
02979                 radix = 16;
02980 
02981         for (unsigned i=0; i<length; i++)
02982         {
02983                 word digit;
02984 
02985                 if (str[i] >= '0' && str[i] <= '9')
02986                         digit = str[i] - '0';
02987                 else if (str[i] >= 'A' && str[i] <= 'F')
02988                         digit = str[i] - 'A' + 10;
02989                 else if (str[i] >= 'a' && str[i] <= 'f')
02990                         digit = str[i] - 'a' + 10;
02991                 else
02992                         digit = radix;
02993 
02994                 if (digit < radix)
02995                 {
02996                         v *= radix;
02997                         v += digit;
02998                 }
02999         }
03000 
03001         if (str[0] == '-')
03002                 v.Negate();
03003 
03004         return v;
03005 }
03006 
03007 Integer::Integer(const char *str)
03008         : reg(2), sign(POSITIVE)
03009 {
03010         *this = StringToInteger(str);
03011 }
03012 
03013 Integer::Integer(const wchar_t *str)
03014         : reg(2), sign(POSITIVE)
03015 {
03016         *this = StringToInteger(str);
03017 }
03018 
03019 unsigned int Integer::WordCount() const
03020 {
03021         return CountWords(reg, reg.size());
03022 }
03023 
03024 unsigned int Integer::ByteCount() const
03025 {
03026         unsigned wordCount = WordCount();
03027         if (wordCount)
03028                 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03029         else
03030                 return 0;
03031 }
03032 
03033 unsigned int Integer::BitCount() const
03034 {
03035         unsigned wordCount = WordCount();
03036         if (wordCount)
03037                 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03038         else
03039                 return 0;
03040 }
03041 
03042 void Integer::Decode(const byte *input, unsigned int inputLen, Signedness s)
03043 {
03044         StringStore store(input, inputLen);
03045         Decode(store, inputLen, s);
03046 }
03047 
03048 void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedness s)
03049 {
03050         assert(bt.MaxRetrievable() >= inputLen);
03051 
03052         byte b;
03053         bt.Peek(b);
03054         sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03055 
03056         while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03057         {
03058                 bt.Skip(1);
03059                 inputLen--;
03060                 bt.Peek(b);
03061         }
03062 
03063         reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03064 
03065         for (unsigned int i=inputLen; i > 0; i--)
03066         {
03067                 bt.Get(b);
03068                 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03069         }
03070 
03071         if (sign == NEGATIVE)
03072         {
03073                 for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++)
03074                         reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03075                 TwosComplement(reg, reg.size());
03076         }
03077 }
03078 
03079 unsigned int Integer::MinEncodedSize(Signedness signedness) const
03080 {
03081         unsigned int outputLen = STDMAX(1U, ByteCount());
03082         if (signedness == UNSIGNED)
03083                 return outputLen;
03084         if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03085                 outputLen++;
03086         if (IsNegative() && *this < -Power2(outputLen*8-1))
03087                 outputLen++;
03088         return outputLen;
03089 }
03090 
03091 unsigned int Integer::Encode(byte *output, unsigned int outputLen, Signedness signedness) const
03092 {
03093         ArraySink sink(output, outputLen);
03094         return Encode(sink, outputLen, signedness);
03095 }
03096 
03097 unsigned int Integer::Encode(BufferedTransformation &bt, unsigned int outputLen, Signedness signedness) const
03098 {
03099         if (signedness == UNSIGNED || NotNegative())
03100         {
03101                 for (unsigned int i=outputLen; i > 0; i--)
03102                         bt.Put(GetByte(i-1));
03103         }
03104         else
03105         {
03106                 // take two's complement of *this
03107                 Integer temp = Integer::Power2(8*STDMAX(ByteCount(), outputLen)) + *this;
03108                 for (unsigned i=0; i<outputLen; i++)
03109                         bt.Put(temp.GetByte(outputLen-i-1));
03110         }
03111         return outputLen;
03112 }
03113 
03114 void Integer::DEREncode(BufferedTransformation &bt) const
03115 {
03116         DERGeneralEncoder enc(bt, INTEGER);
03117         Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03118         enc.MessageEnd();
03119 }
03120 
03121 void Integer::BERDecode(const byte *input, unsigned int len)
03122 {
03123         StringStore store(input, len);
03124         BERDecode(store);
03125 }
03126 
03127 void Integer::BERDecode(BufferedTransformation &bt)
03128 {
03129         BERGeneralDecoder dec(bt, INTEGER);
03130         if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03131                 BERDecodeError();
03132         Decode(dec, dec.RemainingLength(), SIGNED);
03133         dec.MessageEnd();
03134 }
03135 
03136 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, unsigned int length) const
03137 {
03138         DERGeneralEncoder enc(bt, OCTET_STRING);
03139         Encode(enc, length);
03140         enc.MessageEnd();
03141 }
03142 
03143 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, unsigned int length)
03144 {
03145         BERGeneralDecoder dec(bt, OCTET_STRING);
03146         if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03147                 BERDecodeError();
03148         Decode(dec, length);
03149         dec.MessageEnd();
03150 }
03151 
03152 unsigned int Integer::OpenPGPEncode(byte *output, unsigned int len) const
03153 {
03154         ArraySink sink(output, len);
03155         return OpenPGPEncode(sink);
03156 }
03157 
03158 unsigned int Integer::OpenPGPEncode(BufferedTransformation &bt) const
03159 {
03160         word16 bitCount = BitCount();
03161         bt.PutWord16(bitCount);
03162         return 2 + Encode(bt, BitsToBytes(bitCount));
03163 }
03164 
03165 void Integer::OpenPGPDecode(const byte *input, unsigned int len)
03166 {
03167         StringStore store(input, len);
03168         OpenPGPDecode(store);
03169 }
03170 
03171 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03172 {
03173         word16 bitCount;
03174         if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03175                 throw OpenPGPDecodeErr();
03176         Decode(bt, BitsToBytes(bitCount));
03177 }
03178 
03179 void Integer::Randomize(RandomNumberGenerator &rng, unsigned int nbits)
03180 {
03181         const unsigned int nbytes = nbits/8 + 1;
03182         SecByteBlock buf(nbytes);
03183         rng.GenerateBlock(buf, nbytes);
03184         if (nbytes)
03185                 buf[0] = (byte)Crop(buf[0], nbits % 8);
03186         Decode(buf, nbytes, UNSIGNED);
03187 }
03188 
03189 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03190 {
03191         if (min > max)
03192                 throw InvalidArgument("Integer: Min must be no greater than Max");
03193 
03194         Integer range = max - min;
03195         const unsigned int nbits = range.BitCount();
03196 
03197         do
03198         {
03199                 Randomize(rng, nbits);
03200         }
03201         while (*this > range);
03202 
03203         *this += min;
03204 }
03205 
03206 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03207 {
03208         return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03209 }
03210 
03211 class KDF2_RNG : public RandomNumberGenerator
03212 {
03213 public:
03214         KDF2_RNG(const byte *seed, unsigned int seedSize)
03215                 : m_counter(0), m_counterAndSeed(seedSize + 4)
03216         {
03217                 memcpy(m_counterAndSeed + 4, seed, seedSize);
03218         }
03219 
03220         byte GenerateByte()
03221         {
03222                 byte b;
03223                 GenerateBlock(&b, 1);
03224                 return b;
03225         }
03226 
03227         void GenerateBlock(byte *output, unsigned int size)
03228         {
03229                 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03230                 ++m_counter;
03231                 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03232         }
03233 
03234 private:
03235         word32 m_counter;
03236         SecByteBlock m_counterAndSeed;
03237 };
03238 
03239 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
03240 {
03241         Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03242         Integer max;
03243         if (!params.GetValue("Max", max))
03244         {
03245                 int bitLength;
03246                 if (params.GetIntValue("BitLength", bitLength))
03247                         max = Integer::Power2(bitLength);
03248                 else
03249                         throw InvalidArgument("Integer: missing Max argument");
03250         }
03251         if (min > max)
03252                 throw InvalidArgument("Integer: Min must be no greater than Max");
03253 
03254         Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03255         Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03256 
03257         if (equiv.IsNegative() || equiv >= mod)
03258                 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03259 
03260         Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03261 
03262         member_ptr<KDF2_RNG> kdf2Rng;
03263         ConstByteArrayParameter seed;
03264         if (params.GetValue("Seed", seed))
03265         {
03266                 ByteQueue bq;
03267                 DERSequenceEncoder seq(bq);
03268                 min.DEREncode(seq);
03269                 max.DEREncode(seq);
03270                 equiv.DEREncode(seq);
03271                 mod.DEREncode(seq);
03272                 DEREncodeUnsigned(seq, rnType);
03273                 DEREncodeOctetString(seq, seed.begin(), seed.size());
03274                 seq.MessageEnd();
03275 
03276                 SecByteBlock finalSeed(bq.MaxRetrievable());
03277                 bq.Get(finalSeed, finalSeed.size());
03278                 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03279         }
03280         RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03281 
03282         switch (rnType)
03283         {
03284                 case ANY:
03285                         if (mod == One())
03286                                 Randomize(rng, min, max);
03287                         else
03288                         {
03289                                 Integer min1 = min + (equiv-min)%mod;
03290                                 if (max < min1)
03291                                         return false;
03292                                 Randomize(rng, Zero(), (max - min1) / mod);
03293                                 *this *= mod;
03294                                 *this += min1;
03295                         }
03296                         return true;
03297 
03298                 case PRIME:
03299                 {
03300                         const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03301 
03302                         int i;
03303                         i = 0;
03304                         while (1)
03305                         {
03306                                 if (++i==16)
03307                                 {
03308                                         // check if there are any suitable primes in [min, max]
03309                                         Integer first = min;
03310                                         if (FirstPrime(first, max, equiv, mod, pSelector))
03311                                         {
03312                                                 // if there is only one suitable prime, we're done
03313                                                 *this = first;
03314                                                 if (!FirstPrime(first, max, equiv, mod, pSelector))
03315                                                         return true;
03316                                         }
03317                                         else
03318                                                 return false;
03319                                 }
03320 
03321                                 Randomize(rng, min, max);
03322                                 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03323                                         return true;
03324                         }
03325                 }
03326 
03327                 default:
03328                         throw InvalidArgument("Integer: invalid RandomNumberType argument");
03329         }
03330 }
03331 
03332 std::istream& operator>>(std::istream& in, Integer &a)
03333 {
03334         char c;
03335         unsigned int length = 0;
03336         SecBlock<char> str(length + 16);
03337 
03338         std::ws(in);
03339 
03340         do
03341         {
03342                 in.read(&c, 1);
03343                 str[length++] = c;
03344                 if (length >= str.size())
03345                         str.Grow(length + 16);
03346         }
03347         while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03348 
03349         if (in.gcount())
03350                 in.putback(c);
03351         str[length-1] = '\0';
03352         a = Integer(str);
03353 
03354         return in;
03355 }
03356 
03357 std::ostream& operator<<(std::ostream& out, const Integer &a)
03358 {
03359         // Get relevant conversion specifications from ostream.
03360         long f = out.flags() & std::ios::basefield; // Get base digits.
03361         int base, block;
03362         char suffix;
03363         switch(f)
03364         {
03365         case std::ios::oct :
03366                 base = 8;
03367                 block = 8;
03368                 suffix = 'o';
03369                 break;
03370         case std::ios::hex :
03371                 base = 16;
03372                 block = 4;
03373                 suffix = 'h';
03374                 break;
03375         default :
03376                 base = 10;
03377                 block = 3;
03378                 suffix = '.';
03379         }
03380 
03381         SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03382         Integer temp1=a, temp2;
03383         unsigned i=0;
03384         const char vec[]="0123456789ABCDEF";
03385 
03386         if (a.IsNegative())
03387         {
03388                 out << '-';
03389                 temp1.Negate();
03390         }
03391 
03392         if (!a)
03393                 out << '0';
03394 
03395         while (!!temp1)
03396         {
03397                 word digit;
03398                 Integer::Divide(digit, temp2, temp1, base);
03399                 s[i++]=vec[digit];
03400                 temp1=temp2;
03401         }
03402 
03403         while (i--)
03404         {
03405                 out << s[i];
03406 //              if (i && !(i%block))
03407 //                      out << ",";
03408         }
03409         return out << suffix;
03410 }
03411 
03412 Integer& Integer::operator++()
03413 {
03414         if (NotNegative())
03415         {
03416                 if (Increment(reg, reg.size()))
03417                 {
03418                         reg.CleanGrow(2*reg.size());
03419                         reg[reg.size()/2]=1;
03420                 }
03421         }
03422         else
03423         {
03424                 word borrow = Decrement(reg, reg.size());
03425                 assert(!borrow);
03426                 if (WordCount()==0)
03427                         *this = Zero();
03428         }
03429         return *this;
03430 }
03431 
03432 Integer& Integer::operator--()
03433 {
03434         if (IsNegative())
03435         {
03436                 if (Increment(reg, reg.size()))
03437                 {
03438                         reg.CleanGrow(2*reg.size());
03439                         reg[reg.size()/2]=1;
03440                 }
03441         }
03442         else
03443         {
03444                 if (Decrement(reg, reg.size()))
03445                         *this = -One();
03446         }
03447         return *this;
03448 }
03449 
03450 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03451 {
03452         word carry;
03453         if (a.reg.size() == b.reg.size())
03454                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03455         else if (a.reg.size() > b.reg.size())
03456         {
03457                 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03458                 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03459                 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03460         }
03461         else
03462         {
03463                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03464                 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03465                 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03466         }
03467 
03468         if (carry)
03469         {
03470                 sum.reg.CleanGrow(2*sum.reg.size());
03471                 sum.reg[sum.reg.size()/2] = 1;
03472         }
03473         sum.sign = Integer::POSITIVE;
03474 }
03475 
03476 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03477 {
03478         unsigned aSize = a.WordCount();
03479         aSize += aSize%2;
03480         unsigned bSize = b.WordCount();
03481         bSize += bSize%2;
03482 
03483         if (aSize == bSize)
03484         {
03485                 if (Compare(a.reg, b.reg, aSize) >= 0)
03486                 {
03487                         Subtract(diff.reg, a.reg, b.reg, aSize);
03488                         diff.sign = Integer::POSITIVE;
03489                 }
03490                 else
03491                 {
03492                         Subtract(diff.reg, b.reg, a.reg, aSize);
03493                         diff.sign = Integer::NEGATIVE;
03494                 }
03495         }
03496         else if (aSize > bSize)
03497         {
03498                 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03499                 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03500                 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03501                 assert(!borrow);
03502                 diff.sign = Integer::POSITIVE;
03503         }
03504         else
03505         {
03506                 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03507                 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03508                 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03509                 assert(!borrow);
03510                 diff.sign = Integer::NEGATIVE;
03511         }
03512 }
03513 
03514 Integer Integer::Plus(const Integer& b) const
03515 {
03516         Integer sum((word)0, STDMAX(reg.size(), b.reg.size()));
03517         if (NotNegative())
03518         {
03519                 if (b.NotNegative())
03520                         PositiveAdd(sum, *this, b);
03521                 else
03522                         PositiveSubtract(sum, *this, b);
03523         }
03524         else
03525         {
03526                 if (b.NotNegative())
03527                         PositiveSubtract(sum, b, *this);
03528                 else
03529                 {
03530                         PositiveAdd(sum, *this, b);
03531                         sum.sign = Integer::NEGATIVE;
03532                 }
03533         }
03534         return sum;
03535 }
03536 
03537 Integer& Integer::operator+=(const Integer& t)
03538 {
03539         reg.CleanGrow(t.reg.size());
03540         if (NotNegative())
03541         {
03542                 if (t.NotNegative())
03543                         PositiveAdd(*this, *this, t);
03544                 else
03545                         PositiveSubtract(*this, *this, t);
03546         }
03547         else
03548         {
03549                 if (t.NotNegative())
03550                         PositiveSubtract(*this, t, *this);
03551                 else
03552                 {
03553                         PositiveAdd(*this, *this, t);
03554                         sign = Integer::NEGATIVE;
03555                 }
03556         }
03557         return *this;
03558 }
03559 
03560 Integer Integer::Minus(const Integer& b) const
03561 {
03562         Integer diff((word)0, STDMAX(reg.size(), b.reg.size()));
03563         if (NotNegative())
03564         {
03565                 if (b.NotNegative())
03566                         PositiveSubtract(diff, *this, b);
03567                 else
03568                         PositiveAdd(diff, *this, b);
03569         }
03570         else
03571         {
03572                 if (b.NotNegative())
03573                 {
03574                         PositiveAdd(diff, *this, b);
03575                         diff.sign = Integer::NEGATIVE;
03576                 }
03577                 else
03578                         PositiveSubtract(diff, b, *this);
03579         }
03580         return diff;
03581 }
03582 
03583 Integer& Integer::operator-=(const Integer& t)
03584 {
03585         reg.CleanGrow(t.reg.size());
03586         if (NotNegative())
03587         {
03588                 if (t.NotNegative())
03589                         PositiveSubtract(*this, *this, t);
03590                 else
03591                         PositiveAdd(*this, *this, t);
03592         }
03593         else
03594         {
03595                 if (t.NotNegative())
03596                 {
03597                         PositiveAdd(*this, *this, t);
03598                         sign = Integer::NEGATIVE;
03599                 }
03600                 else
03601                         PositiveSubtract(*this, t, *this);
03602         }
03603         return *this;
03604 }
03605 
03606 Integer& Integer::operator<<=(unsigned int n)
03607 {
03608         const unsigned int wordCount = WordCount();
03609         const unsigned int shiftWords = n / WORD_BITS;
03610         const unsigned int shiftBits = n % WORD_BITS;
03611 
03612         reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03613         ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03614         ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03615         return *this;
03616 }
03617 
03618 Integer& Integer::operator>>=(unsigned int n)
03619 {
03620         const unsigned int wordCount = WordCount();
03621         const unsigned int shiftWords = n / WORD_BITS;
03622         const unsigned int shiftBits = n % WORD_BITS;
03623 
03624         ShiftWordsRightByWords(reg, wordCount, shiftWords);
03625         if (wordCount > shiftWords)
03626                 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03627         if (IsNegative() && WordCount()==0)   // avoid -0
03628                 *this = Zero();
03629         return *this;
03630 }
03631 
03632 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03633 {
03634         unsigned aSize = RoundupSize(a.WordCount());
03635         unsigned bSize = RoundupSize(b.WordCount());
03636 
03637         product.reg.CleanNew(RoundupSize(aSize+bSize));
03638         product.sign = Integer::POSITIVE;
03639 
03640         SecAlignedWordBlock workspace(aSize + bSize);
03641         AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03642 }
03643 
03644 void Multiply(Integer &product, const Integer &a, const Integer &b)
03645 {
03646         PositiveMultiply(product, a, b);
03647 
03648         if (a.NotNegative() != b.NotNegative())
03649                 product.Negate();
03650 }
03651 
03652 Integer Integer::Times(const Integer &b) const
03653 {
03654         Integer product;
03655         Multiply(product, *this, b);
03656         return product;
03657 }
03658 
03659 /*
03660 void PositiveDivide(Integer &remainder, Integer &quotient,
03661                                    const Integer &dividend, const Integer &divisor)
03662 {
03663         remainder.reg.CleanNew(divisor.reg.size());
03664         remainder.sign = Integer::POSITIVE;
03665         quotient.reg.New(0);
03666         quotient.sign = Integer::POSITIVE;
03667         unsigned i=dividend.BitCount();
03668         while (i--)
03669         {
03670                 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
03671                 remainder.reg[0] |= dividend[i];
03672                 if (overflow || remainder >= divisor)
03673                 {
03674                         Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
03675                         quotient.SetBit(i);
03676                 }
03677         }
03678 }
03679 */
03680 
03681 void PositiveDivide(Integer &remainder, Integer &quotient,
03682                                    const Integer &a, const Integer &b)
03683 {
03684         unsigned aSize = a.WordCount();
03685         unsigned bSize = b.WordCount();
03686 
03687         if (!bSize)
03688                 throw Integer::DivideByZero();
03689 
03690         if (a.PositiveCompare(b) == -1)
03691         {
03692                 remainder = a;
03693                 remainder.sign = Integer::POSITIVE;
03694                 quotient = Integer::Zero();
03695                 return;
03696         }
03697 
03698         aSize += aSize%2;       // round up to next even number
03699         bSize += bSize%2;
03700 
03701         remainder.reg.CleanNew(RoundupSize(bSize));
03702         remainder.sign = Integer::POSITIVE;
03703         quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03704         quotient.sign = Integer::POSITIVE;
03705 
03706         SecAlignedWordBlock T(aSize+2*bSize+4);
03707         Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03708 }
03709 
03710 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
03711 {
03712         PositiveDivide(remainder, quotient, dividend, divisor);
03713 
03714         if (dividend.IsNegative())
03715         {
03716                 quotient.Negate();
03717                 if (remainder.NotZero())
03718                 {
03719                         --quotient;
03720                         remainder = divisor.AbsoluteValue() - remainder;
03721                 }
03722         }
03723 
03724         if (divisor.IsNegative())
03725                 quotient.Negate();
03726 }
03727 
03728 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03729 {
03730         q = a;
03731         q >>= n;
03732 
03733         const unsigned int wordCount = BitsToWords(n);
03734         if (wordCount <= a.WordCount())
03735         {
03736                 r.reg.resize(RoundupSize(wordCount));
03737                 CopyWords(r.reg, a.reg, wordCount);
03738                 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03739                 if (n % WORD_BITS != 0)
03740                         r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03741         }
03742         else
03743         {
03744                 r.reg.resize(RoundupSize(a.WordCount()));
03745                 CopyWords(r.reg, a.reg, r.reg.size());
03746         }
03747         r.sign = POSITIVE;
03748 
03749         if (a.IsNegative() && r.NotZero())
03750         {
03751                 --q;
03752                 r = Power2(n) - r;
03753         }
03754 }
03755 
03756 Integer Integer::DividedBy(const Integer &b) const
03757 {
03758         Integer remainder, quotient;
03759         Integer::Divide(remainder, quotient, *this, b);
03760         return quotient;
03761 }
03762 
03763 Integer Integer::Modulo(const Integer &b) const
03764 {
03765         Integer remainder, quotient;
03766         Integer::Divide(remainder, quotient, *this, b);
03767         return remainder;
03768 }
03769 
03770 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
03771 {
03772         if (!divisor)
03773                 throw Integer::DivideByZero();
03774 
03775         assert(divisor);
03776 
03777         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03778         {
03779                 quotient = dividend >> (BitPrecision(divisor)-1);
03780                 remainder = dividend.reg[0] & (divisor-1);
03781                 return;
03782         }
03783 
03784         unsigned int i = dividend.WordCount();
03785         quotient.reg.CleanNew(RoundupSize(i));
03786         remainder = 0;
03787         while (i--)
03788         {
03789                 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03790                 remainder = DWord(dividend.reg[i], remainder) % divisor;
03791         }
03792 
03793         if (dividend.NotNegative())
03794                 quotient.sign = POSITIVE;
03795         else
03796         {
03797                 quotient.sign = NEGATIVE;
03798                 if (remainder)
03799                 {
03800                         --quotient;
03801                         remainder = divisor - remainder;
03802                 }
03803         }
03804 }
03805 
03806 Integer Integer::DividedBy(word b) const
03807 {
03808         word remainder;
03809         Integer quotient;
03810         Integer::Divide(remainder, quotient, *this, b);
03811         return quotient;
03812 }
03813 
03814 word Integer::Modulo(word divisor) const
03815 {
03816         if (!divisor)
03817                 throw Integer::DivideByZero();
03818 
03819         assert(divisor);
03820 
03821         word remainder;
03822 
03823         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03824                 remainder = reg[0] & (divisor-1);
03825         else
03826         {
03827                 unsigned int i = WordCount();
03828 
03829                 if (divisor <= 5)
03830                 {
03831                         DWord sum(0, 0);
03832                         while (i--)
03833                                 sum += reg[i];
03834                         remainder = sum % divisor;
03835                 }
03836                 else
03837                 {
03838                         remainder = 0;
03839                         while (i--)
03840                                 remainder = DWord(reg[i], remainder) % divisor;
03841                 }
03842         }
03843 
03844         if (IsNegative() && remainder)
03845                 remainder = divisor - remainder;
03846 
03847         return remainder;
03848 }
03849 
03850 void Integer::Negate()
03851 {
03852         if (!!(*this))  // don't flip sign if *this==0
03853                 sign = Sign(1-sign);
03854 }
03855 
03856 int Integer::PositiveCompare(const Integer& t) const
03857 {
03858         unsigned size = WordCount(), tSize = t.WordCount();
03859 
03860         if (size == tSize)
03861                 return CryptoPP::Compare(reg, t.reg, size);
03862         else
03863                 return size > tSize ? 1 : -1;
03864 }
03865 
03866 int Integer::Compare(const Integer& t) const
03867 {
03868         if (NotNegative())
03869         {
03870                 if (t.NotNegative())
03871                         return PositiveCompare(t);
03872                 else
03873                         return 1;
03874         }
03875         else
03876         {
03877                 if (t.NotNegative())
03878                         return -1;
03879                 else
03880                         return -PositiveCompare(t);
03881         }
03882 }
03883 
03884 Integer Integer::SquareRoot() const
03885 {
03886         if (!IsPositive())
03887                 return Zero();
03888 
03889         // overestimate square root
03890         Integer x, y = Power2((BitCount()+1)/2);
03891         assert(y*y >= *this);
03892 
03893         do
03894         {
03895                 x = y;
03896                 y = (x + *this/x) >> 1;
03897         } while (y<x);
03898 
03899         return x;
03900 }
03901 
03902 bool Integer::IsSquare() const
03903 {
03904         Integer r = SquareRoot();
03905         return *this == r.Squared();
03906 }
03907 
03908 bool Integer::IsUnit() const
03909 {
03910         return (WordCount() == 1) && (reg[0] == 1);
03911 }
03912 
03913 Integer Integer::MultiplicativeInverse() const
03914 {
03915         return IsUnit() ? *this : Zero();
03916 }
03917 
03918 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03919 {
03920         return x*y%m;
03921 }
03922 
03923 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03924 {
03925         ModularArithmetic mr(m);
03926         return mr.Exponentiate(x, e);
03927 }
03928 
03929 Integer Integer::Gcd(const Integer &a, const Integer &b)
03930 {
03931         return EuclideanDomainOf<Integer>().Gcd(a, b);
03932 }
03933 
03934 Integer Integer::InverseMod(const Integer &m) const
03935 {
03936         assert(m.NotNegative());
03937 
03938         if (IsNegative() || *this>=m)
03939                 return (*this%m).InverseMod(m);
03940 
03941         if (m.IsEven())
03942         {
03943                 if (!m || IsEven())
03944                         return Zero();  // no inverse
03945                 if (*this == One())
03946                         return One();
03947 
03948                 Integer u = m.InverseMod(*this);
03949                 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03950         }
03951 
03952         SecBlock<word> T(m.reg.size() * 4);
03953         Integer r((word)0, m.reg.size());
03954         unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03955         DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03956         return r;
03957 }
03958 
03959 word Integer::InverseMod(const word mod) const
03960 {
03961         word g0 = mod, g1 = *this % mod;
03962         word v0 = 0, v1 = 1;
03963         word y;
03964 
03965         while (g1)
03966         {
03967                 if (g1 == 1)
03968                         return v1;
03969                 y = g0 / g1;
03970                 g0 = g0 % g1;
03971                 v0 += y * v1;
03972 
03973                 if (!g0)
03974                         break;
03975                 if (g0 == 1)
03976                         return mod-v0;
03977                 y = g1 / g0;
03978                 g1 = g1 % g0;
03979                 v1 += y * v0;
03980         }
03981         return 0;
03982 }
03983 
03984 // ********************************************************
03985 
03986 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03987 {
03988         BERSequenceDecoder seq(bt);
03989         OID oid(seq);
03990         if (oid != ASN1::prime_field())
03991                 BERDecodeError();
03992         modulus.BERDecode(seq);
03993         seq.MessageEnd();
03994         result.reg.resize(modulus.reg.size());
03995 }
03996 
03997 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
03998 {
03999         DERSequenceEncoder seq(bt);
04000         ASN1::prime_field().DEREncode(seq);
04001         modulus.DEREncode(seq);
04002         seq.MessageEnd();
04003 }
04004 
04005 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04006 {
04007         a.DEREncodeAsOctetString(out, MaxElementByteLength());
04008 }
04009 
04010 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04011 {
04012         a.BERDecodeAsOctetString(in, MaxElementByteLength());
04013 }
04014 
04015 const Integer& ModularArithmetic::Half(const Integer &a) const
04016 {
04017         if (a.reg.size()==modulus.reg.size())
04018         {
04019                 CryptoPP::DivideByPower2Mod(result.reg.begin(), a.reg, 1, modulus.reg, a.reg.size());
04020                 return result;
04021         }
04022         else
04023                 return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1));
04024 }
04025 
04026 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04027 {
04028         if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04029         {
04030                 if (CryptoPP::Add(result.reg.begin(), a.reg, b.reg, a.reg.size())
04031                         || Compare(result.reg, modulus.reg, a.reg.size()) >= 0)
04032                 {
04033                         CryptoPP::Subtract(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
04034                 }
04035                 return result;
04036         }
04037         else
04038         {
04039                 result1 = a+b;
04040                 if (result1 >= modulus)
04041                         result1 -= modulus;
04042                 return result1;
04043         }
04044 }
04045 
04046 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04047 {
04048         if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04049         {
04050                 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04051                         || Compare(a.reg, modulus.reg, a.reg.size()) >= 0)
04052                 {
04053                         CryptoPP::Subtract(a.reg, a.reg, modulus.reg, a.reg.size());
04054                 }
04055         }
04056         else
04057         {
04058                 a+=b;
04059                 if (a>=modulus)
04060                         a-=modulus;
04061         }
04062 
04063         return a;
04064 }
04065 
04066 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04067 {
04068         if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04069         {
04070                 if (CryptoPP::Subtract(result.reg.begin(), a.reg, b.reg, a.reg.size()))
04071                         CryptoPP::Add(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
04072                 return result;
04073         }
04074         else
04075         {
04076                 result1 = a-b;
04077                 if (result1.IsNegative())
04078                         result1 += modulus;
04079                 return result1;
04080         }
04081 }
04082 
04083 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04084 {
04085         if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04086         {
04087                 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04088                         CryptoPP::Add(a.reg, a.reg, modulus.reg, a.reg.size());
04089         }
04090         else
04091         {
04092                 a-=b;
04093                 if (a.IsNegative())
04094                         a+=modulus;
04095         }
04096 
04097         return a;
04098 }
04099 
04100 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04101 {
04102         if (!a)
04103                 return a;
04104 
04105         CopyWords(result.reg.begin(), modulus.reg, modulus.reg.size());
04106         if (CryptoPP::Subtract(result.reg.begin(), result.reg, a.reg, a.reg.size()))
04107                 Decrement(result.reg.begin()+a.reg.size(), 1, modulus.reg.size()-a.reg.size());
04108 
04109         return result;
04110 }
04111 
04112 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04113 {
04114         if (modulus.IsOdd())
04115         {
04116                 MontgomeryRepresentation dr(modulus);
04117                 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04118         }
04119         else
04120                 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04121 }
04122 
04123 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04124 {
04125         if (modulus.IsOdd())
04126         {
04127                 MontgomeryRepresentation dr(modulus);
04128                 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04129                 for (unsigned int i=0; i<exponentsCount; i++)
04130                         results[i] = dr.ConvertOut(results[i]);
04131         }
04132         else
04133                 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04134 }
04135 
04136 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)    // modulus must be odd
04137         : ModularArithmetic(m),
04138           u((word)0, modulus.reg.size()),
04139           workspace(5*modulus.reg.size())
04140 {
04141         if (!modulus.IsOdd())
04142                 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04143 
04144         RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size());
04145 }
04146 
04147 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04148 {
04149         word *const T = workspace.begin();
04150         word *const R = result.reg.begin();
04151         const unsigned int N = modulus.reg.size();
04152         assert(a.reg.size()<=N && b.reg.size()<=N);
04153 
04154         AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04155         SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04156         MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04157         return result;
04158 }
04159 
04160 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04161 {
04162         word *const T = workspace.begin();
04163         word *const R = result.reg.begin();
04164         const unsigned int N = modulus.reg.size();
04165         assert(a.reg.size()<=N);
04166 
04167         CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04168         SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04169         MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04170         return result;
04171 }
04172 
04173 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04174 {
04175         word *const T = workspace.begin();
04176         word *const R = result.reg.begin();
04177         const unsigned int N = modulus.reg.size();
04178         assert(a.reg.size()<=N);
04179 
04180         CopyWords(T, a.reg, a.reg.size());
04181         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04182         MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04183         return result;
04184 }
04185 
04186 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04187 {
04188 //        return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
04189         word *const T = workspace.begin();
04190         word *const R = result.reg.begin();
04191         const unsigned int N = modulus.reg.size();
04192         assert(a.reg.size()<=N);
04193 
04194         CopyWords(T, a.reg, a.reg.size());
04195         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04196         MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04197         unsigned k = AlmostInverse(R, T, R, N, modulus.reg, N);
04198 
04199 //      cout << "k=" << k << " N*32=" << 32*N << endl;
04200 
04201         if (k>N*WORD_BITS)
04202                 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.reg, N);
04203         else
04204                 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.reg, N);
04205 
04206         return result;
04207 }
04208 
04209 NAMESPACE_END
04210 
04211 #endif

Generated on Tue Oct 26 18:51:39 2004 for Crypto++ by  doxygen 1.3.9.1