00001
00002
00003
00004
#include "pch.h"
00005
#include "integer.h"
00006
#include "modarith.h"
00007
#include "nbtheory.h"
00008
#include "asn.h"
00009
#include "oids.h"
00010
#include "words.h"
00011
#include "algparam.h"
00012
#include "pubkey.h"
00013
#include "sha.h"
00014
00015
#include <iostream>
00016
00017
#ifdef SSE2_INTRINSICS_AVAILABLE
00018
#include <emmintrin.h>
00019
#endif
00020
00021
#include "algebra.cpp"
00022
#include "eprecomp.cpp"
00023
00024 NAMESPACE_BEGIN(CryptoPP)
00025
00026 bool FunctionAssignIntToInteger(const std::type_info &valueType,
void *pInteger, const
void *pInt)
00027 {
00028
if (valueType !=
typeid(
Integer))
00029
return false;
00030 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00031
return true;
00032 }
00033
00034
static int DummyAssignIntToInteger = (AssignIntToInteger = FunctionAssignIntToInteger, 0);
00035
00036
#ifdef SSE2_INTRINSICS_AVAILABLE
00037
template <
class T>
00038 AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n,
const void *)
00039 {
00040
if (n < 4)
00041
return new T[n];
00042
else
00043
return (T *)_mm_malloc(
sizeof(T)*n, 16);
00044
00045 }
00046
00047
template <
class T>
00048
void AlignedAllocator<T>::deallocate(
void *p, size_type n)
00049 {
00050 memset(p, 0, n*
sizeof(T));
00051
if (n < 4)
00052
delete [] p;
00053
else
00054 _mm_free(p);
00055 }
00056
00057
template class AlignedAllocator<word>;
00058
#endif
00059
00060
#define MAKE_DWORD(lowWord, highWord) ((dword(highWord)<<WORD_BITS) | (lowWord))
00061
00062
static int Compare(
const word *A,
const word *B,
unsigned int N)
00063 {
00064
while (N--)
00065
if (A[N] > B[N])
00066
return 1;
00067
else if (A[N] < B[N])
00068
return -1;
00069
00070
return 0;
00071 }
00072
00073
static word Increment(word *A,
unsigned int N, word B=1)
00074 {
00075 assert(N);
00076 word t = A[0];
00077 A[0] = t+B;
00078
if (A[0] >= t)
00079
return 0;
00080
for (
unsigned i=1; i<N; i++)
00081
if (++A[i])
00082
return 0;
00083
return 1;
00084 }
00085
00086
static word Decrement(word *A,
unsigned int N, word B=1)
00087 {
00088 assert(N);
00089 word t = A[0];
00090 A[0] = t-B;
00091
if (A[0] <= t)
00092
return 0;
00093
for (
unsigned i=1; i<N; i++)
00094
if (A[i]--)
00095
return 0;
00096
return 1;
00097 }
00098
00099
static void TwosComplement(word *A,
unsigned int N)
00100 {
00101 Decrement(A, N);
00102
for (
unsigned i=0; i<N; i++)
00103 A[i] = ~A[i];
00104 }
00105
00106
static word LinearMultiply(word *C,
const word *A, word B,
unsigned int N)
00107 {
00108 word carry=0;
00109
for(
unsigned i=0; i<N; i++)
00110 {
00111 dword p = (dword)A[i] * B + carry;
00112 C[i] = LOW_WORD(p);
00113 carry = HIGH_WORD(p);
00114 }
00115
return carry;
00116 }
00117
00118
static void AtomicInverseModPower2(word *C, word A0, word A1)
00119 {
00120 assert(A0%2==1);
00121
00122 dword A=MAKE_DWORD(A0, A1), R=A0%8;
00123
00124
for (
unsigned i=3; i<2*WORD_BITS; i*=2)
00125 R = R*(2-R*A);
00126
00127 assert(R*A==1);
00128
00129 C[0] = LOW_WORD(R);
00130 C[1] = HIGH_WORD(R);
00131 }
00132
00133
00134
00135
class Portable
00136 {
00137
public:
00138
static word Add(word *C,
const word *A,
const word *B,
unsigned int N);
00139
static word Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00140
00141
static inline void Multiply2(word *C,
const word *A,
const word *B);
00142
static inline word Multiply2Add(word *C,
const word *A,
const word *B);
00143
static void Multiply4(word *C,
const word *A,
const word *B);
00144
static void Multiply8(word *C,
const word *A,
const word *B);
00145
static inline unsigned int MultiplyRecursionLimit() {
return 8;}
00146
00147
static inline void Multiply2Bottom(word *C,
const word *A,
const word *B);
00148
static void Multiply4Bottom(word *C,
const word *A,
const word *B);
00149
static void Multiply8Bottom(word *C,
const word *A,
const word *B);
00150
static inline unsigned int MultiplyBottomRecursionLimit() {
return 8;}
00151
00152
static void Square2(word *R,
const word *A);
00153
static void Square4(word *R,
const word *A);
00154
static void Square8(word *R,
const word *A) {assert(
false);}
00155
static inline unsigned int SquareRecursionLimit() {
return 4;}
00156 };
00157
00158 word Portable::Add(word *C,
const word *A,
const word *B,
unsigned int N)
00159 {
00160 assert (N%2 == 0);
00161
00162
#ifdef IS_LITTLE_ENDIAN
00163
if (
sizeof(dword) ==
sizeof(size_t))
00164 {
00165 dword carry = 0;
00166 N >>= 1;
00167
for (
unsigned int i = 0; i < N; i++)
00168 {
00169 dword a = ((
const dword *)A)[i] + carry;
00170 dword c = a + ((
const dword *)B)[i];
00171 ((dword *)C)[i] = c;
00172 carry = (a < carry) | (c < a);
00173 }
00174
return (word)carry;
00175 }
00176
else
00177
#endif
00178
{
00179 word carry = 0;
00180
for (
unsigned int i = 0; i < N; i+=2)
00181 {
00182 dword u = (dword) carry + A[i] + B[i];
00183 C[i] = LOW_WORD(u);
00184 u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
00185 C[i+1] = LOW_WORD(u);
00186 carry = HIGH_WORD(u);
00187 }
00188
return carry;
00189 }
00190 }
00191
00192 word Portable::Subtract(word *C,
const word *A,
const word *B,
unsigned int N)
00193 {
00194 assert (N%2 == 0);
00195
00196
#ifdef IS_LITTLE_ENDIAN
00197
if (
sizeof(dword) ==
sizeof(size_t))
00198 {
00199 dword borrow = 0;
00200 N >>= 1;
00201
for (
unsigned int i = 0; i < N; i++)
00202 {
00203 dword a = ((
const dword *)A)[i];
00204 dword b = a - borrow;
00205 dword c = b - ((
const dword *)B)[i];
00206 ((dword *)C)[i] = c;
00207 borrow = (b > a) | (c > b);
00208 }
00209
return (word)borrow;
00210 }
00211
else
00212
#endif
00213
{
00214 word borrow=0;
00215
for (
unsigned i = 0; i < N; i+=2)
00216 {
00217 dword u = (dword) A[i] - B[i] - borrow;
00218 C[i] = LOW_WORD(u);
00219 u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
00220 C[i+1] = LOW_WORD(u);
00221 borrow = 0-HIGH_WORD(u);
00222 }
00223
return borrow;
00224 }
00225 }
00226
00227
void Portable::Multiply2(word *C,
const word *A,
const word *B)
00228 {
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00258
unsigned int ai = A[1] < A[0];
00259
unsigned int bi = B[0] < B[1];
00260
unsigned int di = ai & bi;
00261 dword d = (dword)D[di]*D[di+2];
00262 D[1] = D[3] = 0;
00263
unsigned int si = ai + !bi;
00264 word s = D[si];
00265
00266 dword A0B0 = (dword)A[0]*B[0];
00267 C[0] = LOW_WORD(A0B0);
00268
00269 dword A1B1 = (dword)A[1]*B[1];
00270 dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
00271 C[1] = LOW_WORD(t);
00272
00273 t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
00274 C[2] = LOW_WORD(t);
00275 C[3] = HIGH_WORD(t);
00276 }
00277
00278
inline void Portable::Multiply2Bottom(word *C,
const word *A,
const word *B)
00279 {
00280
#ifdef IS_LITTLE_ENDIAN
00281
if (
sizeof(dword) ==
sizeof(size_t))
00282 {
00283 dword a = *(
const dword *)A, b = *(
const dword *)B;
00284 ((dword *)C)[0] = a*b;
00285 }
00286
else
00287
#endif
00288
{
00289 dword t = (dword)A[0]*B[0];
00290 C[0] = LOW_WORD(t);
00291 C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];
00292 }
00293 }
00294
00295 word Portable::Multiply2Add(word *C,
const word *A,
const word *B)
00296 {
00297 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00298
unsigned int ai = A[1] < A[0];
00299
unsigned int bi = B[0] < B[1];
00300
unsigned int di = ai & bi;
00301 dword d = (dword)D[di]*D[di+2];
00302 D[1] = D[3] = 0;
00303
unsigned int si = ai + !bi;
00304 word s = D[si];
00305
00306 dword A0B0 = (dword)A[0]*B[0];
00307 dword t = A0B0 + C[0];
00308 C[0] = LOW_WORD(t);
00309
00310 dword A1B1 = (dword)A[1]*B[1];
00311 t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
00312 C[1] = LOW_WORD(t);
00313
00314 t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
00315 C[2] = LOW_WORD(t);
00316
00317 t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
00318 C[3] = LOW_WORD(t);
00319
return HIGH_WORD(t);
00320 }
00321
00322
#define MulAcc(x, y) \
00323
p = (dword)A[x] * B[y] + c; \
00324
c = LOW_WORD(p); \
00325
p = (dword)d + HIGH_WORD(p); \
00326
d = LOW_WORD(p); \
00327
e += HIGH_WORD(p);
00328
00329
#define SaveMulAcc(s, x, y) \
00330
R[s] = c; \
00331
p = (dword)A[x] * B[y] + d; \
00332
c = LOW_WORD(p); \
00333
p = (dword)e + HIGH_WORD(p); \
00334
d = LOW_WORD(p); \
00335
e = HIGH_WORD(p);
00336
00337
#define SquAcc(x, y) \
00338
q = (dword)A[x] * A[y]; \
00339
p = q + c; \
00340
c = LOW_WORD(p); \
00341
p = (dword)d + HIGH_WORD(p); \
00342
d = LOW_WORD(p); \
00343
e += HIGH_WORD(p); \
00344
p = q + c; \
00345
c = LOW_WORD(p); \
00346
p = (dword)d + HIGH_WORD(p); \
00347
d = LOW_WORD(p); \
00348
e += HIGH_WORD(p);
00349
00350
#define SaveSquAcc(s, x, y) \
00351
R[s] = c; \
00352
q = (dword)A[x] * A[y]; \
00353
p = q + d; \
00354
c = LOW_WORD(p); \
00355
p = (dword)e + HIGH_WORD(p); \
00356
d = LOW_WORD(p); \
00357
e = HIGH_WORD(p); \
00358
p = q + c; \
00359
c = LOW_WORD(p); \
00360
p = (dword)d + HIGH_WORD(p); \
00361
d = LOW_WORD(p); \
00362
e += HIGH_WORD(p);
00363
00364
void Portable::Multiply4(word *R,
const word *A,
const word *B)
00365 {
00366 dword p;
00367 word c, d, e;
00368
00369 p = (dword)A[0] * B[0];
00370 R[0] = LOW_WORD(p);
00371 c = HIGH_WORD(p);
00372 d = e = 0;
00373
00374 MulAcc(0, 1);
00375 MulAcc(1, 0);
00376
00377 SaveMulAcc(1, 2, 0);
00378 MulAcc(1, 1);
00379 MulAcc(0, 2);
00380
00381 SaveMulAcc(2, 0, 3);
00382 MulAcc(1, 2);
00383 MulAcc(2, 1);
00384 MulAcc(3, 0);
00385
00386 SaveMulAcc(3, 3, 1);
00387 MulAcc(2, 2);
00388 MulAcc(1, 3);
00389
00390 SaveMulAcc(4, 2, 3);
00391 MulAcc(3, 2);
00392
00393 R[5] = c;
00394 p = (dword)A[3] * B[3] + d;
00395 R[6] = LOW_WORD(p);
00396 R[7] = e + HIGH_WORD(p);
00397 }
00398
00399
void Portable::Square2(word *R,
const word *A)
00400 {
00401 dword p, q;
00402 word c, d, e;
00403
00404 p = (dword)A[0] * A[0];
00405 R[0] = LOW_WORD(p);
00406 c = HIGH_WORD(p);
00407 d = e = 0;
00408
00409 SquAcc(0, 1);
00410
00411 R[1] = c;
00412 p = (dword)A[1] * A[1] + d;
00413 R[2] = LOW_WORD(p);
00414 R[3] = e + HIGH_WORD(p);
00415 }
00416
00417
void Portable::Square4(word *R,
const word *A)
00418 {
00419
const word *B = A;
00420 dword p, q;
00421 word c, d, e;
00422
00423 p = (dword)A[0] * A[0];
00424 R[0] = LOW_WORD(p);
00425 c = HIGH_WORD(p);
00426 d = e = 0;
00427
00428 SquAcc(0, 1);
00429
00430 SaveSquAcc(1, 2, 0);
00431 MulAcc(1, 1);
00432
00433 SaveSquAcc(2, 0, 3);
00434 SquAcc(1, 2);
00435
00436 SaveSquAcc(3, 3, 1);
00437 MulAcc(2, 2);
00438
00439 SaveSquAcc(4, 2, 3);
00440
00441 R[5] = c;
00442 p = (dword)A[3] * A[3] + d;
00443 R[6] = LOW_WORD(p);
00444 R[7] = e + HIGH_WORD(p);
00445 }
00446
00447
void Portable::Multiply8(word *R,
const word *A,
const word *B)
00448 {
00449 dword p;
00450 word c, d, e;
00451
00452 p = (dword)A[0] * B[0];
00453 R[0] = LOW_WORD(p);
00454 c = HIGH_WORD(p);
00455 d = e = 0;
00456
00457 MulAcc(0, 1);
00458 MulAcc(1, 0);
00459
00460 SaveMulAcc(1, 2, 0);
00461 MulAcc(1, 1);
00462 MulAcc(0, 2);
00463
00464 SaveMulAcc(2, 0, 3);
00465 MulAcc(1, 2);
00466 MulAcc(2, 1);
00467 MulAcc(3, 0);
00468
00469 SaveMulAcc(3, 0, 4);
00470 MulAcc(1, 3);
00471 MulAcc(2, 2);
00472 MulAcc(3, 1);
00473 MulAcc(4, 0);
00474
00475 SaveMulAcc(4, 0, 5);
00476 MulAcc(1, 4);
00477 MulAcc(2, 3);
00478 MulAcc(3, 2);
00479 MulAcc(4, 1);
00480 MulAcc(5, 0);
00481
00482 SaveMulAcc(5, 0, 6);
00483 MulAcc(1, 5);
00484 MulAcc(2, 4);
00485 MulAcc(3, 3);
00486 MulAcc(4, 2);
00487 MulAcc(5, 1);
00488 MulAcc(6, 0);
00489
00490 SaveMulAcc(6, 0, 7);
00491 MulAcc(1, 6);
00492 MulAcc(2, 5);
00493 MulAcc(3, 4);
00494 MulAcc(4, 3);
00495 MulAcc(5, 2);
00496 MulAcc(6, 1);
00497 MulAcc(7, 0);
00498
00499 SaveMulAcc(7, 1, 7);
00500 MulAcc(2, 6);
00501 MulAcc(3, 5);
00502 MulAcc(4, 4);
00503 MulAcc(5, 3);
00504 MulAcc(6, 2);
00505 MulAcc(7, 1);
00506
00507 SaveMulAcc(8, 2, 7);
00508 MulAcc(3, 6);
00509 MulAcc(4, 5);
00510 MulAcc(5, 4);
00511 MulAcc(6, 3);
00512 MulAcc(7, 2);
00513
00514 SaveMulAcc(9, 3, 7);
00515 MulAcc(4, 6);
00516 MulAcc(5, 5);
00517 MulAcc(6, 4);
00518 MulAcc(7, 3);
00519
00520 SaveMulAcc(10, 4, 7);
00521 MulAcc(5, 6);
00522 MulAcc(6, 5);
00523 MulAcc(7, 4);
00524
00525 SaveMulAcc(11, 5, 7);
00526 MulAcc(6, 6);
00527 MulAcc(7, 5);
00528
00529 SaveMulAcc(12, 6, 7);
00530 MulAcc(7, 6);
00531
00532 R[13] = c;
00533 p = (dword)A[7] * B[7] + d;
00534 R[14] = LOW_WORD(p);
00535 R[15] = e + HIGH_WORD(p);
00536 }
00537
00538
void Portable::Multiply4Bottom(word *R,
const word *A,
const word *B)
00539 {
00540 dword p;
00541 word c, d, e;
00542
00543 p = (dword)A[0] * B[0];
00544 R[0] = LOW_WORD(p);
00545 c = HIGH_WORD(p);
00546 d = e = 0;
00547
00548 MulAcc(0, 1);
00549 MulAcc(1, 0);
00550
00551 SaveMulAcc(1, 2, 0);
00552 MulAcc(1, 1);
00553 MulAcc(0, 2);
00554
00555 R[2] = c;
00556 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00557 }
00558
00559
void Portable::Multiply8Bottom(word *R,
const word *A,
const word *B)
00560 {
00561 dword p;
00562 word c, d, e;
00563
00564 p = (dword)A[0] * B[0];
00565 R[0] = LOW_WORD(p);
00566 c = HIGH_WORD(p);
00567 d = e = 0;
00568
00569 MulAcc(0, 1);
00570 MulAcc(1, 0);
00571
00572 SaveMulAcc(1, 2, 0);
00573 MulAcc(1, 1);
00574 MulAcc(0, 2);
00575
00576 SaveMulAcc(2, 0, 3);
00577 MulAcc(1, 2);
00578 MulAcc(2, 1);
00579 MulAcc(3, 0);
00580
00581 SaveMulAcc(3, 0, 4);
00582 MulAcc(1, 3);
00583 MulAcc(2, 2);
00584 MulAcc(3, 1);
00585 MulAcc(4, 0);
00586
00587 SaveMulAcc(4, 0, 5);
00588 MulAcc(1, 4);
00589 MulAcc(2, 3);
00590 MulAcc(3, 2);
00591 MulAcc(4, 1);
00592 MulAcc(5, 0);
00593
00594 SaveMulAcc(5, 0, 6);
00595 MulAcc(1, 5);
00596 MulAcc(2, 4);
00597 MulAcc(3, 3);
00598 MulAcc(4, 2);
00599 MulAcc(5, 1);
00600 MulAcc(6, 0);
00601
00602 R[6] = c;
00603 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00604 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00605 }
00606
00607
#undef MulAcc
00608
#undef SaveMulAcc
00609
#undef SquAcc
00610
#undef SaveSquAcc
00611
00612
00613
#if defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=700)
00614
00615
class PentiumOptimized :
public Portable
00616 {
00617
public:
00618
static word __fastcall Add(word *C,
const word *A,
const word *B,
unsigned int N);
00619
static word __fastcall Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00620
static inline void Square4(word *R,
const word *A)
00621 {
00622
00623
00624
00625
00626 Multiply4(R, A, A);
00627 }
00628 };
00629
00630
typedef PentiumOptimized LowLevel;
00631
00632 __declspec(naked) word __fastcall PentiumOptimized::Add(word *C, const word *A, const word *B,
unsigned int N)
00633 {
00634 __asm
00635 {
00636 push ebp
00637 push ebx
00638 push esi
00639 push edi
00640
00641 mov esi, [esp+24] ; N
00642 mov ebx, [esp+20] ; B
00643
00644
00645
00646 sub ecx, edx
00647 xor eax, eax
00648
00649 sub eax, esi
00650 lea ebx, [ebx+4*esi]
00651
00652 sar eax, 1
00653 jz loopend
00654
00655 loopstart:
00656 mov esi,[edx]
00657 mov ebp,[edx+4]
00658
00659 mov edi,[ebx+8*eax]
00660 lea edx,[edx+8]
00661
00662 adc esi,edi
00663 mov edi,[ebx+8*eax+4]
00664
00665 adc ebp,edi
00666 inc eax
00667
00668 mov [edx+ecx-8],esi
00669 mov [edx+ecx-4],ebp
00670
00671 jnz loopstart
00672
00673 loopend:
00674 adc eax, 0
00675 pop edi
00676 pop esi
00677 pop ebx
00678 pop ebp
00679 ret 8
00680 }
00681 }
00682
00683 __declspec(naked) word __fastcall PentiumOptimized::Subtract(word *C, const word *A, const word *B,
unsigned int N)
00684 {
00685 __asm
00686 {
00687 push ebp
00688 push ebx
00689 push esi
00690 push edi
00691
00692 mov esi, [esp+24] ; N
00693 mov ebx, [esp+20] ; B
00694
00695 sub ecx, edx
00696 xor eax, eax
00697
00698 sub eax, esi
00699 lea ebx, [ebx+4*esi]
00700
00701 sar eax, 1
00702 jz loopend
00703
00704 loopstart:
00705 mov esi,[edx]
00706 mov ebp,[edx+4]
00707
00708 mov edi,[ebx+8*eax]
00709 lea edx,[edx+8]
00710
00711 sbb esi,edi
00712 mov edi,[ebx+8*eax+4]
00713
00714 sbb ebp,edi
00715 inc eax
00716
00717 mov [edx+ecx-8],esi
00718 mov [edx+ecx-4],ebp
00719
00720 jnz loopstart
00721
00722 loopend:
00723 adc eax, 0
00724 pop edi
00725 pop esi
00726 pop ebx
00727 pop ebp
00728 ret 8
00729 }
00730 }
00731
00732
#ifdef SSE2_INTRINSICS_AVAILABLE
00733
00734
static bool GetSSE2Capability()
00735 {
00736 word32 b;
00737
00738 __asm
00739 {
00740 mov eax, 1
00741 cpuid
00742 mov b, edx
00743 }
00744
00745
return (b & (1 << 26)) != 0;
00746 }
00747
00748
bool g_sse2DetectionDone =
false, g_sse2Detected, g_sse2Enabled =
true;
00749
00750
static inline bool HasSSE2()
00751 {
00752
if (g_sse2Enabled && !g_sse2DetectionDone)
00753 {
00754 g_sse2Detected = GetSSE2Capability();
00755 g_sse2DetectionDone =
true;
00756 }
00757
return g_sse2Enabled && g_sse2Detected;
00758 }
00759
00760
class P4Optimized :
public PentiumOptimized
00761 {
00762
public:
00763
static word __fastcall Add(word *C,
const word *A,
const word *B,
unsigned int N);
00764
static word __fastcall Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00765
static void Multiply4(word *C,
const word *A,
const word *B);
00766
static void Multiply8(word *C,
const word *A,
const word *B);
00767
static inline void Square4(word *R,
const word *A)
00768 {
00769 Multiply4(R, A, A);
00770 }
00771
static void Multiply8Bottom(word *C,
const word *A,
const word *B);
00772 };
00773
00774
static void __fastcall P4_Mul(__m128i *C,
const __m128i *A,
const __m128i *B)
00775 {
00776 __m128i a3210 = _mm_load_si128(A);
00777 __m128i b3210 = _mm_load_si128(B);
00778
00779 __m128i sum;
00780
00781 __m128i z = _mm_setzero_si128();
00782 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
00783 C[0] = a2b2_a0b0;
00784
00785 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
00786 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
00787 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
00788 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
00789 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
00790 C[1] = _mm_add_epi64(a1b0, a0b1);
00791
00792 __m128i a31 = _mm_srli_epi64(a3210, 32);
00793 __m128i b31 = _mm_srli_epi64(b3210, 32);
00794 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
00795 C[6] = a3b3_a1b1;
00796
00797 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
00798 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
00799 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
00800 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
00801 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
00802 sum = _mm_add_epi64(a1b1, a0b2);
00803 C[2] = _mm_add_epi64(sum, a2b0);
00804
00805 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
00806 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
00807 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
00808 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
00809 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
00810 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
00811 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
00812 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
00813 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
00814 sum = _mm_add_epi64(a2b1, a0b3);
00815 C[3] = _mm_add_epi64(sum, sum1);
00816
00817 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
00818 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
00819 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
00820 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
00821 sum = _mm_add_epi64(a2b2, a3b1);
00822 C[4] = _mm_add_epi64(sum, a1b3);
00823
00824 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
00825 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
00826 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
00827 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
00828 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
00829 C[5] = _mm_add_epi64(a3b2, a2b3);
00830 }
00831
00832
void P4Optimized::Multiply4(word *C,
const word *A,
const word *B)
00833 {
00834 __m128i temp[7];
00835
const word *w = (word *)temp;
00836
const __m64 *mw = (__m64 *)w;
00837
00838 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
00839
00840 C[0] = w[0];
00841
00842 __m64 s1, s2;
00843
00844 __m64 w1 = _m_from_int(w[1]);
00845 __m64 w4 = mw[2];
00846 __m64 w6 = mw[3];
00847 __m64 w8 = mw[4];
00848 __m64 w10 = mw[5];
00849 __m64 w12 = mw[6];
00850 __m64 w14 = mw[7];
00851 __m64 w16 = mw[8];
00852 __m64 w18 = mw[9];
00853 __m64 w20 = mw[10];
00854 __m64 w22 = mw[11];
00855 __m64 w26 = _m_from_int(w[26]);
00856
00857 s1 = _mm_add_si64(w1, w4);
00858 C[1] = _m_to_int(s1);
00859 s1 = _m_psrlqi(s1, 32);
00860
00861 s2 = _mm_add_si64(w6, w8);
00862 s1 = _mm_add_si64(s1, s2);
00863 C[2] = _m_to_int(s1);
00864 s1 = _m_psrlqi(s1, 32);
00865
00866 s2 = _mm_add_si64(w10, w12);
00867 s1 = _mm_add_si64(s1, s2);
00868 C[3] = _m_to_int(s1);
00869 s1 = _m_psrlqi(s1, 32);
00870
00871 s2 = _mm_add_si64(w14, w16);
00872 s1 = _mm_add_si64(s1, s2);
00873 C[4] = _m_to_int(s1);
00874 s1 = _m_psrlqi(s1, 32);
00875
00876 s2 = _mm_add_si64(w18, w20);
00877 s1 = _mm_add_si64(s1, s2);
00878 C[5] = _m_to_int(s1);
00879 s1 = _m_psrlqi(s1, 32);
00880
00881 s2 = _mm_add_si64(w22, w26);
00882 s1 = _mm_add_si64(s1, s2);
00883 C[6] = _m_to_int(s1);
00884 s1 = _m_psrlqi(s1, 32);
00885
00886 C[7] = _m_to_int(s1) + w[27];
00887 _mm_empty();
00888 }
00889
00890
void P4Optimized::Multiply8(word *C,
const word *A,
const word *B)
00891 {
00892 __m128i temp[28];
00893
const word *w = (word *)temp;
00894
const __m64 *mw = (__m64 *)w;
00895
const word *x = (word *)temp+7*4;
00896
const __m64 *mx = (__m64 *)x;
00897
const word *y = (word *)temp+7*4*2;
00898
const __m64 *my = (__m64 *)y;
00899
const word *z = (word *)temp+7*4*3;
00900
const __m64 *mz = (__m64 *)z;
00901
00902 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
00903
00904 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
00905
00906 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
00907
00908 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
00909
00910 C[0] = w[0];
00911
00912 __m64 s1, s2, s3, s4;
00913
00914 __m64 w1 = _m_from_int(w[1]);
00915 __m64 w4 = mw[2];
00916 __m64 w6 = mw[3];
00917 __m64 w8 = mw[4];
00918 __m64 w10 = mw[5];
00919 __m64 w12 = mw[6];
00920 __m64 w14 = mw[7];
00921 __m64 w16 = mw[8];
00922 __m64 w18 = mw[9];
00923 __m64 w20 = mw[10];
00924 __m64 w22 = mw[11];
00925 __m64 w26 = _m_from_int(w[26]);
00926 __m64 w27 = _m_from_int(w[27]);
00927
00928 __m64 x0 = _m_from_int(x[0]);
00929 __m64 x1 = _m_from_int(x[1]);
00930 __m64 x4 = mx[2];
00931 __m64 x6 = mx[3];
00932 __m64 x8 = mx[4];
00933 __m64 x10 = mx[5];
00934 __m64 x12 = mx[6];
00935 __m64 x14 = mx[7];
00936 __m64 x16 = mx[8];
00937 __m64 x18 = mx[9];
00938 __m64 x20 = mx[10];
00939 __m64 x22 = mx[11];
00940 __m64 x26 = _m_from_int(x[26]);
00941 __m64 x27 = _m_from_int(x[27]);
00942
00943 __m64 y0 = _m_from_int(y[0]);
00944 __m64 y1 = _m_from_int(y[1]);
00945 __m64 y4 = my[2];
00946 __m64 y6 = my[3];
00947 __m64 y8 = my[4];
00948 __m64 y10 = my[5];
00949 __m64 y12 = my[6];
00950 __m64 y14 = my[7];
00951 __m64 y16 = my[8];
00952 __m64 y18 = my[9];
00953 __m64 y20 = my[10];
00954 __m64 y22 = my[11];
00955 __m64 y26 = _m_from_int(y[26]);
00956 __m64 y27 = _m_from_int(y[27]);
00957
00958 __m64 z0 = _m_from_int(z[0]);
00959 __m64 z1 = _m_from_int(z[1]);
00960 __m64 z4 = mz[2];
00961 __m64 z6 = mz[3];
00962 __m64 z8 = mz[4];
00963 __m64 z10 = mz[5];
00964 __m64 z12 = mz[6];
00965 __m64 z14 = mz[7];
00966 __m64 z16 = mz[8];
00967 __m64 z18 = mz[9];
00968 __m64 z20 = mz[10];
00969 __m64 z22 = mz[11];
00970 __m64 z26 = _m_from_int(z[26]);
00971
00972 s1 = _mm_add_si64(w1, w4);
00973 C[1] = _m_to_int(s1);
00974 s1 = _m_psrlqi(s1, 32);
00975
00976 s2 = _mm_add_si64(w6, w8);
00977 s1 = _mm_add_si64(s1, s2);
00978 C[2] = _m_to_int(s1);
00979 s1 = _m_psrlqi(s1, 32);
00980
00981 s2 = _mm_add_si64(w10, w12);
00982 s1 = _mm_add_si64(s1, s2);
00983 C[3] = _m_to_int(s1);
00984 s1 = _m_psrlqi(s1, 32);
00985
00986 s3 = _mm_add_si64(x0, y0);
00987 s2 = _mm_add_si64(w14, w16);
00988 s1 = _mm_add_si64(s1, s3);
00989 s1 = _mm_add_si64(s1, s2);
00990 C[4] = _m_to_int(s1);
00991 s1 = _m_psrlqi(s1, 32);
00992
00993 s3 = _mm_add_si64(x1, y1);
00994 s4 = _mm_add_si64(x4, y4);
00995 s1 = _mm_add_si64(s1, w18);
00996 s3 = _mm_add_si64(s3, s4);
00997 s1 = _mm_add_si64(s1, w20);
00998 s1 = _mm_add_si64(s1, s3);
00999 C[5] = _m_to_int(s1);
01000 s1 = _m_psrlqi(s1, 32);
01001
01002 s3 = _mm_add_si64(x6, y6);
01003 s4 = _mm_add_si64(x8, y8);
01004 s1 = _mm_add_si64(s1, w22);
01005 s3 = _mm_add_si64(s3, s4);
01006 s1 = _mm_add_si64(s1, w26);
01007 s1 = _mm_add_si64(s1, s3);
01008 C[6] = _m_to_int(s1);
01009 s1 = _m_psrlqi(s1, 32);
01010
01011 s3 = _mm_add_si64(x10, y10);
01012 s4 = _mm_add_si64(x12, y12);
01013 s1 = _mm_add_si64(s1, w27);
01014 s3 = _mm_add_si64(s3, s4);
01015 s1 = _mm_add_si64(s1, s3);
01016 C[7] = _m_to_int(s1);
01017 s1 = _m_psrlqi(s1, 32);
01018
01019 s3 = _mm_add_si64(x14, y14);
01020 s4 = _mm_add_si64(x16, y16);
01021 s1 = _mm_add_si64(s1, z0);
01022 s3 = _mm_add_si64(s3, s4);
01023 s1 = _mm_add_si64(s1, s3);
01024 C[8] = _m_to_int(s1);
01025 s1 = _m_psrlqi(s1, 32);
01026
01027 s3 = _mm_add_si64(x18, y18);
01028 s4 = _mm_add_si64(x20, y20);
01029 s1 = _mm_add_si64(s1, z1);
01030 s3 = _mm_add_si64(s3, s4);
01031 s1 = _mm_add_si64(s1, z4);
01032 s1 = _mm_add_si64(s1, s3);
01033 C[9] = _m_to_int(s1);
01034 s1 = _m_psrlqi(s1, 32);
01035
01036 s3 = _mm_add_si64(x22, y22);
01037 s4 = _mm_add_si64(x26, y26);
01038 s1 = _mm_add_si64(s1, z6);
01039 s3 = _mm_add_si64(s3, s4);
01040 s1 = _mm_add_si64(s1, z8);
01041 s1 = _mm_add_si64(s1, s3);
01042 C[10] = _m_to_int(s1);
01043 s1 = _m_psrlqi(s1, 32);
01044
01045 s3 = _mm_add_si64(x27, y27);
01046 s1 = _mm_add_si64(s1, z10);
01047 s1 = _mm_add_si64(s1, z12);
01048 s1 = _mm_add_si64(s1, s3);
01049 C[11] = _m_to_int(s1);
01050 s1 = _m_psrlqi(s1, 32);
01051
01052 s3 = _mm_add_si64(z14, z16);
01053 s1 = _mm_add_si64(s1, s3);
01054 C[12] = _m_to_int(s1);
01055 s1 = _m_psrlqi(s1, 32);
01056
01057 s3 = _mm_add_si64(z18, z20);
01058 s1 = _mm_add_si64(s1, s3);
01059 C[13] = _m_to_int(s1);
01060 s1 = _m_psrlqi(s1, 32);
01061
01062 s3 = _mm_add_si64(z22, z26);
01063 s1 = _mm_add_si64(s1, s3);
01064 C[14] = _m_to_int(s1);
01065 s1 = _m_psrlqi(s1, 32);
01066
01067 C[15] = z[27] + _m_to_int(s1);
01068 _mm_empty();
01069 }
01070
01071
void P4Optimized::Multiply8Bottom(word *C,
const word *A,
const word *B)
01072 {
01073 __m128i temp[21];
01074
const word *w = (word *)temp;
01075
const __m64 *mw = (__m64 *)w;
01076
const word *x = (word *)temp+7*4;
01077
const __m64 *mx = (__m64 *)x;
01078
const word *y = (word *)temp+7*4*2;
01079
const __m64 *my = (__m64 *)y;
01080
01081 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01082
01083 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01084
01085 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01086
01087 C[0] = w[0];
01088
01089 __m64 s1, s2, s3, s4;
01090
01091 __m64 w1 = _m_from_int(w[1]);
01092 __m64 w4 = mw[2];
01093 __m64 w6 = mw[3];
01094 __m64 w8 = mw[4];
01095 __m64 w10 = mw[5];
01096 __m64 w12 = mw[6];
01097 __m64 w14 = mw[7];
01098 __m64 w16 = mw[8];
01099 __m64 w18 = mw[9];
01100 __m64 w20 = mw[10];
01101 __m64 w22 = mw[11];
01102 __m64 w26 = _m_from_int(w[26]);
01103
01104 __m64 x0 = _m_from_int(x[0]);
01105 __m64 x1 = _m_from_int(x[1]);
01106 __m64 x4 = mx[2];
01107 __m64 x6 = mx[3];
01108 __m64 x8 = mx[4];
01109
01110 __m64 y0 = _m_from_int(y[0]);
01111 __m64 y1 = _m_from_int(y[1]);
01112 __m64 y4 = my[2];
01113 __m64 y6 = my[3];
01114 __m64 y8 = my[4];
01115
01116 s1 = _mm_add_si64(w1, w4);
01117 C[1] = _m_to_int(s1);
01118 s1 = _m_psrlqi(s1, 32);
01119
01120 s2 = _mm_add_si64(w6, w8);
01121 s1 = _mm_add_si64(s1, s2);
01122 C[2] = _m_to_int(s1);
01123 s1 = _m_psrlqi(s1, 32);
01124
01125 s2 = _mm_add_si64(w10, w12);
01126 s1 = _mm_add_si64(s1, s2);
01127 C[3] = _m_to_int(s1);
01128 s1 = _m_psrlqi(s1, 32);
01129
01130 s3 = _mm_add_si64(x0, y0);
01131 s2 = _mm_add_si64(w14, w16);
01132 s1 = _mm_add_si64(s1, s3);
01133 s1 = _mm_add_si64(s1, s2);
01134 C[4] = _m_to_int(s1);
01135 s1 = _m_psrlqi(s1, 32);
01136
01137 s3 = _mm_add_si64(x1, y1);
01138 s4 = _mm_add_si64(x4, y4);
01139 s1 = _mm_add_si64(s1, w18);
01140 s3 = _mm_add_si64(s3, s4);
01141 s1 = _mm_add_si64(s1, w20);
01142 s1 = _mm_add_si64(s1, s3);
01143 C[5] = _m_to_int(s1);
01144 s1 = _m_psrlqi(s1, 32);
01145
01146 s3 = _mm_add_si64(x6, y6);
01147 s4 = _mm_add_si64(x8, y8);
01148 s1 = _mm_add_si64(s1, w22);
01149 s3 = _mm_add_si64(s3, s4);
01150 s1 = _mm_add_si64(s1, w26);
01151 s1 = _mm_add_si64(s1, s3);
01152 C[6] = _m_to_int(s1);
01153 s1 = _m_psrlqi(s1, 32);
01154
01155 C[7] = _m_to_int(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01156 _mm_empty();
01157 }
01158
01159 __declspec(naked) word __fastcall P4Optimized::Add(word *C, const word *A, const word *B,
unsigned int N)
01160 {
01161 __asm
01162 {
01163 sub esp, 16
01164 xor eax, eax
01165 mov [esp], edi
01166 mov [esp+4], esi
01167 mov [esp+8], ebx
01168 mov [esp+12], ebp
01169
01170 mov ebx, [esp+20]
01171 mov esi, [esp+24]
01172
01173
01174
01175 neg esi
01176 jz loopend
01177
01178 mov edi, [edx]
01179 mov ebp, [ebx]
01180
01181 loopstart:
01182 add edi, eax
01183 jc carry1
01184
01185 xor eax, eax
01186
01187 carry1continue:
01188 add edi, ebp
01189 mov ebp, 1
01190 mov [ecx], edi
01191 mov edi, [edx+4]
01192 cmovc eax, ebp
01193 mov ebp, [ebx+4]
01194 lea ebx, [ebx+8]
01195 add edi, eax
01196 jc carry2
01197
01198 xor eax, eax
01199
01200 carry2continue:
01201 add edi, ebp
01202 mov ebp, 1
01203 cmovc eax, ebp
01204 mov [ecx+4], edi
01205 add ecx, 8
01206 mov edi, [edx+8]
01207 add edx, 8
01208 add esi, 2
01209 mov ebp, [ebx]
01210 jnz loopstart
01211
01212 loopend:
01213 mov edi, [esp]
01214 mov esi, [esp+4]
01215 mov ebx, [esp+8]
01216 mov ebp, [esp+12]
01217 add esp, 16
01218 ret 8
01219
01220 carry1:
01221 mov eax, 1
01222 jmp carry1continue
01223
01224 carry2:
01225 mov eax, 1
01226 jmp carry2continue
01227 }
01228 }
01229
01230 __declspec(naked) word __fastcall P4Optimized::Subtract(word *C, const word *A, const word *B,
unsigned int N)
01231 {
01232 __asm
01233 {
01234 sub esp, 16
01235 xor eax, eax
01236 mov [esp], edi
01237 mov [esp+4], esi
01238 mov [esp+8], ebx
01239 mov [esp+12], ebp
01240
01241 mov ebx, [esp+20]
01242 mov esi, [esp+24]
01243
01244
01245
01246 neg esi
01247 jz loopend
01248
01249 mov edi, [edx]
01250 mov ebp, [ebx]
01251
01252 loopstart:
01253 sub edi, eax
01254 jc carry1
01255
01256 xor eax, eax
01257
01258 carry1continue:
01259 sub edi, ebp
01260 mov ebp, 1
01261 mov [ecx], edi
01262 mov edi, [edx+4]
01263 cmovc eax, ebp
01264 mov ebp, [ebx+4]
01265 lea ebx, [ebx+8]
01266 sub edi, eax
01267 jc carry2
01268
01269 xor eax, eax
01270
01271 carry2continue:
01272 sub edi, ebp
01273 mov ebp, 1
01274 cmovc eax, ebp
01275 mov [ecx+4], edi
01276 add ecx, 8
01277 mov edi, [edx+8]
01278 add edx, 8
01279 add esi, 2
01280 mov ebp, [ebx]
01281 jnz loopstart
01282
01283 loopend:
01284 mov edi, [esp]
01285 mov esi, [esp+4]
01286 mov ebx, [esp+8]
01287 mov ebp, [esp+12]
01288 add esp, 16
01289 ret 8
01290
01291 carry1:
01292 mov eax, 1
01293 jmp carry1continue
01294
01295 carry2:
01296 mov eax, 1
01297 jmp carry2continue
01298 }
01299 }
01300
01301
#endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01302
01303
#elif defined(__GNUC__) && defined(__i386__)
01304
01305
class PentiumOptimized :
public Portable
01306 {
01307
public:
01308
#ifndef __pic__ // -fpic uses up a register, leaving too few for the asm code
01309
static word Add(word *C,
const word *A,
const word *B,
unsigned int N);
01310
static word Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
01311
#endif
01312
static void Square4(word *R,
const word *A);
01313
static void Multiply4(word *C,
const word *A,
const word *B);
01314
static void Multiply8(word *C,
const word *A,
const word *B);
01315 };
01316
01317
typedef PentiumOptimized LowLevel;
01318
01319
01320
01321
#ifndef __pic__
01322
__attribute__((regparm(3))) word PentiumOptimized::Add(word *C, const word *A, const word *B,
unsigned int N)
01323 {
01324 assert (N%2 == 0);
01325
01326
register word carry, temp;
01327
01328 __asm__ __volatile__(
01329
"push %%ebp;"
01330
"sub %3, %2;"
01331
"xor %0, %0;"
01332
"sub %4, %0;"
01333
"lea (%1,%4,4), %1;"
01334
"sar $1, %0;"
01335
"jz 1f;"
01336
01337
"0:;"
01338
"mov 0(%3), %4;"
01339
"mov 4(%3), %%ebp;"
01340
"mov (%1,%0,8), %5;"
01341
"lea 8(%3), %3;"
01342
"adc %5, %4;"
01343
"mov 4(%1,%0,8), %5;"
01344
"adc %5, %%ebp;"
01345
"inc %0;"
01346
"mov %4, -8(%3, %2);"
01347
"mov %%ebp, -4(%3, %2);"
01348
"jnz 0b;"
01349
01350
"1:;"
01351
"adc $0, %0;"
01352
"pop %%ebp;"
01353
01354 :
"=aSD" (carry),
"+r" (B),
"+r" (C),
"+r" (A),
"+r" (N),
"=r" (temp)
01355 : :
"cc",
"memory");
01356
01357
return carry;
01358 }
01359
01360 __attribute__((regparm(3))) word PentiumOptimized::Subtract(word *C, const word *A, const word *B,
unsigned int N)
01361 {
01362 assert (N%2 == 0);
01363
01364
register word carry, temp;
01365
01366 __asm__ __volatile__(
01367
"push %%ebp;"
01368
"sub %3, %2;"
01369
"xor %0, %0;"
01370
"sub %4, %0;"
01371
"lea (%1,%4,4), %1;"
01372
"sar $1, %0;"
01373
"jz 1f;"
01374
01375
"0:;"
01376
"mov 0(%3), %4;"
01377
"mov 4(%3), %%ebp;"
01378
"mov (%1,%0,8), %5;"
01379
"lea 8(%3), %3;"
01380
"sbb %5, %4;"
01381
"mov 4(%1,%0,8), %5;"
01382
"sbb %5, %%ebp;"
01383
"inc %0;"
01384
"mov %4, -8(%3, %2);"
01385
"mov %%ebp, -4(%3, %2);"
01386
"jnz 0b;"
01387
01388
"1:;"
01389
"adc $0, %0;"
01390
"pop %%ebp;"
01391
01392 :
"=aSD" (carry),
"+r" (B),
"+r" (C),
"+r" (A),
"+r" (N),
"=r" (temp)
01393 : :
"cc",
"memory");
01394
01395
return carry;
01396 }
01397
#endif // __pic__
01398
01399
01400
01401
#define SqrStartup \
01402
"push %%ebp\n\t" \
01403
"push %%esi\n\t" \
01404
"push %%ebx\n\t" \
01405
"xor %%ebp, %%ebp\n\t" \
01406
"xor %%ebx, %%ebx\n\t" \
01407
"xor %%ecx, %%ecx\n\t"
01408
01409
#define SqrShiftCarry \
01410
"mov %%ebx, %%ebp\n\t" \
01411
"mov %%ecx, %%ebx\n\t" \
01412
"xor %%ecx, %%ecx\n\t"
01413
01414
#define SqrAccumulate(i,j) \
01415
"mov 4*"#j"(%%esi), %%eax\n\t" \
01416
"mull 4*"#i"(%%esi)\n\t" \
01417
"add %%eax, %%ebp\n\t" \
01418
"adc %%edx, %%ebx\n\t" \
01419
"adc %%ch, %%cl\n\t" \
01420
"add %%eax, %%ebp\n\t" \
01421
"adc %%edx, %%ebx\n\t" \
01422
"adc %%ch, %%cl\n\t"
01423
01424
#define SqrAccumulateCentre(i) \
01425
"mov 4*"#i"(%%esi), %%eax\n\t" \
01426
"mull 4*"#i"(%%esi)\n\t" \
01427
"add %%eax, %%ebp\n\t" \
01428
"adc %%edx, %%ebx\n\t" \
01429
"adc %%ch, %%cl\n\t"
01430
01431
#define SqrStoreDigit(X) \
01432
"mov %%ebp, 4*"#X"(%%edi)\n\t" \
01433
01434
#define SqrLastDiagonal(digits) \
01435
"mov 4*("#digits"-1)(%%esi), %%eax\n\t" \
01436
"mull 4*("#digits"-1)(%%esi)\n\t" \
01437
"add %%eax, %%ebp\n\t" \
01438
"adc %%edx, %%ebx\n\t" \
01439
"mov %%ebp, 4*(2*"#digits"-2)(%%edi)\n\t" \
01440
"mov %%ebx, 4*(2*"#digits"-1)(%%edi)\n\t"
01441
01442
#define SqrCleanup \
01443
"pop %%ebx\n\t" \
01444
"pop %%esi\n\t" \
01445
"pop %%ebp\n\t"
01446
01447
void PentiumOptimized::Square4(word* Y,
const word* X)
01448 {
01449 __asm__ __volatile__(
01450 SqrStartup
01451
01452 SqrAccumulateCentre(0)
01453 SqrStoreDigit(0)
01454 SqrShiftCarry
01455
01456 SqrAccumulate(1,0)
01457 SqrStoreDigit(1)
01458 SqrShiftCarry
01459
01460 SqrAccumulate(2,0)
01461 SqrAccumulateCentre(1)
01462 SqrStoreDigit(2)
01463 SqrShiftCarry
01464
01465 SqrAccumulate(3,0)
01466 SqrAccumulate(2,1)
01467 SqrStoreDigit(3)
01468 SqrShiftCarry
01469
01470 SqrAccumulate(3,1)
01471 SqrAccumulateCentre(2)
01472 SqrStoreDigit(4)
01473 SqrShiftCarry
01474
01475 SqrAccumulate(3,2)
01476 SqrStoreDigit(5)
01477 SqrShiftCarry
01478
01479 SqrLastDiagonal(4)
01480
01481 SqrCleanup
01482
01483 :
01484 :
"D" (Y),
"S" (X)
01485 :
"eax",
"ecx",
"edx",
"ebp",
"memory"
01486 );
01487 }
01488
01489
#define MulStartup \
01490
"push %%ebp\n\t" \
01491
"push %%esi\n\t" \
01492
"push %%ebx\n\t" \
01493
"push %%edi\n\t" \
01494
"mov %%eax, %%ebx \n\t" \
01495
"xor %%ebp, %%ebp\n\t" \
01496
"xor %%edi, %%edi\n\t" \
01497
"xor %%ecx, %%ecx\n\t"
01498
01499
#define MulShiftCarry \
01500
"mov %%edx, %%ebp\n\t" \
01501
"mov %%ecx, %%edi\n\t" \
01502
"xor %%ecx, %%ecx\n\t"
01503
01504
#define MulAccumulate(i,j) \
01505
"mov 4*"#j"(%%ebx), %%eax\n\t" \
01506
"mull 4*"#i"(%%esi)\n\t" \
01507
"add %%eax, %%ebp\n\t" \
01508
"adc %%edx, %%edi\n\t" \
01509
"adc %%ch, %%cl\n\t"
01510
01511
#define MulStoreDigit(X) \
01512
"mov %%edi, %%edx \n\t" \
01513
"mov (%%esp), %%edi \n\t" \
01514
"mov %%ebp, 4*"#X"(%%edi)\n\t" \
01515
"mov %%edi, (%%esp)\n\t"
01516
01517
#define MulLastDiagonal(digits) \
01518
"mov 4*("#digits"-1)(%%ebx), %%eax\n\t" \
01519
"mull 4*("#digits"-1)(%%esi)\n\t" \
01520
"add %%eax, %%ebp\n\t" \
01521
"adc %%edi, %%edx\n\t" \
01522
"mov (%%esp), %%edi\n\t" \
01523
"mov %%ebp, 4*(2*"#digits"-2)(%%edi)\n\t" \
01524
"mov %%edx, 4*(2*"#digits"-1)(%%edi)\n\t"
01525
01526
#define MulCleanup \
01527
"pop %%edi\n\t" \
01528
"pop %%ebx\n\t" \
01529
"pop %%esi\n\t" \
01530
"pop %%ebp\n\t"
01531
01532
void PentiumOptimized::Multiply4(word* Z,
const word* X,
const word* Y)
01533 {
01534 __asm__ __volatile__(
01535 MulStartup
01536 MulAccumulate(0,0)
01537 MulStoreDigit(0)
01538 MulShiftCarry
01539
01540 MulAccumulate(1,0)
01541 MulAccumulate(0,1)
01542 MulStoreDigit(1)
01543 MulShiftCarry
01544
01545 MulAccumulate(2,0)
01546 MulAccumulate(1,1)
01547 MulAccumulate(0,2)
01548 MulStoreDigit(2)
01549 MulShiftCarry
01550
01551 MulAccumulate(3,0)
01552 MulAccumulate(2,1)
01553 MulAccumulate(1,2)
01554 MulAccumulate(0,3)
01555 MulStoreDigit(3)
01556 MulShiftCarry
01557
01558 MulAccumulate(3,1)
01559 MulAccumulate(2,2)
01560 MulAccumulate(1,3)
01561 MulStoreDigit(4)
01562 MulShiftCarry
01563
01564 MulAccumulate(3,2)
01565 MulAccumulate(2,3)
01566 MulStoreDigit(5)
01567 MulShiftCarry
01568
01569 MulLastDiagonal(4)
01570
01571 MulCleanup
01572
01573 :
01574 :
"D" (Z),
"S" (X),
"a" (Y)
01575 :
"%ecx",
"%edx",
"memory"
01576 );
01577 }
01578
01579
void PentiumOptimized::Multiply8(word* Z,
const word* X,
const word* Y)
01580 {
01581 __asm__ __volatile__(
01582 MulStartup
01583 MulAccumulate(0,0)
01584 MulStoreDigit(0)
01585 MulShiftCarry
01586
01587 MulAccumulate(1,0)
01588 MulAccumulate(0,1)
01589 MulStoreDigit(1)
01590 MulShiftCarry
01591
01592 MulAccumulate(2,0)
01593 MulAccumulate(1,1)
01594 MulAccumulate(0,2)
01595 MulStoreDigit(2)
01596 MulShiftCarry
01597
01598 MulAccumulate(3,0)
01599 MulAccumulate(2,1)
01600 MulAccumulate(1,2)
01601 MulAccumulate(0,3)
01602 MulStoreDigit(3)
01603 MulShiftCarry
01604
01605 MulAccumulate(4,0)
01606 MulAccumulate(3,1)
01607 MulAccumulate(2,2)
01608 MulAccumulate(1,3)
01609 MulAccumulate(0,4)
01610 MulStoreDigit(4)
01611 MulShiftCarry
01612
01613 MulAccumulate(5,0)
01614 MulAccumulate(4,1)
01615 MulAccumulate(3,2)
01616 MulAccumulate(2,3)
01617 MulAccumulate(1,4)
01618 MulAccumulate(0,5)
01619 MulStoreDigit(5)
01620 MulShiftCarry
01621
01622 MulAccumulate(6,0)
01623 MulAccumulate(5,1)
01624 MulAccumulate(4,2)
01625 MulAccumulate(3,3)
01626 MulAccumulate(2,4)
01627 MulAccumulate(1,5)
01628 MulAccumulate(0,6)
01629 MulStoreDigit(6)
01630 MulShiftCarry
01631
01632 MulAccumulate(7,0)
01633 MulAccumulate(6,1)
01634 MulAccumulate(5,2)
01635 MulAccumulate(4,3)
01636 MulAccumulate(3,4)
01637 MulAccumulate(2,5)
01638 MulAccumulate(1,6)
01639 MulAccumulate(0,7)
01640 MulStoreDigit(7)
01641 MulShiftCarry
01642
01643 MulAccumulate(7,1)
01644 MulAccumulate(6,2)
01645 MulAccumulate(5,3)
01646 MulAccumulate(4,4)
01647 MulAccumulate(3,5)
01648 MulAccumulate(2,6)
01649 MulAccumulate(1,7)
01650 MulStoreDigit(8)
01651 MulShiftCarry
01652
01653 MulAccumulate(7,2)
01654 MulAccumulate(6,3)
01655 MulAccumulate(5,4)
01656 MulAccumulate(4,5)
01657 MulAccumulate(3,6)
01658 MulAccumulate(2,7)
01659 MulStoreDigit(9)
01660 MulShiftCarry
01661
01662 MulAccumulate(7,3)
01663 MulAccumulate(6,4)
01664 MulAccumulate(5,5)
01665 MulAccumulate(4,6)
01666 MulAccumulate(3,7)
01667 MulStoreDigit(10)
01668 MulShiftCarry
01669
01670 MulAccumulate(7,4)
01671 MulAccumulate(6,5)
01672 MulAccumulate(5,6)
01673 MulAccumulate(4,7)
01674 MulStoreDigit(11)
01675 MulShiftCarry
01676
01677 MulAccumulate(7,5)
01678 MulAccumulate(6,6)
01679 MulAccumulate(5,7)
01680 MulStoreDigit(12)
01681 MulShiftCarry
01682
01683 MulAccumulate(7,6)
01684 MulAccumulate(6,7)
01685 MulStoreDigit(13)
01686 MulShiftCarry
01687
01688 MulLastDiagonal(8)
01689
01690 MulCleanup
01691
01692 :
01693 :
"D" (Z),
"S" (X),
"a" (Y)
01694 :
"%ecx",
"%edx",
"memory"
01695 );
01696 }
01697
01698
#elif defined(__GNUC__) && defined(__alpha__)
01699
01700
class AlphaOptimized :
public Portable
01701 {
01702
public:
01703
static inline void Multiply2(word *C,
const word *A,
const word *B);
01704
static inline word Multiply2Add(word *C,
const word *A,
const word *B);
01705
static inline void Multiply4(word *C,
const word *A,
const word *B);
01706
static inline unsigned int MultiplyRecursionLimit() {
return 4;}
01707
01708
static inline void Multiply4Bottom(word *C,
const word *A,
const word *B);
01709
static inline unsigned int MultiplyBottomRecursionLimit() {
return 4;}
01710
01711
static inline void Square4(word *R,
const word *A)
01712 {
01713 Multiply4(R, A, A);
01714 }
01715 };
01716
01717
typedef AlphaOptimized LowLevel;
01718
01719
inline void AlphaOptimized::Multiply2(word *C,
const word *A,
const word *B)
01720 {
01721
register dword c, a = *(
const dword *)A, b = *(
const dword *)B;
01722 ((dword *)C)[0] = a*b;
01723 __asm__(
"umulh %1,%2,%0" :
"=r" (c) :
"r" (a),
"r" (b));
01724 ((dword *)C)[1] = c;
01725 }
01726
01727
inline word AlphaOptimized::Multiply2Add(word *C,
const word *A,
const word *B)
01728 {
01729
register dword c, d, e, a = *(
const dword *)A, b = *(
const dword *)B;
01730 c = ((dword *)C)[0];
01731 d = a*b + c;
01732 __asm__(
"umulh %1,%2,%0" :
"=r" (e) :
"r" (a),
"r" (b));
01733 ((dword *)C)[0] = d;
01734 d = (d < c);
01735 c = ((dword *)C)[1] + d;
01736 d = (c < d);
01737 c += e;
01738 ((dword *)C)[1] = c;
01739 d |= (c < e);
01740
return d;
01741 }
01742
01743
inline void AlphaOptimized::Multiply4(word *R,
const word *A,
const word *B)
01744 {
01745 Multiply2(R, A, B);
01746 Multiply2(R+4, A+2, B+2);
01747 word carry = Multiply2Add(R+2, A+0, B+2);
01748 carry += Multiply2Add(R+2, A+2, B+0);
01749 Increment(R+6, 2, carry);
01750 }
01751
01752
static inline void Multiply2BottomAdd(word *C,
const word *A,
const word *B)
01753 {
01754
register dword a = *(
const dword *)A, b = *(
const dword *)B;
01755 ((dword *)C)[0] = a*b + ((dword *)C)[0];
01756 }
01757
01758
inline void AlphaOptimized::Multiply4Bottom(word *R,
const word *A,
const word *B)
01759 {
01760 Multiply2(R, A, B);
01761 Multiply2BottomAdd(R+2, A+0, B+2);
01762 Multiply2BottomAdd(R+2, A+2, B+0);
01763 }
01764
01765
#else // no processor specific code available
01766
01767
typedef Portable LowLevel;
01768
01769
#endif
01770
01771
01772
01773
#define A0 A
01774
#define A1 (A+N2)
01775
#define B0 B
01776
#define B1 (B+N2)
01777
01778
#define T0 T
01779
#define T1 (T+N2)
01780
#define T2 (T+N)
01781
#define T3 (T+N+N2)
01782
01783
#define R0 R
01784
#define R1 (R+N2)
01785
#define R2 (R+N)
01786
#define R3 (R+N+N2)
01787
01788
01789
01790
01791
01792
01793
01794
01795
template <
class P>
01796
void DoRecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy=NULL);
01797
01798
template <
class P>
01799
inline void RecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy=NULL)
01800 {
01801 assert(N>=2 && N%2==0);
01802
01803
if (P::MultiplyRecursionLimit() >= 8 && N==8)
01804 P::Multiply8(R, A, B);
01805
else if (P::MultiplyRecursionLimit() >= 4 && N==4)
01806 P::Multiply4(R, A, B);
01807
else if (N==2)
01808 P::Multiply2(R, A, B);
01809
else
01810 DoRecursiveMultiply<P>(R, T, A, B, N, NULL);
01811 }
01812
01813
template <
class P>
01814
void DoRecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy)
01815 {
01816
const unsigned int N2 = N/2;
01817
int carry;
01818
01819
int aComp = Compare(A0, A1, N2);
01820
int bComp = Compare(B0, B1, N2);
01821
01822
switch (2*aComp + aComp + bComp)
01823 {
01824
case -4:
01825 P::Subtract(R0, A1, A0, N2);
01826 P::Subtract(R1, B0, B1, N2);
01827 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01828 P::Subtract(T1, T1, R0, N2);
01829 carry = -1;
01830
break;
01831
case -2:
01832 P::Subtract(R0, A1, A0, N2);
01833 P::Subtract(R1, B0, B1, N2);
01834 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01835 carry = 0;
01836
break;
01837
case 2:
01838 P::Subtract(R0, A0, A1, N2);
01839 P::Subtract(R1, B1, B0, N2);
01840 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01841 carry = 0;
01842
break;
01843
case 4:
01844 P::Subtract(R0, A1, A0, N2);
01845 P::Subtract(R1, B0, B1, N2);
01846 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01847 P::Subtract(T1, T1, R1, N2);
01848 carry = -1;
01849
break;
01850
default:
01851 SetWords(T0, 0, N);
01852 carry = 0;
01853 }
01854
01855 RecursiveMultiply<P>(R0, T2, A0, B0, N2);
01856 RecursiveMultiply<P>(R2, T2, A1, B1, N2);
01857
01858
01859
01860 carry += P::Add(T0, T0, R0, N);
01861 carry += P::Add(T0, T0, R2, N);
01862 carry += P::Add(R1, R1, T0, N);
01863
01864 assert (carry >= 0 && carry <= 2);
01865 Increment(R3, N2, carry);
01866 }
01867
01868
01869
01870
01871
01872
template <
class P>
01873
void DoRecursiveSquare(word *R, word *T,
const word *A,
unsigned int N,
const P *dummy=NULL);
01874
01875
template <
class P>
01876
inline void RecursiveSquare(word *R, word *T,
const word *A,
unsigned int N,
const P *dummy=NULL)
01877 {
01878 assert(N && N%2==0);
01879
if (P::SquareRecursionLimit() >= 8 && N==8)
01880 P::Square8(R, A);
01881
if (P::SquareRecursionLimit() >= 4 && N==4)
01882 P::Square4(R, A);
01883
else if (N==2)
01884 P::Square2(R, A);
01885
else
01886 DoRecursiveSquare<P>(R, T, A, N, NULL);
01887 }
01888
01889
template <
class P>
01890
void DoRecursiveSquare(word *R, word *T,
const word *A,
unsigned int N,
const P *dummy)
01891 {
01892
const unsigned int N2 = N/2;
01893
01894 RecursiveSquare<P>(R0, T2, A0, N2);
01895 RecursiveSquare<P>(R2, T2, A1, N2);
01896 RecursiveMultiply<P>(T0, T2, A0, A1, N2);
01897
01898 word carry = P::Add(R1, R1, T0, N);
01899 carry += P::Add(R1, R1, T0, N);
01900 Increment(R3, N2, carry);
01901 }
01902
01903
01904
01905
01906
01907
01908
template <
class P>
01909
void DoRecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy=NULL);
01910
01911
template <
class P>
01912
inline void RecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy=NULL)
01913 {
01914 assert(N>=2 && N%2==0);
01915
if (P::MultiplyBottomRecursionLimit() >= 8 && N==8)
01916 P::Multiply8Bottom(R, A, B);
01917
else if (P::MultiplyBottomRecursionLimit() >= 4 && N==4)
01918 P::Multiply4Bottom(R, A, B);
01919
else if (N==2)
01920 P::Multiply2Bottom(R, A, B);
01921
else
01922 DoRecursiveMultiplyBottom<P>(R, T, A, B, N, NULL);
01923 }
01924
01925
template <
class P>
01926
void DoRecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N,
const P *dummy)
01927 {
01928
const unsigned int N2 = N/2;
01929
01930 RecursiveMultiply<P>(R, T, A0, B0, N2);
01931 RecursiveMultiplyBottom<P>(T0, T1, A1, B0, N2);
01932 P::Add(R1, R1, T0, N2);
01933 RecursiveMultiplyBottom<P>(T0, T1, A0, B1, N2);
01934 P::Add(R1, R1, T0, N2);
01935 }
01936
01937
01938
01939
01940
01941
01942
01943
template <
class P>
01944
void RecursiveMultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
unsigned int N,
const P *dummy=NULL)
01945 {
01946 assert(N>=2 && N%2==0);
01947
01948
if (N==4)
01949 {
01950 P::Multiply4(T, A, B);
01951 ((dword *)R)[0] = ((dword *)T)[2];
01952 ((dword *)R)[1] = ((dword *)T)[3];
01953 }
01954
else if (N==2)
01955 {
01956 P::Multiply2(T, A, B);
01957 ((dword *)R)[0] = ((dword *)T)[1];
01958 }
01959
else
01960 {
01961
const unsigned int N2 = N/2;
01962
int carry;
01963
01964
int aComp = Compare(A0, A1, N2);
01965
int bComp = Compare(B0, B1, N2);
01966
01967
switch (2*aComp + aComp + bComp)
01968 {
01969
case -4:
01970 P::Subtract(R0, A1, A0, N2);
01971 P::Subtract(R1, B0, B1, N2);
01972 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01973 P::Subtract(T1, T1, R0, N2);
01974 carry = -1;
01975
break;
01976
case -2:
01977 P::Subtract(R0, A1, A0, N2);
01978 P::Subtract(R1, B0, B1, N2);
01979 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01980 carry = 0;
01981
break;
01982
case 2:
01983 P::Subtract(R0, A0, A1, N2);
01984 P::Subtract(R1, B1, B0, N2);
01985 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01986 carry = 0;
01987
break;
01988
case 4:
01989 P::Subtract(R0, A1, A0, N2);
01990 P::Subtract(R1, B0, B1, N2);
01991 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01992 P::Subtract(T1, T1, R1, N2);
01993 carry = -1;
01994
break;
01995
default:
01996 SetWords(T0, 0, N);
01997 carry = 0;
01998 }
01999
02000 RecursiveMultiply<P>(T2, R0, A1, B1, N2);
02001
02002
02003
02004 word c2 = P::Subtract(R0, L+N2, L, N2);
02005 c2 += P::Subtract(R0, R0, T0, N2);
02006 word t = (Compare(R0, T2, N2) == -1);
02007
02008 carry += t;
02009 carry += Increment(R0, N2, c2+t);
02010 carry += P::Add(R0, R0, T1, N2);
02011 carry += P::Add(R0, R0, T3, N2);
02012 assert (carry >= 0 && carry <= 2);
02013
02014 CopyWords(R1, T3, N2);
02015 Increment(R1, N2, carry);
02016 }
02017 }
02018
02019
inline word Add(word *C,
const word *A,
const word *B,
unsigned int N)
02020 {
02021
return LowLevel::Add(C, A, B, N);
02022 }
02023
02024
inline word Subtract(word *C,
const word *A,
const word *B,
unsigned int N)
02025 {
02026
return LowLevel::Subtract(C, A, B, N);
02027 }
02028
02029
inline void Multiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02030 {
02031
#ifdef SSE2_INTRINSICS_AVAILABLE
02032
if (HasSSE2())
02033 RecursiveMultiply<P4Optimized>(R, T, A, B, N);
02034
else
02035
#endif
02036
RecursiveMultiply<LowLevel>(R, T, A, B, N);
02037 }
02038
02039
inline void Square(word *R, word *T,
const word *A,
unsigned int N)
02040 {
02041
#ifdef SSE2_INTRINSICS_AVAILABLE
02042
if (HasSSE2())
02043 RecursiveSquare<P4Optimized>(R, T, A, N);
02044
else
02045
#endif
02046
RecursiveSquare<LowLevel>(R, T, A, N);
02047 }
02048
02049
inline void MultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02050 {
02051
#ifdef SSE2_INTRINSICS_AVAILABLE
02052
if (HasSSE2())
02053 RecursiveMultiplyBottom<P4Optimized>(R, T, A, B, N);
02054
else
02055
#endif
02056
RecursiveMultiplyBottom<LowLevel>(R, T, A, B, N);
02057 }
02058
02059
inline void MultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
unsigned int N)
02060 {
02061
#ifdef SSE2_INTRINSICS_AVAILABLE
02062
if (HasSSE2())
02063 RecursiveMultiplyTop<P4Optimized>(R, T, L, A, B, N);
02064
else
02065
#endif
02066
RecursiveMultiplyTop<LowLevel>(R, T, L, A, B, N);
02067 }
02068
02069
02070
02071
02072
02073
02074
void AsymmetricMultiply(word *R, word *T,
const word *A,
unsigned int NA,
const word *B,
unsigned int NB)
02075 {
02076
if (NA == NB)
02077 {
02078
if (A == B)
02079 Square(R, T, A, NA);
02080
else
02081 Multiply(R, T, A, B, NA);
02082
02083
return;
02084 }
02085
02086
if (NA > NB)
02087 {
02088 std::swap(A, B);
02089 std::swap(NA, NB);
02090 }
02091
02092 assert(NB % NA == 0);
02093 assert((NB/NA)%2 == 0);
02094
02095
if (NA==2 && !A[1])
02096 {
02097
switch (A[0])
02098 {
02099
case 0:
02100 SetWords(R, 0, NB+2);
02101
return;
02102
case 1:
02103 CopyWords(R, B, NB);
02104 R[NB] = R[NB+1] = 0;
02105
return;
02106
default:
02107 R[NB] = LinearMultiply(R, B, A[0], NB);
02108 R[NB+1] = 0;
02109
return;
02110 }
02111 }
02112
02113 Multiply(R, T, A, B, NA);
02114 CopyWords(T+2*NA, R+NA, NA);
02115
02116
unsigned i;
02117
02118
for (i=2*NA; i<NB; i+=2*NA)
02119 Multiply(T+NA+i, T, A, B+i, NA);
02120
for (i=NA; i<NB; i+=2*NA)
02121 Multiply(R+i, T, A, B+i, NA);
02122
02123
if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02124 Increment(R+NB, NA);
02125 }
02126
02127
02128
02129
02130
02131
void RecursiveInverseModPower2(word *R, word *T,
const word *A,
unsigned int N)
02132 {
02133
if (N==2)
02134 AtomicInverseModPower2(R, A[0], A[1]);
02135
else
02136 {
02137
const unsigned int N2 = N/2;
02138 RecursiveInverseModPower2(R0, T0, A0, N2);
02139 T0[0] = 1;
02140 SetWords(T0+1, 0, N2-1);
02141 MultiplyTop(R1, T1, T0, R0, A0, N2);
02142 MultiplyBottom(T0, T1, R0, A1, N2);
02143 Add(T0, R1, T0, N2);
02144 TwosComplement(T0, N2);
02145 MultiplyBottom(R1, T1, R0, T0, N2);
02146 }
02147 }
02148
02149
02150
02151
02152
02153
02154
02155
void MontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
unsigned int N)
02156 {
02157 MultiplyBottom(R, T, X, U, N);
02158 MultiplyTop(T, T+N, X, R, M, N);
02159 word borrow = Subtract(T, X+N, T, N);
02160
02161 word carry = Add(T+N, T, M, N);
02162 assert(carry || !borrow);
02163 CopyWords(R, T + (borrow ? N : 0), N);
02164 }
02165
02166
02167
02168
02169
02170
02171
02172
02173
void HalfMontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
const word *V,
unsigned int N)
02174 {
02175 assert(N%2==0 && N>=4);
02176
02177
#define M0 M
02178
#define M1 (M+N2)
02179
#define V0 V
02180
#define V1 (V+N2)
02181
02182
#define X0 X
02183
#define X1 (X+N2)
02184
#define X2 (X+N)
02185
#define X3 (X+N+N2)
02186
02187
const unsigned int N2 = N/2;
02188 Multiply(T0, T2, V0, X3, N2);
02189
int c2 = Add(T0, T0, X0, N);
02190 MultiplyBottom(T3, T2, T0, U, N2);
02191 MultiplyTop(T2, R, T0, T3, M0, N2);
02192 c2 -= Subtract(T2, T1, T2, N2);
02193 Multiply(T0, R, T3, M1, N2);
02194 c2 -= Subtract(T0, T2, T0, N2);
02195
int c3 = -(
int)Subtract(T1, X2, T1, N2);
02196 Multiply(R0, T2, V1, X3, N2);
02197 c3 += Add(R, R, T, N);
02198
02199
if (c2>0)
02200 c3 += Increment(R1, N2);
02201
else if (c2<0)
02202 c3 -= Decrement(R1, N2, -c2);
02203
02204 assert(c3>=-1 && c3<=1);
02205
if (c3>0)
02206 Subtract(R, R, M, N);
02207
else if (c3<0)
02208 Add(R, R, M, N);
02209
02210
#undef M0
02211
#undef M1
02212
#undef V0
02213
#undef V1
02214
02215
#undef X0
02216
#undef X1
02217
#undef X2
02218
#undef X3
02219
}
02220
02221
#undef A0
02222
#undef A1
02223
#undef B0
02224
#undef B1
02225
02226
#undef T0
02227
#undef T1
02228
#undef T2
02229
#undef T3
02230
02231
#undef R0
02232
#undef R1
02233
#undef R2
02234
#undef R3
02235
02236
02237
static word SubatomicDivide(word *A, word B0, word B1)
02238 {
02239
02240 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02241
02242 dword p, u;
02243 word Q;
02244
02245
02246
if (B1+1 == 0)
02247 Q = A[2];
02248
else
02249 Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1));
02250
02251
02252 p = (dword) B0*Q;
02253 u = (dword) A[0] - LOW_WORD(p);
02254 A[0] = LOW_WORD(u);
02255 u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q;
02256 A[1] = LOW_WORD(u);
02257 A[2] += HIGH_WORD(u);
02258
02259
02260
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02261 {
02262 u = (dword) A[0] - B0;
02263 A[0] = LOW_WORD(u);
02264 u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u));
02265 A[1] = LOW_WORD(u);
02266 A[2] += HIGH_WORD(u);
02267 Q++;
02268 assert(Q);
02269 }
02270
02271
return Q;
02272 }
02273
02274
02275
static inline void AtomicDivide(word *Q,
const word *A,
const word *B)
02276 {
02277
if (!B[0] && !B[1])
02278 {
02279 Q[0] = A[2];
02280 Q[1] = A[3];
02281 }
02282
else
02283 {
02284 word T[4];
02285 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02286 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02287 Q[0] = SubatomicDivide(T, B[0], B[1]);
02288
02289
#ifndef NDEBUG
02290
02291 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02292 word P[4];
02293 LowLevel::Multiply2(P, Q, B);
02294 Add(P, P, T, 4);
02295 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02296
#endif
02297
}
02298 }
02299
02300
02301
static void CorrectQuotientEstimate(word *R, word *T, word *Q,
const word *B,
unsigned int N)
02302 {
02303 assert(N && N%2==0);
02304
02305
if (Q[1])
02306 {
02307 T[N] = T[N+1] = 0;
02308
unsigned i;
02309
for (i=0; i<N; i+=4)
02310 LowLevel::Multiply2(T+i, Q, B+i);
02311
for (i=2; i<N; i+=4)
02312
if (LowLevel::Multiply2Add(T+i, Q, B+i))
02313 T[i+5] += (++T[i+4]==0);
02314 }
02315
else
02316 {
02317 T[N] = LinearMultiply(T, B, Q[0], N);
02318 T[N+1] = 0;
02319 }
02320
02321 word borrow = Subtract(R, R, T, N+2);
02322 assert(!borrow && !R[N+1]);
02323
02324
while (R[N] || Compare(R, B, N) >= 0)
02325 {
02326 R[N] -= Subtract(R, R, B, N);
02327 Q[1] += (++Q[0]==0);
02328 assert(Q[0] || Q[1]);
02329 }
02330 }
02331
02332
02333
02334
02335
02336
02337
02338
void Divide(word *R, word *Q, word *T,
const word *A,
unsigned int NA,
const word *B,
unsigned int NB)
02339 {
02340 assert(NA && NB && NA%2==0 && NB%2==0);
02341 assert(B[NB-1] || B[NB-2]);
02342 assert(NB <= NA);
02343
02344
02345 word *
const TA=T;
02346 word *
const TB=T+NA+2;
02347 word *
const TP=T+NA+2+NB;
02348
02349
02350
unsigned shiftWords = (B[NB-1]==0);
02351 TB[0] = TB[NB-1] = 0;
02352 CopyWords(TB+shiftWords, B, NB-shiftWords);
02353
unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02354 assert(shiftBits < WORD_BITS);
02355 ShiftWordsLeftByBits(TB, NB, shiftBits);
02356
02357
02358 TA[0] = TA[NA] = TA[NA+1] = 0;
02359 CopyWords(TA+shiftWords, A, NA);
02360 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02361
02362
if (TA[NA+1]==0 && TA[NA] <= 1)
02363 {
02364 Q[NA-NB+1] = Q[NA-NB] = 0;
02365
while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02366 {
02367 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02368 ++Q[NA-NB];
02369 }
02370 }
02371
else
02372 {
02373 NA+=2;
02374 assert(Compare(TA+NA-NB, TB, NB) < 0);
02375 }
02376
02377 word BT[2];
02378 BT[0] = TB[NB-2] + 1;
02379 BT[1] = TB[NB-1] + (BT[0]==0);
02380
02381
02382
for (
unsigned i=NA-2; i>=NB; i-=2)
02383 {
02384 AtomicDivide(Q+i-NB, TA+i-2, BT);
02385 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02386 }
02387
02388
02389 CopyWords(R, TA+shiftWords, NB);
02390 ShiftWordsRightByBits(R, NB, shiftBits);
02391 }
02392
02393
static inline unsigned int EvenWordCount(
const word *X,
unsigned int N)
02394 {
02395
while (N && X[N-2]==0 && X[N-1]==0)
02396 N-=2;
02397
return N;
02398 }
02399
02400
02401
02402
02403
02404
02405
02406
unsigned int AlmostInverse(word *R, word *T,
const word *A,
unsigned int NA,
const word *M,
unsigned int N)
02407 {
02408 assert(NA<=N && N && N%2==0);
02409
02410 word *b = T;
02411 word *c = T+N;
02412 word *f = T+2*N;
02413 word *g = T+3*N;
02414
unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02415
unsigned int k=0, s=0;
02416
02417 SetWords(T, 0, 3*N);
02418 b[0]=1;
02419 CopyWords(f, A, NA);
02420 CopyWords(g, M, N);
02421
02422
while (1)
02423 {
02424 word t=f[0];
02425
while (!t)
02426 {
02427
if (EvenWordCount(f, fgLen)==0)
02428 {
02429 SetWords(R, 0, N);
02430
return 0;
02431 }
02432
02433 ShiftWordsRightByWords(f, fgLen, 1);
02434
if (c[bcLen-1]) bcLen+=2;
02435 assert(bcLen <= N);
02436 ShiftWordsLeftByWords(c, bcLen, 1);
02437 k+=WORD_BITS;
02438 t=f[0];
02439 }
02440
02441
unsigned int i=0;
02442
while (t%2 == 0)
02443 {
02444 t>>=1;
02445 i++;
02446 }
02447 k+=i;
02448
02449
if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02450 {
02451
if (s%2==0)
02452 CopyWords(R, b, N);
02453
else
02454 Subtract(R, M, b, N);
02455
return k;
02456 }
02457
02458 ShiftWordsRightByBits(f, fgLen, i);
02459 t=ShiftWordsLeftByBits(c, bcLen, i);
02460
if (t)
02461 {
02462 c[bcLen] = t;
02463 bcLen+=2;
02464 assert(bcLen <= N);
02465 }
02466
02467
if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02468 fgLen-=2;
02469
02470
if (Compare(f, g, fgLen)==-1)
02471 {
02472 std::swap(f, g);
02473 std::swap(b, c);
02474 s++;
02475 }
02476
02477 Subtract(f, f, g, fgLen);
02478
02479
if (Add(b, b, c, bcLen))
02480 {
02481 b[bcLen] = 1;
02482 bcLen+=2;
02483 assert(bcLen <= N);
02484 }
02485 }
02486 }
02487
02488
02489
02490
02491
02492
void DivideByPower2Mod(word *R,
const word *A,
unsigned int k,
const word *M,
unsigned int N)
02493 {
02494 CopyWords(R, A, N);
02495
02496
while (k--)
02497 {
02498
if (R[0]%2==0)
02499 ShiftWordsRightByBits(R, N, 1);
02500
else
02501 {
02502 word carry = Add(R, R, M, N);
02503 ShiftWordsRightByBits(R, N, 1);
02504 R[N-1] += carry<<(WORD_BITS-1);
02505 }
02506 }
02507 }
02508
02509
02510
02511
02512
02513
void MultiplyByPower2Mod(word *R,
const word *A,
unsigned int k,
const word *M,
unsigned int N)
02514 {
02515 CopyWords(R, A, N);
02516
02517
while (k--)
02518
if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02519 Subtract(R, R, M, N);
02520 }
02521
02522
02523
02524
static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02525
02526
static inline unsigned int RoundupSize(
unsigned int n)
02527 {
02528
if (n<=8)
02529
return RoundupSizeTable[n];
02530
else if (n<=16)
02531
return 16;
02532
else if (n<=32)
02533
return 32;
02534
else if (n<=64)
02535
return 64;
02536
else return 1U << BitPrecision(n-1);
02537 }
02538
02539 Integer::Integer()
02540 : reg(2), sign(POSITIVE)
02541 {
02542 reg[0] = reg[1] = 0;
02543 }
02544
02545 Integer::Integer(
const Integer& t)
02546 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02547 {
02548 CopyWords(reg, t.reg, reg.
size());
02549 }
02550
02551 Integer::Integer(
signed long value)
02552 : reg(2)
02553 {
02554
if (value >= 0)
02555 sign = POSITIVE;
02556
else
02557 {
02558 sign = NEGATIVE;
02559 value = -value;
02560 }
02561 reg[0] = word(value);
02562 reg[1] = word(SafeRightShift<WORD_BITS, unsigned long>(value));
02563 }
02564
02565 Integer::Integer(Sign s, word high, word low)
02566 : reg(2), sign(s)
02567 {
02568 reg[0] = low;
02569 reg[1] = high;
02570 }
02571
02572 bool Integer::IsConvertableToLong()
const
02573
{
02574
if (
ByteCount() >
sizeof(
long))
02575
return false;
02576
02577
unsigned long value = reg[0];
02578 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02579
02580
if (sign==POSITIVE)
02581
return (
signed long)value >= 0;
02582
else
02583
return -(
signed long)value < 0;
02584 }
02585
02586 signed long Integer::ConvertToLong()
const
02587
{
02588 assert(
IsConvertableToLong());
02589
02590
unsigned long value = reg[0];
02591 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02592
return sign==POSITIVE ? value : -(
signed long)value;
02593 }
02594
02595 Integer::Integer(
BufferedTransformation &encodedInteger,
unsigned int byteCount, Signedness s)
02596 {
02597 Decode(encodedInteger, byteCount, s);
02598 }
02599
02600 Integer::Integer(
const byte *encodedInteger,
unsigned int byteCount, Signedness s)
02601 {
02602 Decode(encodedInteger, byteCount, s);
02603 }
02604
02605 Integer::Integer(
BufferedTransformation &bt)
02606 {
02607 BERDecode(bt);
02608 }
02609
02610 Integer::Integer(
RandomNumberGenerator &rng,
unsigned int bitcount)
02611 {
02612 Randomize(rng, bitcount);
02613 }
02614
02615 Integer::Integer(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max, RandomNumberType rnType,
const Integer &equiv,
const Integer &mod)
02616 {
02617
if (!Randomize(rng, min, max, rnType, equiv, mod))
02618
throw Integer::RandomNumberNotFound();
02619 }
02620
02621 Integer Integer::Power2(
unsigned int e)
02622 {
02623
Integer r((word)0, BitsToWords(e+1));
02624 r.
SetBit(e);
02625
return r;
02626 }
02627
02628 const Integer &
Integer::Zero()
02629 {
02630
static const Integer zero;
02631
return zero;
02632 }
02633
02634 const Integer &
Integer::One()
02635 {
02636
static const Integer one(1,2);
02637
return one;
02638 }
02639
02640 const Integer &
Integer::Two()
02641 {
02642
static const Integer two(2,2);
02643
return two;
02644 }
02645
02646
bool Integer::operator!()
const
02647
{
02648
return IsNegative() ?
false : (reg[0]==0 &&
WordCount()==0);
02649 }
02650
02651
Integer& Integer::operator=(
const Integer& t)
02652 {
02653
if (
this != &t)
02654 {
02655 reg.
New(RoundupSize(t.WordCount()));
02656 CopyWords(reg, t.reg, reg.
size());
02657 sign = t.sign;
02658 }
02659
return *
this;
02660 }
02661
02662 bool Integer::GetBit(
unsigned int n)
const
02663
{
02664
if (n/WORD_BITS >= reg.
size())
02665
return 0;
02666
else
02667
return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02668 }
02669
02670 void Integer::SetBit(
unsigned int n,
bool value)
02671 {
02672
if (value)
02673 {
02674 reg.
CleanGrow(RoundupSize(BitsToWords(n+1)));
02675 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02676 }
02677
else
02678 {
02679
if (n/WORD_BITS < reg.
size())
02680 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02681 }
02682 }
02683
02684 byte
Integer::GetByte(
unsigned int n)
const
02685
{
02686
if (n/WORD_SIZE >= reg.
size())
02687
return 0;
02688
else
02689
return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02690 }
02691
02692 void Integer::SetByte(
unsigned int n, byte value)
02693 {
02694 reg.
CleanGrow(RoundupSize(BytesToWords(n+1)));
02695 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02696 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02697 }
02698
02699 unsigned long Integer::GetBits(
unsigned int i,
unsigned int n)
const
02700
{
02701 assert(n <=
sizeof(
unsigned long)*8);
02702
unsigned long v = 0;
02703
for (
unsigned int j=0; j<n; j++)
02704 v |=
GetBit(i+j) << j;
02705
return v;
02706 }
02707
02708
Integer Integer::operator-()
const
02709
{
02710
Integer result(*
this);
02711 result.
Negate();
02712
return result;
02713 }
02714
02715
Integer Integer::AbsoluteValue()
const
02716
{
02717
Integer result(*
this);
02718 result.
sign = POSITIVE;
02719
return result;
02720 }
02721
02722
void Integer::swap(
Integer &a)
02723 {
02724 reg.
swap(a.reg);
02725 std::swap(sign, a.sign);
02726 }
02727
02728
Integer::Integer(word value,
unsigned int length)
02729 : reg(RoundupSize(length)), sign(POSITIVE)
02730 {
02731 reg[0] = value;
02732 SetWords(reg+1, 0, reg.
size()-1);
02733 }
02734
02735
template <
class T>
02736
static Integer StringToInteger(
const T *str)
02737 {
02738 word radix;
02739
#if (defined(__GNUC__) && __GNUC__ <= 3) // GCC workaround
02740
02741
02742
unsigned int length;
02743
for (length = 0; str[length] != 0; length++) {}
02744
#else
02745
unsigned int length = std::char_traits<T>::length(str);
02746
#endif
02747
02748
Integer v;
02749
02750
if (length == 0)
02751
return v;
02752
02753
switch (str[length-1])
02754 {
02755
case 'h':
02756
case 'H':
02757 radix=16;
02758
break;
02759
case 'o':
02760
case 'O':
02761 radix=8;
02762
break;
02763
case 'b':
02764
case 'B':
02765 radix=2;
02766
break;
02767
default:
02768 radix=10;
02769 }
02770
02771
if (length > 2 && str[0] ==
'0' && str[1] ==
'x')
02772 radix = 16;
02773
02774
for (
unsigned i=0; i<length; i++)
02775 {
02776 word digit;
02777
02778
if (str[i] >=
'0' && str[i] <=
'9')
02779 digit = str[i] -
'0';
02780
else if (str[i] >=
'A' && str[i] <=
'F')
02781 digit = str[i] -
'A' + 10;
02782
else if (str[i] >=
'a' && str[i] <=
'f')
02783 digit = str[i] -
'a' + 10;
02784
else
02785 digit = radix;
02786
02787
if (digit < radix)
02788 {
02789 v *= radix;
02790 v += digit;
02791 }
02792 }
02793
02794
if (str[0] ==
'-')
02795 v.
Negate();
02796
02797
return v;
02798 }
02799
02800 Integer::Integer(
const char *str)
02801 : reg(2), sign(POSITIVE)
02802 {
02803 *
this = StringToInteger(str);
02804 }
02805
02806
Integer::Integer(
const wchar_t *str)
02807 : reg(2), sign(POSITIVE)
02808 {
02809 *
this = StringToInteger(str);
02810 }
02811
02812 unsigned int Integer::WordCount()
const
02813
{
02814
return CountWords(reg, reg.
size());
02815 }
02816
02817 unsigned int Integer::ByteCount()
const
02818
{
02819
unsigned wordCount =
WordCount();
02820
if (wordCount)
02821
return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
02822
else
02823
return 0;
02824 }
02825
02826 unsigned int Integer::BitCount()
const
02827
{
02828
unsigned wordCount =
WordCount();
02829
if (wordCount)
02830
return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
02831
else
02832
return 0;
02833 }
02834
02835
void Integer::Decode(
const byte *input,
unsigned int inputLen, Signedness s)
02836 {
02837
StringStore store(input, inputLen);
02838 Decode(store, inputLen, s);
02839 }
02840
02841
void Integer::Decode(
BufferedTransformation &bt,
unsigned int inputLen, Signedness s)
02842 {
02843 assert(bt.
MaxRetrievable() >= inputLen);
02844
02845 byte b;
02846 bt.
Peek(b);
02847 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
02848
02849
while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
02850 {
02851 bt.
Skip(1);
02852 inputLen--;
02853 bt.
Peek(b);
02854 }
02855
02856 reg.
CleanNew(RoundupSize(BytesToWords(inputLen)));
02857
02858
for (
unsigned int i=inputLen; i > 0; i--)
02859 {
02860 bt.
Get(b);
02861 reg[(i-1)/WORD_SIZE] |= b << ((i-1)%WORD_SIZE)*8;
02862 }
02863
02864
if (sign == NEGATIVE)
02865 {
02866
for (
unsigned i=inputLen; i<reg.
size()*WORD_SIZE; i++)
02867 reg[i/WORD_SIZE] |= 0xff << (i%WORD_SIZE)*8;
02868 TwosComplement(reg, reg.
size());
02869 }
02870 }
02871
02872 unsigned int Integer::MinEncodedSize(Signedness signedness)
const
02873
{
02874
unsigned int outputLen = STDMAX(1U,
ByteCount());
02875
if (signedness == UNSIGNED)
02876
return outputLen;
02877
if (NotNegative() && (
GetByte(outputLen-1) & 0x80))
02878 outputLen++;
02879
if (IsNegative() && *
this < -
Power2(outputLen*8-1))
02880 outputLen++;
02881
return outputLen;
02882 }
02883
02884 unsigned int Integer::Encode(byte *output,
unsigned int outputLen, Signedness signedness)
const
02885
{
02886
ArraySink sink(output, outputLen);
02887
return Encode(sink, outputLen, signedness);
02888 }
02889
02890
unsigned int Integer::Encode(
BufferedTransformation &bt,
unsigned int outputLen, Signedness signedness)
const
02891
{
02892
if (signedness == UNSIGNED || NotNegative())
02893 {
02894
for (
unsigned int i=outputLen; i > 0; i--)
02895 bt.
Put(GetByte(i-1));
02896 }
02897
else
02898 {
02899
02900
Integer temp =
Integer::Power2(8*STDMAX(
ByteCount(), outputLen)) + *
this;
02901
for (
unsigned i=0; i<outputLen; i++)
02902 bt.
Put(temp.
GetByte(outputLen-i-1));
02903 }
02904
return outputLen;
02905 }
02906
02907 void Integer::DEREncode(
BufferedTransformation &bt)
const
02908
{
02909
DERGeneralEncoder enc(bt, INTEGER);
02910
Encode(enc,
MinEncodedSize(SIGNED), SIGNED);
02911 enc.
MessageEnd();
02912 }
02913
02914
void Integer::BERDecode(
const byte *input,
unsigned int len)
02915 {
02916
StringStore store(input, len);
02917 BERDecode(store);
02918 }
02919
02920 void Integer::BERDecode(
BufferedTransformation &bt)
02921 {
02922
BERGeneralDecoder dec(bt, INTEGER);
02923
if (!dec.
IsDefiniteLength() || dec.MaxRetrievable() < dec.
RemainingLength())
02924 BERDecodeError();
02925 Decode(dec, dec.
RemainingLength(), SIGNED);
02926 dec.
MessageEnd();
02927 }
02928
02929 void Integer::DEREncodeAsOctetString(
BufferedTransformation &bt,
unsigned int length)
const
02930
{
02931
DERGeneralEncoder enc(bt, OCTET_STRING);
02932
Encode(enc, length);
02933 enc.
MessageEnd();
02934 }
02935
02936 void Integer::BERDecodeAsOctetString(
BufferedTransformation &bt,
unsigned int length)
02937 {
02938
BERGeneralDecoder dec(bt, OCTET_STRING);
02939
if (!dec.
IsDefiniteLength() || dec.
RemainingLength() != length)
02940 BERDecodeError();
02941 Decode(dec, length);
02942 dec.
MessageEnd();
02943 }
02944
02945 unsigned int Integer::OpenPGPEncode(byte *output,
unsigned int len)
const
02946
{
02947
ArraySink sink(output, len);
02948
return OpenPGPEncode(sink);
02949 }
02950
02951 unsigned int Integer::OpenPGPEncode(
BufferedTransformation &bt)
const
02952
{
02953 word16 bitCount =
BitCount();
02954 bt.
PutWord16(bitCount);
02955
return 2 +
Encode(bt, BitsToBytes(bitCount));
02956 }
02957
02958
void Integer::OpenPGPDecode(
const byte *input,
unsigned int len)
02959 {
02960
StringStore store(input, len);
02961 OpenPGPDecode(store);
02962 }
02963
02964
void Integer::OpenPGPDecode(
BufferedTransformation &bt)
02965 {
02966 word16 bitCount;
02967
if (bt.
GetWord16(bitCount) != 2 || bt.
MaxRetrievable() < BitsToBytes(bitCount))
02968
throw OpenPGPDecodeErr();
02969 Decode(bt, BitsToBytes(bitCount));
02970 }
02971
02972
void Integer::Randomize(
RandomNumberGenerator &rng,
unsigned int nbits)
02973 {
02974
const unsigned int nbytes = nbits/8 + 1;
02975
SecByteBlock buf(nbytes);
02976 rng.
GenerateBlock(buf, nbytes);
02977
if (nbytes)
02978 buf[0] = (byte)Crop(buf[0], nbits % 8);
02979 Decode(buf, nbytes, UNSIGNED);
02980 }
02981
02982
void Integer::Randomize(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max)
02983 {
02984
if (min > max)
02985
throw InvalidArgument(
"Integer: Min must be no greater than Max");
02986
02987
Integer range = max - min;
02988
const unsigned int nbits = range.
BitCount();
02989
02990
do
02991 {
02992 Randomize(rng, nbits);
02993 }
02994
while (*
this > range);
02995
02996 *
this += min;
02997 }
02998
02999 bool Integer::Randomize(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max, RandomNumberType rnType,
const Integer &equiv,
const Integer &mod)
03000 {
03001
return GenerateRandomNoThrow(rng, MakeParameters(
"Min", min)(
"Max", max)(
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
03002 }
03003
03004
class KDF2_RNG :
public RandomNumberGenerator
03005 {
03006
public:
03007 KDF2_RNG(
const byte *seed,
unsigned int seedSize)
03008 : m_counter(0), m_counterAndSeed(seedSize + 4)
03009 {
03010 memcpy(m_counterAndSeed + 4, seed, seedSize);
03011 }
03012
03013 byte
GenerateByte()
03014 {
03015 byte b;
03016 GenerateBlock(&b, 1);
03017
return b;
03018 }
03019
03020
void GenerateBlock(byte *output,
unsigned int size)
03021 {
03022 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03023 ++m_counter;
03024
P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size());
03025 }
03026
03027
private:
03028 word32 m_counter;
03029
SecByteBlock m_counterAndSeed;
03030 };
03031
03032
bool Integer::GenerateRandomNoThrow(
RandomNumberGenerator &i_rng,
const NameValuePairs ¶ms)
03033 {
03034
Integer min = params.
GetValueWithDefault(
"Min", Integer::Zero());
03035
Integer max;
03036
if (!params.
GetValue(
"Max", max))
03037 {
03038
int bitLength;
03039
if (params.
GetIntValue(
"BitLength", bitLength))
03040 max =
Integer::Power2(bitLength);
03041
else
03042
throw InvalidArgument(
"Integer: missing Max argument");
03043 }
03044
if (min > max)
03045
throw InvalidArgument(
"Integer: Min must be no greater than Max");
03046
03047
Integer equiv = params.
GetValueWithDefault(
"EquivalentTo", Integer::Zero());
03048
Integer mod = params.
GetValueWithDefault(
"Mod", Integer::One());
03049
03050
if (equiv.
IsNegative() || equiv >= mod)
03051
throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
03052
03053 Integer::RandomNumberType rnType = params.
GetValueWithDefault(
"RandomNumberType", Integer::ANY);
03054
03055 member_ptr<KDF2_RNG> kdf2Rng;
03056
ConstByteArrayParameter seed;
03057
if (params.
GetValue(
"Seed", seed))
03058 {
03059
ByteQueue bq;
03060
DERSequenceEncoder seq(bq);
03061 min.
DEREncode(seq);
03062 max.
DEREncode(seq);
03063 equiv.
DEREncode(seq);
03064 mod.
DEREncode(seq);
03065 DEREncodeUnsigned(seq, rnType);
03066 DEREncodeOctetString(seq, seed.
begin(), seed.
size());
03067 seq.
MessageEnd();
03068
03069
SecByteBlock finalSeed(bq.
MaxRetrievable());
03070 bq.
Get(finalSeed, finalSeed.
size());
03071 kdf2Rng.reset(
new KDF2_RNG(finalSeed.
begin(), finalSeed.
size()));
03072 }
03073
RandomNumberGenerator &rng = kdf2Rng.get() ? (
RandomNumberGenerator &)*kdf2Rng : i_rng;
03074
03075
switch (rnType)
03076 {
03077
case ANY:
03078
if (mod ==
One())
03079 Randomize(rng, min, max);
03080
else
03081 {
03082
Integer min1 = min + (equiv-min)%mod;
03083
if (max < min1)
03084
return false;
03085 Randomize(rng,
Zero(), (max - min1) / mod);
03086 *
this *= mod;
03087 *
this += min1;
03088 }
03089
return true;
03090
03091
case PRIME:
03092 {
03093
const PrimeSelector *pSelector = params.
GetValueWithDefault(
"PointerToPrimeSelector", (
const PrimeSelector *)NULL);
03094
03095
int i;
03096 i = 0;
03097
while (1)
03098 {
03099
if (++i==16)
03100 {
03101
03102
Integer first = min;
03103
if (FirstPrime(first, max, equiv, mod, pSelector))
03104 {
03105
03106 *
this = first;
03107
if (!FirstPrime(first, max, equiv, mod, pSelector))
03108
return true;
03109 }
03110
else
03111
return false;
03112 }
03113
03114 Randomize(rng, min, max);
03115
if (FirstPrime(*
this, STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03116
return true;
03117 }
03118 }
03119
03120
default:
03121
throw InvalidArgument(
"Integer: invalid RandomNumberType argument");
03122 }
03123 }
03124
03125 std::istream& operator>>(std::istream& in,
Integer &a)
03126 {
03127
char c;
03128
unsigned int length = 0;
03129
SecBlock<char> str(length + 16);
03130
03131 std::ws(in);
03132
03133
do
03134 {
03135 in.read(&c, 1);
03136 str[length++] = c;
03137
if (length >= str.
size())
03138 str.
Grow(length + 16);
03139 }
03140
while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<='9') || (c>=
'a' && c<='f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
03141
03142
if (in.gcount())
03143 in.putback(c);
03144 str[length-1] =
'\0';
03145 a =
Integer(str);
03146
03147
return in;
03148 }
03149
03150 std::ostream& operator<<(std::ostream& out,
const Integer &a)
03151 {
03152
03153
long f = out.flags() & std::ios::basefield;
03154
int base, block;
03155
char suffix;
03156
switch(f)
03157 {
03158
case std::ios::oct :
03159 base = 8;
03160 block = 8;
03161 suffix =
'o';
03162
break;
03163
case std::ios::hex :
03164 base = 16;
03165 block = 4;
03166 suffix =
'h';
03167
break;
03168
default :
03169 base = 10;
03170 block = 3;
03171 suffix =
'.';
03172 }
03173
03174
SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03175
Integer temp1=a, temp2;
03176
unsigned i=0;
03177
const char vec[]=
"0123456789ABCDEF";
03178
03179
if (a.IsNegative())
03180 {
03181 out <<
'-';
03182 temp1.
Negate();
03183 }
03184
03185
if (!a)
03186 out <<
'0';
03187
03188
while (!!temp1)
03189 {
03190 word digit;
03191
Integer::Divide(digit, temp2, temp1, base);
03192 s[i++]=vec[digit];
03193 temp1=temp2;
03194 }
03195
03196
while (i--)
03197 {
03198 out << s[i];
03199
03200
03201 }
03202
return out << suffix;
03203 }
03204
03205
Integer& Integer::operator++()
03206 {
03207
if (NotNegative())
03208 {
03209
if (Increment(reg, reg.
size()))
03210 {
03211 reg.
CleanGrow(2*reg.
size());
03212 reg[reg.
size()/2]=1;
03213 }
03214 }
03215
else
03216 {
03217 word borrow = Decrement(reg, reg.
size());
03218 assert(!borrow);
03219
if (
WordCount()==0)
03220 *
this =
Zero();
03221 }
03222
return *
this;
03223 }
03224
03225
Integer& Integer::operator--()
03226 {
03227
if (IsNegative())
03228 {
03229
if (Increment(reg, reg.
size()))
03230 {
03231 reg.
CleanGrow(2*reg.
size());
03232 reg[reg.
size()/2]=1;
03233 }
03234 }
03235
else
03236 {
03237
if (Decrement(reg, reg.
size()))
03238 *
this = -
One();
03239 }
03240
return *
this;
03241 }
03242
03243
void PositiveAdd(
Integer &sum,
const Integer &a,
const Integer& b)
03244 {
03245 word carry;
03246
if (a.reg.size() == b.reg.size())
03247 carry = Add(sum.
reg, a.reg, b.reg, a.reg.size());
03248
else if (a.reg.size() > b.reg.size())
03249 {
03250 carry = Add(sum.
reg, a.reg, b.reg, b.reg.size());
03251 CopyWords(sum.
reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03252 carry = Increment(sum.
reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03253 }
03254
else
03255 {
03256 carry = Add(sum.
reg, a.reg, b.reg, a.reg.size());
03257 CopyWords(sum.
reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03258 carry = Increment(sum.
reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03259 }
03260
03261
if (carry)
03262 {
03263 sum.
reg.
CleanGrow(2*sum.
reg.
size());
03264 sum.
reg[sum.
reg.
size()/2] = 1;
03265 }
03266 sum.
sign = Integer::POSITIVE;
03267 }
03268
03269
void PositiveSubtract(
Integer &diff,
const Integer &a,
const Integer& b)
03270 {
03271
unsigned aSize = a.WordCount();
03272 aSize += aSize%2;
03273
unsigned bSize = b.WordCount();
03274 bSize += bSize%2;
03275
03276
if (aSize == bSize)
03277 {
03278
if (Compare(a.reg, b.reg, aSize) >= 0)
03279 {
03280 Subtract(diff.
reg, a.reg, b.reg, aSize);
03281 diff.
sign = Integer::POSITIVE;
03282 }
03283
else
03284 {
03285 Subtract(diff.
reg, b.reg, a.reg, aSize);
03286 diff.
sign = Integer::NEGATIVE;
03287 }
03288 }
03289
else if (aSize > bSize)
03290 {
03291 word borrow = Subtract(diff.
reg, a.reg, b.reg, bSize);
03292 CopyWords(diff.
reg+bSize, a.reg+bSize, aSize-bSize);
03293 borrow = Decrement(diff.
reg+bSize, aSize-bSize, borrow);
03294 assert(!borrow);
03295 diff.
sign = Integer::POSITIVE;
03296 }
03297
else
03298 {
03299 word borrow = Subtract(diff.
reg, b.reg, a.reg, aSize);
03300 CopyWords(diff.
reg+aSize, b.reg+aSize, bSize-aSize);
03301 borrow = Decrement(diff.
reg+aSize, bSize-aSize, borrow);
03302 assert(!borrow);
03303 diff.
sign = Integer::NEGATIVE;
03304 }
03305 }
03306
03307
Integer Integer::Plus(
const Integer& b)
const
03308
{
03309
Integer sum((word)0, STDMAX(reg.
size(), b.reg.size()));
03310
if (NotNegative())
03311 {
03312
if (b.NotNegative())
03313 PositiveAdd(sum, *
this, b);
03314
else
03315 PositiveSubtract(sum, *
this, b);
03316 }
03317
else
03318 {
03319
if (b.NotNegative())
03320 PositiveSubtract(sum, b, *
this);
03321
else
03322 {
03323 PositiveAdd(sum, *
this, b);
03324 sum.
sign = Integer::NEGATIVE;
03325 }
03326 }
03327
return sum;
03328 }
03329
03330
Integer& Integer::operator+=(
const Integer& t)
03331 {
03332 reg.
CleanGrow(t.reg.size());
03333
if (NotNegative())
03334 {
03335
if (t.NotNegative())
03336 PositiveAdd(*
this, *
this, t);
03337
else
03338 PositiveSubtract(*
this, *
this, t);
03339 }
03340
else
03341 {
03342
if (t.NotNegative())
03343 PositiveSubtract(*
this, t, *
this);
03344
else
03345 {
03346 PositiveAdd(*
this, *
this, t);
03347 sign = Integer::NEGATIVE;
03348 }
03349 }
03350
return *
this;
03351 }
03352
03353
Integer Integer::Minus(
const Integer& b)
const
03354
{
03355
Integer diff((word)0, STDMAX(reg.
size(), b.reg.size()));
03356
if (NotNegative())
03357 {
03358
if (b.NotNegative())
03359 PositiveSubtract(diff, *
this, b);
03360
else
03361 PositiveAdd(diff, *
this, b);
03362 }
03363
else
03364 {
03365
if (b.NotNegative())
03366 {
03367 PositiveAdd(diff, *
this, b);
03368 diff.
sign = Integer::NEGATIVE;
03369 }
03370
else
03371 PositiveSubtract(diff, b, *
this);
03372 }
03373
return diff;
03374 }
03375
03376
Integer& Integer::operator-=(
const Integer& t)
03377 {
03378 reg.
CleanGrow(t.reg.size());
03379
if (NotNegative())
03380 {
03381
if (t.NotNegative())
03382 PositiveSubtract(*
this, *
this, t);
03383
else
03384 PositiveAdd(*
this, *
this, t);
03385 }
03386
else
03387 {
03388
if (t.NotNegative())
03389 {
03390 PositiveAdd(*
this, *
this, t);
03391 sign = Integer::NEGATIVE;
03392 }
03393
else
03394 PositiveSubtract(*
this, t, *
this);
03395 }
03396
return *
this;
03397 }
03398
03399
Integer& Integer::operator<<=(
unsigned int n)
03400 {
03401
const unsigned int wordCount =
WordCount();
03402
const unsigned int shiftWords = n / WORD_BITS;
03403
const unsigned int shiftBits = n % WORD_BITS;
03404
03405 reg.
CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03406 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03407 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03408
return *
this;
03409 }
03410
03411
Integer& Integer::operator>>=(
unsigned int n)
03412 {
03413
const unsigned int wordCount =
WordCount();
03414
const unsigned int shiftWords = n / WORD_BITS;
03415
const unsigned int shiftBits = n % WORD_BITS;
03416
03417 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03418
if (wordCount > shiftWords)
03419 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03420
if (IsNegative() &&
WordCount()==0)
03421 *
this =
Zero();
03422
return *
this;
03423 }
03424
03425
void PositiveMultiply(
Integer &product,
const Integer &a,
const Integer &b)
03426 {
03427
unsigned aSize = RoundupSize(a.WordCount());
03428
unsigned bSize = RoundupSize(b.WordCount());
03429
03430 product.
reg.
CleanNew(RoundupSize(aSize+bSize));
03431 product.
sign = Integer::POSITIVE;
03432
03433
SecAlignedWordBlock workspace(aSize + bSize);
03434 AsymmetricMultiply(product.
reg, workspace, a.reg, aSize, b.reg, bSize);
03435 }
03436
03437
void Multiply(
Integer &product,
const Integer &a,
const Integer &b)
03438 {
03439 PositiveMultiply(product, a, b);
03440
03441
if (a.NotNegative() != b.NotNegative())
03442 product.
Negate();
03443 }
03444
03445
Integer Integer::Times(
const Integer &b)
const
03446
{
03447
Integer product;
03448 Multiply(product, *
this, b);
03449
return product;
03450 }
03451
03452
03453
03454
03455
03456
03457
03458
03459
03460
03461
03462
03463
03464
03465
03466
03467
03468
03469
03470
03471
03472
03473
03474
void PositiveDivide(
Integer &remainder,
Integer "ient,
03475
const Integer &a,
const Integer &b)
03476 {
03477
unsigned aSize = a.WordCount();
03478
unsigned bSize = b.WordCount();
03479
03480
if (!bSize)
03481
throw Integer::DivideByZero();
03482
03483
if (a.PositiveCompare(b) == -1)
03484 {
03485 remainder = a;
03486 remainder.
sign = Integer::POSITIVE;
03487 quotient =
Integer::Zero();
03488
return;
03489 }
03490
03491 aSize += aSize%2;
03492 bSize += bSize%2;
03493
03494 remainder.
reg.
CleanNew(RoundupSize(bSize));
03495 remainder.
sign = Integer::POSITIVE;
03496 quotient.
reg.
CleanNew(RoundupSize(aSize-bSize+2));
03497 quotient.
sign = Integer::POSITIVE;
03498
03499
SecAlignedWordBlock T(aSize+2*bSize+4);
03500 Divide(remainder.
reg, quotient.
reg, T, a.reg, aSize, b.reg, bSize);
03501 }
03502
03503 void Integer::Divide(
Integer &remainder,
Integer "ient,
const Integer ÷nd,
const Integer &divisor)
03504 {
03505 PositiveDivide(remainder, quotient, dividend, divisor);
03506
03507
if (dividend.
IsNegative())
03508 {
03509 quotient.
Negate();
03510
if (remainder.
NotZero())
03511 {
03512 --quotient;
03513 remainder = divisor.
AbsoluteValue() - remainder;
03514 }
03515 }
03516
03517
if (divisor.
IsNegative())
03518 quotient.
Negate();
03519 }
03520
03521 void Integer::DivideByPowerOf2(
Integer &r,
Integer &q,
const Integer &a,
unsigned int n)
03522 {
03523 q = a;
03524 q >>= n;
03525
03526
const unsigned int wordCount = BitsToWords(n);
03527
if (wordCount <= a.WordCount())
03528 {
03529 r.
reg.
resize(RoundupSize(wordCount));
03530 CopyWords(r.
reg, a.reg, wordCount);
03531 SetWords(r.
reg+wordCount, 0, r.
reg.
size()-wordCount);
03532
if (n % WORD_BITS != 0)
03533 r.
reg[wordCount-1] %= (1 << (n % WORD_BITS));
03534 }
03535
else
03536 {
03537 r.
reg.
resize(RoundupSize(a.WordCount()));
03538 CopyWords(r.
reg, a.reg, r.
reg.
size());
03539 }
03540 r.
sign = POSITIVE;
03541
03542
if (a.IsNegative() && r.
NotZero())
03543 {
03544 --q;
03545 r =
Power2(n) - r;
03546 }
03547 }
03548
03549
Integer Integer::DividedBy(
const Integer &b)
const
03550
{
03551
Integer remainder, quotient;
03552 Integer::Divide(remainder, quotient, *
this, b);
03553
return quotient;
03554 }
03555
03556
Integer Integer::Modulo(
const Integer &b)
const
03557
{
03558
Integer remainder, quotient;
03559
Integer::Divide(remainder, quotient, *
this, b);
03560
return remainder;
03561 }
03562
03563 void Integer::Divide(word &remainder,
Integer "ient,
const Integer ÷nd, word divisor)
03564 {
03565
if (!divisor)
03566
throw Integer::DivideByZero();
03567
03568 assert(divisor);
03569
03570
if ((divisor & (divisor-1)) == 0)
03571 {
03572 quotient = dividend >> (BitPrecision(divisor)-1);
03573 remainder = dividend.
reg[0] & (divisor-1);
03574
return;
03575 }
03576
03577
unsigned int i = dividend.
WordCount();
03578 quotient.
reg.
CleanNew(RoundupSize(i));
03579 remainder = 0;
03580
while (i--)
03581 {
03582 quotient.
reg[i] = word(MAKE_DWORD(dividend.
reg[i], remainder) / divisor);
03583 remainder = word(MAKE_DWORD(dividend.
reg[i], remainder) % divisor);
03584 }
03585
03586
if (dividend.
NotNegative())
03587 quotient.
sign = POSITIVE;
03588
else
03589 {
03590 quotient.
sign = NEGATIVE;
03591
if (remainder)
03592 {
03593 --quotient;
03594 remainder = divisor - remainder;
03595 }
03596 }
03597 }
03598
03599
Integer Integer::DividedBy(word b)
const
03600
{
03601 word remainder;
03602
Integer quotient;
03603 Integer::Divide(remainder, quotient, *
this, b);
03604
return quotient;
03605 }
03606
03607 word Integer::Modulo(word divisor)
const
03608
{
03609
if (!divisor)
03610
throw Integer::DivideByZero();
03611
03612 assert(divisor);
03613
03614 word remainder;
03615
03616
if ((divisor & (divisor-1)) == 0)
03617 remainder = reg[0] & (divisor-1);
03618
else
03619 {
03620
unsigned int i =
WordCount();
03621
03622
if (divisor <= 5)
03623 {
03624 dword sum=0;
03625
while (i--)
03626 sum += reg[i];
03627 remainder = word(sum%divisor);
03628 }
03629
else
03630 {
03631 remainder = 0;
03632
while (i--)
03633 remainder = word(MAKE_DWORD(reg[i], remainder) % divisor);
03634 }
03635 }
03636
03637
if (IsNegative() && remainder)
03638 remainder = divisor - remainder;
03639
03640
return remainder;
03641 }
03642
03643
void Integer::Negate()
03644 {
03645
if (!!(*this))
03646 sign = Sign(1-sign);
03647 }
03648
03649
int Integer::PositiveCompare(
const Integer& t)
const
03650
{
03651
unsigned size =
WordCount(), tSize = t.WordCount();
03652
03653
if (size == tSize)
03654
return CryptoPP::Compare(reg, t.reg, size);
03655
else
03656
return size > tSize ? 1 : -1;
03657 }
03658
03659 int Integer::Compare(
const Integer& t)
const
03660
{
03661
if (NotNegative())
03662 {
03663
if (t.NotNegative())
03664
return PositiveCompare(t);
03665
else
03666
return 1;
03667 }
03668
else
03669 {
03670
if (t.NotNegative())
03671
return -1;
03672
else
03673
return -PositiveCompare(t);
03674 }
03675 }
03676
03677 Integer Integer::SquareRoot()
const
03678
{
03679
if (!IsPositive())
03680
return Zero();
03681
03682
03683
Integer x, y =
Power2((
BitCount()+1)/2);
03684 assert(y*y >= *
this);
03685
03686
do
03687 {
03688 x = y;
03689 y = (x + *
this/x) >> 1;
03690 }
while (y<x);
03691
03692
return x;
03693 }
03694
03695 bool Integer::IsSquare()
const
03696
{
03697
Integer r =
SquareRoot();
03698
return *
this == r.
Squared();
03699 }
03700
03701 bool Integer::IsUnit()
const
03702
{
03703
return (
WordCount() == 1) && (reg[0] == 1);
03704 }
03705
03706 Integer Integer::MultiplicativeInverse()
const
03707
{
03708
return IsUnit() ? *
this :
Zero();
03709 }
03710
03711 Integer a_times_b_mod_c(
const Integer &x,
const Integer& y,
const Integer& m)
03712 {
03713
return x*y%m;
03714 }
03715
03716 Integer a_exp_b_mod_c(
const Integer &x,
const Integer& e,
const Integer& m)
03717 {
03718
ModularArithmetic mr(m);
03719
return mr.
Exponentiate(x, e);
03720 }
03721
03722 Integer Integer::Gcd(
const Integer &a,
const Integer &b)
03723 {
03724
return EuclideanDomainOf<Integer>().
Gcd(a, b);
03725 }
03726
03727 Integer Integer::InverseMod(
const Integer &m)
const
03728
{
03729 assert(m.
NotNegative());
03730
03731
if (IsNegative() || *
this>=m)
03732
return (*
this%m).
InverseMod(m);
03733
03734
if (m.
IsEven())
03735 {
03736
if (!m || IsEven())
03737
return Zero();
03738
if (*
this ==
One())
03739
return One();
03740
03741
Integer u = m.
InverseMod(*
this);
03742
return !u ?
Zero() : (m*(*
this-u)+1)/(*this);
03743 }
03744
03745
SecBlock<word> T(m.
reg.
size() * 4);
03746
Integer r((word)0, m.
reg.
size());
03747
unsigned k = AlmostInverse(r.
reg, T, reg, reg.size(), m.
reg, m.
reg.
size());
03748 DivideByPower2Mod(r.
reg, r.
reg, k, m.
reg, m.
reg.
size());
03749
return r;
03750 }
03751
03752 word Integer::InverseMod(
const word mod)
const
03753
{
03754 word g0 = mod, g1 = *
this % mod;
03755 word v0 = 0, v1 = 1;
03756 word y;
03757
03758
while (g1)
03759 {
03760
if (g1 == 1)
03761
return v1;
03762 y = g0 / g1;
03763 g0 = g0 % g1;
03764 v0 += y * v1;
03765
03766
if (!g0)
03767
break;
03768
if (g0 == 1)
03769
return mod-v0;
03770 y = g1 / g0;
03771 g1 = g1 % g0;
03772 v1 += y * v0;
03773 }
03774
return 0;
03775 }
03776
03777
03778
03779 ModularArithmetic::ModularArithmetic(
BufferedTransformation &bt)
03780 {
03781
BERSequenceDecoder seq(bt);
03782
OID oid(seq);
03783
if (oid != ASN1::prime_field())
03784 BERDecodeError();
03785 modulus.
BERDecode(seq);
03786 seq.
MessageEnd();
03787 result.
reg.
resize(modulus.
reg.
size());
03788 }
03789
03790
void ModularArithmetic::DEREncode(
BufferedTransformation &bt)
const
03791
{
03792
DERSequenceEncoder seq(bt);
03793 ASN1::prime_field().DEREncode(seq);
03794 modulus.
DEREncode(seq);
03795 seq.
MessageEnd();
03796 }
03797
03798
void ModularArithmetic::DEREncodeElement(
BufferedTransformation &out,
const Element &a)
const
03799
{
03800 a.DEREncodeAsOctetString(out, MaxElementByteLength());
03801 }
03802
03803
void ModularArithmetic::BERDecodeElement(
BufferedTransformation &in, Element &a)
const
03804
{
03805 a.BERDecodeAsOctetString(in, MaxElementByteLength());
03806 }
03807
03808
const Integer& ModularArithmetic::Half(
const Integer &a)
const
03809
{
03810
if (a.reg.size()==modulus.
reg.
size())
03811 {
03812 CryptoPP::DivideByPower2Mod(result.
reg.
begin(), a.reg, 1, modulus.
reg, a.reg.size());
03813
return result;
03814 }
03815
else
03816
return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1));
03817 }
03818
03819
const Integer& ModularArithmetic::Add(
const Integer &a,
const Integer &b)
const
03820
{
03821
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
03822 {
03823
if (CryptoPP::Add(result.
reg.
begin(), a.reg, b.reg, a.reg.size())
03824 || Compare(result.
reg, modulus.
reg, a.reg.size()) >= 0)
03825 {
03826 CryptoPP::Subtract(result.
reg.
begin(), result.
reg, modulus.
reg, a.reg.size());
03827 }
03828
return result;
03829 }
03830
else
03831 {
03832 result1 = a+b;
03833
if (result1 >= modulus)
03834 result1 -= modulus;
03835
return result1;
03836 }
03837 }
03838
03839
Integer& ModularArithmetic::Accumulate(
Integer &a,
const Integer &b)
const
03840
{
03841
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
03842 {
03843
if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
03844 || Compare(a.reg, modulus.
reg, a.reg.size()) >= 0)
03845 {
03846 CryptoPP::Subtract(a.reg, a.reg, modulus.
reg, a.reg.size());
03847 }
03848 }
03849
else
03850 {
03851 a+=b;
03852
if (a>=modulus)
03853 a-=modulus;
03854 }
03855
03856
return a;
03857 }
03858
03859
const Integer& ModularArithmetic::Subtract(
const Integer &a,
const Integer &b)
const
03860
{
03861
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
03862 {
03863
if (CryptoPP::Subtract(result.
reg.
begin(), a.reg, b.reg, a.reg.size()))
03864 CryptoPP::Add(result.
reg.
begin(), result.
reg, modulus.
reg, a.reg.size());
03865
return result;
03866 }
03867
else
03868 {
03869 result1 = a-b;
03870
if (result1.
IsNegative())
03871 result1 += modulus;
03872
return result1;
03873 }
03874 }
03875
03876
Integer& ModularArithmetic::Reduce(
Integer &a,
const Integer &b)
const
03877
{
03878
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
03879 {
03880
if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
03881 CryptoPP::Add(a.reg, a.reg, modulus.
reg, a.reg.size());
03882 }
03883
else
03884 {
03885 a-=b;
03886
if (a.IsNegative())
03887 a+=modulus;
03888 }
03889
03890
return a;
03891 }
03892
03893
const Integer& ModularArithmetic::Inverse(
const Integer &a)
const
03894
{
03895
if (!a)
03896
return a;
03897
03898 CopyWords(result.
reg.
begin(), modulus.
reg, modulus.
reg.
size());
03899
if (CryptoPP::Subtract(result.
reg.
begin(), result.
reg, a.reg, a.reg.size()))
03900 Decrement(result.
reg.
begin()+a.reg.size(), 1, modulus.
reg.
size()-a.reg.size());
03901
03902
return result;
03903 }
03904
03905
Integer ModularArithmetic::CascadeExponentiate(
const Integer &x,
const Integer &e1,
const Integer &y,
const Integer &e2)
const
03906
{
03907
if (modulus.
IsOdd())
03908 {
03909
MontgomeryRepresentation dr(modulus);
03910
return dr.
ConvertOut(dr.
CascadeExponentiate(dr.
ConvertIn(x), e1, dr.
ConvertIn(y), e2));
03911 }
03912
else
03913
return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
03914 }
03915
03916
void ModularArithmetic::SimultaneousExponentiate(
Integer *results,
const Integer &base,
const Integer *exponents,
unsigned int exponentsCount)
const
03917
{
03918
if (modulus.
IsOdd())
03919 {
03920
MontgomeryRepresentation dr(modulus);
03921 dr.
SimultaneousExponentiate(results, dr.
ConvertIn(base), exponents, exponentsCount);
03922
for (
unsigned int i=0; i<exponentsCount; i++)
03923 results[i] = dr.
ConvertOut(results[i]);
03924 }
03925
else
03926
AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
03927 }
03928
03929 MontgomeryRepresentation::MontgomeryRepresentation(
const Integer &m)
03930 :
ModularArithmetic(m),
03931 u((word)0, modulus.reg.size()),
03932 workspace(5*modulus.reg.size())
03933 {
03934
if (!modulus.IsOdd())
03935
throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
03936
03937 RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size());
03938 }
03939
03940
const Integer& MontgomeryRepresentation::Multiply(
const Integer &a,
const Integer &b)
const
03941
{
03942 word *
const T = workspace.
begin();
03943 word *
const R = result.
reg.
begin();
03944
const unsigned int N = modulus.
reg.
size();
03945 assert(a.reg.size()<=N && b.reg.size()<=N);
03946
03947 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
03948 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
03949 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.reg, N);
03950
return result;
03951 }
03952
03953
const Integer& MontgomeryRepresentation::Square(
const Integer &a)
const
03954
{
03955 word *
const T = workspace.
begin();
03956 word *
const R = result.
reg.
begin();
03957
const unsigned int N = modulus.
reg.
size();
03958 assert(a.reg.size()<=N);
03959
03960 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
03961 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
03962 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.reg, N);
03963
return result;
03964 }
03965
03966
Integer MontgomeryRepresentation::ConvertOut(
const Integer &a)
const
03967
{
03968 word *
const T = workspace.
begin();
03969 word *
const R = result.
reg.
begin();
03970
const unsigned int N = modulus.
reg.
size();
03971 assert(a.reg.size()<=N);
03972
03973 CopyWords(T, a.reg, a.reg.size());
03974 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
03975 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.reg, N);
03976
return result;
03977 }
03978
03979
const Integer& MontgomeryRepresentation::MultiplicativeInverse(
const Integer &a)
const
03980
{
03981
03982 word *
const T = workspace.
begin();
03983 word *
const R = result.
reg.
begin();
03984
const unsigned int N = modulus.
reg.
size();
03985 assert(a.reg.size()<=N);
03986
03987 CopyWords(T, a.reg, a.reg.size());
03988 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
03989 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.reg, N);
03990
unsigned k = AlmostInverse(R, T, R, N, modulus.
reg, N);
03991
03992
03993
03994
if (k>N*WORD_BITS)
03995 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.
reg, N);
03996
else
03997 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.
reg, N);
03998
03999
return result;
04000 }
04001
04002
template class AbstractRing<Integer>;
04003
04004 NAMESPACE_END