| [25] | 1 | #include <tommath.h> | 
|---|
 | 2 | #ifdef BN_MP_KARATSUBA_MUL_C | 
|---|
 | 3 | /* LibTomMath, multiple-precision integer library -- Tom St Denis | 
|---|
 | 4 |  * | 
|---|
 | 5 |  * LibTomMath is a library that provides multiple-precision | 
|---|
 | 6 |  * integer arithmetic as well as number theoretic functionality. | 
|---|
 | 7 |  * | 
|---|
 | 8 |  * The library was designed directly after the MPI library by | 
|---|
 | 9 |  * Michael Fromberger but has been written from scratch with | 
|---|
 | 10 |  * additional optimizations in place. | 
|---|
 | 11 |  * | 
|---|
 | 12 |  * The library is free for all purposes without any express | 
|---|
 | 13 |  * guarantee it works. | 
|---|
 | 14 |  * | 
|---|
 | 15 |  * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com | 
|---|
 | 16 |  */ | 
|---|
 | 17 |  | 
|---|
 | 18 | /* c = |a| * |b| using Karatsuba Multiplication using  | 
|---|
 | 19 |  * three half size multiplications | 
|---|
 | 20 |  * | 
|---|
 | 21 |  * Let B represent the radix [e.g. 2**DIGIT_BIT] and  | 
|---|
 | 22 |  * let n represent half of the number of digits in  | 
|---|
 | 23 |  * the min(a,b) | 
|---|
 | 24 |  * | 
|---|
 | 25 |  * a = a1 * B**n + a0 | 
|---|
 | 26 |  * b = b1 * B**n + b0 | 
|---|
 | 27 |  * | 
|---|
 | 28 |  * Then, a * b =>  | 
|---|
 | 29 |    a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 | 
|---|
 | 30 |  * | 
|---|
 | 31 |  * Note that a1b1 and a0b0 are used twice and only need to be  | 
|---|
 | 32 |  * computed once.  So in total three half size (half # of  | 
|---|
 | 33 |  * digit) multiplications are performed, a0b0, a1b1 and  | 
|---|
 | 34 |  * (a1+b1)(a0+b0) | 
|---|
 | 35 |  * | 
|---|
 | 36 |  * Note that a multiplication of half the digits requires | 
|---|
 | 37 |  * 1/4th the number of single precision multiplications so in  | 
|---|
 | 38 |  * total after one call 25% of the single precision multiplications  | 
|---|
 | 39 |  * are saved.  Note also that the call to mp_mul can end up back  | 
|---|
 | 40 |  * in this function if the a0, a1, b0, or b1 are above the threshold.   | 
|---|
 | 41 |  * This is known as divide-and-conquer and leads to the famous  | 
|---|
 | 42 |  * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than  | 
|---|
 | 43 |  * the standard O(N**2) that the baseline/comba methods use.   | 
|---|
 | 44 |  * Generally though the overhead of this method doesn't pay off  | 
|---|
 | 45 |  * until a certain size (N ~ 80) is reached. | 
|---|
 | 46 |  */ | 
|---|
 | 47 | int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) | 
|---|
 | 48 | { | 
|---|
 | 49 |   mp_int  x0, x1, y0, y1, t1, x0y0, x1y1; | 
|---|
 | 50 |   int     B, err; | 
|---|
 | 51 |  | 
|---|
 | 52 |   /* default the return code to an error */ | 
|---|
 | 53 |   err = MP_MEM; | 
|---|
 | 54 |  | 
|---|
 | 55 |   /* min # of digits */ | 
|---|
 | 56 |   B = MIN (a->used, b->used); | 
|---|
 | 57 |  | 
|---|
 | 58 |   /* now divide in two */ | 
|---|
 | 59 |   B = B >> 1; | 
|---|
 | 60 |  | 
|---|
 | 61 |   /* init copy all the temps */ | 
|---|
 | 62 |   if (mp_init_size (&x0, B) != MP_OKAY) | 
|---|
 | 63 |     goto ERR; | 
|---|
 | 64 |   if (mp_init_size (&x1, a->used - B) != MP_OKAY) | 
|---|
 | 65 |     goto X0; | 
|---|
 | 66 |   if (mp_init_size (&y0, B) != MP_OKAY) | 
|---|
 | 67 |     goto X1; | 
|---|
 | 68 |   if (mp_init_size (&y1, b->used - B) != MP_OKAY) | 
|---|
 | 69 |     goto Y0; | 
|---|
 | 70 |  | 
|---|
 | 71 |   /* init temps */ | 
|---|
 | 72 |   if (mp_init_size (&t1, B * 2) != MP_OKAY) | 
|---|
 | 73 |     goto Y1; | 
|---|
 | 74 |   if (mp_init_size (&x0y0, B * 2) != MP_OKAY) | 
|---|
 | 75 |     goto T1; | 
|---|
 | 76 |   if (mp_init_size (&x1y1, B * 2) != MP_OKAY) | 
|---|
 | 77 |     goto X0Y0; | 
|---|
 | 78 |  | 
|---|
 | 79 |   /* now shift the digits */ | 
|---|
 | 80 |   x0.used = y0.used = B; | 
|---|
 | 81 |   x1.used = a->used - B; | 
|---|
 | 82 |   y1.used = b->used - B; | 
|---|
 | 83 |  | 
|---|
 | 84 |   { | 
|---|
 | 85 |     register int x; | 
|---|
 | 86 |     register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; | 
|---|
 | 87 |  | 
|---|
 | 88 |     /* we copy the digits directly instead of using higher level functions | 
|---|
 | 89 |      * since we also need to shift the digits | 
|---|
 | 90 |      */ | 
|---|
 | 91 |     tmpa = a->dp; | 
|---|
 | 92 |     tmpb = b->dp; | 
|---|
 | 93 |  | 
|---|
 | 94 |     tmpx = x0.dp; | 
|---|
 | 95 |     tmpy = y0.dp; | 
|---|
 | 96 |     for (x = 0; x < B; x++) { | 
|---|
 | 97 |       *tmpx++ = *tmpa++; | 
|---|
 | 98 |       *tmpy++ = *tmpb++; | 
|---|
 | 99 |     } | 
|---|
 | 100 |  | 
|---|
 | 101 |     tmpx = x1.dp; | 
|---|
 | 102 |     for (x = B; x < a->used; x++) { | 
|---|
 | 103 |       *tmpx++ = *tmpa++; | 
|---|
 | 104 |     } | 
|---|
 | 105 |  | 
|---|
 | 106 |     tmpy = y1.dp; | 
|---|
 | 107 |     for (x = B; x < b->used; x++) { | 
|---|
 | 108 |       *tmpy++ = *tmpb++; | 
|---|
 | 109 |     } | 
|---|
 | 110 |   } | 
|---|
 | 111 |  | 
|---|
 | 112 |   /* only need to clamp the lower words since by definition the  | 
|---|
 | 113 |    * upper words x1/y1 must have a known number of digits | 
|---|
 | 114 |    */ | 
|---|
 | 115 |   mp_clamp (&x0); | 
|---|
 | 116 |   mp_clamp (&y0); | 
|---|
 | 117 |  | 
|---|
 | 118 |   /* now calc the products x0y0 and x1y1 */ | 
|---|
 | 119 |   /* after this x0 is no longer required, free temp [x0==t2]! */ | 
|---|
 | 120 |   if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)   | 
|---|
 | 121 |     goto X1Y1;          /* x0y0 = x0*y0 */ | 
|---|
 | 122 |   if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) | 
|---|
 | 123 |     goto X1Y1;          /* x1y1 = x1*y1 */ | 
|---|
 | 124 |  | 
|---|
 | 125 |   /* now calc x1+x0 and y1+y0 */ | 
|---|
 | 126 |   if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) | 
|---|
 | 127 |     goto X1Y1;          /* t1 = x1 - x0 */ | 
|---|
 | 128 |   if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) | 
|---|
 | 129 |     goto X1Y1;          /* t2 = y1 - y0 */ | 
|---|
 | 130 |   if (mp_mul (&t1, &x0, &t1) != MP_OKAY) | 
|---|
 | 131 |     goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */ | 
|---|
 | 132 |  | 
|---|
 | 133 |   /* add x0y0 */ | 
|---|
 | 134 |   if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) | 
|---|
 | 135 |     goto X1Y1;          /* t2 = x0y0 + x1y1 */ | 
|---|
 | 136 |   if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) | 
|---|
 | 137 |     goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ | 
|---|
 | 138 |  | 
|---|
 | 139 |   /* shift by B */ | 
|---|
 | 140 |   if (mp_lshd (&t1, B) != MP_OKAY) | 
|---|
 | 141 |     goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ | 
|---|
 | 142 |   if (mp_lshd (&x1y1, B * 2) != MP_OKAY) | 
|---|
 | 143 |     goto X1Y1;          /* x1y1 = x1y1 << 2*B */ | 
|---|
 | 144 |  | 
|---|
 | 145 |   if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) | 
|---|
 | 146 |     goto X1Y1;          /* t1 = x0y0 + t1 */ | 
|---|
 | 147 |   if (mp_add (&t1, &x1y1, c) != MP_OKAY) | 
|---|
 | 148 |     goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */ | 
|---|
 | 149 |  | 
|---|
 | 150 |   /* Algorithm succeeded set the return code to MP_OKAY */ | 
|---|
 | 151 |   err = MP_OKAY; | 
|---|
 | 152 |  | 
|---|
 | 153 | X1Y1:mp_clear (&x1y1); | 
|---|
 | 154 | X0Y0:mp_clear (&x0y0); | 
|---|
 | 155 | T1:mp_clear (&t1); | 
|---|
 | 156 | Y1:mp_clear (&y1); | 
|---|
 | 157 | Y0:mp_clear (&y0); | 
|---|
 | 158 | X1:mp_clear (&x1); | 
|---|
 | 159 | X0:mp_clear (&x0); | 
|---|
 | 160 | ERR: | 
|---|
 | 161 |   return err; | 
|---|
 | 162 | } | 
|---|
 | 163 | #endif | 
|---|
 | 164 |  | 
|---|
 | 165 | /* $Source: /cvsroot/tcl/libtommath/bn_mp_karatsuba_mul.c,v $ */ | 
|---|
 | 166 | /* $Revision: 1.1.1.3 $ */ | 
|---|
 | 167 | /* $Date: 2006/12/01 00:08:11 $ */ | 
|---|