The real, definitive, Visual C++ support branch. Accept no substitutes
[reactos.git] / dll / win32 / rsaenh / mpi.c
1 /*
2 * dlls/rsaenh/mpi.c
3 * Multi Precision Integer functions
4 *
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 */
22
23 /*
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
28 * original version.
29 */
30
31 #include <stdarg.h>
32 #include "tomcrypt.h"
33
34 /* Known optimal configurations
35 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
36 -------------------------------------------------------------
37 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
38 */
39 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
40 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
41
42 /* computes the modular inverse via binary extended euclidean algorithm,
43 * that is c = 1/a mod b
44 *
45 * Based on slow invmod except this is optimized for the case where b is
46 * odd as per HAC Note 14.64 on pp. 610
47 */
48 int
49 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
50 {
51 mp_int x, y, u, v, B, D;
52 int res, neg;
53
54 /* 2. [modified] b must be odd */
55 if (mp_iseven (b) == 1) {
56 return MP_VAL;
57 }
58
59 /* init all our temps */
60 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
61 return res;
62 }
63
64 /* x == modulus, y == value to invert */
65 if ((res = mp_copy (b, &x)) != MP_OKAY) {
66 goto __ERR;
67 }
68
69 /* we need y = |a| */
70 if ((res = mp_abs (a, &y)) != MP_OKAY) {
71 goto __ERR;
72 }
73
74 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
75 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
76 goto __ERR;
77 }
78 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
79 goto __ERR;
80 }
81 mp_set (&D, 1);
82
83 top:
84 /* 4. while u is even do */
85 while (mp_iseven (&u) == 1) {
86 /* 4.1 u = u/2 */
87 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
88 goto __ERR;
89 }
90 /* 4.2 if B is odd then */
91 if (mp_isodd (&B) == 1) {
92 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
93 goto __ERR;
94 }
95 }
96 /* B = B/2 */
97 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
98 goto __ERR;
99 }
100 }
101
102 /* 5. while v is even do */
103 while (mp_iseven (&v) == 1) {
104 /* 5.1 v = v/2 */
105 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
106 goto __ERR;
107 }
108 /* 5.2 if D is odd then */
109 if (mp_isodd (&D) == 1) {
110 /* D = (D-x)/2 */
111 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
112 goto __ERR;
113 }
114 }
115 /* D = D/2 */
116 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
117 goto __ERR;
118 }
119 }
120
121 /* 6. if u >= v then */
122 if (mp_cmp (&u, &v) != MP_LT) {
123 /* u = u - v, B = B - D */
124 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
125 goto __ERR;
126 }
127
128 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
129 goto __ERR;
130 }
131 } else {
132 /* v - v - u, D = D - B */
133 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
134 goto __ERR;
135 }
136
137 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
138 goto __ERR;
139 }
140 }
141
142 /* if not zero goto step 4 */
143 if (mp_iszero (&u) == 0) {
144 goto top;
145 }
146
147 /* now a = C, b = D, gcd == g*v */
148
149 /* if v != 1 then there is no inverse */
150 if (mp_cmp_d (&v, 1) != MP_EQ) {
151 res = MP_VAL;
152 goto __ERR;
153 }
154
155 /* b is now the inverse */
156 neg = a->sign;
157 while (D.sign == MP_NEG) {
158 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
159 goto __ERR;
160 }
161 }
162 mp_exch (&D, c);
163 c->sign = neg;
164 res = MP_OKAY;
165
166 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
167 return res;
168 }
169
170 /* computes xR**-1 == x (mod N) via Montgomery Reduction
171 *
172 * This is an optimized implementation of montgomery_reduce
173 * which uses the comba method to quickly calculate the columns of the
174 * reduction.
175 *
176 * Based on Algorithm 14.32 on pp.601 of HAC.
177 */
178 int
179 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
180 {
181 int ix, res, olduse;
182 mp_word W[MP_WARRAY];
183
184 /* get old used count */
185 olduse = x->used;
186
187 /* grow a as required */
188 if (x->alloc < n->used + 1) {
189 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
190 return res;
191 }
192 }
193
194 /* first we have to get the digits of the input into
195 * an array of double precision words W[...]
196 */
197 {
198 register mp_word *_W;
199 register mp_digit *tmpx;
200
201 /* alias for the W[] array */
202 _W = W;
203
204 /* alias for the digits of x*/
205 tmpx = x->dp;
206
207 /* copy the digits of a into W[0..a->used-1] */
208 for (ix = 0; ix < x->used; ix++) {
209 *_W++ = *tmpx++;
210 }
211
212 /* zero the high words of W[a->used..m->used*2] */
213 for (; ix < n->used * 2 + 1; ix++) {
214 *_W++ = 0;
215 }
216 }
217
218 /* now we proceed to zero successive digits
219 * from the least significant upwards
220 */
221 for (ix = 0; ix < n->used; ix++) {
222 /* mu = ai * m' mod b
223 *
224 * We avoid a double precision multiplication (which isn't required)
225 * by casting the value down to a mp_digit. Note this requires
226 * that W[ix-1] have the carry cleared (see after the inner loop)
227 */
228 register mp_digit mu;
229 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
230
231 /* a = a + mu * m * b**i
232 *
233 * This is computed in place and on the fly. The multiplication
234 * by b**i is handled by offsetting which columns the results
235 * are added to.
236 *
237 * Note the comba method normally doesn't handle carries in the
238 * inner loop In this case we fix the carry from the previous
239 * column since the Montgomery reduction requires digits of the
240 * result (so far) [see above] to work. This is
241 * handled by fixing up one carry after the inner loop. The
242 * carry fixups are done in order so after these loops the
243 * first m->used words of W[] have the carries fixed
244 */
245 {
246 register int iy;
247 register mp_digit *tmpn;
248 register mp_word *_W;
249
250 /* alias for the digits of the modulus */
251 tmpn = n->dp;
252
253 /* Alias for the columns set by an offset of ix */
254 _W = W + ix;
255
256 /* inner loop */
257 for (iy = 0; iy < n->used; iy++) {
258 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
259 }
260 }
261
262 /* now fix carry for next digit, W[ix+1] */
263 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
264 }
265
266 /* now we have to propagate the carries and
267 * shift the words downward [all those least
268 * significant digits we zeroed].
269 */
270 {
271 register mp_digit *tmpx;
272 register mp_word *_W, *_W1;
273
274 /* nox fix rest of carries */
275
276 /* alias for current word */
277 _W1 = W + ix;
278
279 /* alias for next word, where the carry goes */
280 _W = W + ++ix;
281
282 for (; ix <= n->used * 2 + 1; ix++) {
283 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
284 }
285
286 /* copy out, A = A/b**n
287 *
288 * The result is A/b**n but instead of converting from an
289 * array of mp_word to mp_digit than calling mp_rshd
290 * we just copy them in the right order
291 */
292
293 /* alias for destination word */
294 tmpx = x->dp;
295
296 /* alias for shifted double precision result */
297 _W = W + n->used;
298
299 for (ix = 0; ix < n->used + 1; ix++) {
300 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
301 }
302
303 /* zero oldused digits, if the input a was larger than
304 * m->used+1 we'll have to clear the digits
305 */
306 for (; ix < olduse; ix++) {
307 *tmpx++ = 0;
308 }
309 }
310
311 /* set the max used and clamp */
312 x->used = n->used + 1;
313 mp_clamp (x);
314
315 /* if A >= m then A = A - m */
316 if (mp_cmp_mag (x, n) != MP_LT) {
317 return s_mp_sub (x, n, x);
318 }
319 return MP_OKAY;
320 }
321
322 /* Fast (comba) multiplier
323 *
324 * This is the fast column-array [comba] multiplier. It is
325 * designed to compute the columns of the product first
326 * then handle the carries afterwards. This has the effect
327 * of making the nested loops that compute the columns very
328 * simple and schedulable on super-scalar processors.
329 *
330 * This has been modified to produce a variable number of
331 * digits of output so if say only a half-product is required
332 * you don't have to compute the upper half (a feature
333 * required for fast Barrett reduction).
334 *
335 * Based on Algorithm 14.12 on pp.595 of HAC.
336 *
337 */
338 int
339 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
340 {
341 int olduse, res, pa, ix, iz;
342 mp_digit W[MP_WARRAY];
343 register mp_word _W;
344
345 /* grow the destination as required */
346 if (c->alloc < digs) {
347 if ((res = mp_grow (c, digs)) != MP_OKAY) {
348 return res;
349 }
350 }
351
352 /* number of output digits to produce */
353 pa = MIN(digs, a->used + b->used);
354
355 /* clear the carry */
356 _W = 0;
357 for (ix = 0; ix <= pa; ix++) {
358 int tx, ty;
359 int iy;
360 mp_digit *tmpx, *tmpy;
361
362 /* get offsets into the two bignums */
363 ty = MIN(b->used-1, ix);
364 tx = ix - ty;
365
366 /* setup temp aliases */
367 tmpx = a->dp + tx;
368 tmpy = b->dp + ty;
369
370 /* This is the number of times the loop will iterate, essentially it's
371 while (tx++ < a->used && ty-- >= 0) { ... }
372 */
373 iy = MIN(a->used-tx, ty+1);
374
375 /* execute loop */
376 for (iz = 0; iz < iy; ++iz) {
377 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
378 }
379
380 /* store term */
381 W[ix] = ((mp_digit)_W) & MP_MASK;
382
383 /* make next carry */
384 _W = _W >> ((mp_word)DIGIT_BIT);
385 }
386
387 /* setup dest */
388 olduse = c->used;
389 c->used = digs;
390
391 {
392 register mp_digit *tmpc;
393 tmpc = c->dp;
394 for (ix = 0; ix < digs; ix++) {
395 /* now extract the previous digit [below the carry] */
396 *tmpc++ = W[ix];
397 }
398
399 /* clear unused digits [that existed in the old copy of c] */
400 for (; ix < olduse; ix++) {
401 *tmpc++ = 0;
402 }
403 }
404 mp_clamp (c);
405 return MP_OKAY;
406 }
407
408 /* this is a modified version of fast_s_mul_digs that only produces
409 * output digits *above* digs. See the comments for fast_s_mul_digs
410 * to see how it works.
411 *
412 * This is used in the Barrett reduction since for one of the multiplications
413 * only the higher digits were needed. This essentially halves the work.
414 *
415 * Based on Algorithm 14.12 on pp.595 of HAC.
416 */
417 int
418 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
419 {
420 int olduse, res, pa, ix, iz;
421 mp_digit W[MP_WARRAY];
422 mp_word _W;
423
424 /* grow the destination as required */
425 pa = a->used + b->used;
426 if (c->alloc < pa) {
427 if ((res = mp_grow (c, pa)) != MP_OKAY) {
428 return res;
429 }
430 }
431
432 /* number of output digits to produce */
433 pa = a->used + b->used;
434 _W = 0;
435 for (ix = digs; ix <= pa; ix++) {
436 int tx, ty, iy;
437 mp_digit *tmpx, *tmpy;
438
439 /* get offsets into the two bignums */
440 ty = MIN(b->used-1, ix);
441 tx = ix - ty;
442
443 /* setup temp aliases */
444 tmpx = a->dp + tx;
445 tmpy = b->dp + ty;
446
447 /* This is the number of times the loop will iterate, essentially it's
448 while (tx++ < a->used && ty-- >= 0) { ... }
449 */
450 iy = MIN(a->used-tx, ty+1);
451
452 /* execute loop */
453 for (iz = 0; iz < iy; iz++) {
454 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
455 }
456
457 /* store term */
458 W[ix] = ((mp_digit)_W) & MP_MASK;
459
460 /* make next carry */
461 _W = _W >> ((mp_word)DIGIT_BIT);
462 }
463
464 /* setup dest */
465 olduse = c->used;
466 c->used = pa;
467
468 {
469 register mp_digit *tmpc;
470
471 tmpc = c->dp + digs;
472 for (ix = digs; ix <= pa; ix++) {
473 /* now extract the previous digit [below the carry] */
474 *tmpc++ = W[ix];
475 }
476
477 /* clear unused digits [that existed in the old copy of c] */
478 for (; ix < olduse; ix++) {
479 *tmpc++ = 0;
480 }
481 }
482 mp_clamp (c);
483 return MP_OKAY;
484 }
485
486 /* fast squaring
487 *
488 * This is the comba method where the columns of the product
489 * are computed first then the carries are computed. This
490 * has the effect of making a very simple inner loop that
491 * is executed the most
492 *
493 * W2 represents the outer products and W the inner.
494 *
495 * A further optimizations is made because the inner
496 * products are of the form "A * B * 2". The *2 part does
497 * not need to be computed until the end which is good
498 * because 64-bit shifts are slow!
499 *
500 * Based on Algorithm 14.16 on pp.597 of HAC.
501 *
502 */
503 /* the jist of squaring...
504
505 you do like mult except the offset of the tmpx [one that starts closer to zero]
506 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
507 (ty-tx) so that it never happens. You double all those you add in the inner loop
508
509 After that loop you do the squares and add them in.
510
511 Remove W2 and don't memset W
512
513 */
514
515 int fast_s_mp_sqr (const mp_int * a, mp_int * b)
516 {
517 int olduse, res, pa, ix, iz;
518 mp_digit W[MP_WARRAY], *tmpx;
519 mp_word W1;
520
521 /* grow the destination as required */
522 pa = a->used + a->used;
523 if (b->alloc < pa) {
524 if ((res = mp_grow (b, pa)) != MP_OKAY) {
525 return res;
526 }
527 }
528
529 /* number of output digits to produce */
530 W1 = 0;
531 for (ix = 0; ix <= pa; ix++) {
532 int tx, ty, iy;
533 mp_word _W;
534 mp_digit *tmpy;
535
536 /* clear counter */
537 _W = 0;
538
539 /* get offsets into the two bignums */
540 ty = MIN(a->used-1, ix);
541 tx = ix - ty;
542
543 /* setup temp aliases */
544 tmpx = a->dp + tx;
545 tmpy = a->dp + ty;
546
547 /* This is the number of times the loop will iterate, essentially it's
548 while (tx++ < a->used && ty-- >= 0) { ... }
549 */
550 iy = MIN(a->used-tx, ty+1);
551
552 /* now for squaring tx can never equal ty
553 * we halve the distance since they approach at a rate of 2x
554 * and we have to round because odd cases need to be executed
555 */
556 iy = MIN(iy, (ty-tx+1)>>1);
557
558 /* execute loop */
559 for (iz = 0; iz < iy; iz++) {
560 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
561 }
562
563 /* double the inner product and add carry */
564 _W = _W + _W + W1;
565
566 /* even columns have the square term in them */
567 if ((ix&1) == 0) {
568 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
569 }
570
571 /* store it */
572 W[ix] = _W;
573
574 /* make next carry */
575 W1 = _W >> ((mp_word)DIGIT_BIT);
576 }
577
578 /* setup dest */
579 olduse = b->used;
580 b->used = a->used+a->used;
581
582 {
583 mp_digit *tmpb;
584 tmpb = b->dp;
585 for (ix = 0; ix < pa; ix++) {
586 *tmpb++ = W[ix] & MP_MASK;
587 }
588
589 /* clear unused digits [that existed in the old copy of c] */
590 for (; ix < olduse; ix++) {
591 *tmpb++ = 0;
592 }
593 }
594 mp_clamp (b);
595 return MP_OKAY;
596 }
597
598 /* computes a = 2**b
599 *
600 * Simple algorithm which zeroes the int, grows it then just sets one bit
601 * as required.
602 */
603 int
604 mp_2expt (mp_int * a, int b)
605 {
606 int res;
607
608 /* zero a as per default */
609 mp_zero (a);
610
611 /* grow a to accommodate the single bit */
612 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
613 return res;
614 }
615
616 /* set the used count of where the bit will go */
617 a->used = b / DIGIT_BIT + 1;
618
619 /* put the single bit in its place */
620 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
621
622 return MP_OKAY;
623 }
624
625 /* b = |a|
626 *
627 * Simple function copies the input and fixes the sign to positive
628 */
629 int
630 mp_abs (const mp_int * a, mp_int * b)
631 {
632 int res;
633
634 /* copy a to b */
635 if (a != b) {
636 if ((res = mp_copy (a, b)) != MP_OKAY) {
637 return res;
638 }
639 }
640
641 /* force the sign of b to positive */
642 b->sign = MP_ZPOS;
643
644 return MP_OKAY;
645 }
646
647 /* high level addition (handles signs) */
648 int mp_add (mp_int * a, mp_int * b, mp_int * c)
649 {
650 int sa, sb, res;
651
652 /* get sign of both inputs */
653 sa = a->sign;
654 sb = b->sign;
655
656 /* handle two cases, not four */
657 if (sa == sb) {
658 /* both positive or both negative */
659 /* add their magnitudes, copy the sign */
660 c->sign = sa;
661 res = s_mp_add (a, b, c);
662 } else {
663 /* one positive, the other negative */
664 /* subtract the one with the greater magnitude from */
665 /* the one of the lesser magnitude. The result gets */
666 /* the sign of the one with the greater magnitude. */
667 if (mp_cmp_mag (a, b) == MP_LT) {
668 c->sign = sb;
669 res = s_mp_sub (b, a, c);
670 } else {
671 c->sign = sa;
672 res = s_mp_sub (a, b, c);
673 }
674 }
675 return res;
676 }
677
678
679 /* single digit addition */
680 int
681 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
682 {
683 int res, ix, oldused;
684 mp_digit *tmpa, *tmpc, mu;
685
686 /* grow c as required */
687 if (c->alloc < a->used + 1) {
688 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
689 return res;
690 }
691 }
692
693 /* if a is negative and |a| >= b, call c = |a| - b */
694 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
695 /* temporarily fix sign of a */
696 a->sign = MP_ZPOS;
697
698 /* c = |a| - b */
699 res = mp_sub_d(a, b, c);
700
701 /* fix sign */
702 a->sign = c->sign = MP_NEG;
703
704 return res;
705 }
706
707 /* old number of used digits in c */
708 oldused = c->used;
709
710 /* sign always positive */
711 c->sign = MP_ZPOS;
712
713 /* source alias */
714 tmpa = a->dp;
715
716 /* destination alias */
717 tmpc = c->dp;
718
719 /* if a is positive */
720 if (a->sign == MP_ZPOS) {
721 /* add digit, after this we're propagating
722 * the carry.
723 */
724 *tmpc = *tmpa++ + b;
725 mu = *tmpc >> DIGIT_BIT;
726 *tmpc++ &= MP_MASK;
727
728 /* now handle rest of the digits */
729 for (ix = 1; ix < a->used; ix++) {
730 *tmpc = *tmpa++ + mu;
731 mu = *tmpc >> DIGIT_BIT;
732 *tmpc++ &= MP_MASK;
733 }
734 /* set final carry */
735 ix++;
736 *tmpc++ = mu;
737
738 /* setup size */
739 c->used = a->used + 1;
740 } else {
741 /* a was negative and |a| < b */
742 c->used = 1;
743
744 /* the result is a single digit */
745 if (a->used == 1) {
746 *tmpc++ = b - a->dp[0];
747 } else {
748 *tmpc++ = b;
749 }
750
751 /* setup count so the clearing of oldused
752 * can fall through correctly
753 */
754 ix = 1;
755 }
756
757 /* now zero to oldused */
758 while (ix++ < oldused) {
759 *tmpc++ = 0;
760 }
761 mp_clamp(c);
762
763 return MP_OKAY;
764 }
765
766 /* trim unused digits
767 *
768 * This is used to ensure that leading zero digits are
769 * trimed and the leading "used" digit will be non-zero
770 * Typically very fast. Also fixes the sign if there
771 * are no more leading digits
772 */
773 void
774 mp_clamp (mp_int * a)
775 {
776 /* decrease used while the most significant digit is
777 * zero.
778 */
779 while (a->used > 0 && a->dp[a->used - 1] == 0) {
780 --(a->used);
781 }
782
783 /* reset the sign flag if used == 0 */
784 if (a->used == 0) {
785 a->sign = MP_ZPOS;
786 }
787 }
788
789 /* clear one (frees) */
790 void
791 mp_clear (mp_int * a)
792 {
793 int i;
794
795 /* only do anything if a hasn't been freed previously */
796 if (a->dp != NULL) {
797 /* first zero the digits */
798 for (i = 0; i < a->used; i++) {
799 a->dp[i] = 0;
800 }
801
802 /* free ram */
803 free(a->dp);
804
805 /* reset members to make debugging easier */
806 a->dp = NULL;
807 a->alloc = a->used = 0;
808 a->sign = MP_ZPOS;
809 }
810 }
811
812
813 void mp_clear_multi(mp_int *mp, ...)
814 {
815 mp_int* next_mp = mp;
816 va_list args;
817 va_start(args, mp);
818 while (next_mp != NULL) {
819 mp_clear(next_mp);
820 next_mp = va_arg(args, mp_int*);
821 }
822 va_end(args);
823 }
824
825 /* compare two ints (signed)*/
826 int
827 mp_cmp (const mp_int * a, const mp_int * b)
828 {
829 /* compare based on sign */
830 if (a->sign != b->sign) {
831 if (a->sign == MP_NEG) {
832 return MP_LT;
833 } else {
834 return MP_GT;
835 }
836 }
837
838 /* compare digits */
839 if (a->sign == MP_NEG) {
840 /* if negative compare opposite direction */
841 return mp_cmp_mag(b, a);
842 } else {
843 return mp_cmp_mag(a, b);
844 }
845 }
846
847 /* compare a digit */
848 int mp_cmp_d(const mp_int * a, mp_digit b)
849 {
850 /* compare based on sign */
851 if (a->sign == MP_NEG) {
852 return MP_LT;
853 }
854
855 /* compare based on magnitude */
856 if (a->used > 1) {
857 return MP_GT;
858 }
859
860 /* compare the only digit of a to b */
861 if (a->dp[0] > b) {
862 return MP_GT;
863 } else if (a->dp[0] < b) {
864 return MP_LT;
865 } else {
866 return MP_EQ;
867 }
868 }
869
870 /* compare maginitude of two ints (unsigned) */
871 int mp_cmp_mag (const mp_int * a, const mp_int * b)
872 {
873 int n;
874 mp_digit *tmpa, *tmpb;
875
876 /* compare based on # of non-zero digits */
877 if (a->used > b->used) {
878 return MP_GT;
879 }
880
881 if (a->used < b->used) {
882 return MP_LT;
883 }
884
885 /* alias for a */
886 tmpa = a->dp + (a->used - 1);
887
888 /* alias for b */
889 tmpb = b->dp + (a->used - 1);
890
891 /* compare based on digits */
892 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
893 if (*tmpa > *tmpb) {
894 return MP_GT;
895 }
896
897 if (*tmpa < *tmpb) {
898 return MP_LT;
899 }
900 }
901 return MP_EQ;
902 }
903
904 static const int lnz[16] = {
905 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
906 };
907
908 /* Counts the number of lsbs which are zero before the first zero bit */
909 int mp_cnt_lsb(const mp_int *a)
910 {
911 int x;
912 mp_digit q, qq;
913
914 /* easy out */
915 if (mp_iszero(a) == 1) {
916 return 0;
917 }
918
919 /* scan lower digits until non-zero */
920 for (x = 0; x < a->used && a->dp[x] == 0; x++);
921 q = a->dp[x];
922 x *= DIGIT_BIT;
923
924 /* now scan this digit until a 1 is found */
925 if ((q & 1) == 0) {
926 do {
927 qq = q & 15;
928 x += lnz[qq];
929 q >>= 4;
930 } while (qq == 0);
931 }
932 return x;
933 }
934
935 /* copy, b = a */
936 int
937 mp_copy (const mp_int * a, mp_int * b)
938 {
939 int res, n;
940
941 /* if dst == src do nothing */
942 if (a == b) {
943 return MP_OKAY;
944 }
945
946 /* grow dest */
947 if (b->alloc < a->used) {
948 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
949 return res;
950 }
951 }
952
953 /* zero b and copy the parameters over */
954 {
955 register mp_digit *tmpa, *tmpb;
956
957 /* pointer aliases */
958
959 /* source */
960 tmpa = a->dp;
961
962 /* destination */
963 tmpb = b->dp;
964
965 /* copy all the digits */
966 for (n = 0; n < a->used; n++) {
967 *tmpb++ = *tmpa++;
968 }
969
970 /* clear high digits */
971 for (; n < b->used; n++) {
972 *tmpb++ = 0;
973 }
974 }
975
976 /* copy used count and sign */
977 b->used = a->used;
978 b->sign = a->sign;
979 return MP_OKAY;
980 }
981
982 /* returns the number of bits in an int */
983 int
984 mp_count_bits (const mp_int * a)
985 {
986 int r;
987 mp_digit q;
988
989 /* shortcut */
990 if (a->used == 0) {
991 return 0;
992 }
993
994 /* get number of digits and add that */
995 r = (a->used - 1) * DIGIT_BIT;
996
997 /* take the last digit and count the bits in it */
998 q = a->dp[a->used - 1];
999 while (q > ((mp_digit) 0)) {
1000 ++r;
1001 q >>= ((mp_digit) 1);
1002 }
1003 return r;
1004 }
1005
1006 /* integer signed division.
1007 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1008 * HAC pp.598 Algorithm 14.20
1009 *
1010 * Note that the description in HAC is horribly
1011 * incomplete. For example, it doesn't consider
1012 * the case where digits are removed from 'x' in
1013 * the inner loop. It also doesn't consider the
1014 * case that y has fewer than three digits, etc..
1015 *
1016 * The overall algorithm is as described as
1017 * 14.20 from HAC but fixed to treat these cases.
1018 */
1019 int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1020 {
1021 mp_int q, x, y, t1, t2;
1022 int res, n, t, i, norm, neg;
1023
1024 /* is divisor zero ? */
1025 if (mp_iszero (b) == 1) {
1026 return MP_VAL;
1027 }
1028
1029 /* if a < b then q=0, r = a */
1030 if (mp_cmp_mag (a, b) == MP_LT) {
1031 if (d != NULL) {
1032 res = mp_copy (a, d);
1033 } else {
1034 res = MP_OKAY;
1035 }
1036 if (c != NULL) {
1037 mp_zero (c);
1038 }
1039 return res;
1040 }
1041
1042 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1043 return res;
1044 }
1045 q.used = a->used + 2;
1046
1047 if ((res = mp_init (&t1)) != MP_OKAY) {
1048 goto __Q;
1049 }
1050
1051 if ((res = mp_init (&t2)) != MP_OKAY) {
1052 goto __T1;
1053 }
1054
1055 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1056 goto __T2;
1057 }
1058
1059 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1060 goto __X;
1061 }
1062
1063 /* fix the sign */
1064 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1065 x.sign = y.sign = MP_ZPOS;
1066
1067 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1068 norm = mp_count_bits(&y) % DIGIT_BIT;
1069 if (norm < DIGIT_BIT-1) {
1070 norm = (DIGIT_BIT-1) - norm;
1071 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1072 goto __Y;
1073 }
1074 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1075 goto __Y;
1076 }
1077 } else {
1078 norm = 0;
1079 }
1080
1081 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1082 n = x.used - 1;
1083 t = y.used - 1;
1084
1085 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1086 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1087 goto __Y;
1088 }
1089
1090 while (mp_cmp (&x, &y) != MP_LT) {
1091 ++(q.dp[n - t]);
1092 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1093 goto __Y;
1094 }
1095 }
1096
1097 /* reset y by shifting it back down */
1098 mp_rshd (&y, n - t);
1099
1100 /* step 3. for i from n down to (t + 1) */
1101 for (i = n; i >= (t + 1); i--) {
1102 if (i > x.used) {
1103 continue;
1104 }
1105
1106 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1107 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1108 if (x.dp[i] == y.dp[t]) {
1109 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1110 } else {
1111 mp_word tmp;
1112 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1113 tmp |= ((mp_word) x.dp[i - 1]);
1114 tmp /= ((mp_word) y.dp[t]);
1115 if (tmp > (mp_word) MP_MASK)
1116 tmp = MP_MASK;
1117 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1118 }
1119
1120 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1121 xi * b**2 + xi-1 * b + xi-2
1122
1123 do q{i-t-1} -= 1;
1124 */
1125 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1126 do {
1127 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1128
1129 /* find left hand */
1130 mp_zero (&t1);
1131 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1132 t1.dp[1] = y.dp[t];
1133 t1.used = 2;
1134 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1135 goto __Y;
1136 }
1137
1138 /* find right hand */
1139 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1140 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1141 t2.dp[2] = x.dp[i];
1142 t2.used = 3;
1143 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1144
1145 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1146 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1147 goto __Y;
1148 }
1149
1150 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1151 goto __Y;
1152 }
1153
1154 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1155 goto __Y;
1156 }
1157
1158 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1159 if (x.sign == MP_NEG) {
1160 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1161 goto __Y;
1162 }
1163 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1164 goto __Y;
1165 }
1166 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1167 goto __Y;
1168 }
1169
1170 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1171 }
1172 }
1173
1174 /* now q is the quotient and x is the remainder
1175 * [which we have to normalize]
1176 */
1177
1178 /* get sign before writing to c */
1179 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1180
1181 if (c != NULL) {
1182 mp_clamp (&q);
1183 mp_exch (&q, c);
1184 c->sign = neg;
1185 }
1186
1187 if (d != NULL) {
1188 mp_div_2d (&x, norm, &x, NULL);
1189 mp_exch (&x, d);
1190 }
1191
1192 res = MP_OKAY;
1193
1194 __Y:mp_clear (&y);
1195 __X:mp_clear (&x);
1196 __T2:mp_clear (&t2);
1197 __T1:mp_clear (&t1);
1198 __Q:mp_clear (&q);
1199 return res;
1200 }
1201
1202 /* b = a/2 */
1203 int mp_div_2(const mp_int * a, mp_int * b)
1204 {
1205 int x, res, oldused;
1206
1207 /* copy */
1208 if (b->alloc < a->used) {
1209 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1210 return res;
1211 }
1212 }
1213
1214 oldused = b->used;
1215 b->used = a->used;
1216 {
1217 register mp_digit r, rr, *tmpa, *tmpb;
1218
1219 /* source alias */
1220 tmpa = a->dp + b->used - 1;
1221
1222 /* dest alias */
1223 tmpb = b->dp + b->used - 1;
1224
1225 /* carry */
1226 r = 0;
1227 for (x = b->used - 1; x >= 0; x--) {
1228 /* get the carry for the next iteration */
1229 rr = *tmpa & 1;
1230
1231 /* shift the current digit, add in carry and store */
1232 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1233
1234 /* forward carry to next iteration */
1235 r = rr;
1236 }
1237
1238 /* zero excess digits */
1239 tmpb = b->dp + b->used;
1240 for (x = b->used; x < oldused; x++) {
1241 *tmpb++ = 0;
1242 }
1243 }
1244 b->sign = a->sign;
1245 mp_clamp (b);
1246 return MP_OKAY;
1247 }
1248
1249 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1250 int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1251 {
1252 mp_digit D, r, rr;
1253 int x, res;
1254 mp_int t;
1255
1256
1257 /* if the shift count is <= 0 then we do no work */
1258 if (b <= 0) {
1259 res = mp_copy (a, c);
1260 if (d != NULL) {
1261 mp_zero (d);
1262 }
1263 return res;
1264 }
1265
1266 if ((res = mp_init (&t)) != MP_OKAY) {
1267 return res;
1268 }
1269
1270 /* get the remainder */
1271 if (d != NULL) {
1272 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1273 mp_clear (&t);
1274 return res;
1275 }
1276 }
1277
1278 /* copy */
1279 if ((res = mp_copy (a, c)) != MP_OKAY) {
1280 mp_clear (&t);
1281 return res;
1282 }
1283
1284 /* shift by as many digits in the bit count */
1285 if (b >= DIGIT_BIT) {
1286 mp_rshd (c, b / DIGIT_BIT);
1287 }
1288
1289 /* shift any bit count < DIGIT_BIT */
1290 D = (mp_digit) (b % DIGIT_BIT);
1291 if (D != 0) {
1292 register mp_digit *tmpc, mask, shift;
1293
1294 /* mask */
1295 mask = (((mp_digit)1) << D) - 1;
1296
1297 /* shift for lsb */
1298 shift = DIGIT_BIT - D;
1299
1300 /* alias */
1301 tmpc = c->dp + (c->used - 1);
1302
1303 /* carry */
1304 r = 0;
1305 for (x = c->used - 1; x >= 0; x--) {
1306 /* get the lower bits of this word in a temp */
1307 rr = *tmpc & mask;
1308
1309 /* shift the current word and mix in the carry bits from the previous word */
1310 *tmpc = (*tmpc >> D) | (r << shift);
1311 --tmpc;
1312
1313 /* set the carry to the carry bits of the current word found above */
1314 r = rr;
1315 }
1316 }
1317 mp_clamp (c);
1318 if (d != NULL) {
1319 mp_exch (&t, d);
1320 }
1321 mp_clear (&t);
1322 return MP_OKAY;
1323 }
1324
1325 static int s_is_power_of_two(mp_digit b, int *p)
1326 {
1327 int x;
1328
1329 for (x = 1; x < DIGIT_BIT; x++) {
1330 if (b == (((mp_digit)1)<<x)) {
1331 *p = x;
1332 return 1;
1333 }
1334 }
1335 return 0;
1336 }
1337
1338 /* single digit division (based on routine from MPI) */
1339 int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1340 {
1341 mp_int q;
1342 mp_word w;
1343 mp_digit t;
1344 int res, ix;
1345
1346 /* cannot divide by zero */
1347 if (b == 0) {
1348 return MP_VAL;
1349 }
1350
1351 /* quick outs */
1352 if (b == 1 || mp_iszero(a) == 1) {
1353 if (d != NULL) {
1354 *d = 0;
1355 }
1356 if (c != NULL) {
1357 return mp_copy(a, c);
1358 }
1359 return MP_OKAY;
1360 }
1361
1362 /* power of two ? */
1363 if (s_is_power_of_two(b, &ix) == 1) {
1364 if (d != NULL) {
1365 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1366 }
1367 if (c != NULL) {
1368 return mp_div_2d(a, ix, c, NULL);
1369 }
1370 return MP_OKAY;
1371 }
1372
1373 /* no easy answer [c'est la vie]. Just division */
1374 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1375 return res;
1376 }
1377
1378 q.used = a->used;
1379 q.sign = a->sign;
1380 w = 0;
1381 for (ix = a->used - 1; ix >= 0; ix--) {
1382 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1383
1384 if (w >= b) {
1385 t = (mp_digit)(w / b);
1386 w -= ((mp_word)t) * ((mp_word)b);
1387 } else {
1388 t = 0;
1389 }
1390 q.dp[ix] = t;
1391 }
1392
1393 if (d != NULL) {
1394 *d = (mp_digit)w;
1395 }
1396
1397 if (c != NULL) {
1398 mp_clamp(&q);
1399 mp_exch(&q, c);
1400 }
1401 mp_clear(&q);
1402
1403 return res;
1404 }
1405
1406 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1407 *
1408 * Based on algorithm from the paper
1409 *
1410 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1411 * Chae Hoon Lim, Pil Loong Lee,
1412 * POSTECH Information Research Laboratories
1413 *
1414 * The modulus must be of a special format [see manual]
1415 *
1416 * Has been modified to use algorithm 7.10 from the LTM book instead
1417 *
1418 * Input x must be in the range 0 <= x <= (n-1)**2
1419 */
1420 int
1421 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1422 {
1423 int err, i, m;
1424 mp_word r;
1425 mp_digit mu, *tmpx1, *tmpx2;
1426
1427 /* m = digits in modulus */
1428 m = n->used;
1429
1430 /* ensure that "x" has at least 2m digits */
1431 if (x->alloc < m + m) {
1432 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1433 return err;
1434 }
1435 }
1436
1437 /* top of loop, this is where the code resumes if
1438 * another reduction pass is required.
1439 */
1440 top:
1441 /* aliases for digits */
1442 /* alias for lower half of x */
1443 tmpx1 = x->dp;
1444
1445 /* alias for upper half of x, or x/B**m */
1446 tmpx2 = x->dp + m;
1447
1448 /* set carry to zero */
1449 mu = 0;
1450
1451 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1452 for (i = 0; i < m; i++) {
1453 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1454 *tmpx1++ = (mp_digit)(r & MP_MASK);
1455 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1456 }
1457
1458 /* set final carry */
1459 *tmpx1++ = mu;
1460
1461 /* zero words above m */
1462 for (i = m + 1; i < x->used; i++) {
1463 *tmpx1++ = 0;
1464 }
1465
1466 /* clamp, sub and return */
1467 mp_clamp (x);
1468
1469 /* if x >= n then subtract and reduce again
1470 * Each successive "recursion" makes the input smaller and smaller.
1471 */
1472 if (mp_cmp_mag (x, n) != MP_LT) {
1473 s_mp_sub(x, n, x);
1474 goto top;
1475 }
1476 return MP_OKAY;
1477 }
1478
1479 /* determines the setup value */
1480 void mp_dr_setup(const mp_int *a, mp_digit *d)
1481 {
1482 /* the casts are required if DIGIT_BIT is one less than
1483 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1484 */
1485 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1486 ((mp_word)a->dp[0]));
1487 }
1488
1489 /* swap the elements of two integers, for cases where you can't simply swap the
1490 * mp_int pointers around
1491 */
1492 void
1493 mp_exch (mp_int * a, mp_int * b)
1494 {
1495 mp_int t;
1496
1497 t = *a;
1498 *a = *b;
1499 *b = t;
1500 }
1501
1502 /* this is a shell function that calls either the normal or Montgomery
1503 * exptmod functions. Originally the call to the montgomery code was
1504 * embedded in the normal function but that wasted a lot of stack space
1505 * for nothing (since 99% of the time the Montgomery code would be called)
1506 */
1507 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1508 {
1509 int dr;
1510
1511 /* modulus P must be positive */
1512 if (P->sign == MP_NEG) {
1513 return MP_VAL;
1514 }
1515
1516 /* if exponent X is negative we have to recurse */
1517 if (X->sign == MP_NEG) {
1518 mp_int tmpG, tmpX;
1519 int err;
1520
1521 /* first compute 1/G mod P */
1522 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1523 return err;
1524 }
1525 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1526 mp_clear(&tmpG);
1527 return err;
1528 }
1529
1530 /* now get |X| */
1531 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1532 mp_clear(&tmpG);
1533 return err;
1534 }
1535 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1536 mp_clear_multi(&tmpG, &tmpX, NULL);
1537 return err;
1538 }
1539
1540 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1541 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1542 mp_clear_multi(&tmpG, &tmpX, NULL);
1543 return err;
1544 }
1545
1546 dr = 0;
1547
1548 /* if the modulus is odd or dr != 0 use the fast method */
1549 if (mp_isodd (P) == 1 || dr != 0) {
1550 return mp_exptmod_fast (G, X, P, Y, dr);
1551 } else {
1552 /* otherwise use the generic Barrett reduction technique */
1553 return s_mp_exptmod (G, X, P, Y);
1554 }
1555 }
1556
1557 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1558 *
1559 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1560 * The value of k changes based on the size of the exponent.
1561 *
1562 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1563 */
1564
1565 int
1566 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1567 {
1568 mp_int M[256], res;
1569 mp_digit buf, mp;
1570 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1571
1572 /* use a pointer to the reduction algorithm. This allows us to use
1573 * one of many reduction algorithms without modding the guts of
1574 * the code with if statements everywhere.
1575 */
1576 int (*redux)(mp_int*,const mp_int*,mp_digit);
1577
1578 /* find window size */
1579 x = mp_count_bits (X);
1580 if (x <= 7) {
1581 winsize = 2;
1582 } else if (x <= 36) {
1583 winsize = 3;
1584 } else if (x <= 140) {
1585 winsize = 4;
1586 } else if (x <= 450) {
1587 winsize = 5;
1588 } else if (x <= 1303) {
1589 winsize = 6;
1590 } else if (x <= 3529) {
1591 winsize = 7;
1592 } else {
1593 winsize = 8;
1594 }
1595
1596 /* init M array */
1597 /* init first cell */
1598 if ((err = mp_init(&M[1])) != MP_OKAY) {
1599 return err;
1600 }
1601
1602 /* now init the second half of the array */
1603 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1604 if ((err = mp_init(&M[x])) != MP_OKAY) {
1605 for (y = 1<<(winsize-1); y < x; y++) {
1606 mp_clear (&M[y]);
1607 }
1608 mp_clear(&M[1]);
1609 return err;
1610 }
1611 }
1612
1613 /* determine and setup reduction code */
1614 if (redmode == 0) {
1615 /* now setup montgomery */
1616 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1617 goto __M;
1618 }
1619
1620 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1621 if (((P->used * 2 + 1) < MP_WARRAY) &&
1622 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1623 redux = fast_mp_montgomery_reduce;
1624 } else {
1625 /* use slower baseline Montgomery method */
1626 redux = mp_montgomery_reduce;
1627 }
1628 } else if (redmode == 1) {
1629 /* setup DR reduction for moduli of the form B**k - b */
1630 mp_dr_setup(P, &mp);
1631 redux = mp_dr_reduce;
1632 } else {
1633 /* setup DR reduction for moduli of the form 2**k - b */
1634 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1635 goto __M;
1636 }
1637 redux = mp_reduce_2k;
1638 }
1639
1640 /* setup result */
1641 if ((err = mp_init (&res)) != MP_OKAY) {
1642 goto __M;
1643 }
1644
1645 /* create M table
1646 *
1647
1648 *
1649 * The first half of the table is not computed though accept for M[0] and M[1]
1650 */
1651
1652 if (redmode == 0) {
1653 /* now we need R mod m */
1654 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1655 goto __RES;
1656 }
1657
1658 /* now set M[1] to G * R mod m */
1659 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1660 goto __RES;
1661 }
1662 } else {
1663 mp_set(&res, 1);
1664 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1665 goto __RES;
1666 }
1667 }
1668
1669 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
1670 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1671 goto __RES;
1672 }
1673
1674 for (x = 0; x < (winsize - 1); x++) {
1675 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
1676 goto __RES;
1677 }
1678 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
1679 goto __RES;
1680 }
1681 }
1682
1683 /* create upper table */
1684 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1685 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1686 goto __RES;
1687 }
1688 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1689 goto __RES;
1690 }
1691 }
1692
1693 /* set initial mode and bit cnt */
1694 mode = 0;
1695 bitcnt = 1;
1696 buf = 0;
1697 digidx = X->used - 1;
1698 bitcpy = 0;
1699 bitbuf = 0;
1700
1701 for (;;) {
1702 /* grab next digit as required */
1703 if (--bitcnt == 0) {
1704 /* if digidx == -1 we are out of digits so break */
1705 if (digidx == -1) {
1706 break;
1707 }
1708 /* read next digit and reset bitcnt */
1709 buf = X->dp[digidx--];
1710 bitcnt = DIGIT_BIT;
1711 }
1712
1713 /* grab the next msb from the exponent */
1714 y = (buf >> (DIGIT_BIT - 1)) & 1;
1715 buf <<= (mp_digit)1;
1716
1717 /* if the bit is zero and mode == 0 then we ignore it
1718 * These represent the leading zero bits before the first 1 bit
1719 * in the exponent. Technically this opt is not required but it
1720 * does lower the # of trivial squaring/reductions used
1721 */
1722 if (mode == 0 && y == 0) {
1723 continue;
1724 }
1725
1726 /* if the bit is zero and mode == 1 then we square */
1727 if (mode == 1 && y == 0) {
1728 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1729 goto __RES;
1730 }
1731 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1732 goto __RES;
1733 }
1734 continue;
1735 }
1736
1737 /* else we add it to the window */
1738 bitbuf |= (y << (winsize - ++bitcpy));
1739 mode = 2;
1740
1741 if (bitcpy == winsize) {
1742 /* ok window is filled so square as required and multiply */
1743 /* square first */
1744 for (x = 0; x < winsize; x++) {
1745 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1746 goto __RES;
1747 }
1748 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1749 goto __RES;
1750 }
1751 }
1752
1753 /* then multiply */
1754 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1755 goto __RES;
1756 }
1757 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1758 goto __RES;
1759 }
1760
1761 /* empty window and reset */
1762 bitcpy = 0;
1763 bitbuf = 0;
1764 mode = 1;
1765 }
1766 }
1767
1768 /* if bits remain then square/multiply */
1769 if (mode == 2 && bitcpy > 0) {
1770 /* square then multiply if the bit is set */
1771 for (x = 0; x < bitcpy; x++) {
1772 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1773 goto __RES;
1774 }
1775 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1776 goto __RES;
1777 }
1778
1779 /* get next bit of the window */
1780 bitbuf <<= 1;
1781 if ((bitbuf & (1 << winsize)) != 0) {
1782 /* then multiply */
1783 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1784 goto __RES;
1785 }
1786 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1787 goto __RES;
1788 }
1789 }
1790 }
1791 }
1792
1793 if (redmode == 0) {
1794 /* fixup result if Montgomery reduction is used
1795 * recall that any value in a Montgomery system is
1796 * actually multiplied by R mod n. So we have
1797 * to reduce one more time to cancel out the factor
1798 * of R.
1799 */
1800 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1801 goto __RES;
1802 }
1803 }
1804
1805 /* swap res with Y */
1806 mp_exch (&res, Y);
1807 err = MP_OKAY;
1808 __RES:mp_clear (&res);
1809 __M:
1810 mp_clear(&M[1]);
1811 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1812 mp_clear (&M[x]);
1813 }
1814 return err;
1815 }
1816
1817 /* Greatest Common Divisor using the binary method */
1818 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
1819 {
1820 mp_int u, v;
1821 int k, u_lsb, v_lsb, res;
1822
1823 /* either zero than gcd is the largest */
1824 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
1825 return mp_abs (b, c);
1826 }
1827 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
1828 return mp_abs (a, c);
1829 }
1830
1831 /* optimized. At this point if a == 0 then
1832 * b must equal zero too
1833 */
1834 if (mp_iszero (a) == 1) {
1835 mp_zero(c);
1836 return MP_OKAY;
1837 }
1838
1839 /* get copies of a and b we can modify */
1840 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
1841 return res;
1842 }
1843
1844 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
1845 goto __U;
1846 }
1847
1848 /* must be positive for the remainder of the algorithm */
1849 u.sign = v.sign = MP_ZPOS;
1850
1851 /* B1. Find the common power of two for u and v */
1852 u_lsb = mp_cnt_lsb(&u);
1853 v_lsb = mp_cnt_lsb(&v);
1854 k = MIN(u_lsb, v_lsb);
1855
1856 if (k > 0) {
1857 /* divide the power of two out */
1858 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
1859 goto __V;
1860 }
1861
1862 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
1863 goto __V;
1864 }
1865 }
1866
1867 /* divide any remaining factors of two out */
1868 if (u_lsb != k) {
1869 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
1870 goto __V;
1871 }
1872 }
1873
1874 if (v_lsb != k) {
1875 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
1876 goto __V;
1877 }
1878 }
1879
1880 while (mp_iszero(&v) == 0) {
1881 /* make sure v is the largest */
1882 if (mp_cmp_mag(&u, &v) == MP_GT) {
1883 /* swap u and v to make sure v is >= u */
1884 mp_exch(&u, &v);
1885 }
1886
1887 /* subtract smallest from largest */
1888 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
1889 goto __V;
1890 }
1891
1892 /* Divide out all factors of two */
1893 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
1894 goto __V;
1895 }
1896 }
1897
1898 /* multiply by 2**k which we divided out at the beginning */
1899 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
1900 goto __V;
1901 }
1902 c->sign = MP_ZPOS;
1903 res = MP_OKAY;
1904 __V:mp_clear (&u);
1905 __U:mp_clear (&v);
1906 return res;
1907 }
1908
1909 /* get the lower 32-bits of an mp_int */
1910 unsigned long mp_get_int(const mp_int * a)
1911 {
1912 int i;
1913 unsigned long res;
1914
1915 if (a->used == 0) {
1916 return 0;
1917 }
1918
1919 /* get number of digits of the lsb we have to read */
1920 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
1921
1922 /* get most significant digit of result */
1923 res = DIGIT(a,i);
1924
1925 while (--i >= 0) {
1926 res = (res << DIGIT_BIT) | DIGIT(a,i);
1927 }
1928
1929 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
1930 return res & 0xFFFFFFFFUL;
1931 }
1932
1933 /* grow as required */
1934 int mp_grow (mp_int * a, int size)
1935 {
1936 int i;
1937 mp_digit *tmp;
1938
1939 /* if the alloc size is smaller alloc more ram */
1940 if (a->alloc < size) {
1941 /* ensure there are always at least MP_PREC digits extra on top */
1942 size += (MP_PREC * 2) - (size % MP_PREC);
1943
1944 /* reallocate the array a->dp
1945 *
1946 * We store the return in a temporary variable
1947 * in case the operation failed we don't want
1948 * to overwrite the dp member of a.
1949 */
1950 tmp = realloc (a->dp, sizeof (mp_digit) * size);
1951 if (tmp == NULL) {
1952 /* reallocation failed but "a" is still valid [can be freed] */
1953 return MP_MEM;
1954 }
1955
1956 /* reallocation succeeded so set a->dp */
1957 a->dp = tmp;
1958
1959 /* zero excess digits */
1960 i = a->alloc;
1961 a->alloc = size;
1962 for (; i < a->alloc; i++) {
1963 a->dp[i] = 0;
1964 }
1965 }
1966 return MP_OKAY;
1967 }
1968
1969 /* init a new mp_int */
1970 int mp_init (mp_int * a)
1971 {
1972 int i;
1973
1974 /* allocate memory required and clear it */
1975 a->dp = malloc (sizeof (mp_digit) * MP_PREC);
1976 if (a->dp == NULL) {
1977 return MP_MEM;
1978 }
1979
1980 /* set the digits to zero */
1981 for (i = 0; i < MP_PREC; i++) {
1982 a->dp[i] = 0;
1983 }
1984
1985 /* set the used to zero, allocated digits to the default precision
1986 * and sign to positive */
1987 a->used = 0;
1988 a->alloc = MP_PREC;
1989 a->sign = MP_ZPOS;
1990
1991 return MP_OKAY;
1992 }
1993
1994 /* creates "a" then copies b into it */
1995 int mp_init_copy (mp_int * a, const mp_int * b)
1996 {
1997 int res;
1998
1999 if ((res = mp_init (a)) != MP_OKAY) {
2000 return res;
2001 }
2002 return mp_copy (b, a);
2003 }
2004
2005 int mp_init_multi(mp_int *mp, ...)
2006 {
2007 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2008 int n = 0; /* Number of ok inits */
2009 mp_int* cur_arg = mp;
2010 va_list args;
2011
2012 va_start(args, mp); /* init args to next argument from caller */
2013 while (cur_arg != NULL) {
2014 if (mp_init(cur_arg) != MP_OKAY) {
2015 /* Oops - error! Back-track and mp_clear what we already
2016 succeeded in init-ing, then return error.
2017 */
2018 va_list clean_args;
2019
2020 /* end the current list */
2021 va_end(args);
2022
2023 /* now start cleaning up */
2024 cur_arg = mp;
2025 va_start(clean_args, mp);
2026 while (n--) {
2027 mp_clear(cur_arg);
2028 cur_arg = va_arg(clean_args, mp_int*);
2029 }
2030 va_end(clean_args);
2031 res = MP_MEM;
2032 break;
2033 }
2034 n++;
2035 cur_arg = va_arg(args, mp_int*);
2036 }
2037 va_end(args);
2038 return res; /* Assumed ok, if error flagged above. */
2039 }
2040
2041 /* init an mp_init for a given size */
2042 int mp_init_size (mp_int * a, int size)
2043 {
2044 int x;
2045
2046 /* pad size so there are always extra digits */
2047 size += (MP_PREC * 2) - (size % MP_PREC);
2048
2049 /* alloc mem */
2050 a->dp = malloc (sizeof (mp_digit) * size);
2051 if (a->dp == NULL) {
2052 return MP_MEM;
2053 }
2054
2055 /* set the members */
2056 a->used = 0;
2057 a->alloc = size;
2058 a->sign = MP_ZPOS;
2059
2060 /* zero the digits */
2061 for (x = 0; x < size; x++) {
2062 a->dp[x] = 0;
2063 }
2064
2065 return MP_OKAY;
2066 }
2067
2068 /* hac 14.61, pp608 */
2069 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2070 {
2071 /* b cannot be negative */
2072 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2073 return MP_VAL;
2074 }
2075
2076 /* if the modulus is odd we can use a faster routine instead */
2077 if (mp_isodd (b) == 1) {
2078 return fast_mp_invmod (a, b, c);
2079 }
2080
2081 return mp_invmod_slow(a, b, c);
2082 }
2083
2084 /* hac 14.61, pp608 */
2085 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2086 {
2087 mp_int x, y, u, v, A, B, C, D;
2088 int res;
2089
2090 /* b cannot be negative */
2091 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2092 return MP_VAL;
2093 }
2094
2095 /* init temps */
2096 if ((res = mp_init_multi(&x, &y, &u, &v,
2097 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2098 return res;
2099 }
2100
2101 /* x = a, y = b */
2102 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2103 goto __ERR;
2104 }
2105 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2106 goto __ERR;
2107 }
2108
2109 /* 2. [modified] if x,y are both even then return an error! */
2110 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2111 res = MP_VAL;
2112 goto __ERR;
2113 }
2114
2115 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2116 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2117 goto __ERR;
2118 }
2119 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2120 goto __ERR;
2121 }
2122 mp_set (&A, 1);
2123 mp_set (&D, 1);
2124
2125 top:
2126 /* 4. while u is even do */
2127 while (mp_iseven (&u) == 1) {
2128 /* 4.1 u = u/2 */
2129 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2130 goto __ERR;
2131 }
2132 /* 4.2 if A or B is odd then */
2133 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2134 /* A = (A+y)/2, B = (B-x)/2 */
2135 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2136 goto __ERR;
2137 }
2138 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2139 goto __ERR;
2140 }
2141 }
2142 /* A = A/2, B = B/2 */
2143 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2144 goto __ERR;
2145 }
2146 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2147 goto __ERR;
2148 }
2149 }
2150
2151 /* 5. while v is even do */
2152 while (mp_iseven (&v) == 1) {
2153 /* 5.1 v = v/2 */
2154 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2155 goto __ERR;
2156 }
2157 /* 5.2 if C or D is odd then */
2158 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2159 /* C = (C+y)/2, D = (D-x)/2 */
2160 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2161 goto __ERR;
2162 }
2163 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2164 goto __ERR;
2165 }
2166 }
2167 /* C = C/2, D = D/2 */
2168 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2169 goto __ERR;
2170 }
2171 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2172 goto __ERR;
2173 }
2174 }
2175
2176 /* 6. if u >= v then */
2177 if (mp_cmp (&u, &v) != MP_LT) {
2178 /* u = u - v, A = A - C, B = B - D */
2179 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2180 goto __ERR;
2181 }
2182
2183 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2184 goto __ERR;
2185 }
2186
2187 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2188 goto __ERR;
2189 }
2190 } else {
2191 /* v - v - u, C = C - A, D = D - B */
2192 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2193 goto __ERR;
2194 }
2195
2196 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2197 goto __ERR;
2198 }
2199
2200 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2201 goto __ERR;
2202 }
2203 }
2204
2205 /* if not zero goto step 4 */
2206 if (mp_iszero (&u) == 0)
2207 goto top;
2208
2209 /* now a = C, b = D, gcd == g*v */
2210
2211 /* if v != 1 then there is no inverse */
2212 if (mp_cmp_d (&v, 1) != MP_EQ) {
2213 res = MP_VAL;
2214 goto __ERR;
2215 }
2216
2217 /* if its too low */
2218 while (mp_cmp_d(&C, 0) == MP_LT) {
2219 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2220 goto __ERR;
2221 }
2222 }
2223
2224 /* too big */
2225 while (mp_cmp_mag(&C, b) != MP_LT) {
2226 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2227 goto __ERR;
2228 }
2229 }
2230
2231 /* C is now the inverse */
2232 mp_exch (&C, c);
2233 res = MP_OKAY;
2234 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2235 return res;
2236 }
2237
2238 /* c = |a| * |b| using Karatsuba Multiplication using
2239 * three half size multiplications
2240 *
2241 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2242 * let n represent half of the number of digits in
2243 * the min(a,b)
2244 *
2245 * a = a1 * B**n + a0
2246 * b = b1 * B**n + b0
2247 *
2248 * Then, a * b =>
2249 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2250 *
2251 * Note that a1b1 and a0b0 are used twice and only need to be
2252 * computed once. So in total three half size (half # of
2253 * digit) multiplications are performed, a0b0, a1b1 and
2254 * (a1-b1)(a0-b0)
2255 *
2256 * Note that a multiplication of half the digits requires
2257 * 1/4th the number of single precision multiplications so in
2258 * total after one call 25% of the single precision multiplications
2259 * are saved. Note also that the call to mp_mul can end up back
2260 * in this function if the a0, a1, b0, or b1 are above the threshold.
2261 * This is known as divide-and-conquer and leads to the famous
2262 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2263 * the standard O(N**2) that the baseline/comba methods use.
2264 * Generally though the overhead of this method doesn't pay off
2265 * until a certain size (N ~ 80) is reached.
2266 */
2267 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2268 {
2269 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2270 int B, err;
2271
2272 /* default the return code to an error */
2273 err = MP_MEM;
2274
2275 /* min # of digits */
2276 B = MIN (a->used, b->used);
2277
2278 /* now divide in two */
2279 B = B >> 1;
2280
2281 /* init copy all the temps */
2282 if (mp_init_size (&x0, B) != MP_OKAY)
2283 goto ERR;
2284 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2285 goto X0;
2286 if (mp_init_size (&y0, B) != MP_OKAY)
2287 goto X1;
2288 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2289 goto Y0;
2290
2291 /* init temps */
2292 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2293 goto Y1;
2294 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2295 goto T1;
2296 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2297 goto X0Y0;
2298
2299 /* now shift the digits */
2300 x0.used = y0.used = B;
2301 x1.used = a->used - B;
2302 y1.used = b->used - B;
2303
2304 {
2305 register int x;
2306 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2307
2308 /* we copy the digits directly instead of using higher level functions
2309 * since we also need to shift the digits
2310 */
2311 tmpa = a->dp;
2312 tmpb = b->dp;
2313
2314 tmpx = x0.dp;
2315 tmpy = y0.dp;
2316 for (x = 0; x < B; x++) {
2317 *tmpx++ = *tmpa++;
2318 *tmpy++ = *tmpb++;
2319 }
2320
2321 tmpx = x1.dp;
2322 for (x = B; x < a->used; x++) {
2323 *tmpx++ = *tmpa++;
2324 }
2325
2326 tmpy = y1.dp;
2327 for (x = B; x < b->used; x++) {
2328 *tmpy++ = *tmpb++;
2329 }
2330 }
2331
2332 /* only need to clamp the lower words since by definition the
2333 * upper words x1/y1 must have a known number of digits
2334 */
2335 mp_clamp (&x0);
2336 mp_clamp (&y0);
2337
2338 /* now calc the products x0y0 and x1y1 */
2339 /* after this x0 is no longer required, free temp [x0==t2]! */
2340 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2341 goto X1Y1; /* x0y0 = x0*y0 */
2342 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2343 goto X1Y1; /* x1y1 = x1*y1 */
2344
2345 /* now calc x1-x0 and y1-y0 */
2346 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2347 goto X1Y1; /* t1 = x1 - x0 */
2348 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2349 goto X1Y1; /* t2 = y1 - y0 */
2350 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2351 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2352
2353 /* add x0y0 */
2354 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2355 goto X1Y1; /* t2 = x0y0 + x1y1 */
2356 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2357 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2358
2359 /* shift by B */
2360 if (mp_lshd (&t1, B) != MP_OKAY)
2361 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2362 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2363 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2364
2365 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2366 goto X1Y1; /* t1 = x0y0 + t1 */
2367 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2368 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2369
2370 /* Algorithm succeeded set the return code to MP_OKAY */
2371 err = MP_OKAY;
2372
2373 X1Y1:mp_clear (&x1y1);
2374 X0Y0:mp_clear (&x0y0);
2375 T1:mp_clear (&t1);
2376 Y1:mp_clear (&y1);
2377 Y0:mp_clear (&y0);
2378 X1:mp_clear (&x1);
2379 X0:mp_clear (&x0);
2380 ERR:
2381 return err;
2382 }
2383
2384 /* Karatsuba squaring, computes b = a*a using three
2385 * half size squarings
2386 *
2387 * See comments of karatsuba_mul for details. It
2388 * is essentially the same algorithm but merely
2389 * tuned to perform recursive squarings.
2390 */
2391 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2392 {
2393 mp_int x0, x1, t1, t2, x0x0, x1x1;
2394 int B, err;
2395
2396 err = MP_MEM;
2397
2398 /* min # of digits */
2399 B = a->used;
2400
2401 /* now divide in two */
2402 B = B >> 1;
2403
2404 /* init copy all the temps */
2405 if (mp_init_size (&x0, B) != MP_OKAY)
2406 goto ERR;
2407 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2408 goto X0;
2409
2410 /* init temps */
2411 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2412 goto X1;
2413 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2414 goto T1;
2415 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2416 goto T2;
2417 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2418 goto X0X0;
2419
2420 {
2421 register int x;
2422 register mp_digit *dst, *src;
2423
2424 src = a->dp;
2425
2426 /* now shift the digits */
2427 dst = x0.dp;
2428 for (x = 0; x < B; x++) {
2429 *dst++ = *src++;
2430 }
2431
2432 dst = x1.dp;
2433 for (x = B; x < a->used; x++) {
2434 *dst++ = *src++;
2435 }
2436 }
2437
2438 x0.used = B;
2439 x1.used = a->used - B;
2440
2441 mp_clamp (&x0);
2442
2443 /* now calc the products x0*x0 and x1*x1 */
2444 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2445 goto X1X1; /* x0x0 = x0*x0 */
2446 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2447 goto X1X1; /* x1x1 = x1*x1 */
2448
2449 /* now calc (x1-x0)**2 */
2450 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2451 goto X1X1; /* t1 = x1 - x0 */
2452 if (mp_sqr (&t1, &t1) != MP_OKAY)
2453 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2454
2455 /* add x0y0 */
2456 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2457 goto X1X1; /* t2 = x0x0 + x1x1 */
2458 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2459 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2460
2461 /* shift by B */
2462 if (mp_lshd (&t1, B) != MP_OKAY)
2463 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2464 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2465 goto X1X1; /* x1x1 = x1x1 << 2*B */
2466
2467 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2468 goto X1X1; /* t1 = x0x0 + t1 */
2469 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2470 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2471
2472 err = MP_OKAY;
2473
2474 X1X1:mp_clear (&x1x1);
2475 X0X0:mp_clear (&x0x0);
2476 T2:mp_clear (&t2);
2477 T1:mp_clear (&t1);
2478 X1:mp_clear (&x1);
2479 X0:mp_clear (&x0);
2480 ERR:
2481 return err;
2482 }
2483
2484 /* computes least common multiple as |a*b|/(a, b) */
2485 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2486 {
2487 int res;
2488 mp_int t1, t2;
2489
2490
2491 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2492 return res;
2493 }
2494
2495 /* t1 = get the GCD of the two inputs */
2496 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2497 goto __T;
2498 }
2499
2500 /* divide the smallest by the GCD */
2501 if (mp_cmp_mag(a, b) == MP_LT) {
2502 /* store quotient in t2 such that t2 * b is the LCM */
2503 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2504 goto __T;
2505 }
2506 res = mp_mul(b, &t2, c);
2507 } else {
2508 /* store quotient in t2 such that t2 * a is the LCM */
2509 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2510 goto __T;
2511 }
2512 res = mp_mul(a, &t2, c);
2513 }
2514
2515 /* fix the sign to positive */
2516 c->sign = MP_ZPOS;
2517
2518 __T:
2519 mp_clear_multi (&t1, &t2, NULL);
2520 return res;
2521 }
2522
2523 /* shift left a certain amount of digits */
2524 int mp_lshd (mp_int * a, int b)
2525 {
2526 int x, res;
2527
2528 /* if its less than zero return */
2529 if (b <= 0) {
2530 return MP_OKAY;
2531 }
2532
2533 /* grow to fit the new digits */
2534 if (a->alloc < a->used + b) {
2535 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
2536 return res;
2537 }
2538 }
2539
2540 {
2541 register mp_digit *top, *bottom;
2542
2543 /* increment the used by the shift amount then copy upwards */
2544 a->used += b;
2545
2546 /* top */
2547 top = a->dp + a->used - 1;
2548
2549 /* base */
2550 bottom = a->dp + a->used - 1 - b;
2551
2552 /* much like mp_rshd this is implemented using a sliding window
2553 * except the window goes the otherway around. Copying from
2554 * the bottom to the top. see bn_mp_rshd.c for more info.
2555 */
2556 for (x = a->used - 1; x >= b; x--) {
2557 *top-- = *bottom--;
2558 }
2559
2560 /* zero the lower digits */
2561 top = a->dp;
2562 for (x = 0; x < b; x++) {
2563 *top++ = 0;
2564 }
2565 }
2566 return MP_OKAY;
2567 }
2568
2569 /* c = a mod b, 0 <= c < b */
2570 int
2571 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2572 {
2573 mp_int t;
2574 int res;
2575
2576 if ((res = mp_init (&t)) != MP_OKAY) {
2577 return res;
2578 }
2579
2580 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2581 mp_clear (&t);
2582 return res;
2583 }
2584
2585 if (t.sign != b->sign) {
2586 res = mp_add (b, &t, c);
2587 } else {
2588 res = MP_OKAY;
2589 mp_exch (&t, c);
2590 }
2591
2592 mp_clear (&t);
2593 return res;
2594 }
2595
2596 /* calc a value mod 2**b */
2597 int
2598 mp_mod_2d (const mp_int * a, int b, mp_int * c)
2599 {
2600 int x, res;
2601
2602 /* if b is <= 0 then zero the int */
2603 if (b <= 0) {
2604 mp_zero (c);
2605 return MP_OKAY;
2606 }
2607
2608 /* if the modulus is larger than the value than return */
2609 if (b > a->used * DIGIT_BIT) {
2610 res = mp_copy (a, c);
2611 return res;
2612 }
2613
2614 /* copy */
2615 if ((res = mp_copy (a, c)) != MP_OKAY) {
2616 return res;
2617 }
2618
2619 /* zero digits above the last digit of the modulus */
2620 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
2621 c->dp[x] = 0;
2622 }
2623 /* clear the digit that is not completely outside/inside the modulus */
2624 c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1;
2625 mp_clamp (c);
2626 return MP_OKAY;
2627 }
2628
2629 int
2630 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2631 {
2632 return mp_div_d(a, b, NULL, c);
2633 }
2634
2635 /*
2636 * shifts with subtractions when the result is greater than b.
2637 *
2638 * The method is slightly modified to shift B unconditionally up to just under
2639 * the leading bit of b. This saves a lot of multiple precision shifting.
2640 */
2641 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2642 {
2643 int x, bits, res;
2644
2645 /* how many bits of last digit does b use */
2646 bits = mp_count_bits (b) % DIGIT_BIT;
2647
2648
2649 if (b->used > 1) {
2650 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2651 return res;
2652 }
2653 } else {
2654 mp_set(a, 1);
2655 bits = 1;
2656 }
2657
2658
2659 /* now compute C = A * B mod b */
2660 for (x = bits - 1; x < DIGIT_BIT; x++) {
2661 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2662 return res;
2663 }
2664 if (mp_cmp_mag (a, b) != MP_LT) {
2665 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2666 return res;
2667 }
2668 }
2669 }
2670
2671 return MP_OKAY;
2672 }
2673
2674 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2675 int
2676 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2677 {
2678 int ix, res, digs;
2679 mp_digit mu;
2680
2681 /* can the fast reduction [comba] method be used?
2682 *
2683 * Note that unlike in mul you're safely allowed *less*
2684 * than the available columns [255 per default] since carries
2685 * are fixed up in the inner loop.
2686 */
2687 digs = n->used * 2 + 1;
2688 if ((digs < MP_WARRAY) &&
2689 n->used <
2690 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2691 return fast_mp_montgomery_reduce (x, n, rho);
2692 }
2693
2694 /* grow the input as required */
2695 if (x->alloc < digs) {
2696 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2697 return res;
2698 }
2699 }
2700 x->used = digs;
2701
2702 for (ix = 0; ix < n->used; ix++) {
2703 /* mu = ai * rho mod b
2704 *
2705 * The value of rho must be precalculated via
2706 * montgomery_setup() such that
2707 * it equals -1/n0 mod b this allows the
2708 * following inner loop to reduce the
2709 * input one digit at a time
2710 */
2711 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2712
2713 /* a = a + mu * m * b**i */
2714 {
2715 register int iy;
2716 register mp_digit *tmpn, *tmpx, u;
2717 register mp_word r;
2718
2719 /* alias for digits of the modulus */
2720 tmpn = n->dp;
2721
2722 /* alias for the digits of x [the input] */
2723 tmpx = x->dp + ix;
2724
2725 /* set the carry to zero */
2726 u = 0;
2727
2728 /* Multiply and add in place */
2729 for (iy = 0; iy < n->used; iy++) {
2730 /* compute product and sum */
2731 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2732 ((mp_word) u) + ((mp_word) * tmpx);
2733
2734 /* get carry */
2735 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2736
2737 /* fix digit */
2738 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2739 }
2740 /* At this point the ix'th digit of x should be zero */
2741
2742
2743 /* propagate carries upwards as required*/
2744 while (u) {
2745 *tmpx += u;
2746 u = *tmpx >> DIGIT_BIT;
2747 *tmpx++ &= MP_MASK;
2748 }
2749 }
2750 }
2751
2752 /* at this point the n.used'th least
2753 * significant digits of x are all zero
2754 * which means we can shift x to the
2755 * right by n.used digits and the
2756 * residue is unchanged.
2757 */
2758
2759 /* x = x/b**n.used */
2760 mp_clamp(x);
2761 mp_rshd (x, n->used);
2762
2763 /* if x >= n then x = x - n */
2764 if (mp_cmp_mag (x, n) != MP_LT) {
2765 return s_mp_sub (x, n, x);
2766 }
2767
2768 return MP_OKAY;
2769 }
2770
2771 /* setups the montgomery reduction stuff */
2772 int
2773 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
2774 {
2775 mp_digit x, b;
2776
2777 /* fast inversion mod 2**k
2778 *
2779 * Based on the fact that
2780 *
2781 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2782 * => 2*X*A - X*X*A*A = 1
2783 * => 2*(1) - (1) = 1
2784 */
2785 b = n->dp[0];
2786
2787 if ((b & 1) == 0) {
2788 return MP_VAL;
2789 }
2790
2791 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2792 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2793 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2794 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2795
2796 /* rho = -1/m mod b */
2797 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
2798
2799 return MP_OKAY;
2800 }
2801
2802 /* high level multiplication (handles sign) */
2803 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
2804 {
2805 int res, neg;
2806 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2807
2808 /* use Karatsuba? */
2809 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2810 res = mp_karatsuba_mul (a, b, c);
2811 } else
2812 {
2813 /* can we use the fast multiplier?
2814 *
2815 * The fast multiplier can be used if the output will
2816 * have less than MP_WARRAY digits and the number of
2817 * digits won't affect carry propagation
2818 */
2819 int digs = a->used + b->used + 1;
2820
2821 if ((digs < MP_WARRAY) &&
2822 MIN(a->used, b->used) <=
2823 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2824 res = fast_s_mp_mul_digs (a, b, c, digs);
2825 } else
2826 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2827 }
2828 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2829 return res;
2830 }
2831
2832 /* b = a*2 */
2833 int mp_mul_2(const mp_int * a, mp_int * b)
2834 {
2835 int x, res, oldused;
2836
2837 /* grow to accommodate result */
2838 if (b->alloc < a->used + 1) {
2839 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2840 return res;
2841 }
2842 }
2843
2844 oldused = b->used;
2845 b->used = a->used;
2846
2847 {
2848 register mp_digit r, rr, *tmpa, *tmpb;
2849
2850 /* alias for source */
2851 tmpa = a->dp;
2852
2853 /* alias for dest */
2854 tmpb = b->dp;
2855
2856 /* carry */
2857 r = 0;
2858 for (x = 0; x < a->used; x++) {
2859
2860 /* get what will be the *next* carry bit from the
2861 * MSB of the current digit
2862 */
2863 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2864
2865 /* now shift up this digit, add in the carry [from the previous] */
2866 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2867
2868 /* copy the carry that would be from the source
2869 * digit into the next iteration
2870 */
2871 r = rr;
2872 }
2873
2874 /* new leading digit? */
2875 if (r != 0) {
2876 /* add a MSB which is always 1 at this point */
2877 *tmpb = 1;
2878 ++(b->used);
2879 }
2880
2881 /* now zero any excess digits on the destination
2882 * that we didn't write to
2883 */
2884 tmpb = b->dp + b->used;
2885 for (x = b->used; x < oldused; x++) {
2886 *tmpb++ = 0;
2887 }
2888 }
2889 b->sign = a->sign;
2890 return MP_OKAY;
2891 }
2892
2893 /* shift left by a certain bit count */
2894 int mp_mul_2d (const mp_int * a, int b, mp_int * c)
2895 {
2896 mp_digit d;
2897 int res;
2898
2899 /* copy */
2900 if (a != c) {
2901 if ((res = mp_copy (a, c)) != MP_OKAY) {
2902 return res;
2903 }
2904 }
2905
2906 if (c->alloc < c->used + b/DIGIT_BIT + 1) {
2907 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
2908 return res;
2909 }
2910 }
2911
2912 /* shift by as many digits in the bit count */
2913 if (b >= DIGIT_BIT) {
2914 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
2915 return res;
2916 }
2917 }
2918
2919 /* shift any bit count < DIGIT_BIT */
2920 d = (mp_digit) (b % DIGIT_BIT);
2921 if (d != 0) {
2922 register mp_digit *tmpc, shift, mask, r, rr;
2923 register int x;
2924
2925 /* bitmask for carries */
2926 mask = (((mp_digit)1) << d) - 1;
2927
2928 /* shift for msbs */
2929 shift = DIGIT_BIT - d;
2930
2931 /* alias */
2932 tmpc = c->dp;
2933
2934 /* carry */
2935 r = 0;
2936 for (x = 0; x < c->used; x++) {
2937 /* get the higher bits of the current word */
2938 rr = (*tmpc >> shift) & mask;
2939
2940 /* shift the current word and OR in the carry */
2941 *tmpc = ((*tmpc << d) | r) & MP_MASK;
2942 ++tmpc;
2943
2944 /* set the carry to the carry bits of the current word */
2945 r = rr;
2946 }
2947
2948 /* set final carry */
2949 if (r != 0) {
2950 c->dp[(c->used)++] = r;
2951 }
2952 }
2953 mp_clamp (c);
2954 return MP_OKAY;
2955 }
2956
2957 /* multiply by a digit */
2958 int
2959 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
2960 {
2961 mp_digit u, *tmpa, *tmpc;
2962 mp_word r;
2963 int ix, res, olduse;
2964
2965 /* make sure c is big enough to hold a*b */
2966 if (c->alloc < a->used + 1) {
2967 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2968 return res;
2969 }
2970 }
2971
2972 /* get the original destinations used count */
2973 olduse = c->used;
2974
2975 /* set the sign */
2976 c->sign = a->sign;
2977
2978 /* alias for a->dp [source] */
2979 tmpa = a->dp;
2980
2981 /* alias for c->dp [dest] */
2982 tmpc = c->dp;
2983
2984 /* zero carry */
2985 u = 0;
2986
2987 /* compute columns */
2988 for (ix = 0; ix < a->used; ix++) {
2989 /* compute product and carry sum for this term */
2990 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2991
2992 /* mask off higher bits to get a single digit */
2993 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2994
2995 /* send carry into next iteration */
2996 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2997 }
2998
2999 /* store final carry [if any] */
3000 *tmpc++ = u;
3001
3002 /* now zero digits above the top */
3003 while (ix++ < olduse) {
3004 *tmpc++ = 0;
3005 }
3006
3007 /* set used count */
3008 c->used = a->used + 1;
3009 mp_clamp(c);
3010
3011 return MP_OKAY;
3012 }
3013
3014 /* d = a * b (mod c) */
3015 int
3016 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3017 {
3018 int res;
3019 mp_int t;
3020
3021 if ((res = mp_init (&t)) != MP_OKAY) {
3022 return res;
3023 }
3024
3025 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3026 mp_clear (&t);
3027 return res;
3028 }
3029 res = mp_mod (&t, c, d);
3030 mp_clear (&t);
3031 return res;
3032 }
3033
3034 /* table of first PRIME_SIZE primes */
3035 static const mp_digit __prime_tab[] = {
3036 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3037 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3038 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3039 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3040 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3041 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3042 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3043 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3044
3045 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3046 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3047 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3048 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3049 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3050 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3051 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3052 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3053
3054 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3055 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3056 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3057 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3058 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3059 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3060 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3061 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3062
3063 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3064 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3065 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3066 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3067 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3068 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3069 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3070 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3071 };
3072
3073 /* determines if an integers is divisible by one
3074 * of the first PRIME_SIZE primes or not
3075 *
3076 * sets result to 0 if not, 1 if yes
3077 */
3078 int mp_prime_is_divisible (const mp_int * a, int *result)
3079 {
3080 int err, ix;
3081 mp_digit res;
3082
3083 /* default to not */
3084 *result = MP_NO;
3085
3086 for (ix = 0; ix < PRIME_SIZE; ix++) {
3087 /* what is a mod __prime_tab[ix] */
3088 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3089 return err;
3090 }
3091
3092 /* is the residue zero? */
3093 if (res == 0) {
3094 *result = MP_YES;
3095 return MP_OKAY;
3096 }
3097 }
3098
3099 return MP_OKAY;
3100 }
3101
3102 /* performs a variable number of rounds of Miller-Rabin
3103 *
3104 * Probability of error after t rounds is no more than
3105
3106 *
3107 * Sets result to 1 if probably prime, 0 otherwise
3108 */
3109 int mp_prime_is_prime (mp_int * a, int t, int *result)
3110 {
3111 mp_int b;
3112 int ix, err, res;
3113
3114 /* default to no */
3115 *result = MP_NO;
3116
3117 /* valid value of t? */
3118 if (t <= 0 || t > PRIME_SIZE) {
3119 return MP_VAL;
3120 }
3121
3122 /* is the input equal to one of the primes in the table? */
3123 for (ix = 0; ix < PRIME_SIZE; ix++) {
3124 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3125 *result = 1;
3126 return MP_OKAY;
3127 }
3128 }
3129
3130 /* first perform trial division */
3131 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3132 return err;
3133 }
3134
3135 /* return if it was trivially divisible */
3136 if (res == MP_YES) {
3137 return MP_OKAY;
3138 }
3139
3140 /* now perform the miller-rabin rounds */
3141 if ((err = mp_init (&b)) != MP_OKAY) {
3142 return err;
3143 }
3144
3145 for (ix = 0; ix < t; ix++) {
3146 /* set the prime */
3147 mp_set (&b, __prime_tab[ix]);
3148
3149 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3150 goto __B;
3151 }
3152
3153 if (res == MP_NO) {
3154 goto __B;
3155 }
3156 }
3157
3158 /* passed the test */
3159 *result = MP_YES;
3160 __B:mp_clear (&b);
3161 return err;
3162 }
3163
3164 /* Miller-Rabin test of "a" to the base of "b" as described in
3165 * HAC pp. 139 Algorithm 4.24
3166 *
3167 * Sets result to 0 if definitely composite or 1 if probably prime.
3168 * Randomly the chance of error is no more than 1/4 and often
3169 * very much lower.
3170 */
3171 int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3172 {
3173 mp_int n1, y, r;
3174 int s, j, err;
3175
3176 /* default */
3177 *result = MP_NO;
3178
3179 /* ensure b > 1 */
3180 if (mp_cmp_d(b, 1) != MP_GT) {
3181 return MP_VAL;
3182 }
3183
3184 /* get n1 = a - 1 */
3185 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3186 return err;
3187 }
3188 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3189 goto __N1;
3190 }
3191
3192 /* set 2**s * r = n1 */
3193 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3194 goto __N1;
3195 }
3196
3197 /* count the number of least significant bits
3198 * which are zero
3199 */
3200 s = mp_cnt_lsb(&r);
3201
3202 /* now divide n - 1 by 2**s */
3203 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3204 goto __R;
3205 }
3206
3207 /* compute y = b**r mod a */
3208 if ((err = mp_init (&y)) != MP_OKAY) {
3209 goto __R;
3210 }
3211 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3212 goto __Y;
3213 }
3214
3215 /* if y != 1 and y != n1 do */
3216 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3217 j = 1;
3218 /* while j <= s-1 and y != n1 */
3219 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3220 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3221 goto __Y;
3222 }
3223
3224 /* if y == 1 then composite */
3225 if (mp_cmp_d (&y, 1) == MP_EQ) {
3226 goto __Y;
3227 }
3228
3229 ++j;
3230 }
3231
3232 /* if y != n1 then composite */
3233 if (mp_cmp (&y, &n1) != MP_EQ) {
3234 goto __Y;
3235 }
3236 }
3237
3238 /* probably prime now */
3239 *result = MP_YES;
3240 __Y:mp_clear (&y);
3241 __R:mp_clear (&r);
3242 __N1:mp_clear (&n1);
3243 return err;
3244 }
3245
3246 static const struct {
3247 int k, t;
3248 } sizes[] = {
3249 { 128, 28 },
3250 { 256, 16 },
3251 { 384, 10 },
3252 { 512, 7 },
3253 { 640, 6 },
3254 { 768, 5 },
3255 { 896, 4 },
3256 { 1024, 4 }
3257 };
3258
3259 /* returns # of RM trials required for a given bit size */
3260 int mp_prime_rabin_miller_trials(int size)
3261 {
3262 int x;
3263
3264 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3265 if (sizes[x].k == size) {
3266 return sizes[x].t;
3267 } else if (sizes[x].k > size) {
3268 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3269 }
3270 }
3271 return sizes[x-1].t + 1;
3272 }
3273
3274 /* makes a truly random prime of a given size (bits),
3275 *
3276 * Flags are as follows:
3277 *
3278 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3279 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3280 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3281 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3282 *
3283 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3284 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3285 * so it can be NULL
3286 *
3287 */
3288
3289 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3290 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3291 {
3292 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3293 int res, err, bsize, maskOR_msb_offset;
3294
3295 /* sanity check the input */
3296 if (size <= 1 || t <= 0) {
3297 return MP_VAL;
3298 }
3299
3300 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3301 if (flags & LTM_PRIME_SAFE) {
3302 flags |= LTM_PRIME_BBS;
3303 }
3304
3305 /* calc the byte size */
3306 bsize = (size>>3)+((size&7)?1:0);
3307
3308 /* we need a buffer of bsize bytes */
3309 tmp = malloc(bsize);
3310 if (tmp == NULL) {
3311 return MP_MEM;
3312 }
3313
3314 /* calc the maskAND value for the MSbyte*/
3315 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3316
3317 /* calc the maskOR_msb */
3318 maskOR_msb = 0;
3319 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3320 if (flags & LTM_PRIME_2MSB_ON) {
3321 maskOR_msb |= 1 << ((size - 2) & 7);
3322 } else if (flags & LTM_PRIME_2MSB_OFF) {
3323 maskAND &= ~(1 << ((size - 2) & 7));
3324 }
3325
3326 /* get the maskOR_lsb */
3327 maskOR_lsb = 0;
3328 if (flags & LTM_PRIME_BBS) {
3329 maskOR_lsb |= 3;
3330 }
3331
3332 do {
3333 /* read the bytes */
3334 if (cb(tmp, bsize, dat) != bsize) {
3335 err = MP_VAL;
3336 goto error;
3337 }
3338
3339 /* work over the MSbyte */
3340 tmp[0] &= maskAND;
3341 tmp[0] |= 1 << ((size - 1) & 7);
3342
3343 /* mix in the maskORs */
3344 tmp[maskOR_msb_offset] |= maskOR_msb;
3345 tmp[bsize-1] |= maskOR_lsb;
3346
3347 /* read it in */
3348 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3349
3350 /* is it prime? */
3351 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3352 if (res == MP_NO) {
3353 continue;
3354 }
3355
3356 if (flags & LTM_PRIME_SAFE) {
3357 /* see if (a-1)/2 is prime */
3358 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3359 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3360
3361 /* is it prime? */
3362 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3363 }
3364 } while (res == MP_NO);
3365
3366 if (flags & LTM_PRIME_SAFE) {
3367 /* restore a to the original value */
3368 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3369 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3370 }
3371
3372 err = MP_OKAY;
3373 error:
3374 free(tmp);
3375 return err;
3376 }
3377
3378 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3379 int
3380 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3381 {
3382 int res;
3383
3384 /* make sure there are at least two digits */
3385 if (a->alloc < 2) {
3386 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3387 return res;
3388 }
3389 }
3390
3391 /* zero the int */
3392 mp_zero (a);
3393
3394 /* read the bytes in */
3395 while (c-- > 0) {
3396 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3397 return res;
3398 }
3399
3400 a->dp[0] |= *b++;
3401 a->used += 1;
3402 }
3403 mp_clamp (a);
3404 return MP_OKAY;
3405 }
3406
3407 /* reduces x mod m, assumes 0 < x < m**2, mu is
3408 * precomputed via mp_reduce_setup.
3409 * From HAC pp.604 Algorithm 14.42
3410 */
3411 int
3412 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3413 {
3414 mp_int q;
3415 int res, um = m->used;
3416
3417 /* q = x */
3418 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3419 return res;
3420 }
3421
3422 /* q1 = x / b**(k-1) */
3423 mp_rshd (&q, um - 1);
3424
3425 /* according to HAC this optimization is ok */
3426 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3427 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3428 goto CLEANUP;
3429 }
3430 } else {
3431 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3432 goto CLEANUP;
3433 }
3434 }
3435
3436 /* q3 = q2 / b**(k+1) */
3437 mp_rshd (&q, um + 1);
3438
3439 /* x = x mod b**(k+1), quick (no division) */
3440 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3441 goto CLEANUP;
3442 }
3443
3444 /* q = q * m mod b**(k+1), quick (no division) */
3445 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3446 goto CLEANUP;
3447 }
3448
3449 /* x = x - q */
3450 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3451 goto CLEANUP;
3452 }
3453
3454 /* If x < 0, add b**(k+1) to it */
3455 if (mp_cmp_d (x, 0) == MP_LT) {
3456 mp_set (&q, 1);
3457 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3458 goto CLEANUP;
3459 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3460 goto CLEANUP;
3461 }
3462
3463 /* Back off if it's too big */
3464 while (mp_cmp (x, m) != MP_LT) {
3465 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3466 goto CLEANUP;
3467 }
3468 }
3469
3470 CLEANUP:
3471 mp_clear (&q);
3472
3473 return res;
3474 }
3475
3476 /* reduces a modulo n where n is of the form 2**p - d */
3477 int
3478 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3479 {
3480 mp_int q;
3481 int p, res;
3482
3483 if ((res = mp_init(&q)) != MP_OKAY) {
3484 return res;
3485 }
3486
3487 p = mp_count_bits(n);
3488 top:
3489 /* q = a/2**p, a = a mod 2**p */
3490 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3491 goto ERR;
3492 }
3493
3494 if (d != 1) {
3495 /* q = q * d */
3496 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3497 goto ERR;
3498 }
3499 }
3500
3501 /* a = a + q */
3502 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3503 goto ERR;
3504 }
3505
3506 if (mp_cmp_mag(a, n) != MP_LT) {
3507 s_mp_sub(a, n, a);
3508 goto top;
3509 }
3510
3511 ERR:
3512 mp_clear(&q);
3513 return res;
3514 }
3515
3516 /* determines the setup value */
3517 int
3518 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3519 {
3520 int res, p;
3521 mp_int tmp;
3522
3523 if ((res = mp_init(&tmp)) != MP_OKAY) {
3524 return res;
3525 }
3526
3527 p = mp_count_bits(a);
3528 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3529 mp_clear(&tmp);
3530 return res;
3531 }
3532
3533 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3534 mp_clear(&tmp);
3535 return res;
3536 }
3537
3538 *d = tmp.dp[0];
3539 mp_clear(&tmp);
3540 return MP_OKAY;
3541 }
3542
3543 /* pre-calculate the value required for Barrett reduction
3544 * For a given modulus "b" it calulates the value required in "a"
3545 */
3546 int mp_reduce_setup (mp_int * a, const mp_int * b)
3547 {
3548 int res;
3549
3550 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3551 return res;
3552 }
3553 return mp_div (a, b, a, NULL);
3554 }
3555
3556 /* shift right a certain amount of digits */
3557 void mp_rshd (mp_int * a, int b)
3558 {
3559 int x;
3560
3561 /* if b <= 0 then ignore it */
3562 if (b <= 0) {
3563 return;
3564 }
3565
3566 /* if b > used then simply zero it and return */
3567 if (a->used <= b) {
3568 mp_zero (a);
3569 return;
3570 }
3571
3572 {
3573 register mp_digit *bottom, *top;
3574
3575 /* shift the digits down */
3576
3577 /* bottom */
3578 bottom = a->dp;
3579
3580 /* top [offset into digits] */
3581 top = a->dp + b;
3582
3583 /* this is implemented as a sliding window where
3584 * the window is b-digits long and digits from
3585 * the top of the window are copied to the bottom
3586 *
3587 * e.g.
3588
3589 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
3590 /\ | ---->
3591 \-------------------/ ---->
3592 */
3593 for (x = 0; x < (a->used - b); x++) {
3594 *bottom++ = *top++;
3595 }
3596
3597 /* zero the top digits */
3598 for (; x < a->used; x++) {
3599 *bottom++ = 0;
3600 }
3601 }
3602
3603 /* remove excess digits */
3604 a->used -= b;
3605 }
3606
3607 /* set to a digit */
3608 void mp_set (mp_int * a, mp_digit b)
3609 {
3610 mp_zero (a);
3611 a->dp[0] = b & MP_MASK;
3612 a->used = (a->dp[0] != 0) ? 1 : 0;
3613 }
3614
3615 /* set a 32-bit const */
3616 int mp_set_int (mp_int * a, unsigned long b)
3617 {
3618 int x, res;
3619
3620 mp_zero (a);
3621
3622 /* set four bits at a time */
3623 for (x = 0; x < 8; x++) {
3624 /* shift the number up four bits */
3625 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3626 return res;
3627 }
3628
3629 /* OR in the top four bits of the source */
3630 a->dp[0] |= (b >> 28) & 15;
3631
3632 /* shift the source up to the next four bits */
3633 b <<= 4;
3634
3635 /* ensure that digits are not clamped off */
3636 a->used += 1;
3637 }
3638 mp_clamp (a);
3639 return MP_OKAY;
3640 }
3641
3642 /* shrink a bignum */
3643 int mp_shrink (mp_int * a)
3644 {
3645 mp_digit *tmp;
3646 if (a->alloc != a->used && a->used > 0) {
3647 if ((tmp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3648 return MP_MEM;
3649 }
3650 a->dp = tmp;
3651 a->alloc = a->used;
3652 }
3653 return MP_OKAY;
3654 }
3655
3656 /* get the size for an signed equivalent */
3657 int mp_signed_bin_size (const mp_int * a)
3658 {
3659 return 1 + mp_unsigned_bin_size (a);
3660 }
3661
3662 /* computes b = a*a */
3663 int
3664 mp_sqr (const mp_int * a, mp_int * b)
3665 {
3666 int res;
3667
3668 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3669 res = mp_karatsuba_sqr (a, b);
3670 } else
3671 {
3672 /* can we use the fast comba multiplier? */
3673 if ((a->used * 2 + 1) < MP_WARRAY &&
3674 a->used <
3675 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3676 res = fast_s_mp_sqr (a, b);
3677 } else
3678 res = s_mp_sqr (a, b);
3679 }
3680 b->sign = MP_ZPOS;
3681 return res;
3682 }
3683
3684 /* c = a * a (mod b) */
3685 int
3686 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3687 {
3688 int res;
3689 mp_int t;
3690
3691 if ((res = mp_init (&t)) != MP_OKAY) {
3692 return res;
3693 }
3694
3695 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3696 mp_clear (&t);
3697 return res;
3698 }
3699 res = mp_mod (&t, b, c);
3700 mp_clear (&t);
3701 return res;
3702 }
3703
3704 /* high level subtraction (handles signs) */
3705 int
3706 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3707 {
3708 int sa, sb, res;
3709
3710 sa = a->sign;
3711 sb = b->sign;
3712
3713 if (sa != sb) {
3714 /* subtract a negative from a positive, OR */
3715 /* subtract a positive from a negative. */
3716 /* In either case, ADD their magnitudes, */
3717 /* and use the sign of the first number. */
3718 c->sign = sa;
3719 res = s_mp_add (a, b, c);
3720 } else {
3721 /* subtract a positive from a positive, OR */
3722 /* subtract a negative from a negative. */
3723 /* First, take the difference between their */
3724 /* magnitudes, then... */
3725 if (mp_cmp_mag (a, b) != MP_LT) {
3726 /* Copy the sign from the first */
3727 c->sign = sa;
3728 /* The first has a larger or equal magnitude */
3729 res = s_mp_sub (a, b, c);
3730 } else {
3731 /* The result has the *opposite* sign from */
3732 /* the first number. */
3733 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3734 /* The second has a larger magnitude */
3735 res = s_mp_sub (b, a, c);
3736 }
3737 }
3738 return res;
3739 }
3740
3741 /* single digit subtraction */
3742 int
3743 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3744 {
3745 mp_digit *tmpa, *tmpc, mu;
3746 int res, ix, oldused;
3747
3748 /* grow c as required */
3749 if (c->alloc < a->used + 1) {
3750 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3751 return res;
3752 }
3753 }
3754
3755 /* if a is negative just do an unsigned
3756 * addition [with fudged signs]
3757 */
3758 if (a->sign == MP_NEG) {
3759 a->sign = MP_ZPOS;
3760 res = mp_add_d(a, b, c);
3761 a->sign = c->sign = MP_NEG;
3762 return res;
3763 }
3764
3765 /* setup regs */
3766 oldused = c->used;
3767 tmpa = a->dp;
3768 tmpc = c->dp;
3769
3770 /* if a <= b simply fix the single digit */
3771 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3772 if (a->used == 1) {
3773 *tmpc++ = b - *tmpa;
3774 } else {
3775 *tmpc++ = b;
3776 }
3777 ix = 1;
3778
3779 /* negative/1digit */
3780 c->sign = MP_NEG;
3781 c->used = 1;
3782 } else {
3783 /* positive/size */
3784 c->sign = MP_ZPOS;
3785 c->used = a->used;
3786
3787 /* subtract first digit */
3788 *tmpc = *tmpa++ - b;
3789 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3790 *tmpc++ &= MP_MASK;
3791
3792 /* handle rest of the digits */
3793 for (ix = 1; ix < a->used; ix++) {
3794 *tmpc = *tmpa++ - mu;
3795 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3796 *tmpc++ &= MP_MASK;
3797 }
3798 }
3799
3800 /* zero excess digits */
3801 while (ix++ < oldused) {
3802 *tmpc++ = 0;
3803 }
3804 mp_clamp(c);
3805 return MP_OKAY;
3806 }
3807
3808 /* store in unsigned [big endian] format */
3809 int
3810 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3811 {
3812 int x, res;
3813 mp_int t;
3814
3815 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3816 return res;
3817 }
3818
3819 x = 0;
3820 while (mp_iszero (&t) == 0) {
3821 b[x++] = (unsigned char) (t.dp[0] & 255);
3822 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3823 mp_clear (&t);
3824 return res;
3825 }
3826 }
3827 bn_reverse (b, x);
3828 mp_clear (&t);
3829 return MP_OKAY;
3830 }
3831
3832 /* get the size for an unsigned equivalent */
3833 int
3834 mp_unsigned_bin_size (const mp_int * a)
3835 {
3836 int size = mp_count_bits (a);
3837 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3838 }
3839
3840 /* set to zero */
3841 void
3842 mp_zero (mp_int * a)
3843 {
3844 a->sign = MP_ZPOS;
3845 a->used = 0;
3846 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
3847 }
3848
3849 /* reverse an array, used for radix code */
3850 void
3851 bn_reverse (unsigned char *s, int len)
3852 {
3853 int ix, iy;
3854 unsigned char t;
3855
3856 ix = 0;
3857 iy = len - 1;
3858 while (ix < iy) {
3859 t = s[ix];
3860 s[ix] = s[iy];
3861 s[iy] = t;
3862 ++ix;
3863 --iy;
3864 }
3865 }
3866
3867 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3868 int
3869 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3870 {
3871 mp_int *x;
3872 int olduse, res, min, max;
3873
3874 /* find sizes, we let |a| <= |b| which means we have to sort
3875 * them. "x" will point to the input with the most digits
3876 */
3877 if (a->used > b->used) {
3878 min = b->used;
3879 max = a->used;
3880 x = a;
3881 } else {
3882 min = a->used;
3883 max = b->used;
3884 x = b;
3885 }
3886
3887 /* init result */
3888 if (c->alloc < max + 1) {
3889 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3890 return res;
3891 }
3892 }
3893
3894 /* get old used digit count and set new one */
3895 olduse = c->used;
3896 c->used = max + 1;
3897
3898 {
3899 register mp_digit u, *tmpa, *tmpb, *tmpc;
3900 register int i;
3901
3902 /* alias for digit pointers */
3903
3904 /* first input */
3905 tmpa = a->dp;
3906
3907 /* second input */
3908 tmpb = b->dp;
3909
3910 /* destination */
3911 tmpc = c->dp;
3912
3913 /* zero the carry */
3914 u = 0;
3915 for (i = 0; i < min; i++) {
3916 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3917 *tmpc = *tmpa++ + *tmpb++ + u;
3918
3919 /* U = carry bit of T[i] */
3920 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3921
3922 /* take away carry bit from T[i] */
3923 *tmpc++ &= MP_MASK;
3924 }
3925
3926 /* now copy higher words if any, that is in A+B
3927 * if A or B has more digits add those in
3928 */
3929 if (min != max) {
3930 for (; i < max; i++) {
3931 /* T[i] = X[i] + U */
3932 *tmpc = x->dp[i] + u;
3933
3934 /* U = carry bit of T[i] */
3935 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3936
3937 /* take away carry bit from T[i] */
3938 *tmpc++ &= MP_MASK;
3939 }
3940 }
3941
3942 /* add carry */
3943 *tmpc++ = u;
3944
3945 /* clear digits above oldused */
3946 for (i = c->used; i < olduse; i++) {
3947 *tmpc++ = 0;
3948 }
3949 }
3950
3951 mp_clamp (c);
3952 return MP_OKAY;
3953 }
3954
3955 int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
3956 {
3957 mp_int M[256], res, mu;
3958 mp_digit buf;
3959 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3960
3961 /* find window size */
3962 x = mp_count_bits (X);
3963 if (x <= 7) {
3964 winsize = 2;
3965 } else if (x <= 36) {
3966 winsize = 3;
3967 } else if (x <= 140) {
3968 winsize = 4;
3969 } else if (x <= 450) {
3970 winsize = 5;
3971 } else if (x <= 1303) {
3972 winsize = 6;
3973 } else if (x <= 3529) {
3974 winsize = 7;
3975 } else {
3976 winsize = 8;
3977 }
3978
3979 /* init M array */
3980 /* init first cell */
3981 if ((err = mp_init(&M[1])) != MP_OKAY) {
3982 return err;
3983 }
3984
3985 /* now init the second half of the array */
3986 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3987 if ((err = mp_init(&M[x])) != MP_OKAY) {
3988 for (y = 1<<(winsize-1); y < x; y++) {
3989 mp_clear (&M[y]);
3990 }
3991 mp_clear(&M[1]);
3992 return err;
3993 }
3994 }
3995
3996 /* create mu, used for Barrett reduction */
3997 if ((err = mp_init (&mu)) != MP_OKAY) {
3998 goto __M;
3999 }
4000 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4001 goto __MU;
4002 }
4003
4004 /* create M table
4005 *
4006 * The M table contains powers of the base,
4007 * e.g. M[x] = G**x mod P
4008 *
4009 * The first half of the table is not
4010 * computed though accept for M[0] and M[1]
4011 */
4012 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4013 goto __MU;
4014 }
4015
4016 /* compute the value at M[1<<(winsize-1)] by squaring
4017 * M[1] (winsize-1) times
4018 */
4019 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4020 goto __MU;
4021 }
4022
4023 for (x = 0; x < (winsize - 1); x++) {
4024 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4025 &M[1 << (winsize - 1)])) != MP_OKAY) {
4026 goto __MU;
4027 }
4028 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4029 goto __MU;
4030 }
4031 }
4032
4033 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4034 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4035 */
4036 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4037 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4038 goto __MU;
4039 }
4040 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4041 goto __MU;
4042 }
4043 }
4044
4045 /* setup result */
4046 if ((err = mp_init (&res)) != MP_OKAY) {
4047 goto __MU;
4048 }
4049 mp_set (&res, 1);
4050
4051 /* set initial mode and bit cnt */
4052 mode = 0;
4053 bitcnt = 1;
4054 buf = 0;
4055 digidx = X->used - 1;
4056 bitcpy = 0;
4057 bitbuf = 0;
4058
4059 for (;;) {
4060 /* grab next digit as required */
4061 if (--bitcnt == 0) {
4062 /* if digidx == -1 we are out of digits */
4063 if (digidx == -1) {
4064 break;
4065 }
4066 /* read next digit and reset the bitcnt */
4067 buf = X->dp[digidx--];
4068 bitcnt = DIGIT_BIT;
4069 }
4070
4071 /* grab the next msb from the exponent */
4072 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4073 buf <<= (mp_digit)1;
4074
4075 /* if the bit is zero and mode == 0 then we ignore it
4076 * These represent the leading zero bits before the first 1 bit
4077 * in the exponent. Technically this opt is not required but it
4078 * does lower the # of trivial squaring/reductions used
4079 */
4080 if (mode == 0 && y == 0) {
4081 continue;
4082 }
4083
4084 /* if the bit is zero and mode == 1 then we square */
4085 if (mode == 1 && y == 0) {
4086 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4087 goto __RES;
4088 }
4089 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4090 goto __RES;
4091 }
4092 continue;
4093 }
4094
4095 /* else we add it to the window */
4096 bitbuf |= (y << (winsize - ++bitcpy));
4097 mode = 2;
4098
4099 if (bitcpy == winsize) {
4100 /* ok window is filled so square as required and multiply */
4101 /* square first */
4102 for (x = 0; x < winsize; x++) {
4103 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4104 goto __RES;
4105 }
4106 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4107 goto __RES;
4108 }
4109 }
4110
4111 /* then multiply */
4112 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4113 goto __RES;
4114 }
4115 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4116 goto __RES;
4117 }
4118
4119 /* empty window and reset */
4120 bitcpy = 0;
4121 bitbuf = 0;
4122 mode = 1;
4123 }
4124 }
4125
4126 /* if bits remain then square/multiply */
4127 if (mode == 2 && bitcpy > 0) {
4128 /* square then multiply if the bit is set */
4129 for (x = 0; x < bitcpy; x++) {
4130 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4131 goto __RES;
4132 }
4133 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4134 goto __RES;
4135 }
4136
4137 bitbuf <<= 1;
4138 if ((bitbuf & (1 << winsize)) != 0) {
4139 /* then multiply */
4140 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4141 goto __RES;
4142 }
4143 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4144 goto __RES;
4145 }
4146 }
4147 }
4148 }
4149
4150 mp_exch (&res, Y);
4151 err = MP_OKAY;
4152 __RES:mp_clear (&res);
4153 __MU:mp_clear (&mu);
4154 __M:
4155 mp_clear(&M[1]);
4156 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4157 mp_clear (&M[x]);
4158 }
4159 return err;
4160 }
4161
4162 /* multiplies |a| * |b| and only computes up to digs digits of result
4163 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4164 * many digits of output are created.
4165 */
4166 int
4167 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4168 {
4169 mp_int t;
4170 int res, pa, pb, ix, iy;
4171 mp_digit u;
4172 mp_word r;
4173 mp_digit tmpx, *tmpt, *tmpy;
4174
4175 /* can we use the fast multiplier? */
4176 if (((digs) < MP_WARRAY) &&
4177 MIN (a->used, b->used) <
4178 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4179 return fast_s_mp_mul_digs (a, b, c, digs);
4180 }
4181
4182 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4183 return res;
4184 }
4185 t.used = digs;
4186
4187 /* compute the digits of the product directly */
4188 pa = a->used;
4189 for (ix = 0; ix < pa; ix++) {
4190 /* set the carry to zero */
4191 u = 0;
4192
4193 /* limit ourselves to making digs digits of output */
4194 pb = MIN (b->used, digs - ix);
4195
4196 /* setup some aliases */
4197 /* copy of the digit from a used within the nested loop */
4198 tmpx = a->dp[ix];
4199
4200 /* an alias for the destination shifted ix places */
4201 tmpt = t.dp + ix;
4202
4203 /* an alias for the digits of b */
4204 tmpy = b->dp;
4205
4206 /* compute the columns of the output and propagate the carry */
4207 for (iy = 0; iy < pb; iy++) {
4208 /* compute the column as a mp_word */
4209 r = ((mp_word)*tmpt) +
4210 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4211 ((mp_word) u);
4212
4213 /* the new column is the lower part of the result */
4214 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4215
4216 /* get the carry word from the result */
4217 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4218 }
4219 /* set carry if it is placed below digs */
4220 if (ix + iy < digs) {
4221 *tmpt = u;
4222 }
4223 }
4224
4225 mp_clamp (&t);
4226 mp_exch (&t, c);
4227
4228 mp_clear (&t);
4229 return MP_OKAY;
4230 }
4231
4232 /* multiplies |a| * |b| and does not compute the lower digs digits
4233 * [meant to get the higher part of the product]
4234 */
4235 int
4236 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4237 {
4238 mp_int t;
4239 int res, pa, pb, ix, iy;
4240 mp_digit u;
4241 mp_word r;
4242 mp_digit tmpx, *tmpt, *tmpy;
4243
4244 /* can we use the fast multiplier? */
4245 if (((a->used + b->used + 1) < MP_WARRAY)
4246 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4247 return fast_s_mp_mul_high_digs (a, b, c, digs);
4248 }
4249
4250 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4251 return res;
4252 }
4253 t.used = a->used + b->used + 1;
4254
4255 pa = a->used;
4256 pb = b->used;
4257 for (ix = 0; ix < pa; ix++) {
4258 /* clear the carry */
4259 u = 0;
4260
4261 /* left hand side of A[ix] * B[iy] */
4262 tmpx = a->dp[ix];
4263
4264 /* alias to the address of where the digits will be stored */
4265 tmpt = &(t.dp[digs]);
4266
4267 /* alias for where to read the right hand side from */
4268 tmpy = b->dp + (digs - ix);
4269
4270 for (iy = digs - ix; iy < pb; iy++) {
4271 /* calculate the double precision result */
4272 r = ((mp_word)*tmpt) +
4273 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4274 ((mp_word) u);
4275
4276 /* get the lower part */
4277 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4278
4279 /* carry the carry */
4280 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4281 }
4282 *tmpt = u;
4283 }
4284 mp_clamp (&t);
4285 mp_exch (&t, c);
4286 mp_clear (&t);
4287 return MP_OKAY;
4288 }
4289
4290 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4291 int
4292 s_mp_sqr (const mp_int * a, mp_int * b)
4293 {
4294 mp_int t;
4295 int res, ix, iy, pa;
4296 mp_word r;
4297 mp_digit u, tmpx, *tmpt;
4298
4299 pa = a->used;
4300 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4301 return res;
4302 }
4303
4304 /* default used is maximum possible size */
4305 t.used = 2*pa + 1;
4306
4307 for (ix = 0; ix < pa; ix++) {
4308 /* first calculate the digit at 2*ix */
4309 /* calculate double precision result */
4310 r = ((mp_word) t.dp[2*ix]) +
4311 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4312
4313 /* store lower part in result */
4314 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4315
4316 /* get the carry */
4317 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4318
4319 /* left hand side of A[ix] * A[iy] */
4320 tmpx = a->dp[ix];
4321
4322 /* alias for where to store the results */
4323 tmpt = t.dp + (2*ix + 1);
4324
4325 for (iy = ix + 1; iy < pa; iy++) {
4326 /* first calculate the product */
4327 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4328
4329 /* now calculate the double precision result, note we use
4330 * addition instead of *2 since it's easier to optimize
4331 */
4332 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4333
4334 /* store lower part */
4335 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4336
4337 /* get carry */
4338 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4339 }
4340 /* propagate upwards */
4341 while (u != ((mp_digit) 0)) {
4342 r = ((mp_word) *tmpt) + ((mp_word) u);
4343 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4344 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4345 }
4346 }
4347
4348 mp_clamp (&t);
4349 mp_exch (&t, b);
4350 mp_clear (&t);
4351 return MP_OKAY;
4352 }
4353
4354 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4355 int
4356 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4357 {
4358 int olduse, res, min, max;
4359
4360 /* find sizes */
4361 min = b->used;
4362 max = a->used;
4363
4364 /* init result */
4365 if (c->alloc < max) {
4366 if ((res = mp_grow (c, max)) != MP_OKAY) {
4367 return res;
4368 }
4369 }
4370 olduse = c->used;
4371 c->used = max;
4372
4373 {
4374 register mp_digit u, *tmpa, *tmpb, *tmpc;
4375 register int i;
4376
4377 /* alias for digit pointers */
4378 tmpa = a->dp;
4379 tmpb = b->dp;
4380 tmpc = c->dp;
4381
4382 /* set carry to zero */
4383 u = 0;
4384 for (i = 0; i < min; i++) {
4385 /* T[i] = A[i] - B[i] - U */
4386 *tmpc = *tmpa++ - *tmpb++ - u;
4387
4388 /* U = carry bit of T[i]
4389 * Note this saves performing an AND operation since
4390 * if a carry does occur it will propagate all the way to the
4391 * MSB. As a result a single shift is enough to get the carry
4392 */
4393 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4394
4395 /* Clear carry from T[i] */
4396 *tmpc++ &= MP_MASK;
4397 }
4398
4399 /* now copy higher words if any, e.g. if A has more digits than B */
4400 for (; i < max; i++) {
4401 /* T[i] = A[i] - U */
4402 *tmpc = *tmpa++ - u;
4403
4404 /* U = carry bit of T[i] */
4405 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4406
4407 /* Clear carry from T[i] */
4408 *tmpc++ &= MP_MASK;
4409 }
4410
4411 /* clear digits above used (since we may not have grown result above) */
4412 for (i = c->used; i < olduse; i++) {
4413 *tmpc++ = 0;
4414 }
4415 }
4416
4417 mp_clamp (c);
4418 return MP_OKAY;
4419 }