Bitcoin Core 28.99.0
P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2020 Peter Dettman *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
6
7#ifndef SECP256K1_MODINV32_IMPL_H
8#define SECP256K1_MODINV32_IMPL_H
9
10#include "modinv32.h"
11
12#include "util.h"
13
14#include <stdlib.h>
15
16/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17 * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18 *
19 * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20 * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21 */
22
23#ifdef VERIFY
24static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29 int64_t c = 0;
30 int i;
31 for (i = 0; i < 8; ++i) {
32 if (i < alen) c += (int64_t)a->v[i] * factor;
33 r->v[i] = (int32_t)c & M30; c >>= 30;
34 }
35 if (8 < alen) c += (int64_t)a->v[8] * factor;
36 VERIFY_CHECK(c == (int32_t)c);
37 r->v[8] = (int32_t)c;
38}
39
40/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42 int i;
44 secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45 secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46 for (i = 0; i < 8; ++i) {
47 /* Verify that all but the top limb of a and b are normalized. */
48 VERIFY_CHECK(am.v[i] >> 30 == 0);
49 VERIFY_CHECK(bm.v[i] >> 30 == 0);
50 }
51 for (i = 8; i >= 0; --i) {
52 if (am.v[i] < bm.v[i]) return -1;
53 if (am.v[i] > bm.v[i]) return 1;
54 }
55 return 0;
56}
57#endif
58
59/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60 * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61 * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62 * [0,2^30). */
64 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65 int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66 r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67 volatile int32_t cond_add, cond_negate;
68
69#ifdef VERIFY
70 /* Verify that all limbs are in range (-2^30,2^30). */
71 int i;
72 for (i = 0; i < 9; ++i) {
73 VERIFY_CHECK(r->v[i] >= -M30);
74 VERIFY_CHECK(r->v[i] <= M30);
75 }
76 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78#endif
79
80 /* In a first step, add the modulus if the input is negative, and then negate if requested.
81 * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82 * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83 * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84 * indeed the behavior of the right shift operator). */
85 cond_add = r8 >> 31;
86 r0 += modinfo->modulus.v[0] & cond_add;
87 r1 += modinfo->modulus.v[1] & cond_add;
88 r2 += modinfo->modulus.v[2] & cond_add;
89 r3 += modinfo->modulus.v[3] & cond_add;
90 r4 += modinfo->modulus.v[4] & cond_add;
91 r5 += modinfo->modulus.v[5] & cond_add;
92 r6 += modinfo->modulus.v[6] & cond_add;
93 r7 += modinfo->modulus.v[7] & cond_add;
94 r8 += modinfo->modulus.v[8] & cond_add;
95 cond_negate = sign >> 31;
96 r0 = (r0 ^ cond_negate) - cond_negate;
97 r1 = (r1 ^ cond_negate) - cond_negate;
98 r2 = (r2 ^ cond_negate) - cond_negate;
99 r3 = (r3 ^ cond_negate) - cond_negate;
100 r4 = (r4 ^ cond_negate) - cond_negate;
101 r5 = (r5 ^ cond_negate) - cond_negate;
102 r6 = (r6 ^ cond_negate) - cond_negate;
103 r7 = (r7 ^ cond_negate) - cond_negate;
104 r8 = (r8 ^ cond_negate) - cond_negate;
105 /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106 r1 += r0 >> 30; r0 &= M30;
107 r2 += r1 >> 30; r1 &= M30;
108 r3 += r2 >> 30; r2 &= M30;
109 r4 += r3 >> 30; r3 &= M30;
110 r5 += r4 >> 30; r4 &= M30;
111 r6 += r5 >> 30; r5 &= M30;
112 r7 += r6 >> 30; r6 &= M30;
113 r8 += r7 >> 30; r7 &= M30;
114
115 /* In a second step add the modulus again if the result is still negative, bringing r to range
116 * [0,modulus). */
117 cond_add = r8 >> 31;
118 r0 += modinfo->modulus.v[0] & cond_add;
119 r1 += modinfo->modulus.v[1] & cond_add;
120 r2 += modinfo->modulus.v[2] & cond_add;
121 r3 += modinfo->modulus.v[3] & cond_add;
122 r4 += modinfo->modulus.v[4] & cond_add;
123 r5 += modinfo->modulus.v[5] & cond_add;
124 r6 += modinfo->modulus.v[6] & cond_add;
125 r7 += modinfo->modulus.v[7] & cond_add;
126 r8 += modinfo->modulus.v[8] & cond_add;
127 /* And propagate again. */
128 r1 += r0 >> 30; r0 &= M30;
129 r2 += r1 >> 30; r1 &= M30;
130 r3 += r2 >> 30; r2 &= M30;
131 r4 += r3 >> 30; r3 &= M30;
132 r5 += r4 >> 30; r4 &= M30;
133 r6 += r5 >> 30; r5 &= M30;
134 r7 += r6 >> 30; r6 &= M30;
135 r8 += r7 >> 30; r7 &= M30;
136
137 r->v[0] = r0;
138 r->v[1] = r1;
139 r->v[2] = r2;
140 r->v[3] = r3;
141 r->v[4] = r4;
142 r->v[5] = r5;
143 r->v[6] = r6;
144 r->v[7] = r7;
145 r->v[8] = r8;
146
147 VERIFY_CHECK(r0 >> 30 == 0);
148 VERIFY_CHECK(r1 >> 30 == 0);
149 VERIFY_CHECK(r2 >> 30 == 0);
150 VERIFY_CHECK(r3 >> 30 == 0);
151 VERIFY_CHECK(r4 >> 30 == 0);
152 VERIFY_CHECK(r5 >> 30 == 0);
153 VERIFY_CHECK(r6 >> 30 == 0);
154 VERIFY_CHECK(r7 >> 30 == 0);
155 VERIFY_CHECK(r8 >> 30 == 0);
156 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
157 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
158}
159
160/* Data type for transition matrices (see section 3 of explanation).
161 *
162 * t = [ u v ]
163 * [ q r ]
164 */
165typedef struct {
166 int32_t u, v, q, r;
168
169/* Compute the transition matrix and zeta for 30 divsteps.
170 *
171 * Input: zeta: initial zeta
172 * f0: bottom limb of initial f
173 * g0: bottom limb of initial g
174 * Output: t: transition matrix
175 * Return: final zeta
176 *
177 * Implements the divsteps_n_matrix function from the explanation.
178 */
179static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
180 /* u,v,q,r are the elements of the transformation matrix being built up,
181 * starting with the identity matrix. Semantically they are signed integers
182 * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
183 * permits left shifting (which is UB for negative numbers). The range
184 * being inside [-2^31,2^31) means that casting to signed works correctly.
185 */
186 uint32_t u = 1, v = 0, q = 0, r = 1;
187 volatile uint32_t c1, c2;
188 uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
189 int i;
190
191 for (i = 0; i < 30; ++i) {
192 VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
193 VERIFY_CHECK((u * f0 + v * g0) == f << i);
194 VERIFY_CHECK((q * f0 + r * g0) == g << i);
195 /* Compute conditional masks for (zeta < 0) and for (g & 1). */
196 c1 = zeta >> 31;
197 mask1 = c1;
198 c2 = g & 1;
199 mask2 = -c2;
200 /* Compute x,y,z, conditionally negated versions of f,u,v. */
201 x = (f ^ mask1) - mask1;
202 y = (u ^ mask1) - mask1;
203 z = (v ^ mask1) - mask1;
204 /* Conditionally add x,y,z to g,q,r. */
205 g += x & mask2;
206 q += y & mask2;
207 r += z & mask2;
208 /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
209 mask1 &= mask2;
210 /* Conditionally change zeta into -zeta-2 or zeta-1. */
211 zeta = (zeta ^ mask1) - 1;
212 /* Conditionally add g,q,r to f,u,v. */
213 f += g & mask1;
214 u += q & mask1;
215 v += r & mask1;
216 /* Shifts */
217 g >>= 1;
218 u <<= 1;
219 v <<= 1;
220 /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
221 VERIFY_CHECK(zeta >= -601 && zeta <= 601);
222 }
223 /* Return data in t and return value. */
224 t->u = (int32_t)u;
225 t->v = (int32_t)v;
226 t->q = (int32_t)q;
227 t->r = (int32_t)r;
228 /* The determinant of t must be a power of two. This guarantees that multiplication with t
229 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
230 * will be divided out again). As each divstep's individual matrix has determinant 2, the
231 * aggregate of 30 of them will have determinant 2^30. */
232 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
233 return zeta;
234}
235
236/* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
237static const uint8_t secp256k1_modinv32_inv256[128] = {
238 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
249};
250
251/* Compute the transition matrix and eta for 30 divsteps (variable time).
252 *
253 * Input: eta: initial eta
254 * f0: bottom limb of initial f
255 * g0: bottom limb of initial g
256 * Output: t: transition matrix
257 * Return: final eta
258 *
259 * Implements the divsteps_n_matrix_var function from the explanation.
260 */
261static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
262 /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
263 uint32_t u = 1, v = 0, q = 0, r = 1;
264 uint32_t f = f0, g = g0, m;
265 uint16_t w;
266 int i = 30, limit, zeros;
267
268 for (;;) {
269 /* Use a sentinel bit to count zeros only up to i. */
270 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
271 /* Perform zeros divsteps at once; they all just divide g by two. */
272 g >>= zeros;
273 u <<= zeros;
274 v <<= zeros;
275 eta -= zeros;
276 i -= zeros;
277 /* We're done once we've done 30 divsteps. */
278 if (i == 0) break;
279 VERIFY_CHECK((f & 1) == 1);
280 VERIFY_CHECK((g & 1) == 1);
281 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
282 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
283 /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
284 VERIFY_CHECK(eta >= -751 && eta <= 751);
285 /* If eta is negative, negate it and replace f,g with g,-f. */
286 if (eta < 0) {
287 uint32_t tmp;
288 eta = -eta;
289 tmp = f; f = g; g = -tmp;
290 tmp = u; u = q; q = -tmp;
291 tmp = v; v = r; r = -tmp;
292 }
293 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
294 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
295 * can be done as its sign will flip once that happens. */
296 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
297 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
298 VERIFY_CHECK(limit > 0 && limit <= 30);
299 m = (UINT32_MAX >> (32 - limit)) & 255U;
300 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
301 w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
302 /* Do so. */
303 g += f * w;
304 q += u * w;
305 r += v * w;
306 VERIFY_CHECK((g & m) == 0);
307 }
308 /* Return data in t and return value. */
309 t->u = (int32_t)u;
310 t->v = (int32_t)v;
311 t->q = (int32_t)q;
312 t->r = (int32_t)r;
313 /* The determinant of t must be a power of two. This guarantees that multiplication with t
314 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
315 * will be divided out again). As each divstep's individual matrix has determinant 2, the
316 * aggregate of 30 of them will have determinant 2^30. */
317 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
318 return eta;
319}
320
321/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
322 * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
323 * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
324 *
325 * Input: eta: initial eta
326 * f0: bottom limb of initial f
327 * g0: bottom limb of initial g
328 * Output: t: transition matrix
329 * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
330 * by applying the returned transformation matrix to it. The other bits of *jacp may
331 * change, but are meaningless.
332 * Return: final eta
333 */
334static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
335 /* Transformation matrix. */
336 uint32_t u = 1, v = 0, q = 0, r = 1;
337 uint32_t f = f0, g = g0, m;
338 uint16_t w;
339 int i = 30, limit, zeros;
340 int jac = *jacp;
341
342 for (;;) {
343 /* Use a sentinel bit to count zeros only up to i. */
344 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
345 /* Perform zeros divsteps at once; they all just divide g by two. */
346 g >>= zeros;
347 u <<= zeros;
348 v <<= zeros;
349 eta -= zeros;
350 i -= zeros;
351 /* Update the bottom bit of jac: when dividing g by an odd power of 2,
352 * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
353 jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
354 /* We're done once we've done 30 posdivsteps. */
355 if (i == 0) break;
356 VERIFY_CHECK((f & 1) == 1);
357 VERIFY_CHECK((g & 1) == 1);
358 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
359 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
360 /* If eta is negative, negate it and replace f,g with g,f. */
361 if (eta < 0) {
362 uint32_t tmp;
363 eta = -eta;
364 /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
365 * if both f and g are 3 mod 4. */
366 jac ^= ((f & g) >> 1);
367 tmp = f; f = g; g = tmp;
368 tmp = u; u = q; q = tmp;
369 tmp = v; v = r; r = tmp;
370 }
371 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
372 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
373 * can be done as its sign will flip once that happens. */
374 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
375 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
376 VERIFY_CHECK(limit > 0 && limit <= 30);
377 m = (UINT32_MAX >> (32 - limit)) & 255U;
378 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
379 w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
380 /* Do so. */
381 g += f * w;
382 q += u * w;
383 r += v * w;
384 VERIFY_CHECK((g & m) == 0);
385 }
386 /* Return data in t and return value. */
387 t->u = (int32_t)u;
388 t->v = (int32_t)v;
389 t->q = (int32_t)q;
390 t->r = (int32_t)r;
391 /* The determinant of t must be a power of two. This guarantees that multiplication with t
392 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
393 * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
394 * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
395 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
396 (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
397 *jacp = jac;
398 return eta;
399}
400
401/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
402 *
403 * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
404 * (-2^30,2^30).
405 *
406 * This implements the update_de function from the explanation.
407 */
409 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
410 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
411 int32_t di, ei, md, me, sd, se;
412 int64_t cd, ce;
413 int i;
414 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
415 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
416 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
417 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
418 VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
419 VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
420
421 /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
422 sd = d->v[8] >> 31;
423 se = e->v[8] >> 31;
424 md = (u & sd) + (v & se);
425 me = (q & sd) + (r & se);
426 /* Begin computing t*[d,e]. */
427 di = d->v[0];
428 ei = e->v[0];
429 cd = (int64_t)u * di + (int64_t)v * ei;
430 ce = (int64_t)q * di + (int64_t)r * ei;
431 /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
432 md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
433 me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
434 /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
435 cd += (int64_t)modinfo->modulus.v[0] * md;
436 ce += (int64_t)modinfo->modulus.v[0] * me;
437 /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
438 VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
439 VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
440 /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
441 * limb i-1 (shifting down by 30 bits). */
442 for (i = 1; i < 9; ++i) {
443 di = d->v[i];
444 ei = e->v[i];
445 cd += (int64_t)u * di + (int64_t)v * ei;
446 ce += (int64_t)q * di + (int64_t)r * ei;
447 cd += (int64_t)modinfo->modulus.v[i] * md;
448 ce += (int64_t)modinfo->modulus.v[i] * me;
449 d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
450 e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
451 }
452 /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
453 d->v[8] = (int32_t)cd;
454 e->v[8] = (int32_t)ce;
455
456 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
457 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
458 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
459 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
460}
461
462/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
463 *
464 * This implements the update_fg function from the explanation.
465 */
467 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
468 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
469 int32_t fi, gi;
470 int64_t cf, cg;
471 int i;
472 /* Start computing t*[f,g]. */
473 fi = f->v[0];
474 gi = g->v[0];
475 cf = (int64_t)u * fi + (int64_t)v * gi;
476 cg = (int64_t)q * fi + (int64_t)r * gi;
477 /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
478 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
479 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
480 /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
481 * down by 30 bits). */
482 for (i = 1; i < 9; ++i) {
483 fi = f->v[i];
484 gi = g->v[i];
485 cf += (int64_t)u * fi + (int64_t)v * gi;
486 cg += (int64_t)q * fi + (int64_t)r * gi;
487 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
488 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
489 }
490 /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
491 f->v[8] = (int32_t)cf;
492 g->v[8] = (int32_t)cg;
493}
494
495/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
496 *
497 * Version that operates on a variable number of limbs in f and g.
498 *
499 * This implements the update_fg function from the explanation in modinv64_impl.h.
500 */
502 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
503 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
504 int32_t fi, gi;
505 int64_t cf, cg;
506 int i;
507 VERIFY_CHECK(len > 0);
508 /* Start computing t*[f,g]. */
509 fi = f->v[0];
510 gi = g->v[0];
511 cf = (int64_t)u * fi + (int64_t)v * gi;
512 cg = (int64_t)q * fi + (int64_t)r * gi;
513 /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
514 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
515 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
516 /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
517 * down by 30 bits). */
518 for (i = 1; i < len; ++i) {
519 fi = f->v[i];
520 gi = g->v[i];
521 cf += (int64_t)u * fi + (int64_t)v * gi;
522 cg += (int64_t)q * fi + (int64_t)r * gi;
523 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
524 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
525 }
526 /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
527 f->v[len - 1] = (int32_t)cf;
528 g->v[len - 1] = (int32_t)cg;
529}
530
531/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
533 /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
538 int i;
539 int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
540
541 /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
542 for (i = 0; i < 20; ++i) {
543 /* Compute transition matrix and new zeta after 30 divsteps. */
545 zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
546 /* Update d,e using that transition matrix. */
547 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
548 /* Update f,g using that transition matrix. */
549 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
550 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
551 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
552 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
553
555
556 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
557 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
558 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
559 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
560 }
561
562 /* At this point sufficient iterations have been performed that g must have reached 0
563 * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
564 * values i.e. +/- 1, and d now contains +/- the modular inverse. */
565
566 /* g == 0 */
567 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
568 /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
569 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
570 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
571 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
572 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
573 secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0));
574
575 /* Optionally negate d, normalize to [0,modulus), and return it. */
576 secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
577 *x = d;
578}
579
580/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
582 /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
583 secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
584 secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
587#ifdef VERIFY
588 int i = 0;
589#endif
590 int j, len = 9;
591 int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
592 int32_t cond, fn, gn;
593
594 /* Do iterations of 30 divsteps each until g=0. */
595 while (1) {
596 /* Compute transition matrix and new eta after 30 divsteps. */
598 eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
599 /* Update d,e using that transition matrix. */
600 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
601 /* Update f,g using that transition matrix. */
602
603 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
604 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
605 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
606 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
607
609 /* If the bottom limb of g is 0, there is a chance g=0. */
610 if (g.v[0] == 0) {
611 cond = 0;
612 /* Check if all other limbs are also 0. */
613 for (j = 1; j < len; ++j) {
614 cond |= g.v[j];
615 }
616 /* If so, we're done. */
617 if (cond == 0) break;
618 }
619
620 /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
621 fn = f.v[len - 1];
622 gn = g.v[len - 1];
623 cond = ((int32_t)len - 2) >> 31;
624 cond |= fn ^ (fn >> 31);
625 cond |= gn ^ (gn >> 31);
626 /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
627 if (cond == 0) {
628 f.v[len - 2] |= (uint32_t)fn << 30;
629 g.v[len - 2] |= (uint32_t)gn << 30;
630 --len;
631 }
632
633 VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
634 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
635 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
636 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
637 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
638 }
639
640 /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
641 * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
642
643 /* g == 0 */
644 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
645 /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
646 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
647 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
648 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
649 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
650 secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0));
651
652 /* Optionally negate d, normalize to [0,modulus), and return it. */
653 secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
654 *x = d;
655}
656
657/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
658 * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
659#ifdef VERIFY
660#define JACOBI32_ITERATIONS 25
661#else
662#define JACOBI32_ITERATIONS 50
663#endif
664
665/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
667 /* Start with f=modulus, g=x, eta=-1. */
670 int j, len = 9;
671 int32_t eta = -1; /* eta = -delta; delta is initially 1 */
672 int32_t cond, fn, gn;
673 int jac = 0;
674 int count;
675
676 /* The input limbs must all be non-negative. */
677 VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
678
679 /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
680 * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
681 * time out). */
682 VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
683
684 for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
685 /* Compute transition matrix and new eta after 30 posdivsteps. */
687 eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
688 /* Update f,g using that transition matrix. */
689 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
690 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
691 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
692 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
693
695 /* If the bottom limb of f is 1, there is a chance that f=1. */
696 if (f.v[0] == 1) {
697 cond = 0;
698 /* Check if the other limbs are also 0. */
699 for (j = 1; j < len; ++j) {
700 cond |= f.v[j];
701 }
702 /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
703 if (cond == 0) return 1 - 2*(jac & 1);
704 }
705
706 /* Determine if len>1 and limb (len-1) of both f and g is 0. */
707 fn = f.v[len - 1];
708 gn = g.v[len - 1];
709 cond = ((int32_t)len - 2) >> 31;
710 cond |= fn;
711 cond |= gn;
712 /* If so, reduce length. */
713 if (cond == 0) --len;
714
715 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
716 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
717 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
718 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
719 }
720
721 /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
722 return 0;
723}
724
725#endif /* SECP256K1_MODINV32_IMPL_H */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
#define JACOBI32_ITERATIONS
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static const uint8_t secp256k1_modinv32_inv256[128]
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static int sign(const secp256k1_context *ctx, struct signer_secrets *signer_secrets, struct signer *signer, const secp256k1_musig_keyagg_cache *cache, const unsigned char *msg32, unsigned char *sig64)
Definition: musig.c:105
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:364
#define VERIFY_CHECK(cond)
Definition: util.h:159
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:21
static int count