27constexpr int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
28constexpr int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
29constexpr limb_t MAX_LIMB = (limb_t)(-1);
30constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
32constexpr limb_t MAX_PRIME_DIFF = 1103717;
34constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
38inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
47inline void mul(limb_t& c0, limb_t& c1,
const limb_t& a,
const limb_t& b)
49 double_limb_t
t = (double_limb_t)a * b;
55inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2,
const limb_t& n)
57 double_limb_t
t = (double_limb_t)d0 * n + c0;
60 t += (double_limb_t)d1 * n + c1;
67inline void muln2(limb_t& c0, limb_t& c1,
const limb_t& n)
69 double_limb_t
t = (double_limb_t)c0 * n;
72 t += (double_limb_t)c1 * n;
77inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2,
const limb_t& a,
const limb_t& b)
79 double_limb_t
t = (double_limb_t)a * b;
80 limb_t th =
t >> LIMB_SIZE;
84 th += (c0 < tl) ? 1 : 0;
86 c2 += (c1 < th) ? 1 : 0;
93inline void addnextract2(limb_t& c0, limb_t& c1,
const limb_t& a, limb_t& n)
117 if (this->
limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF)
return false;
118 for (
int i = 1; i <
LIMBS; ++i) {
119 if (this->
limbs[i] != std::numeric_limits<limb_t>::max())
return false;
126 limb_t c0 = MAX_PRIME_DIFF;
128 for (
int i = 0; i <
LIMBS; ++i) {
129 addnextract2(c0, c1, this->
limbs[i], this->
limbs[i]);
139 signed_limb_t limbs[SIGNED_LIMBS];
144 memset(limbs, 0,
sizeof(limbs));
149 void FromNum3072(
const Num3072& in)
152 int b = 0, outpos = 0;
153 for (
int i = 0; i < LIMBS; ++i) {
154 c += double_limb_t{in.
limbs[i]} << b;
156 while (b >= SIGNED_LIMB_SIZE) {
157 limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
158 c >>= SIGNED_LIMB_SIZE;
159 b -= SIGNED_LIMB_SIZE;
162 Assume(outpos == SIGNED_LIMBS - 1);
163 limbs[SIGNED_LIMBS - 1] = c;
164 c >>= SIGNED_LIMB_SIZE;
172 int b = 0, outpos = 0;
173 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
174 c += double_limb_t(limbs[i]) << b;
175 b += SIGNED_LIMB_SIZE;
176 if (b >= LIMB_SIZE) {
177 out.limbs[outpos++] = c;
191 void Normalize(
bool negate)
194 signed_limb_t cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
195 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
196 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
198 signed_limb_t cond_negate = -signed_limb_t(negate);
199 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
200 limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
203 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
204 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
205 limbs[i] &= MAX_SIGNED_LIMB;
208 cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
209 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
210 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
212 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
213 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
214 limbs[i] &= MAX_SIGNED_LIMB;
222 signed_limb_t u, v, q, r;
233inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g, SignedMatrix&
out)
236 static const uint8_t NEGINV256[128] = {
237 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
238 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
239 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
240 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
241 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
242 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
243 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
244 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
245 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
246 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
247 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
250 limb_t u = 1, v = 0, q = 0, r = 1;
252 int i = SIGNED_LIMB_SIZE;
255 int zeros = std::countr_zero(g | (MAX_LIMB << i));
268 tmp = f; f =
g;
g = -tmp;
269 tmp = u; u = q; q = -tmp;
270 tmp = v; v = r; r = -tmp;
275 int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
277 limb_t
m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
279 limb_t w = (
g * NEGINV256[(f >> 1) & 127]) &
m;
285 out.u = (signed_limb_t)u;
286 out.v = (signed_limb_t)v;
287 out.q = (signed_limb_t)q;
288 out.r = (signed_limb_t)r;
296inline void UpdateDE(Num3072Signed& d, Num3072Signed& e,
const SignedMatrix& t)
298 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
301 signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
302 signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
303 signed_limb_t md = (u & sd) + (v & se);
304 signed_limb_t me = (q & sd) + (r & se);
306 signed_limb_t di = d.limbs[0], ei = e.limbs[0];
307 signed_double_limb_t cd = (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
308 signed_double_limb_t ce = (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
310 md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
311 me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
313 cd -= (signed_double_limb_t)1103717 * md;
314 ce -= (signed_double_limb_t)1103717 * me;
316 Assume((cd & MAX_SIGNED_LIMB) == 0);
317 Assume((ce & MAX_SIGNED_LIMB) == 0);
318 cd >>= SIGNED_LIMB_SIZE;
319 ce >>= SIGNED_LIMB_SIZE;
323 for (
int i = 1; i < SIGNED_LIMBS - 1; ++i) {
326 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
327 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
328 d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
329 e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
332 di = d.limbs[SIGNED_LIMBS - 1];
333 ei = e.limbs[SIGNED_LIMBS - 1];
334 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
335 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
336 cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
337 ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
338 d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
339 e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
341 d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
342 e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
350inline void UpdateFG(Num3072Signed& f, Num3072Signed& g,
const SignedMatrix& t,
int len)
352 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
354 signed_limb_t fi, gi;
355 signed_double_limb_t cf, cg;
359 cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
360 cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
362 Assume((cf & MAX_SIGNED_LIMB) == 0);
363 Assume((cg & MAX_SIGNED_LIMB) == 0);
364 cf >>= SIGNED_LIMB_SIZE;
365 cg >>= SIGNED_LIMB_SIZE;
368 for (
int i = 1; i < len; ++i) {
371 cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
372 cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
373 f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB; cf >>= SIGNED_LIMB_SIZE;
374 g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB; cg >>= SIGNED_LIMB_SIZE;
377 f.limbs[len - 1] = (signed_limb_t)cf;
378 g.limbs[len - 1] = (signed_limb_t)cg;
399 Num3072Signed d, e, f,
g;
403 f.limbs[0] = -MAX_PRIME_DIFF;
404 f.limbs[FINAL_LIMB_POSITION] = ((
limb_t)1) << FINAL_LIMB_MODULUS_BITS;
405 g.FromNum3072(*
this);
413 eta = ComputeDivstepMatrix(eta, f.limbs[0],
g.limbs[0],
t);
415 UpdateFG(f,
g,
t, len);
420 if (
g.limbs[0] == 0) {
422 for (
int j = 1; j < len; ++j) {
426 if (cond == 0)
break;
445 Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 || (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
449 d.Normalize(f.limbs[len - 1] >> (
LIMB_SIZE - 1));
457 limb_t c0 = 0, c1 = 0, c2 = 0;
461 for (
int j = 0; j <
LIMBS - 1; ++j) {
462 limb_t d0 = 0, d1 = 0, d2 = 0;
463 mul(d0, d1, this->limbs[1 + j], a.
limbs[
LIMBS + j - (1 + j)]);
464 for (
int i = 2 + j; i <
LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.
limbs[
LIMBS + j - i]);
465 mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
466 for (
int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[j - i]);
467 extract3(c0, c1, c2, tmp.
limbs[j]);
472 for (
int i = 0; i <
LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[
LIMBS - 1 - i]);
476 muln2(c0, c1, MAX_PRIME_DIFF);
477 for (
int j = 0; j <
LIMBS; ++j) {
478 addnextract2(c0, c1, tmp.
limbs[j], this->limbs[j]);
482 assert(c0 == 0 || c0 == 1);
495 for (
int i = 1; i <
LIMBS; ++i) this->limbs[i] = 0;
516 for (
int i = 0; i <
LIMBS; ++i) {
517 if (
sizeof(
limb_t) == 4) {
519 }
else if (
sizeof(
limb_t) == 8) {
526 for (
int i = 0; i <
LIMBS; ++i) {
527 if (
sizeof(
limb_t) == 4) {
529 }
else if (
sizeof(
limb_t) == 8) {
548 m_numerator = ToNum3072(in);
553 m_numerator.Divide(m_denominator);
554 m_denominator.SetToOne();
557 m_numerator.ToBytes(
data);
564 m_numerator.Multiply(mul.m_numerator);
565 m_denominator.Multiply(mul.m_denominator);
571 m_numerator.Multiply(div.m_denominator);
572 m_denominator.Multiply(div.m_numerator);
577 m_numerator.Multiply(ToNum3072(in));
582 m_denominator.Multiply(ToNum3072(in));
#define Assume(val)
Assume is the identity function.
ChaCha20 cipher that only operates on multiples of 64 bytes.
static constexpr unsigned BLOCKLEN
Block size (inputs/outputs to Keystream / Crypt should be multiples of this).
A writer stream (for serialization) that computes a 256-bit hash.
A class representing MuHash sets.
Num3072 ToNum3072(Span< const unsigned char > in)
MuHash3072() noexcept=default
void Finalize(uint256 &out) noexcept
MuHash3072 & Remove(Span< const unsigned char > in) noexcept
MuHash3072 & operator/=(const MuHash3072 &div) noexcept
MuHash3072 & Insert(Span< const unsigned char > in) noexcept
MuHash3072 & operator*=(const MuHash3072 &mul) noexcept
Num3072 GetInverse() const
static constexpr int LIMBS
int64_t signed_double_limb_t
static constexpr size_t BYTE_SIZE
bool IsOverflow() const
Indicates whether d is larger than the modulus.
void ToBytes(unsigned char(&out)[BYTE_SIZE])
static constexpr int SIGNED_LIMBS
static constexpr int LIMB_SIZE
void Divide(const Num3072 &a)
static constexpr int SIGNED_LIMB_SIZE
void Multiply(const Num3072 &a)
void WriteLE32(B *ptr, uint32_t x)
uint64_t ReadLE64(const B *ptr)
void WriteLE64(B *ptr, uint64_t x)
uint32_t ReadLE32(const B *ptr)
Span< const std::byte > MakeByteSpan(V &&v) noexcept
Span< std::byte > MakeWritableByteSpan(V &&v) noexcept