28constexpr int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
29constexpr int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
30constexpr limb_t MAX_LIMB = (limb_t)(-1);
31constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
33constexpr limb_t MAX_PRIME_DIFF = 1103717;
35constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
39inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
48inline void mul(limb_t& c0, limb_t& c1,
const limb_t& a,
const limb_t& b)
50 double_limb_t
t = (double_limb_t)a * b;
56inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2,
const limb_t& n)
58 double_limb_t
t = (double_limb_t)d0 * n + c0;
61 t += (double_limb_t)d1 * n + c1;
68inline void muln2(limb_t& c0, limb_t& c1,
const limb_t& n)
70 double_limb_t
t = (double_limb_t)c0 * n;
73 t += (double_limb_t)c1 * n;
78inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2,
const limb_t& a,
const limb_t& b)
80 double_limb_t
t = (double_limb_t)a * b;
81 limb_t th =
t >> LIMB_SIZE;
85 th += (c0 < tl) ? 1 : 0;
87 c2 += (c1 < th) ? 1 : 0;
94inline void addnextract2(limb_t& c0, limb_t& c1,
const limb_t& a, limb_t& n)
118 if (this->
limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF)
return false;
119 for (
int i = 1; i <
LIMBS; ++i) {
120 if (this->
limbs[i] != std::numeric_limits<limb_t>::max())
return false;
127 limb_t c0 = MAX_PRIME_DIFF;
129 for (
int i = 0; i <
LIMBS; ++i) {
130 addnextract2(c0, c1, this->
limbs[i], this->
limbs[i]);
140 signed_limb_t limbs[SIGNED_LIMBS];
145 memset(limbs, 0,
sizeof(limbs));
150 void FromNum3072(
const Num3072& in)
153 int b = 0, outpos = 0;
154 for (
int i = 0; i < LIMBS; ++i) {
155 c += double_limb_t{in.
limbs[i]} << b;
157 while (b >= SIGNED_LIMB_SIZE) {
158 limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
159 c >>= SIGNED_LIMB_SIZE;
160 b -= SIGNED_LIMB_SIZE;
163 Assume(outpos == SIGNED_LIMBS - 1);
164 limbs[SIGNED_LIMBS - 1] = c;
165 c >>= SIGNED_LIMB_SIZE;
173 int b = 0, outpos = 0;
174 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
175 c += double_limb_t(limbs[i]) << b;
176 b += SIGNED_LIMB_SIZE;
177 if (b >= LIMB_SIZE) {
178 out.limbs[outpos++] = c;
192 void Normalize(
bool negate)
195 signed_limb_t cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
196 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
197 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
199 signed_limb_t cond_negate = -signed_limb_t(negate);
200 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
201 limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
204 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
205 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
206 limbs[i] &= MAX_SIGNED_LIMB;
209 cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
210 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
211 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
213 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
214 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
215 limbs[i] &= MAX_SIGNED_LIMB;
223 signed_limb_t u, v, q, r;
234inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g, SignedMatrix&
out)
237 static const uint8_t NEGINV256[128] = {
238 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
251 limb_t u = 1, v = 0, q = 0, r = 1;
253 int i = SIGNED_LIMB_SIZE;
256 int zeros = std::countr_zero(g | (MAX_LIMB << i));
269 tmp = f; f =
g;
g = -tmp;
270 tmp = u; u = q; q = -tmp;
271 tmp = v; v = r; r = -tmp;
276 int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
278 limb_t
m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
280 limb_t w = (
g * NEGINV256[(f >> 1) & 127]) &
m;
286 out.u = (signed_limb_t)u;
287 out.v = (signed_limb_t)v;
288 out.q = (signed_limb_t)q;
289 out.r = (signed_limb_t)r;
297inline void UpdateDE(Num3072Signed& d, Num3072Signed& e,
const SignedMatrix& t)
299 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
302 signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
303 signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
304 signed_limb_t md = (u & sd) + (v & se);
305 signed_limb_t me = (q & sd) + (r & se);
307 signed_limb_t di = d.limbs[0], ei = e.limbs[0];
308 signed_double_limb_t cd = (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
309 signed_double_limb_t ce = (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
311 md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
312 me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
314 cd -= (signed_double_limb_t)1103717 * md;
315 ce -= (signed_double_limb_t)1103717 * me;
317 Assume((cd & MAX_SIGNED_LIMB) == 0);
318 Assume((ce & MAX_SIGNED_LIMB) == 0);
319 cd >>= SIGNED_LIMB_SIZE;
320 ce >>= SIGNED_LIMB_SIZE;
324 for (
int i = 1; i < SIGNED_LIMBS - 1; ++i) {
327 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
328 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
329 d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
330 e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
333 di = d.limbs[SIGNED_LIMBS - 1];
334 ei = e.limbs[SIGNED_LIMBS - 1];
335 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
336 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
337 cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
338 ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
339 d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
340 e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
342 d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
343 e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
351inline void UpdateFG(Num3072Signed& f, Num3072Signed& g,
const SignedMatrix& t,
int len)
353 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
355 signed_limb_t fi, gi;
356 signed_double_limb_t cf, cg;
360 cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
361 cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
363 Assume((cf & MAX_SIGNED_LIMB) == 0);
364 Assume((cg & MAX_SIGNED_LIMB) == 0);
365 cf >>= SIGNED_LIMB_SIZE;
366 cg >>= SIGNED_LIMB_SIZE;
369 for (
int i = 1; i < len; ++i) {
372 cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
373 cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
374 f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB; cf >>= SIGNED_LIMB_SIZE;
375 g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB; cg >>= SIGNED_LIMB_SIZE;
378 f.limbs[len - 1] = (signed_limb_t)cf;
379 g.limbs[len - 1] = (signed_limb_t)cg;
400 Num3072Signed d, e, f,
g;
404 f.limbs[0] = -MAX_PRIME_DIFF;
405 f.limbs[FINAL_LIMB_POSITION] = ((
limb_t)1) << FINAL_LIMB_MODULUS_BITS;
406 g.FromNum3072(*
this);
414 eta = ComputeDivstepMatrix(eta, f.limbs[0],
g.limbs[0],
t);
416 UpdateFG(f,
g,
t, len);
421 if (
g.limbs[0] == 0) {
423 for (
int j = 1; j < len; ++j) {
427 if (cond == 0)
break;
446 Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 || (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
450 d.Normalize(f.limbs[len - 1] >> (
LIMB_SIZE - 1));
458 limb_t c0 = 0, c1 = 0, c2 = 0;
462 for (
int j = 0; j <
LIMBS - 1; ++j) {
463 limb_t d0 = 0, d1 = 0, d2 = 0;
464 mul(d0, d1, this->limbs[1 + j], a.
limbs[
LIMBS + j - (1 + j)]);
465 for (
int i = 2 + j; i <
LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.
limbs[
LIMBS + j - i]);
466 mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
467 for (
int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[j - i]);
468 extract3(c0, c1, c2, tmp.
limbs[j]);
473 for (
int i = 0; i <
LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[
LIMBS - 1 - i]);
477 muln2(c0, c1, MAX_PRIME_DIFF);
478 for (
int j = 0; j <
LIMBS; ++j) {
479 addnextract2(c0, c1, tmp.
limbs[j], this->limbs[j]);
483 assert(c0 == 0 || c0 == 1);
496 for (
int i = 1; i <
LIMBS; ++i) this->limbs[i] = 0;
517 for (
int i = 0; i <
LIMBS; ++i) {
518 if (
sizeof(
limb_t) == 4) {
520 }
else if (
sizeof(
limb_t) == 8) {
527 for (
int i = 0; i <
LIMBS; ++i) {
528 if (
sizeof(
limb_t) == 4) {
530 }
else if (
sizeof(
limb_t) == 8) {
549 m_numerator = ToNum3072(in);
554 m_numerator.Divide(m_denominator);
555 m_denominator.SetToOne();
558 m_numerator.ToBytes(
data);
565 m_numerator.Multiply(mul.m_numerator);
566 m_denominator.Multiply(mul.m_denominator);
572 m_numerator.Multiply(div.m_denominator);
573 m_denominator.Multiply(div.m_numerator);
578 m_numerator.Multiply(ToNum3072(in));
583 m_denominator.Multiply(ToNum3072(in));
#define Assume(val)
Assume is the identity function.
ChaCha20 cipher that only operates on multiples of 64 bytes.
static constexpr unsigned BLOCKLEN
Block size (inputs/outputs to Keystream / Crypt should be multiples of this).
A writer stream (for serialization) that computes a 256-bit hash.
A class representing MuHash sets.
MuHash3072() noexcept=default
void Finalize(uint256 &out) noexcept
Num3072 ToNum3072(std::span< const unsigned char > in)
MuHash3072 & operator/=(const MuHash3072 &div) noexcept
MuHash3072 & Insert(std::span< const unsigned char > in) noexcept
MuHash3072 & Remove(std::span< const unsigned char > in) noexcept
MuHash3072 & operator*=(const MuHash3072 &mul) noexcept
Num3072 GetInverse() const
static constexpr int LIMBS
int64_t signed_double_limb_t
static constexpr size_t BYTE_SIZE
bool IsOverflow() const
Indicates whether d is larger than the modulus.
void ToBytes(unsigned char(&out)[BYTE_SIZE])
static constexpr int SIGNED_LIMBS
static constexpr int LIMB_SIZE
void Divide(const Num3072 &a)
static constexpr int SIGNED_LIMB_SIZE
void Multiply(const Num3072 &a)
void WriteLE32(B *ptr, uint32_t x)
uint64_t ReadLE64(const B *ptr)
void WriteLE64(B *ptr, uint64_t x)
uint32_t ReadLE32(const B *ptr)
auto MakeByteSpan(const V &v) noexcept
auto MakeWritableByteSpan(V &&v) noexcept