Bitcoin Core 28.99.0
P2P Digital Currency
clmul_common_impl.h
Go to the documentation of this file.
1/**********************************************************************
2 * Copyright (c) 2018 Pieter Wuille, Greg Maxwell, Gleb Naumenko *
3 * Distributed under the MIT software license, see the accompanying *
4 * file LICENSE or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
6
7#ifndef _MINISKETCH_FIELDS_CLMUL_COMMON_IMPL_H_
8#define _MINISKETCH_FIELDS_CLMUL_COMMON_IMPL_H_ 1
9
10#include <stdint.h>
11#include <immintrin.h>
12
13#include "../int_utils.h"
14#include "../lintrans.h"
15
16namespace {
17
18// The memory sanitizer in clang < 11 cannot reason through _mm_clmulepi64_si128 calls.
19// Disable memory sanitization in the functions using them for those compilers.
20#if defined(__clang__) && (__clang_major__ < 11)
21# if defined(__has_feature)
22# if __has_feature(memory_sanitizer)
23# define NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
24# endif
25# endif
26#endif
27#ifndef NO_SANITIZE_MEMORY
28# define NO_SANITIZE_MEMORY
29#endif
30
31template<typename I, int BITS, I MOD> NO_SANITIZE_MEMORY I MulWithClMulReduce(I a, I b)
32{
33 static constexpr I MASK = Mask<BITS, I>();
34
35 const __m128i MOD128 = _mm_cvtsi64_si128(MOD);
36 __m128i product = _mm_clmulepi64_si128(_mm_cvtsi64_si128((uint64_t)a), _mm_cvtsi64_si128((uint64_t)b), 0x00);
37 if (BITS <= 32) {
38 __m128i high1 = _mm_srli_epi64(product, BITS);
39 __m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
40 __m128i high2 = _mm_srli_epi64(red1, BITS);
41 __m128i red2 = _mm_clmulepi64_si128(high2, MOD128, 0x00);
42 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
43 } else if (BITS == 64) {
44 __m128i red1 = _mm_clmulepi64_si128(product, MOD128, 0x01);
45 __m128i red2 = _mm_clmulepi64_si128(red1, MOD128, 0x01);
46 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2));
47 } else if ((BITS % 8) == 0) {
48 __m128i high1 = _mm_srli_si128(product, BITS / 8);
49 __m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
50 __m128i high2 = _mm_srli_si128(red1, BITS / 8);
51 __m128i red2 = _mm_clmulepi64_si128(high2, MOD128, 0x00);
52 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
53 } else {
54 __m128i high1 = _mm_or_si128(_mm_srli_epi64(product, BITS), _mm_srli_si128(_mm_slli_epi64(product, 64 - BITS), 8));
55 __m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
56 if ((uint64_t(MOD) >> (66 - BITS)) == 0) {
57 __m128i high2 = _mm_srli_epi64(red1, BITS);
58 __m128i red2 = _mm_clmulepi64_si128(high2, MOD128, 0x00);
59 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
60 } else {
61 __m128i high2 = _mm_or_si128(_mm_srli_epi64(red1, BITS), _mm_srli_si128(_mm_slli_epi64(red1, 64 - BITS), 8));
62 __m128i red2 = _mm_clmulepi64_si128(high2, MOD128, 0x00);
63 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
64 }
65 }
66}
67
68template<typename I, int BITS, int POS> NO_SANITIZE_MEMORY I MulTrinomial(I a, I b)
69{
70 static constexpr I MASK = Mask<BITS, I>();
71
72 __m128i product = _mm_clmulepi64_si128(_mm_cvtsi64_si128((uint64_t)a), _mm_cvtsi64_si128((uint64_t)b), 0x00);
73 if (BITS <= 32) {
74 __m128i high1 = _mm_srli_epi64(product, BITS);
75 __m128i red1 = _mm_xor_si128(high1, _mm_slli_epi64(high1, POS));
76 if (POS == 1) {
77 return _mm_cvtsi128_si64(_mm_xor_si128(product, red1)) & MASK;
78 } else {
79 __m128i high2 = _mm_srli_epi64(red1, BITS);
80 __m128i red2 = _mm_xor_si128(high2, _mm_slli_epi64(high2, POS));
81 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
82 }
83 } else {
84 __m128i high1 = _mm_or_si128(_mm_srli_epi64(product, BITS), _mm_srli_si128(_mm_slli_epi64(product, 64 - BITS), 8));
85 if (BITS + POS <= 66) {
86 __m128i red1 = _mm_xor_si128(high1, _mm_slli_epi64(high1, POS));
87 if (POS == 1) {
88 return _mm_cvtsi128_si64(_mm_xor_si128(product, red1)) & MASK;
89 } else if (BITS + POS <= 66) {
90 __m128i high2 = _mm_srli_epi64(red1, BITS);
91 __m128i red2 = _mm_xor_si128(high2, _mm_slli_epi64(high2, POS));
92 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
93 }
94 } else {
95 const __m128i MOD128 = _mm_cvtsi64_si128(1 + (((uint64_t)1) << POS));
96 __m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
97 __m128i high2 = _mm_or_si128(_mm_srli_epi64(red1, BITS), _mm_srli_si128(_mm_slli_epi64(red1, 64 - BITS), 8));
98 __m128i red2 = _mm_xor_si128(high2, _mm_slli_epi64(high2, POS));
99 return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;
100 }
101 }
102}
103
105template<typename I, int B, I MOD, I (*MUL)(I, I), typename F, const F* SQR, const F* SQR2, const F* SQR4, const F* SQR8, const F* SQR16, const F* QRT, typename T, const T* LOAD, const T* SAVE> struct GenField
106{
107 typedef BitsInt<I, B> O;
108 typedef LFSR<O, MOD> L;
109
110 static inline constexpr I Sqr1(I a) { return SQR->template Map<O>(a); }
111 static inline constexpr I Sqr2(I a) { return SQR2->template Map<O>(a); }
112 static inline constexpr I Sqr4(I a) { return SQR4->template Map<O>(a); }
113 static inline constexpr I Sqr8(I a) { return SQR8->template Map<O>(a); }
114 static inline constexpr I Sqr16(I a) { return SQR16->template Map<O>(a); }
115
116public:
117 typedef I Elem;
118
119 inline constexpr int Bits() const { return B; }
120
121 inline constexpr Elem Mul2(Elem val) const { return L::Call(val); }
122
123 inline Elem Mul(Elem a, Elem b) const { return MUL(a, b); }
124
125 class Multiplier
126 {
127 Elem m_val;
128 public:
129 inline constexpr explicit Multiplier(const GenField&, Elem a) : m_val(a) {}
130 constexpr Elem operator()(Elem a) const { return MUL(m_val, a); }
131 };
132
134 inline constexpr Elem Sqr(Elem val) const { return SQR->template Map<O>(val); }
135
137 inline constexpr Elem Qrt(Elem val) const { return QRT->template Map<O>(val); }
138
140 inline Elem Inv(Elem val) const { return InvLadder<I, O, B, MUL, Sqr1, Sqr2, Sqr4, Sqr8, Sqr16>(val); }
141
143 Elem FromSeed(uint64_t seed) const {
144 uint64_t k0 = 0x434c4d554c466c64ull; // "CLMULFld"
145 uint64_t k1 = seed;
146 uint64_t count = ((uint64_t)B) << 32;
147 I ret;
148 do {
149 ret = O::Mask(I(SipHash(k0, k1, count++)));
150 } while(ret == 0);
151 return LOAD->template Map<O>(ret);
152 }
153
154 Elem Deserialize(BitReader& in) const { return LOAD->template Map<O>(in.Read<B, I>()); }
155
156 void Serialize(BitWriter& out, Elem val) const { out.Write<B, I>(SAVE->template Map<O>(val)); }
157
158 constexpr Elem FromUint64(uint64_t x) const { return LOAD->template Map<O>(O::Mask(I(x))); }
159 constexpr uint64_t ToUint64(Elem val) const { return uint64_t(SAVE->template Map<O>(val)); }
160};
161
162template<typename I, int B, I MOD, typename F, const F* SQR, const F* SQR2, const F* SQR4, const F* SQR8, const F* SQR16, const F* QRT, typename T, const T* LOAD, const T* SAVE>
163using Field = GenField<I, B, MOD, MulWithClMulReduce<I, B, MOD>, F, SQR, SQR2, SQR4, SQR8, SQR16, QRT, T, LOAD, SAVE>;
164
165template<typename I, int B, int POS, typename F, const F* SQR, const F* SQR2, const F* SQR4, const F* SQR8, const F* SQR16, const F* QRT, typename T, const T* LOAD, const T* SAVE>
166using FieldTri = GenField<I, B, I(1) + (I(1) << POS), MulTrinomial<I, B, POS>, F, SQR, SQR2, SQR4, SQR8, SQR16, QRT, T, LOAD, SAVE>;
167
168}
169
170#endif
int ret
I Read()
Definition: int_utils.h:113
static constexpr I Mask(I val)
Definition: int_utils.h:198
#define NO_SANITIZE_MEMORY
#define T(expected, seed, data)
uint64_t SipHash(uint64_t k0, uint64_t k1, uint64_t data)
Definition: int_utils.h:35
void Serialize(Stream &, V)=delete
void Sqr(std::vector< typename F::Elem > &poly, const F &field)
Square a polynomial.
Definition: sketch_impl.h:92
Class which implements a stateless LFSR for generic moduli.
Definition: int_utils.h:230
static constexpr I Call(const I &a)
Shift a value a up once, treating it as an N-bit LFSR, with pattern MOD.
Definition: int_utils.h:233
static int count
#define B
Definition: util_tests.cpp:545