Bitcoin Core  27.99.0
P2P Digital Currency
pyminisketch.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # Copyright (c) 2020 Pieter Wuille
3 # Distributed under the MIT software license, see the accompanying
4 # file LICENSE or http://www.opensource.org/licenses/mit-license.php.
5 
6 """Native Python (slow) reimplementation of libminisketch' algorithms."""
7 
8 import random
9 import unittest
10 
11 # Irreducible polynomials over GF(2) to use (represented as integers).
12 #
13 # Most fields can be defined by multiple such polynomials. Minisketch uses the one with the minimal
14 # number of nonzero coefficients, and tie-breaking by picking the lexicographically first among
15 # those.
16 #
17 # All polynomials for degrees 2 through 64 (inclusive) are given.
18 GF2_MODULI = [
19  None, None,
20  2**2 + 2**1 + 1,
21  2**3 + 2**1 + 1,
22  2**4 + 2**1 + 1,
23  2**5 + 2**2 + 1,
24  2**6 + 2**1 + 1,
25  2**7 + 2**1 + 1,
26  2**8 + 2**4 + 2**3 + 2**1 + 1,
27  2**9 + 2**1 + 1,
28  2**10 + 2**3 + 1,
29  2**11 + 2**2 + 1,
30  2**12 + 2**3 + 1,
31  2**13 + 2**4 + 2**3 + 2**1 + 1,
32  2**14 + 2**5 + 1,
33  2**15 + 2**1 + 1,
34  2**16 + 2**5 + 2**3 + 2**1 + 1,
35  2**17 + 2**3 + 1,
36  2**18 + 2**3 + 1,
37  2**19 + 2**5 + 2**2 + 2**1 + 1,
38  2**20 + 2**3 + 1,
39  2**21 + 2**2 + 1,
40  2**22 + 2**1 + 1,
41  2**23 + 2**5 + 1,
42  2**24 + 2**4 + 2**3 + 2**1 + 1,
43  2**25 + 2**3 + 1,
44  2**26 + 2**4 + 2**3 + 2**1 + 1,
45  2**27 + 2**5 + 2**2 + 2**1 + 1,
46  2**28 + 2**1 + 1,
47  2**29 + 2**2 + 1,
48  2**30 + 2**1 + 1,
49  2**31 + 2**3 + 1,
50  2**32 + 2**7 + 2**3 + 2**2 + 1,
51  2**33 + 2**10 + 1,
52  2**34 + 2**7 + 1,
53  2**35 + 2**2 + 1,
54  2**36 + 2**9 + 1,
55  2**37 + 2**6 + 2**4 + 2**1 + 1,
56  2**38 + 2**6 + 2**5 + 2**1 + 1,
57  2**39 + 2**4 + 1,
58  2**40 + 2**5 + 2**4 + 2**3 + 1,
59  2**41 + 2**3 + 1,
60  2**42 + 2**7 + 1,
61  2**43 + 2**6 + 2**4 + 2**3 + 1,
62  2**44 + 2**5 + 1,
63  2**45 + 2**4 + 2**3 + 2**1 + 1,
64  2**46 + 2**1 + 1,
65  2**47 + 2**5 + 1,
66  2**48 + 2**5 + 2**3 + 2**2 + 1,
67  2**49 + 2**9 + 1,
68  2**50 + 2**4 + 2**3 + 2**2 + 1,
69  2**51 + 2**6 + 2**3 + 2**1 + 1,
70  2**52 + 2**3 + 1,
71  2**53 + 2**6 + 2**2 + 2**1 + 1,
72  2**54 + 2**9 + 1,
73  2**55 + 2**7 + 1,
74  2**56 + 2**7 + 2**4 + 2**2 + 1,
75  2**57 + 2**4 + 1,
76  2**58 + 2**19 + 1,
77  2**59 + 2**7 + 2**4 + 2**2 + 1,
78  2**60 + 2**1 + 1,
79  2**61 + 2**5 + 2**2 + 2**1 + 1,
80  2**62 + 2**29 + 1,
81  2**63 + 2**1 + 1,
82  2**64 + 2**4 + 2**3 + 2**1 + 1
83 ]
84 
85 class GF2Ops:
86  """Class to perform GF(2^field_size) operations on elements represented as integers.
87 
88  Given that elements are represented as integers, addition is simply xor, and not
89  exposed here.
90  """
91 
92  def __init__(self, field_size):
93  """Construct a GF2Ops object for the specified field size."""
94  self.field_sizefield_size = field_size
95  self._modulus_modulus = GF2_MODULI[field_size]
96  assert self._modulus_modulus is not None
97 
98  def mul2(self, x):
99  """Multiply x by 2 in GF(2^field_size)."""
100  x <<= 1
101  if x >> self.field_sizefield_size:
102  x ^= self._modulus_modulus
103  return x
104 
105  def mul(self, x, y):
106  """Multiply x by y in GF(2^field_size)."""
107  ret = 0
108  while y:
109  if y & 1:
110  ret ^= x
111  y >>= 1
112  x = self.mul2mul2(x)
113  return ret
114 
115  def sqr(self, x):
116  """Square x in GF(2^field_size)."""
117  return self.mulmul(x, x)
118 
119  def inv(self, x):
120  """Compute the inverse of x in GF(2^field_size)."""
121  assert x != 0
122  # Use the extended polynomial Euclidean GCD algorithm on (modulus, x), over GF(2).
123  # See https://en.wikipedia.org/wiki/Polynomial_greatest_common_divisor.
124  t1, t2 = 0, 1
125  r1, r2 = self._modulus_modulus, x
126  r1l, r2l = self.field_sizefield_size + 1, r2.bit_length()
127  while r2:
128  q = r1l - r2l
129  r1 ^= r2 << q
130  t1 ^= t2 << q
131  r1l = r1.bit_length()
132  if r1 < r2:
133  t1, t2 = t2, t1
134  r1, r2 = r2, r1
135  r1l, r2l = r2l, r1l
136  assert r1 == 1
137  return t1
138 
139 class TestGF2Ops(unittest.TestCase):
140  """Test class for basic arithmetic properties of GF2Ops."""
141 
142  def field_size_test(self, field_size):
143  """Test operations for given field_size."""
144 
145  gf = GF2Ops(field_size)
146  for i in range(100):
147  x = random.randrange(1 << field_size)
148  y = random.randrange(1 << field_size)
149  x2 = gf.mul2(x)
150  xy = gf.mul(x, y)
151  self.assertEqual(x2, gf.mul(x, 2)) # mul2(x) == x*2
152  self.assertEqual(x2, gf.mul(2, x)) # mul2(x) == 2*x
153  self.assertEqual(xy == 0, x == 0 or y == 0)
154  self.assertEqual(xy == x, y == 1 or x == 0)
155  self.assertEqual(xy == y, x == 1 or y == 0)
156  self.assertEqual(xy, gf.mul(y, x)) # x*y == y*x
157  if i < 10:
158  xp = x
159  for _ in range(field_size):
160  xp = gf.sqr(xp)
161  self.assertEqual(xp, x) # x^(2^field_size) == x
162  if y != 0:
163  yi = gf.inv(y)
164  self.assertEqual(y == yi, y == 1) # y==1/x iff y==1
165  self.assertEqual(gf.mul(y, yi), 1) # y*(1/y) == 1
166  yii = gf.inv(yi)
167  self.assertEqual(y, yii) # 1/(1/y) == y
168  if x != 0:
169  xi = gf.inv(x)
170  xyi = gf.inv(xy)
171  self.assertEqual(xyi, gf.mul(xi, yi)) # (1/x)*(1/y) == 1/(x*y)
172 
173  def test(self):
174  """Run tests."""
175  for field_size in range(2, 65):
176  self.field_size_testfield_size_test(field_size)
177 
178 # The operations below operate on polynomials over GF(2^field_size), represented as lists of
179 # integers:
180 #
181 # [a, b, c, ...] = a + b*x + c*x^2 + ...
182 #
183 # As an invariant, there are never any trailing zeroes in the list representation.
184 #
185 # Examples:
186 # * [] = 0
187 # * [3] = 3
188 # * [0, 1] = x
189 # * [2, 0, 5] = 5*x^2 + 2
190 
191 def poly_monic(poly, gf):
192  """Return a monic version of the polynomial poly."""
193  # Multiply every coefficient with the inverse of the top coefficient.
194  inv = gf.inv(poly[-1])
195  return [gf.mul(inv, v) for v in poly]
196 
197 def poly_divmod(poly, mod, gf):
198  """Return the polynomial (quotient, remainder) of poly divided by mod."""
199  assert len(mod) > 0 and mod[-1] == 1 # Require monic mod.
200  if len(poly) < len(mod):
201  return ([], poly)
202  val = list(poly)
203  div = [0 for _ in range(len(val) - len(mod) + 1)]
204  while len(val) >= len(mod):
205  term = val[-1]
206  div[len(val) - len(mod)] = term
207  # If the highest coefficient in val is nonzero, subtract a multiple of mod from it.
208  val.pop()
209  if term != 0:
210  for x in range(len(mod) - 1):
211  val[1 + x - len(mod)] ^= gf.mul(term, mod[x])
212  # Prune trailing zero coefficients.
213  while len(val) > 0 and val[-1] == 0:
214  val.pop()
215  return div, val
216 
217 def poly_gcd(a, b, gf):
218  """Return the polynomial GCD of a and b."""
219  if len(a) < len(b):
220  a, b = b, a
221  # Use Euclid's algorithm to find the GCD of a and b.
222  # see https://en.wikipedia.org/wiki/Polynomial_greatest_common_divisor#Euclid's_algorithm.
223  while len(b) > 0:
224  b = poly_monic(b, gf)
225  (_, b), a = poly_divmod(a, b, gf), b
226  return a
227 
228 def poly_sqr(poly, gf):
229  """Return the square of polynomial poly."""
230  if len(poly) == 0:
231  return []
232  # In characteristic-2 fields, thanks to Frobenius' endomorphism ((a + b)^2 = a^2 + b^2),
233  # squaring a polynomial is easy: square all the coefficients and interleave with zeroes.
234  # E.g., (3 + 5*x + 17*x^2)^2 = 3^2 + (5*x)^2 + (17*x^2)^2.
235  # See https://en.wikipedia.org/wiki/Frobenius_endomorphism.
236  return [0 if i & 1 else gf.sqr(poly[i // 2]) for i in range(2 * len(poly) - 1)]
237 
238 def poly_tracemod(poly, param, gf):
239  """Compute y + y^2 + y^4 + ... + y^(2^(field_size-1)) mod poly, where y = param*x."""
240  out = [0, param]
241  for _ in range(gf.field_size - 1):
242  # In each loop iteration i, we start with out = y + y^2 + ... + y^(2^i). By squaring that we
243  # transform it into out = y^2 + y^4 + ... + y^(2^(i+1)).
244  out = poly_sqr(out, gf)
245  # Thus, we just need to add y again to it to get out = y + ... + y^(2^(i+1)).
246  while len(out) < 2:
247  out.append(0)
248  out[1] = param
249  # Finally take a modulus to keep the intermediary polynomials small.
250  _, out = poly_divmod(out, poly, gf)
251  return out
252 
253 def poly_frobeniusmod(poly, gf):
254  """Compute x^(2^field_size) mod poly."""
255  out = [0, 1]
256  for _ in range(gf.field_size):
257  _, out = poly_divmod(poly_sqr(out, gf), poly, gf)
258  return out
259 
260 def poly_find_roots(poly, gf):
261  """Find the roots of poly if fully factorizable with unique roots, [] otherwise."""
262  assert len(poly) > 0
263  # If the polynomial is constant (and nonzero), it has no roots.
264  if len(poly) == 1:
265  return []
266  # Make the polynomial monic (which doesn't change its roots).
267  poly = poly_monic(poly, gf)
268  # If the polynomial is of the form x+a, return a.
269  if len(poly) == 2:
270  return [poly[0]]
271  # Otherwise, first test that poly can be completely factored into unique roots. The polynomial
272  # x^(2^fieldsize)-x has every field element once as root. Thus we want to know that that is a
273  # multiple of poly. Compute x^(field_size) mod poly, which needs to equal x if that is the case
274  # (unless poly has degree <= 1, but that case is handled above).
275  if poly_frobeniusmod(poly, gf) != [0, 1]:
276  return []
277 
278  def rec_split(poly, randv):
279  """Recursively split poly using the Berlekamp trace algorithm."""
280  # See https://hal.archives-ouvertes.fr/hal-00626997/document.
281  assert len(poly) > 1 and poly[-1] == 1 # Require a monic poly.
282  # If poly is of the form x+a, its root is a.
283  if len(poly) == 2:
284  return [poly[0]]
285  # Try consecutive randomization factors randv, until one is found that factors poly.
286  while True:
287  # Compute the trace of (randv*x) mod poly. This is a polynomial that maps half of the
288  # domain to 0, and the other half to 1. Which half that is is controlled by randv.
289  # By taking it modulo poly, we only add a multiple of poly. Thus the result has at least
290  # the shared roots of the trace polynomial and poly still, but may have others.
291  trace = poly_tracemod(poly, randv, gf)
292  # Using the set {2^i*a for i=0..fieldsize-1} gives optimally independent randv values
293  # (no more than fieldsize are ever needed).
294  randv = gf.mul2(randv)
295  # Now take the GCD of this trace polynomial with poly. The result is a polynomial
296  # that only has the shared roots of the trace polynomial and poly as roots.
297  gcd = poly_gcd(trace, poly, gf)
298  # If the result has a degree higher than 1, and lower than that of poly, we found a
299  # useful factorization.
300  if len(gcd) != len(poly) and len(gcd) > 1:
301  break
302  # Otherwise, continue with another randv.
303  # Find the actual factors: the monic version of the GCD above, and poly divided by it.
304  factor1 = poly_monic(gcd, gf)
305  factor2, _ = poly_divmod(poly, gcd, gf)
306  # Recurse.
307  return rec_split(factor1, randv) + rec_split(factor2, randv)
308 
309  # Invoke the recursive splitting with a random initial factor, and sort the results.
310  return sorted(rec_split(poly, random.randrange(1, 1 << gf.field_size)))
311 
312 class TestPolyFindRoots(unittest.TestCase):
313  """Test class for poly_find_roots."""
314 
315  def field_size_test(self, field_size):
316  """Run tests for given field_size."""
317  gf = GF2Ops(field_size)
318  for test_size in [0, 1, 2, 3, 10]:
319  roots = [random.randrange(1 << field_size) for _ in range(test_size)]
320  roots_set = set(roots)
321  # Construct a polynomial with all elements of roots as roots (with multiplicity).
322  poly = [1]
323  for root in roots:
324  new_poly = [0] + poly
325  for n, c in enumerate(poly):
326  new_poly[n] ^= gf.mul(c, root)
327  poly = new_poly
328  # Invoke the root finding algorithm.
329  found_roots = poly_find_roots(poly, gf)
330  # The result must match the input, unless any roots were repeated.
331  if len(roots) == len(roots_set):
332  self.assertEqual(found_roots, sorted(roots))
333  else:
334  self.assertEqual(found_roots, [])
335 
336  def test(self):
337  """Run tests."""
338  for field_size in range(2, 65):
339  self.field_size_testfield_size_test(field_size)
340 
341 def berlekamp_massey(syndromes, gf):
342  """Implement the Berlekamp-Massey algorithm.
343 
344  Takes as input a sequence of GF(2^field_size) elements, and returns the shortest LSFR
345  that generates it, represented as a polynomial.
346  """
347  # See https://en.wikipedia.org/wiki/Berlekamp%E2%80%93Massey_algorithm.
348  current = [1]
349  prev = [1]
350  b_inv = 1
351  for n, discrepancy in enumerate(syndromes):
352  # Compute discrepancy
353  for i in range(1, len(current)):
354  discrepancy ^= gf.mul(syndromes[n - i], current[i])
355 
356  # Correct if discrepancy is nonzero.
357  if discrepancy:
358  x = n + 1 - (len(current) - 1) - (len(prev) - 1)
359  if 2 * (len(current) - 1) <= n:
360  tmp = list(current)
361  current.extend(0 for _ in range(len(prev) + x - len(current)))
362  mul = gf.mul(discrepancy, b_inv)
363  for i, v in enumerate(prev):
364  current[i + x] ^= gf.mul(mul, v)
365  prev = tmp
366  b_inv = gf.inv(discrepancy)
367  else:
368  mul = gf.mul(discrepancy, b_inv)
369  for i, v in enumerate(prev):
370  current[i + x] ^= gf.mul(mul, v)
371  return current
372 
374  """A Minisketch sketch.
375 
376  This represents a sketch of a certain capacity, with elements of a certain bit size.
377  """
378 
379  def __init__(self, field_size, capacity):
380  """Initialize an empty sketch with the specified field_size size and capacity."""
381  self.field_sizefield_size = field_size
382  self.capacitycapacity = capacity
383  self.odd_syndromesodd_syndromes = [0] * capacity
384  self.gfgf = GF2Ops(field_size)
385 
386  def add(self, element):
387  """Add an element to this sketch. 1 <= element < 2**field_size."""
388  sqr = self.gfgf.sqr(element)
389  for pos in range(self.capacitycapacity):
390  self.odd_syndromesodd_syndromes[pos] ^= element
391  element = self.gfgf.mul(sqr, element)
392 
393  def serialized_size(self):
394  """Compute how many bytes a serialization of this sketch will be in size."""
395  return (self.capacitycapacity * self.field_sizefield_size + 7) // 8
396 
397  def serialize(self):
398  """Serialize this sketch to bytes."""
399  val = 0
400  for i in range(self.capacitycapacity):
401  val |= self.odd_syndromesodd_syndromes[i] << (self.field_sizefield_size * i)
402  return val.to_bytes(self.serialized_sizeserialized_size(), 'little')
403 
404  def deserialize(self, byte_data):
405  """Deserialize a byte array into this sketch, overwriting its contents."""
406  assert len(byte_data) == self.serialized_sizeserialized_size()
407  val = int.from_bytes(byte_data, 'little')
408  for i in range(self.capacitycapacity):
409  self.odd_syndromesodd_syndromes[i] = (val >> (self.field_sizefield_size * i)) & ((1 << self.field_sizefield_size) - 1)
410 
411  def clone(self):
412  """Return a clone of this sketch."""
413  ret = Minisketch(self.field_sizefield_size, self.capacitycapacity)
414  ret.odd_syndromes = list(self.odd_syndromesodd_syndromes)
415  ret.gf = self.gfgf
416  return ret
417 
418  def merge(self, other):
419  """Merge a sketch with another sketch. Corresponds to XOR'ing their serializations."""
420  assert self.capacitycapacity == other.capacity
421  assert self.field_sizefield_size == other.field_size
422  for i in range(self.capacitycapacity):
423  self.odd_syndromesodd_syndromes[i] ^= other.odd_syndromes[i]
424 
425  def decode(self, max_count=None):
426  """Decode the contents of this sketch.
427 
428  Returns either a list of elements or None if undecodable.
429  """
430  # We know the odd syndromes s1=x+y+..., s3=x^3+y^3+..., s5=..., and reconstruct the even
431  # syndromes from this:
432  # * s2 = x^2+y^2+.... = (x+y+...)^2 = s1^2
433  # * s4 = x^4+y^4+.... = (x^2+y^2+...)^2 = s2^2
434  # * s6 = x^6+y^6+.... = (x^3+y^3+...)^2 = s3^2
435  all_syndromes = [0 for _ in range(2 * len(self.odd_syndromesodd_syndromes))]
436  for i in range(len(self.odd_syndromesodd_syndromes)):
437  all_syndromes[i * 2] = self.odd_syndromesodd_syndromes[i]
438  all_syndromes[i * 2 + 1] = self.gfgf.sqr(all_syndromes[i])
439  # Given the syndromes, find the polynomial that generates them.
440  poly = berlekamp_massey(all_syndromes, self.gfgf)
441  # Deal with failure and trivial cases.
442  if len(poly) == 0:
443  return None
444  if len(poly) == 1:
445  return []
446  if max_count is not None and len(poly) > 1 + max_count:
447  return None
448  # If the polynomial can be factored into (1-m1*x)*(1-m2*x)*...*(1-mn*x), then {m1,m2,...,mn}
449  # is our set. As each factor (1-m*x) has 1/m as root, we're really just looking for the
450  # inverses of the roots. We find these by reversing the order of the coefficients, and
451  # finding the roots.
452  roots = poly_find_roots(list(reversed(poly)), self.gfgf)
453  if len(roots) == 0:
454  return None
455  return roots
456 
457 class TestMinisketch(unittest.TestCase):
458  """Test class for Minisketch."""
459 
460  @classmethod
461  def construct_data(cls, field_size, num_a_only, num_b_only, num_both):
462  """Construct two random lists of elements in [1..2**field_size-1].
463 
464  Each list will have unique elements that don't appear in the other (num_a_only in the first
465  and num_b_only in the second), and num_both elements will appear in both."""
466  sample = []
467  # Simulate random.sample here (which doesn't work with ranges over 2**63).
468  for _ in range(num_a_only + num_b_only + num_both):
469  while True:
470  r = random.randrange(1, 1 << field_size)
471  if r not in sample:
472  sample.append(r)
473  break
474  full_a = sample[:num_a_only + num_both]
475  full_b = sample[num_a_only:]
476  random.shuffle(full_a)
477  random.shuffle(full_b)
478  return full_a, full_b
479 
480  def field_size_capacity_test(self, field_size, capacity):
481  """Test Minisketch methods for a specific field and capacity."""
482  used_capacity = random.randrange(capacity + 1)
483  num_a = random.randrange(used_capacity + 1)
484  num_both = random.randrange(min(2 * capacity, (1 << field_size) - 1 - used_capacity) + 1)
485  full_a, full_b = self.construct_dataconstruct_data(field_size, num_a, used_capacity - num_a, num_both)
486  sketch_a = Minisketch(field_size, capacity)
487  sketch_b = Minisketch(field_size, capacity)
488  for v in full_a:
489  sketch_a.add(v)
490  for v in full_b:
491  sketch_b.add(v)
492  sketch_combined = sketch_a.clone()
493  sketch_b_ser = sketch_b.serialize()
494  sketch_b_received = Minisketch(field_size, capacity)
495  sketch_b_received.deserialize(sketch_b_ser)
496  sketch_combined.merge(sketch_b_received)
497  decode = sketch_combined.decode()
498  self.assertEqual(decode, sorted(set(full_a) ^ set(full_b)))
499 
500  def test(self):
501  """Run tests."""
502  for field_size in range(2, 65):
503  for capacity in [0, 1, 2, 5, 10, field_size]:
504  self.field_size_capacity_testfield_size_capacity_test(field_size, min(capacity, (1 << field_size) - 1))
505 
506 if __name__ == '__main__':
507  unittest.main()
def sqr(self, x)
def mul2(self, x)
Definition: pyminisketch.py:98
def inv(self, x)
def mul(self, x, y)
def __init__(self, field_size)
Definition: pyminisketch.py:92
def __init__(self, field_size, capacity)
def deserialize(self, byte_data)
def merge(self, other)
def add(self, element)
def decode(self, max_count=None)
def field_size_test(self, field_size)
def construct_data(cls, field_size, num_a_only, num_b_only, num_both)
def field_size_capacity_test(self, field_size, capacity)
def field_size_test(self, field_size)
def poly_divmod(poly, mod, gf)
def poly_monic(poly, gf)
def poly_sqr(poly, gf)
def poly_tracemod(poly, param, gf)
def poly_frobeniusmod(poly, gf)
def poly_find_roots(poly, gf)
def poly_gcd(a, b, gf)
def berlekamp_massey(syndromes, gf)