14 #include "../include/minisketch.h"
19 uint64_t Combination(uint64_t n, uint64_t k) {
20 if (n - k < k)
k = n -
k;
22 for (uint64_t i = 1; i <=
k; ++i) {
30 std::vector<Minisketch> CreateSketches(uint32_t bits,
size_t capacity) {
31 if (!Minisketch::BitsSupported(bits))
return {};
32 std::vector<Minisketch> ret;
33 for (uint32_t impl = 0; impl <= Minisketch::MaxImplementation(); ++impl) {
34 if (Minisketch::ImplementationSupported(bits, impl)) {
35 CHECK(Minisketch::BitsSupported(bits));
36 ret.push_back(Minisketch(bits, impl, capacity));
37 CHECK((
bool)ret.back());
40 CHECK(impl != 0 || !Minisketch::BitsSupported(bits));
48 void TestExhaustive(uint32_t bits,
size_t capacity) {
49 auto sketches = CreateSketches(bits, capacity);
50 if (sketches.empty())
return;
51 auto sketches_rebuild = CreateSketches(bits, capacity);
53 std::vector<unsigned char> serialized;
54 std::vector<unsigned char> serialized_empty;
55 std::vector<uint64_t> counts;
56 std::vector<uint64_t> elements_0;
57 std::vector<uint64_t> elements_other;
58 std::vector<uint64_t> elements_too_small;
60 counts.resize(capacity + 1);
61 serialized.resize(sketches[0].GetSerializedSize());
62 serialized_empty.resize(sketches[0].GetSerializedSize());
65 for (uint64_t x = 0; (x >> (bits * capacity)) == 0; ++x) {
67 for (
size_t i = 0; i < serialized.size(); ++i) {
68 serialized[i] = (x >> (i * 8)) & 0xFF;
72 sketches[0].Deserialize(serialized);
73 elements_0.resize(64);
74 bool decodable_0 = sketches[0].Decode(elements_0);
75 std::sort(elements_0.begin(), elements_0.end());
78 for (
size_t impl = 1; impl < sketches.size(); ++impl) {
79 sketches[impl].Deserialize(serialized);
80 elements_other.resize(64);
81 bool decodable_other = sketches[impl].Decode(elements_other);
82 CHECK(decodable_other == decodable_0);
83 std::sort(elements_other.begin(), elements_other.end());
84 CHECK(elements_other == elements_0);
89 if (!elements_0.empty()) {
91 elements_too_small.resize(elements_0.size() - 1);
92 for (
size_t impl = 0; impl < sketches.size(); ++impl) {
98 for (
size_t impl = 0; impl < sketches.size(); ++impl) {
100 sketches_rebuild[impl].Deserialize(serialized_empty);
102 for (uint64_t elem : elements_0) {
104 CHECK(elem >> bits == 0);
105 sketches_rebuild[impl].Add(elem);
108 auto serialized_rebuild = sketches_rebuild[impl].Serialize();
110 CHECK(serialized == serialized_rebuild);
112 if (impl == 0 && elements_0.size() <= capacity) ++counts[elements_0.size()];
118 uint64_t mask = bits == 64 ? UINT64_MAX : (uint64_t{1} << bits) - 1;
119 for (uint64_t i = 0; i <= capacity && (i & mask) == i; ++i) {
120 CHECK(counts[i] == Combination(mask, i));
125 void TestRandomized(uint32_t bits,
size_t max_capacity,
size_t iter) {
126 std::random_device rnd;
127 std::uniform_int_distribution<uint64_t> capacity_dist(0, std::min<uint64_t>(std::numeric_limits<uint64_t>::max() >> (64 - bits), max_capacity));
128 std::uniform_int_distribution<uint64_t> element_dist(1, std::numeric_limits<uint64_t>::max() >> (64 - bits));
129 std::uniform_int_distribution<uint64_t> rand64(0, std::numeric_limits<uint64_t>::max());
130 std::uniform_int_distribution<int64_t> size_offset_dist(-3, 3);
132 std::vector<uint64_t> decode_0;
133 std::vector<uint64_t> decode_other;
134 std::vector<uint64_t> decode_temp;
135 std::vector<uint64_t> elements;
137 for (
size_t i = 0; i < iter; ++i) {
139 uint64_t capacity = capacity_dist(rnd);
140 auto sketches = CreateSketches(bits, capacity);
142 if (sketches.empty())
return;
143 for (
size_t impl = 0; impl < sketches.size(); ++impl) {
144 CHECK(sketches[impl].GetBits() == bits);
145 CHECK(sketches[impl].GetCapacity() == capacity);
146 CHECK(sketches[impl].GetSerializedSize() == sketches[0].GetSerializedSize());
149 size_t element_count = std::max<int64_t>(0, std::max<int64_t>(0, capacity + size_offset_dist(rnd)));
150 elements.resize(element_count);
152 for (
size_t j = 0; j < element_count; ++j) {
153 uint64_t elem = element_dist(rnd);
156 for (
auto& sketch : sketches) sketch.Add(elem);
159 std::sort(elements.begin(), elements.end());
160 size_t real_element_count = element_count;
161 for (
size_t pos = 0; pos + 1 < elements.size(); ++pos) {
162 if (elements[pos] == elements[pos + 1]) {
163 real_element_count -= 2;
166 elements[pos + 1] = 0;
170 if (real_element_count < element_count) {
172 std::sort(elements.begin(), elements.end(), [](uint64_t a, uint64_t b) { return a != b && (b == 0 || (a != 0 && a < b)); });
173 CHECK(elements[real_element_count] == 0);
174 elements.resize(real_element_count);
177 auto serialized_0 = sketches[0].Serialize();
178 for (
size_t impl = 1; impl < sketches.size(); ++impl) {
179 auto serialized_other = sketches[impl].Serialize();
180 CHECK(serialized_other == serialized_0);
183 for (
size_t impl = 0; impl < sketches.size(); ++impl) {
184 sketches[impl].Deserialize(serialized_0);
185 auto reserialized = sketches[impl].Serialize();
186 CHECK(reserialized == serialized_0);
189 decode_0.resize(capacity);
190 bool decodable_0 = sketches[0].Decode(decode_0);
191 std::sort(decode_0.begin(), decode_0.end());
192 for (
size_t impl = 1; impl < sketches.size(); ++impl) {
193 decode_other.resize(capacity);
194 bool decodable_other = sketches[impl].Decode(decode_other);
195 CHECK(decodable_other == decodable_0);
196 std::sort(decode_other.begin(), decode_other.end());
197 CHECK(decode_other == decode_0);
202 for (
auto& sketch : sketches) {
203 decode_temp.resize(decode_0.size());
204 bool decodable = sketch.Decode(decode_temp);
206 std::sort(decode_temp.begin(), decode_temp.end());
207 CHECK(decode_temp == decode_0);
208 if (!decode_0.empty()) {
209 decode_temp.resize(decode_0.size() - 1);
210 decodable = sketch.Decode(decode_temp);
217 if (real_element_count <= capacity) {
219 CHECK(decode_0 == elements);
224 void TestComputeFunctions() {
225 for (uint32_t bits = 0; bits <= 256; ++bits) {
226 for (uint32_t fpbits = 0; fpbits <= 512; ++fpbits) {
227 std::vector<size_t> table_max_elements(1025);
228 for (
size_t capacity = 0; capacity <= 1024; ++capacity) {
231 if (bits == 0)
CHECK(table_max_elements[capacity] == 0);
233 CHECK(table_max_elements[capacity] <= capacity);
235 if (bits > 0)
CHECK(table_max_elements[capacity] == 0 || capacity - table_max_elements[capacity] <= (fpbits + bits - 1) / bits);
237 if (capacity > 0)
CHECK(table_max_elements[capacity] == 0 || table_max_elements[capacity] > table_max_elements[capacity - 1]);
240 std::vector<size_t> table_capacity(513);
241 for (
size_t max_elements = 0; max_elements <= 512; ++max_elements) {
244 if (bits == 0)
CHECK(table_capacity[max_elements] == 0);
246 if (bits > 0)
CHECK(table_capacity[max_elements] >= max_elements);
248 if (bits > 0)
CHECK(bits * table_capacity[max_elements] >= fpbits);
250 if (bits > 0)
CHECK(table_capacity[max_elements] - max_elements <= (fpbits + bits - 1) / bits);
252 if (max_elements > 0 && fpbits < 256)
CHECK(table_capacity[max_elements] == table_capacity[max_elements - 1] || table_capacity[max_elements] == table_capacity[max_elements - 1] + 1);
254 CHECK(table_capacity[max_elements] <= 1024);
255 CHECK(table_max_elements[table_capacity[max_elements]] == 0 || table_max_elements[table_capacity[max_elements]] >= max_elements);
258 for (
size_t capacity = 0; capacity <= 512; ++capacity) {
260 CHECK(table_max_elements[capacity] <= 512);
261 CHECK(table_max_elements[capacity] == 0 || table_capacity[table_max_elements[capacity]] == capacity);
269 int main(
int argc,
char** argv) {
270 uint64_t test_complexity = 4;
273 std::string arg{argv[1]};
276 long long complexity = std::stoll(arg, &len);
277 if (complexity >= 1 && len == arg.size() && ((uint64_t)complexity <= std::numeric_limits<uint64_t>::max() >> 10)) {
278 test_complexity = complexity;
280 }
catch (
const std::logic_error&) {}
281 if (test_complexity == 0) {
282 fprintf(stderr,
"Invalid complexity specified: '%s'\n", arg.c_str());
287 #ifdef MINISKETCH_VERIFY
288 const char* mode =
" in verify mode";
290 const char* mode =
"";
292 printf(
"Running libminisketch tests%s with complexity=%llu\n", mode, (
unsigned long long)test_complexity);
294 TestComputeFunctions();
296 for (
unsigned j = 2; j <= 64; ++j) {
297 TestRandomized(j, 8, (test_complexity << 10) / j);
298 TestRandomized(j, 128, (test_complexity << 7) / j);
299 TestRandomized(j, 4096, test_complexity / j);
305 for (
int weight = 0; weight <= 40; ++weight) {
306 for (
int bits = 2; weight == 0 ? bits <= 64 : (bits <= 32 && bits <= weight); ++bits) {
307 int capacity = weight / bits;
308 if (capacity * bits != weight)
continue;
309 TestExhaustive(bits, capacity);
311 if (weight >= 16 && test_complexity >> (weight - 16) == 0)
break;
314 printf(
"All tests successful.\n");