Bitcoin Core 31.99.0
P2P Digital Currency
asmap.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-present The Bitcoin Core developers
2// Distributed under the MIT software license, see the accompanying
3// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5#include <util/asmap.h>
6
7#include <hash.h>
8#include <streams.h>
9#include <uint256.h>
10#include <util/check.h>
11#include <util/fs.h>
12#include <util/log.h>
13
14#include <bit>
15#include <cstddef>
16#include <cstdio>
17#include <span>
18#include <string>
19#include <utility>
20#include <vector>
21
22/*
23 * ASMap (Autonomous System Map) Implementation
24 *
25 * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
26 * Uses a binary trie structure encoded as bytecode instructions that are interpreted
27 * at runtime to find the ASN for a given IP address.
28 *
29 * The format of the asmap data is a bit-packed binary format where the entire mapping
30 * is treated as a continuous sequence of bits. Instructions and their arguments are
31 * encoded using variable numbers of bits and concatenated together without regard for
32 * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
33 *
34 * The data structure internally represents the mapping as a binary trie where:
35 * - Unassigned subnets (no ASN mapping present) map to 0
36 * - Subnets mapped entirely to one ASN become leaf nodes
37 * - Subnets whose lower and upper halves have different mappings branch into subtrees
38 *
39 * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
40 * MATCH, DEFAULT) to efficiently represent the trie.
41 */
42
43namespace {
44
45// Indicates decoding errors or invalid data
46constexpr uint32_t INVALID = 0xFFFFFFFF;
47
52inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
53{
54 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
55 ++bitpos;
56 return bit;
57}
58
63inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
64{
65 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
66 ++bitpos;
67 return bit;
68}
69
85uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
86{
87 uint32_t val = minval; // Start with minimum encodable value
88 bool bit;
89 for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
90 // Read continuation bit to determine if we're in this class
91 if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class
92 if (bitpos >= data.size() * 8) break;
93 bit = ConsumeBitLE(bitpos, data);
94 } else {
95 bit = 0; // Last class has no continuation bit
96 }
97 if (bit) {
98 // If the value will not fit in this class, subtract its range from val,
99 // emit a "1" bit and continue with the next class
100 val += (1 << *bit_sizes_it); // Add size of this class
101 } else {
102 // Decode the position within this class in big endian
103 for (int b = 0; b < *bit_sizes_it; b++) {
104 if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
105 bit = ConsumeBitLE(bitpos, data);
106 val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
107 }
108 return val;
109 }
110 }
111 return INVALID; // Reached EOF in exponent
112}
113
121enum class Instruction : uint32_t
122{
123 // A return instruction, encoded as [0], returns a constant ASN.
124 // It is followed by an integer using the ASN encoding.
125 RETURN = 0,
126 // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
127 // and either continues execution (if 0), or skips a specified number of bits (if 1).
128 // It is followed by an integer using jump encoding.
129 JUMP = 1,
130 // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
131 // in the input. If they all match, execution continues. If not, the default ASN is returned
132 // (or 0 if unset). The match value encodes both the pattern and its length.
133 MATCH = 2,
134 // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
135 // and continues execution. It is followed by an integer in ASN encoding.
136 DEFAULT = 3,
137};
138
139// Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
140constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
141Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
142{
143 return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
144}
145
146// ASN encoding: Can encode ASNs from 1 to ~16.7 million.
147// Uses variable-length encoding optimized for real-world ASN distribution.
148// ASN 0 is reserved and used if there isn't a match.
149constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
150uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
151{
152 return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
153}
154
155// MATCH argument: Values in [2, 511]. The highest set bit determines the match length
156// n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
157constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
158uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
159{
160 return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
161}
162
163// JUMP offset: Minimum value 17. Variable-length coded and may be large
164// for skipping big subtrees.
165constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
166uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
167{
168 return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
169}
170
171} // anonymous namespace
172
180uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
181{
182 size_t pos{0};
183 const size_t endpos{asmap.size() * 8};
184 uint8_t ip_bit{0};
185 const uint8_t ip_bits_end = ip.size() * 8;
186 uint32_t default_asn = 0;
187 while (pos < endpos) {
188 Instruction opcode = DecodeType(pos, asmap);
189 if (opcode == Instruction::RETURN) {
190 // Found leaf node - return the ASN
191 uint32_t asn = DecodeASN(pos, asmap);
192 if (asn == INVALID) break; // ASN straddles EOF
193 return asn;
194 } else if (opcode == Instruction::JUMP) {
195 // Binary branch: if IP bit is 1, jump forward; else continue
196 uint32_t jump = DecodeJump(pos, asmap);
197 if (jump == INVALID) break; // Jump offset straddles EOF
198 if (ip_bit == ip_bits_end) break; // No input bits left
199 if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
200 if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian)
201 pos += jump; // Bit = 1: skip to right subtree
202 }
203 // Bit = 0: fall through to left subtree
204 } else if (opcode == Instruction::MATCH) {
205 // Compare multiple IP bits against a pattern
206 // The match value encodes both length and pattern:
207 // - highest set bit position determines length (bit_width - 1)
208 // - lower bits contain the pattern to compare
209 uint32_t match = DecodeMatch(pos, asmap);
210 if (match == INVALID) break; // Match bits straddle EOF
211 int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits
212 if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
213 for (int bit = 0; bit < matchlen; bit++) {
214 if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
215 return default_asn; // Pattern mismatch - use default
216 }
217 }
218 // Pattern matched - continue execution
219 } else if (opcode == Instruction::DEFAULT) {
220 // Update the default ASN for subsequent MATCH failures
221 default_asn = DecodeASN(pos, asmap);
222 if (default_asn == INVALID) break; // ASN straddles EOF
223 } else {
224 break; // Instruction straddles EOF
225 }
226 }
227 // Reached EOF without RETURN, or aborted (see any of the breaks above)
228 // - should have been caught by SanityCheckAsmap below
229 assert(false);
230 return 0; // 0 is not a valid ASN
231}
232
237bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
238{
239 size_t pos{0};
240 const size_t endpos{asmap.size() * 8};
241 std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
242 jumps.reserve(bits);
243 Instruction prevopcode = Instruction::JUMP;
244 bool had_incomplete_match = false; // Track <8 bit matches for efficiency check
245
246 while (pos != endpos) {
247 // There was a jump into the middle of the previous instruction
248 if (!jumps.empty() && pos >= jumps.back().first) return false;
249
250 Instruction opcode = DecodeType(pos, asmap);
251 if (opcode == Instruction::RETURN) {
252 // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
253 if (prevopcode == Instruction::DEFAULT) return false;
254 uint32_t asn = DecodeASN(pos, asmap);
255 if (asn == INVALID) return false; // ASN straddles EOF
256 if (jumps.empty()) {
257 // Nothing to execute anymore
258 if (endpos - pos > 7) return false; // Excessive padding
259 while (pos != endpos) {
260 if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
261 }
262 return true; // Sanely reached EOF
263 } else {
264 // Continue by pretending we jumped to the next instruction
265 if (pos != jumps.back().first) return false; // Unreachable code
266 bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
267 jumps.pop_back();
268 prevopcode = Instruction::JUMP;
269 }
270 } else if (opcode == Instruction::JUMP) {
271 uint32_t jump = DecodeJump(pos, asmap);
272 if (jump == INVALID) return false; // Jump offset straddles EOF
273 if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
274 if (bits == 0) return false; // Consuming bits past the end of the input
275 --bits;
276 uint32_t jump_offset = pos + jump;
277 if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
278 jumps.emplace_back(jump_offset, bits); // Queue jump target for validation
279 prevopcode = Instruction::JUMP;
280 } else if (opcode == Instruction::MATCH) {
281 uint32_t match = DecodeMatch(pos, asmap);
282 if (match == INVALID) return false; // Match bits straddle EOF
283 int matchlen = std::bit_width(match) - 1;
284 if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
285 // Within a sequence of matches only at most one should be incomplete
286 if (matchlen < 8 && had_incomplete_match) return false;
287 had_incomplete_match = (matchlen < 8);
288 if (bits < matchlen) return false; // Consuming bits past the end of the input
289 bits -= matchlen;
290 prevopcode = Instruction::MATCH;
291 } else if (opcode == Instruction::DEFAULT) {
292 // There should not be two successive DEFAULTs (they could be combined into one)
293 if (prevopcode == Instruction::DEFAULT) return false;
294 uint32_t asn = DecodeASN(pos, asmap);
295 if (asn == INVALID) return false; // ASN straddles EOF
296 prevopcode = Instruction::DEFAULT;
297 } else {
298 return false; // Instruction straddles EOF
299 }
300 }
301 return false; // Reached EOF without RETURN instruction
302}
303
308bool CheckStandardAsmap(const std::span<const std::byte> data)
309{
310 if (!SanityCheckAsmap(data, 128)) {
311 LogWarning("Sanity check of asmap data failed\n");
312 return false;
313 }
314 return true;
315}
316
320std::vector<std::byte> DecodeAsmap(fs::path path)
321{
322 FILE *filestr = fsbridge::fopen(path, "rb");
323 AutoFile file{filestr};
324 if (file.IsNull()) {
325 LogWarning("Failed to open asmap file from disk");
326 return {};
327 }
328 int64_t length{file.size()};
329 LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
330
331 // Read entire file into memory
332 std::vector<std::byte> buffer(length);
333 file.read(buffer);
334
335 if (!CheckStandardAsmap(buffer)) {
336 LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
337 return {};
338 }
339
340 return buffer;
341}
342
346uint256 AsmapVersion(const std::span<const std::byte> data)
347{
348 if (data.empty()) return {};
349
350 HashWriter asmap_hasher;
351 asmap_hasher << data;
352 return asmap_hasher.GetHash();
353}
Non-refcounted RAII wrapper for FILE*.
Definition: streams.h:373
A writer stream (for serialization) that computes a 256-bit hash.
Definition: hash.h:101
uint256 GetHash()
Compute the double-SHA256 hash of all data written to this object.
Definition: hash.h:115
256-bit opaque blob.
Definition: uint256.h:196
static CService ip(uint32_t i)
static auto quoted(const std::string &s)
Definition: fs.h:106
static std::string PathToString(const path &path)
Convert path object to a byte string.
Definition: fs.h:162
#define LogWarning(...)
Definition: log.h:98
#define LogInfo(...)
Definition: log.h:97
FILE * fopen(const fs::path &p, const char *mode)
Definition: fs.cpp:23
static const auto INVALID
A stack representing the lack of any (dis)satisfactions.
Definition: miniscript.h:351
bool SanityCheckAsmap(const std::span< const std::byte > asmap, int bits)
Validates ASMap structure by simulating all possible execution paths.
Definition: asmap.cpp:237
bool CheckStandardAsmap(const std::span< const std::byte > data)
Provides a safe interface for validating ASMap data before use.
Definition: asmap.cpp:308
std::vector< std::byte > DecodeAsmap(fs::path path)
Loads an ASMap file from disk and validates it.
Definition: asmap.cpp:320
uint32_t Interpret(const std::span< const std::byte > asmap, const std::span< const std::byte > ip)
Execute the ASMap bytecode to find the ASN for an IP.
Definition: asmap.cpp:180
uint256 AsmapVersion(const std::span< const std::byte > data)
Computes SHA256 hash of ASMap data for versioning and consistency checks.
Definition: asmap.cpp:346
assert(!tx.IsCoinBase())