Bitcoin Core 31.99.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-present The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7
8#include <crypto/hex_base.h>
9#include <span.h>
10#include <util/overflow.h>
11
12#include <array>
13#include <cassert>
14#include <cstring>
15#include <limits>
16#include <optional>
17#include <ostream>
18#include <string>
19#include <vector>
20
/** Alphanumeric ASCII characters; the common core of every SAFE_CHARS entry. */
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/** Allow-lists of characters, indexed by the `rule` argument of SanitizeString(). */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};

/**
 * Return a copy of `str` with every character that is not on the selected
 * allow-list removed. The relative order of the surviving characters is kept.
 *
 * @param[in] str   Text to filter.
 * @param[in] rule  Index into SAFE_CHARS selecting the allow-list. Must be a
 *                  valid index; this is asserted.
 * @return The filtered string (possibly empty).
 */
std::string SanitizeString(std::string_view str, int rule)
{
    // `rule` is used directly as an array index; refuse out-of-range values
    // instead of reading past the end of SAFE_CHARS.
    assert(rule >= 0 && static_cast<size_t>(rule) < std::size(SAFE_CHARS));
    const std::string& allowed = SAFE_CHARS[rule];

    std::string result;
    result.reserve(str.size()); // the output can never be longer than the input
    for (const char c : str) {
        if (allowed.find(c) != std::string::npos) {
            result.push_back(c);
        }
    }
    return result;
}
41
42bool IsHex(std::string_view str)
43{
44 for (char c : str) {
45 if (HexDigit(c) < 0) return false;
46 }
47 return (str.size() > 0) && (str.size()%2 == 0);
48}
49
50template <typename Byte>
51std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
52{
53 std::vector<Byte> vch;
54 vch.reserve(str.size() / 2); // two hex characters form a single byte
55
56 auto it = str.begin();
57 while (it != str.end()) {
58 if (IsSpace(*it)) {
59 ++it;
60 continue;
61 }
62 auto c1 = HexDigit(*(it++));
63 if (it == str.end()) return std::nullopt;
64 auto c2 = HexDigit(*(it++));
65 if (c1 < 0 || c2 < 0) return std::nullopt;
66 vch.push_back(Byte(c1 << 4) | Byte(c2));
67 }
68 return vch;
69}
70template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
71template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
72
73bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
74{
75 bool valid = false;
76 size_t colon = in.find_last_of(':');
77 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
78 bool fHaveColon = colon != in.npos;
79 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
80 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
81 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
82 if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
83 in = in.substr(0, colon);
84 portOut = *n;
85 valid = (portOut != 0);
86 }
87 } else {
88 valid = true;
89 }
90 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
91 hostOut = in.substr(1, in.size() - 2);
92 } else {
93 hostOut = in;
94 }
95
96 return valid;
97}
98
99std::string EncodeBase64(std::span<const unsigned char> input)
100{
101 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
102
103 std::string str;
104 str.reserve(CeilDiv(input.size(), 3u) * 4);
105 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
106 while (str.size() % 4) str += '=';
107 return str;
108}
109
110std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
111{
112 static const int8_t decode64_table[256]{
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
116 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
117 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
118 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
119 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
126 };
127
128 if (str.size() % 4 != 0) return {};
129 /* One or two = characters at the end are permitted. */
130 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
132
133 std::vector<unsigned char> ret;
134 ret.reserve((str.size() * 3) / 4);
135 bool valid = ConvertBits<6, 8, false>(
136 [&](unsigned char c) { ret.push_back(c); },
137 str.begin(), str.end(),
138 [](char c) { return decode64_table[uint8_t(c)]; }
139 );
140 if (!valid) return {};
141
142 return ret;
143}
144
145std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
146{
147 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
148
149 std::string str;
150 str.reserve(CeilDiv(input.size(), 5u) * 8);
151 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
152 if (pad) {
153 while (str.size() % 8) {
154 str += '=';
155 }
156 }
157 return str;
158}
159
160std::string EncodeBase32(std::string_view str, bool pad)
161{
162 return EncodeBase32(MakeUCharSpan(str), pad);
163}
164
165std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
166{
167 static const int8_t decode32_table[256]{
168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
170 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
171 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
172 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
173 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
174 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
181 };
182
183 if (str.size() % 8 != 0) return {};
184 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
185 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
186 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
187 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
188 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
189
190 std::vector<unsigned char> ret;
191 ret.reserve((str.size() * 5) / 8);
192 bool valid = ConvertBits<5, 8, false>(
193 [&](unsigned char c) { ret.push_back(c); },
194 str.begin(), str.end(),
195 [](char c) { return decode32_table[uint8_t(c)]; }
196 );
197
198 if (!valid) return {};
199
200 return ret;
201}
202
/**
 * Re-wrap `in` so that lines are broken at spaces once they would exceed
 * `width` characters. Lines created by such a break are prefixed with `indent`
 * spaces. Existing newlines are kept, and a word longer than the available
 * width is emitted unbroken.
 *
 * The output is accumulated in a std::string rather than a std::stringstream:
 * this avoids relying on <sstream> (which this file does not include itself)
 * and produces exactly the same text.
 *
 * @param[in] in      Text to format.
 * @param[in] width   Maximum line width; must be >= indent (asserted).
 * @param[in] indent  Number of spaces inserted after each added line break.
 * @return The formatted text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // start of the not-yet-emitted part of `in`
    size_t indented = 0; // columns already consumed by indentation on the current line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it including its newline
            // (substr clamps the count at the end of the input).
            out += in.substr(ptr, linelen + 1);
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string_view::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string_view::npos) {
                // End of the string, just add it and break
                out += in.substr(ptr);
                break;
            }
        }
        out += in.substr(ptr, finalspace - ptr);
        out += '\n';
        if (in[finalspace] == '\n') {
            // Broke at an existing newline: the next line starts unindented.
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
244
/** Upper bound for the mantissa: 10^18 - 1, i.e. the largest 18-digit decimal number. */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper for ParseFixedPoint: fold one decimal digit into `mantissa`.
 *
 * Zeros are not applied immediately; they are only counted in
 * `mantissa_tzeros`. When a non-zero digit arrives, the mantissa is multiplied
 * by ten once for every pending zero plus once for the digit itself, the digit
 * is added, and the counter is reset.
 *
 * @return false if scaling the mantissa would exceed UPPER_BOUND (the mantissa
 *         may have been partially scaled in that case), true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    // One multiplication per pending zero, plus one for this digit.
    for (int pending = mantissa_tzeros; pending >= 0; --pending) {
        if (mantissa > (UPPER_BOUND / 10LL)) return false; /* overflow */
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
271
272bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
273{
274 int64_t mantissa = 0;
275 int64_t exponent = 0;
276 int mantissa_tzeros = 0;
277 bool mantissa_sign = false;
278 bool exponent_sign = false;
279 int ptr = 0;
280 int end = val.size();
281 int point_ofs = 0;
282
283 if (ptr < end && val[ptr] == '-') {
284 mantissa_sign = true;
285 ++ptr;
286 }
287 if (ptr < end)
288 {
289 if (val[ptr] == '0') {
290 /* pass single 0 */
291 ++ptr;
292 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
293 while (ptr < end && IsDigit(val[ptr])) {
294 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
295 return false; /* overflow */
296 ++ptr;
297 }
298 } else return false; /* missing expected digit */
299 } else return false; /* empty string or loose '-' */
300 if (ptr < end && val[ptr] == '.')
301 {
302 ++ptr;
303 if (ptr < end && IsDigit(val[ptr]))
304 {
305 while (ptr < end && IsDigit(val[ptr])) {
306 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
307 return false; /* overflow */
308 ++ptr;
309 ++point_ofs;
310 }
311 } else return false; /* missing expected digit */
312 }
313 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
314 {
315 ++ptr;
316 if (ptr < end && val[ptr] == '+')
317 ++ptr;
318 else if (ptr < end && val[ptr] == '-') {
319 exponent_sign = true;
320 ++ptr;
321 }
322 if (ptr < end && IsDigit(val[ptr])) {
323 while (ptr < end && IsDigit(val[ptr])) {
324 if (exponent > (UPPER_BOUND / 10LL))
325 return false; /* overflow */
326 exponent = exponent * 10 + val[ptr] - '0';
327 ++ptr;
328 }
329 } else return false; /* missing expected digit */
330 }
331 if (ptr != end)
332 return false; /* trailing garbage */
333
334 /* finalize exponent */
335 if (exponent_sign)
336 exponent = -exponent;
337 exponent = exponent - point_ofs + mantissa_tzeros;
338
339 /* finalize mantissa */
340 if (mantissa_sign)
341 mantissa = -mantissa;
342
343 /* convert to one 64-bit fixed-point value */
344 exponent += decimals;
345 if (exponent < 0)
346 return false; /* cannot represent values smaller than 10^-decimals */
347 if (exponent >= 18)
348 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
349
350 for (int i=0; i < exponent; ++i) {
351 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
352 return false; /* overflow */
353 mantissa *= 10;
354 }
355 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
356 return false; /* overflow */
357
358 if (amount_out)
359 *amount_out = mantissa;
360
361 return true;
362}
363
364std::string ToLower(std::string_view str)
365{
366 std::string r;
367 r.reserve(str.size());
368 for (auto ch : str) r += ToLower(ch);
369 return r;
370}
371
372std::string ToUpper(std::string_view str)
373{
374 std::string r;
375 r.reserve(str.size());
376 for (auto ch : str) r += ToUpper(ch);
377 return r;
378}
379
380std::string Capitalize(std::string str)
381{
382 if (str.empty()) return str;
383 str[0] = ToUpper(str.front());
384 return str;
385}
386
387std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
388{
389 if (str.empty()) {
390 return std::nullopt;
391 }
392 auto multiplier = default_multiplier;
393 char unit = str.back();
394 switch (unit) {
395 case 'k':
396 multiplier = ByteUnit::k;
397 break;
398 case 'K':
399 multiplier = ByteUnit::K;
400 break;
401 case 'm':
402 multiplier = ByteUnit::m;
403 break;
404 case 'M':
405 multiplier = ByteUnit::M;
406 break;
407 case 'g':
408 multiplier = ByteUnit::g;
409 break;
410 case 'G':
411 multiplier = ByteUnit::G;
412 break;
413 case 't':
414 multiplier = ByteUnit::t;
415 break;
416 case 'T':
417 multiplier = ByteUnit::T;
418 break;
419 default:
420 unit = 0;
421 break;
422 }
423
424 uint64_t unit_amount = static_cast<uint64_t>(multiplier);
425 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
426 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
427 return std::nullopt;
428 }
429 return *parsed_num * unit_amount;
430}
int ret
signed char HexDigit(char c)
Definition: hex_base.cpp:64
constexpr auto CeilDiv(const Dividend dividend, const Divisor divisor)
Integer ceiling division (for unsigned values).
Definition: overflow.h:70
constexpr auto MakeUCharSpan(const V &v) -> decltype(UCharSpanCast(std::span{v}))
Like the std::span constructor, but for (const) unsigned char member types only.
Definition: span.h:111
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:150
ByteUnit
Used by ParseByteUnits(). Lowercase suffixes are base 1000; uppercase suffixes are base 1024.
Definition: strencodings.h:44
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:166
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
std::string EncodeBase32(std::span< const unsigned char > input, bool pad)
Base32 encode.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
std::string EncodeBase64(std::span< const unsigned char > input)
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM
assert(!tx.IsCoinBase())