Bitcoin Core 28.99.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-2022 The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7
8#include <crypto/hex_base.h>
9#include <span.h>
10
11#include <array>
12#include <cassert>
13#include <cstring>
14#include <limits>
15#include <optional>
16#include <ostream>
17#include <string>
18#include <vector>
19
/** ASCII letters and digits; every SAFE_CHARS entry is built on top of this set. */
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/** Allowed-character sets, indexed by the `rule` argument of SanitizeString(). */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};

/**
 * Remove unsafe chars.
 *
 * @param str   Text to filter.
 * @param rule  Index into SAFE_CHARS selecting the allowed-character set.
 *              The index is not range-checked here.
 * @return      A copy of str keeping only characters found in the selected set,
 *              in their original order.
 */
std::string SanitizeString(std::string_view str, int rule)
{
    const std::string& allowed = SAFE_CHARS[rule];
    std::string kept;
    for (const char ch : str) {
        if (allowed.find(ch) == std::string::npos) continue;
        kept.push_back(ch);
    }
    return kept;
}
40
41bool IsHex(std::string_view str)
42{
43 for (char c : str) {
44 if (HexDigit(c) < 0) return false;
45 }
46 return (str.size() > 0) && (str.size()%2 == 0);
47}
48
49template <typename Byte>
50std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
51{
52 std::vector<Byte> vch;
53 vch.reserve(str.size() / 2); // two hex characters form a single byte
54
55 auto it = str.begin();
56 while (it != str.end()) {
57 if (IsSpace(*it)) {
58 ++it;
59 continue;
60 }
61 auto c1 = HexDigit(*(it++));
62 if (it == str.end()) return std::nullopt;
63 auto c2 = HexDigit(*(it++));
64 if (c1 < 0 || c2 < 0) return std::nullopt;
65 vch.push_back(Byte(c1 << 4) | Byte(c2));
66 }
67 return vch;
68}
69template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
70template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
71
72bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
73{
74 bool valid = false;
75 size_t colon = in.find_last_of(':');
76 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
77 bool fHaveColon = colon != in.npos;
78 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
79 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
80 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
81 uint16_t n;
82 if (ParseUInt16(in.substr(colon + 1), &n)) {
83 in = in.substr(0, colon);
84 portOut = n;
85 valid = (portOut != 0);
86 }
87 } else {
88 valid = true;
89 }
90 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
91 hostOut = in.substr(1, in.size() - 2);
92 } else {
93 hostOut = in;
94 }
95
96 return valid;
97}
98
100{
101 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
102
103 std::string str;
104 str.reserve(((input.size() + 2) / 3) * 4);
105 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
106 while (str.size() % 4) str += '=';
107 return str;
108}
109
110std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
111{
112 static const int8_t decode64_table[256]{
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
116 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
117 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
118 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
119 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
126 };
127
128 if (str.size() % 4 != 0) return {};
129 /* One or two = characters at the end are permitted. */
130 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
132
133 std::vector<unsigned char> ret;
134 ret.reserve((str.size() * 3) / 4);
135 bool valid = ConvertBits<6, 8, false>(
136 [&](unsigned char c) { ret.push_back(c); },
137 str.begin(), str.end(),
138 [](char c) { return decode64_table[uint8_t(c)]; }
139 );
140 if (!valid) return {};
141
142 return ret;
143}
144
145std::string EncodeBase32(Span<const unsigned char> input, bool pad)
146{
147 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
148
149 std::string str;
150 str.reserve(((input.size() + 4) / 5) * 8);
151 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
152 if (pad) {
153 while (str.size() % 8) {
154 str += '=';
155 }
156 }
157 return str;
158}
159
160std::string EncodeBase32(std::string_view str, bool pad)
161{
162 return EncodeBase32(MakeUCharSpan(str), pad);
163}
164
165std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
166{
167 static const int8_t decode32_table[256]{
168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
170 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
171 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
172 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
173 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
174 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
181 };
182
183 if (str.size() % 8 != 0) return {};
184 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
185 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
186 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
187 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
188 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
189
190 std::vector<unsigned char> ret;
191 ret.reserve((str.size() * 5) / 8);
192 bool valid = ConvertBits<5, 8, false>(
193 [&](unsigned char c) { ret.push_back(c); },
194 str.begin(), str.end(),
195 [](char c) { return decode32_table[uint8_t(c)]; }
196 );
197
198 if (!valid) return {};
199
200 return ret;
201}
202
203namespace {
204template <typename T>
205bool ParseIntegral(std::string_view str, T* out)
206{
207 static_assert(std::is_integral<T>::value);
208 // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
209 // handling leading +/- for backwards compatibility.
210 if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
211 return false;
212 }
213 const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
214 if (!opt_int) {
215 return false;
216 }
217 if (out != nullptr) {
218 *out = *opt_int;
219 }
220 return true;
221}
222}; // namespace
223
224bool ParseInt32(std::string_view str, int32_t* out)
225{
226 return ParseIntegral<int32_t>(str, out);
227}
228
229bool ParseInt64(std::string_view str, int64_t* out)
230{
231 return ParseIntegral<int64_t>(str, out);
232}
233
234bool ParseUInt8(std::string_view str, uint8_t* out)
235{
236 return ParseIntegral<uint8_t>(str, out);
237}
238
239bool ParseUInt16(std::string_view str, uint16_t* out)
240{
241 return ParseIntegral<uint16_t>(str, out);
242}
243
244bool ParseUInt32(std::string_view str, uint32_t* out)
245{
246 return ParseIntegral<uint32_t>(str, out);
247}
248
249bool ParseUInt64(std::string_view str, uint64_t* out)
250{
251 return ParseIntegral<uint64_t>(str, out);
252}
253
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing newlines in the input are kept. A line longer than the remaining
 * width is broken at the last space that fits; a word with no earlier break
 * opportunity is emitted whole and the line is broken after it.
 *
 * @param in      Text to wrap.
 * @param width   Maximum line width; must be >= indent (asserted).
 * @param indent  Number of spaces inserted at the start of each line created
 *                by wrapping. Lines that follow an input newline are not indented.
 * @return        The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    // Plain string appends are used; no stream formatting is needed here.
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;
    size_t indented = 0; // number of indent columns already consumed on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it together with its
            // newline, if any (substr clamps at the end of the input).
            out.append(in.substr(ptr, linelen + 1));
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string_view::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string_view::npos) {
                // End of the string, just add it and break
                out.append(in.substr(ptr));
                break;
            }
        }
        out.append(in.substr(ptr, finalspace - ptr));
        out.push_back('\n');
        if (in[finalspace] == '\n') {
            // The break coincides with an input newline: start a fresh, unindented line.
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
295
/** Upper bound for mantissa: 10^18 - 1. */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper function for ParseFixedPoint.
 *
 * Feeds one decimal digit into the running mantissa. A '0' is only counted in
 * mantissa_tzeros; the pending zeros are applied when the next non-zero digit
 * arrives.
 *
 * @param ch               Digit character; the caller is expected to pass '0'..'9'.
 * @param mantissa         Running mantissa, updated in place.
 * @param mantissa_tzeros  Count of zeros seen since the last non-zero digit.
 * @return false if a multiplication by 10 would be attempted while mantissa
 *         already exceeds UPPER_BOUND / 10; true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }
    // One decimal shift for this digit plus one for every pending zero.
    for (int shift = 0; shift <= mantissa_tzeros; ++shift) {
        if (mantissa > (UPPER_BOUND / 10LL)) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
322
323bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
324{
325 int64_t mantissa = 0;
326 int64_t exponent = 0;
327 int mantissa_tzeros = 0;
328 bool mantissa_sign = false;
329 bool exponent_sign = false;
330 int ptr = 0;
331 int end = val.size();
332 int point_ofs = 0;
333
334 if (ptr < end && val[ptr] == '-') {
335 mantissa_sign = true;
336 ++ptr;
337 }
338 if (ptr < end)
339 {
340 if (val[ptr] == '0') {
341 /* pass single 0 */
342 ++ptr;
343 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
344 while (ptr < end && IsDigit(val[ptr])) {
345 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
346 return false; /* overflow */
347 ++ptr;
348 }
349 } else return false; /* missing expected digit */
350 } else return false; /* empty string or loose '-' */
351 if (ptr < end && val[ptr] == '.')
352 {
353 ++ptr;
354 if (ptr < end && IsDigit(val[ptr]))
355 {
356 while (ptr < end && IsDigit(val[ptr])) {
357 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
358 return false; /* overflow */
359 ++ptr;
360 ++point_ofs;
361 }
362 } else return false; /* missing expected digit */
363 }
364 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
365 {
366 ++ptr;
367 if (ptr < end && val[ptr] == '+')
368 ++ptr;
369 else if (ptr < end && val[ptr] == '-') {
370 exponent_sign = true;
371 ++ptr;
372 }
373 if (ptr < end && IsDigit(val[ptr])) {
374 while (ptr < end && IsDigit(val[ptr])) {
375 if (exponent > (UPPER_BOUND / 10LL))
376 return false; /* overflow */
377 exponent = exponent * 10 + val[ptr] - '0';
378 ++ptr;
379 }
380 } else return false; /* missing expected digit */
381 }
382 if (ptr != end)
383 return false; /* trailing garbage */
384
385 /* finalize exponent */
386 if (exponent_sign)
387 exponent = -exponent;
388 exponent = exponent - point_ofs + mantissa_tzeros;
389
390 /* finalize mantissa */
391 if (mantissa_sign)
392 mantissa = -mantissa;
393
394 /* convert to one 64-bit fixed-point value */
395 exponent += decimals;
396 if (exponent < 0)
397 return false; /* cannot represent values smaller than 10^-decimals */
398 if (exponent >= 18)
399 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
400
401 for (int i=0; i < exponent; ++i) {
402 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
403 return false; /* overflow */
404 mantissa *= 10;
405 }
406 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
407 return false; /* overflow */
408
409 if (amount_out)
410 *amount_out = mantissa;
411
412 return true;
413}
414
415std::string ToLower(std::string_view str)
416{
417 std::string r;
418 r.reserve(str.size());
419 for (auto ch : str) r += ToLower(ch);
420 return r;
421}
422
423std::string ToUpper(std::string_view str)
424{
425 std::string r;
426 r.reserve(str.size());
427 for (auto ch : str) r += ToUpper(ch);
428 return r;
429}
430
431std::string Capitalize(std::string str)
432{
433 if (str.empty()) return str;
434 str[0] = ToUpper(str.front());
435 return str;
436}
437
438std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
439{
440 if (str.empty()) {
441 return std::nullopt;
442 }
443 auto multiplier = default_multiplier;
444 char unit = str.back();
445 switch (unit) {
446 case 'k':
447 multiplier = ByteUnit::k;
448 break;
449 case 'K':
450 multiplier = ByteUnit::K;
451 break;
452 case 'm':
453 multiplier = ByteUnit::m;
454 break;
455 case 'M':
456 multiplier = ByteUnit::M;
457 break;
458 case 'g':
459 multiplier = ByteUnit::g;
460 break;
461 case 'G':
462 multiplier = ByteUnit::G;
463 break;
464 case 't':
465 multiplier = ByteUnit::t;
466 break;
467 case 'T':
468 multiplier = ByteUnit::T;
469 break;
470 default:
471 unit = 0;
472 break;
473 }
474
475 uint64_t unit_amount = static_cast<uint64_t>(multiplier);
476 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
477 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
478 return std::nullopt;
479 }
480 return *parsed_num * unit_amount;
481}
int ret
constexpr std::size_t size() const noexcept
Definition: span.h:187
constexpr C * begin() const noexcept
Definition: span.h:175
constexpr C * end() const noexcept
Definition: span.h:176
signed char HexDigit(char c)
Definition: hex_base.cpp:63
constexpr auto MakeUCharSpan(V &&v) -> decltype(UCharSpanCast(Span{std::forward< V >(v)}))
Like the Span constructor, but for (const) unsigned char member types only.
Definition: span.h:296
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:150
ByteUnit
Used by ParseByteUnits() Lowercase base 1000 Uppercase base 1024.
Definition: strencodings.h:43
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:166
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
bool ParseInt32(std::string_view str, int32_t *out)
Convert string to signed 32-bit integer with strict parse error feedback.
bool ParseUInt16(std::string_view str, uint16_t *out)
Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
std::string EncodeBase64(Span< const unsigned char > input)
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
bool ParseInt64(std::string_view str, int64_t *out)
Convert string to signed 64-bit integer with strict parse error feedback.
std::string EncodeBase32(Span< const unsigned char > input, bool pad)
Base32 encode.
bool ParseUInt8(std::string_view str, uint8_t *out)
Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
bool ParseUInt64(std::string_view str, uint64_t *out)
Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool ParseUInt32(std::string_view str, uint32_t *out)
Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM
assert(!tx.IsCoinBase())