Bitcoin Core 31.99.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-present The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7
8#include <crypto/hex_base.h>
9#include <span.h>
10#include <util/check.h>
11#include <util/overflow.h>
12
13#include <limits>
14#include <optional>
15#include <sstream>
16#include <string>
17#include <vector>
18
/** ASCII letters and digits; the common base of every SAFE_CHARS whitelist. */
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/** Whitelists of permitted characters, indexed by the `rule` argument of SanitizeString(). */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};

/**
 * Remove unsafe chars.
 *
 * @param[in] str   Text to filter.
 * @param[in] rule  Index into SAFE_CHARS selecting the whitelist to apply.
 * @return A copy of `str` containing only the characters present in the
 *         selected whitelist, in their original order.
 */
std::string SanitizeString(std::string_view str, int rule)
{
    // `rule` is a plain int used as an array index; an out-of-range value would
    // read past SAFE_CHARS, so reject it up front.
    assert(rule >= 0 && static_cast<size_t>(rule) < std::size(SAFE_CHARS));
    const std::string& allowed = SAFE_CHARS[rule];

    std::string result;
    result.reserve(str.size());
    for (const char c : str) {
        if (allowed.find(c) != std::string::npos) result.push_back(c);
    }
    return result;
}
39
40bool IsHex(std::string_view str)
41{
42 for (char c : str) {
43 if (HexDigit(c) < 0) return false;
44 }
45 return (str.size() > 0) && (str.size()%2 == 0);
46}
47
48template <typename Byte>
49std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
50{
51 std::vector<Byte> vch;
52 vch.reserve(str.size() / 2); // two hex characters form a single byte
53
54 auto it = str.begin();
55 while (it != str.end()) {
56 if (IsSpace(*it)) {
57 ++it;
58 continue;
59 }
60 auto c1 = HexDigit(*(it++));
61 if (it == str.end()) return std::nullopt;
62 auto c2 = HexDigit(*(it++));
63 if (c1 < 0 || c2 < 0) return std::nullopt;
64 vch.push_back(Byte(c1 << 4) | Byte(c2));
65 }
66 return vch;
67}
68template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
69template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
70
71bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
72{
73 bool valid = false;
74 size_t colon = in.find_last_of(':');
75 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
76 bool fHaveColon = colon != in.npos;
77 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
78 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
79 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
80 if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
81 in = in.substr(0, colon);
82 portOut = *n;
83 valid = (portOut != 0);
84 }
85 } else {
86 valid = true;
87 }
88 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
89 hostOut = in.substr(1, in.size() - 2);
90 } else {
91 hostOut = in;
92 }
93
94 return valid;
95}
96
97std::string EncodeBase64(std::span<const unsigned char> input)
98{
99 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
100
101 std::string str;
102 str.reserve(CeilDiv(input.size(), 3u) * 4);
103 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
104 while (str.size() % 4) str += '=';
105 return str;
106}
107
108std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
109{
110 static const int8_t decode64_table[256]{
111 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
112 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
113 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
114 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
115 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
116 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
117 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
119 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
124 };
125
126 if (str.size() % 4 != 0) return {};
127 /* One or two = characters at the end are permitted. */
128 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
129 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
130
131 std::vector<unsigned char> ret;
132 ret.reserve((str.size() * 3) / 4);
133 bool valid = ConvertBits<6, 8, false>(
134 [&](unsigned char c) { ret.push_back(c); },
135 str.begin(), str.end(),
136 [](char c) { return decode64_table[uint8_t(c)]; }
137 );
138 if (!valid) return {};
139
140 return ret;
141}
142
143std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
144{
145 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
146
147 std::string str;
148 str.reserve(CeilDiv(input.size(), 5u) * 8);
149 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
150 if (pad) {
151 while (str.size() % 8) {
152 str += '=';
153 }
154 }
155 return str;
156}
157
158std::string EncodeBase32(std::string_view str, bool pad)
159{
160 return EncodeBase32(MakeUCharSpan(str), pad);
161}
162
163std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
164{
165 static const int8_t decode32_table[256]{
166 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
167 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
169 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
170 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
171 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
172 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
173 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
174 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
179 };
180
181 if (str.size() % 8 != 0) return {};
182 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
183 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
184 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
185 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
186 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
187
188 std::vector<unsigned char> ret;
189 ret.reserve((str.size() * 5) / 8);
190 bool valid = ConvertBits<5, 8, false>(
191 [&](unsigned char c) { ret.push_back(c); },
192 str.begin(), str.end(),
193 [](char c) { return decode32_table[uint8_t(c)]; }
194 );
195
196 if (!valid) return {};
197
198 return ret;
199}
200
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing newlines in `in` are preserved. A line longer than the remaining
 * width is broken at the last space or newline that fits; if there is none,
 * the whole word is emitted and the break happens after it.
 *
 * @param[in] in      Text to wrap.
 * @param[in] width   Maximum line width; must be at least `indent`.
 * @param[in] indent  Number of spaces written after every line break that this
 *                    function inserts at a space.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    // A plain string is enough here: the output is only ever appended to.
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // first character of `in` not yet emitted
    size_t indented = 0; // indentation already written on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The line fits: copy it together with its newline (substr clamps
            // at the end of the input when there is no newline).
            out.append(in.substr(ptr, linelen + 1));
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string_view::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string_view::npos) {
                // End of the string, just add it and break
                out.append(in.substr(ptr));
                break;
            }
        }
        out.append(in.substr(ptr, finalspace - ptr));
        out.push_back('\n');
        if (in[finalspace] == '\n') {
            // The break coincides with an original newline: no indentation.
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
242
/** Upper bound for mantissa. */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper function for ParseFixedPoint.
 *
 * Folds one decimal digit into `mantissa`. Zeros are only counted in
 * `mantissa_tzeros`; the next non-zero digit first multiplies the mantissa by
 * ten once per pending zero plus once for itself, then adds its value and
 * resets the counter.
 *
 * @return false if a multiplication would push the mantissa past UPPER_BOUND
 *         (the mantissa may already have been partially scaled), true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    for (int shift = 0; shift <= mantissa_tzeros; ++shift) {
        if (mantissa > (UPPER_BOUND / 10LL)) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
269
270bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
271{
272 int64_t mantissa = 0;
273 int64_t exponent = 0;
274 int mantissa_tzeros = 0;
275 bool mantissa_sign = false;
276 bool exponent_sign = false;
277 int ptr = 0;
278 int end = val.size();
279 int point_ofs = 0;
280
281 if (ptr < end && val[ptr] == '-') {
282 mantissa_sign = true;
283 ++ptr;
284 }
285 if (ptr < end)
286 {
287 if (val[ptr] == '0') {
288 /* pass single 0 */
289 ++ptr;
290 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
291 while (ptr < end && IsDigit(val[ptr])) {
292 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
293 return false; /* overflow */
294 ++ptr;
295 }
296 } else return false; /* missing expected digit */
297 } else return false; /* empty string or loose '-' */
298 if (ptr < end && val[ptr] == '.')
299 {
300 ++ptr;
301 if (ptr < end && IsDigit(val[ptr]))
302 {
303 while (ptr < end && IsDigit(val[ptr])) {
304 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
305 return false; /* overflow */
306 ++ptr;
307 ++point_ofs;
308 }
309 } else return false; /* missing expected digit */
310 }
311 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
312 {
313 ++ptr;
314 if (ptr < end && val[ptr] == '+')
315 ++ptr;
316 else if (ptr < end && val[ptr] == '-') {
317 exponent_sign = true;
318 ++ptr;
319 }
320 if (ptr < end && IsDigit(val[ptr])) {
321 while (ptr < end && IsDigit(val[ptr])) {
322 if (exponent > (UPPER_BOUND / 10LL))
323 return false; /* overflow */
324 exponent = exponent * 10 + val[ptr] - '0';
325 ++ptr;
326 }
327 } else return false; /* missing expected digit */
328 }
329 if (ptr != end)
330 return false; /* trailing garbage */
331
332 /* finalize exponent */
333 if (exponent_sign)
334 exponent = -exponent;
335 exponent = exponent - point_ofs + mantissa_tzeros;
336
337 /* finalize mantissa */
338 if (mantissa_sign)
339 mantissa = -mantissa;
340
341 /* convert to one 64-bit fixed-point value */
342 exponent += decimals;
343 if (exponent < 0)
344 return false; /* cannot represent values smaller than 10^-decimals */
345 if (exponent >= 18)
346 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
347
348 for (int i=0; i < exponent; ++i) {
349 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
350 return false; /* overflow */
351 mantissa *= 10;
352 }
353 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
354 return false; /* overflow */
355
356 if (amount_out)
357 *amount_out = mantissa;
358
359 return true;
360}
361
362std::string ToLower(std::string_view str)
363{
364 std::string r;
365 r.reserve(str.size());
366 for (auto ch : str) r += ToLower(ch);
367 return r;
368}
369
370std::string ToUpper(std::string_view str)
371{
372 std::string r;
373 r.reserve(str.size());
374 for (auto ch : str) r += ToUpper(ch);
375 return r;
376}
377
378std::string Capitalize(std::string str)
379{
380 if (str.empty()) return str;
381 str[0] = ToUpper(str.front());
382 return str;
383}
384
385std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
386{
387 if (str.empty()) {
388 return std::nullopt;
389 }
390 auto multiplier = default_multiplier;
391 char unit = str.back();
392 switch (unit) {
393 case 'k':
394 multiplier = ByteUnit::k;
395 break;
396 case 'K':
397 multiplier = ByteUnit::K;
398 break;
399 case 'm':
400 multiplier = ByteUnit::m;
401 break;
402 case 'M':
403 multiplier = ByteUnit::M;
404 break;
405 case 'g':
406 multiplier = ByteUnit::g;
407 break;
408 case 'G':
409 multiplier = ByteUnit::G;
410 break;
411 case 't':
412 multiplier = ByteUnit::t;
413 break;
414 case 'T':
415 multiplier = ByteUnit::T;
416 break;
417 default:
418 unit = 0;
419 break;
420 }
421
422 uint64_t unit_amount = static_cast<uint64_t>(multiplier);
423 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
424 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
425 return std::nullopt;
426 }
427 return *parsed_num * unit_amount;
428}
429
/**
 * Locale-independent, ASCII-only comparator.
 *
 * @return true if both strings have the same length and match character by
 *         character once 'A'..'Z' are mapped to 'a'..'z'; all other
 *         characters are compared unchanged.
 */
bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2)
{
    // Maps ASCII uppercase letters to lowercase and leaves everything else alone.
    const auto fold = [](char c) -> char {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - ('A' - 'a')) : c;
    };

    if (s1.size() != s2.size()) return false;
    auto rhs = s2.begin();
    for (const char lhs : s1) {
        if (fold(lhs) != fold(*rhs)) return false;
        ++rhs;
    }
    return true;
}
int ret
signed char HexDigit(char c)
Definition: hex_base.cpp:64
constexpr auto CeilDiv(const Dividend dividend, const Divisor divisor)
Integer ceiling division (for unsigned values).
Definition: overflow.h:70
constexpr auto MakeUCharSpan(const V &v) -> decltype(UCharSpanCast(std::span{v}))
Like the std::span constructor, but for (const) unsigned char member types only.
Definition: span.h:111
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:149
ByteUnit
Used by ParseByteUnits(). Lowercase suffixes are base 1000; uppercase suffixes are base 1024.
Definition: strencodings.h:43
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:165
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2)
Locale-independent, ASCII-only comparator.
std::string EncodeBase32(std::span< const unsigned char > input, bool pad)
Base32 encode.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
std::string EncodeBase64(std::span< const unsigned char > input)
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM
assert(!tx.IsCoinBase())