Bitcoin Core 29.99.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-present The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7
8#include <crypto/hex_base.h>
9#include <span.h>
10
11#include <array>
12#include <cassert>
13#include <cstring>
14#include <limits>
15#include <optional>
16#include <ostream>
17#include <string>
18#include <vector>
19
/** ASCII letters (both cases) and decimal digits; the common base of every SAFE_CHARS entry. */
static const std::string CHARS_ALPHA_NUM{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"};

/** Sets of permitted characters, indexed by the `rule` argument of SanitizeString(). */
static const std::string SAFE_CHARS[]{
    CHARS_ALPHA_NUM + " .,;-_/:?@()",            // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@",                // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_",                     // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};
29
30std::string SanitizeString(std::string_view str, int rule)
31{
32 std::string result;
33 for (char c : str) {
34 if (SAFE_CHARS[rule].find(c) != std::string::npos) {
35 result.push_back(c);
36 }
37 }
38 return result;
39}
40
41bool IsHex(std::string_view str)
42{
43 for (char c : str) {
44 if (HexDigit(c) < 0) return false;
45 }
46 return (str.size() > 0) && (str.size()%2 == 0);
47}
48
49template <typename Byte>
50std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
51{
52 std::vector<Byte> vch;
53 vch.reserve(str.size() / 2); // two hex characters form a single byte
54
55 auto it = str.begin();
56 while (it != str.end()) {
57 if (IsSpace(*it)) {
58 ++it;
59 continue;
60 }
61 auto c1 = HexDigit(*(it++));
62 if (it == str.end()) return std::nullopt;
63 auto c2 = HexDigit(*(it++));
64 if (c1 < 0 || c2 < 0) return std::nullopt;
65 vch.push_back(Byte(c1 << 4) | Byte(c2));
66 }
67 return vch;
68}
69template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
70template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
71
72bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
73{
74 bool valid = false;
75 size_t colon = in.find_last_of(':');
76 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
77 bool fHaveColon = colon != in.npos;
78 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
79 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
80 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
81 if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
82 in = in.substr(0, colon);
83 portOut = *n;
84 valid = (portOut != 0);
85 }
86 } else {
87 valid = true;
88 }
89 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
90 hostOut = in.substr(1, in.size() - 2);
91 } else {
92 hostOut = in;
93 }
94
95 return valid;
96}
97
98std::string EncodeBase64(std::span<const unsigned char> input)
99{
100 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
101
102 std::string str;
103 str.reserve(((input.size() + 2) / 3) * 4);
104 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
105 while (str.size() % 4) str += '=';
106 return str;
107}
108
109std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
110{
111 static const int8_t decode64_table[256]{
112 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
115 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
116 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
117 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
118 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
119 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
125 };
126
127 if (str.size() % 4 != 0) return {};
128 /* One or two = characters at the end are permitted. */
129 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
130 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131
132 std::vector<unsigned char> ret;
133 ret.reserve((str.size() * 3) / 4);
134 bool valid = ConvertBits<6, 8, false>(
135 [&](unsigned char c) { ret.push_back(c); },
136 str.begin(), str.end(),
137 [](char c) { return decode64_table[uint8_t(c)]; }
138 );
139 if (!valid) return {};
140
141 return ret;
142}
143
144std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
145{
146 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
147
148 std::string str;
149 str.reserve(((input.size() + 4) / 5) * 8);
150 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
151 if (pad) {
152 while (str.size() % 8) {
153 str += '=';
154 }
155 }
156 return str;
157}
158
159std::string EncodeBase32(std::string_view str, bool pad)
160{
161 return EncodeBase32(MakeUCharSpan(str), pad);
162}
163
164std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
165{
166 static const int8_t decode32_table[256]{
167 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
170 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
171 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
172 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
173 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
174 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
180 };
181
182 if (str.size() % 8 != 0) return {};
183 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
184 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
185 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
186 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
187 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
188
189 std::vector<unsigned char> ret;
190 ret.reserve((str.size() * 5) / 8);
191 bool valid = ConvertBits<5, 8, false>(
192 [&](unsigned char c) { ret.push_back(c); },
193 str.begin(), str.end(),
194 [](char c) { return decode32_table[uint8_t(c)]; }
195 );
196
197 if (!valid) return {};
198
199 return ret;
200}
201
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing '\n' characters are kept. A line that does not fit into the
 * remaining width is broken at the last space/newline inside that width; if
 * there is none, the whole word is emitted and the break happens after it.
 * Only lines started by an inserted break are indented.
 *
 * The result is accumulated directly in a std::string: nothing here needs
 * stream formatting, and this avoids depending on <sstream>, which this file
 * does not include.
 *
 * @param in     Text to wrap.
 * @param width  Maximum line width; must be >= indent (asserted).
 * @param indent Number of spaces prepended to each line created by wrapping.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;
    size_t indented = 0; // columns already consumed by indentation on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find_first_of('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it together with its
            // newline (substr clamps the count at the end of the input).
            out += in.substr(ptr, linelen + 1);
            ptr = lineend + 1;
            indented = 0;
        } else {
            size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
            if (finalspace == std::string_view::npos || finalspace < ptr) {
                // No place to break; just include the entire word and move on
                finalspace = in.find_first_of("\n ", ptr);
                if (finalspace == std::string_view::npos) {
                    // End of the string, just add it and break
                    out += in.substr(ptr);
                    break;
                }
            }
            out += in.substr(ptr, finalspace - ptr);
            out += '\n';
            if (in[finalspace] == '\n') {
                // The break coincides with an existing newline: no indentation.
                indented = 0;
            } else if (indent) {
                out.append(indent, ' ');
                indented = indent;
            }
            ptr = finalspace + 1;
        }
    }
    return out;
}
243
/** Upper bound for mantissa (10^18 - 1). */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper function for ParseFixedPoint.
 *
 * Zeros are not applied to the mantissa immediately; they are only counted in
 * `mantissa_tzeros`. When a non-zero digit arrives, the mantissa is multiplied
 * by 10 once for each counted zero plus once for the digit itself, the digit
 * is added, and the zero counter is reset.
 *
 * @return false if the mantissa would exceed UPPER_BOUND, true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    // One decimal shift per pending zero, plus one for the new digit.
    int shifts = mantissa_tzeros + 1;
    while (shifts-- > 0) {
        if (mantissa > (UPPER_BOUND / 10LL)) return false; /* overflow */
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
270
271bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
272{
273 int64_t mantissa = 0;
274 int64_t exponent = 0;
275 int mantissa_tzeros = 0;
276 bool mantissa_sign = false;
277 bool exponent_sign = false;
278 int ptr = 0;
279 int end = val.size();
280 int point_ofs = 0;
281
282 if (ptr < end && val[ptr] == '-') {
283 mantissa_sign = true;
284 ++ptr;
285 }
286 if (ptr < end)
287 {
288 if (val[ptr] == '0') {
289 /* pass single 0 */
290 ++ptr;
291 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
292 while (ptr < end && IsDigit(val[ptr])) {
293 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
294 return false; /* overflow */
295 ++ptr;
296 }
297 } else return false; /* missing expected digit */
298 } else return false; /* empty string or loose '-' */
299 if (ptr < end && val[ptr] == '.')
300 {
301 ++ptr;
302 if (ptr < end && IsDigit(val[ptr]))
303 {
304 while (ptr < end && IsDigit(val[ptr])) {
305 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
306 return false; /* overflow */
307 ++ptr;
308 ++point_ofs;
309 }
310 } else return false; /* missing expected digit */
311 }
312 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
313 {
314 ++ptr;
315 if (ptr < end && val[ptr] == '+')
316 ++ptr;
317 else if (ptr < end && val[ptr] == '-') {
318 exponent_sign = true;
319 ++ptr;
320 }
321 if (ptr < end && IsDigit(val[ptr])) {
322 while (ptr < end && IsDigit(val[ptr])) {
323 if (exponent > (UPPER_BOUND / 10LL))
324 return false; /* overflow */
325 exponent = exponent * 10 + val[ptr] - '0';
326 ++ptr;
327 }
328 } else return false; /* missing expected digit */
329 }
330 if (ptr != end)
331 return false; /* trailing garbage */
332
333 /* finalize exponent */
334 if (exponent_sign)
335 exponent = -exponent;
336 exponent = exponent - point_ofs + mantissa_tzeros;
337
338 /* finalize mantissa */
339 if (mantissa_sign)
340 mantissa = -mantissa;
341
342 /* convert to one 64-bit fixed-point value */
343 exponent += decimals;
344 if (exponent < 0)
345 return false; /* cannot represent values smaller than 10^-decimals */
346 if (exponent >= 18)
347 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
348
349 for (int i=0; i < exponent; ++i) {
350 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
351 return false; /* overflow */
352 mantissa *= 10;
353 }
354 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
355 return false; /* overflow */
356
357 if (amount_out)
358 *amount_out = mantissa;
359
360 return true;
361}
362
363std::string ToLower(std::string_view str)
364{
365 std::string r;
366 r.reserve(str.size());
367 for (auto ch : str) r += ToLower(ch);
368 return r;
369}
370
371std::string ToUpper(std::string_view str)
372{
373 std::string r;
374 r.reserve(str.size());
375 for (auto ch : str) r += ToUpper(ch);
376 return r;
377}
378
379std::string Capitalize(std::string str)
380{
381 if (str.empty()) return str;
382 str[0] = ToUpper(str.front());
383 return str;
384}
385
386std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
387{
388 if (str.empty()) {
389 return std::nullopt;
390 }
391 auto multiplier = default_multiplier;
392 char unit = str.back();
393 switch (unit) {
394 case 'k':
395 multiplier = ByteUnit::k;
396 break;
397 case 'K':
398 multiplier = ByteUnit::K;
399 break;
400 case 'm':
401 multiplier = ByteUnit::m;
402 break;
403 case 'M':
404 multiplier = ByteUnit::M;
405 break;
406 case 'g':
407 multiplier = ByteUnit::g;
408 break;
409 case 'G':
410 multiplier = ByteUnit::G;
411 break;
412 case 't':
413 multiplier = ByteUnit::t;
414 break;
415 case 'T':
416 multiplier = ByteUnit::T;
417 break;
418 default:
419 unit = 0;
420 break;
421 }
422
423 uint64_t unit_amount = static_cast<uint64_t>(multiplier);
424 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
425 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
426 return std::nullopt;
427 }
428 return *parsed_num * unit_amount;
429}
int ret
signed char HexDigit(char c)
Definition: hex_base.cpp:63
constexpr auto MakeUCharSpan(const V &v) -> decltype(UCharSpanCast(std::span{v}))
Like the std::span constructor, but for (const) unsigned char member types only.
Definition: span.h:111
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:149
ByteUnit
Used by ParseByteUnits(). Lowercase suffixes are base 1000; uppercase suffixes are base 1024.
Definition: strencodings.h:43
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:165
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
std::string EncodeBase32(std::span< const unsigned char > input, bool pad)
Base32 encode.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
std::string EncodeBase64(std::span< const unsigned char > input)
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM
assert(!tx.IsCoinBase())