Bitcoin Core 31.99.0
P2P Digital Currency
strencodings.h
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-present The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
9#ifndef BITCOIN_UTIL_STRENCODINGS_H
10#define BITCOIN_UTIL_STRENCODINGS_H
11
12#include <span.h>
13#include <util/string.h>
14
15#include <array>
16#include <bit>
17#include <charconv>
18#include <cstddef>
19#include <cstdint>
20#include <limits>
21#include <optional>
22#include <span>
23#include <string>
24#include <string_view>
25#include <system_error>
26#include <type_traits>
27#include <vector>
28
/**
 * Character-set selectors accepted by SanitizeString() through its `rule`
 * argument.
 *
 * NOTE(review): the enum header and its enumerators were missing from this
 * listing. Names and order were restored from the symbol index (definitions at
 * header lines 31-35); confirm against the upstream header that no enumerator
 * carries an explicit value.
 */
enum SafeChars
{
    SAFE_CHARS_DEFAULT,    //!< The full set of allowed chars
    SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
    SAFE_CHARS_FILENAME,   //!< Chars allowed in filenames
    SAFE_CHARS_URI,        //!< Chars allowed in URIs (RFC 3986)
};
37
/**
 * Multipliers used by ParseByteUnits().
 * Lowercase names are powers of 1000, uppercase names are powers of 1024.
 */
enum class ByteUnit : uint64_t {
    NOOP = 1ULL,
    k = 1'000ULL,
    K = 1ULL << 10,
    m = 1'000ULL * 1'000ULL,
    M = 1'024ULL * 1'024ULL,
    g = 1'000ULL * 1'000ULL * 1'000ULL,
    G = 1'024ULL * 1'024ULL * 1'024ULL,
    t = 1'000ULL * 1'000ULL * 1'000ULL * 1'000ULL,
    T = 1'024ULL * 1'024ULL * 1'024ULL * 1'024ULL,
};
54
/**
 * Remove unsafe chars.
 *
 * @param[in] str   Text to sanitize.
 * @param[in] rule  Selects the set of permitted characters; defaults to
 *                  SAFE_CHARS_DEFAULT.
 * @return The sanitized string. NOTE(review): the filtering itself is
 *         implemented out of line -- confirm the exact character sets there.
 */
62std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
/**
 * Parse the hex string into bytes (uint8_t or std::byte).
 *
 * The result is wrapped in std::optional; ParseHex() treats an empty optional
 * as invalid input and substitutes an empty vector.
 */
64template <typename Byte = std::byte>
65std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
67template <typename Byte = uint8_t>
68std::vector<Byte> ParseHex(std::string_view hex_str)
69{
70 return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
71}
72/* Returns true if str consists only of hex characters and contains an even
 * number of them. */
74bool IsHex(std::string_view str);
/* Base64-decodes str into bytes. The result is wrapped in std::optional;
 * presumably it is empty when str is not valid Base64 -- confirm against the
 * implementation. */
75std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str);
/* Base64-encodes the bytes in input and returns the encoded text. */
76std::string EncodeBase64(std::span<const unsigned char> input);
77inline std::string EncodeBase64(std::span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
78inline std::string EncodeBase64(std::string_view str) { return EncodeBase64(MakeUCharSpan(str)); }
/* Base32-decodes str into bytes. The result is wrapped in std::optional;
 * presumably it is empty when str is not valid Base32 -- confirm against the
 * implementation. */
79std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str);
80
/**
 * Base32 encode.
 *
 * @param[in] input  Bytes to encode.
 * @param[in] pad    Defaults to true. NOTE(review): presumably controls whether
 *                   the output is padded to a full group -- confirm.
 */
86std::string EncodeBase32(std::span<const unsigned char> input, bool pad = true);
87
/**
 * Base32 encode, taking the input as a string view instead of a byte span.
 * The `pad` argument has the same default (true) as the span overload.
 */
93std::string EncodeBase32(std::string_view str, bool pad = true);
94
/**
 * Splits socket address string into host string and port value.
 *
 * @param[in]  in       The socket address string to split.
 * @param[out] portOut  Receives the port value.
 * @param[out] hostOut  Receives the host string.
 * @return NOTE(review): presumably true when `in` was valid -- confirm against
 *         the implementation, including what happens to the outputs on failure.
 */
104bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut);
105
106// LocaleIndependentAtoi is provided for backwards compatibility reasons.
107//
108// New code should use ToIntegral.
109//
110// The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
111// std::atoi as it behaves under the "C" locale, and remove some undefined
112// behavior. If the parsed value is bigger than the integer type's maximum
113// value, or smaller than the integer type's minimum value, std::atoi has
114// undefined behavior, while this function returns the maximum or minimum
115// values, respectively.
116template <typename T>
117T LocaleIndependentAtoi(std::string_view str)
118{
119 static_assert(std::is_integral_v<T>);
120 T result;
121 // Emulate atoi(...) handling of white space and leading +/-.
122 std::string_view s = util::TrimStringView(str);
123 if (!s.empty() && s[0] == '+') {
124 if (s.length() >= 2 && s[1] == '-') {
125 return 0;
126 }
127 s = s.substr(1);
128 }
129 auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
130 if (error_condition == std::errc::result_out_of_range) {
131 if (s.length() >= 1 && s[0] == '-') {
132 // Saturate underflow, per strtoll's behavior.
133 return std::numeric_limits<T>::min();
134 } else {
135 // Saturate overflow, per strtoll's behavior.
136 return std::numeric_limits<T>::max();
137 }
138 } else if (error_condition != std::errc{}) {
139 return 0;
140 }
141 return result;
142}
143
/**
 * Tests if the given character is a decimal digit.
 * @param[in] c  character to test
 * @return true if c lies in the range '0'..'9' (a plain comparison, no locale lookup).
 */
constexpr bool IsDigit(char c)
{
    return '0' <= c && c <= '9';
}
153
/**
 * Tests if the given character is a whitespace character.
 * The recognised set is: space, form feed ('\f'), newline ('\n'), carriage
 * return ('\r'), horizontal tab ('\t') and vertical tab ('\v').
 * @param[in] c  character to test
 * @return true if c is one of the characters listed above.
 */
constexpr inline bool IsSpace(char c) noexcept {
    switch (c) {
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
        return true;
    default:
        return false;
    }
}
168
/**
 * Convert string to integral type T.
 *
 * Parsing is delegated to std::from_chars, so (per its documented behaviour)
 * leading whitespace and a leading '+' are not accepted.
 *
 * @param[in] str   The text to parse; it must be consumed in its entirety.
 * @param[in] base  Numeric base, 2 to 36 inclusive (default 10).
 * @return The parsed value, or std::nullopt when `base` is outside 2..36, when
 *         any part of `str` is left unparsed, or when std::from_chars reports
 *         an error (including a value out of range for T).
 */
template <typename T>
std::optional<T> ToIntegral(std::string_view str, size_t base = 10)
{
    static_assert(std::is_integral_v<T>);
    // std::from_chars takes an `int` base and requires 2 <= base <= 36; calling
    // it with anything else is undefined behaviour, so reject such bases here.
    if (base < 2 || base > 36) return std::nullopt;
    T result;
    const char* const last = str.data() + str.size();
    const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), last, result, static_cast<int>(base));
    if (first_nonmatching != last || error_condition != std::errc{}) {
        return std::nullopt;
    }
    return result;
}
189
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * @param[in] in      The text to format.
 * @param[in] width   The fixed width to format to (default 79).
 * @param[in] indent  Number of indentation spaces for added lines (default 0).
 */
194std::string FormatParagraph(std::string_view in, size_t width = 79, size_t indent = 0);
195
/**
 * Timing-attack-resistant comparison.
 *
 * The loop always visits every element of `a`, so the amount of work depends
 * on the length of the first argument rather than on where the inputs differ.
 *
 * @return true only if `a` and `b` have the same size and equal elements.
 */
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    const size_t len_a = a.size();
    const size_t len_b = b.size();
    // An empty `b` cannot be indexed modulo its size; handle it separately.
    if (len_b == 0) return len_a == 0;
    // Seed with the length difference, then fold in every element difference
    // without ever leaving the loop early.
    size_t diff = len_a ^ len_b;
    for (size_t pos = 0; pos != len_a; ++pos) {
        diff |= static_cast<size_t>(a[pos] ^ b[pos % len_b]);
    }
    return diff == 0;
}
210
/**
 * Parse number as fixed point according to JSON number syntax.
 *
 * @param[in]  decimals    Number of decimals of the fixed-point representation.
 * @param[out] amount_out  Receives the parsed amount.
 * @return NOTE(review): presumably true on success -- confirm against the
 *         implementation. The result is [[nodiscard]] and must be checked.
 */
215[[nodiscard]] bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out);
216
/**
 * Identity functor: the default input mapping of ConvertBits().
 *
 * Deliberately NOT placed in an unnamed namespace: in a header that would give
 * every translation unit its own distinct IntIdentity type, making the default
 * template argument of ConvertBits() refer to a different entity in each one.
 */
struct IntIdentity
{
    constexpr int operator()(int x) const noexcept { return x; }
};

/**
 * Convert from one power-of-2 number base to another.
 *
 * @tparam frombits  Number of bits carried by each input element.
 * @tparam tobits    Number of bits emitted per output group.
 * @tparam pad       If true, leftover bits are zero-extended into one final
 *                   output group. If false, the conversion fails when
 *                   `frombits` or more bits are left over, or when any
 *                   leftover bit is non-zero.
 * @param outfn      Called once per output group with its value (a size_t).
 * @param it, end    Input range.
 * @param infn       Maps each input element to an int; a negative result marks
 *                   the element as invalid.
 * @return false if `infn` produced a negative value or the non-padded leftover
 *         check failed (groups emitted before that point are not undone);
 *         true otherwise.
 */
template<int frombits, int tobits, bool pad, typename O, typename It, typename I = IntIdentity>
bool ConvertBits(O outfn, It it, It end, I infn = {}) {
    static_assert(frombits > 0 && tobits > 0, "bit widths must be positive");
    static_assert(frombits + tobits - 1 < std::numeric_limits<size_t>::digits, "accumulator would not fit in size_t");
    // Do all shift arithmetic in size_t: no int overflow for wide groups and no
    // signed/unsigned comparisons against the bit counter.
    constexpr size_t from_width{frombits};
    constexpr size_t to_width{tobits};
    constexpr size_t maxv = (size_t{1} << to_width) - 1;
    constexpr size_t max_acc = (size_t{1} << (from_width + to_width - 1)) - 1;
    size_t acc = 0;
    size_t bits = 0;
    while (it != end) {
        int v = infn(*it);
        if (v < 0) return false;
        // Only the low (frombits + tobits - 1) bits can still contribute to output.
        acc = ((acc << from_width) | static_cast<size_t>(v)) & max_acc;
        bits += from_width;
        while (bits >= to_width) {
            bits -= to_width;
            outfn((acc >> bits) & maxv);
        }
        ++it;
    }
    if (pad) {
        if (bits) outfn((acc << (to_width - bits)) & maxv);
    } else if (bits >= from_width || ((acc << (to_width - bits)) & maxv)) {
        return false;
    }
    return true;
}
251
/**
 * Converts the given character to its lowercase equivalent.
 * Only 'A'..'Z' are remapped, by plain arithmetic; every other character is
 * returned unchanged.
 * @param[in] c  the character to convert to lowercase.
 * @return the lowercase equivalent of c, or c itself if no conversion applies.
 */
constexpr char ToLower(char c)
{
    const bool is_upper = 'A' <= c && c <= 'Z';
    if (!is_upper) return c;
    return static_cast<char>((c - 'A') + 'a');
}

/**
 * String overload of ToLower, defined out of line.
 * NOTE(review): presumably applies the character conversion above to every
 * character of str -- confirm against the implementation.
 */
std::string ToLower(std::string_view str);
277
/**
 * Converts the given character to its uppercase equivalent.
 * Only 'a'..'z' are remapped, by plain arithmetic; every other character is
 * returned unchanged.
 * @param[in] c  the character to convert to uppercase.
 * @return the uppercase equivalent of c, or c itself if no conversion applies.
 */
constexpr char ToUpper(char c)
{
    const bool is_lower = 'a' <= c && c <= 'z';
    if (!is_lower) return c;
    return static_cast<char>((c - 'a') + 'A');
}

/**
 * String overload of ToUpper, defined out of line.
 * NOTE(review): presumably applies the character conversion above to every
 * character of str -- confirm against the implementation.
 */
std::string ToUpper(std::string_view str);
303
/**
 * Capitalizes the first character of the given string.
 *
 * @param[in] str  The string to capitalize (taken by value).
 * @return The string with its first character capitalized.
 */
313std::string Capitalize(std::string str);
314
/**
 * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
 *
 * @param[in] str                 The string to parse.
 * @param[in] default_multiplier  A ByteUnit value. NOTE(review): presumably the
 *                                multiplier applied when str carries no unit
 *                                suffix -- confirm against the implementation.
 * @return The parsed value wrapped in std::optional; presumably empty when str
 *         cannot be parsed -- confirm.
 */
326std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
327
328namespace util {
330consteval uint8_t ConstevalHexDigit(const char c)
331{
332 if (c >= '0' && c <= '9') return c - '0';
333 if (c >= 'a' && c <= 'f') return c - 'a' + 0xa;
334
335 throw "Only lowercase hex digits are allowed, for consistency";
336}
337
338namespace detail {
339template <size_t N>
340struct Hex {
341 std::array<std::byte, N / 2> bytes{};
342 consteval Hex(const char (&hex_str)[N])
343 // 2 hex digits required per byte + implicit null terminator
344 requires(N % 2 == 1)
345 {
346 if (hex_str[N - 1]) throw "null terminator required";
347 for (std::size_t i = 0; i < bytes.size(); ++i) {
348 bytes[i] = static_cast<std::byte>(
349 (ConstevalHexDigit(hex_str[2 * i]) << 4) |
350 ConstevalHexDigit(hex_str[2 * i + 1]));
351 }
352 }
353};
354} // namespace detail
355
357 bool operator()(std::string_view s1, std::string_view s2) const
358 {
359 return ToLower(s1) == ToLower(s2);
360 }
361};
362
364 size_t operator()(std::string_view s) const
365 {
366 return std::hash<std::string>{}(ToLower(s));
367 }
368};
369
399inline namespace hex_literals {
400
401template <util::detail::Hex str>
402constexpr auto operator""_hex() { return str.bytes; }
403
404template <util::detail::Hex str>
405constexpr auto operator""_hex_u8() { return std::bit_cast<std::array<uint8_t, str.bytes.size()>>(str.bytes); }
406
407template <util::detail::Hex str>
408constexpr auto operator""_hex_v() { return std::vector<std::byte>{str.bytes.begin(), str.bytes.end()}; }
409
410template <util::detail::Hex str>
411inline auto operator""_hex_v_u8() { return std::vector<uint8_t>{UCharCast(str.bytes.data()), UCharCast(str.bytes.data() + str.bytes.size())}; }
412
413} // inline namespace hex_literals
414} // namespace util
415
416#endif // BITCOIN_UTIL_STRENCODINGS_H
#define T(expected, seed, data)
consteval uint8_t ConstevalHexDigit(const char c)
consteval version of HexDigit() without the lookup table.
Definition: strencodings.h:330
std::string_view TrimStringView(std::string_view str, std::string_view pattern=" \f\n\r\t\v")
Definition: string.h:160
constexpr auto MakeUCharSpan(const V &v) -> decltype(UCharSpanCast(std::span{v}))
Like the std::span constructor, but for (const) unsigned char member types only.
Definition: span.h:111
unsigned char * UCharCast(char *c)
Definition: span.h:95
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
constexpr char ToLower(char c)
Converts the given character to its lowercase equivalent.
Definition: strencodings.h:262
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:149
constexpr char ToUpper(char c)
Converts the given character to its uppercase equivalent.
Definition: strencodings.h:288
std::string EncodeBase32(std::span< const unsigned char > input, bool pad=true)
Base32 encode.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
T LocaleIndependentAtoi(std::string_view str)
Definition: strencodings.h:117
ByteUnit
Used by ParseByteUnits(). Lowercase units are base 1000; uppercase units are base 1024.
Definition: strencodings.h:43
bool TimingResistantEqual(const T &a, const T &b)
Timing-attack-resistant comparison.
Definition: strencodings.h:202
std::vector< Byte > ParseHex(std::string_view hex_str)
Like TryParseHex, but returns an empty vector on invalid input.
Definition: strencodings.h:68
std::optional< T > ToIntegral(std::string_view str, size_t base=10)
Convert string to integral type T.
Definition: strencodings.h:179
bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:165
bool IsHex(std::string_view str)
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool ConvertBits(O outfn, It it, It end, I infn={})
Convert from one power-of-2 number base to another.
Definition: strencodings.h:228
std::string EncodeBase64(std::span< const unsigned char > input)
std::string FormatParagraph(std::string_view in, size_t width=79, size_t indent=0)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
std::string SanitizeString(std::string_view str, int rule=SAFE_CHARS_DEFAULT)
Remove unsafe chars.
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
SafeChars
Utilities for converting data from/to strings.
Definition: strencodings.h:31
@ SAFE_CHARS_DEFAULT
The full set of allowed chars.
Definition: strencodings.h:32
@ SAFE_CHARS_UA_COMMENT
BIP-0014 subset.
Definition: strencodings.h:33
@ SAFE_CHARS_URI
Chars allowed in URIs (RFC 3986)
Definition: strencodings.h:35
@ SAFE_CHARS_FILENAME
Chars allowed in filenames.
Definition: strencodings.h:34
size_t operator()(std::string_view s) const
Definition: strencodings.h:364
bool operator()(std::string_view s1, std::string_view s2) const
Definition: strencodings.h:357
consteval Hex(const char(&hex_str)[N])
Definition: strencodings.h:342
std::array< std::byte, N/2 > bytes
Definition: strencodings.h:341
consteval auto _(util::TranslatedLiteral str)
Definition: translation.h:79