Bitcoin Core 30.99.0
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1// Copyright 2014 BitPay Inc.
2// Distributed under the MIT software license, see the accompanying
3// file COPYING or https://opensource.org/licenses/mit-license.php.
4
5#include <univalue.h>
7
8#include <cstdint>
9#include <cstring>
10#include <string>
11#include <string_view>
12#include <vector>
13
/*
 * Maximum container nesting depth accepted by the parser.
 *
 * According to stackexchange, the original json test suite wanted to limit
 * depth to 22. Widely-deployed PHP bails at depth 512, so PHP's limit is
 * adopted here; that should be more than sufficient (further stackexchange
 * comments indicate depth > 32 rarely occurs).
 */
static constexpr size_t MAX_JSON_DEPTH{512};
21
// Returns true when ch is one of the ASCII decimal digits '0'..'9'.
static bool json_isdigit(int ch)
{
    return !(ch < '0' || ch > '9');
}
26
// Convert a run of hexadecimal digits to an unsigned integer.
//
// Scans [first, last) and stops at the first character that is not a hex
// digit (0-9, a-f, A-F) or at `last`, whichever comes first. The accumulated
// value is stored in `out` (0 when no digit was read) and the position where
// scanning stopped is returned, so callers can tell how many digits were used.
static const char *hatoui(const char *first, const char *last,
                          unsigned int& out)
{
    unsigned int value = 0;
    const char *cursor = first;

    while (cursor != last) {
        const char c = *cursor;
        int nibble;

        if (c >= '0' && c <= '9') {
            nibble = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            nibble = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            nibble = c - 'A' + 10;
        } else {
            break; // not a hex digit: stop here
        }

        value = 16 * value + nibble;
        ++cursor;
    }

    out = value;
    return cursor;
}
53
54enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
55 const char *raw, const char *end)
56{
57 tokenVal.clear();
58 consumed = 0;
59
60 const char *rawStart = raw;
61
62 while (raw < end && (json_isspace(*raw))) // skip whitespace
63 raw++;
64
65 if (raw >= end)
66 return JTOK_NONE;
67
68 switch (*raw) {
69
70 case '{':
71 raw++;
72 consumed = (raw - rawStart);
73 return JTOK_OBJ_OPEN;
74 case '}':
75 raw++;
76 consumed = (raw - rawStart);
77 return JTOK_OBJ_CLOSE;
78 case '[':
79 raw++;
80 consumed = (raw - rawStart);
81 return JTOK_ARR_OPEN;
82 case ']':
83 raw++;
84 consumed = (raw - rawStart);
85 return JTOK_ARR_CLOSE;
86
87 case ':':
88 raw++;
89 consumed = (raw - rawStart);
90 return JTOK_COLON;
91 case ',':
92 raw++;
93 consumed = (raw - rawStart);
94 return JTOK_COMMA;
95
96 case 'n':
97 case 't':
98 case 'f':
99 if (!strncmp(raw, "null", 4)) {
100 raw += 4;
101 consumed = (raw - rawStart);
102 return JTOK_KW_NULL;
103 } else if (!strncmp(raw, "true", 4)) {
104 raw += 4;
105 consumed = (raw - rawStart);
106 return JTOK_KW_TRUE;
107 } else if (!strncmp(raw, "false", 5)) {
108 raw += 5;
109 consumed = (raw - rawStart);
110 return JTOK_KW_FALSE;
111 } else
112 return JTOK_ERR;
113
114 case '-':
115 case '0':
116 case '1':
117 case '2':
118 case '3':
119 case '4':
120 case '5':
121 case '6':
122 case '7':
123 case '8':
124 case '9': {
125 // part 1: int
126 std::string numStr;
127
128 const char *first = raw;
129
130 const char *firstDigit = first;
131 if (!json_isdigit(*firstDigit))
132 firstDigit++;
133 if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
134 return JTOK_ERR;
135
136 numStr += *raw; // copy first char
137 raw++;
138
139 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
140 return JTOK_ERR;
141
142 while (raw < end && json_isdigit(*raw)) { // copy digits
143 numStr += *raw;
144 raw++;
145 }
146
147 // part 2: frac
148 if (raw < end && *raw == '.') {
149 numStr += *raw; // copy .
150 raw++;
151
152 if (raw >= end || !json_isdigit(*raw))
153 return JTOK_ERR;
154 while (raw < end && json_isdigit(*raw)) { // copy digits
155 numStr += *raw;
156 raw++;
157 }
158 }
159
160 // part 3: exp
161 if (raw < end && (*raw == 'e' || *raw == 'E')) {
162 numStr += *raw; // copy E
163 raw++;
164
165 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
166 numStr += *raw;
167 raw++;
168 }
169
170 if (raw >= end || !json_isdigit(*raw))
171 return JTOK_ERR;
172 while (raw < end && json_isdigit(*raw)) { // copy digits
173 numStr += *raw;
174 raw++;
175 }
176 }
177
178 tokenVal = numStr;
179 consumed = (raw - rawStart);
180 return JTOK_NUMBER;
181 }
182
183 case '"': {
184 raw++; // skip "
185
186 std::string valStr;
187 JSONUTF8StringFilter writer(valStr);
188
189 while (true) {
190 if (raw >= end || (unsigned char)*raw < 0x20)
191 return JTOK_ERR;
192
193 else if (*raw == '\\') {
194 raw++; // skip backslash
195
196 if (raw >= end)
197 return JTOK_ERR;
198
199 switch (*raw) {
200 case '"': writer.push_back('\"'); break;
201 case '\\': writer.push_back('\\'); break;
202 case '/': writer.push_back('/'); break;
203 case 'b': writer.push_back('\b'); break;
204 case 'f': writer.push_back('\f'); break;
205 case 'n': writer.push_back('\n'); break;
206 case 'r': writer.push_back('\r'); break;
207 case 't': writer.push_back('\t'); break;
208
209 case 'u': {
210 unsigned int codepoint;
211 if (raw + 1 + 4 >= end ||
212 hatoui(raw + 1, raw + 1 + 4, codepoint) !=
213 raw + 1 + 4)
214 return JTOK_ERR;
215 writer.push_back_u(codepoint);
216 raw += 4;
217 break;
218 }
219 default:
220 return JTOK_ERR;
221
222 }
223
224 raw++; // skip esc'd char
225 }
226
227 else if (*raw == '"') {
228 raw++; // skip "
229 break; // stop scanning
230 }
231
232 else {
233 writer.push_back(static_cast<unsigned char>(*raw));
234 raw++;
235 }
236 }
237
238 if (!writer.finalize())
239 return JTOK_ERR;
240 tokenVal = valStr;
241 consumed = (raw - rawStart);
242 return JTOK_STRING;
243 }
244
245 default:
246 return JTOK_ERR;
247 }
248}
249
/*
 * Bit flags describing what UniValue::read() will accept as the next token.
 */
enum expect_bits : unsigned {
    // Set after '{' or after ',' inside an object: next must be a key string or '}'.
    EXP_OBJ_NAME  = 1U << 0,
    // Set after an object key: next must be ':'.
    EXP_COLON     = 1U << 1,
    // Set after '[' or after ',' inside an array: next must be a value,
    // an opening bracket/brace, or ']'.
    EXP_ARR_VALUE = 1U << 2,
    // Set after ':': next must be a value or an opening bracket/brace.
    EXP_VALUE     = 1U << 3,
    // Set after a value or a closing bracket/brace: next must not start a value.
    EXP_NOT_VALUE = 1U << 4,
};

// Shorthand for testing, setting and clearing one EXP_* flag. All three
// operate on a variable named `expectMask` that must be in scope at the
// point of use; `bit` is the flag name without its EXP_ prefix.
#define expect(bit) (expectMask & (EXP_##bit))
#define setExpect(bit) (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)
261
262bool UniValue::read(std::string_view str_in)
263{
264 clear();
265
266 uint32_t expectMask = 0;
267 std::vector<UniValue*> stack;
268
269 std::string tokenVal;
270 unsigned int consumed;
271 enum jtokentype tok = JTOK_NONE;
272 enum jtokentype last_tok = JTOK_NONE;
273 const char* raw{str_in.data()};
274 const char* end{raw + str_in.size()};
275 do {
276 last_tok = tok;
277
278 tok = getJsonToken(tokenVal, consumed, raw, end);
279 if (tok == JTOK_NONE || tok == JTOK_ERR)
280 return false;
281 raw += consumed;
282
283 bool isValueOpen = jsonTokenIsValue(tok) ||
284 tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
285
286 if (expect(VALUE)) {
287 if (!isValueOpen)
288 return false;
289 clearExpect(VALUE);
290
291 } else if (expect(ARR_VALUE)) {
292 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
293 if (!isArrValue)
294 return false;
295
296 clearExpect(ARR_VALUE);
297
298 } else if (expect(OBJ_NAME)) {
299 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
300 if (!isObjName)
301 return false;
302
303 } else if (expect(COLON)) {
304 if (tok != JTOK_COLON)
305 return false;
306 clearExpect(COLON);
307
308 } else if (!expect(COLON) && (tok == JTOK_COLON)) {
309 return false;
310 }
311
312 if (expect(NOT_VALUE)) {
313 if (isValueOpen)
314 return false;
315 clearExpect(NOT_VALUE);
316 }
317
318 switch (tok) {
319
320 case JTOK_OBJ_OPEN:
321 case JTOK_ARR_OPEN: {
322 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
323 if (!stack.size()) {
324 if (utyp == VOBJ)
325 setObject();
326 else
327 setArray();
328 stack.push_back(this);
329 } else {
330 UniValue tmpVal(utyp);
331 UniValue *top = stack.back();
332 top->values.push_back(tmpVal);
333
334 UniValue *newTop = &(top->values.back());
335 stack.push_back(newTop);
336 }
337
338 if (stack.size() > MAX_JSON_DEPTH)
339 return false;
340
341 if (utyp == VOBJ)
342 setExpect(OBJ_NAME);
343 else
344 setExpect(ARR_VALUE);
345 break;
346 }
347
348 case JTOK_OBJ_CLOSE:
349 case JTOK_ARR_CLOSE: {
350 if (!stack.size() || (last_tok == JTOK_COMMA))
351 return false;
352
353 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
354 UniValue *top = stack.back();
355 if (utyp != top->getType())
356 return false;
357
358 stack.pop_back();
359 clearExpect(OBJ_NAME);
360 setExpect(NOT_VALUE);
361 break;
362 }
363
364 case JTOK_COLON: {
365 if (!stack.size())
366 return false;
367
368 UniValue *top = stack.back();
369 if (top->getType() != VOBJ)
370 return false;
371
372 setExpect(VALUE);
373 break;
374 }
375
376 case JTOK_COMMA: {
377 if (!stack.size() ||
378 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
379 return false;
380
381 UniValue *top = stack.back();
382 if (top->getType() == VOBJ)
383 setExpect(OBJ_NAME);
384 else
385 setExpect(ARR_VALUE);
386 break;
387 }
388
389 case JTOK_KW_NULL:
390 case JTOK_KW_TRUE:
391 case JTOK_KW_FALSE: {
392 UniValue tmpVal;
393 switch (tok) {
394 case JTOK_KW_NULL:
395 // do nothing more
396 break;
397 case JTOK_KW_TRUE:
398 tmpVal.setBool(true);
399 break;
400 case JTOK_KW_FALSE:
401 tmpVal.setBool(false);
402 break;
403 default: /* impossible */ break;
404 }
405
406 if (!stack.size()) {
407 *this = tmpVal;
408 break;
409 }
410
411 UniValue *top = stack.back();
412 top->values.push_back(tmpVal);
413
414 setExpect(NOT_VALUE);
415 break;
416 }
417
418 case JTOK_NUMBER: {
419 UniValue tmpVal(VNUM, tokenVal);
420 if (!stack.size()) {
421 *this = tmpVal;
422 break;
423 }
424
425 UniValue *top = stack.back();
426 top->values.push_back(tmpVal);
427
428 setExpect(NOT_VALUE);
429 break;
430 }
431
432 case JTOK_STRING: {
433 if (expect(OBJ_NAME)) {
434 UniValue *top = stack.back();
435 top->keys.push_back(tokenVal);
436 clearExpect(OBJ_NAME);
437 setExpect(COLON);
438 } else {
439 UniValue tmpVal(VSTR, tokenVal);
440 if (!stack.size()) {
441 *this = tmpVal;
442 break;
443 }
444 UniValue *top = stack.back();
445 top->values.push_back(tmpVal);
446 }
447
448 setExpect(NOT_VALUE);
449 break;
450 }
451
452 default:
453 return false;
454 }
455 } while (!stack.empty ());
456
457 /* Check that nothing follows the initial construct (parsed above). */
458 tok = getJsonToken(tokenVal, consumed, raw, end);
459 if (tok != JTOK_NONE)
460 return false;
461
462 return true;
463}
464
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back(unsigned char ch)
void push_back_u(unsigned int codepoint_)
void push_back(UniValue val)
Definition: univalue.cpp:103
enum VType getType() const
Definition: univalue.h:67
@ VOBJ
Definition: univalue.h:24
@ VSTR
Definition: univalue.h:24
@ VARR
Definition: univalue.h:24
@ VNUM
Definition: univalue.h:24
void setArray()
Definition: univalue.cpp:91
void clear()
Definition: univalue.cpp:17
void setBool(bool val)
Definition: univalue.cpp:30
std::vector< UniValue > values
Definition: univalue.h:108
std::vector< std::string > keys
Definition: univalue.h:107
bool read(std::string_view raw)
void setObject()
Definition: univalue.cpp:97
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:172
static bool json_isspace(int ch)
Definition: univalue.h:189
jtokentype
Definition: univalue.h:152
@ JTOK_OBJ_CLOSE
Definition: univalue.h:156
@ JTOK_STRING
Definition: univalue.h:165
@ JTOK_COLON
Definition: univalue.h:159
@ JTOK_OBJ_OPEN
Definition: univalue.h:155
@ JTOK_NUMBER
Definition: univalue.h:164
@ JTOK_KW_NULL
Definition: univalue.h:161
@ JTOK_COMMA
Definition: univalue.h:160
@ JTOK_ARR_CLOSE
Definition: univalue.h:158
@ JTOK_KW_TRUE
Definition: univalue.h:162
@ JTOK_ARR_OPEN
Definition: univalue.h:157
@ JTOK_KW_FALSE
Definition: univalue.h:163
@ JTOK_ERR
Definition: univalue.h:153
@ JTOK_NONE
Definition: univalue.h:154
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)