Bitcoin Core 28.99.0
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
// Copyright 2014 BitPay Inc.
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://opensource.org/licenses/mit-license.php.
4
5#include <univalue.h>
7
8#include <cstdint>
9#include <cstdio>
10#include <cstring>
11#include <string>
12#include <string_view>
13#include <vector>
14
/*
 * Upper bound on how deeply arrays/objects may nest while parsing.
 *
 * Per stackexchange, the original json test suite wanted a depth limit
 * of 22, whereas the widely-deployed PHP gives up at depth 512. We adopt
 * PHP's limit, which should be more than sufficient (further
 * stackexchange comments indicate depth > 32 rarely occurs).
 */
static constexpr size_t MAX_JSON_DEPTH{512};
22
// Returns true only for the ASCII decimal digits '0' through '9'.
static bool json_isdigit(int ch)
{
    if (ch < '0')
        return false;
    return ch <= '9';
}
27
28// convert hexadecimal string to unsigned integer
29static const char *hatoui(const char *first, const char *last,
30 unsigned int& out)
31{
32 unsigned int result = 0;
33 for (; first != last; ++first)
34 {
35 int digit;
36 if (json_isdigit(*first))
37 digit = *first - '0';
38
39 else if (*first >= 'a' && *first <= 'f')
40 digit = *first - 'a' + 10;
41
42 else if (*first >= 'A' && *first <= 'F')
43 digit = *first - 'A' + 10;
44
45 else
46 break;
47
48 result = 16 * result + digit;
49 }
50 out = result;
51
52 return first;
53}
54
55enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
56 const char *raw, const char *end)
57{
58 tokenVal.clear();
59 consumed = 0;
60
61 const char *rawStart = raw;
62
63 while (raw < end && (json_isspace(*raw))) // skip whitespace
64 raw++;
65
66 if (raw >= end)
67 return JTOK_NONE;
68
69 switch (*raw) {
70
71 case '{':
72 raw++;
73 consumed = (raw - rawStart);
74 return JTOK_OBJ_OPEN;
75 case '}':
76 raw++;
77 consumed = (raw - rawStart);
78 return JTOK_OBJ_CLOSE;
79 case '[':
80 raw++;
81 consumed = (raw - rawStart);
82 return JTOK_ARR_OPEN;
83 case ']':
84 raw++;
85 consumed = (raw - rawStart);
86 return JTOK_ARR_CLOSE;
87
88 case ':':
89 raw++;
90 consumed = (raw - rawStart);
91 return JTOK_COLON;
92 case ',':
93 raw++;
94 consumed = (raw - rawStart);
95 return JTOK_COMMA;
96
97 case 'n':
98 case 't':
99 case 'f':
100 if (!strncmp(raw, "null", 4)) {
101 raw += 4;
102 consumed = (raw - rawStart);
103 return JTOK_KW_NULL;
104 } else if (!strncmp(raw, "true", 4)) {
105 raw += 4;
106 consumed = (raw - rawStart);
107 return JTOK_KW_TRUE;
108 } else if (!strncmp(raw, "false", 5)) {
109 raw += 5;
110 consumed = (raw - rawStart);
111 return JTOK_KW_FALSE;
112 } else
113 return JTOK_ERR;
114
115 case '-':
116 case '0':
117 case '1':
118 case '2':
119 case '3':
120 case '4':
121 case '5':
122 case '6':
123 case '7':
124 case '8':
125 case '9': {
126 // part 1: int
127 std::string numStr;
128
129 const char *first = raw;
130
131 const char *firstDigit = first;
132 if (!json_isdigit(*firstDigit))
133 firstDigit++;
134 if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
135 return JTOK_ERR;
136
137 numStr += *raw; // copy first char
138 raw++;
139
140 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
141 return JTOK_ERR;
142
143 while (raw < end && json_isdigit(*raw)) { // copy digits
144 numStr += *raw;
145 raw++;
146 }
147
148 // part 2: frac
149 if (raw < end && *raw == '.') {
150 numStr += *raw; // copy .
151 raw++;
152
153 if (raw >= end || !json_isdigit(*raw))
154 return JTOK_ERR;
155 while (raw < end && json_isdigit(*raw)) { // copy digits
156 numStr += *raw;
157 raw++;
158 }
159 }
160
161 // part 3: exp
162 if (raw < end && (*raw == 'e' || *raw == 'E')) {
163 numStr += *raw; // copy E
164 raw++;
165
166 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
167 numStr += *raw;
168 raw++;
169 }
170
171 if (raw >= end || !json_isdigit(*raw))
172 return JTOK_ERR;
173 while (raw < end && json_isdigit(*raw)) { // copy digits
174 numStr += *raw;
175 raw++;
176 }
177 }
178
179 tokenVal = numStr;
180 consumed = (raw - rawStart);
181 return JTOK_NUMBER;
182 }
183
184 case '"': {
185 raw++; // skip "
186
187 std::string valStr;
188 JSONUTF8StringFilter writer(valStr);
189
190 while (true) {
191 if (raw >= end || (unsigned char)*raw < 0x20)
192 return JTOK_ERR;
193
194 else if (*raw == '\\') {
195 raw++; // skip backslash
196
197 if (raw >= end)
198 return JTOK_ERR;
199
200 switch (*raw) {
201 case '"': writer.push_back('\"'); break;
202 case '\\': writer.push_back('\\'); break;
203 case '/': writer.push_back('/'); break;
204 case 'b': writer.push_back('\b'); break;
205 case 'f': writer.push_back('\f'); break;
206 case 'n': writer.push_back('\n'); break;
207 case 'r': writer.push_back('\r'); break;
208 case 't': writer.push_back('\t'); break;
209
210 case 'u': {
211 unsigned int codepoint;
212 if (raw + 1 + 4 >= end ||
213 hatoui(raw + 1, raw + 1 + 4, codepoint) !=
214 raw + 1 + 4)
215 return JTOK_ERR;
216 writer.push_back_u(codepoint);
217 raw += 4;
218 break;
219 }
220 default:
221 return JTOK_ERR;
222
223 }
224
225 raw++; // skip esc'd char
226 }
227
228 else if (*raw == '"') {
229 raw++; // skip "
230 break; // stop scanning
231 }
232
233 else {
234 writer.push_back(static_cast<unsigned char>(*raw));
235 raw++;
236 }
237 }
238
239 if (!writer.finalize())
240 return JTOK_ERR;
241 tokenVal = valStr;
242 consumed = (raw - rawStart);
243 return JTOK_STRING;
244 }
245
246 default:
247 return JTOK_ERR;
248 }
249}
250
// Flags describing what the parser in UniValue::read() will accept as the
// next token. They are OR-ed together in a local variable named `expectMask`.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // an object key (string) or '}'
    EXP_COLON     = 0x02U, // the ':' separating key and value
    EXP_ARR_VALUE = 0x04U, // an array element or ']'
    EXP_VALUE     = 0x08U, // a value (scalar, '{' or '[')
    EXP_NOT_VALUE = 0x10U, // anything except a value
};

// Convenience accessors for `expectMask`; `bit` is the flag name without its
// EXP_ prefix. A variable called `expectMask` must be in scope at the use site.
#define expect(bit) (expectMask & (EXP_##bit))
#define setExpect(bit) (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)
262
263bool UniValue::read(std::string_view str_in)
264{
265 clear();
266
267 uint32_t expectMask = 0;
268 std::vector<UniValue*> stack;
269
270 std::string tokenVal;
271 unsigned int consumed;
272 enum jtokentype tok = JTOK_NONE;
273 enum jtokentype last_tok = JTOK_NONE;
274 const char* raw{str_in.data()};
275 const char* end{raw + str_in.size()};
276 do {
277 last_tok = tok;
278
279 tok = getJsonToken(tokenVal, consumed, raw, end);
280 if (tok == JTOK_NONE || tok == JTOK_ERR)
281 return false;
282 raw += consumed;
283
284 bool isValueOpen = jsonTokenIsValue(tok) ||
285 tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
286
287 if (expect(VALUE)) {
288 if (!isValueOpen)
289 return false;
290 clearExpect(VALUE);
291
292 } else if (expect(ARR_VALUE)) {
293 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
294 if (!isArrValue)
295 return false;
296
297 clearExpect(ARR_VALUE);
298
299 } else if (expect(OBJ_NAME)) {
300 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
301 if (!isObjName)
302 return false;
303
304 } else if (expect(COLON)) {
305 if (tok != JTOK_COLON)
306 return false;
307 clearExpect(COLON);
308
309 } else if (!expect(COLON) && (tok == JTOK_COLON)) {
310 return false;
311 }
312
313 if (expect(NOT_VALUE)) {
314 if (isValueOpen)
315 return false;
316 clearExpect(NOT_VALUE);
317 }
318
319 switch (tok) {
320
321 case JTOK_OBJ_OPEN:
322 case JTOK_ARR_OPEN: {
323 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
324 if (!stack.size()) {
325 if (utyp == VOBJ)
326 setObject();
327 else
328 setArray();
329 stack.push_back(this);
330 } else {
331 UniValue tmpVal(utyp);
332 UniValue *top = stack.back();
333 top->values.push_back(tmpVal);
334
335 UniValue *newTop = &(top->values.back());
336 stack.push_back(newTop);
337 }
338
339 if (stack.size() > MAX_JSON_DEPTH)
340 return false;
341
342 if (utyp == VOBJ)
343 setExpect(OBJ_NAME);
344 else
345 setExpect(ARR_VALUE);
346 break;
347 }
348
349 case JTOK_OBJ_CLOSE:
350 case JTOK_ARR_CLOSE: {
351 if (!stack.size() || (last_tok == JTOK_COMMA))
352 return false;
353
354 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
355 UniValue *top = stack.back();
356 if (utyp != top->getType())
357 return false;
358
359 stack.pop_back();
360 clearExpect(OBJ_NAME);
361 setExpect(NOT_VALUE);
362 break;
363 }
364
365 case JTOK_COLON: {
366 if (!stack.size())
367 return false;
368
369 UniValue *top = stack.back();
370 if (top->getType() != VOBJ)
371 return false;
372
373 setExpect(VALUE);
374 break;
375 }
376
377 case JTOK_COMMA: {
378 if (!stack.size() ||
379 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
380 return false;
381
382 UniValue *top = stack.back();
383 if (top->getType() == VOBJ)
384 setExpect(OBJ_NAME);
385 else
386 setExpect(ARR_VALUE);
387 break;
388 }
389
390 case JTOK_KW_NULL:
391 case JTOK_KW_TRUE:
392 case JTOK_KW_FALSE: {
393 UniValue tmpVal;
394 switch (tok) {
395 case JTOK_KW_NULL:
396 // do nothing more
397 break;
398 case JTOK_KW_TRUE:
399 tmpVal.setBool(true);
400 break;
401 case JTOK_KW_FALSE:
402 tmpVal.setBool(false);
403 break;
404 default: /* impossible */ break;
405 }
406
407 if (!stack.size()) {
408 *this = tmpVal;
409 break;
410 }
411
412 UniValue *top = stack.back();
413 top->values.push_back(tmpVal);
414
415 setExpect(NOT_VALUE);
416 break;
417 }
418
419 case JTOK_NUMBER: {
420 UniValue tmpVal(VNUM, tokenVal);
421 if (!stack.size()) {
422 *this = tmpVal;
423 break;
424 }
425
426 UniValue *top = stack.back();
427 top->values.push_back(tmpVal);
428
429 setExpect(NOT_VALUE);
430 break;
431 }
432
433 case JTOK_STRING: {
434 if (expect(OBJ_NAME)) {
435 UniValue *top = stack.back();
436 top->keys.push_back(tokenVal);
437 clearExpect(OBJ_NAME);
438 setExpect(COLON);
439 } else {
440 UniValue tmpVal(VSTR, tokenVal);
441 if (!stack.size()) {
442 *this = tmpVal;
443 break;
444 }
445 UniValue *top = stack.back();
446 top->values.push_back(tmpVal);
447 }
448
449 setExpect(NOT_VALUE);
450 break;
451 }
452
453 default:
454 return false;
455 }
456 } while (!stack.empty ());
457
458 /* Check that nothing follows the initial construct (parsed above). */
459 tok = getJsonToken(tokenVal, consumed, raw, end);
460 if (tok != JTOK_NONE)
461 return false;
462
463 return true;
464}
465
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back(unsigned char ch)
void push_back_u(unsigned int codepoint_)
void push_back(UniValue val)
Definition: univalue.cpp:104
enum VType getType() const
Definition: univalue.h:67
@ VOBJ
Definition: univalue.h:24
@ VSTR
Definition: univalue.h:24
@ VARR
Definition: univalue.h:24
@ VNUM
Definition: univalue.h:24
void setArray()
Definition: univalue.cpp:92
void clear()
Definition: univalue.cpp:18
void setBool(bool val)
Definition: univalue.cpp:31
std::vector< UniValue > values
Definition: univalue.h:106
std::vector< std::string > keys
Definition: univalue.h:105
bool read(std::string_view raw)
void setObject()
Definition: univalue.cpp:98
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:170
static bool json_isspace(int ch)
Definition: univalue.h:187
jtokentype
Definition: univalue.h:150
@ JTOK_OBJ_CLOSE
Definition: univalue.h:154
@ JTOK_STRING
Definition: univalue.h:163
@ JTOK_COLON
Definition: univalue.h:157
@ JTOK_OBJ_OPEN
Definition: univalue.h:153
@ JTOK_NUMBER
Definition: univalue.h:162
@ JTOK_KW_NULL
Definition: univalue.h:159
@ JTOK_COMMA
Definition: univalue.h:158
@ JTOK_ARR_CLOSE
Definition: univalue.h:156
@ JTOK_KW_TRUE
Definition: univalue.h:160
@ JTOK_ARR_OPEN
Definition: univalue.h:155
@ JTOK_KW_FALSE
Definition: univalue.h:161
@ JTOK_ERR
Definition: univalue.h:151
@ JTOK_NONE
Definition: univalue.h:152
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)