root / json-parser.c @ e3e87df4
History | View | Annotate | Download (14.6 kB)
1 |
/*
|
---|---|
2 |
* JSON Parser
|
3 |
*
|
4 |
* Copyright IBM, Corp. 2009
|
5 |
*
|
6 |
* Authors:
|
7 |
* Anthony Liguori <aliguori@us.ibm.com>
|
8 |
*
|
9 |
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
|
10 |
* See the COPYING.LIB file in the top-level directory.
|
11 |
*
|
12 |
*/
|
13 |
|
14 |
#include <errno.h>
#include <stdarg.h>

#include "qemu-common.h"
#include "qstring.h"
#include "qint.h"
#include "qdict.h"
#include "qlist.h"
#include "qfloat.h"
#include "qbool.h"
#include "json-parser.h"
#include "json-lexer.h"
#include "qerror.h"
26 |
|
27 |
typedef struct JSONParserContext |
28 |
{ |
29 |
Error *err; |
30 |
} JSONParserContext; |
31 |
|
32 |
/* Assert that @cond does NOT hold; expands to assert() on the negated condition. */
#define BUG_ON(cond) assert(!(cond))
|
33 |
|
34 |
/**
|
35 |
* TODO
|
36 |
*
|
37 |
* 0) make errors meaningful again
|
38 |
* 1) add geometry information to tokens
|
39 |
* 3) should we return a parsed size?
|
40 |
* 4) deal with premature EOI
|
41 |
*/
|
42 |
|
43 |
/* Forward declaration: parse_pair() and parse_array() call parse_value() before it is defined. */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
|
44 |
|
45 |
/**
|
46 |
* Token manipulators
|
47 |
*
|
48 |
* tokens are dictionaries that contain a type, a string value, and geometry information
|
49 |
* about a token identified by the lexer. These are routines that make working with
|
50 |
* these objects a bit easier.
|
51 |
*/
|
52 |
/* Return the string stored under the "token" key of the token dictionary @obj. */
static const char *token_get_value(QObject *obj)
{
    QDict *tok = qobject_to_qdict(obj);

    return qdict_get_str(tok, "token");
}
56 |
|
57 |
/* Return the integer stored under the "type" key of the token dictionary @obj. */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *tok = qobject_to_qdict(obj);

    return qdict_get_int(tok, "type");
}
61 |
|
62 |
/*
 * Return 1 if @obj is a JSON_OPERATOR token whose text is exactly the
 * single character @op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);
        if (text[0] == op && text[1] == '\0') {
            return 1;
        }
    }

    return 0;
}
74 |
|
75 |
/*
 * Return 1 if @obj is a JSON_KEYWORD token whose text equals @value,
 * 0 otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_KEYWORD &&
           strcmp(token_get_value(obj), value) == 0;
}
83 |
|
84 |
/*
 * Return 1 if @obj is a JSON_ESCAPE token whose text equals @value,
 * 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    int matches = 0;

    if (token_get_type(obj) == JSON_ESCAPE) {
        matches = (strcmp(token_get_value(obj), value) == 0);
    }

    return matches;
}
92 |
|
93 |
/**
|
94 |
* Error handler
|
95 |
*/
|
96 |
/*
 * Record a parse error in @ctxt.
 *
 * @msg is a printf-style format; the formatted text is limited to the
 * size of the local buffer (1024 bytes including the terminator).
 * Any error already stored in @ctxt is freed first, so only the most
 * recent error is kept.  @token is accepted but not used here.
 */
static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
                                           QObject *token, const char *msg, ...)
{
    char text[1024];
    va_list args;

    va_start(args, msg);
    vsnprintf(text, sizeof(text), msg, args);
    va_end(args);

    if (ctxt->err != NULL) {
        error_free(ctxt->err);
        ctxt->err = NULL;
    }

    error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, text);
}
110 |
|
111 |
/**
|
112 |
* String helpers
|
113 |
*
|
114 |
* These helpers are used to unescape strings.
|
115 |
*/
|
116 |
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * The encoding takes 1, 2 or 3 bytes plus the terminator; the function
 * asserts that @buffer_length is large enough for the form it selects
 * (2, 3 or 4 bytes respectively).
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar <= 0x007F) {
        /* one byte: 0xxxxxxx */
        assert(!(buffer_length < 2));

        buffer[len++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* two bytes: 110xxxxx 10xxxxxx */
        assert(!(buffer_length < 3));

        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(!(buffer_length < 4));

        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
138 |
|
139 |
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
151 |
|
152 |
/**
|
153 |
* parse_string(): Parse a json string and return a QObject
|
154 |
*
|
155 |
* string
|
156 |
* ""
|
157 |
* " chars "
|
158 |
* chars
|
159 |
* char
|
160 |
* char chars
|
161 |
* char
|
162 |
* any-Unicode-character-
|
163 |
* except-"-or-\-or-
|
164 |
* control-character
|
165 |
* \"
|
166 |
* \\
|
167 |
* \/
|
168 |
* \b
|
169 |
* \f
|
170 |
* \n
|
171 |
* \r
|
172 |
* \t
|
173 |
* \u four-hex-digits
|
174 |
*/
|
175 |
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
|
176 |
{ |
177 |
const char *ptr = token_get_value(token); |
178 |
QString *str; |
179 |
int double_quote = 1; |
180 |
|
181 |
if (*ptr == '"') { |
182 |
double_quote = 1;
|
183 |
} else {
|
184 |
double_quote = 0;
|
185 |
} |
186 |
ptr++; |
187 |
|
188 |
str = qstring_new(); |
189 |
while (*ptr &&
|
190 |
((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { |
191 |
if (*ptr == '\\') { |
192 |
ptr++; |
193 |
|
194 |
switch (*ptr) {
|
195 |
case '"': |
196 |
qstring_append(str, "\"");
|
197 |
ptr++; |
198 |
break;
|
199 |
case '\'': |
200 |
qstring_append(str, "'");
|
201 |
ptr++; |
202 |
break;
|
203 |
case '\\': |
204 |
qstring_append(str, "\\");
|
205 |
ptr++; |
206 |
break;
|
207 |
case '/': |
208 |
qstring_append(str, "/");
|
209 |
ptr++; |
210 |
break;
|
211 |
case 'b': |
212 |
qstring_append(str, "\b");
|
213 |
ptr++; |
214 |
break;
|
215 |
case 'f': |
216 |
qstring_append(str, "\f");
|
217 |
ptr++; |
218 |
break;
|
219 |
case 'n': |
220 |
qstring_append(str, "\n");
|
221 |
ptr++; |
222 |
break;
|
223 |
case 'r': |
224 |
qstring_append(str, "\r");
|
225 |
ptr++; |
226 |
break;
|
227 |
case 't': |
228 |
qstring_append(str, "\t");
|
229 |
ptr++; |
230 |
break;
|
231 |
case 'u': { |
232 |
uint16_t unicode_char = 0;
|
233 |
char utf8_char[4]; |
234 |
int i = 0; |
235 |
|
236 |
ptr++; |
237 |
|
238 |
for (i = 0; i < 4; i++) { |
239 |
if (qemu_isxdigit(*ptr)) {
|
240 |
unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); |
241 |
} else {
|
242 |
parse_error(ctxt, token, |
243 |
"invalid hex escape sequence in string");
|
244 |
goto out;
|
245 |
} |
246 |
ptr++; |
247 |
} |
248 |
|
249 |
wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
|
250 |
qstring_append(str, utf8_char); |
251 |
} break;
|
252 |
default:
|
253 |
parse_error(ctxt, token, "invalid escape sequence in string");
|
254 |
goto out;
|
255 |
} |
256 |
} else {
|
257 |
char dummy[2]; |
258 |
|
259 |
dummy[0] = *ptr++;
|
260 |
dummy[1] = 0; |
261 |
|
262 |
qstring_append(str, dummy); |
263 |
} |
264 |
} |
265 |
|
266 |
return str;
|
267 |
|
268 |
out:
|
269 |
QDECREF(str); |
270 |
return NULL; |
271 |
} |
272 |
|
273 |
/**
|
274 |
* Parsing rules
|
275 |
*/
|
276 |
/*
 * Parse one `key : value' pair and store it in @dict.
 *
 * Works on a private copy of *@tokens.  On success the copy, with the
 * consumed tokens removed, replaces *@tokens and 0 is returned.  On
 * failure *@tokens is left as it was, an error is recorded in @ctxt and
 * -1 is returned.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QList *working = qlist_copy(*tokens);
    QObject *first, *key = NULL, *colon = NULL, *value;
    int ret = -1;

    first = qlist_peek(working);
    if (!first) {
        parse_error(ctxt, NULL, "premature EOI");
        goto done;
    }

    key = parse_value(ctxt, &working, ap);
    if (key == NULL || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, first, "key is not a string in object");
        goto done;
    }

    colon = qlist_pop(working);
    if (!colon) {
        parse_error(ctxt, NULL, "premature EOI");
        goto done;
    }

    if (!token_is_operator(colon, ':')) {
        parse_error(ctxt, colon, "missing : in object pair");
        goto done;
    }

    value = parse_value(ctxt, &working, ap);
    if (!value) {
        parse_error(ctxt, colon, "Missing value in dict");
        goto done;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    /* commit: hand the advanced token list back to the caller */
    QDECREF(*tokens);
    *tokens = working;
    ret = 0;

done:
    qobject_decref(colon);
    qobject_decref(key);
    if (ret != 0) {
        /* the working copy was not handed over, so drop it */
        QDECREF(working);
    }

    return ret;
}
326 |
|
327 |
/*
 * Parse `{ pair, pair, ... }' into a new QDict.
 *
 * Works on a private copy of *@tokens.  On success the copy replaces
 * *@tokens and the dictionary is returned as a QObject.  Returns NULL,
 * leaving *@tokens untouched, when the first token is not `{' or when
 * the object is malformed; in the latter case an error is recorded in
 * @ctxt.
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *working = qlist_copy(*tokens);
    QDict *dict = NULL;
    QObject *token, *next;

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '{')) {
        goto fail;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    next = qlist_peek(working);
    if (!next) {
        parse_error(ctxt, NULL, "premature EOI");
        goto fail;
    }

    if (token_is_operator(next, '}')) {
        /* empty object: just consume the closing brace */
        token = qlist_pop(working);
    } else {
        for (;;) {
            /* token is NULL here, so the failure path has nothing to drop */
            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto fail;
            }

            token = qlist_pop(working);
            if (!token) {
                parse_error(ctxt, NULL, "premature EOI");
                goto fail;
            }

            if (token_is_operator(token, '}')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto fail;
            }
            qobject_decref(token);
            token = NULL;
        }
    }
    /* release the closing brace */
    qobject_decref(token);
    token = NULL;

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

fail:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
400 |
|
401 |
/*
 * Parse `[ value, value, ... ]' into a new QList.
 *
 * Works on a private copy of *@tokens.  On success the copy replaces
 * *@tokens and the list is returned as a QObject.  Returns NULL, leaving
 * *@tokens untouched, when the first token is not `[' or when the array
 * is malformed; in the latter case an error is recorded in @ctxt.
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *working = qlist_copy(*tokens);
    QList *list = NULL;
    QObject *token, *next;

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '[')) {
        goto fail;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    next = qlist_peek(working);
    if (!next) {
        parse_error(ctxt, NULL, "premature EOI");
        goto fail;
    }

    if (token_is_operator(next, ']')) {
        /* empty array: just consume the closing bracket */
        token = qlist_pop(working);
    } else {
        for (;;) {
            QObject *element = parse_value(ctxt, &working, ap);

            if (!element) {
                /* token is NULL at this point */
                parse_error(ctxt, token, "expecting value");
                goto fail;
            }

            qlist_append_obj(list, element);

            token = qlist_pop(working);
            if (!token) {
                parse_error(ctxt, NULL, "premature EOI");
                goto fail;
            }

            if (token_is_operator(token, ']')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto fail;
            }

            qobject_decref(token);
            token = NULL;
        }
    }
    /* release the closing bracket */
    qobject_decref(token);
    token = NULL;

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

fail:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
486 |
|
487 |
/*
 * Parse a keyword token into a QBool.
 *
 * Only `true' and `false' are accepted.  Any other keyword records an
 * error in @ctxt.  Returns NULL, leaving *@tokens untouched, when the
 * next token is missing, is not a keyword, or is an unknown keyword.
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QList *working = qlist_copy(*tokens);
    QObject *token, *value;

    token = qlist_pop(working);
    if (token == NULL || token_get_type(token) != JSON_KEYWORD) {
        goto fail;
    }

    if (token_is_keyword(token, "true")) {
        value = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        value = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto fail;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return value;

fail:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
522 |
|
523 |
/*
 * Parse a printf-like escape token by taking its value from @ap.
 *
 * Recognized escapes and the argument type read for each:
 *   %p            QObject *   (returned as is)
 *   %i            int         (wrapped in a QBool)
 *   %d            int         (wrapped in a QInt)
 *   %ld           long        (wrapped in a QInt)
 *   %lld, %I64d   long long   (wrapped in a QInt)
 *   %s            const char * (copied into a QString)
 *   %f            double      (wrapped in a QFloat)
 *
 * Returns NULL, leaving *@tokens untouched, when @ap is NULL, no token
 * is left, or the token is not one of the escapes above.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *working = qlist_copy(*tokens);
    QObject *token = NULL, *value;

    if (!ap) {
        goto fail;
    }

    token = qlist_pop(working);
    if (!token) {
        goto fail;
    }

    if (token_is_escape(token, "%p")) {
        value = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        int arg = va_arg(*ap, int);

        value = QOBJECT(qbool_from_int(arg));
    } else if (token_is_escape(token, "%d")) {
        int arg = va_arg(*ap, int);

        value = QOBJECT(qint_from_int(arg));
    } else if (token_is_escape(token, "%ld")) {
        long arg = va_arg(*ap, long);

        value = QOBJECT(qint_from_int(arg));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        long long arg = va_arg(*ap, long long);

        value = QOBJECT(qint_from_int(arg));
    } else if (token_is_escape(token, "%s")) {
        const char *arg = va_arg(*ap, const char *);

        value = QOBJECT(qstring_from_str(arg));
    } else if (token_is_escape(token, "%f")) {
        double arg = va_arg(*ap, double);

        value = QOBJECT(qfloat_from_double(arg));
    } else {
        goto fail;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return value;

fail:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
568 |
|
569 |
/*
 * Parse a string, integer or float token into the matching QObject.
 *
 * Works on a private copy of *@tokens; the copy replaces *@tokens once a
 * token of one of those three types has been consumed.  Returns NULL,
 * leaving *@tokens untouched, when no token is left or the token has
 * another type.
 *
 * An integer literal that does not fit the range of strtoll() is
 * returned as a QFloat instead of being silently clamped to
 * LLONG_MIN/LLONG_MAX, so users of the result must be prepared to see a
 * QFloat for very large whole numbers.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING:
        obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
        break;
    case JSON_INTEGER: {
        const char *text = token_get_value(token);
        long long value;

        /* strtoll() does not reset errno on success, so clear it first */
        errno = 0;
        value = strtoll(text, NULL, 10);
        if (errno == ERANGE) {
            /* out of range: keep the magnitude by falling back to a float */
            /* FIXME dependent on locale */
            obj = QOBJECT(qfloat_from_double(strtod(text, NULL)));
        } else {
            obj = QOBJECT(qint_from_int(value));
        }
        break;
    }
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
606 |
|
607 |
/*
 * Parse the next value from *@tokens.
 *
 * Tries, in order, an object, an array, an escape, a keyword and a
 * literal, and returns the result of the first parser that yields a
 * non-NULL object.  Returns NULL if none of them does.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *value;

    value = parse_object(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_array(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_escape(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_keyword(ctxt, tokens);
    if (value != NULL) {
        return value;
    }

    return parse_literal(ctxt, tokens);
}
627 |
|
628 |
/*
 * Parse @tokens into a QObject without reporting errors to the caller:
 * forwards to json_parser_parse_err() with a NULL error pointer.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    Error **no_errp = NULL;

    return json_parser_parse_err(tokens, ap, no_errp);
}
632 |
|
633 |
QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) |
634 |
{ |
635 |
JSONParserContext ctxt = {}; |
636 |
QList *working; |
637 |
QObject *result; |
638 |
|
639 |
if (!tokens) {
|
640 |
return NULL; |
641 |
} |
642 |
working = qlist_copy(tokens); |
643 |
result = parse_value(&ctxt, &working, ap); |
644 |
|
645 |
QDECREF(working); |
646 |
|
647 |
error_propagate(errp, ctxt.err); |
648 |
|
649 |
return result;
|
650 |
} |