Statistics
| Branch: | Revision:

root / json-parser.c @ c28fa5a0

History | View | Annotate | Download (16.4 kB)

1
/*
2
 * JSON Parser 
3
 *
4
 * Copyright IBM, Corp. 2009
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10
 * See the COPYING.LIB file in the top-level directory.
11
 *
12
 */
13

    
14
#include <errno.h>
#include <stdarg.h>

#include "qemu-common.h"
#include "qstring.h"
#include "qint.h"
#include "qdict.h"
#include "qlist.h"
#include "qfloat.h"
#include "qbool.h"
#include "json-parser.h"
#include "json-lexer.h"
#include "qerror.h"
26

    
27
typedef struct JSONParserContext
28
{
29
    Error *err;
30
    struct {
31
        QObject **buf;
32
        size_t pos;
33
        size_t count;
34
    } tokens;
35
} JSONParserContext;
36

    
37
/* Assert that an internal invariant holds: trips the assertion when cond is true. */
#define BUG_ON(cond) assert(!(cond))
38

    
39
/**
40
 * TODO
41
 *
42
 * 0) make errors meaningful again
43
 * 1) add geometry information to tokens
44
 * 3) should we return a parsed size?
45
 * 4) deal with premature EOI
46
 */
47

    
48
/* Forward declaration: parse_value() and the object/array rules call each other. */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
49

    
50
/**
51
 * Token manipulators
52
 *
53
 * tokens are dictionaries that contain a type, a string value, and geometry information
54
 * about a token identified by the lexer.  These are routines that make working with
55
 * these objects a bit easier.
56
 */
57
/**
 * token_get_value(): Return the string stored under the "token" key
 * of the token dictionary @obj.
 */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
61

    
62
/**
 * token_get_type(): Return the integer stored under the "type" key
 * of the token dictionary @obj, as a JSONTokenType.
 */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
66

    
67
/**
 * token_is_operator(): Test whether @obj is the operator token @op
 *
 * Returns 1 when the token has type JSON_OPERATOR and its text is exactly
 * the single character @op; 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);
        if (text[0] == op && text[1] == '\0') {
            return 1;
        }
    }

    return 0;
}
79

    
80
/**
 * token_is_keyword(): Test whether @obj is the keyword token @value
 *
 * Returns 1 when the token has type JSON_KEYWORD and its text equals
 * @value; 0 otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_KEYWORD &&
           strcmp(token_get_value(obj), value) == 0;
}
88

    
89
/**
 * token_is_escape(): Test whether @obj is the escape token @value
 *
 * Returns 1 when the token has type JSON_ESCAPE and its text equals
 * @value (for example "%d"); 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_ESCAPE &&
           strcmp(token_get_value(obj), value) == 0;
}
97

    
98
/**
99
 * Error handler
100
 */
101
/**
 * parse_error(): Record a printf-style formatted parse error in @ctxt
 *
 * The message is formatted into a fixed 1024-byte buffer (longer messages
 * are truncated by vsnprintf). Any error already stored in ctxt->err is
 * freed first, so only the most recent error is kept.
 *
 * @token is accepted but not used by this function.
 */
static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
                                           QObject *token, const char *msg, ...)
{
    char text[1024];
    va_list args;

    va_start(args, msg);
    vsnprintf(text, sizeof(text), msg, args);
    va_end(args);

    /* Drop a previously recorded error before storing the new one. */
    if (ctxt->err != NULL) {
        error_free(ctxt->err);
        ctxt->err = NULL;
    }

    error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, text);
}
115

    
116
/**
117
 * String helpers
118
 *
119
 * These helpers are used to unescape strings.
120
 */
121
/**
 * wchar_to_utf8(): Encode a 16-bit code unit as a NUL-terminated UTF-8 string
 *
 * @wchar:         value to encode
 * @buffer:        output buffer
 * @buffer_length: size of @buffer in bytes; must hold the encoded bytes
 *                 (1 to 3) plus the terminating NUL, otherwise an assertion
 *                 trips
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar < 0x0080) {
        /* one byte: 0xxxxxxx */
        assert(buffer_length >= 2);
        buffer[len++] = wchar & 0x7F;
    } else if (wchar < 0x0800) {
        /* two bytes: 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);
        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);
        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = '\0';
}
143

    
144
/**
 * hex2decimal(): Convert one hexadecimal digit to its numeric value
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = (ch - 'a') + 10;
    } else if ('A' <= ch && ch <= 'F') {
        value = (ch - 'A') + 10;
    }

    return value;
}
156

    
157
/**
158
 * parse_string(): Parse a json string and return a QObject
159
 *
160
 *  string
161
 *      ""
162
 *      " chars "
163
 *  chars
164
 *      char
165
 *      char chars
166
 *  char
167
 *      any-Unicode-character-
168
 *          except-"-or-\-or-
169
 *          control-character
170
 *      \"
171
 *      \\
172
 *      \/
173
 *      \b
174
 *      \f
175
 *      \n
176
 *      \r
177
 *      \t
178
 *      \u four-hex-digits 
179
 */
180
/**
 * qstring_from_escaped_str(): Unescape a string token into a new QString
 *
 * The first character of the token text is the opening quote; a double
 * quote selects '"' as the terminator, anything else selects '\''.
 * Characters are copied until the terminator or the end of the text.
 * Backslash escapes (\" \' \\ \/ \b \f \n \r \t and \u plus four hex
 * digits) are replaced by the characters they denote; \uXXXX is emitted
 * as UTF-8.
 *
 * Returns the new QString, or NULL after recording a parse error in @ctxt
 * when an escape sequence is invalid.
 */
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
{
    const char *cursor = token_get_value(token);
    const char closing_quote = (*cursor == '"') ? '"' : '\'';
    QString *result;

    cursor++;   /* step over the opening quote */

    result = qstring_new();
    while (*cursor != '\0' && *cursor != closing_quote) {
        const char *unescaped;
        uint16_t code_unit;
        char piece[4];
        int digit;

        if (*cursor != '\\') {
            /* ordinary character: copy it through unchanged */
            piece[0] = *cursor++;
            piece[1] = '\0';
            qstring_append(result, piece);
            continue;
        }

        cursor++;   /* step over the backslash */

        switch (*cursor) {
        case '"':
            unescaped = "\"";
            break;
        case '\'':
            unescaped = "'";
            break;
        case '\\':
            unescaped = "\\";
            break;
        case '/':
            unescaped = "/";
            break;
        case 'b':
            unescaped = "\b";
            break;
        case 'f':
            unescaped = "\f";
            break;
        case 'n':
            unescaped = "\n";
            break;
        case 'r':
            unescaped = "\r";
            break;
        case 't':
            unescaped = "\t";
            break;
        case 'u':
            unescaped = NULL;   /* handled below */
            break;
        default:
            parse_error(ctxt, token, "invalid escape sequence in string");
            goto out;
        }

        cursor++;   /* step over the escape selector character */

        if (unescaped != NULL) {
            qstring_append(result, unescaped);
            continue;
        }

        /* \uXXXX: assemble four hex digits, most significant first */
        code_unit = 0;
        for (digit = 0; digit < 4; digit++, cursor++) {
            if (!qemu_isxdigit(*cursor)) {
                parse_error(ctxt, token,
                            "invalid hex escape sequence in string");
                goto out;
            }
            code_unit |= hex2decimal(*cursor) << ((3 - digit) * 4);
        }

        wchar_to_utf8(code_unit, piece, sizeof(piece));
        qstring_append(result, piece);
    }

    return result;

out:
    QDECREF(result);
    return NULL;
}
277

    
278
/**
 * parser_context_pop_token(): Consume and return the next token
 *
 * Returns the token at the current position and advances the position by
 * one, or returns NULL (leaving the position untouched) when every token
 * has already been consumed. Returning NULL instead of asserting lets the
 * callers' "premature EOI" checks actually run on truncated input.
 *
 * The returned pointer is not given an extra reference; the token array
 * keeps owning it, so callers must not decref it.
 */
static QObject *parser_context_pop_token(JSONParserContext *ctxt)
{
    if (ctxt->tokens.pos >= ctxt->tokens.count) {
        return NULL;
    }

    return ctxt->tokens.buf[ctxt->tokens.pos++];
}
286

    
287
/* Note: parser_context_{peek|pop}_token do not increment the
288
 * token object's refcount. In both cases the references will continue
289
 * to be tracked and cleaned up in parser_context_free(), so do not
290
 * attempt to free the token object.
291
 */
292
/**
 * parser_context_peek_token(): Return the next token without consuming it
 *
 * Returns the token at the current position, or NULL when every token has
 * already been consumed. Returning NULL instead of asserting lets the
 * callers' "premature EOI" checks actually run on truncated input.
 *
 * The returned pointer is not given an extra reference; the token array
 * keeps owning it, so callers must not decref it.
 */
static QObject *parser_context_peek_token(JSONParserContext *ctxt)
{
    if (ctxt->tokens.pos >= ctxt->tokens.count) {
        return NULL;
    }

    return ctxt->tokens.buf[ctxt->tokens.pos];
}
299

    
300
/**
 * parser_context_save(): Snapshot the token cursor of @ctxt
 *
 * Returns a by-value copy holding the token buffer pointer, position and
 * count. The err member of the snapshot is zeroed; the error state is not
 * part of the snapshot.
 */
static JSONParserContext parser_context_save(JSONParserContext *ctxt)
{
    JSONParserContext snapshot = {0};

    snapshot.tokens = ctxt->tokens;

    return snapshot;
}
308

    
309
/**
 * parser_context_restore(): Rewind @ctxt to a snapshot taken earlier
 *
 * Copies the token buffer pointer, position and count from @saved_ctxt
 * back into @ctxt. ctxt->err is left as it is.
 */
static void parser_context_restore(JSONParserContext *ctxt,
                                   JSONParserContext saved_ctxt)
{
    ctxt->tokens = saved_ctxt.tokens;
}
316

    
317
/**
 * tokens_append_from_iter(): qlist_iter() callback that fills the token array
 *
 * @obj:    list element being visited
 * @opaque: the JSONParserContext being populated
 *
 * Stores @obj at the current position, advances the position and takes a
 * reference on @obj. Asserts that the array still has room.
 */
static void tokens_append_from_iter(QObject *obj, void *opaque)
{
    JSONParserContext *ctxt = opaque;
    size_t slot = ctxt->tokens.pos;

    g_assert(slot < ctxt->tokens.count);

    ctxt->tokens.buf[slot] = obj;
    ctxt->tokens.pos = slot + 1;
    qobject_incref(obj);
}
324

    
325
/**
 * parser_context_new(): Build a parser context from a token list
 *
 * Copies the elements of @tokens into a newly allocated array (taking a
 * reference on each one) and positions the cursor at the first token.
 *
 * Returns the new context, or NULL when @tokens is NULL or empty.
 * Release it with parser_context_free().
 */
static JSONParserContext *parser_context_new(QList *tokens)
{
    JSONParserContext *ctxt;
    size_t ntokens;

    if (tokens == NULL) {
        return NULL;
    }

    ntokens = qlist_size(tokens);
    if (ntokens == 0) {
        return NULL;
    }

    ctxt = g_malloc0(sizeof(*ctxt));
    ctxt->tokens.buf = g_malloc(ntokens * sizeof(QObject *));
    ctxt->tokens.count = ntokens;

    /* The iterator callback uses tokens.pos as its write index ... */
    ctxt->tokens.pos = 0;
    qlist_iter(tokens, tokens_append_from_iter, ctxt);
    /* ... so rewind it before parsing starts. */
    ctxt->tokens.pos = 0;

    return ctxt;
}
348

    
349
/* to support error propagation, ctxt->err must be freed separately */
350
/**
 * parser_context_free(): Release a parser context
 *
 * Drops the reference held on every token in the array, then frees the
 * array and the context itself. A NULL @ctxt is ignored.
 *
 * ctxt->err is deliberately not freed here so that it can be propagated
 * to the caller; it must be released separately.
 */
static void parser_context_free(JSONParserContext *ctxt)
{
    size_t i;   /* same type as tokens.count: avoids signed/unsigned compare */

    if (ctxt == NULL) {
        return;
    }

    for (i = 0; i < ctxt->tokens.count; i++) {
        qobject_decref(ctxt->tokens.buf[i]);
    }
    g_free(ctxt->tokens.buf);
    g_free(ctxt);
}
361

    
362
/**
363
 * Parsing rules
364
 */
365
/**
 * parse_pair(): Parse one `key : value` member and store it in @dict
 *
 * The key must parse to a string value. On success the value is inserted
 * into @dict under the key's string and 0 is returned. On failure a parse
 * error is recorded, the token cursor is rewound to where it was on entry
 * and -1 is returned.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
{
    JSONParserContext checkpoint = parser_context_save(ctxt);
    QObject *member_key = NULL;
    QObject *member_value;
    QObject *first;
    QObject *separator;

    /* Remember the first token so a bad key can be reported against it. */
    first = parser_context_peek_token(ctxt);
    if (!first) {
        parse_error(ctxt, NULL, "premature EOI");
        goto fail;
    }

    member_key = parse_value(ctxt, ap);
    if (member_key == NULL || qobject_type(member_key) != QTYPE_QSTRING) {
        parse_error(ctxt, first, "key is not a string in object");
        goto fail;
    }

    separator = parser_context_pop_token(ctxt);
    if (!separator) {
        parse_error(ctxt, NULL, "premature EOI");
        goto fail;
    }
    if (!token_is_operator(separator, ':')) {
        parse_error(ctxt, separator, "missing : in object pair");
        goto fail;
    }

    member_value = parse_value(ctxt, ap);
    if (!member_value) {
        parse_error(ctxt, separator, "Missing value in dict");
        goto fail;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(member_key)),
                  member_value);
    qobject_decref(member_key);
    return 0;

fail:
    parser_context_restore(ctxt, checkpoint);
    qobject_decref(member_key);
    return -1;
}
411

    
412
/**
 * parse_object(): Parse `{ }` or `{ pair (, pair)* }` into a QDict
 *
 * Returns the new dictionary as a QObject. Returns NULL, with the token
 * cursor rewound to where it was on entry, when the next token is not '{'
 * (no error is recorded in that case) or when the object is malformed
 * (a parse error is recorded).
 */
static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
{
    JSONParserContext checkpoint = parser_context_save(ctxt);
    QDict *members = NULL;
    QObject *tok;

    tok = parser_context_pop_token(ctxt);
    if (tok == NULL || !token_is_operator(tok, '{')) {
        goto out;
    }

    members = qdict_new();

    tok = parser_context_peek_token(ctxt);
    if (tok == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (token_is_operator(tok, '}')) {
        /* empty object: just consume the closing brace */
        parser_context_pop_token(ctxt);
        return QOBJECT(members);
    }

    for (;;) {
        if (parse_pair(ctxt, members, ap) == -1) {
            goto out;
        }

        /* a pair is followed either by '}' (done) or ',' (another pair) */
        tok = parser_context_pop_token(ctxt);
        if (tok == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }
        if (token_is_operator(tok, '}')) {
            break;
        }
        if (!token_is_operator(tok, ',')) {
            parse_error(ctxt, tok, "expected separator in dict");
            goto out;
        }
    }

    return QOBJECT(members);

out:
    parser_context_restore(ctxt, checkpoint);
    QDECREF(members);
    return NULL;
}
477

    
478
/**
 * parse_array(): Parse `[ ]` or `[ value (, value)* ]` into a QList
 *
 * Returns the new list as a QObject. Returns NULL, with the token cursor
 * rewound to where it was on entry, when the next token is not '[' (no
 * error is recorded in that case) or when the array is malformed (a parse
 * error is recorded).
 */
static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
{
    JSONParserContext checkpoint = parser_context_save(ctxt);
    QList *elements = NULL;
    QObject *tok;

    tok = parser_context_pop_token(ctxt);
    if (tok == NULL || !token_is_operator(tok, '[')) {
        goto out;
    }

    elements = qlist_new();

    tok = parser_context_peek_token(ctxt);
    if (tok == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (token_is_operator(tok, ']')) {
        /* empty array: just consume the closing bracket */
        parser_context_pop_token(ctxt);
        return QOBJECT(elements);
    }

    for (;;) {
        QObject *element = parse_value(ctxt, ap);

        if (element == NULL) {
            parse_error(ctxt, NULL, "expecting value");
            goto out;
        }
        qlist_append_obj(elements, element);

        /* a value is followed either by ']' (done) or ',' (another value) */
        tok = parser_context_pop_token(ctxt);
        if (tok == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }
        if (token_is_operator(tok, ']')) {
            break;
        }
        if (!token_is_operator(tok, ',')) {
            parse_error(ctxt, tok, "expected separator in list");
            goto out;
        }
    }

    return QOBJECT(elements);

out:
    parser_context_restore(ctxt, checkpoint);
    QDECREF(elements);
    return NULL;
}
556

    
557
/**
 * parse_keyword(): Parse the keywords `true` and `false` into a QBool
 *
 * Returns the new boolean as a QObject. Returns NULL, with the token
 * cursor rewound, when the next token is not a keyword (no error is
 * recorded) or is a keyword other than true/false (a parse error is
 * recorded).
 */
static QObject *parse_keyword(JSONParserContext *ctxt)
{
    JSONParserContext checkpoint = parser_context_save(ctxt);
    QObject *tok = parser_context_pop_token(ctxt);

    if (tok == NULL || token_get_type(tok) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(tok, "true")) {
        return QOBJECT(qbool_from_int(true));
    }
    if (token_is_keyword(tok, "false")) {
        return QOBJECT(qbool_from_int(false));
    }

    parse_error(ctxt, tok, "invalid keyword `%s'", token_get_value(tok));

out:
    parser_context_restore(ctxt, checkpoint);
    return NULL;
}
587

    
588
/**
 * parse_escape(): Turn a printf-like escape token into a value from @ap
 *
 * Supported escapes and the argument type fetched for each:
 *   %p           QObject *   (returned as is)
 *   %i           int         (becomes a QBool)
 *   %d           int         (becomes a QInt)
 *   %ld          long        (becomes a QInt)
 *   %lld, %I64d  long long   (becomes a QInt)
 *   %s           const char * (becomes a QString)
 *   %f           double      (becomes a QFloat)
 *
 * Returns NULL, with the token cursor rewound, when @ap is NULL, when the
 * token stream is exhausted, or when the next token is not one of the
 * escapes above. No parse error is recorded in those cases.
 */
static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
{
    JSONParserContext checkpoint = parser_context_save(ctxt);
    QObject *tok;
    const char *spec;

    if (ap == NULL) {
        goto out;
    }

    tok = parser_context_pop_token(ctxt);
    if (tok == NULL || token_get_type(tok) != JSON_ESCAPE) {
        goto out;
    }

    spec = token_get_value(tok);

    if (strcmp(spec, "%p") == 0) {
        return va_arg(*ap, QObject *);
    }
    if (strcmp(spec, "%i") == 0) {
        return QOBJECT(qbool_from_int(va_arg(*ap, int)));
    }
    if (strcmp(spec, "%d") == 0) {
        return QOBJECT(qint_from_int(va_arg(*ap, int)));
    }
    if (strcmp(spec, "%ld") == 0) {
        return QOBJECT(qint_from_int(va_arg(*ap, long)));
    }
    if (strcmp(spec, "%lld") == 0 || strcmp(spec, "%I64d") == 0) {
        return QOBJECT(qint_from_int(va_arg(*ap, long long)));
    }
    if (strcmp(spec, "%s") == 0) {
        return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    }
    if (strcmp(spec, "%f") == 0) {
        return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    }

out:
    parser_context_restore(ctxt, checkpoint);
    return NULL;
}
628

    
629
/**
 * parse_literal(): Parse a string, integer or float token into a QObject
 *
 *   JSON_STRING   becomes a QString (escape sequences are resolved)
 *   JSON_INTEGER  becomes a QInt; when the digits do not fit in a long long
 *                 (strtoll reports ERANGE) the token is converted as a
 *                 float instead of being silently clamped
 *   JSON_FLOAT    becomes a QFloat
 *
 * Returns NULL, with the token cursor rewound, when the token stream is
 * exhausted, when the next token is of any other type, or when a string
 * token contains an invalid escape sequence.
 */
static QObject *parse_literal(JSONParserContext *ctxt)
{
    QObject *token, *obj;
    JSONParserContext saved_ctxt = parser_context_save(ctxt);

    token = parser_context_pop_token(ctxt);
    if (token == NULL) {
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /* The unescape helper already recorded the error; don't hand a
         * NULL pointer to QOBJECT(). */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER: {
        long long value;

        errno = 0; /* strtoll does not reset errno on success */
        value = strtoll(token_get_value(token), NULL, 10);
        if (errno != ERANGE) {
            obj = QOBJECT(qint_from_int(value));
            break;
        }
        /* out of range for an integer: fall through to JSON_FLOAT */
    }
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    return obj;

out:
    parser_context_restore(ctxt, saved_ctxt);

    return NULL;
}
661

    
662
/**
 * parse_value(): Parse any single value
 *
 * Tries each rule in turn (object, array, escape, keyword, literal) and
 * returns the result of the first one that yields a non-NULL object.
 * Returns NULL when none of them does.
 */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
{
    QObject *value;

    value = parse_object(ctxt, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_array(ctxt, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_escape(ctxt, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_keyword(ctxt);
    if (value != NULL) {
        return value;
    }

    return parse_literal(ctxt);
}
682

    
683
/**
 * json_parser_parse(): Parse a token list into a QObject, discarding errors
 *
 * Same as json_parser_parse_err() called with a NULL error pointer.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    QObject *parsed = json_parser_parse_err(tokens, ap, NULL);

    return parsed;
}
687

    
688
/**
 * json_parser_parse_err(): Parse a token list into a QObject
 *
 * @tokens: list of token objects to parse
 * @ap:     arguments for escape tokens, or NULL when escapes are not allowed
 * @errp:   receives the parse error, if any, through error_propagate()
 *
 * Returns the parsed object, or NULL when @tokens is NULL or empty (no
 * error is reported in that case) or when parsing fails.
 */
QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
{
    JSONParserContext *ctxt = parser_context_new(tokens);
    QObject *result = NULL;

    if (ctxt != NULL) {
        result = parse_value(ctxt, ap);

        /* Hand the error over before the context goes away. */
        error_propagate(errp, ctxt->err);
        parser_context_free(ctxt);
    }

    return result;
}