Statistics
| Branch: | Revision:

root / json-parser.c @ 8b7968f7

History | View | Annotate | Download (13.1 kB)

1
/*
2
 * JSON Parser 
3
 *
4
 * Copyright IBM, Corp. 2009
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10
 * See the COPYING.LIB file in the top-level directory.
11
 *
12
 */
13

    
14
#include <stdarg.h>
15

    
16
#include "qemu-common.h"
17
#include "qstring.h"
18
#include "qint.h"
19
#include "qdict.h"
20
#include "qlist.h"
21
#include "qfloat.h"
22
#include "qbool.h"
23
#include "json-parser.h"
24
#include "json-lexer.h"
25

    
26
/*
 * Parser state handed to every parse_* routine and to parse_error().
 * It has no members at present.
 */
typedef struct JSONParserContext {
} JSONParserContext;
29

    
30
/* BUG_ON(cond): assert that cond is false, i.e. the assertion fails when cond holds. */
#define BUG_ON(cond) assert(!(cond))
31

    
32
/**
33
 * TODO
34
 *
35
 * 0) make errors meaningful again
36
 * 1) add geometry information to tokens
37
 * 3) should we return a parsed size?
38
 * 4) deal with premature EOI
39
 */
40

    
41
/*
 * Forward declaration: parse_pair() and parse_array() call parse_value(),
 * which is defined after them because it in turn calls parse_object() and
 * parse_array().
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42

    
43
/**
44
 * Token manipulators
45
 *
46
 * tokens are dictionaries that contain a type, a string value, and geometry information
47
 * about a token identified by the lexer.  These are routines that make working with
48
 * these objects a bit easier.
49
 */
50
/* Return the text of a token: the string stored under its "token" key. */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
54

    
55
/* Return the kind of a token: the integer stored under its "type" key. */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
59

    
60
/*
 * Return 1 when obj is a JSON_OPERATOR token whose text is exactly the
 * single character op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    if (token_get_type(obj) == JSON_OPERATOR) {
        const char *text = token_get_value(obj);

        /* Match the operator character and require the text to end there. */
        if (text[0] == op && text[1] == 0) {
            return 1;
        }
    }

    return 0;
}
72

    
73
/*
 * Return 1 when obj is a JSON_KEYWORD token whose text equals value,
 * 0 otherwise.  The text is only looked at for keyword tokens.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    int is_keyword_token = (token_get_type(obj) == JSON_KEYWORD);

    return is_keyword_token && !strcmp(token_get_value(obj), value);
}
81

    
82
/*
 * Return 1 when obj is a JSON_ESCAPE token whose text equals value
 * (for example "%d"), 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    if (token_get_type(obj) == JSON_ESCAPE) {
        return !strcmp(token_get_value(obj), value);
    }

    return 0;
}
90

    
91
/**
92
 * Error handler
93
 */
94
/*
 * Print "parse error: ", the printf-style formatted message and a newline
 * to stderr.  ctxt and token are accepted but not used by this function.
 */
static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
                                           QObject *token, const char *msg, ...)
{
    va_list args;

    fputs("parse error: ", stderr);

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputs("\n", stderr);
}
104

    
105
/**
106
 * String helpers
107
 *
108
 * These helpers are used to unescape strings.
109
 */
110
/*
 * Encode the 16-bit value wchar as a NUL-terminated UTF-8 sequence in buffer.
 *
 * One, two or three bytes are written before the terminator, depending on
 * the magnitude of wchar; BUG_ON() checks that buffer_length has room for
 * the sequence plus the terminator.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        *out++ = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    *out = 0;
}
132

    
133
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns 0..15 for those and -1 for
 * any other character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        value = ch - 'A' + 10;
    }

    return value;
}
145

    
146
/**
147
 * parse_string(): Parse a json string and return a QObject
148
 *
149
 *  string
150
 *      ""
151
 *      " chars "
152
 *  chars
153
 *      char
154
 *      char chars
155
 *  char
156
 *      any-Unicode-character-
157
 *          except-"-or-\-or-
158
 *          control-character
159
 *      \"
160
 *      \\
161
 *      \/
162
 *      \b
163
 *      \f
164
 *      \n
165
 *      \r
166
 *      \t
167
 *      \u four-hex-digits 
168
 */
169
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170
{
171
    const char *ptr = token_get_value(token);
172
    QString *str;
173
    int double_quote = 1;
174

    
175
    if (*ptr == '"') {
176
        double_quote = 1;
177
    } else {
178
        double_quote = 0;
179
    }
180
    ptr++;
181

    
182
    str = qstring_new();
183
    while (*ptr && 
184
           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185
        if (*ptr == '\\') {
186
            ptr++;
187

    
188
            switch (*ptr) {
189
            case '"':
190
                qstring_append(str, "\"");
191
                ptr++;
192
                break;
193
            case '\'':
194
                qstring_append(str, "'");
195
                ptr++;
196
                break;
197
            case '\\':
198
                qstring_append(str, "\\");
199
                ptr++;
200
                break;
201
            case '/':
202
                qstring_append(str, "/");
203
                ptr++;
204
                break;
205
            case 'b':
206
                qstring_append(str, "\b");
207
                ptr++;
208
                break;
209
            case 'f':
210
                qstring_append(str, "\f");
211
                ptr++;
212
                break;
213
            case 'n':
214
                qstring_append(str, "\n");
215
                ptr++;
216
                break;
217
            case 'r':
218
                qstring_append(str, "\r");
219
                ptr++;
220
                break;
221
            case 't':
222
                qstring_append(str, "\t");
223
                ptr++;
224
                break;
225
            case 'u': {
226
                uint16_t unicode_char = 0;
227
                char utf8_char[4];
228
                int i = 0;
229

    
230
                ptr++;
231

    
232
                for (i = 0; i < 4; i++) {
233
                    if (qemu_isxdigit(*ptr)) {
234
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
235
                    } else {
236
                        parse_error(ctxt, token,
237
                                    "invalid hex escape sequence in string");
238
                        goto out;
239
                    }
240
                    ptr++;
241
                }
242

    
243
                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
244
                qstring_append(str, utf8_char);
245
            }   break;
246
            default:
247
                parse_error(ctxt, token, "invalid escape sequence in string");
248
                goto out;
249
            }
250
        } else {
251
            char dummy[2];
252

    
253
            dummy[0] = *ptr++;
254
            dummy[1] = 0;
255

    
256
            qstring_append(str, dummy);
257
        }
258
    }
259

    
260
    return str;
261

    
262
out:
263
    QDECREF(str);
264
    return NULL;
265
}
266

    
267
/**
268
 * Parsing rules
269
 */
270
/*
 * Parse one `key : value` pair of an object and store it in dict.
 *
 * Parsing is done on a private copy of *tokens.  On success the pair is
 * inserted into dict, the old *tokens list is released, *tokens is replaced
 * by the advanced copy and 0 is returned.  On failure an error is reported,
 * the copy is released, *tokens is left untouched and -1 is returned.
 *
 * Running out of tokens (qlist_peek()/qlist_pop() yielding NULL) is reported
 * as "premature EOI" instead of being passed on to the token helpers, which
 * cannot handle a NULL token.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    /* key starts out NULL because the first error exit happens before it is parsed. */
    QObject *key = NULL, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
310

    
311
/*
 * Parse an object: '{' followed by zero or more comma-separated pairs and
 * a closing '}'.
 *
 * Parsing is done on a private copy of *tokens.  On success the old *tokens
 * list is released, *tokens is replaced by the advanced copy and the new
 * QDict is returned as a QObject.  On failure everything built so far is
 * released, *tokens is left untouched and NULL is returned.
 *
 * If the first token is missing or is not '{', the function fails silently
 * so that parse_value() can try the other alternatives.  Running out of
 * tokens after the '{' is reported as "premature EOI".
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing to parse; not an object. */
        goto out;
    }

    if (!token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, '}')) {
        if (parse_pair(ctxt, dict, &working, ap) == -1) {
            goto out;
        }

        token = qlist_pop(working);
        if (token == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }

        /* After each pair, expect ',' and another pair, or the closing '}'. */
        while (!token_is_operator(token, '}')) {
            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;

            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }
        }
        qobject_decref(token);
        token = NULL;
    } else {
        /* Empty object: consume the '}' that was peeked. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    }

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
366

    
367
/*
 * Parse an array: '[' followed by zero or more comma-separated values and
 * a closing ']'.
 *
 * Parsing is done on a private copy of *tokens.  On success the old *tokens
 * list is released, *tokens is replaced by the advanced copy and the new
 * QList is returned as a QObject.  On failure everything built so far is
 * released, *tokens is left untouched and NULL is returned.
 *
 * If the first token is missing or is not '[', the function fails silently
 * so that parse_value() can try the other alternatives.  Running out of
 * tokens after the '[' is reported as "premature EOI".
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing to parse; not an array. */
        goto out;
    }

    if (!token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, ']')) {
        QObject *obj;

        obj = parse_value(ctxt, &working, ap);
        if (obj == NULL) {
            parse_error(ctxt, token, "expecting value");
            goto out;
        }

        qlist_append_obj(list, obj);

        token = qlist_pop(working);
        if (token == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }

        /* After each value, expect ',' and another value, or the closing ']'. */
        while (!token_is_operator(token, ']')) {
            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;

            obj = parse_value(ctxt, &working, ap);
            if (obj == NULL) {
                parse_error(ctxt, token, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }
        }

        qobject_decref(token);
        token = NULL;
    } else {
        /* Empty array: consume the ']' that was peeked. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    }

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
434

    
435
/*
 * Parse a keyword token: "true" or "false", each producing a QBool.
 *
 * Parsing is done on a private copy of *tokens.  On success the old *tokens
 * list is released, *tokens is replaced by the advanced copy and the new
 * object is returned.  On failure *tokens is left untouched and NULL is
 * returned.
 *
 * A missing token or a token that is not a keyword fails silently so that
 * parse_value() can try the other alternatives; a keyword other than
 * true/false is reported through parse_error().
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* No token left: the token helpers cannot take NULL. */
        goto out;
    }

    if (token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
467

    
468
/*
 * Parse an escape token (a printf-like placeholder) by taking the matching
 * argument from *ap:
 *
 *   %p            QObject *, returned as-is (no reference is taken here)
 *   %i            int, wrapped in a QBool
 *   %d            int, wrapped in a QInt
 *   %ld           long, wrapped in a QInt
 *   %lld, %I64d   long long, wrapped in a QInt
 *   %s            const char *, copied into a QString
 *   %f            double, wrapped in a QFloat
 *
 * Parsing is done on a private copy of *tokens.  On success the old *tokens
 * list is released, *tokens is replaced by the advanced copy and the object
 * is returned.  When ap is NULL, no token is left, or the token is not one
 * of the escapes above, the function fails silently: *tokens is left
 * untouched and NULL is returned.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* No token left: the token helpers cannot take NULL. */
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
510

    
511
/*
 * Parse a literal token: a string (unescaped into a QString), an integer
 * (converted with strtoll() in base 10 into a QInt) or a float (converted
 * with strtod() into a QFloat).
 *
 * Parsing is done on a private copy of *tokens.  On success the old *tokens
 * list is released, *tokens is replaced by the advanced copy and the new
 * object is returned.  On failure *tokens is left untouched and NULL is
 * returned; this covers a missing token, a token of any other type, and a
 * string whose escapes could not be decoded.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* No token left: the token helpers cannot take NULL. */
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /*
         * A bad escape has already been reported; fail without consuming
         * the token instead of committing and returning NULL.
         */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller's list is replaced by the advanced copy. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
544

    
545
/*
 * Parse a single value by trying each alternative in turn: object, array,
 * escape, keyword and finally literal.  The first alternative that yields
 * a non-NULL object wins; NULL is returned when none of them does.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *value;

    value = parse_object(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_array(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_escape(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_keyword(ctxt, tokens);
    if (value != NULL) {
        return value;
    }

    return parse_literal(ctxt, tokens);
}
565

    
566
/*
 * Parse one value from a token list.
 *
 * The work is done on a copy of tokens made with qlist_copy(); that copy is
 * released before returning, so whatever tokens remain after the first
 * value are not inspected.  ap supplies the arguments for escape tokens and
 * may be NULL, in which case escape tokens are not accepted.
 *
 * Returns the parsed object, or NULL if no value could be parsed.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QList *remaining = qlist_copy(tokens);
    QObject *root = parse_value(&ctxt, &remaining, ap);

    QDECREF(remaining);

    return root;
}