Statistics
| Branch: | Revision:

root / json-parser.c @ 11165820

History | View | Annotate | Download (12.9 kB)

1
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13

    
14
#include <stdarg.h>
15

    
16
#include "qemu-common.h"
17
#include "qstring.h"
18
#include "qint.h"
19
#include "qdict.h"
20
#include "qlist.h"
21
#include "qfloat.h"
22
#include "qbool.h"
23
#include "json-parser.h"
24
#include "json-lexer.h"
25

    
26
/*
 * Parser state handed to every parse_*() routine in this file.
 * It currently has no members.
 */
typedef struct JSONParserContext {
} JSONParserContext;
29

    
30
/* Assert that @cond is false; used to flag conditions that indicate a bug. */
#define BUG_ON(cond) assert(!(cond))
31

    
32
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 3) should we return a parsed size?
 * 4) deal with premature EOI
 */
40

    
41
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42

    
43
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These routines make
 * working with those objects a bit easier.
 */
50
/* Return the string stored under the "token" key of the token dictionary @obj. */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
54

    
55
/* Return the integer stored under the "type" key of @obj as a JSONTokenType. */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
59

    
60
/*
 * Return 1 when @obj is a JSON_OPERATOR token whose text is exactly the
 * single character @op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);
        if (text[0] == op) {
            /* Only a one-character token counts as a match. */
            return text[1] == 0;
        }
    }

    return 0;
}
72

    
73
/* Return 1 when @obj is a JSON_KEYWORD token whose text equals @value, else 0. */
static int token_is_keyword(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_KEYWORD &&
           strcmp(token_get_value(obj), value) == 0;
}
81

    
82
/* Return 1 when @obj is a JSON_ESCAPE token whose text equals @value, else 0. */
static int token_is_escape(QObject *obj, const char *value)
{
    int matches = 0;

    if (token_get_type(obj) == JSON_ESCAPE) {
        matches = (strcmp(token_get_value(obj), value) == 0);
    }

    return matches;
}
90

    
91
/**
92
 * Error handler
93
 */
94
static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
95
{
96
    va_list ap;
97
    va_start(ap, msg);
98
    fprintf(stderr, "parse error: ");
99
    vfprintf(stderr, msg, ap);
100
    fprintf(stderr, "\n");
101
    va_end(ap);
102
}
103

    
104
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
109
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence in
 * @buffer.  One, two or three bytes are written before the terminator,
 * depending on the value; BUG_ON() fires if @buffer_length cannot hold the
 * sequence plus the terminator.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar > 0x07FF) {
        /* 0x0800..0xFFFF: three bytes */
        BUG_ON(buffer_length < 4);

        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        buffer[3] = 0;
        return;
    }

    if (wchar > 0x007F) {
        /* 0x0080..0x07FF: two bytes */
        BUG_ON(buffer_length < 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
        return;
    }

    /* 0x0000..0x007F: a single byte */
    BUG_ON(buffer_length < 2);

    buffer[0] = wchar & 0x7F;
    buffer[1] = 0;
}
131

    
132
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Both upper- and lower-case letters are accepted; any other character
 * yields -1.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    switch (ch) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        digit = ch - '0';
        break;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        digit = 10 + (ch - 'a');
        break;
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        digit = 10 + (ch - 'A');
        break;
    default:
        break;
    }

    return digit;
}
144

    
145
/**
146
 * parse_string(): Parse a json string and return a QObject
147
 *
148
 *  string
149
 *      ""
150
 *      " chars "
151
 *  chars
152
 *      char
153
 *      char chars
154
 *  char
155
 *      any-Unicode-character-
156
 *          except-"-or-\-or-
157
 *          control-character
158
 *      \"
159
 *      \\
160
 *      \/
161
 *      \b
162
 *      \f
163
 *      \n
164
 *      \r
165
 *      \t
166
 *      \u four-hex-digits 
167
 */
168
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
169
{
170
    const char *ptr = token_get_value(token);
171
    QString *str;
172
    int double_quote = 1;
173

    
174
    if (*ptr == '"') {
175
        double_quote = 1;
176
    } else {
177
        double_quote = 0;
178
    }
179
    ptr++;
180

    
181
    str = qstring_new();
182
    while (*ptr && 
183
           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
184
        if (*ptr == '\\') {
185
            ptr++;
186

    
187
            switch (*ptr) {
188
            case '"':
189
                qstring_append(str, "\"");
190
                ptr++;
191
                break;
192
            case '\'':
193
                qstring_append(str, "'");
194
                ptr++;
195
                break;
196
            case '\\':
197
                qstring_append(str, "\\");
198
                ptr++;
199
                break;
200
            case '/':
201
                qstring_append(str, "/");
202
                ptr++;
203
                break;
204
            case 'b':
205
                qstring_append(str, "\b");
206
                ptr++;
207
                break;
208
            case 'n':
209
                qstring_append(str, "\n");
210
                ptr++;
211
                break;
212
            case 'r':
213
                qstring_append(str, "\r");
214
                ptr++;
215
                break;
216
            case 't':
217
                qstring_append(str, "\t");
218
                ptr++;
219
                break;
220
            case 'u': {
221
                uint16_t unicode_char = 0;
222
                char utf8_char[4];
223
                int i = 0;
224

    
225
                ptr++;
226

    
227
                for (i = 0; i < 4; i++) {
228
                    if (qemu_isxdigit(*ptr)) {
229
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
230
                    } else {
231
                        parse_error(ctxt, token,
232
                                    "invalid hex escape sequence in string");
233
                        goto out;
234
                    }
235
                    ptr++;
236
                }
237

    
238
                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
239
                qstring_append(str, utf8_char);
240
            }   break;
241
            default:
242
                parse_error(ctxt, token, "invalid escape sequence in string");
243
                goto out;
244
            }
245
        } else {
246
            char dummy[2];
247

    
248
            dummy[0] = *ptr++;
249
            dummy[1] = 0;
250

    
251
            qstring_append(str, dummy);
252
        }
253
    }
254

    
255
    return str;
256

    
257
out:
258
    QDECREF(str);
259
    return NULL;
260
}
261

    
262
/**
 * Parsing rules
 */
265
/**
 * parse_pair(): parse one `key : value` member and store it in @dict.
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * The key must parse to a QString.  @ap is passed through to parse_value()
 * for escape handling.
 *
 * Returns 0 on success, -1 on failure (after calling parse_error()).
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /*
         * Ran out of tokens after the key.  A NULL token must not be
         * handed to token_is_operator(), which inspects the token.
         */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    /* The key object is only needed for its string; drop it and the ':'. */
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
305

    
306
/**
 * parse_object(): parse `{ pair , pair ... }` into a QDict.
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * Returns the new dictionary as a QObject, or NULL when the next token is
 * not '{' (silently), when the token list runs out, or when the body is
 * malformed (reported through parse_error()).
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing to parse; not an object. */
        goto out;
    }

    if (!token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        /* '{' was the last token. */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, '}')) {
        if (parse_pair(ctxt, dict, &working, ap) == -1) {
            goto out;
        }

        token = qlist_pop(working);
        if (token == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }

        while (!token_is_operator(token, '}')) {
            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;

            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }
        }
        qobject_decref(token);
        token = NULL;
    } else {
        /* Empty object: consume the closing '}'. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    }

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
361

    
362
/**
 * parse_array(): parse `[ value , value ... ]` into a QList.
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * Returns the new list as a QObject, or NULL when the next token is not '['
 * (silently), when the token list runs out, or when the body is malformed
 * (reported through parse_error()).
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing to parse; not an array. */
        goto out;
    }

    if (!token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        /* '[' was the last token. */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, ']')) {
        QObject *obj;

        obj = parse_value(ctxt, &working, ap);
        if (obj == NULL) {
            parse_error(ctxt, token, "expecting value");
            goto out;
        }

        qlist_append_obj(list, obj);

        token = qlist_pop(working);
        if (token == NULL) {
            parse_error(ctxt, NULL, "premature EOI");
            goto out;
        }

        while (!token_is_operator(token, ']')) {
            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;

            obj = parse_value(ctxt, &working, ap);
            if (obj == NULL) {
                parse_error(ctxt, token, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }
        }

        qobject_decref(token);
        token = NULL;
    } else {
        /* Empty array: consume the closing ']'. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    }

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
429

    
430
/**
 * parse_keyword(): parse the keywords `true` and `false` into a QBool.
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * Returns the new object, or NULL when there is no token left, when the
 * next token is not a JSON_KEYWORD (both silently), or when the keyword is
 * not recognised (reported through parse_error()).
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Out of tokens; token_get_type() must not see a NULL token. */
        goto out;
    }

    if (token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
462

    
463
/**
 * parse_escape(): turn a printf-like escape token into an object built from
 * the next variadic argument in @ap.
 *
 *   %p            QObject * taken from @ap as-is
 *   %i            int, wrapped in a QBool
 *   %d            int, wrapped in a QInt
 *   %ld           long, wrapped in a QInt
 *   %lld, %I64d   long long, wrapped in a QInt
 *   %s            const char *, wrapped in a QString
 *   %f            double, wrapped in a QFloat
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * Returns the new object, or NULL when @ap is NULL, when there is no token
 * left, or when the next token is not one of the escapes above.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* Out of tokens; token_is_escape() must not see a NULL token. */
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
505

    
506
/**
 * parse_literal(): parse a string, integer or float token.
 *
 *   JSON_STRING   unescaped via qstring_from_escaped_str() into a QString
 *   JSON_INTEGER  converted with strtoll() (base 10) into a QInt
 *   JSON_FLOAT    converted with strtod() into a QFloat
 *
 * Parsing is done on a private copy of *tokens.  On success *tokens is
 * released and replaced by the advanced copy; on failure the copy is
 * released and *tokens is left as it was.
 *
 * Returns the new object, or NULL when there is no token left, when the
 * next token is of another type, or when the string contains an invalid
 * escape sequence.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Out of tokens; token_get_type() must not see a NULL token. */
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /*
         * A bad escape sequence yields NULL.  Fail here rather than
         * converting NULL with QOBJECT() and consuming the token.
         */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
539

    
540
/**
 * parse_value(): parse the next JSON value from *tokens.
 *
 * The alternatives are tried in a fixed order (object, array, escape,
 * keyword, literal) and the first non-NULL result is returned.  NULL is
 * returned when none of them produces an object.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *value;

    value = parse_object(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_array(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_escape(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_keyword(ctxt, tokens);
    if (value != NULL) {
        return value;
    }

    return parse_literal(ctxt, tokens);
}
560

    
561
/**
 * json_parser_parse(): parse one JSON value from a list of lexer tokens.
 *
 * The parser works on a copy of @tokens made with qlist_copy(); that copy is
 * released before returning.  @ap supplies the arguments for escape tokens
 * and may be NULL, in which case parse_escape() does not handle escapes.
 *
 * Returns whatever parse_value() produced: the parsed object, or NULL.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QList *remaining = qlist_copy(tokens);
    QObject *parsed = parse_value(&ctxt, &remaining, ap);

    QDECREF(remaining);

    return parsed;
}