Statistics
| Branch: | Revision:

root / json-parser.c @ c1568af5

History | View | Annotate | Download (13.1 kB)

1
/*
2
 * JSON Parser 
3
 *
4
 * Copyright IBM, Corp. 2009
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10
 * See the COPYING.LIB file in the top-level directory.
11
 *
12
 */
13

    
14
#include <stdarg.h>
15

    
16
#include "qemu-common.h"
17
#include "qstring.h"
18
#include "qint.h"
19
#include "qdict.h"
20
#include "qlist.h"
21
#include "qfloat.h"
22
#include "qbool.h"
23
#include "json-parser.h"
24
#include "json-lexer.h"
25

    
26
/**
 * Parser state handed to every parsing rule.  It has no members yet; the
 * rules in this file only forward it to each other and to parse_error().
 */
typedef struct JSONParserContext {
} JSONParserContext;
29

    
30
/* Internal invariant check: asserts that @cond does NOT hold. */
#define BUG_ON(cond) assert(!(cond))
31

    
32
/**
33
 * TODO
34
 *
35
 * 0) make errors meaningful again
36
 * 1) add geometry information to tokens
37
 * 3) should we return a parsed size?
38
 * 4) deal with premature EOI
39
 */
40

    
41
/* Forward declaration: the pair and array rules below call back into parse_value(). */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42

    
43
/**
44
 * Token manipulators
45
 *
46
 * tokens are dictionaries that contain a type, a string value, and geometry information
47
 * about a token identified by the lexer.  These are routines that make working with
48
 * these objects a bit easier.
49
 */
50
/**
 * token_get_value(): return the string stored under the "token" key of a
 * token dictionary.
 */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
54

    
55
/**
 * token_get_type(): return the integer stored under the "type" key of a
 * token dictionary, as a JSONTokenType.
 */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
59

    
60
/**
 * token_is_operator(): test whether a token is the one-character operator @op
 *
 * @obj: token dictionary, or NULL.  Callers pass the result of
 *       qlist_pop()/qlist_peek() straight in, which is presumably NULL once
 *       the token list is exhausted (premature end of input).
 * @op:  operator character to compare against
 *
 * Returns non-zero only when @obj is a JSON_OPERATOR token whose text is
 * exactly the single character @op.  A NULL token never matches.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *val;

    /* No token at all (end of input) cannot be an operator. */
    if (obj == NULL) {
        return 0;
    }

    if (token_get_type(obj) != JSON_OPERATOR) {
        return 0;
    }

    val = token_get_value(obj);

    /* Exactly one character long: first byte matches, second is the NUL. */
    return (val[0] == op) && (val[1] == 0);
}
72

    
73
/**
 * token_is_keyword(): test whether a token is the keyword @value
 *
 * @obj:   token dictionary, or NULL (treated as "no match" so that callers
 *         handing in an exhausted token list do not dereference NULL)
 * @value: keyword text to compare against
 *
 * Returns non-zero only when @obj is a JSON_KEYWORD token whose text equals
 * @value.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    /* No token at all (end of input) cannot be a keyword. */
    if (obj == NULL) {
        return 0;
    }

    if (token_get_type(obj) != JSON_KEYWORD) {
        return 0;
    }

    return strcmp(token_get_value(obj), value) == 0;
}
81

    
82
/**
 * token_is_escape(): test whether a token is the escape sequence @value
 *
 * @obj:   token dictionary, or NULL (treated as "no match" so that callers
 *         handing in an exhausted token list do not dereference NULL)
 * @value: escape text to compare against, e.g. "%d"
 *
 * Returns non-zero only when @obj is a JSON_ESCAPE token whose text equals
 * @value.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    /* No token at all (end of input) cannot be an escape. */
    if (obj == NULL) {
        return 0;
    }

    if (token_get_type(obj) != JSON_ESCAPE) {
        return 0;
    }

    return (strcmp(token_get_value(obj), value) == 0);
}
90

    
91
/**
92
 * Error handler
93
 */
94
/**
 * parse_error(): print "parse error: " followed by the printf-style message
 * and a newline on stderr.
 *
 * @ctxt and @token are accepted for the benefit of callers but are not used
 * by the current implementation.
 */
static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
{
    va_list args;

    fputs("parse error: ", stderr);

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputc('\n', stderr);
}
103

    
104
/**
105
 * String helpers
106
 *
107
 * These helpers are used to unescape strings.
108
 */
109
/**
 * wchar_to_utf8(): encode a 16-bit code unit as a NUL-terminated UTF-8
 * sequence of one, two or three bytes.
 *
 * @wchar:         value to encode
 * @buffer:        destination; receives the encoded bytes plus a trailing NUL
 * @buffer_length: capacity of @buffer; BUG_ON() fires when it is too small
 *                 for the sequence plus terminator
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        *out++ = wchar & 0x7F;
        *out = 0;
        return;
    }

    if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
        *out = 0;
        return;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    BUG_ON(buffer_length < 4);

    *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
    *out++ = 0x80 | ((wchar >> 6) & 0x3F);
    *out++ = 0x80 | (wchar & 0x3F);
    *out = 0;
}
131

    
132
/**
 * hex2decimal(): convert one hexadecimal digit character to its value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Returns the value 0..15, or -1 when
 * @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }

    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }

    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }

    return -1;
}
144

    
145
/**
146
 * parse_string(): Parse a json string and return a QObject
147
 *
148
 *  string
149
 *      ""
150
 *      " chars "
151
 *  chars
152
 *      char
153
 *      char chars
154
 *  char
155
 *      any-Unicode-character-
156
 *          except-"-or-\-or-
157
 *          control-character
158
 *      \"
159
 *      \\
160
 *      \/
161
 *      \b
162
 *      \f
163
 *      \n
164
 *      \r
165
 *      \t
166
 *      \u four-hex-digits 
167
 */
168
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
169
{
170
    const char *ptr = token_get_value(token);
171
    QString *str;
172
    int double_quote = 1;
173

    
174
    if (*ptr == '"') {
175
        double_quote = 1;
176
    } else {
177
        double_quote = 0;
178
    }
179
    ptr++;
180

    
181
    str = qstring_new();
182
    while (*ptr && 
183
           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
184
        if (*ptr == '\\') {
185
            ptr++;
186

    
187
            switch (*ptr) {
188
            case '"':
189
                qstring_append(str, "\"");
190
                ptr++;
191
                break;
192
            case '\'':
193
                qstring_append(str, "'");
194
                ptr++;
195
                break;
196
            case '\\':
197
                qstring_append(str, "\\");
198
                ptr++;
199
                break;
200
            case '/':
201
                qstring_append(str, "/");
202
                ptr++;
203
                break;
204
            case 'b':
205
                qstring_append(str, "\b");
206
                ptr++;
207
                break;
208
            case 'f':
209
                qstring_append(str, "\f");
210
                ptr++;
211
                break;
212
            case 'n':
213
                qstring_append(str, "\n");
214
                ptr++;
215
                break;
216
            case 'r':
217
                qstring_append(str, "\r");
218
                ptr++;
219
                break;
220
            case 't':
221
                qstring_append(str, "\t");
222
                ptr++;
223
                break;
224
            case 'u': {
225
                uint16_t unicode_char = 0;
226
                char utf8_char[4];
227
                int i = 0;
228

    
229
                ptr++;
230

    
231
                for (i = 0; i < 4; i++) {
232
                    if (qemu_isxdigit(*ptr)) {
233
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
234
                    } else {
235
                        parse_error(ctxt, token,
236
                                    "invalid hex escape sequence in string");
237
                        goto out;
238
                    }
239
                    ptr++;
240
                }
241

    
242
                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
243
                qstring_append(str, utf8_char);
244
            }   break;
245
            default:
246
                parse_error(ctxt, token, "invalid escape sequence in string");
247
                goto out;
248
            }
249
        } else {
250
            char dummy[2];
251

    
252
            dummy[0] = *ptr++;
253
            dummy[1] = 0;
254

    
255
            qstring_append(str, dummy);
256
        }
257
    }
258

    
259
    return str;
260

    
261
out:
262
    QDECREF(str);
263
    return NULL;
264
}
265

    
266
/**
267
 * Parsing rules
268
 */
269
/**
 * parse_pair(): parse one `key : value` member and store it in @dict
 *
 * @ctxt:   parser context, forwarded to the sub-parsers and parse_error()
 * @dict:   dictionary that receives the parsed member
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 * @ap:     optional va_list forwarded to parse_value() for both key and value
 *
 * Returns 0 on success, -1 on failure (after reporting via parse_error()).
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key, *token = NULL, *value, *peek;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* Token list ran out right after the key: nothing to inspect. */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
309

    
310
/**
 * parse_object(): parse `{ }` or `{ pair , pair ... }` into a QDict
 *
 * @ctxt:   parser context, forwarded to parse_pair() and parse_error()
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 * @ap:     optional va_list forwarded to parse_pair()
 *
 * Returns the new dictionary as a QObject, or NULL when the input does not
 * start with '{' (silently, so the caller can try another rule) or when the
 * object is malformed or the tokens run out (reported via parse_error()).
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        /* '{' was the last token available. */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, '}')) {
        /* One or more pairs, separated by ',' and closed by '}'. */
        for (;;) {
            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, '}')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty object: consume the '}' that was peeked. */
        token = qlist_pop(working);
    }

    /* Drop the closing '}'. */
    qobject_decref(token);
    token = NULL;

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
365

    
366
/**
 * parse_array(): parse `[ ]` or `[ value , value ... ]` into a QList
 *
 * @ctxt:   parser context, forwarded to parse_value() and parse_error()
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 * @ap:     optional va_list forwarded to parse_value()
 *
 * Returns the new list as a QObject, or NULL when the input does not start
 * with '[' (silently, so the caller can try another rule) or when the array
 * is malformed or the tokens run out (reported via parse_error()).
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        /* '[' was the last token available. */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, ']')) {
        /* One or more values, separated by ',' and closed by ']'. */
        for (;;) {
            QObject *obj = parse_value(ctxt, &working, ap);

            if (obj == NULL) {
                parse_error(ctxt, token, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, ']')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty array: consume the ']' that was peeked. */
        token = qlist_pop(working);
    }

    /* Drop the closing ']'. */
    qobject_decref(token);
    token = NULL;

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
433

    
434
/**
 * parse_keyword(): parse the keywords `true` and `false` into a QBool
 *
 * @ctxt:   parser context, forwarded to parse_error()
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 *
 * Returns the boolean as a QObject.  Returns NULL silently when there is no
 * token left or the next token is not a keyword, and NULL after reporting
 * via parse_error() when the keyword is neither `true` nor `false`.
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing left to parse; must not reach token_get_type(). */
        goto out;
    }

    if (token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
466

    
467
/**
 * parse_escape(): replace a printf-like escape token by the next argument
 * taken from @ap
 *
 * @ctxt:   parser context (unused here)
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 * @ap:     argument list to draw from; when NULL no escape is accepted
 *
 * Supported escapes and the argument type each one consumes:
 *   %p            QObject *    (returned as-is)
 *   %i            int          (wrapped in a QBool)
 *   %d            int          (wrapped in a QInt)
 *   %ld           long         (wrapped in a QInt)
 *   %lld, %I64d   long long    (wrapped in a QInt)
 *   %s            const char * (wrapped in a QString)
 *   %f            double       (wrapped in a QFloat)
 *
 * Returns the resulting object, or NULL when @ap is NULL, no token is left,
 * or the next token is not one of the escapes above.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing left to parse; must not reach token_is_escape(). */
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
509

    
510
/**
 * parse_literal(): parse a string, integer or float token into a QObject
 *
 * @ctxt:   parser context, forwarded to qstring_from_escaped_str()
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and not written on failure
 *
 * Returns a QString, QInt or QFloat as a QObject.  Returns NULL when no
 * token is left, when the next token is not a literal, or when a string
 * token cannot be unescaped; in all of those cases no token is consumed.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    /* Work on a private copy so a failed parse leaves *tokens alone. */
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing left to parse; must not reach token_get_type(). */
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        if (str == NULL) {
            /*
             * The helper has already reported the bad escape.  Fail
             * without committing the token or wrapping NULL in QOBJECT().
             */
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller now continues from the tokens we did not consume. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
543

    
544
/**
 * parse_value(): parse any JSON value
 *
 * Tries each rule in turn -- object, array, escape, keyword, literal -- and
 * returns the result of the first one that yields an object.  Returns NULL
 * when none of them does.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *parsed;

    parsed = parse_object(ctxt, tokens, ap);
    if (parsed != NULL) {
        return parsed;
    }

    parsed = parse_array(ctxt, tokens, ap);
    if (parsed != NULL) {
        return parsed;
    }

    parsed = parse_escape(ctxt, tokens, ap);
    if (parsed != NULL) {
        return parsed;
    }

    parsed = parse_keyword(ctxt, tokens);
    if (parsed != NULL) {
        return parsed;
    }

    return parse_literal(ctxt, tokens);
}
564

    
565
/**
 * json_parser_parse(): parse a token list into a QObject
 *
 * @tokens: token list to parse; it is copied first and parsing runs on the
 *          copy
 * @ap:     optional va_list used to resolve escape tokens; may be NULL
 *
 * Returns whatever parse_value() produced for the token list, which is NULL
 * when no rule matched.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QList *remaining = qlist_copy(tokens);
    QObject *parsed = parse_value(&ctxt, &remaining, ap);

    /* Whatever was not consumed is of no further interest. */
    QDECREF(remaining);

    return parsed;
}