Statistics
| Branch: | Revision:

root / json-parser.c @ b3e5e3e6

History | View | Annotate | Download (13 kB)

1
/*
2
 * JSON Parser 
3
 *
4
 * Copyright IBM, Corp. 2009
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10
 * See the COPYING.LIB file in the top-level directory.
11
 *
12
 */
13

    
14
#include <stdbool.h>
15
#include <stdarg.h>
16

    
17
#include "qemu-common.h"
18
#include "qstring.h"
19
#include "qint.h"
20
#include "qdict.h"
21
#include "qlist.h"
22
#include "qfloat.h"
23
#include "qbool.h"
24
#include "json-parser.h"
25
#include "json-lexer.h"
26

    
27
/* Parser state handed to every parse_*() routine; it has no members yet. */
typedef struct JSONParserContext {
} JSONParserContext;
30

    
31
/* Assert that @cond, which would indicate a programming error, does not hold. */
#define BUG_ON(cond) assert(!(cond))
32

    
33
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 3) should we return a parsed size?
 * 4) deal with premature EOI
 */
41

    
42
/* Forward declaration: the object, array and pair rules recurse into parse_value(). */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
43

    
44
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These routines make
 * working with these objects a bit easier.
 */
51
/**
 * token_get_value(): Return the text of a token.
 *
 * @obj is converted with qobject_to_qdict() and the string stored under the
 * "token" key is returned.
 */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
55

    
56
/**
 * token_get_type(): Return the type of a token.
 *
 * @obj is converted with qobject_to_qdict() and the integer stored under the
 * "type" key is returned as a JSONTokenType.
 */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
60

    
61
/**
 * token_is_operator(): Test whether a token is the operator @op.
 *
 * Returns 1 when @obj has type JSON_OPERATOR and its text is exactly the
 * single character @op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);

        /* Exactly one character long and that character is @op. */
        return text[0] == op && text[1] == 0;
    }

    return 0;
}
73

    
74
/**
 * token_is_keyword(): Test whether a token is the keyword @value.
 *
 * Returns 1 when @obj has type JSON_KEYWORD and its text equals @value,
 * 0 otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_KEYWORD &&
           strcmp(token_get_value(obj), value) == 0;
}
82

    
83
/**
 * token_is_escape(): Test whether a token is the escape sequence @value.
 *
 * Returns 1 when @obj has type JSON_ESCAPE and its text equals @value,
 * 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    int matches = 0;

    if (token_get_type(obj) == JSON_ESCAPE) {
        matches = (strcmp(token_get_value(obj), value) == 0);
    }

    return matches;
}
91

    
92
/**
93
 * Error handler
94
 */
95
static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
96
{
97
    va_list ap;
98
    va_start(ap, msg);
99
    fprintf(stderr, "parse error: ");
100
    vfprintf(stderr, msg, ap);
101
    fprintf(stderr, "\n");
102
    va_end(ap);
103
}
104

    
105
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
110
/**
 * wchar_to_utf8(): Encode one 16-bit code unit as a NUL-terminated UTF-8
 * sequence.
 *
 * @wchar: value to encode
 * @buffer: receives 1 to 3 encoded bytes followed by a terminating 0
 * @buffer_length: size of @buffer; asserted to be large enough for the
 *                 encoded bytes plus the terminator (2, 3 or 4 bytes)
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar <= 0x007F) {
        /* One byte: 0xxxxxxx */
        assert(buffer_length >= 2);

        *out++ = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    *out = 0;
}
132

    
133
/**
 * hex2decimal(): Convert one hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = ch - 'A' + 10;
    }

    return digit;
}
145

    
146
/**
147
 * parse_string(): Parse a json string and return a QObject
148
 *
149
 *  string
150
 *      ""
151
 *      " chars "
152
 *  chars
153
 *      char
154
 *      char chars
155
 *  char
156
 *      any-Unicode-character-
157
 *          except-"-or-\-or-
158
 *          control-character
159
 *      \"
160
 *      \\
161
 *      \/
162
 *      \b
163
 *      \f
164
 *      \n
165
 *      \r
166
 *      \t
167
 *      \u four-hex-digits 
168
 */
169
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170
{
171
    const char *ptr = token_get_value(token);
172
    QString *str;
173
    int double_quote = 1;
174

    
175
    if (*ptr == '"') {
176
        double_quote = 1;
177
    } else {
178
        double_quote = 0;
179
    }
180
    ptr++;
181

    
182
    str = qstring_new();
183
    while (*ptr && 
184
           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185
        if (*ptr == '\\') {
186
            ptr++;
187

    
188
            switch (*ptr) {
189
            case '"':
190
                qstring_append(str, "\"");
191
                ptr++;
192
                break;
193
            case '\'':
194
                qstring_append(str, "'");
195
                ptr++;
196
                break;
197
            case '\\':
198
                qstring_append(str, "\\");
199
                ptr++;
200
                break;
201
            case '/':
202
                qstring_append(str, "/");
203
                ptr++;
204
                break;
205
            case 'b':
206
                qstring_append(str, "\b");
207
                ptr++;
208
                break;
209
            case 'n':
210
                qstring_append(str, "\n");
211
                ptr++;
212
                break;
213
            case 'r':
214
                qstring_append(str, "\r");
215
                ptr++;
216
                break;
217
            case 't':
218
                qstring_append(str, "\t");
219
                ptr++;
220
                break;
221
            case 'u': {
222
                uint16_t unicode_char = 0;
223
                char utf8_char[4];
224
                int i = 0;
225

    
226
                ptr++;
227

    
228
                for (i = 0; i < 4; i++) {
229
                    if (qemu_isxdigit(*ptr)) {
230
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
231
                    } else {
232
                        parse_error(ctxt, token,
233
                                    "invalid hex escape sequence in string");
234
                        goto out;
235
                    }
236
                    ptr++;
237
                }
238

    
239
                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
240
                qstring_append(str, utf8_char);
241
            }   break;
242
            default:
243
                parse_error(ctxt, token, "invalid escape sequence in string");
244
                goto out;
245
            }
246
        } else {
247
            char dummy[2];
248

    
249
            dummy[0] = *ptr++;
250
            dummy[1] = 0;
251

    
252
            qstring_append(str, dummy);
253
        }
254
    }
255

    
256
    return str;
257

    
258
out:
259
    QDECREF(str);
260
    return NULL;
261
}
262

    
263
/**
 * Parsing rules
 */
266
/**
 * parse_pair(): Parse one `key : value' member of an object into @dict.
 *
 * Works on a private copy of *@tokens.  On success the value is stored in
 * @dict under the key with qdict_put_obj(), the copy (minus the consumed
 * tokens) replaces *@tokens, and 0 is returned.  On failure an error is
 * reported, *@tokens is left untouched and -1 is returned.
 *
 * The key must parse to a QString.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /*
         * Nothing left after the key.  The token helpers are not written to
         * accept a missing token, so report the truncated input here.
         */
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
306

    
307
/**
 * parse_object(): Parse a JSON object: `{' [ pair { `,' pair } ] `}'.
 *
 * Works on a private copy of *@tokens.  On success the copy (minus the
 * consumed tokens) replaces *@tokens and the new QDict is returned as a
 * QObject.  On failure *@tokens is left untouched and NULL is returned.
 *
 * A first token other than `{' (or no token at all) fails without reporting
 * an error, so that parse_value() can try its other rules.  Running out of
 * tokens after the opening brace is reported as premature EOI.
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (token_is_operator(peek, '}')) {
        /* Empty object: just consume the closing brace. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    } else {
        /* One pair, then `,' pair repeated until the closing brace. */
        for (;;) {
            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, '}')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;
        }

        qobject_decref(token);
        token = NULL;
    }

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
362

    
363
/**
 * parse_array(): Parse a JSON array: `[' [ value { `,' value } ] `]'.
 *
 * Works on a private copy of *@tokens.  On success the copy (minus the
 * consumed tokens) replaces *@tokens and the new QList is returned as a
 * QObject.  On failure *@tokens is left untouched and NULL is returned.
 *
 * A first token other than `[' (or no token at all) fails without reporting
 * an error, so that parse_value() can try its other rules.  Running out of
 * tokens after the opening bracket is reported as premature EOI.
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (token_is_operator(peek, ']')) {
        /* Empty array: just consume the closing bracket. */
        token = qlist_pop(working);
        qobject_decref(token);
        token = NULL;
    } else {
        /* One value, then `,' value repeated until the closing bracket. */
        for (;;) {
            QObject *obj;

            obj = parse_value(ctxt, &working, ap);
            if (obj == NULL) {
                parse_error(ctxt, token, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, ']')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;
        }

        qobject_decref(token);
        token = NULL;
    }

    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
430

    
431
/**
 * parse_keyword(): Parse the keywords `true' and `false' into a QBool.
 *
 * Works on a private copy of *@tokens.  On success the copy (minus the
 * consumed token) replaces *@tokens and the QBool is returned as a QObject.
 * On failure *@tokens is left untouched and NULL is returned.
 *
 * A missing token or a token that is not a keyword fails without reporting
 * an error; a keyword other than `true' or `false' is reported through
 * parse_error().
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);

    /* The token helpers must not be handed a missing token. */
    if (token == NULL || token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
463

    
464
/**
 * parse_escape(): Replace a printf-like escape token by the next argument
 * taken from @ap.
 *
 * Recognized escapes and the argument each one consumes:
 *   %p            QObject *, returned as-is
 *   %i            int, wrapped with qbool_from_int()
 *   %d            int, wrapped with qint_from_int()
 *   %ld           long, wrapped with qint_from_int()
 *   %lld, %I64d   long long, wrapped with qint_from_int()
 *   %s            const char *, wrapped with qstring_from_str()
 *   %f            double, wrapped with qfloat_from_double()
 *
 * Works on a private copy of *@tokens.  On success the copy (minus the
 * consumed token) replaces *@tokens and the object is returned.  Returns NULL
 * and leaves *@tokens untouched when @ap is NULL, when there is no token, or
 * when the token is not one of the escapes above.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* The token helpers must not be handed a missing token. */
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
506

    
507
/**
 * parse_literal(): Parse a string, integer or float token.
 *
 * JSON_STRING tokens are unescaped with qstring_from_escaped_str(),
 * JSON_INTEGER tokens are converted with strtoll() in base 10 and JSON_FLOAT
 * tokens with strtod().
 *
 * Works on a private copy of *@tokens.  On success the copy (minus the
 * consumed token) replaces *@tokens and the new object is returned.  Returns
 * NULL and leaves *@tokens untouched when there is no token, when the token
 * has any other type, or when string unescaping fails.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* The token helpers must not be handed a missing token. */
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /*
         * A bad escape sequence has already been reported.  Do not commit the
         * consumed token in that case, like every other failure path here.
         */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
540

    
541
/**
 * parse_value(): Parse any JSON value at the head of *@tokens.
 *
 * The rules are tried in this order: object, array, escape, keyword, literal.
 * The first rule that yields an object wins; each rule advances *@tokens only
 * when it succeeds.
 *
 * Returns the parsed object, or NULL when no rule matches.  An empty token
 * list is reported as premature EOI instead of being handed to the rules,
 * which all start by taking a token off the list.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *obj;

    if (qlist_peek(*tokens) == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        return NULL;
    }

    obj = parse_object(ctxt, tokens, ap);
    if (obj != NULL) {
        return obj;
    }

    obj = parse_array(ctxt, tokens, ap);
    if (obj != NULL) {
        return obj;
    }

    obj = parse_escape(ctxt, tokens, ap);
    if (obj != NULL) {
        return obj;
    }

    obj = parse_keyword(ctxt, tokens);
    if (obj != NULL) {
        return obj;
    }

    return parse_literal(ctxt, tokens);
}
561

    
562
/**
 * json_parser_parse(): Parse a list of lexer tokens into a QObject.
 *
 * @tokens: token list; it is copied and the caller's list is not modified
 * @ap: arguments for escape tokens; when NULL, escape tokens are not accepted
 *
 * Returns whatever parse_value() produces for the start of the list: the
 * parsed object, or NULL on failure.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QObject *parsed;
    QList *remaining;

    /* Parse from a private copy; the parse rules replace the list they are given. */
    remaining = qlist_copy(tokens);
    parsed = parse_value(&ctxt, &remaining, ap);
    QDECREF(remaining);

    return parsed;
}