Statistics
| Branch: | Revision:

root / json-parser.c @ 4a5fcab7

History | View | Annotate | Download (12.8 kB)

1
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13

    
14
#include <stdbool.h>
15

    
16
#include "qemu-common.h"
17
#include "qstring.h"
18
#include "qint.h"
19
#include "qdict.h"
20
#include "qlist.h"
21
#include "qfloat.h"
22
#include "qbool.h"
23
#include "json-parser.h"
24
#include "json-lexer.h"
25

    
26
/* State handed to every parse_*() routine; it currently has no members. */
typedef struct JSONParserContext {
} JSONParserContext;
29

    
30
/* Assert that cond is false; used to trap internal invariant violations. */
#define BUG_ON(cond) assert(!(cond))
31

    
32
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 3) should we return a parsed size?
 * 4) deal with premature EOI
 */
40

    
41
/* Forward declaration: the object, array and pair parsers recurse into parse_value(). */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42

    
43
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These routines make
 * working with those objects a bit easier.
 */
50
/**
 * token_get_value(): Return the text of a token.
 *
 * The token is treated as a dictionary and its "token" entry is returned
 * as a C string.
 */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
54

    
55
/**
 * token_get_type(): Return the lexer type of a token.
 *
 * The token is treated as a dictionary and its integer "type" entry is
 * returned as a JSONTokenType.
 */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
59

    
60
/**
 * token_is_operator(): Test whether a token is the single-character operator @op.
 *
 * Returns 1 when the token has type JSON_OPERATOR and its text is exactly
 * the one character @op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);
        /* Exactly one character long and equal to op. */
        return text[0] == op && text[1] == '\0';
    }

    return 0;
}
72

    
73
/**
 * token_is_keyword(): Test whether a token is the keyword @value.
 *
 * Returns 1 when the token has type JSON_KEYWORD and its text compares
 * equal to @value, 0 otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    int is_keyword = (token_get_type(obj) == JSON_KEYWORD);

    /* The text is only fetched and compared for keyword tokens. */
    return is_keyword && strcmp(token_get_value(obj), value) == 0;
}
81

    
82
/**
 * token_is_escape(): Test whether a token is the escape sequence @value.
 *
 * Returns 1 when the token has type JSON_ESCAPE and its text compares
 * equal to @value (for example "%d"), 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_ESCAPE
           ? !strcmp(token_get_value(obj), value)
           : 0;
}
90

    
91
/**
 * Error handler
 */
94
/**
 * parse_error(): Report a parse error on stderr.
 *
 * @ctxt:  parser context (currently unused)
 * @token: token the error relates to, may be NULL (currently unused)
 * @msg:   printf-style format string, followed by its arguments
 *
 * The message is expanded with its variadic arguments, prefixed with
 * "parse error: " and terminated with a newline.
 */
static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
{
    va_list ap;

    fprintf(stderr, "parse error: ");

    /* Expand the caller's format arguments instead of printing msg verbatim. */
    va_start(ap, msg);
    vfprintf(stderr, msg, ap);
    va_end(ap);

    fprintf(stderr, "\n");
}
98

    
99
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
104
/**
 * wchar_to_utf8(): Encode one 16-bit code unit as a NUL-terminated UTF-8 string.
 *
 * @wchar:         value to encode
 * @buffer:        destination buffer
 * @buffer_length: size of @buffer in bytes
 *
 * Writes one, two or three encoded bytes followed by a terminating 0.
 * BUG_ON() fires if @buffer is too small for the encoding plus terminator.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar <= 0x007F) {
        /* One byte: 0xxxxxxx */
        BUG_ON(buffer_length < 2);
        buffer[len++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);
        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);
        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
126

    
127
/**
 * hex2decimal(): Convert one hexadecimal digit character to its value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Returns the value 0..15, or -1
 * if @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if ('0' <= ch && ch <= '9') {
        digit = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if ('A' <= ch && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
139

    
140
/**
 * qstring_from_escaped_str(): Unescape a JSON string token and return a QString
 *
 *  string
 *      ""
 *      " chars "
 *  chars
 *      char
 *      char chars
 *  char
 *      any-Unicode-character-
 *          except-"-or-\-or-
 *          control-character
 *      \"
 *      \\
 *      \/
 *      \b
 *      \f
 *      \n
 *      \r
 *      \t
 *      \u four-hex-digits
 */
163
/*
 * @ctxt:  parser context, only forwarded to parse_error()
 * @token: string token whose text is the quoted, still-escaped string
 *
 * The first character of the token text is the opening quote: '"' selects
 * termination at the next unescaped '"', anything else selects termination
 * at the next unescaped '\''.  The escapes \" \' \\ \/ \b \f \n \r \t and
 * \uXXXX are decoded; any other escape is reported through parse_error().
 *
 * Returns a new QString, or NULL after an invalid escape sequence.
 */
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
{
    const char *ptr = token_get_value(token);
    QString *str;
    char quote;

    /* The opening quote decides which character ends the string. */
    quote = (*ptr == '"') ? '"' : '\'';
    ptr++;

    str = qstring_new();
    while (*ptr && *ptr != quote) {
        if (*ptr == '\\') {
            ptr++;

            switch (*ptr) {
            case '"':
                qstring_append(str, "\"");
                ptr++;
                break;
            case '\'':
                qstring_append(str, "'");
                ptr++;
                break;
            case '\\':
                qstring_append(str, "\\");
                ptr++;
                break;
            case '/':
                qstring_append(str, "/");
                ptr++;
                break;
            case 'b':
                qstring_append(str, "\b");
                ptr++;
                break;
            case 'f':
                /* Form feed is part of the JSON grammar and must be accepted. */
                qstring_append(str, "\f");
                ptr++;
                break;
            case 'n':
                qstring_append(str, "\n");
                ptr++;
                break;
            case 'r':
                qstring_append(str, "\r");
                ptr++;
                break;
            case 't':
                qstring_append(str, "\t");
                ptr++;
                break;
            case 'u': {
                uint16_t unicode_char = 0;
                char utf8_char[4];
                int i;

                ptr++;

                /* Exactly four hex digits, most significant nibble first. */
                for (i = 0; i < 4; i++) {
                    if (qemu_isxdigit(*ptr)) {
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
                    } else {
                        parse_error(ctxt, token,
                                    "invalid hex escape sequence in string");
                        goto out;
                    }
                    ptr++;
                }

                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
                qstring_append(str, utf8_char);
            }   break;
            default:
                parse_error(ctxt, token, "invalid escape sequence in string");
                goto out;
            }
        } else {
            /* Ordinary character: append it as a one-character string. */
            char dummy[2];

            dummy[0] = *ptr++;
            dummy[1] = 0;

            qstring_append(str, dummy);
        }
    }

    return str;

out:
    QDECREF(str);
    return NULL;
}
258

    
259
/**
 * Parsing rules
 */
262
/**
 * parse_pair(): Parse one `key : value` member and store it in @dict.
 *
 * @ctxt:   parser context, forwarded to the other parse routines
 * @dict:   dictionary that receives the parsed member
 * @tokens: in/out token list; replaced by the remaining tokens on success
 * @ap:     optional varargs used for escape tokens, may be NULL
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns 0 on success and -1 on error (after parse_error()).
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key = NULL, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    /*
     * NOTE(review): qlist_peek()/qlist_pop() are assumed to return NULL once
     * the list is exhausted -- confirm against qlist.c.
     */
    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    key = parse_value(ctxt, &working, ap);
    /* parse_value() returns NULL on failure; do not inspect its type then. */
    if (key == NULL || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
302

    
303
/**
 * parse_object(): Parse a `{ pair, pair, ... }` object into a QDict.
 *
 * @ctxt:   parser context, forwarded to the other parse routines
 * @tokens: in/out token list; replaced by the remaining tokens on success
 * @ap:     optional varargs used for escape tokens, may be NULL
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns the new dictionary as a QObject, or NULL if the tokens
 * do not start with '{' or the object is malformed or truncated.
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    /*
     * NOTE(review): qlist_pop()/qlist_peek() are assumed to return NULL once
     * the list is exhausted -- confirm against qlist.c.
     *
     * Not an object (or nothing left): fail silently so that parse_value()
     * can try the next rule.
     */
    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, '}')) {
        /* One pair, then either ',' (another pair follows) or '}'. */
        for (;;) {
            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, '}')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty object: consume the closing brace. */
        token = qlist_pop(working);
    }

    qobject_decref(token);
    token = NULL;

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
358

    
359
/**
 * parse_array(): Parse a `[ value, value, ... ]` array into a QList.
 *
 * @ctxt:   parser context, forwarded to the other parse routines
 * @tokens: in/out token list; replaced by the remaining tokens on success
 * @ap:     optional varargs used for escape tokens, may be NULL
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns the new list as a QObject, or NULL if the tokens do not
 * start with '[' or the array is malformed or truncated.
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    /*
     * NOTE(review): qlist_pop()/qlist_peek() are assumed to return NULL once
     * the list is exhausted -- confirm against qlist.c.
     *
     * Not an array (or nothing left): fail silently so that parse_value()
     * can try the next rule.
     */
    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, ']')) {
        /* One value, then either ',' (another value follows) or ']'. */
        for (;;) {
            QObject *obj = parse_value(ctxt, &working, ap);

            if (obj == NULL) {
                /* No token is held at this point. */
                parse_error(ctxt, NULL, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, ']')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty array: consume the closing bracket. */
        token = qlist_pop(working);
    }

    qobject_decref(token);
    token = NULL;

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
426

    
427
/**
 * parse_keyword(): Parse a keyword token ("true" or "false") into a QBool.
 *
 * @ctxt:   parser context, forwarded to parse_error()
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns the boolean as a QObject, or NULL if the next token is
 * missing, is not a keyword, or is an unknown keyword (reported through
 * parse_error()).
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);

    /*
     * NOTE(review): qlist_pop() is assumed to return NULL once the list is
     * exhausted -- confirm against qlist.c.  A NULL token must not be handed
     * to token_get_type().
     */
    if (token == NULL || token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
459

    
460
/**
 * parse_escape(): Turn a printf-like escape token into an object taken from @ap.
 *
 * @ctxt:   parser context (unused here)
 * @tokens: in/out token list; replaced by the remaining tokens on success
 * @ap:     varargs supplying the value for the escape; NULL disables escapes
 *
 * Supported escapes: %p (a QObject pointer used as is), %i (boolean from int),
 * %d, %ld, %lld (integers), %s (string) and %f (double).
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns the resulting object, or NULL if @ap is NULL, no token
 * is left, or the next token is not one of the supported escapes.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    /*
     * NOTE(review): qlist_pop() is assumed to return NULL once the list is
     * exhausted -- confirm against qlist.c.
     */
    token = qlist_pop(working);
    if (token == NULL) {
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
501

    
502
/**
 * parse_literal(): Parse a string, integer or float token into an object.
 *
 * @ctxt:   parser context, forwarded to the string unescaper
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *
 * JSON_STRING tokens are unescaped into a QString, JSON_INTEGER tokens are
 * converted with strtoll() (base 10) and JSON_FLOAT tokens with strtod().
 *
 * Parsing is done on a copy of *@tokens, so *@tokens is left untouched on
 * failure.  Returns the new object, or NULL if no token is left, the token
 * has another type, or the string contains an invalid escape.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    /*
     * NOTE(review): qlist_pop() is assumed to return NULL once the list is
     * exhausted -- confirm against qlist.c.  A NULL token must not be handed
     * to token_get_type().
     */
    token = qlist_pop(working);
    if (token == NULL) {
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /* Invalid escape: fail without consuming the token. */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);

    /* Commit: the caller continues with whatever tokens are left. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
535

    
536
/**
 * parse_value(): Parse the next value of any kind from @tokens.
 *
 * The rules are tried in a fixed order -- object, array, escape, keyword,
 * literal -- and the result of the first one that yields an object is
 * returned.  Returns NULL when none of them succeeds.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *value;

    value = parse_object(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_array(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_escape(ctxt, tokens, ap);
    if (value != NULL) {
        return value;
    }

    value = parse_keyword(ctxt, tokens);
    if (value != NULL) {
        return value;
    }

    /* Last rule: its result, NULL or not, is the final answer. */
    return parse_literal(ctxt, tokens);
}
556

    
557
/**
 * json_parser_parse(): Parse a list of lexer tokens into an object tree.
 *
 * @tokens: token list to parse; the parser works on a copy made with
 *          qlist_copy()
 * @ap:     optional varargs supplying values for escape tokens, may be NULL
 *
 * Returns the parsed value, or NULL if no value could be parsed.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QList *remaining = qlist_copy(tokens);
    QObject *root = parse_value(&ctxt, &remaining, ap);

    /* Whatever tokens are left after the value are simply released. */
    QDECREF(remaining);

    return root;
}