Statistics
| Branch: | Revision:

root / json-parser.c @ 2c0d4b36

History | View | Annotate | Download (12.8 kB)

1
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13

    
14
#include <stdbool.h>
15

    
16
#include "qemu-common.h"
17
#include "qstring.h"
18
#include "qint.h"
19
#include "qdict.h"
20
#include "qlist.h"
21
#include "qfloat.h"
22
#include "qbool.h"
23
#include "json-parser.h"
24
#include "json-lexer.h"
25

    
26
/*
 * Per-parse state threaded through every parsing routine.  It currently
 * carries no members; it exists so state can be added later without
 * changing the signatures of the parse_*() functions.
 */
typedef struct JSONParserContext {
} JSONParserContext;
29

    
30
/* Assert that an internal invariant holds: trips the assertion when cond is true. */
#define BUG_ON(cond) assert(!(cond))
31

    
32
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 3) should we return a parsed size?
 * 4) deal with premature EOI
 */
40

    
41
/*
 * Forward declaration: parse_value() is called by the composite parsers
 * (pairs, objects, arrays) that are defined before it in this file.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42

    
43
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These routines make
 * working with these objects a bit easier.
 */
50
/* Return the string stored under the "token" key of a token dictionary. */
static const char *token_get_value(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_str(token_dict, "token");
}
54

    
55
/* Return the integer stored under the "type" key of a token dictionary. */
static JSONTokenType token_get_type(QObject *obj)
{
    QDict *token_dict = qobject_to_qdict(obj);

    return qdict_get_int(token_dict, "type");
}
59

    
60
/*
 * Test whether a token is the single-character operator `op`.
 *
 * @obj: token dictionary, or NULL when the token list ran out
 * @op:  operator character to compare against
 *
 * Returns non-zero only when the token has type JSON_OPERATOR and its
 * text is exactly the one character `op`.  A NULL token never matches, so
 * callers that pop or peek past the end of the token list get a plain
 * "no" instead of handing NULL to the dictionary accessors.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *val;

    if (obj == NULL) {
        return 0;
    }

    if (token_get_type(obj) != JSON_OPERATOR) {
        return 0;
    }

    val = token_get_value(obj);

    return (val[0] == op) && (val[1] == 0);
}
72

    
73
/*
 * Return non-zero when the token has type JSON_KEYWORD and its text
 * equals `value`, zero otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    if (token_get_type(obj) == JSON_KEYWORD) {
        return strcmp(token_get_value(obj), value) == 0;
    }

    return 0;
}
81

    
82
/*
 * Test whether a token is the interpolation escape `value` (e.g. "%d").
 *
 * @obj:   token dictionary, or NULL when the token list ran out
 * @value: escape text to compare against
 *
 * Returns non-zero only when the token has type JSON_ESCAPE and its text
 * equals `value`.  A NULL token never matches, which keeps callers that
 * popped from an exhausted list from passing NULL to the dictionary
 * accessors.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    if (obj == NULL) {
        return 0;
    }

    if (token_get_type(obj) != JSON_ESCAPE) {
        return 0;
    }

    return (strcmp(token_get_value(obj), value) == 0);
}
90

    
91
/**
92
 * Error handler
93
 */
94
static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
95
{
96
    fprintf(stderr, "parse error: %s\n", msg);
97
}
98

    
99
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
104
/*
 * Encode one 16-bit code unit as a NUL-terminated UTF-8 sequence.
 *
 * @wchar:         value to encode
 * @buffer:        destination for the encoded bytes plus terminator
 * @buffer_length: size of @buffer in bytes
 *
 * One, two or three payload bytes are written depending on the magnitude
 * of @wchar, always followed by a terminating 0.  The buffer size is
 * asserted (the file's BUG_ON() check, spelled out here) before any byte
 * is stored.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t pos = 0;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        assert(!(buffer_length < 2));

        buffer[pos++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        assert(!(buffer_length < 3));

        buffer[pos++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[pos++] = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        assert(!(buffer_length < 4));

        buffer[pos++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[pos++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[pos++] = 0x80 | (wchar & 0x3F);
    }

    buffer[pos] = 0;
}
126

    
127
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 when @ch is not a
 * hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
        digit = 10 + (ch - 'A');
    }

    return digit;
}
139

    
140
/**
141
 * parse_string(): Parse a json string and return a QObject
142
 *
143
 *  string
144
 *      ""
145
 *      " chars "
146
 *  chars
147
 *      char
148
 *      char chars
149
 *  char
150
 *      any-Unicode-character-
151
 *          except-"-or-\-or-
152
 *          control-character
153
 *      \"
154
 *      \\
155
 *      \/
156
 *      \b
157
 *      \f
158
 *      \n
159
 *      \r
160
 *      \t
161
 *      \u four-hex-digits 
162
 */
163
static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
164
{
165
    const char *ptr = token_get_value(token);
166
    QString *str;
167
    int double_quote = 1;
168

    
169
    if (*ptr == '"') {
170
        double_quote = 1;
171
    } else {
172
        double_quote = 0;
173
    }
174
    ptr++;
175

    
176
    str = qstring_new();
177
    while (*ptr && 
178
           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
179
        if (*ptr == '\\') {
180
            ptr++;
181

    
182
            switch (*ptr) {
183
            case '"':
184
                qstring_append(str, "\"");
185
                ptr++;
186
                break;
187
            case '\'':
188
                qstring_append(str, "'");
189
                ptr++;
190
                break;
191
            case '\\':
192
                qstring_append(str, "\\");
193
                ptr++;
194
                break;
195
            case '/':
196
                qstring_append(str, "/");
197
                ptr++;
198
                break;
199
            case 'b':
200
                qstring_append(str, "\b");
201
                ptr++;
202
                break;
203
            case 'n':
204
                qstring_append(str, "\n");
205
                ptr++;
206
                break;
207
            case 'r':
208
                qstring_append(str, "\r");
209
                ptr++;
210
                break;
211
            case 't':
212
                qstring_append(str, "\t");
213
                ptr++;
214
                break;
215
            case 'u': {
216
                uint16_t unicode_char = 0;
217
                char utf8_char[4];
218
                int i = 0;
219

    
220
                ptr++;
221

    
222
                for (i = 0; i < 4; i++) {
223
                    if (qemu_isxdigit(*ptr)) {
224
                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
225
                    } else {
226
                        parse_error(ctxt, token,
227
                                    "invalid hex escape sequence in string");
228
                        goto out;
229
                    }
230
                    ptr++;
231
                }
232

    
233
                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
234
                qstring_append(str, utf8_char);
235
            }   break;
236
            default:
237
                parse_error(ctxt, token, "invalid escape sequence in string");
238
                goto out;
239
            }
240
        } else {
241
            char dummy[2];
242

    
243
            dummy[0] = *ptr++;
244
            dummy[1] = 0;
245

    
246
            qstring_append(str, dummy);
247
        }
248
    }
249

    
250
    return str;
251

    
252
out:
253
    QDECREF(str);
254
    return NULL;
255
}
256

    
257
/**
 * Parsing rules
 */
260
/*
 * Parse one `"key" : value` pair and store it in @dict.
 *
 * @ctxt:   parser context
 * @dict:   dictionary receiving the parsed pair
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 * @ap:     varargs used for interpolation escapes, may be NULL
 *
 * Works on a private copy of the token list so that a failed attempt
 * consumes nothing from the caller's list.
 *
 * Returns 0 on success, -1 on error (after calling parse_error()).
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    key = parse_value(ctxt, &working, ap);
    /* parse_value() yields NULL when nothing parses; check before typing it. */
    if (key == NULL || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    /* A NULL token means the list ended right after the key. */
    if (token == NULL || !token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);
    /* Commit: the caller's list becomes what is left after this pair. */
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
300

    
301
/*
 * Parse a json object: `{}` or `{ pair (, pair)* }`.
 *
 * @ctxt:   parser context
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 * @ap:     varargs used for interpolation escapes, may be NULL
 *
 * Returns the new dictionary as a QObject, or NULL when the tokens do not
 * start with `{` (silently, so the caller can try another rule) or when
 * the object is malformed or the token list ends early (after calling
 * parse_error()).
 */
static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QDict *dict = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '{')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    dict = qdict_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, '}')) {
        /* One pair, then either ',' (another pair follows) or '}'. */
        for (;;) {
            if (parse_pair(ctxt, dict, &working, ap) == -1) {
                goto out;
            }

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, '}')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in dict");
                goto out;
            }
            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty object: consume the closing brace that was peeked. */
        token = qlist_pop(working);
    }

    qobject_decref(token);
    token = NULL;

    /* Commit: the caller's list becomes what is left after the object. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(dict);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(dict);
    return NULL;
}
356

    
357
/*
 * Parse a json array: `[]` or `[ value (, value)* ]`.
 *
 * @ctxt:   parser context
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 * @ap:     varargs used for interpolation escapes, may be NULL
 *
 * Returns the new list as a QObject, or NULL when the tokens do not start
 * with `[` (silently, so the caller can try another rule) or when the
 * array is malformed or the token list ends early (after calling
 * parse_error()).
 */
static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QList *list = NULL;
    QObject *token, *peek;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL || !token_is_operator(token, '[')) {
        goto out;
    }
    qobject_decref(token);
    token = NULL;

    list = qlist_new();

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(peek, ']')) {
        /* One value, then either ',' (another value follows) or ']'. */
        for (;;) {
            QObject *obj;

            obj = parse_value(ctxt, &working, ap);
            if (obj == NULL) {
                parse_error(ctxt, token, "expecting value");
                goto out;
            }

            qlist_append_obj(list, obj);

            token = qlist_pop(working);
            if (token == NULL) {
                parse_error(ctxt, NULL, "premature EOI");
                goto out;
            }

            if (token_is_operator(token, ']')) {
                break;
            }

            if (!token_is_operator(token, ',')) {
                parse_error(ctxt, token, "expected separator in list");
                goto out;
            }

            qobject_decref(token);
            token = NULL;
        }
    } else {
        /* Empty array: consume the closing bracket that was peeked. */
        token = qlist_pop(working);
    }

    qobject_decref(token);
    token = NULL;

    /* Commit: the caller's list becomes what is left after the array. */
    QDECREF(*tokens);
    *tokens = working;

    return QOBJECT(list);

out:
    qobject_decref(token);
    QDECREF(working);
    QDECREF(list);
    return NULL;
}
424

    
425
/*
 * Parse a keyword token: `true` or `false`.
 *
 * @ctxt:   parser context
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 *
 * Returns a new QBool as a QObject.  Returns NULL silently when the next
 * token is missing or is not a keyword, and NULL after calling
 * parse_error() when the keyword is not recognised.
 */
static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *ret;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);

    /* Nothing left to read, or not a keyword: let another rule try. */
    if (token == NULL || token_get_type(token) != JSON_KEYWORD) {
        goto out;
    }

    if (token_is_keyword(token, "true")) {
        ret = QOBJECT(qbool_from_int(true));
    } else if (token_is_keyword(token, "false")) {
        ret = QOBJECT(qbool_from_int(false));
    } else {
        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
        goto out;
    }

    qobject_decref(token);
    /* Commit: the caller's list becomes what is left after the keyword. */
    QDECREF(*tokens);
    *tokens = working;

    return ret;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
457

    
458
/*
 * Parse an interpolation escape and build its value from the varargs.
 *
 * @ctxt:   parser context
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 * @ap:     varargs supplying the interpolated values; when NULL no escape
 *          is accepted
 *
 * Supported escapes and the argument type each one consumes:
 *   %p             QObject *   (returned as-is)
 *   %i             int         (wrapped in a QBool)
 *   %d             int         (wrapped in a QInt)
 *   %ld            long        (wrapped in a QInt)
 *   %lld, %I64d    long long   (wrapped in a QInt)
 *   %s             const char *(copied into a QString)
 *   %f             double      (wrapped in a QFloat)
 *
 * Returns the resulting object, or NULL when @ap is NULL, the token list
 * is exhausted, or the next token is not a supported escape.
 */
static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *token = NULL, *obj;
    QList *working = qlist_copy(*tokens);

    if (ap == NULL) {
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing left to read. */
        goto out;
    }

    if (token_is_escape(token, "%p")) {
        obj = va_arg(*ap, QObject *);
    } else if (token_is_escape(token, "%i")) {
        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    } else if (token_is_escape(token, "%ld")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    } else if (token_is_escape(token, "%lld") ||
               token_is_escape(token, "%I64d")) {
        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    } else if (token_is_escape(token, "%s")) {
        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    } else if (token_is_escape(token, "%f")) {
        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    } else {
        goto out;
    }

    qobject_decref(token);
    /* Commit: the caller's list becomes what is left after the escape. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
500

    
501
/*
 * Parse a literal token: string, integer or float.
 *
 * @ctxt:   parser context
 * @tokens: in/out token list; replaced by the remaining tokens on success
 *          and left untouched on failure
 *
 * Returns a new QString, QInt or QFloat as a QObject.  Returns NULL when
 * the token list is exhausted, when the next token is not a literal, or
 * when a string literal contains an invalid escape sequence; in all of
 * those cases the caller's token list is not advanced.
 */
static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
{
    QObject *token, *obj;
    QList *working = qlist_copy(*tokens);

    token = qlist_pop(working);
    if (token == NULL) {
        /* Nothing left to read. */
        goto out;
    }

    switch (token_get_type(token)) {
    case JSON_STRING: {
        QString *str = qstring_from_escaped_str(ctxt, token);

        /* Unescaping failed: report failure without consuming the token. */
        if (str == NULL) {
            goto out;
        }
        obj = QOBJECT(str);
        break;
    }
    case JSON_INTEGER:
        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
        break;
    case JSON_FLOAT:
        /* FIXME dependent on locale */
        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
        break;
    default:
        goto out;
    }

    qobject_decref(token);
    /* Commit: the caller's list becomes what is left after the literal. */
    QDECREF(*tokens);
    *tokens = working;

    return obj;

out:
    qobject_decref(token);
    QDECREF(working);

    return NULL;
}
534

    
535
/*
 * Parse any json value by trying each rule in turn: object, array,
 * interpolation escape, keyword, literal.  The first rule that yields a
 * non-NULL object wins; NULL is returned when none of them does.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
{
    QObject *result;

    result = parse_object(ctxt, tokens, ap);
    if (result != NULL) {
        return result;
    }

    result = parse_array(ctxt, tokens, ap);
    if (result != NULL) {
        return result;
    }

    result = parse_escape(ctxt, tokens, ap);
    if (result != NULL) {
        return result;
    }

    result = parse_keyword(ctxt, tokens);
    if (result != NULL) {
        return result;
    }

    return parse_literal(ctxt, tokens);
}
555

    
556
/*
 * Parse a list of lexer tokens into a QObject tree.
 *
 * @tokens: token list to parse; a copy is consumed, the list itself is
 *          not modified here
 * @ap:     varargs used for interpolation escapes, may be NULL
 *
 * Returns whatever parse_value() produced for the token list, which is
 * NULL when no rule matched.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    JSONParserContext ctxt = {};
    QList *remaining = qlist_copy(tokens);
    QObject *parsed = parse_value(&ctxt, &remaining, ap);

    /* Drop whatever was left unconsumed along with the working copy. */
    QDECREF(remaining);

    return parsed;
}