Statistics
| Branch: | Revision:

root / json-lexer.c @ 2c0d4b36

History | View | Annotate | Download (7.4 kB)

1
/*
2
 * JSON lexer
3
 *
4
 * Copyright IBM, Corp. 2009
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10
 * See the COPYING.LIB file in the top-level directory.
11
 *
12
 */
13

    
14
#include "qstring.h"
15
#include "qlist.h"
16
#include "qdict.h"
17
#include "qint.h"
18
#include "qemu-common.h"
19
#include "json-lexer.h"
20

    
21
/*
22
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
23
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
24
 * -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?|[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
25
 * [{}\[\],:]
26
 * [a-z]+
27
 *
28
 */
29

    
30
/*
 * States of the lexer's transition table.
 *
 * The numeric values index the rows of json_lexer[], so the order of the
 * enumerators must not change.
 */
enum json_lexer_state {
    /* Must be 0: table entries that are not initialized default to it. */
    ERROR = 0,

    /* Closing quote of a string has been consumed. */
    IN_DONE_STRING,

    /* Double-quoted string; UCODEn = n hex digits read after "\u". */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,    /* just after a backslash */
    IN_DQ_STRING,

    /* Single-quoted string; same layout as the double-quoted states. */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,    /* just after a backslash */
    IN_SQ_STRING,

    /* Numbers. */
    IN_ZERO,                /* a leading '0' */
    IN_DIGITS,              /* one or more exponent digits */
    IN_DIGIT,               /* after the exponent sign; a digit must follow */
    IN_EXP_E,               /* after 'e' or 'E' */
    IN_MANTISSA,            /* after '.'; a digit must follow */
    IN_MANTISSA_DIGITS,     /* one or more fraction digits */
    IN_NONZERO_NUMBER,      /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,  /* after '-'; accepts '0' as well as 1-9 */

    /* Run of lower-case letters. */
    IN_KEYWORD,

    /* '%' escapes: %d %i %p %s %f %ld %lld %I64d. */
    IN_ESCAPE,              /* after "%" */
    IN_ESCAPE_L,            /* after "%l" */
    IN_ESCAPE_LL,           /* after "%ll" */
    IN_ESCAPE_I,            /* after "%I" */
    IN_ESCAPE_I6,           /* after "%I6" */
    IN_ESCAPE_I64,          /* after "%I64" */
    IN_ESCAPE_DONE,         /* escape is complete */

    IN_WHITESPACE,
    IN_OPERATOR_DONE,       /* a single-character operator was consumed */

    /* Initial state; also the row used to dispatch the first byte of a token. */
    IN_START,
};
65

    
66
/*
 * Row initializer for the transition table: maps every 7-bit input byte
 * (0x00-0x7F) to `token`.  Designators that follow it in the same row
 * override individual entries.  Uses the GNU range-designator extension.
 */
#define TERMINAL(token) [0 ... 0x7F] = (token)
67

    
68
static const uint8_t json_lexer[][256] =  {
69
    [IN_DONE_STRING] = {
70
        TERMINAL(JSON_STRING),
71
    },
72

    
73
    /* double quote string */
74
    [IN_DQ_UCODE3] = {
75
        ['0' ... '9'] = IN_DQ_STRING,
76
        ['a' ... 'f'] = IN_DQ_STRING,
77
        ['A' ... 'F'] = IN_DQ_STRING,
78
    },
79
    [IN_DQ_UCODE2] = {
80
        ['0' ... '9'] = IN_DQ_UCODE3,
81
        ['a' ... 'f'] = IN_DQ_UCODE3,
82
        ['A' ... 'F'] = IN_DQ_UCODE3,
83
    },
84
    [IN_DQ_UCODE1] = {
85
        ['0' ... '9'] = IN_DQ_UCODE2,
86
        ['a' ... 'f'] = IN_DQ_UCODE2,
87
        ['A' ... 'F'] = IN_DQ_UCODE2,
88
    },
89
    [IN_DQ_UCODE0] = {
90
        ['0' ... '9'] = IN_DQ_UCODE1,
91
        ['a' ... 'f'] = IN_DQ_UCODE1,
92
        ['A' ... 'F'] = IN_DQ_UCODE1,
93
    },
94
    [IN_DQ_STRING_ESCAPE] = {
95
        ['b'] = IN_DQ_STRING,
96
        ['f'] =  IN_DQ_STRING,
97
        ['n'] =  IN_DQ_STRING,
98
        ['r'] =  IN_DQ_STRING,
99
        ['t'] =  IN_DQ_STRING,
100
        ['\''] = IN_DQ_STRING,
101
        ['\"'] = IN_DQ_STRING,
102
        ['u'] = IN_DQ_UCODE0,
103
    },
104
    [IN_DQ_STRING] = {
105
        [1 ... 0xFF] = IN_DQ_STRING,
106
        ['\\'] = IN_DQ_STRING_ESCAPE,
107
        ['"'] = IN_DONE_STRING,
108
    },
109

    
110
    /* single quote string */
111
    [IN_SQ_UCODE3] = {
112
        ['0' ... '9'] = IN_SQ_STRING,
113
        ['a' ... 'f'] = IN_SQ_STRING,
114
        ['A' ... 'F'] = IN_SQ_STRING,
115
    },
116
    [IN_SQ_UCODE2] = {
117
        ['0' ... '9'] = IN_SQ_UCODE3,
118
        ['a' ... 'f'] = IN_SQ_UCODE3,
119
        ['A' ... 'F'] = IN_SQ_UCODE3,
120
    },
121
    [IN_SQ_UCODE1] = {
122
        ['0' ... '9'] = IN_SQ_UCODE2,
123
        ['a' ... 'f'] = IN_SQ_UCODE2,
124
        ['A' ... 'F'] = IN_SQ_UCODE2,
125
    },
126
    [IN_SQ_UCODE0] = {
127
        ['0' ... '9'] = IN_SQ_UCODE1,
128
        ['a' ... 'f'] = IN_SQ_UCODE1,
129
        ['A' ... 'F'] = IN_SQ_UCODE1,
130
    },
131
    [IN_SQ_STRING_ESCAPE] = {
132
        ['b'] = IN_SQ_STRING,
133
        ['f'] =  IN_SQ_STRING,
134
        ['n'] =  IN_SQ_STRING,
135
        ['r'] =  IN_SQ_STRING,
136
        ['t'] =  IN_SQ_STRING,
137
        ['\''] = IN_SQ_STRING,
138
        ['\"'] = IN_SQ_STRING,
139
        ['u'] = IN_SQ_UCODE0,
140
    },
141
    [IN_SQ_STRING] = {
142
        [1 ... 0xFF] = IN_SQ_STRING,
143
        ['\\'] = IN_SQ_STRING_ESCAPE,
144
        ['\''] = IN_DONE_STRING,
145
    },
146

    
147
    /* Zero */
148
    [IN_ZERO] = {
149
        TERMINAL(JSON_INTEGER),
150
        ['0' ... '9'] = ERROR,
151
        ['.'] = IN_MANTISSA,
152
    },
153

    
154
    /* Float */
155
    [IN_DIGITS] = {
156
        TERMINAL(JSON_FLOAT),
157
        ['0' ... '9'] = IN_DIGITS,
158
    },
159

    
160
    [IN_DIGIT] = {
161
        ['0' ... '9'] = IN_DIGITS,
162
    },
163

    
164
    [IN_EXP_E] = {
165
        ['-'] = IN_DIGIT,
166
        ['+'] = IN_DIGIT,
167
        ['0' ... '9'] = IN_DIGITS,
168
    },
169

    
170
    [IN_MANTISSA_DIGITS] = {
171
        TERMINAL(JSON_FLOAT),
172
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
173
        ['e'] = IN_EXP_E,
174
        ['E'] = IN_EXP_E,
175
    },
176

    
177
    [IN_MANTISSA] = {
178
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
179
    },
180

    
181
    /* Number */
182
    [IN_NONZERO_NUMBER] = {
183
        TERMINAL(JSON_INTEGER),
184
        ['0' ... '9'] = IN_NONZERO_NUMBER,
185
        ['e'] = IN_EXP_E,
186
        ['E'] = IN_EXP_E,
187
        ['.'] = IN_MANTISSA,
188
    },
189

    
190
    [IN_NEG_NONZERO_NUMBER] = {
191
        ['0'] = IN_ZERO,
192
        ['1' ... '9'] = IN_NONZERO_NUMBER,
193
    },
194

    
195
    /* keywords */
196
    [IN_KEYWORD] = {
197
        TERMINAL(JSON_KEYWORD),
198
        ['a' ... 'z'] = IN_KEYWORD,
199
    },
200

    
201
    /* whitespace */
202
    [IN_WHITESPACE] = {
203
        TERMINAL(JSON_SKIP),
204
        [' '] = IN_WHITESPACE,
205
        ['\t'] = IN_WHITESPACE,
206
        ['\r'] = IN_WHITESPACE,
207
        ['\n'] = IN_WHITESPACE,
208
    },        
209

    
210
    /* operator */
211
    [IN_OPERATOR_DONE] = {
212
        TERMINAL(JSON_OPERATOR),
213
    },
214

    
215
    /* escape */
216
    [IN_ESCAPE_DONE] = {
217
        TERMINAL(JSON_ESCAPE),
218
    },
219

    
220
    [IN_ESCAPE_LL] = {
221
        ['d'] = IN_ESCAPE_DONE,
222
    },
223

    
224
    [IN_ESCAPE_L] = {
225
        ['d'] = IN_ESCAPE_DONE,
226
        ['l'] = IN_ESCAPE_LL,
227
    },
228

    
229
    [IN_ESCAPE_I64] = {
230
        ['d'] = IN_ESCAPE_DONE,
231
    },
232

    
233
    [IN_ESCAPE_I6] = {
234
        ['4'] = IN_ESCAPE_I64,
235
    },
236

    
237
    [IN_ESCAPE_I] = {
238
        ['6'] = IN_ESCAPE_I6,
239
    },
240

    
241
    [IN_ESCAPE] = {
242
        ['d'] = IN_ESCAPE_DONE,
243
        ['i'] = IN_ESCAPE_DONE,
244
        ['p'] = IN_ESCAPE_DONE,
245
        ['s'] = IN_ESCAPE_DONE,
246
        ['f'] = IN_ESCAPE_DONE,
247
        ['l'] = IN_ESCAPE_L,
248
        ['I'] = IN_ESCAPE_I,
249
    },
250

    
251
    /* top level rule */
252
    [IN_START] = {
253
        ['"'] = IN_DQ_STRING,
254
        ['\''] = IN_SQ_STRING,
255
        ['0'] = IN_ZERO,
256
        ['1' ... '9'] = IN_NONZERO_NUMBER,
257
        ['-'] = IN_NEG_NONZERO_NUMBER,
258
        ['{'] = IN_OPERATOR_DONE,
259
        ['}'] = IN_OPERATOR_DONE,
260
        ['['] = IN_OPERATOR_DONE,
261
        [']'] = IN_OPERATOR_DONE,
262
        [','] = IN_OPERATOR_DONE,
263
        [':'] = IN_OPERATOR_DONE,
264
        ['a' ... 'z'] = IN_KEYWORD,
265
        ['%'] = IN_ESCAPE,
266
        [' '] = IN_WHITESPACE,
267
        ['\t'] = IN_WHITESPACE,
268
        ['\r'] = IN_WHITESPACE,
269
        ['\n'] = IN_WHITESPACE,
270
    },
271
};
272

    
273
/*
 * Prepare @lexer for use.
 *
 * @lexer: lexer to initialize
 * @func:  callback invoked for every completed token
 *
 * Starts in IN_START with an empty token buffer.  The position counters are
 * zeroed here because json_lexer_feed_char() increments them and passes them
 * to the emitter; without this they would hold whatever the caller's storage
 * happened to contain.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    lexer->x = 0;
    lexer->y = 0;
}
279

    
280
/*
 * Advance the lexer by one input byte.
 *
 * @lexer: lexer state
 * @ch:    next input byte; NUL is what json_lexer_flush() feeds to mark the
 *         end of input
 *
 * If @ch completes the pending token, the token is handed to lexer->emit()
 * and @ch is then dispatched again from IN_START as the first byte of the
 * next token.
 *
 * Returns 0 on success, -EINVAL if @ch is not valid at this point.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch)
{
    char buf[2];

    /* Position bookkeeping: a newline resets the column and bumps the line. */
    lexer->x++;
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    }

    lexer->state = json_lexer[lexer->state][(uint8_t)ch];

    switch (lexer->state) {
    case JSON_OPERATOR:
    case JSON_ESCAPE:
    case JSON_INTEGER:
    case JSON_FLOAT:
    case JSON_KEYWORD:
    case JSON_STRING:
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        /* fall through */
    case JSON_SKIP:
        /* @ch was only lookahead; restart with it as the first byte. */
        lexer->state = json_lexer[IN_START][(uint8_t)ch];
        QDECREF(lexer->token);
        lexer->token = qstring_new();
        /*
         * Report a byte that cannot start a token right away, the same way
         * it is reported when the lexer is idle.  NUL is exempt because
         * json_lexer_flush() feeds it as an end-of-input marker and expects
         * success once the pending token has been emitted.
         */
        if (lexer->state == ERROR && ch != '\0') {
            return -EINVAL;
        }
        break;
    case ERROR:
        return -EINVAL;
    default:
        break;
    }

    /* Append @ch to the token being collected (a NUL appends nothing). */
    buf[0] = ch;
    buf[1] = 0;
    qstring_append(lexer->token, buf);

    return 0;
}
318

    
319
/*
 * Feed @size bytes starting at @buffer to the lexer, one byte at a time.
 *
 * Stops at the first byte that json_lexer_feed_char() rejects and returns
 * that negative error code; returns 0 when every byte was accepted.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    const char *cursor = buffer;
    size_t remaining;

    for (remaining = size; remaining > 0; remaining--) {
        int rc = json_lexer_feed_char(lexer, *cursor++);

        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
334

    
335
/*
 * Signal end of input so that a token still being collected is emitted.
 *
 * A NUL byte is fed as the end marker.  When the lexer is already in
 * IN_START there is nothing pending, so 0 is returned without feeding it
 * (IN_START has no transition on NUL and would report an error).
 *
 * After a successful flush the lexer is put back into IN_START, because
 * feeding NUL otherwise leaves it in ERROR and every later byte would be
 * rejected.
 *
 * Returns 0 on success, -EINVAL if the pending input is not a complete token.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    int err;

    if (lexer->state == IN_START) {
        return 0;
    }

    err = json_lexer_feed_char(lexer, 0);
    if (err == 0) {
        lexer->state = IN_START;
    }

    return err;
}
339

    
340
/*
 * Release the resources held by @lexer.
 *
 * Drops the lexer's reference to its token buffer and clears the pointer so
 * that a stale reference is not left behind in the structure.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = NULL;
}