Statistics
| Branch: | Revision:

root / json-lexer.c @ 57a46d05

History | View | Annotate | Download (7.1 kB)

1
/*
 * JSON lexer
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13

    
14
#include "qstring.h"
15
#include "qlist.h"
16
#include "qdict.h"
17
#include "qint.h"
18
#include "qemu-common.h"
19
#include "json-lexer.h"
20

    
21
/*
 * Tokens recognized by the lexer, written as informal regular expressions:
 *
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
 * [{}\[\],:]
 * [a-z]+
 *
 */
29

    
30
/*
 * States of the lexer's state machine.
 *
 * Every enumerator names a row of the json_lexer[] transition table.
 * ERROR must remain 0: table cells that the designated initializers leave
 * unspecified are zero-filled, and a zero state is what
 * json_lexer_feed_char() reports as invalid input.
 *
 * NOTE(review): table cells hold either one of these states or a JSON_*
 * token type (declared outside this file); presumably the two value ranges
 * do not overlap -- confirm against json-lexer.h before adding states.
 */
enum json_lexer_state {
    ERROR = 0,

    /* closing quote of a string has been read */
    IN_DONE_STRING,

    /* double-quoted string: \uXXXX digits, escape, body */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string: \uXXXX digits, escape, body */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* bare lowercase words */
    IN_KEYWORD,

    /* '%' escapes */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_DONE,

    IN_WHITESPACE,
    IN_OPERATOR_DONE,

    /* initial state; also the row used to restart after each token */
    IN_START,
};
62

    
63
/*
 * Designated-initializer fragment for a 256-entry table row: maps every
 * byte in 0x00..0x7F to `token`.  Later initializers in the same row may
 * override individual bytes.  Uses the `[first ... last]` range extension.
 */
#define TERMINAL(token) [0 ... 0x7F] = (token)
64

    
65
static const uint8_t json_lexer[][256] =  {
66
    [IN_DONE_STRING] = {
67
        TERMINAL(JSON_STRING),
68
    },
69

    
70
    /* double quote string */
71
    [IN_DQ_UCODE3] = {
72
        ['0' ... '9'] = IN_DQ_STRING,
73
        ['a' ... 'f'] = IN_DQ_STRING,
74
        ['A' ... 'F'] = IN_DQ_STRING,
75
    },
76
    [IN_DQ_UCODE2] = {
77
        ['0' ... '9'] = IN_DQ_UCODE3,
78
        ['a' ... 'f'] = IN_DQ_UCODE3,
79
        ['A' ... 'F'] = IN_DQ_UCODE3,
80
    },
81
    [IN_DQ_UCODE1] = {
82
        ['0' ... '9'] = IN_DQ_UCODE2,
83
        ['a' ... 'f'] = IN_DQ_UCODE2,
84
        ['A' ... 'F'] = IN_DQ_UCODE2,
85
    },
86
    [IN_DQ_UCODE0] = {
87
        ['0' ... '9'] = IN_DQ_UCODE1,
88
        ['a' ... 'f'] = IN_DQ_UCODE1,
89
        ['A' ... 'F'] = IN_DQ_UCODE1,
90
    },
91
    [IN_DQ_STRING_ESCAPE] = {
92
        ['b'] = IN_DQ_STRING,
93
        ['f'] =  IN_DQ_STRING,
94
        ['n'] =  IN_DQ_STRING,
95
        ['r'] =  IN_DQ_STRING,
96
        ['t'] =  IN_DQ_STRING,
97
        ['\''] = IN_DQ_STRING,
98
        ['\"'] = IN_DQ_STRING,
99
        ['u'] = IN_DQ_UCODE0,
100
    },
101
    [IN_DQ_STRING] = {
102
        [1 ... 0xFF] = IN_DQ_STRING,
103
        ['\\'] = IN_DQ_STRING_ESCAPE,
104
        ['"'] = IN_DONE_STRING,
105
    },
106

    
107
    /* single quote string */
108
    [IN_SQ_UCODE3] = {
109
        ['0' ... '9'] = IN_SQ_STRING,
110
        ['a' ... 'f'] = IN_SQ_STRING,
111
        ['A' ... 'F'] = IN_SQ_STRING,
112
    },
113
    [IN_SQ_UCODE2] = {
114
        ['0' ... '9'] = IN_SQ_UCODE3,
115
        ['a' ... 'f'] = IN_SQ_UCODE3,
116
        ['A' ... 'F'] = IN_SQ_UCODE3,
117
    },
118
    [IN_SQ_UCODE1] = {
119
        ['0' ... '9'] = IN_SQ_UCODE2,
120
        ['a' ... 'f'] = IN_SQ_UCODE2,
121
        ['A' ... 'F'] = IN_SQ_UCODE2,
122
    },
123
    [IN_SQ_UCODE0] = {
124
        ['0' ... '9'] = IN_SQ_UCODE1,
125
        ['a' ... 'f'] = IN_SQ_UCODE1,
126
        ['A' ... 'F'] = IN_SQ_UCODE1,
127
    },
128
    [IN_SQ_STRING_ESCAPE] = {
129
        ['b'] = IN_SQ_STRING,
130
        ['f'] =  IN_SQ_STRING,
131
        ['n'] =  IN_SQ_STRING,
132
        ['r'] =  IN_SQ_STRING,
133
        ['t'] =  IN_SQ_STRING,
134
        ['\''] = IN_SQ_STRING,
135
        ['\"'] = IN_SQ_STRING,
136
        ['u'] = IN_SQ_UCODE0,
137
    },
138
    [IN_SQ_STRING] = {
139
        [1 ... 0xFF] = IN_SQ_STRING,
140
        ['\\'] = IN_SQ_STRING_ESCAPE,
141
        ['\''] = IN_DONE_STRING,
142
    },
143

    
144
    /* Zero */
145
    [IN_ZERO] = {
146
        TERMINAL(JSON_INTEGER),
147
        ['0' ... '9'] = ERROR,
148
        ['.'] = IN_MANTISSA,
149
    },
150

    
151
    /* Float */
152
    [IN_DIGITS] = {
153
        TERMINAL(JSON_FLOAT),
154
        ['0' ... '9'] = IN_DIGITS,
155
    },
156

    
157
    [IN_DIGIT] = {
158
        ['0' ... '9'] = IN_DIGITS,
159
    },
160

    
161
    [IN_EXP_E] = {
162
        ['-'] = IN_DIGIT,
163
        ['+'] = IN_DIGIT,
164
        ['0' ... '9'] = IN_DIGITS,
165
    },
166

    
167
    [IN_MANTISSA_DIGITS] = {
168
        TERMINAL(JSON_FLOAT),
169
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
170
        ['e'] = IN_EXP_E,
171
        ['E'] = IN_EXP_E,
172
    },
173

    
174
    [IN_MANTISSA] = {
175
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
176
    },
177

    
178
    /* Number */
179
    [IN_NONZERO_NUMBER] = {
180
        TERMINAL(JSON_INTEGER),
181
        ['0' ... '9'] = IN_NONZERO_NUMBER,
182
        ['e'] = IN_EXP_E,
183
        ['E'] = IN_EXP_E,
184
        ['.'] = IN_MANTISSA,
185
    },
186

    
187
    [IN_NEG_NONZERO_NUMBER] = {
188
        ['0'] = IN_ZERO,
189
        ['1' ... '9'] = IN_NONZERO_NUMBER,
190
    },
191

    
192
    /* keywords */
193
    [IN_KEYWORD] = {
194
        TERMINAL(JSON_KEYWORD),
195
        ['a' ... 'z'] = IN_KEYWORD,
196
    },
197

    
198
    /* whitespace */
199
    [IN_WHITESPACE] = {
200
        TERMINAL(JSON_SKIP),
201
        [' '] = IN_WHITESPACE,
202
        ['\t'] = IN_WHITESPACE,
203
        ['\r'] = IN_WHITESPACE,
204
        ['\n'] = IN_WHITESPACE,
205
    },        
206

    
207
    /* operator */
208
    [IN_OPERATOR_DONE] = {
209
        TERMINAL(JSON_OPERATOR),
210
    },
211

    
212
    /* escape */
213
    [IN_ESCAPE_DONE] = {
214
        TERMINAL(JSON_ESCAPE),
215
    },
216

    
217
    [IN_ESCAPE_LL] = {
218
        ['d'] = IN_ESCAPE_DONE,
219
    },
220

    
221
    [IN_ESCAPE_L] = {
222
        ['d'] = IN_ESCAPE_DONE,
223
        ['l'] = IN_ESCAPE_LL,
224
    },
225

    
226
    [IN_ESCAPE] = {
227
        ['d'] = IN_ESCAPE_DONE,
228
        ['i'] = IN_ESCAPE_DONE,
229
        ['p'] = IN_ESCAPE_DONE,
230
        ['s'] = IN_ESCAPE_DONE,
231
        ['f'] = IN_ESCAPE_DONE,
232
        ['l'] = IN_ESCAPE_L,
233
    },
234

    
235
    /* top level rule */
236
    [IN_START] = {
237
        ['"'] = IN_DQ_STRING,
238
        ['\''] = IN_SQ_STRING,
239
        ['0'] = IN_ZERO,
240
        ['1' ... '9'] = IN_NONZERO_NUMBER,
241
        ['-'] = IN_NEG_NONZERO_NUMBER,
242
        ['{'] = IN_OPERATOR_DONE,
243
        ['}'] = IN_OPERATOR_DONE,
244
        ['['] = IN_OPERATOR_DONE,
245
        [']'] = IN_OPERATOR_DONE,
246
        [','] = IN_OPERATOR_DONE,
247
        [':'] = IN_OPERATOR_DONE,
248
        ['a' ... 'z'] = IN_KEYWORD,
249
        ['%'] = IN_ESCAPE,
250
        [' '] = IN_WHITESPACE,
251
        ['\t'] = IN_WHITESPACE,
252
        ['\r'] = IN_WHITESPACE,
253
        ['\n'] = IN_WHITESPACE,
254
    },
255
};
256

    
257
/*
 * Prepare a lexer for use.
 *
 * @lexer: caller-provided lexer object; its previous contents are ignored.
 * @func:  callback invoked for every completed token.
 *
 * The lexer starts in IN_START with an empty token buffer.  The position
 * counters are zeroed here because json_lexer_feed_char() increments them
 * and hands them to the emitter; the caller's storage may contain garbage.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    lexer->x = 0;
    lexer->y = 0;
}
263

    
264
/*
 * Advance the state machine by one input byte.
 *
 * @lexer: lexer to advance.
 * @ch:    next input byte; 0 is used by json_lexer_flush() as end-of-input.
 *
 * Returns 0 on success, or -EINVAL when @ch is not valid in the current
 * state.  On error the state is left at ERROR; the ERROR row of the
 * transition table has no transitions, so later calls keep failing.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch)
{
    char buf[2];

    /* Track the position that is passed to the emitter. */
    lexer->x++;
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    }

    /* Cast so that bytes >= 0x80 index the table as 128..255. */
    lexer->state = json_lexer[lexer->state][(uint8_t)ch];

    switch (lexer->state) {
    case JSON_OPERATOR:
    case JSON_ESCAPE:
    case JSON_INTEGER:
    case JSON_FLOAT:
    case JSON_KEYWORD:
    case JSON_STRING:
        /* The table yielded a token type: the buffered token is complete. */
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        /* fall through */
    case JSON_SKIP:
        /*
         * @ch terminated the previous token but is not part of it, so
         * restart from IN_START with the same byte and a fresh buffer.
         */
        lexer->state = json_lexer[IN_START][(uint8_t)ch];
        QDECREF(lexer->token);
        lexer->token = qstring_new();
        break;
    case ERROR:
        return -EINVAL;
    default:
        break;
    }

    /* Append @ch to the token being collected, as a one-character string. */
    buf[0] = ch;
    buf[1] = 0;

    qstring_append(lexer->token, buf);

    return 0;
}
302

    
303
/*
 * Feed a buffer of input bytes to the lexer.
 *
 * @lexer:  lexer to advance.
 * @buffer: input bytes; need not be NUL-terminated.
 * @size:   number of bytes in @buffer.
 *
 * Bytes are processed in order.  Processing stops at the first byte that
 * json_lexer_feed_char() rejects, and that negative error code is returned;
 * the remaining bytes are not consumed.  Returns 0 if every byte was
 * accepted (including when @size is 0).
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++) {
        int err = json_lexer_feed_char(lexer, buffer[i]);

        if (err < 0) {
            return err;
        }
    }

    return 0;
}
318

    
319
/*
 * Signal end of input so that a token still being collected is emitted.
 *
 * A NUL byte is fed as the terminator.  Returns 0 on success, or the
 * negative error code from json_lexer_feed_char() when the pending input
 * is not a complete token (for example an unterminated string).
 *
 * If nothing has been fed since initialization or since the last
 * successful flush, this is a no-op that returns 0: the IN_START row has
 * no transition for NUL, so feeding it would report a spurious error.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    int ret;

    if (lexer->state == IN_START) {
        return 0;
    }

    ret = json_lexer_feed_char(lexer, 0);
    if (ret == 0) {
        /*
         * After emitting, json_lexer_feed_char() restarts with the NUL
         * byte itself, which leaves the state at ERROR.  Put the lexer
         * back in its initial state so it can be fed or flushed again.
         */
        lexer->state = IN_START;
    }

    return ret;
}
323

    
324
/*
 * Release the resources held by a lexer.
 *
 * Drops the lexer's reference on its token buffer via QDECREF().  The
 * lexer object itself belongs to the caller and is not freed here.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
}