Statistics
| Branch: | Revision:

root / json-lexer.c @ 57a46d05

History | View | Annotate | Download (7.1 kB)

1 5ab8558d Anthony Liguori
/*
2 5ab8558d Anthony Liguori
 * JSON lexer
3 5ab8558d Anthony Liguori
 *
4 5ab8558d Anthony Liguori
 * Copyright IBM, Corp. 2009
5 5ab8558d Anthony Liguori
 *
6 5ab8558d Anthony Liguori
 * Authors:
7 5ab8558d Anthony Liguori
 *  Anthony Liguori   <aliguori@us.ibm.com>
8 5ab8558d Anthony Liguori
 *
9 5ab8558d Anthony Liguori
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 5ab8558d Anthony Liguori
 * See the COPYING.LIB file in the top-level directory.
11 5ab8558d Anthony Liguori
 *
12 5ab8558d Anthony Liguori
 */
13 5ab8558d Anthony Liguori
14 5ab8558d Anthony Liguori
#include "qstring.h"
15 5ab8558d Anthony Liguori
#include "qlist.h"
16 5ab8558d Anthony Liguori
#include "qdict.h"
17 5ab8558d Anthony Liguori
#include "qint.h"
18 5ab8558d Anthony Liguori
#include "qemu-common.h"
19 5ab8558d Anthony Liguori
#include "json-lexer.h"
20 5ab8558d Anthony Liguori
21 5ab8558d Anthony Liguori
/*
 * \"([^\\\"]|(\\\"|\\'|\\\\|\\/|\\b|\\f|\\n|\\r|\\t|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\\"|\\'|\\\\|\\/|\\b|\\f|\\n|\\r|\\t|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?|[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
 * [{}\[\],:]
 * [a-z]+
 *
 */
29 5ab8558d Anthony Liguori
30 5ab8558d Anthony Liguori
/*
 * States of the lexer's state machine.  The numeric value of each state
 * is its row index in the transition table, and ERROR must stay 0 so
 * that table entries without an explicit initializer mean "error".
 */
enum json_lexer_state {
    ERROR = 0,

    /* a string literal has just been closed */
    IN_DONE_STRING,

    /* inside a double-quoted string */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* inside a single-quoted string */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* bare lower-case words */
    IN_KEYWORD,

    /* '%' interpolation escapes */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_DONE,

    IN_WHITESPACE,
    IN_OPERATOR_DONE,

    /* initial state; also used to re-dispatch the byte that ended a token */
    IN_START,
};
62 5ab8558d Anthony Liguori
63 5ab8558d Anthony Liguori
/*
 * Row initializer for states in which a token may be complete: maps every
 * byte in 0..0x7F to the given value using a GNU C range designator.
 * Designators written after it in the same row override individual bytes.
 */
#define TERMINAL(state) [0 ... 0x7F] = (state)
64 5ab8558d Anthony Liguori
65 5ab8558d Anthony Liguori
static const uint8_t json_lexer[][256] =  {
66 5ab8558d Anthony Liguori
    [IN_DONE_STRING] = {
67 5ab8558d Anthony Liguori
        TERMINAL(JSON_STRING),
68 5ab8558d Anthony Liguori
    },
69 5ab8558d Anthony Liguori
70 5ab8558d Anthony Liguori
    /* double quote string */
71 5ab8558d Anthony Liguori
    [IN_DQ_UCODE3] = {
72 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_STRING,
73 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_STRING,
74 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_STRING,
75 5ab8558d Anthony Liguori
    },
76 5ab8558d Anthony Liguori
    [IN_DQ_UCODE2] = {
77 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE3,
78 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE3,
79 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE3,
80 5ab8558d Anthony Liguori
    },
81 5ab8558d Anthony Liguori
    [IN_DQ_UCODE1] = {
82 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE2,
83 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE2,
84 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE2,
85 5ab8558d Anthony Liguori
    },
86 5ab8558d Anthony Liguori
    [IN_DQ_UCODE0] = {
87 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE1,
88 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE1,
89 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE1,
90 5ab8558d Anthony Liguori
    },
91 5ab8558d Anthony Liguori
    [IN_DQ_STRING_ESCAPE] = {
92 5ab8558d Anthony Liguori
        ['b'] = IN_DQ_STRING,
93 5ab8558d Anthony Liguori
        ['f'] =  IN_DQ_STRING,
94 5ab8558d Anthony Liguori
        ['n'] =  IN_DQ_STRING,
95 5ab8558d Anthony Liguori
        ['r'] =  IN_DQ_STRING,
96 5ab8558d Anthony Liguori
        ['t'] =  IN_DQ_STRING,
97 5ab8558d Anthony Liguori
        ['\''] = IN_DQ_STRING,
98 5ab8558d Anthony Liguori
        ['\"'] = IN_DQ_STRING,
99 5ab8558d Anthony Liguori
        ['u'] = IN_DQ_UCODE0,
100 5ab8558d Anthony Liguori
    },
101 5ab8558d Anthony Liguori
    [IN_DQ_STRING] = {
102 5ab8558d Anthony Liguori
        [1 ... 0xFF] = IN_DQ_STRING,
103 5ab8558d Anthony Liguori
        ['\\'] = IN_DQ_STRING_ESCAPE,
104 5ab8558d Anthony Liguori
        ['"'] = IN_DONE_STRING,
105 5ab8558d Anthony Liguori
    },
106 5ab8558d Anthony Liguori
107 5ab8558d Anthony Liguori
    /* single quote string */
108 5ab8558d Anthony Liguori
    [IN_SQ_UCODE3] = {
109 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_STRING,
110 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_STRING,
111 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_STRING,
112 5ab8558d Anthony Liguori
    },
113 5ab8558d Anthony Liguori
    [IN_SQ_UCODE2] = {
114 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE3,
115 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE3,
116 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE3,
117 5ab8558d Anthony Liguori
    },
118 5ab8558d Anthony Liguori
    [IN_SQ_UCODE1] = {
119 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE2,
120 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE2,
121 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE2,
122 5ab8558d Anthony Liguori
    },
123 5ab8558d Anthony Liguori
    [IN_SQ_UCODE0] = {
124 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE1,
125 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE1,
126 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE1,
127 5ab8558d Anthony Liguori
    },
128 5ab8558d Anthony Liguori
    [IN_SQ_STRING_ESCAPE] = {
129 5ab8558d Anthony Liguori
        ['b'] = IN_SQ_STRING,
130 5ab8558d Anthony Liguori
        ['f'] =  IN_SQ_STRING,
131 5ab8558d Anthony Liguori
        ['n'] =  IN_SQ_STRING,
132 5ab8558d Anthony Liguori
        ['r'] =  IN_SQ_STRING,
133 5ab8558d Anthony Liguori
        ['t'] =  IN_SQ_STRING,
134 5ab8558d Anthony Liguori
        ['\''] = IN_SQ_STRING,
135 5ab8558d Anthony Liguori
        ['\"'] = IN_SQ_STRING,
136 5ab8558d Anthony Liguori
        ['u'] = IN_SQ_UCODE0,
137 5ab8558d Anthony Liguori
    },
138 5ab8558d Anthony Liguori
    [IN_SQ_STRING] = {
139 5ab8558d Anthony Liguori
        [1 ... 0xFF] = IN_SQ_STRING,
140 5ab8558d Anthony Liguori
        ['\\'] = IN_SQ_STRING_ESCAPE,
141 5ab8558d Anthony Liguori
        ['\''] = IN_DONE_STRING,
142 5ab8558d Anthony Liguori
    },
143 5ab8558d Anthony Liguori
144 5ab8558d Anthony Liguori
    /* Zero */
145 5ab8558d Anthony Liguori
    [IN_ZERO] = {
146 5ab8558d Anthony Liguori
        TERMINAL(JSON_INTEGER),
147 5ab8558d Anthony Liguori
        ['0' ... '9'] = ERROR,
148 5ab8558d Anthony Liguori
        ['.'] = IN_MANTISSA,
149 5ab8558d Anthony Liguori
    },
150 5ab8558d Anthony Liguori
151 5ab8558d Anthony Liguori
    /* Float */
152 5ab8558d Anthony Liguori
    [IN_DIGITS] = {
153 5ab8558d Anthony Liguori
        TERMINAL(JSON_FLOAT),
154 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
155 5ab8558d Anthony Liguori
    },
156 5ab8558d Anthony Liguori
157 5ab8558d Anthony Liguori
    [IN_DIGIT] = {
158 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
159 5ab8558d Anthony Liguori
    },
160 5ab8558d Anthony Liguori
161 5ab8558d Anthony Liguori
    [IN_EXP_E] = {
162 5ab8558d Anthony Liguori
        ['-'] = IN_DIGIT,
163 5ab8558d Anthony Liguori
        ['+'] = IN_DIGIT,
164 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
165 5ab8558d Anthony Liguori
    },
166 5ab8558d Anthony Liguori
167 5ab8558d Anthony Liguori
    [IN_MANTISSA_DIGITS] = {
168 5ab8558d Anthony Liguori
        TERMINAL(JSON_FLOAT),
169 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
170 5ab8558d Anthony Liguori
        ['e'] = IN_EXP_E,
171 5ab8558d Anthony Liguori
        ['E'] = IN_EXP_E,
172 5ab8558d Anthony Liguori
    },
173 5ab8558d Anthony Liguori
174 5ab8558d Anthony Liguori
    [IN_MANTISSA] = {
175 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
176 5ab8558d Anthony Liguori
    },
177 5ab8558d Anthony Liguori
178 5ab8558d Anthony Liguori
    /* Number */
179 5ab8558d Anthony Liguori
    [IN_NONZERO_NUMBER] = {
180 5ab8558d Anthony Liguori
        TERMINAL(JSON_INTEGER),
181 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_NONZERO_NUMBER,
182 5ab8558d Anthony Liguori
        ['e'] = IN_EXP_E,
183 5ab8558d Anthony Liguori
        ['E'] = IN_EXP_E,
184 5ab8558d Anthony Liguori
        ['.'] = IN_MANTISSA,
185 5ab8558d Anthony Liguori
    },
186 5ab8558d Anthony Liguori
187 5ab8558d Anthony Liguori
    [IN_NEG_NONZERO_NUMBER] = {
188 5ab8558d Anthony Liguori
        ['0'] = IN_ZERO,
189 5ab8558d Anthony Liguori
        ['1' ... '9'] = IN_NONZERO_NUMBER,
190 5ab8558d Anthony Liguori
    },
191 5ab8558d Anthony Liguori
192 5ab8558d Anthony Liguori
    /* keywords */
193 5ab8558d Anthony Liguori
    [IN_KEYWORD] = {
194 5ab8558d Anthony Liguori
        TERMINAL(JSON_KEYWORD),
195 5ab8558d Anthony Liguori
        ['a' ... 'z'] = IN_KEYWORD,
196 5ab8558d Anthony Liguori
    },
197 5ab8558d Anthony Liguori
198 5ab8558d Anthony Liguori
    /* whitespace */
199 5ab8558d Anthony Liguori
    [IN_WHITESPACE] = {
200 5ab8558d Anthony Liguori
        TERMINAL(JSON_SKIP),
201 5ab8558d Anthony Liguori
        [' '] = IN_WHITESPACE,
202 5ab8558d Anthony Liguori
        ['\t'] = IN_WHITESPACE,
203 5ab8558d Anthony Liguori
        ['\r'] = IN_WHITESPACE,
204 5ab8558d Anthony Liguori
        ['\n'] = IN_WHITESPACE,
205 5ab8558d Anthony Liguori
    },        
206 5ab8558d Anthony Liguori
207 5ab8558d Anthony Liguori
    /* operator */
208 5ab8558d Anthony Liguori
    [IN_OPERATOR_DONE] = {
209 5ab8558d Anthony Liguori
        TERMINAL(JSON_OPERATOR),
210 5ab8558d Anthony Liguori
    },
211 5ab8558d Anthony Liguori
212 5ab8558d Anthony Liguori
    /* escape */
213 5ab8558d Anthony Liguori
    [IN_ESCAPE_DONE] = {
214 5ab8558d Anthony Liguori
        TERMINAL(JSON_ESCAPE),
215 5ab8558d Anthony Liguori
    },
216 5ab8558d Anthony Liguori
217 5ab8558d Anthony Liguori
    [IN_ESCAPE_LL] = {
218 5ab8558d Anthony Liguori
        ['d'] = IN_ESCAPE_DONE,
219 5ab8558d Anthony Liguori
    },
220 5ab8558d Anthony Liguori
221 5ab8558d Anthony Liguori
    [IN_ESCAPE_L] = {
222 5ab8558d Anthony Liguori
        ['d'] = IN_ESCAPE_DONE,
223 5ab8558d Anthony Liguori
        ['l'] = IN_ESCAPE_LL,
224 5ab8558d Anthony Liguori
    },
225 5ab8558d Anthony Liguori
226 5ab8558d Anthony Liguori
    [IN_ESCAPE] = {
227 5ab8558d Anthony Liguori
        ['d'] = IN_ESCAPE_DONE,
228 5ab8558d Anthony Liguori
        ['i'] = IN_ESCAPE_DONE,
229 5ab8558d Anthony Liguori
        ['p'] = IN_ESCAPE_DONE,
230 5ab8558d Anthony Liguori
        ['s'] = IN_ESCAPE_DONE,
231 5ab8558d Anthony Liguori
        ['f'] = IN_ESCAPE_DONE,
232 5ab8558d Anthony Liguori
        ['l'] = IN_ESCAPE_L,
233 5ab8558d Anthony Liguori
    },
234 5ab8558d Anthony Liguori
235 5ab8558d Anthony Liguori
    /* top level rule */
236 5ab8558d Anthony Liguori
    [IN_START] = {
237 5ab8558d Anthony Liguori
        ['"'] = IN_DQ_STRING,
238 5ab8558d Anthony Liguori
        ['\''] = IN_SQ_STRING,
239 5ab8558d Anthony Liguori
        ['0'] = IN_ZERO,
240 5ab8558d Anthony Liguori
        ['1' ... '9'] = IN_NONZERO_NUMBER,
241 5ab8558d Anthony Liguori
        ['-'] = IN_NEG_NONZERO_NUMBER,
242 5ab8558d Anthony Liguori
        ['{'] = IN_OPERATOR_DONE,
243 5ab8558d Anthony Liguori
        ['}'] = IN_OPERATOR_DONE,
244 5ab8558d Anthony Liguori
        ['['] = IN_OPERATOR_DONE,
245 5ab8558d Anthony Liguori
        [']'] = IN_OPERATOR_DONE,
246 5ab8558d Anthony Liguori
        [','] = IN_OPERATOR_DONE,
247 5ab8558d Anthony Liguori
        [':'] = IN_OPERATOR_DONE,
248 5ab8558d Anthony Liguori
        ['a' ... 'z'] = IN_KEYWORD,
249 5ab8558d Anthony Liguori
        ['%'] = IN_ESCAPE,
250 5ab8558d Anthony Liguori
        [' '] = IN_WHITESPACE,
251 5ab8558d Anthony Liguori
        ['\t'] = IN_WHITESPACE,
252 5ab8558d Anthony Liguori
        ['\r'] = IN_WHITESPACE,
253 5ab8558d Anthony Liguori
        ['\n'] = IN_WHITESPACE,
254 5ab8558d Anthony Liguori
    },
255 5ab8558d Anthony Liguori
};
256 5ab8558d Anthony Liguori
257 5ab8558d Anthony Liguori
/*
 * Prepare a lexer for use.
 *
 * @lexer: lexer to initialize
 * @func:  callback invoked for every completed token
 *
 * The lexer starts in IN_START with an empty token buffer and its
 * position counters at zero.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    /* the feed path increments and reports x/y, so they must start defined */
    lexer->x = 0;
    lexer->y = 0;
}
263 5ab8558d Anthony Liguori
264 5ab8558d Anthony Liguori
/*
 * Advance the lexer by one input byte.
 *
 * @lexer: lexer state
 * @ch:    next input byte; json_lexer_flush() passes 0 to terminate a
 *         pending token
 *
 * When the byte completes a token, the token text gathered so far is
 * handed to lexer->emit (whitespace is dropped silently) and the same
 * byte is then re-dispatched from IN_START as the first byte of the next
 * token.
 *
 * Returns 0 on success, -EINVAL if the byte is not valid at this point.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch)
{
    char buf[2];

    /* x is reset on every newline, y counts the newlines seen */
    lexer->x++;
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    }

    lexer->state = json_lexer[lexer->state][(uint8_t)ch];

    switch (lexer->state) {
    case JSON_OPERATOR:
    case JSON_ESCAPE:
    case JSON_INTEGER:
    case JSON_FLOAT:
    case JSON_KEYWORD:
    case JSON_STRING:
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        /* fall through */
    case JSON_SKIP:
        /* ch only served as lookahead; restart from IN_START with it */
        lexer->state = json_lexer[IN_START][(uint8_t)ch];
        QDECREF(lexer->token);
        lexer->token = qstring_new();
        /*
         * Report a byte that cannot start a token right away instead of
         * on the next call.  The NUL fed by json_lexer_flush() is exempt:
         * it only terminates the previous token.
         */
        if (lexer->state == ERROR && ch != '\0') {
            return -EINVAL;
        }
        break;
    case ERROR:
        return -EINVAL;
    default:
        break;
    }

    /* accumulate the byte into the token being built */
    buf[0] = ch;
    buf[1] = 0;

    qstring_append(lexer->token, buf);

    return 0;
}
302 5ab8558d Anthony Liguori
303 5ab8558d Anthony Liguori
/*
 * Feed a chunk of input to the lexer, one byte at a time.
 *
 * @lexer:  lexer state
 * @buffer: input bytes
 * @size:   number of bytes in @buffer
 *
 * Stops at the first byte the lexer rejects and returns that negative
 * error code; returns 0 once every byte has been consumed.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    while (size-- > 0) {
        int ret = json_lexer_feed_char(lexer, *buffer++);

        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
318 5ab8558d Anthony Liguori
319 5ab8558d Anthony Liguori
/*
 * Signal end of input so that a token still being accumulated is
 * terminated and emitted.
 *
 * This is done by feeding a NUL byte.  A lexer that is still in IN_START
 * has nothing pending, and IN_START has no transition for NUL, so that
 * case is answered directly instead of being reported as an error.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    if (lexer->state == IN_START) {
        return 0;
    }

    return json_lexer_feed_char(lexer, 0);
}
323 5ab8558d Anthony Liguori
324 5ab8558d Anthony Liguori
/*
 * Release the resources held by a lexer.
 *
 * Drops the lexer's reference to its token buffer and clears the pointer
 * so that no dangling reference is left behind in the structure.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = NULL;
}