root / json-lexer.c @ 57a46d05
History | View | Annotate | Download (7.1 kB)
1 |
/*
|
---|---|
2 |
* JSON lexer
|
3 |
*
|
4 |
* Copyright IBM, Corp. 2009
|
5 |
*
|
6 |
* Authors:
|
7 |
* Anthony Liguori <aliguori@us.ibm.com>
|
8 |
*
|
9 |
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
|
10 |
* See the COPYING.LIB file in the top-level directory.
|
11 |
*
|
12 |
*/
|
13 |
|
14 |
#include "qstring.h" |
15 |
#include "qlist.h" |
16 |
#include "qdict.h" |
17 |
#include "qint.h" |
18 |
#include "qemu-common.h" |
19 |
#include "json-lexer.h" |
20 |
|
21 |
/*
|
22 |
* \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
|
23 |
* '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
|
24 |
* -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?|[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
|
25 |
* [{}\[\],:]
|
26 |
* [a-z]+
|
27 |
*
|
28 |
*/
|
29 |
|
30 |
/*
 * States of the lexer's state machine.  Each value indexes one row of the
 * json_lexer[] transition table.
 */
enum json_lexer_state {
    /*
     * Dead state.  Must remain 0: table entries that are not explicitly
     * initialized default to zero and therefore mean "invalid input".
     */
    ERROR = 0,

    /* closing quote of either string flavour has been consumed */
    IN_DONE_STRING,

    /* double-quoted string: \uXXXX digits, escape, body */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string: \uXXXX digits, escape, body */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* bare words */
    IN_KEYWORD,

    /* '%' escapes */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_DONE,

    /* whitespace, single-character operators, initial state */
    IN_WHITESPACE,
    IN_OPERATOR_DONE,
    IN_START,
};
62 |
|
63 |
/*
 * Row initializer: map every 7-bit input byte (0x00-0x7F) to @state.
 * Later designators in the same row may override individual entries.
 * Bytes 0x80-0xFF are left at their zero default.
 */
#define TERMINAL(state) [0x00 ... 0x7F] = (state)
64 |
|
65 |
static const uint8_t json_lexer[][256] = { |
66 |
[IN_DONE_STRING] = { |
67 |
TERMINAL(JSON_STRING), |
68 |
}, |
69 |
|
70 |
/* double quote string */
|
71 |
[IN_DQ_UCODE3] = { |
72 |
['0' ... '9'] = IN_DQ_STRING, |
73 |
['a' ... 'f'] = IN_DQ_STRING, |
74 |
['A' ... 'F'] = IN_DQ_STRING, |
75 |
}, |
76 |
[IN_DQ_UCODE2] = { |
77 |
['0' ... '9'] = IN_DQ_UCODE3, |
78 |
['a' ... 'f'] = IN_DQ_UCODE3, |
79 |
['A' ... 'F'] = IN_DQ_UCODE3, |
80 |
}, |
81 |
[IN_DQ_UCODE1] = { |
82 |
['0' ... '9'] = IN_DQ_UCODE2, |
83 |
['a' ... 'f'] = IN_DQ_UCODE2, |
84 |
['A' ... 'F'] = IN_DQ_UCODE2, |
85 |
}, |
86 |
[IN_DQ_UCODE0] = { |
87 |
['0' ... '9'] = IN_DQ_UCODE1, |
88 |
['a' ... 'f'] = IN_DQ_UCODE1, |
89 |
['A' ... 'F'] = IN_DQ_UCODE1, |
90 |
}, |
91 |
[IN_DQ_STRING_ESCAPE] = { |
92 |
['b'] = IN_DQ_STRING,
|
93 |
['f'] = IN_DQ_STRING,
|
94 |
['n'] = IN_DQ_STRING,
|
95 |
['r'] = IN_DQ_STRING,
|
96 |
['t'] = IN_DQ_STRING,
|
97 |
['\''] = IN_DQ_STRING,
|
98 |
['\"'] = IN_DQ_STRING,
|
99 |
['u'] = IN_DQ_UCODE0,
|
100 |
}, |
101 |
[IN_DQ_STRING] = { |
102 |
[1 ... 0xFF] = IN_DQ_STRING, |
103 |
['\\'] = IN_DQ_STRING_ESCAPE,
|
104 |
['"'] = IN_DONE_STRING,
|
105 |
}, |
106 |
|
107 |
/* single quote string */
|
108 |
[IN_SQ_UCODE3] = { |
109 |
['0' ... '9'] = IN_SQ_STRING, |
110 |
['a' ... 'f'] = IN_SQ_STRING, |
111 |
['A' ... 'F'] = IN_SQ_STRING, |
112 |
}, |
113 |
[IN_SQ_UCODE2] = { |
114 |
['0' ... '9'] = IN_SQ_UCODE3, |
115 |
['a' ... 'f'] = IN_SQ_UCODE3, |
116 |
['A' ... 'F'] = IN_SQ_UCODE3, |
117 |
}, |
118 |
[IN_SQ_UCODE1] = { |
119 |
['0' ... '9'] = IN_SQ_UCODE2, |
120 |
['a' ... 'f'] = IN_SQ_UCODE2, |
121 |
['A' ... 'F'] = IN_SQ_UCODE2, |
122 |
}, |
123 |
[IN_SQ_UCODE0] = { |
124 |
['0' ... '9'] = IN_SQ_UCODE1, |
125 |
['a' ... 'f'] = IN_SQ_UCODE1, |
126 |
['A' ... 'F'] = IN_SQ_UCODE1, |
127 |
}, |
128 |
[IN_SQ_STRING_ESCAPE] = { |
129 |
['b'] = IN_SQ_STRING,
|
130 |
['f'] = IN_SQ_STRING,
|
131 |
['n'] = IN_SQ_STRING,
|
132 |
['r'] = IN_SQ_STRING,
|
133 |
['t'] = IN_SQ_STRING,
|
134 |
['\''] = IN_SQ_STRING,
|
135 |
['\"'] = IN_SQ_STRING,
|
136 |
['u'] = IN_SQ_UCODE0,
|
137 |
}, |
138 |
[IN_SQ_STRING] = { |
139 |
[1 ... 0xFF] = IN_SQ_STRING, |
140 |
['\\'] = IN_SQ_STRING_ESCAPE,
|
141 |
['\''] = IN_DONE_STRING,
|
142 |
}, |
143 |
|
144 |
/* Zero */
|
145 |
[IN_ZERO] = { |
146 |
TERMINAL(JSON_INTEGER), |
147 |
['0' ... '9'] = ERROR, |
148 |
['.'] = IN_MANTISSA,
|
149 |
}, |
150 |
|
151 |
/* Float */
|
152 |
[IN_DIGITS] = { |
153 |
TERMINAL(JSON_FLOAT), |
154 |
['0' ... '9'] = IN_DIGITS, |
155 |
}, |
156 |
|
157 |
[IN_DIGIT] = { |
158 |
['0' ... '9'] = IN_DIGITS, |
159 |
}, |
160 |
|
161 |
[IN_EXP_E] = { |
162 |
['-'] = IN_DIGIT,
|
163 |
['+'] = IN_DIGIT,
|
164 |
['0' ... '9'] = IN_DIGITS, |
165 |
}, |
166 |
|
167 |
[IN_MANTISSA_DIGITS] = { |
168 |
TERMINAL(JSON_FLOAT), |
169 |
['0' ... '9'] = IN_MANTISSA_DIGITS, |
170 |
['e'] = IN_EXP_E,
|
171 |
['E'] = IN_EXP_E,
|
172 |
}, |
173 |
|
174 |
[IN_MANTISSA] = { |
175 |
['0' ... '9'] = IN_MANTISSA_DIGITS, |
176 |
}, |
177 |
|
178 |
/* Number */
|
179 |
[IN_NONZERO_NUMBER] = { |
180 |
TERMINAL(JSON_INTEGER), |
181 |
['0' ... '9'] = IN_NONZERO_NUMBER, |
182 |
['e'] = IN_EXP_E,
|
183 |
['E'] = IN_EXP_E,
|
184 |
['.'] = IN_MANTISSA,
|
185 |
}, |
186 |
|
187 |
[IN_NEG_NONZERO_NUMBER] = { |
188 |
['0'] = IN_ZERO,
|
189 |
['1' ... '9'] = IN_NONZERO_NUMBER, |
190 |
}, |
191 |
|
192 |
/* keywords */
|
193 |
[IN_KEYWORD] = { |
194 |
TERMINAL(JSON_KEYWORD), |
195 |
['a' ... 'z'] = IN_KEYWORD, |
196 |
}, |
197 |
|
198 |
/* whitespace */
|
199 |
[IN_WHITESPACE] = { |
200 |
TERMINAL(JSON_SKIP), |
201 |
[' '] = IN_WHITESPACE,
|
202 |
['\t'] = IN_WHITESPACE,
|
203 |
['\r'] = IN_WHITESPACE,
|
204 |
['\n'] = IN_WHITESPACE,
|
205 |
}, |
206 |
|
207 |
/* operator */
|
208 |
[IN_OPERATOR_DONE] = { |
209 |
TERMINAL(JSON_OPERATOR), |
210 |
}, |
211 |
|
212 |
/* escape */
|
213 |
[IN_ESCAPE_DONE] = { |
214 |
TERMINAL(JSON_ESCAPE), |
215 |
}, |
216 |
|
217 |
[IN_ESCAPE_LL] = { |
218 |
['d'] = IN_ESCAPE_DONE,
|
219 |
}, |
220 |
|
221 |
[IN_ESCAPE_L] = { |
222 |
['d'] = IN_ESCAPE_DONE,
|
223 |
['l'] = IN_ESCAPE_LL,
|
224 |
}, |
225 |
|
226 |
[IN_ESCAPE] = { |
227 |
['d'] = IN_ESCAPE_DONE,
|
228 |
['i'] = IN_ESCAPE_DONE,
|
229 |
['p'] = IN_ESCAPE_DONE,
|
230 |
['s'] = IN_ESCAPE_DONE,
|
231 |
['f'] = IN_ESCAPE_DONE,
|
232 |
['l'] = IN_ESCAPE_L,
|
233 |
}, |
234 |
|
235 |
/* top level rule */
|
236 |
[IN_START] = { |
237 |
['"'] = IN_DQ_STRING,
|
238 |
['\''] = IN_SQ_STRING,
|
239 |
['0'] = IN_ZERO,
|
240 |
['1' ... '9'] = IN_NONZERO_NUMBER, |
241 |
['-'] = IN_NEG_NONZERO_NUMBER,
|
242 |
['{'] = IN_OPERATOR_DONE,
|
243 |
['}'] = IN_OPERATOR_DONE,
|
244 |
['['] = IN_OPERATOR_DONE,
|
245 |
[']'] = IN_OPERATOR_DONE,
|
246 |
[','] = IN_OPERATOR_DONE,
|
247 |
[':'] = IN_OPERATOR_DONE,
|
248 |
['a' ... 'z'] = IN_KEYWORD, |
249 |
['%'] = IN_ESCAPE,
|
250 |
[' '] = IN_WHITESPACE,
|
251 |
['\t'] = IN_WHITESPACE,
|
252 |
['\r'] = IN_WHITESPACE,
|
253 |
['\n'] = IN_WHITESPACE,
|
254 |
}, |
255 |
}; |
256 |
|
257 |
/*
 * Prepare @lexer for use.
 *
 * @lexer: lexer to initialize
 * @func:  callback stored in @lexer and invoked for every completed token
 *
 * The state machine starts in IN_START with an empty token buffer.  The
 * position counters are reset too: json_lexer_feed_char() increments them
 * and passes them to the callback, so they must not start out
 * uninitialized.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    lexer->x = lexer->y = 0;
}
263 |
|
264 |
/*
 * Advance the state machine by one input character.
 *
 * @lexer: lexer to advance
 * @ch:    next input character
 *
 * When the transition yields a token type, the text accumulated so far is
 * handed to the emit callback (JSON_SKIP tokens are dropped silently), the
 * token buffer is replaced by a fresh one, and @ch is dispatched again from
 * IN_START because it belongs to the next token.  In every non-error case
 * @ch is appended to the current token buffer.
 *
 * Returns 0 on success, -EINVAL if the transition leads to ERROR.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch)
{
    const uint8_t byte = (uint8_t)ch;
    char text[2] = { ch, '\0' };

    /* Track the position: a newline moves to column 0 of the next row. */
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    } else {
        lexer->x++;
    }

    lexer->state = json_lexer[lexer->state][byte];
    if (lexer->state == ERROR) {
        return -EINVAL;
    }

    switch (lexer->state) {
    case JSON_OPERATOR:
    case JSON_ESCAPE:
    case JSON_INTEGER:
    case JSON_FLOAT:
    case JSON_KEYWORD:
    case JSON_STRING:
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        /* fall through */
    case JSON_SKIP:
        /* start a new token; @ch is its first character */
        QDECREF(lexer->token);
        lexer->token = qstring_new();
        lexer->state = json_lexer[IN_START][byte];
        break;
    default:
        /* still inside a token */
        break;
    }

    qstring_append(lexer->token, text);

    return 0;
}
302 |
|
303 |
/*
 * Feed @size bytes starting at @buffer to @lexer, one character at a time.
 *
 * Stops at the first character that json_lexer_feed_char() rejects and
 * returns its negative error code; returns 0 once all bytes are consumed.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    while (size-- > 0) {
        int rc = json_lexer_feed_char(lexer, *buffer++);

        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
318 |
|
319 |
/*
 * Force out a token that is still pending in @lexer by feeding a NUL
 * character, which terminates every state that can end a token.
 *
 * Returns 0 on success (including when nothing is pending), or the negative
 * error code from json_lexer_feed_char() if the pending input does not form
 * a complete token.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    int ret;

    /*
     * Nothing has been consumed since the lexer was (re)started.  Feeding
     * NUL here would hit the empty entry of the IN_START row and report a
     * bogus -EINVAL.
     */
    if (lexer->state == IN_START) {
        return 0;
    }

    ret = json_lexer_feed_char(lexer, 0);
    if (ret == 0) {
        /*
         * After emitting, json_lexer_feed_char() re-dispatches the NUL from
         * IN_START, which has no transition for it and thus leaves the
         * machine in ERROR.  Rewind so the lexer can be fed again.
         */
        lexer->state = IN_START;
    }

    return ret;
}
323 |
|
324 |
/*
 * Release the resources held by @lexer.
 *
 * Drops the lexer's reference to its token buffer and clears the pointer so
 * that a stale reference cannot be used or released a second time.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = NULL;
}