root / qobject / json-lexer.c @ a372823a
History | View | Annotate | Download (9.3 kB)
1 |
/*
 * JSON lexer
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
|
13 |
|
14 |
#include "qapi/qmp/qstring.h" |
15 |
#include "qapi/qmp/qlist.h" |
16 |
#include "qapi/qmp/qdict.h" |
17 |
#include "qapi/qmp/qint.h" |
18 |
#include "qemu-common.h" |
19 |
#include "qapi/qmp/json-lexer.h" |
20 |
|
21 |
/* Largest size, in bytes, a single token may reach (64 MiB) before the
 * lexer stops accumulating it. */
#define MAX_TOKEN_SIZE (64ULL << 20)
22 |
|
23 |
/*
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
 * [{}\[\],:]
 * [a-z]+
 *
 */
|
31 |
|
32 |
/*
 * Internal states of the lexer's state machine.  Each value is used as a
 * row index into the json_lexer[] transition table.
 *
 * IN_ERROR must stay 0: table entries that are not explicitly initialized
 * are zero, so any byte without a listed transition leads to IN_ERROR.
 */
enum json_lexer_state {
    IN_ERROR = 0,

    /* inside a "\uXXXX" escape of a double-quoted string */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    /* just after a backslash in a double-quoted string */
    IN_DQ_STRING_ESCAPE,
    /* inside a double-quoted string */
    IN_DQ_STRING,

    /* same as above, for single-quoted strings */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* run of lower-case letters */
    IN_KEYWORD,

    /* '%' escapes (%d, %i, %p, %s, %f, %ld, %lld, %I64d) */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_I,
    IN_ESCAPE_I6,
    IN_ESCAPE_I64,

    IN_WHITESPACE,

    /* between tokens; the state the lexer starts in and returns to */
    IN_START,
};
64 |
|
65 |
/* Table-row initializer: make every ASCII byte (0x00..0x7F) lead to STATE.
 * Later designators in the same row override it for individual bytes.
 * Relies on the GNU range-designator extension. */
#define TERMINAL(state) [0 ... 0x7F] = (state)
66 |
|
67 |
/* Return whether TERMINAL is a terminal state and the transition to it
   from OLD_STATE required lookahead.  This happens whenever the table
   below uses the TERMINAL macro.

   The check only looks at column 0 of OLD_STATE's row: the TERMINAL
   macro fills that column, so it holds TERMINAL exactly for such rows.  */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
            (json_lexer[(old_state)][0] == (terminal))
|
72 |
|
73 |
static const uint8_t json_lexer[][256] = { |
74 |
/* double quote string */
|
75 |
[IN_DQ_UCODE3] = { |
76 |
['0' ... '9'] = IN_DQ_STRING, |
77 |
['a' ... 'f'] = IN_DQ_STRING, |
78 |
['A' ... 'F'] = IN_DQ_STRING, |
79 |
}, |
80 |
[IN_DQ_UCODE2] = { |
81 |
['0' ... '9'] = IN_DQ_UCODE3, |
82 |
['a' ... 'f'] = IN_DQ_UCODE3, |
83 |
['A' ... 'F'] = IN_DQ_UCODE3, |
84 |
}, |
85 |
[IN_DQ_UCODE1] = { |
86 |
['0' ... '9'] = IN_DQ_UCODE2, |
87 |
['a' ... 'f'] = IN_DQ_UCODE2, |
88 |
['A' ... 'F'] = IN_DQ_UCODE2, |
89 |
}, |
90 |
[IN_DQ_UCODE0] = { |
91 |
['0' ... '9'] = IN_DQ_UCODE1, |
92 |
['a' ... 'f'] = IN_DQ_UCODE1, |
93 |
['A' ... 'F'] = IN_DQ_UCODE1, |
94 |
}, |
95 |
[IN_DQ_STRING_ESCAPE] = { |
96 |
['b'] = IN_DQ_STRING,
|
97 |
['f'] = IN_DQ_STRING,
|
98 |
['n'] = IN_DQ_STRING,
|
99 |
['r'] = IN_DQ_STRING,
|
100 |
['t'] = IN_DQ_STRING,
|
101 |
['/'] = IN_DQ_STRING,
|
102 |
['\\'] = IN_DQ_STRING,
|
103 |
['\''] = IN_DQ_STRING,
|
104 |
['\"'] = IN_DQ_STRING,
|
105 |
['u'] = IN_DQ_UCODE0,
|
106 |
}, |
107 |
[IN_DQ_STRING] = { |
108 |
[1 ... 0xBF] = IN_DQ_STRING, |
109 |
[0xC2 ... 0xF4] = IN_DQ_STRING, |
110 |
['\\'] = IN_DQ_STRING_ESCAPE,
|
111 |
['"'] = JSON_STRING,
|
112 |
}, |
113 |
|
114 |
/* single quote string */
|
115 |
[IN_SQ_UCODE3] = { |
116 |
['0' ... '9'] = IN_SQ_STRING, |
117 |
['a' ... 'f'] = IN_SQ_STRING, |
118 |
['A' ... 'F'] = IN_SQ_STRING, |
119 |
}, |
120 |
[IN_SQ_UCODE2] = { |
121 |
['0' ... '9'] = IN_SQ_UCODE3, |
122 |
['a' ... 'f'] = IN_SQ_UCODE3, |
123 |
['A' ... 'F'] = IN_SQ_UCODE3, |
124 |
}, |
125 |
[IN_SQ_UCODE1] = { |
126 |
['0' ... '9'] = IN_SQ_UCODE2, |
127 |
['a' ... 'f'] = IN_SQ_UCODE2, |
128 |
['A' ... 'F'] = IN_SQ_UCODE2, |
129 |
}, |
130 |
[IN_SQ_UCODE0] = { |
131 |
['0' ... '9'] = IN_SQ_UCODE1, |
132 |
['a' ... 'f'] = IN_SQ_UCODE1, |
133 |
['A' ... 'F'] = IN_SQ_UCODE1, |
134 |
}, |
135 |
[IN_SQ_STRING_ESCAPE] = { |
136 |
['b'] = IN_SQ_STRING,
|
137 |
['f'] = IN_SQ_STRING,
|
138 |
['n'] = IN_SQ_STRING,
|
139 |
['r'] = IN_SQ_STRING,
|
140 |
['t'] = IN_SQ_STRING,
|
141 |
['/'] = IN_DQ_STRING,
|
142 |
['\\'] = IN_DQ_STRING,
|
143 |
['\''] = IN_SQ_STRING,
|
144 |
['\"'] = IN_SQ_STRING,
|
145 |
['u'] = IN_SQ_UCODE0,
|
146 |
}, |
147 |
[IN_SQ_STRING] = { |
148 |
[1 ... 0xBF] = IN_SQ_STRING, |
149 |
[0xC2 ... 0xF4] = IN_SQ_STRING, |
150 |
['\\'] = IN_SQ_STRING_ESCAPE,
|
151 |
['\''] = JSON_STRING,
|
152 |
}, |
153 |
|
154 |
/* Zero */
|
155 |
[IN_ZERO] = { |
156 |
TERMINAL(JSON_INTEGER), |
157 |
['0' ... '9'] = IN_ERROR, |
158 |
['.'] = IN_MANTISSA,
|
159 |
}, |
160 |
|
161 |
/* Float */
|
162 |
[IN_DIGITS] = { |
163 |
TERMINAL(JSON_FLOAT), |
164 |
['0' ... '9'] = IN_DIGITS, |
165 |
}, |
166 |
|
167 |
[IN_DIGIT] = { |
168 |
['0' ... '9'] = IN_DIGITS, |
169 |
}, |
170 |
|
171 |
[IN_EXP_E] = { |
172 |
['-'] = IN_DIGIT,
|
173 |
['+'] = IN_DIGIT,
|
174 |
['0' ... '9'] = IN_DIGITS, |
175 |
}, |
176 |
|
177 |
[IN_MANTISSA_DIGITS] = { |
178 |
TERMINAL(JSON_FLOAT), |
179 |
['0' ... '9'] = IN_MANTISSA_DIGITS, |
180 |
['e'] = IN_EXP_E,
|
181 |
['E'] = IN_EXP_E,
|
182 |
}, |
183 |
|
184 |
[IN_MANTISSA] = { |
185 |
['0' ... '9'] = IN_MANTISSA_DIGITS, |
186 |
}, |
187 |
|
188 |
/* Number */
|
189 |
[IN_NONZERO_NUMBER] = { |
190 |
TERMINAL(JSON_INTEGER), |
191 |
['0' ... '9'] = IN_NONZERO_NUMBER, |
192 |
['e'] = IN_EXP_E,
|
193 |
['E'] = IN_EXP_E,
|
194 |
['.'] = IN_MANTISSA,
|
195 |
}, |
196 |
|
197 |
[IN_NEG_NONZERO_NUMBER] = { |
198 |
['0'] = IN_ZERO,
|
199 |
['1' ... '9'] = IN_NONZERO_NUMBER, |
200 |
}, |
201 |
|
202 |
/* keywords */
|
203 |
[IN_KEYWORD] = { |
204 |
TERMINAL(JSON_KEYWORD), |
205 |
['a' ... 'z'] = IN_KEYWORD, |
206 |
}, |
207 |
|
208 |
/* whitespace */
|
209 |
[IN_WHITESPACE] = { |
210 |
TERMINAL(JSON_SKIP), |
211 |
[' '] = IN_WHITESPACE,
|
212 |
['\t'] = IN_WHITESPACE,
|
213 |
['\r'] = IN_WHITESPACE,
|
214 |
['\n'] = IN_WHITESPACE,
|
215 |
}, |
216 |
|
217 |
/* escape */
|
218 |
[IN_ESCAPE_LL] = { |
219 |
['d'] = JSON_ESCAPE,
|
220 |
}, |
221 |
|
222 |
[IN_ESCAPE_L] = { |
223 |
['d'] = JSON_ESCAPE,
|
224 |
['l'] = IN_ESCAPE_LL,
|
225 |
}, |
226 |
|
227 |
[IN_ESCAPE_I64] = { |
228 |
['d'] = JSON_ESCAPE,
|
229 |
}, |
230 |
|
231 |
[IN_ESCAPE_I6] = { |
232 |
['4'] = IN_ESCAPE_I64,
|
233 |
}, |
234 |
|
235 |
[IN_ESCAPE_I] = { |
236 |
['6'] = IN_ESCAPE_I6,
|
237 |
}, |
238 |
|
239 |
[IN_ESCAPE] = { |
240 |
['d'] = JSON_ESCAPE,
|
241 |
['i'] = JSON_ESCAPE,
|
242 |
['p'] = JSON_ESCAPE,
|
243 |
['s'] = JSON_ESCAPE,
|
244 |
['f'] = JSON_ESCAPE,
|
245 |
['l'] = IN_ESCAPE_L,
|
246 |
['I'] = IN_ESCAPE_I,
|
247 |
}, |
248 |
|
249 |
/* top level rule */
|
250 |
[IN_START] = { |
251 |
['"'] = IN_DQ_STRING,
|
252 |
['\''] = IN_SQ_STRING,
|
253 |
['0'] = IN_ZERO,
|
254 |
['1' ... '9'] = IN_NONZERO_NUMBER, |
255 |
['-'] = IN_NEG_NONZERO_NUMBER,
|
256 |
['{'] = JSON_OPERATOR,
|
257 |
['}'] = JSON_OPERATOR,
|
258 |
['['] = JSON_OPERATOR,
|
259 |
[']'] = JSON_OPERATOR,
|
260 |
[','] = JSON_OPERATOR,
|
261 |
[':'] = JSON_OPERATOR,
|
262 |
['a' ... 'z'] = IN_KEYWORD, |
263 |
['%'] = IN_ESCAPE,
|
264 |
[' '] = IN_WHITESPACE,
|
265 |
['\t'] = IN_WHITESPACE,
|
266 |
['\r'] = IN_WHITESPACE,
|
267 |
['\n'] = IN_WHITESPACE,
|
268 |
}, |
269 |
}; |
270 |
|
271 |
/*
 * Prepare LEXER for use.
 *
 * @lexer: lexer to initialize
 * @func:  callback stored in the lexer and invoked for every token (or
 *         error) the lexer emits
 *
 * The lexer starts in IN_START with a new, empty token string and its
 * position counters x and y at zero.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    lexer->x = 0;
    lexer->y = 0;
}
278 |
|
279 |
/* Drop the token accumulated so far and start a new, empty one. */
static void json_lexer_reset_token(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = qstring_new();
}

/*
 * Run one input character through the state machine.
 *
 * @lexer: lexer state
 * @ch:    character to process
 * @flush: when true, the character is looked up at most once, even if the
 *         transition did not consume it (used to force out a pending token)
 *
 * Completed tokens and errors are reported through lexer->emit().
 * Always returns 0.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
{
    int char_consumed, new_state;

    /* x counts characters since the last newline, y counts newlines */
    lexer->x++;
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    }

    do {
        new_state = json_lexer[lexer->state][(uint8_t)ch];
        /* A token ended by lookahead does not include the character that
         * ended it; that character is looked up again from IN_START. */
        char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
        if (char_consumed) {
            qstring_append_chr(lexer->token, ch);
        }

        switch (new_state) {
        case JSON_OPERATOR:
        case JSON_ESCAPE:
        case JSON_INTEGER:
        case JSON_FLOAT:
        case JSON_KEYWORD:
        case JSON_STRING:
            lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
            /* fall through */
        case JSON_SKIP:
            json_lexer_reset_token(lexer);
            new_state = IN_START;
            break;
        case IN_ERROR:
            /* XXX: To avoid having previous bad input leaving the parser in an
             * unresponsive state where we consume unpredictable amounts of
             * subsequent "good" input, percolate this error state up to the
             * tokenizer/parser by forcing a NULL object to be emitted, then
             * reset state.
             *
             * Also note that this handling is required for reliable channel
             * negotiation between QMP and the guest agent, since chr(0xFF)
             * is placed at the beginning of certain events to ensure proper
             * delivery when the channel is in an unknown state. chr(0xFF) is
             * never a valid ASCII/UTF-8 sequence, so this should reliably
             * induce an error/flush state.
             */
            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
            json_lexer_reset_token(lexer);
            lexer->state = IN_START;
            return 0;
        default:
            break;
        }
        lexer->state = new_state;
    } while (!char_consumed && !flush);

    /* Do not let a single token grow to an arbitrarily large size,
     * this is a security consideration.
     */
    if (lexer->token->length > MAX_TOKEN_SIZE) {
        /* the unfinished token is emitted with the current lexer state
         * in place of a token type */
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        json_lexer_reset_token(lexer);
        lexer->state = IN_START;
    }

    return 0;
}
348 |
|
349 |
/*
 * Feed SIZE bytes starting at BUFFER to the lexer, one character at a time.
 *
 * Returns 0 on success.  If processing a character reports a negative
 * value, feeding stops and that value is returned.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++) {
        int err = json_lexer_feed_char(lexer, buffer[i], false);

        if (err < 0) {
            return err;
        }
    }

    return 0;
}
364 |
|
365 |
/*
 * Force out whatever token the lexer is currently in the middle of.
 *
 * Nothing is done when the lexer is between tokens (IN_START).  Otherwise
 * a NUL character is fed in flush mode, which makes the state machine
 * either complete the pending token or report an error.
 *
 * Returns 0 when idle, else the result of processing the NUL character.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    if (lexer->state == IN_START) {
        return 0;
    }

    return json_lexer_feed_char(lexer, 0, true);
}
369 |
|
370 |
/*
 * Release the resources held by LEXER: drops the lexer's reference to its
 * current token string.  The lexer structure itself is not freed.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    /* do not leave a dangling pointer behind; json_lexer_init() installs a
     * new token string if the lexer is reused */
    lexer->token = NULL;
}