root / json-lexer.c @ e3e87df4
History | View | Annotate | Download (9.3 kB)
1 | 5ab8558d | Anthony Liguori | /*
|
---|---|---|---|
2 | 5ab8558d | Anthony Liguori | * JSON lexer
|
3 | 5ab8558d | Anthony Liguori | *
|
4 | 5ab8558d | Anthony Liguori | * Copyright IBM, Corp. 2009
|
5 | 5ab8558d | Anthony Liguori | *
|
6 | 5ab8558d | Anthony Liguori | * Authors:
|
7 | 5ab8558d | Anthony Liguori | * Anthony Liguori <aliguori@us.ibm.com>
|
8 | 5ab8558d | Anthony Liguori | *
|
9 | 5ab8558d | Anthony Liguori | * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
|
10 | 5ab8558d | Anthony Liguori | * See the COPYING.LIB file in the top-level directory.
|
11 | 5ab8558d | Anthony Liguori | *
|
12 | 5ab8558d | Anthony Liguori | */
|
13 | 5ab8558d | Anthony Liguori | |
14 | 5ab8558d | Anthony Liguori | #include "qstring.h" |
15 | 5ab8558d | Anthony Liguori | #include "qlist.h" |
16 | 5ab8558d | Anthony Liguori | #include "qdict.h" |
17 | 5ab8558d | Anthony Liguori | #include "qint.h" |
18 | 5ab8558d | Anthony Liguori | #include "qemu-common.h" |
19 | 5ab8558d | Anthony Liguori | #include "json-lexer.h" |
20 | 5ab8558d | Anthony Liguori | |
/* Upper bound, in bytes, on the length of a single token: 64 MiB. */
#define MAX_TOKEN_SIZE (64ULL * 1024 * 1024)
22 | 325601b4 | Anthony Liguori | |
23 | 5ab8558d | Anthony Liguori | /*
|
24 | 5ab8558d | Anthony Liguori | * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
|
25 | 5ab8558d | Anthony Liguori | * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
|
26 | 5ab8558d | Anthony Liguori | * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
|
27 | 5ab8558d | Anthony Liguori | * [{}\[\],:]
|
28 | 5ab8558d | Anthony Liguori | * [a-z]+
|
29 | 5ab8558d | Anthony Liguori | *
|
30 | 5ab8558d | Anthony Liguori | */
|
31 | 5ab8558d | Anthony Liguori | |
/*
 * Internal states of the lexer's state machine.  Each value is used as a
 * row index into the json_lexer[] transition table.
 *
 * IN_ERROR must stay 0: table entries that are not explicitly initialized
 * are zero, so every unlisted (state, byte) pair leads to IN_ERROR.
 */
enum json_lexer_state {
    IN_ERROR = 0,

    /* inside a "..." string */
    IN_DQ_UCODE3,           /* \u seen, three hex digits read */
    IN_DQ_UCODE2,           /* \u seen, two hex digits read */
    IN_DQ_UCODE1,           /* \u seen, one hex digit read */
    IN_DQ_UCODE0,           /* \u seen, no hex digit read yet */
    IN_DQ_STRING_ESCAPE,    /* backslash seen */
    IN_DQ_STRING,

    /* inside a '...' string */
    IN_SQ_UCODE3,           /* \u seen, three hex digits read */
    IN_SQ_UCODE2,           /* \u seen, two hex digits read */
    IN_SQ_UCODE1,           /* \u seen, one hex digit read */
    IN_SQ_UCODE0,           /* \u seen, no hex digit read yet */
    IN_SQ_STRING_ESCAPE,    /* backslash seen */
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,                /* a leading '0' */
    IN_DIGITS,              /* exponent digits */
    IN_DIGIT,               /* exponent sign seen, a digit must follow */
    IN_EXP_E,               /* 'e' or 'E' seen */
    IN_MANTISSA,            /* '.' seen, a digit must follow */
    IN_MANTISSA_DIGITS,     /* digits after the '.' */
    IN_NONZERO_NUMBER,      /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,  /* '-' seen, a digit must follow */

    /* bare words made of a-z */
    IN_KEYWORD,

    /* '%' escapes */
    IN_ESCAPE,              /* '%' seen */
    IN_ESCAPE_L,            /* "%l" seen */
    IN_ESCAPE_LL,           /* "%ll" seen */
    IN_ESCAPE_I,            /* "%I" seen */
    IN_ESCAPE_I6,           /* "%I6" seen */
    IN_ESCAPE_I64,          /* "%I64" seen */

    IN_WHITESPACE,

    /* between tokens; the lexer starts here and returns here after a token */
    IN_START,
};
64 | 5ab8558d | Anthony Liguori | |
/*
 * Row initializer helper: set every ASCII slot (0x00..0x7F) of a
 * transition row to STATE.  It uses the GNU range-designator extension.
 * The table places it first in a row so that the designators that follow
 * it replace individual characters.
 */
#define TERMINAL(state) [0 ... 0x7F] = (state)

/*
 * Tell whether TERMINAL is the value stored at index 0 of OLD_STATE's row.
 * Index 0 is only ever set through the TERMINAL macro in the table below,
 * so a match means the token was ended by looking at a character that
 * does not belong to it (lookahead), and that character must not be
 * consumed.
 */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
    ((terminal) == json_lexer[(old_state)][0])
72 | f7c05274 | Paolo Bonzini | |
73 | 5ab8558d | Anthony Liguori | static const uint8_t json_lexer[][256] = { |
74 | 5ab8558d | Anthony Liguori | /* double quote string */
|
75 | 5ab8558d | Anthony Liguori | [IN_DQ_UCODE3] = { |
76 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DQ_STRING, |
77 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_DQ_STRING, |
78 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_DQ_STRING, |
79 | 5ab8558d | Anthony Liguori | }, |
80 | 5ab8558d | Anthony Liguori | [IN_DQ_UCODE2] = { |
81 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DQ_UCODE3, |
82 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_DQ_UCODE3, |
83 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_DQ_UCODE3, |
84 | 5ab8558d | Anthony Liguori | }, |
85 | 5ab8558d | Anthony Liguori | [IN_DQ_UCODE1] = { |
86 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DQ_UCODE2, |
87 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_DQ_UCODE2, |
88 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_DQ_UCODE2, |
89 | 5ab8558d | Anthony Liguori | }, |
90 | 5ab8558d | Anthony Liguori | [IN_DQ_UCODE0] = { |
91 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DQ_UCODE1, |
92 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_DQ_UCODE1, |
93 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_DQ_UCODE1, |
94 | 5ab8558d | Anthony Liguori | }, |
95 | 5ab8558d | Anthony Liguori | [IN_DQ_STRING_ESCAPE] = { |
96 | 5ab8558d | Anthony Liguori | ['b'] = IN_DQ_STRING,
|
97 | 5ab8558d | Anthony Liguori | ['f'] = IN_DQ_STRING,
|
98 | 5ab8558d | Anthony Liguori | ['n'] = IN_DQ_STRING,
|
99 | 5ab8558d | Anthony Liguori | ['r'] = IN_DQ_STRING,
|
100 | 5ab8558d | Anthony Liguori | ['t'] = IN_DQ_STRING,
|
101 | 1041ba7a | Luiz Capitulino | ['/'] = IN_DQ_STRING,
|
102 | 1041ba7a | Luiz Capitulino | ['\\'] = IN_DQ_STRING,
|
103 | 5ab8558d | Anthony Liguori | ['\''] = IN_DQ_STRING,
|
104 | 5ab8558d | Anthony Liguori | ['\"'] = IN_DQ_STRING,
|
105 | 5ab8558d | Anthony Liguori | ['u'] = IN_DQ_UCODE0,
|
106 | 5ab8558d | Anthony Liguori | }, |
107 | 5ab8558d | Anthony Liguori | [IN_DQ_STRING] = { |
108 | b011f619 | Michael Roth | [1 ... 0xBF] = IN_DQ_STRING, |
109 | b011f619 | Michael Roth | [0xC2 ... 0xF4] = IN_DQ_STRING, |
110 | 5ab8558d | Anthony Liguori | ['\\'] = IN_DQ_STRING_ESCAPE,
|
111 | 28e91a68 | Paolo Bonzini | ['"'] = JSON_STRING,
|
112 | 5ab8558d | Anthony Liguori | }, |
113 | 5ab8558d | Anthony Liguori | |
114 | 5ab8558d | Anthony Liguori | /* single quote string */
|
115 | 5ab8558d | Anthony Liguori | [IN_SQ_UCODE3] = { |
116 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_SQ_STRING, |
117 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_SQ_STRING, |
118 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_SQ_STRING, |
119 | 5ab8558d | Anthony Liguori | }, |
120 | 5ab8558d | Anthony Liguori | [IN_SQ_UCODE2] = { |
121 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_SQ_UCODE3, |
122 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_SQ_UCODE3, |
123 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_SQ_UCODE3, |
124 | 5ab8558d | Anthony Liguori | }, |
125 | 5ab8558d | Anthony Liguori | [IN_SQ_UCODE1] = { |
126 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_SQ_UCODE2, |
127 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_SQ_UCODE2, |
128 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_SQ_UCODE2, |
129 | 5ab8558d | Anthony Liguori | }, |
130 | 5ab8558d | Anthony Liguori | [IN_SQ_UCODE0] = { |
131 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_SQ_UCODE1, |
132 | 5ab8558d | Anthony Liguori | ['a' ... 'f'] = IN_SQ_UCODE1, |
133 | 5ab8558d | Anthony Liguori | ['A' ... 'F'] = IN_SQ_UCODE1, |
134 | 5ab8558d | Anthony Liguori | }, |
135 | 5ab8558d | Anthony Liguori | [IN_SQ_STRING_ESCAPE] = { |
136 | 5ab8558d | Anthony Liguori | ['b'] = IN_SQ_STRING,
|
137 | 5ab8558d | Anthony Liguori | ['f'] = IN_SQ_STRING,
|
138 | 5ab8558d | Anthony Liguori | ['n'] = IN_SQ_STRING,
|
139 | 5ab8558d | Anthony Liguori | ['r'] = IN_SQ_STRING,
|
140 | 5ab8558d | Anthony Liguori | ['t'] = IN_SQ_STRING,
|
141 | 1041ba7a | Luiz Capitulino | ['/'] = IN_DQ_STRING,
|
142 | 1041ba7a | Luiz Capitulino | ['\\'] = IN_DQ_STRING,
|
143 | 5ab8558d | Anthony Liguori | ['\''] = IN_SQ_STRING,
|
144 | 5ab8558d | Anthony Liguori | ['\"'] = IN_SQ_STRING,
|
145 | 5ab8558d | Anthony Liguori | ['u'] = IN_SQ_UCODE0,
|
146 | 5ab8558d | Anthony Liguori | }, |
147 | 5ab8558d | Anthony Liguori | [IN_SQ_STRING] = { |
148 | b011f619 | Michael Roth | [1 ... 0xBF] = IN_SQ_STRING, |
149 | b011f619 | Michael Roth | [0xC2 ... 0xF4] = IN_SQ_STRING, |
150 | 5ab8558d | Anthony Liguori | ['\\'] = IN_SQ_STRING_ESCAPE,
|
151 | 28e91a68 | Paolo Bonzini | ['\''] = JSON_STRING,
|
152 | 5ab8558d | Anthony Liguori | }, |
153 | 5ab8558d | Anthony Liguori | |
154 | 5ab8558d | Anthony Liguori | /* Zero */
|
155 | 5ab8558d | Anthony Liguori | [IN_ZERO] = { |
156 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_INTEGER), |
157 | 33d05394 | Blue Swirl | ['0' ... '9'] = IN_ERROR, |
158 | 5ab8558d | Anthony Liguori | ['.'] = IN_MANTISSA,
|
159 | 5ab8558d | Anthony Liguori | }, |
160 | 5ab8558d | Anthony Liguori | |
161 | 5ab8558d | Anthony Liguori | /* Float */
|
162 | 5ab8558d | Anthony Liguori | [IN_DIGITS] = { |
163 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_FLOAT), |
164 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DIGITS, |
165 | 5ab8558d | Anthony Liguori | }, |
166 | 5ab8558d | Anthony Liguori | |
167 | 5ab8558d | Anthony Liguori | [IN_DIGIT] = { |
168 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DIGITS, |
169 | 5ab8558d | Anthony Liguori | }, |
170 | 5ab8558d | Anthony Liguori | |
171 | 5ab8558d | Anthony Liguori | [IN_EXP_E] = { |
172 | 5ab8558d | Anthony Liguori | ['-'] = IN_DIGIT,
|
173 | 5ab8558d | Anthony Liguori | ['+'] = IN_DIGIT,
|
174 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_DIGITS, |
175 | 5ab8558d | Anthony Liguori | }, |
176 | 5ab8558d | Anthony Liguori | |
177 | 5ab8558d | Anthony Liguori | [IN_MANTISSA_DIGITS] = { |
178 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_FLOAT), |
179 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_MANTISSA_DIGITS, |
180 | 5ab8558d | Anthony Liguori | ['e'] = IN_EXP_E,
|
181 | 5ab8558d | Anthony Liguori | ['E'] = IN_EXP_E,
|
182 | 5ab8558d | Anthony Liguori | }, |
183 | 5ab8558d | Anthony Liguori | |
184 | 5ab8558d | Anthony Liguori | [IN_MANTISSA] = { |
185 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_MANTISSA_DIGITS, |
186 | 5ab8558d | Anthony Liguori | }, |
187 | 5ab8558d | Anthony Liguori | |
188 | 5ab8558d | Anthony Liguori | /* Number */
|
189 | 5ab8558d | Anthony Liguori | [IN_NONZERO_NUMBER] = { |
190 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_INTEGER), |
191 | 5ab8558d | Anthony Liguori | ['0' ... '9'] = IN_NONZERO_NUMBER, |
192 | 5ab8558d | Anthony Liguori | ['e'] = IN_EXP_E,
|
193 | 5ab8558d | Anthony Liguori | ['E'] = IN_EXP_E,
|
194 | 5ab8558d | Anthony Liguori | ['.'] = IN_MANTISSA,
|
195 | 5ab8558d | Anthony Liguori | }, |
196 | 5ab8558d | Anthony Liguori | |
197 | 5ab8558d | Anthony Liguori | [IN_NEG_NONZERO_NUMBER] = { |
198 | 5ab8558d | Anthony Liguori | ['0'] = IN_ZERO,
|
199 | 5ab8558d | Anthony Liguori | ['1' ... '9'] = IN_NONZERO_NUMBER, |
200 | 5ab8558d | Anthony Liguori | }, |
201 | 5ab8558d | Anthony Liguori | |
202 | 5ab8558d | Anthony Liguori | /* keywords */
|
203 | 5ab8558d | Anthony Liguori | [IN_KEYWORD] = { |
204 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_KEYWORD), |
205 | 5ab8558d | Anthony Liguori | ['a' ... 'z'] = IN_KEYWORD, |
206 | 5ab8558d | Anthony Liguori | }, |
207 | 5ab8558d | Anthony Liguori | |
208 | 5ab8558d | Anthony Liguori | /* whitespace */
|
209 | 5ab8558d | Anthony Liguori | [IN_WHITESPACE] = { |
210 | 5ab8558d | Anthony Liguori | TERMINAL(JSON_SKIP), |
211 | 5ab8558d | Anthony Liguori | [' '] = IN_WHITESPACE,
|
212 | 5ab8558d | Anthony Liguori | ['\t'] = IN_WHITESPACE,
|
213 | 5ab8558d | Anthony Liguori | ['\r'] = IN_WHITESPACE,
|
214 | 5ab8558d | Anthony Liguori | ['\n'] = IN_WHITESPACE,
|
215 | 5ab8558d | Anthony Liguori | }, |
216 | 5ab8558d | Anthony Liguori | |
217 | 5ab8558d | Anthony Liguori | /* escape */
|
218 | 5ab8558d | Anthony Liguori | [IN_ESCAPE_LL] = { |
219 | 28e91a68 | Paolo Bonzini | ['d'] = JSON_ESCAPE,
|
220 | 5ab8558d | Anthony Liguori | }, |
221 | 5ab8558d | Anthony Liguori | |
222 | 5ab8558d | Anthony Liguori | [IN_ESCAPE_L] = { |
223 | 28e91a68 | Paolo Bonzini | ['d'] = JSON_ESCAPE,
|
224 | 5ab8558d | Anthony Liguori | ['l'] = IN_ESCAPE_LL,
|
225 | 5ab8558d | Anthony Liguori | }, |
226 | 5ab8558d | Anthony Liguori | |
227 | 2c0d4b36 | Roy Tam | [IN_ESCAPE_I64] = { |
228 | 28e91a68 | Paolo Bonzini | ['d'] = JSON_ESCAPE,
|
229 | 2c0d4b36 | Roy Tam | }, |
230 | 2c0d4b36 | Roy Tam | |
231 | 2c0d4b36 | Roy Tam | [IN_ESCAPE_I6] = { |
232 | 2c0d4b36 | Roy Tam | ['4'] = IN_ESCAPE_I64,
|
233 | 2c0d4b36 | Roy Tam | }, |
234 | 2c0d4b36 | Roy Tam | |
235 | 2c0d4b36 | Roy Tam | [IN_ESCAPE_I] = { |
236 | 2c0d4b36 | Roy Tam | ['6'] = IN_ESCAPE_I6,
|
237 | 2c0d4b36 | Roy Tam | }, |
238 | 2c0d4b36 | Roy Tam | |
239 | 5ab8558d | Anthony Liguori | [IN_ESCAPE] = { |
240 | 28e91a68 | Paolo Bonzini | ['d'] = JSON_ESCAPE,
|
241 | 28e91a68 | Paolo Bonzini | ['i'] = JSON_ESCAPE,
|
242 | 28e91a68 | Paolo Bonzini | ['p'] = JSON_ESCAPE,
|
243 | 28e91a68 | Paolo Bonzini | ['s'] = JSON_ESCAPE,
|
244 | 28e91a68 | Paolo Bonzini | ['f'] = JSON_ESCAPE,
|
245 | 5ab8558d | Anthony Liguori | ['l'] = IN_ESCAPE_L,
|
246 | 2c0d4b36 | Roy Tam | ['I'] = IN_ESCAPE_I,
|
247 | 5ab8558d | Anthony Liguori | }, |
248 | 5ab8558d | Anthony Liguori | |
249 | 5ab8558d | Anthony Liguori | /* top level rule */
|
250 | 5ab8558d | Anthony Liguori | [IN_START] = { |
251 | 5ab8558d | Anthony Liguori | ['"'] = IN_DQ_STRING,
|
252 | 5ab8558d | Anthony Liguori | ['\''] = IN_SQ_STRING,
|
253 | 5ab8558d | Anthony Liguori | ['0'] = IN_ZERO,
|
254 | 5ab8558d | Anthony Liguori | ['1' ... '9'] = IN_NONZERO_NUMBER, |
255 | 5ab8558d | Anthony Liguori | ['-'] = IN_NEG_NONZERO_NUMBER,
|
256 | 28e91a68 | Paolo Bonzini | ['{'] = JSON_OPERATOR,
|
257 | 28e91a68 | Paolo Bonzini | ['}'] = JSON_OPERATOR,
|
258 | 28e91a68 | Paolo Bonzini | ['['] = JSON_OPERATOR,
|
259 | 28e91a68 | Paolo Bonzini | [']'] = JSON_OPERATOR,
|
260 | 28e91a68 | Paolo Bonzini | [','] = JSON_OPERATOR,
|
261 | 28e91a68 | Paolo Bonzini | [':'] = JSON_OPERATOR,
|
262 | 5ab8558d | Anthony Liguori | ['a' ... 'z'] = IN_KEYWORD, |
263 | 5ab8558d | Anthony Liguori | ['%'] = IN_ESCAPE,
|
264 | 5ab8558d | Anthony Liguori | [' '] = IN_WHITESPACE,
|
265 | 5ab8558d | Anthony Liguori | ['\t'] = IN_WHITESPACE,
|
266 | 5ab8558d | Anthony Liguori | ['\r'] = IN_WHITESPACE,
|
267 | 5ab8558d | Anthony Liguori | ['\n'] = IN_WHITESPACE,
|
268 | 5ab8558d | Anthony Liguori | }, |
269 | 5ab8558d | Anthony Liguori | }; |
270 | 5ab8558d | Anthony Liguori | |
271 | 5ab8558d | Anthony Liguori | void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
|
272 | 5ab8558d | Anthony Liguori | { |
273 | 5ab8558d | Anthony Liguori | lexer->emit = func; |
274 | 5ab8558d | Anthony Liguori | lexer->state = IN_START; |
275 | 5ab8558d | Anthony Liguori | lexer->token = qstring_new(); |
276 | 03308f6c | Luiz Capitulino | lexer->x = lexer->y = 0;
|
277 | 5ab8558d | Anthony Liguori | } |
278 | 5ab8558d | Anthony Liguori | |
279 | bd3924a3 | Michael Roth | static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) |
280 | 5ab8558d | Anthony Liguori | { |
281 | f7c05274 | Paolo Bonzini | int char_consumed, new_state;
|
282 | f7c05274 | Paolo Bonzini | |
283 | 5ab8558d | Anthony Liguori | lexer->x++; |
284 | 5ab8558d | Anthony Liguori | if (ch == '\n') { |
285 | 5ab8558d | Anthony Liguori | lexer->x = 0;
|
286 | 5ab8558d | Anthony Liguori | lexer->y++; |
287 | 5ab8558d | Anthony Liguori | } |
288 | 5ab8558d | Anthony Liguori | |
289 | f7c05274 | Paolo Bonzini | do {
|
290 | f7c05274 | Paolo Bonzini | new_state = json_lexer[lexer->state][(uint8_t)ch]; |
291 | f7c05274 | Paolo Bonzini | char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); |
292 | f7c05274 | Paolo Bonzini | if (char_consumed) {
|
293 | f7c05274 | Paolo Bonzini | qstring_append_chr(lexer->token, ch); |
294 | f7c05274 | Paolo Bonzini | } |
295 | 5ab8558d | Anthony Liguori | |
296 | f7c05274 | Paolo Bonzini | switch (new_state) {
|
297 | f7c05274 | Paolo Bonzini | case JSON_OPERATOR:
|
298 | f7c05274 | Paolo Bonzini | case JSON_ESCAPE:
|
299 | f7c05274 | Paolo Bonzini | case JSON_INTEGER:
|
300 | f7c05274 | Paolo Bonzini | case JSON_FLOAT:
|
301 | f7c05274 | Paolo Bonzini | case JSON_KEYWORD:
|
302 | f7c05274 | Paolo Bonzini | case JSON_STRING:
|
303 | f7c05274 | Paolo Bonzini | lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); |
304 | 0b0404bf | Stefan Weil | /* fall through */
|
305 | f7c05274 | Paolo Bonzini | case JSON_SKIP:
|
306 | f7c05274 | Paolo Bonzini | QDECREF(lexer->token); |
307 | f7c05274 | Paolo Bonzini | lexer->token = qstring_new(); |
308 | f7c05274 | Paolo Bonzini | new_state = IN_START; |
309 | f7c05274 | Paolo Bonzini | break;
|
310 | 33d05394 | Blue Swirl | case IN_ERROR:
|
311 | b011f619 | Michael Roth | /* XXX: To avoid having previous bad input leaving the parser in an
|
312 | b011f619 | Michael Roth | * unresponsive state where we consume unpredictable amounts of
|
313 | b011f619 | Michael Roth | * subsequent "good" input, percolate this error state up to the
|
314 | b011f619 | Michael Roth | * tokenizer/parser by forcing a NULL object to be emitted, then
|
315 | b011f619 | Michael Roth | * reset state.
|
316 | b011f619 | Michael Roth | *
|
317 | b011f619 | Michael Roth | * Also note that this handling is required for reliable channel
|
318 | b011f619 | Michael Roth | * negotiation between QMP and the guest agent, since chr(0xFF)
|
319 | b011f619 | Michael Roth | * is placed at the beginning of certain events to ensure proper
|
320 | b011f619 | Michael Roth | * delivery when the channel is in an unknown state. chr(0xFF) is
|
321 | b011f619 | Michael Roth | * never a valid ASCII/UTF-8 sequence, so this should reliably
|
322 | b011f619 | Michael Roth | * induce an error/flush state.
|
323 | b011f619 | Michael Roth | */
|
324 | b011f619 | Michael Roth | lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); |
325 | 529a0ef5 | Anthony Liguori | QDECREF(lexer->token); |
326 | 529a0ef5 | Anthony Liguori | lexer->token = qstring_new(); |
327 | 529a0ef5 | Anthony Liguori | new_state = IN_START; |
328 | b011f619 | Michael Roth | lexer->state = new_state; |
329 | b011f619 | Michael Roth | return 0; |
330 | f7c05274 | Paolo Bonzini | default:
|
331 | f7c05274 | Paolo Bonzini | break;
|
332 | f7c05274 | Paolo Bonzini | } |
333 | f7c05274 | Paolo Bonzini | lexer->state = new_state; |
334 | bd3924a3 | Michael Roth | } while (!char_consumed && !flush);
|
335 | 325601b4 | Anthony Liguori | |
336 | 325601b4 | Anthony Liguori | /* Do not let a single token grow to an arbitrarily large size,
|
337 | 325601b4 | Anthony Liguori | * this is a security consideration.
|
338 | 325601b4 | Anthony Liguori | */
|
339 | 325601b4 | Anthony Liguori | if (lexer->token->length > MAX_TOKEN_SIZE) {
|
340 | 325601b4 | Anthony Liguori | lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); |
341 | 325601b4 | Anthony Liguori | QDECREF(lexer->token); |
342 | 325601b4 | Anthony Liguori | lexer->token = qstring_new(); |
343 | 325601b4 | Anthony Liguori | lexer->state = IN_START; |
344 | 325601b4 | Anthony Liguori | } |
345 | 325601b4 | Anthony Liguori | |
346 | 5ab8558d | Anthony Liguori | return 0; |
347 | 5ab8558d | Anthony Liguori | } |
348 | 5ab8558d | Anthony Liguori | |
349 | 5ab8558d | Anthony Liguori | int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) |
350 | 5ab8558d | Anthony Liguori | { |
351 | 5ab8558d | Anthony Liguori | size_t i; |
352 | 5ab8558d | Anthony Liguori | |
353 | 5ab8558d | Anthony Liguori | for (i = 0; i < size; i++) { |
354 | 5ab8558d | Anthony Liguori | int err;
|
355 | 5ab8558d | Anthony Liguori | |
356 | bd3924a3 | Michael Roth | err = json_lexer_feed_char(lexer, buffer[i], false);
|
357 | 5ab8558d | Anthony Liguori | if (err < 0) { |
358 | 5ab8558d | Anthony Liguori | return err;
|
359 | 5ab8558d | Anthony Liguori | } |
360 | 5ab8558d | Anthony Liguori | } |
361 | 5ab8558d | Anthony Liguori | |
362 | 5ab8558d | Anthony Liguori | return 0; |
363 | 5ab8558d | Anthony Liguori | } |
364 | 5ab8558d | Anthony Liguori | |
365 | 5ab8558d | Anthony Liguori | int json_lexer_flush(JSONLexer *lexer)
|
366 | 5ab8558d | Anthony Liguori | { |
367 | b011f619 | Michael Roth | return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true); |
368 | 5ab8558d | Anthony Liguori | } |
369 | 5ab8558d | Anthony Liguori | |
370 | 5ab8558d | Anthony Liguori | void json_lexer_destroy(JSONLexer *lexer)
|
371 | 5ab8558d | Anthony Liguori | { |
372 | 5ab8558d | Anthony Liguori | QDECREF(lexer->token); |
373 | 5ab8558d | Anthony Liguori | } |