Statistics
| Branch: | Revision:

root / json-lexer.c @ f2c88dc1

History | View | Annotate | Download (9.2 kB)

1 5ab8558d Anthony Liguori
/*
2 5ab8558d Anthony Liguori
 * JSON lexer
3 5ab8558d Anthony Liguori
 *
4 5ab8558d Anthony Liguori
 * Copyright IBM, Corp. 2009
5 5ab8558d Anthony Liguori
 *
6 5ab8558d Anthony Liguori
 * Authors:
7 5ab8558d Anthony Liguori
 *  Anthony Liguori   <aliguori@us.ibm.com>
8 5ab8558d Anthony Liguori
 *
9 5ab8558d Anthony Liguori
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 5ab8558d Anthony Liguori
 * See the COPYING.LIB file in the top-level directory.
11 5ab8558d Anthony Liguori
 *
12 5ab8558d Anthony Liguori
 */
13 5ab8558d Anthony Liguori
14 5ab8558d Anthony Liguori
#include "qstring.h"
15 5ab8558d Anthony Liguori
#include "qlist.h"
16 5ab8558d Anthony Liguori
#include "qdict.h"
17 5ab8558d Anthony Liguori
#include "qint.h"
18 5ab8558d Anthony Liguori
#include "qemu-common.h"
19 5ab8558d Anthony Liguori
#include "json-lexer.h"
20 5ab8558d Anthony Liguori
21 325601b4 Anthony Liguori
/* Largest token text, in bytes, that json_lexer_feed_char() lets
   accumulate before forcing the token out: 64 MiB.  */
#define MAX_TOKEN_SIZE (64ULL * 1024 * 1024)
22 325601b4 Anthony Liguori
23 5ab8558d Anthony Liguori
/*
24 5ab8558d Anthony Liguori
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
25 5ab8558d Anthony Liguori
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
26 5ab8558d Anthony Liguori
 * 0|([1-9][0-9]*(\.[0-9]+)?([eE]([-+])?[0-9]+)?)
27 5ab8558d Anthony Liguori
 * [{}\[\],:]
28 5ab8558d Anthony Liguori
 * [a-z]+
29 5ab8558d Anthony Liguori
 *
30 5ab8558d Anthony Liguori
 */
31 5ab8558d Anthony Liguori
32 5ab8558d Anthony Liguori
/*
 * Internal states of the lexer's state machine.  Each value indexes a row
 * of the json_lexer[] transition table.  IN_ERROR must stay 0 so that
 * table entries left unspecified mean "error".
 *
 * NOTE(review): these values share the table with the JSON_* token types
 * from json-lexer.h, so the two sets presumably must not overlap --
 * confirm against that header before adding states.
 */
enum json_lexer_state {
    IN_ERROR = 0,

    /* Double-quoted string.  UCODE0..UCODE3 are entered after "\u" and
       count the four hex digits that must follow.  */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,        /* just saw '\\' inside the string */
    IN_DQ_STRING,

    /* Single-quoted string, same layout as the double-quoted states.  */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* Numbers.  */
    IN_ZERO,                    /* a leading '0' */
    IN_DIGITS,                  /* exponent digits */
    IN_DIGIT,                   /* after exponent sign, digit required */
    IN_EXP_E,                   /* just saw 'e' or 'E' */
    IN_MANTISSA,                /* just saw '.', digit required */
    IN_MANTISSA_DIGITS,         /* digits after the '.' */
    IN_NONZERO_NUMBER,          /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,      /* just saw a leading '-' */

    /* Run of lowercase letters.  */
    IN_KEYWORD,

    /* '%' interpolation escapes: "%", "%l", "%ll", "%I", "%I6", "%I64".  */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_I,
    IN_ESCAPE_I6,
    IN_ESCAPE_I64,

    IN_WHITESPACE,
    IN_START,                   /* between tokens */
};
64 5ab8558d Anthony Liguori
65 5ab8558d Anthony Liguori
/* Row initializer for json_lexer[]: make every 7-bit ASCII byte
   (0x00..0x7F) lead to TARGET.  Designators that follow it in the same
   row override individual bytes.  */
#define TERMINAL(target) [0 ... 0x7F] = (target)

/* Tell whether moving from state FROM to TO was the default transition of
   a row built with TERMINAL(), i.e. the current byte ended the token
   without being part of it and still has to be lexed.  The check compares
   TO with the row's entry for byte 0, which TERMINAL() sets and no row
   overrides.  It is also true when both are 0 (IN_ERROR).  */
#define TERMINAL_NEEDED_LOOKAHEAD(from, to) \
            (json_lexer[(from)][0] == (to))
72 f7c05274 Paolo Bonzini
73 5ab8558d Anthony Liguori
static const uint8_t json_lexer[][256] =  {
74 5ab8558d Anthony Liguori
    /* double quote string */
75 5ab8558d Anthony Liguori
    [IN_DQ_UCODE3] = {
76 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_STRING,
77 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_STRING,
78 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_STRING,
79 5ab8558d Anthony Liguori
    },
80 5ab8558d Anthony Liguori
    [IN_DQ_UCODE2] = {
81 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE3,
82 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE3,
83 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE3,
84 5ab8558d Anthony Liguori
    },
85 5ab8558d Anthony Liguori
    [IN_DQ_UCODE1] = {
86 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE2,
87 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE2,
88 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE2,
89 5ab8558d Anthony Liguori
    },
90 5ab8558d Anthony Liguori
    [IN_DQ_UCODE0] = {
91 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DQ_UCODE1,
92 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_DQ_UCODE1,
93 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_DQ_UCODE1,
94 5ab8558d Anthony Liguori
    },
95 5ab8558d Anthony Liguori
    [IN_DQ_STRING_ESCAPE] = {
96 5ab8558d Anthony Liguori
        ['b'] = IN_DQ_STRING,
97 5ab8558d Anthony Liguori
        ['f'] =  IN_DQ_STRING,
98 5ab8558d Anthony Liguori
        ['n'] =  IN_DQ_STRING,
99 5ab8558d Anthony Liguori
        ['r'] =  IN_DQ_STRING,
100 5ab8558d Anthony Liguori
        ['t'] =  IN_DQ_STRING,
101 1041ba7a Luiz Capitulino
        ['/'] = IN_DQ_STRING,
102 1041ba7a Luiz Capitulino
        ['\\'] = IN_DQ_STRING,
103 5ab8558d Anthony Liguori
        ['\''] = IN_DQ_STRING,
104 5ab8558d Anthony Liguori
        ['\"'] = IN_DQ_STRING,
105 5ab8558d Anthony Liguori
        ['u'] = IN_DQ_UCODE0,
106 5ab8558d Anthony Liguori
    },
107 5ab8558d Anthony Liguori
    [IN_DQ_STRING] = {
108 b011f619 Michael Roth
        [1 ... 0xBF] = IN_DQ_STRING,
109 b011f619 Michael Roth
        [0xC2 ... 0xF4] = IN_DQ_STRING,
110 5ab8558d Anthony Liguori
        ['\\'] = IN_DQ_STRING_ESCAPE,
111 28e91a68 Paolo Bonzini
        ['"'] = JSON_STRING,
112 5ab8558d Anthony Liguori
    },
113 5ab8558d Anthony Liguori
114 5ab8558d Anthony Liguori
    /* single quote string */
115 5ab8558d Anthony Liguori
    [IN_SQ_UCODE3] = {
116 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_STRING,
117 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_STRING,
118 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_STRING,
119 5ab8558d Anthony Liguori
    },
120 5ab8558d Anthony Liguori
    [IN_SQ_UCODE2] = {
121 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE3,
122 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE3,
123 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE3,
124 5ab8558d Anthony Liguori
    },
125 5ab8558d Anthony Liguori
    [IN_SQ_UCODE1] = {
126 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE2,
127 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE2,
128 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE2,
129 5ab8558d Anthony Liguori
    },
130 5ab8558d Anthony Liguori
    [IN_SQ_UCODE0] = {
131 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_SQ_UCODE1,
132 5ab8558d Anthony Liguori
        ['a' ... 'f'] = IN_SQ_UCODE1,
133 5ab8558d Anthony Liguori
        ['A' ... 'F'] = IN_SQ_UCODE1,
134 5ab8558d Anthony Liguori
    },
135 5ab8558d Anthony Liguori
    [IN_SQ_STRING_ESCAPE] = {
136 5ab8558d Anthony Liguori
        ['b'] = IN_SQ_STRING,
137 5ab8558d Anthony Liguori
        ['f'] =  IN_SQ_STRING,
138 5ab8558d Anthony Liguori
        ['n'] =  IN_SQ_STRING,
139 5ab8558d Anthony Liguori
        ['r'] =  IN_SQ_STRING,
140 5ab8558d Anthony Liguori
        ['t'] =  IN_SQ_STRING,
141 1041ba7a Luiz Capitulino
        ['/'] = IN_DQ_STRING,
142 1041ba7a Luiz Capitulino
        ['\\'] = IN_DQ_STRING,
143 5ab8558d Anthony Liguori
        ['\''] = IN_SQ_STRING,
144 5ab8558d Anthony Liguori
        ['\"'] = IN_SQ_STRING,
145 5ab8558d Anthony Liguori
        ['u'] = IN_SQ_UCODE0,
146 5ab8558d Anthony Liguori
    },
147 5ab8558d Anthony Liguori
    [IN_SQ_STRING] = {
148 b011f619 Michael Roth
        [1 ... 0xBF] = IN_SQ_STRING,
149 b011f619 Michael Roth
        [0xC2 ... 0xF4] = IN_SQ_STRING,
150 5ab8558d Anthony Liguori
        ['\\'] = IN_SQ_STRING_ESCAPE,
151 28e91a68 Paolo Bonzini
        ['\''] = JSON_STRING,
152 5ab8558d Anthony Liguori
    },
153 5ab8558d Anthony Liguori
154 5ab8558d Anthony Liguori
    /* Zero */
155 5ab8558d Anthony Liguori
    [IN_ZERO] = {
156 5ab8558d Anthony Liguori
        TERMINAL(JSON_INTEGER),
157 33d05394 Blue Swirl
        ['0' ... '9'] = IN_ERROR,
158 5ab8558d Anthony Liguori
        ['.'] = IN_MANTISSA,
159 5ab8558d Anthony Liguori
    },
160 5ab8558d Anthony Liguori
161 5ab8558d Anthony Liguori
    /* Float */
162 5ab8558d Anthony Liguori
    [IN_DIGITS] = {
163 5ab8558d Anthony Liguori
        TERMINAL(JSON_FLOAT),
164 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
165 5ab8558d Anthony Liguori
    },
166 5ab8558d Anthony Liguori
167 5ab8558d Anthony Liguori
    [IN_DIGIT] = {
168 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
169 5ab8558d Anthony Liguori
    },
170 5ab8558d Anthony Liguori
171 5ab8558d Anthony Liguori
    [IN_EXP_E] = {
172 5ab8558d Anthony Liguori
        ['-'] = IN_DIGIT,
173 5ab8558d Anthony Liguori
        ['+'] = IN_DIGIT,
174 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_DIGITS,
175 5ab8558d Anthony Liguori
    },
176 5ab8558d Anthony Liguori
177 5ab8558d Anthony Liguori
    [IN_MANTISSA_DIGITS] = {
178 5ab8558d Anthony Liguori
        TERMINAL(JSON_FLOAT),
179 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
180 5ab8558d Anthony Liguori
        ['e'] = IN_EXP_E,
181 5ab8558d Anthony Liguori
        ['E'] = IN_EXP_E,
182 5ab8558d Anthony Liguori
    },
183 5ab8558d Anthony Liguori
184 5ab8558d Anthony Liguori
    [IN_MANTISSA] = {
185 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_MANTISSA_DIGITS,
186 5ab8558d Anthony Liguori
    },
187 5ab8558d Anthony Liguori
188 5ab8558d Anthony Liguori
    /* Number */
189 5ab8558d Anthony Liguori
    [IN_NONZERO_NUMBER] = {
190 5ab8558d Anthony Liguori
        TERMINAL(JSON_INTEGER),
191 5ab8558d Anthony Liguori
        ['0' ... '9'] = IN_NONZERO_NUMBER,
192 5ab8558d Anthony Liguori
        ['e'] = IN_EXP_E,
193 5ab8558d Anthony Liguori
        ['E'] = IN_EXP_E,
194 5ab8558d Anthony Liguori
        ['.'] = IN_MANTISSA,
195 5ab8558d Anthony Liguori
    },
196 5ab8558d Anthony Liguori
197 5ab8558d Anthony Liguori
    [IN_NEG_NONZERO_NUMBER] = {
198 5ab8558d Anthony Liguori
        ['0'] = IN_ZERO,
199 5ab8558d Anthony Liguori
        ['1' ... '9'] = IN_NONZERO_NUMBER,
200 5ab8558d Anthony Liguori
    },
201 5ab8558d Anthony Liguori
202 5ab8558d Anthony Liguori
    /* keywords */
203 5ab8558d Anthony Liguori
    [IN_KEYWORD] = {
204 5ab8558d Anthony Liguori
        TERMINAL(JSON_KEYWORD),
205 5ab8558d Anthony Liguori
        ['a' ... 'z'] = IN_KEYWORD,
206 5ab8558d Anthony Liguori
    },
207 5ab8558d Anthony Liguori
208 5ab8558d Anthony Liguori
    /* whitespace */
209 5ab8558d Anthony Liguori
    [IN_WHITESPACE] = {
210 5ab8558d Anthony Liguori
        TERMINAL(JSON_SKIP),
211 5ab8558d Anthony Liguori
        [' '] = IN_WHITESPACE,
212 5ab8558d Anthony Liguori
        ['\t'] = IN_WHITESPACE,
213 5ab8558d Anthony Liguori
        ['\r'] = IN_WHITESPACE,
214 5ab8558d Anthony Liguori
        ['\n'] = IN_WHITESPACE,
215 5ab8558d Anthony Liguori
    },        
216 5ab8558d Anthony Liguori
217 5ab8558d Anthony Liguori
    /* escape */
218 5ab8558d Anthony Liguori
    [IN_ESCAPE_LL] = {
219 28e91a68 Paolo Bonzini
        ['d'] = JSON_ESCAPE,
220 5ab8558d Anthony Liguori
    },
221 5ab8558d Anthony Liguori
222 5ab8558d Anthony Liguori
    [IN_ESCAPE_L] = {
223 28e91a68 Paolo Bonzini
        ['d'] = JSON_ESCAPE,
224 5ab8558d Anthony Liguori
        ['l'] = IN_ESCAPE_LL,
225 5ab8558d Anthony Liguori
    },
226 5ab8558d Anthony Liguori
227 2c0d4b36 Roy Tam
    [IN_ESCAPE_I64] = {
228 28e91a68 Paolo Bonzini
        ['d'] = JSON_ESCAPE,
229 2c0d4b36 Roy Tam
    },
230 2c0d4b36 Roy Tam
231 2c0d4b36 Roy Tam
    [IN_ESCAPE_I6] = {
232 2c0d4b36 Roy Tam
        ['4'] = IN_ESCAPE_I64,
233 2c0d4b36 Roy Tam
    },
234 2c0d4b36 Roy Tam
235 2c0d4b36 Roy Tam
    [IN_ESCAPE_I] = {
236 2c0d4b36 Roy Tam
        ['6'] = IN_ESCAPE_I6,
237 2c0d4b36 Roy Tam
    },
238 2c0d4b36 Roy Tam
239 5ab8558d Anthony Liguori
    [IN_ESCAPE] = {
240 28e91a68 Paolo Bonzini
        ['d'] = JSON_ESCAPE,
241 28e91a68 Paolo Bonzini
        ['i'] = JSON_ESCAPE,
242 28e91a68 Paolo Bonzini
        ['p'] = JSON_ESCAPE,
243 28e91a68 Paolo Bonzini
        ['s'] = JSON_ESCAPE,
244 28e91a68 Paolo Bonzini
        ['f'] = JSON_ESCAPE,
245 5ab8558d Anthony Liguori
        ['l'] = IN_ESCAPE_L,
246 2c0d4b36 Roy Tam
        ['I'] = IN_ESCAPE_I,
247 5ab8558d Anthony Liguori
    },
248 5ab8558d Anthony Liguori
249 5ab8558d Anthony Liguori
    /* top level rule */
250 5ab8558d Anthony Liguori
    [IN_START] = {
251 5ab8558d Anthony Liguori
        ['"'] = IN_DQ_STRING,
252 5ab8558d Anthony Liguori
        ['\''] = IN_SQ_STRING,
253 5ab8558d Anthony Liguori
        ['0'] = IN_ZERO,
254 5ab8558d Anthony Liguori
        ['1' ... '9'] = IN_NONZERO_NUMBER,
255 5ab8558d Anthony Liguori
        ['-'] = IN_NEG_NONZERO_NUMBER,
256 28e91a68 Paolo Bonzini
        ['{'] = JSON_OPERATOR,
257 28e91a68 Paolo Bonzini
        ['}'] = JSON_OPERATOR,
258 28e91a68 Paolo Bonzini
        ['['] = JSON_OPERATOR,
259 28e91a68 Paolo Bonzini
        [']'] = JSON_OPERATOR,
260 28e91a68 Paolo Bonzini
        [','] = JSON_OPERATOR,
261 28e91a68 Paolo Bonzini
        [':'] = JSON_OPERATOR,
262 5ab8558d Anthony Liguori
        ['a' ... 'z'] = IN_KEYWORD,
263 5ab8558d Anthony Liguori
        ['%'] = IN_ESCAPE,
264 5ab8558d Anthony Liguori
        [' '] = IN_WHITESPACE,
265 5ab8558d Anthony Liguori
        ['\t'] = IN_WHITESPACE,
266 5ab8558d Anthony Liguori
        ['\r'] = IN_WHITESPACE,
267 5ab8558d Anthony Liguori
        ['\n'] = IN_WHITESPACE,
268 5ab8558d Anthony Liguori
    },
269 5ab8558d Anthony Liguori
};
270 5ab8558d Anthony Liguori
271 5ab8558d Anthony Liguori
/*
 * Prepare LEXER for use: remember FUNC as the callback invoked for each
 * token, start in the between-tokens state with a fresh empty token
 * buffer, and zero the x/y position counters.
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->x = 0;
    lexer->y = 0;
    lexer->state = IN_START;
    lexer->emit = func;
    lexer->token = qstring_new();
}
278 5ab8558d Anthony Liguori
279 bd3924a3 Michael Roth
/* Drop the lexer's reference to the current token text and start a new,
   empty one.  */
static void json_lexer_restart_token(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = qstring_new();
}

/*
 * Advance the state machine by one input byte.
 *
 * CH is looked up in json_lexer[] for the current state.  A byte that
 * belongs to the token is appended to lexer->token.  A byte that merely
 * ended the previous token (lookahead) is not appended and is looked up
 * again from IN_START, unless FLUSH is set, in which case it is dropped.
 *
 * Completed tokens are handed to lexer->emit() with their JSON_* type.
 * JSON_SKIP tokens are discarded silently.
 *
 * Always returns 0.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
{
    const uint8_t byte = (uint8_t)ch;
    bool consumed;

    /* Track position: a newline starts the next row at column 0. */
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    } else {
        lexer->x++;
    }

    for (;;) {
        int next = json_lexer[lexer->state][byte];

        consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, next);
        if (consumed) {
            qstring_append_chr(lexer->token, ch);
        }

        if (next == IN_ERROR) {
            /* XXX: Bad input must not leave the parser unresponsive,
             * swallowing unpredictable amounts of later "good" input.
             * Report the problem upwards as a JSON_ERROR token, then
             * start over from a clean state.
             *
             * QMP and the guest agent also depend on this for reliable
             * channel negotiation: chr(0xFF) is put in front of certain
             * events to ensure delivery when the channel is in an unknown
             * state.  chr(0xFF) is never valid ASCII/UTF-8, so it
             * reliably triggers this error/flush path.
             */
            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
            json_lexer_restart_token(lexer);
            lexer->state = IN_START;
            return 0;
        }

        switch (next) {
        case JSON_OPERATOR:
        case JSON_ESCAPE:
        case JSON_INTEGER:
        case JSON_FLOAT:
        case JSON_KEYWORD:
        case JSON_STRING:
            lexer->emit(lexer, lexer->token, next, lexer->x, lexer->y);
            /* fall through */
        case JSON_SKIP:
            json_lexer_restart_token(lexer);
            next = IN_START;
            break;
        default:
            /* Still in the middle of a token. */
            break;
        }
        lexer->state = next;

        if (consumed || flush) {
            break;
        }
    }

    /* Do not let a single token grow to an arbitrarily large size,
     * this is a security consideration.  The unfinished token is emitted
     * with the current lexer state as its type.
     */
    if (lexer->token->length > MAX_TOKEN_SIZE) {
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        json_lexer_restart_token(lexer);
        lexer->state = IN_START;
    }

    return 0;
}
347 5ab8558d Anthony Liguori
348 5ab8558d Anthony Liguori
/*
 * Feed SIZE bytes starting at BUFFER to the lexer, one at a time and in
 * order.  Stops early and returns the value from json_lexer_feed_char()
 * if that is ever negative; otherwise returns 0.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    const char *next = buffer;
    size_t left;

    for (left = size; left != 0; left--) {
        int rc = json_lexer_feed_char(lexer, *next++, false);

        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
363 5ab8558d Anthony Liguori
364 5ab8558d Anthony Liguori
/*
 * Finish any token in progress.  Nothing is done when the lexer is
 * already between tokens; otherwise a NUL byte is fed in flush mode so
 * the pending token is completed (or reported as an error) without the
 * NUL being lexed as new input.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    if (lexer->state == IN_START) {
        return 0;
    }

    return json_lexer_feed_char(lexer, 0, true);
}
368 5ab8558d Anthony Liguori
369 5ab8558d Anthony Liguori
/*
 * Release the lexer's reference to its token buffer.
 *
 * The pointer is cleared afterwards so the lexer does not keep a dangling
 * reference to an object it no longer owns.
 * NOTE(review): this presumably also makes a repeated destroy harmless,
 * provided QDECREF() accepts NULL -- confirm against its definition.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = NULL;
}