root / tcg / optimize.c @ c8d70272
History | View | Annotate | Download (37 kB)
1 |
/*
|
---|---|
2 |
* Optimizations for Tiny Code Generator for QEMU
|
3 |
*
|
4 |
* Copyright (c) 2010 Samsung Electronics.
|
5 |
* Contributed by Kirill Batuzov <batuzovk@ispras.ru>
|
6 |
*
|
7 |
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
8 |
* of this software and associated documentation files (the "Software"), to deal
|
9 |
* in the Software without restriction, including without limitation the rights
|
10 |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11 |
* copies of the Software, and to permit persons to whom the Software is
|
12 |
* furnished to do so, subject to the following conditions:
|
13 |
*
|
14 |
* The above copyright notice and this permission notice shall be included in
|
15 |
* all copies or substantial portions of the Software.
|
16 |
*
|
17 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19 |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
20 |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21 |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22 |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23 |
* THE SOFTWARE.
|
24 |
*/
|
25 |
|
26 |
#include "config.h" |
27 |
|
28 |
#include <stdlib.h> |
29 |
#include <stdio.h> |
30 |
|
31 |
#include "qemu-common.h" |
32 |
#include "tcg-op.h" |
33 |
|
34 |
#define CASE_OP_32_64(x) \
|
35 |
glue(glue(case INDEX_op_, x), _i32): \
|
36 |
glue(glue(case INDEX_op_, x), _i64)
|
37 |
|
38 |
typedef enum { |
39 |
TCG_TEMP_UNDEF = 0,
|
40 |
TCG_TEMP_CONST, |
41 |
TCG_TEMP_COPY, |
42 |
} tcg_temp_state; |
43 |
|
44 |
struct tcg_temp_info {
|
45 |
tcg_temp_state state; |
46 |
uint16_t prev_copy; |
47 |
uint16_t next_copy; |
48 |
tcg_target_ulong val; |
49 |
tcg_target_ulong mask; |
50 |
}; |
51 |
|
52 |
static struct tcg_temp_info temps[TCG_MAX_TEMPS]; |
53 |
|
54 |
/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
|
55 |
the copy flag from the left temp. */
|
56 |
static void reset_temp(TCGArg temp) |
57 |
{ |
58 |
if (temps[temp].state == TCG_TEMP_COPY) {
|
59 |
if (temps[temp].prev_copy == temps[temp].next_copy) {
|
60 |
temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; |
61 |
} else {
|
62 |
temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; |
63 |
temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; |
64 |
} |
65 |
} |
66 |
temps[temp].state = TCG_TEMP_UNDEF; |
67 |
temps[temp].mask = -1;
|
68 |
} |
69 |
|
70 |
/* Reset all temporaries, given that there are NB_TEMPS of them. */
|
71 |
static void reset_all_temps(int nb_temps) |
72 |
{ |
73 |
int i;
|
74 |
for (i = 0; i < nb_temps; i++) { |
75 |
temps[i].state = TCG_TEMP_UNDEF; |
76 |
temps[i].mask = -1;
|
77 |
} |
78 |
} |
79 |
|
80 |
static int op_bits(TCGOpcode op) |
81 |
{ |
82 |
const TCGOpDef *def = &tcg_op_defs[op];
|
83 |
return def->flags & TCG_OPF_64BIT ? 64 : 32; |
84 |
} |
85 |
|
86 |
static TCGOpcode op_to_movi(TCGOpcode op)
|
87 |
{ |
88 |
switch (op_bits(op)) {
|
89 |
case 32: |
90 |
return INDEX_op_movi_i32;
|
91 |
case 64: |
92 |
return INDEX_op_movi_i64;
|
93 |
default:
|
94 |
fprintf(stderr, "op_to_movi: unexpected return value of "
|
95 |
"function op_bits.\n");
|
96 |
tcg_abort(); |
97 |
} |
98 |
} |
99 |
|
100 |
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
|
101 |
{ |
102 |
TCGArg i; |
103 |
|
104 |
/* If this is already a global, we can't do better. */
|
105 |
if (temp < s->nb_globals) {
|
106 |
return temp;
|
107 |
} |
108 |
|
109 |
/* Search for a global first. */
|
110 |
for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
|
111 |
if (i < s->nb_globals) {
|
112 |
return i;
|
113 |
} |
114 |
} |
115 |
|
116 |
/* If it is a temp, search for a temp local. */
|
117 |
if (!s->temps[temp].temp_local) {
|
118 |
for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
|
119 |
if (s->temps[i].temp_local) {
|
120 |
return i;
|
121 |
} |
122 |
} |
123 |
} |
124 |
|
125 |
/* Failure to find a better representation, return the same temp. */
|
126 |
return temp;
|
127 |
} |
128 |
|
129 |
static bool temps_are_copies(TCGArg arg1, TCGArg arg2) |
130 |
{ |
131 |
TCGArg i; |
132 |
|
133 |
if (arg1 == arg2) {
|
134 |
return true; |
135 |
} |
136 |
|
137 |
if (temps[arg1].state != TCG_TEMP_COPY
|
138 |
|| temps[arg2].state != TCG_TEMP_COPY) { |
139 |
return false; |
140 |
} |
141 |
|
142 |
for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
|
143 |
if (i == arg2) {
|
144 |
return true; |
145 |
} |
146 |
} |
147 |
|
148 |
return false; |
149 |
} |
150 |
|
151 |
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, |
152 |
TCGArg dst, TCGArg src) |
153 |
{ |
154 |
reset_temp(dst); |
155 |
temps[dst].mask = temps[src].mask; |
156 |
assert(temps[src].state != TCG_TEMP_CONST); |
157 |
|
158 |
if (s->temps[src].type == s->temps[dst].type) {
|
159 |
if (temps[src].state != TCG_TEMP_COPY) {
|
160 |
temps[src].state = TCG_TEMP_COPY; |
161 |
temps[src].next_copy = src; |
162 |
temps[src].prev_copy = src; |
163 |
} |
164 |
temps[dst].state = TCG_TEMP_COPY; |
165 |
temps[dst].next_copy = temps[src].next_copy; |
166 |
temps[dst].prev_copy = src; |
167 |
temps[temps[dst].next_copy].prev_copy = dst; |
168 |
temps[src].next_copy = dst; |
169 |
} |
170 |
|
171 |
gen_args[0] = dst;
|
172 |
gen_args[1] = src;
|
173 |
} |
174 |
|
175 |
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) |
176 |
{ |
177 |
reset_temp(dst); |
178 |
temps[dst].state = TCG_TEMP_CONST; |
179 |
temps[dst].val = val; |
180 |
temps[dst].mask = val; |
181 |
gen_args[0] = dst;
|
182 |
gen_args[1] = val;
|
183 |
} |
184 |
|
185 |
static TCGOpcode op_to_mov(TCGOpcode op)
|
186 |
{ |
187 |
switch (op_bits(op)) {
|
188 |
case 32: |
189 |
return INDEX_op_mov_i32;
|
190 |
case 64: |
191 |
return INDEX_op_mov_i64;
|
192 |
default:
|
193 |
fprintf(stderr, "op_to_mov: unexpected return value of "
|
194 |
"function op_bits.\n");
|
195 |
tcg_abort(); |
196 |
} |
197 |
} |
198 |
|
199 |
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
|
200 |
{ |
201 |
uint64_t l64, h64; |
202 |
|
203 |
switch (op) {
|
204 |
CASE_OP_32_64(add): |
205 |
return x + y;
|
206 |
|
207 |
CASE_OP_32_64(sub): |
208 |
return x - y;
|
209 |
|
210 |
CASE_OP_32_64(mul): |
211 |
return x * y;
|
212 |
|
213 |
CASE_OP_32_64(and): |
214 |
return x & y;
|
215 |
|
216 |
CASE_OP_32_64(or): |
217 |
return x | y;
|
218 |
|
219 |
CASE_OP_32_64(xor): |
220 |
return x ^ y;
|
221 |
|
222 |
case INDEX_op_shl_i32:
|
223 |
return (uint32_t)x << (uint32_t)y;
|
224 |
|
225 |
case INDEX_op_shl_i64:
|
226 |
return (uint64_t)x << (uint64_t)y;
|
227 |
|
228 |
case INDEX_op_shr_i32:
|
229 |
return (uint32_t)x >> (uint32_t)y;
|
230 |
|
231 |
case INDEX_op_shr_i64:
|
232 |
return (uint64_t)x >> (uint64_t)y;
|
233 |
|
234 |
case INDEX_op_sar_i32:
|
235 |
return (int32_t)x >> (int32_t)y;
|
236 |
|
237 |
case INDEX_op_sar_i64:
|
238 |
return (int64_t)x >> (int64_t)y;
|
239 |
|
240 |
case INDEX_op_rotr_i32:
|
241 |
return ror32(x, y);
|
242 |
|
243 |
case INDEX_op_rotr_i64:
|
244 |
return ror64(x, y);
|
245 |
|
246 |
case INDEX_op_rotl_i32:
|
247 |
return rol32(x, y);
|
248 |
|
249 |
case INDEX_op_rotl_i64:
|
250 |
return rol64(x, y);
|
251 |
|
252 |
CASE_OP_32_64(not): |
253 |
return ~x;
|
254 |
|
255 |
CASE_OP_32_64(neg): |
256 |
return -x;
|
257 |
|
258 |
CASE_OP_32_64(andc): |
259 |
return x & ~y;
|
260 |
|
261 |
CASE_OP_32_64(orc): |
262 |
return x | ~y;
|
263 |
|
264 |
CASE_OP_32_64(eqv): |
265 |
return ~(x ^ y);
|
266 |
|
267 |
CASE_OP_32_64(nand): |
268 |
return ~(x & y);
|
269 |
|
270 |
CASE_OP_32_64(nor): |
271 |
return ~(x | y);
|
272 |
|
273 |
CASE_OP_32_64(ext8s): |
274 |
return (int8_t)x;
|
275 |
|
276 |
CASE_OP_32_64(ext16s): |
277 |
return (int16_t)x;
|
278 |
|
279 |
CASE_OP_32_64(ext8u): |
280 |
return (uint8_t)x;
|
281 |
|
282 |
CASE_OP_32_64(ext16u): |
283 |
return (uint16_t)x;
|
284 |
|
285 |
case INDEX_op_ext32s_i64:
|
286 |
return (int32_t)x;
|
287 |
|
288 |
case INDEX_op_ext32u_i64:
|
289 |
return (uint32_t)x;
|
290 |
|
291 |
case INDEX_op_muluh_i32:
|
292 |
return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; |
293 |
case INDEX_op_mulsh_i32:
|
294 |
return ((int64_t)(int32_t)x * (int32_t)y) >> 32; |
295 |
|
296 |
case INDEX_op_muluh_i64:
|
297 |
mulu64(&l64, &h64, x, y); |
298 |
return h64;
|
299 |
case INDEX_op_mulsh_i64:
|
300 |
muls64(&l64, &h64, x, y); |
301 |
return h64;
|
302 |
|
303 |
case INDEX_op_div_i32:
|
304 |
/* Avoid crashing on divide by zero, otherwise undefined. */
|
305 |
return (int32_t)x / ((int32_t)y ? : 1); |
306 |
case INDEX_op_divu_i32:
|
307 |
return (uint32_t)x / ((uint32_t)y ? : 1); |
308 |
case INDEX_op_div_i64:
|
309 |
return (int64_t)x / ((int64_t)y ? : 1); |
310 |
case INDEX_op_divu_i64:
|
311 |
return (uint64_t)x / ((uint64_t)y ? : 1); |
312 |
|
313 |
case INDEX_op_rem_i32:
|
314 |
return (int32_t)x % ((int32_t)y ? : 1); |
315 |
case INDEX_op_remu_i32:
|
316 |
return (uint32_t)x % ((uint32_t)y ? : 1); |
317 |
case INDEX_op_rem_i64:
|
318 |
return (int64_t)x % ((int64_t)y ? : 1); |
319 |
case INDEX_op_remu_i64:
|
320 |
return (uint64_t)x % ((uint64_t)y ? : 1); |
321 |
|
322 |
default:
|
323 |
fprintf(stderr, |
324 |
"Unrecognized operation %d in do_constant_folding.\n", op);
|
325 |
tcg_abort(); |
326 |
} |
327 |
} |
328 |
|
329 |
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
|
330 |
{ |
331 |
TCGArg res = do_constant_folding_2(op, x, y); |
332 |
if (op_bits(op) == 32) { |
333 |
res &= 0xffffffff;
|
334 |
} |
335 |
return res;
|
336 |
} |
337 |
|
338 |
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) |
339 |
{ |
340 |
switch (c) {
|
341 |
case TCG_COND_EQ:
|
342 |
return x == y;
|
343 |
case TCG_COND_NE:
|
344 |
return x != y;
|
345 |
case TCG_COND_LT:
|
346 |
return (int32_t)x < (int32_t)y;
|
347 |
case TCG_COND_GE:
|
348 |
return (int32_t)x >= (int32_t)y;
|
349 |
case TCG_COND_LE:
|
350 |
return (int32_t)x <= (int32_t)y;
|
351 |
case TCG_COND_GT:
|
352 |
return (int32_t)x > (int32_t)y;
|
353 |
case TCG_COND_LTU:
|
354 |
return x < y;
|
355 |
case TCG_COND_GEU:
|
356 |
return x >= y;
|
357 |
case TCG_COND_LEU:
|
358 |
return x <= y;
|
359 |
case TCG_COND_GTU:
|
360 |
return x > y;
|
361 |
default:
|
362 |
tcg_abort(); |
363 |
} |
364 |
} |
365 |
|
366 |
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) |
367 |
{ |
368 |
switch (c) {
|
369 |
case TCG_COND_EQ:
|
370 |
return x == y;
|
371 |
case TCG_COND_NE:
|
372 |
return x != y;
|
373 |
case TCG_COND_LT:
|
374 |
return (int64_t)x < (int64_t)y;
|
375 |
case TCG_COND_GE:
|
376 |
return (int64_t)x >= (int64_t)y;
|
377 |
case TCG_COND_LE:
|
378 |
return (int64_t)x <= (int64_t)y;
|
379 |
case TCG_COND_GT:
|
380 |
return (int64_t)x > (int64_t)y;
|
381 |
case TCG_COND_LTU:
|
382 |
return x < y;
|
383 |
case TCG_COND_GEU:
|
384 |
return x >= y;
|
385 |
case TCG_COND_LEU:
|
386 |
return x <= y;
|
387 |
case TCG_COND_GTU:
|
388 |
return x > y;
|
389 |
default:
|
390 |
tcg_abort(); |
391 |
} |
392 |
} |
393 |
|
394 |
static bool do_constant_folding_cond_eq(TCGCond c) |
395 |
{ |
396 |
switch (c) {
|
397 |
case TCG_COND_GT:
|
398 |
case TCG_COND_LTU:
|
399 |
case TCG_COND_LT:
|
400 |
case TCG_COND_GTU:
|
401 |
case TCG_COND_NE:
|
402 |
return 0; |
403 |
case TCG_COND_GE:
|
404 |
case TCG_COND_GEU:
|
405 |
case TCG_COND_LE:
|
406 |
case TCG_COND_LEU:
|
407 |
case TCG_COND_EQ:
|
408 |
return 1; |
409 |
default:
|
410 |
tcg_abort(); |
411 |
} |
412 |
} |
413 |
|
414 |
/* Return 2 if the condition can't be simplified, and the result
|
415 |
of the condition (0 or 1) if it can */
|
416 |
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
|
417 |
TCGArg y, TCGCond c) |
418 |
{ |
419 |
if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
|
420 |
switch (op_bits(op)) {
|
421 |
case 32: |
422 |
return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
|
423 |
case 64: |
424 |
return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
|
425 |
default:
|
426 |
tcg_abort(); |
427 |
} |
428 |
} else if (temps_are_copies(x, y)) { |
429 |
return do_constant_folding_cond_eq(c);
|
430 |
} else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { |
431 |
switch (c) {
|
432 |
case TCG_COND_LTU:
|
433 |
return 0; |
434 |
case TCG_COND_GEU:
|
435 |
return 1; |
436 |
default:
|
437 |
return 2; |
438 |
} |
439 |
} else {
|
440 |
return 2; |
441 |
} |
442 |
} |
443 |
|
444 |
/* Return 2 if the condition can't be simplified, and the result
|
445 |
of the condition (0 or 1) if it can */
|
446 |
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
|
447 |
{ |
448 |
TCGArg al = p1[0], ah = p1[1]; |
449 |
TCGArg bl = p2[0], bh = p2[1]; |
450 |
|
451 |
if (temps[bl].state == TCG_TEMP_CONST
|
452 |
&& temps[bh].state == TCG_TEMP_CONST) { |
453 |
uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
|
454 |
|
455 |
if (temps[al].state == TCG_TEMP_CONST
|
456 |
&& temps[ah].state == TCG_TEMP_CONST) { |
457 |
uint64_t a; |
458 |
a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
|
459 |
return do_constant_folding_cond_64(a, b, c);
|
460 |
} |
461 |
if (b == 0) { |
462 |
switch (c) {
|
463 |
case TCG_COND_LTU:
|
464 |
return 0; |
465 |
case TCG_COND_GEU:
|
466 |
return 1; |
467 |
default:
|
468 |
break;
|
469 |
} |
470 |
} |
471 |
} |
472 |
if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
|
473 |
return do_constant_folding_cond_eq(c);
|
474 |
} |
475 |
return 2; |
476 |
} |
477 |
|
478 |
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) |
479 |
{ |
480 |
TCGArg a1 = *p1, a2 = *p2; |
481 |
int sum = 0; |
482 |
sum += temps[a1].state == TCG_TEMP_CONST; |
483 |
sum -= temps[a2].state == TCG_TEMP_CONST; |
484 |
|
485 |
/* Prefer the constant in second argument, and then the form
|
486 |
op a, a, b, which is better handled on non-RISC hosts. */
|
487 |
if (sum > 0 || (sum == 0 && dest == a2)) { |
488 |
*p1 = a2; |
489 |
*p2 = a1; |
490 |
return true; |
491 |
} |
492 |
return false; |
493 |
} |
494 |
|
495 |
static bool swap_commutative2(TCGArg *p1, TCGArg *p2) |
496 |
{ |
497 |
int sum = 0; |
498 |
sum += temps[p1[0]].state == TCG_TEMP_CONST;
|
499 |
sum += temps[p1[1]].state == TCG_TEMP_CONST;
|
500 |
sum -= temps[p2[0]].state == TCG_TEMP_CONST;
|
501 |
sum -= temps[p2[1]].state == TCG_TEMP_CONST;
|
502 |
if (sum > 0) { |
503 |
TCGArg t; |
504 |
t = p1[0], p1[0] = p2[0], p2[0] = t; |
505 |
t = p1[1], p1[1] = p2[1], p2[1] = t; |
506 |
return true; |
507 |
} |
508 |
return false; |
509 |
} |
510 |
|
511 |
/* Propagate constants and copies, fold constant expressions. */
|
512 |
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
513 |
TCGArg *args, TCGOpDef *tcg_op_defs) |
514 |
{ |
515 |
int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
|
516 |
tcg_target_ulong mask, affected; |
517 |
TCGOpcode op; |
518 |
const TCGOpDef *def;
|
519 |
TCGArg *gen_args; |
520 |
TCGArg tmp; |
521 |
|
522 |
/* Array VALS has an element for each temp.
|
523 |
If this temp holds a constant then its value is kept in VALS' element.
|
524 |
If this temp is a copy of other ones then the other copies are
|
525 |
available through the doubly linked circular list. */
|
526 |
|
527 |
nb_temps = s->nb_temps; |
528 |
nb_globals = s->nb_globals; |
529 |
reset_all_temps(nb_temps); |
530 |
|
531 |
nb_ops = tcg_opc_ptr - s->gen_opc_buf; |
532 |
gen_args = args; |
533 |
for (op_index = 0; op_index < nb_ops; op_index++) { |
534 |
op = s->gen_opc_buf[op_index]; |
535 |
def = &tcg_op_defs[op]; |
536 |
/* Do copy propagation */
|
537 |
if (op == INDEX_op_call) {
|
538 |
int nb_oargs = args[0] >> 16; |
539 |
int nb_iargs = args[0] & 0xffff; |
540 |
for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { |
541 |
if (temps[args[i]].state == TCG_TEMP_COPY) {
|
542 |
args[i] = find_better_copy(s, args[i]); |
543 |
} |
544 |
} |
545 |
} else {
|
546 |
for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
|
547 |
if (temps[args[i]].state == TCG_TEMP_COPY) {
|
548 |
args[i] = find_better_copy(s, args[i]); |
549 |
} |
550 |
} |
551 |
} |
552 |
|
553 |
/* For commutative operations make constant second argument */
|
554 |
switch (op) {
|
555 |
CASE_OP_32_64(add): |
556 |
CASE_OP_32_64(mul): |
557 |
CASE_OP_32_64(and): |
558 |
CASE_OP_32_64(or): |
559 |
CASE_OP_32_64(xor): |
560 |
CASE_OP_32_64(eqv): |
561 |
CASE_OP_32_64(nand): |
562 |
CASE_OP_32_64(nor): |
563 |
CASE_OP_32_64(muluh): |
564 |
CASE_OP_32_64(mulsh): |
565 |
swap_commutative(args[0], &args[1], &args[2]); |
566 |
break;
|
567 |
CASE_OP_32_64(brcond): |
568 |
if (swap_commutative(-1, &args[0], &args[1])) { |
569 |
args[2] = tcg_swap_cond(args[2]); |
570 |
} |
571 |
break;
|
572 |
CASE_OP_32_64(setcond): |
573 |
if (swap_commutative(args[0], &args[1], &args[2])) { |
574 |
args[3] = tcg_swap_cond(args[3]); |
575 |
} |
576 |
break;
|
577 |
CASE_OP_32_64(movcond): |
578 |
if (swap_commutative(-1, &args[1], &args[2])) { |
579 |
args[5] = tcg_swap_cond(args[5]); |
580 |
} |
581 |
/* For movcond, we canonicalize the "false" input reg to match
|
582 |
the destination reg so that the tcg backend can implement
|
583 |
a "move if true" operation. */
|
584 |
if (swap_commutative(args[0], &args[4], &args[3])) { |
585 |
args[5] = tcg_invert_cond(args[5]); |
586 |
} |
587 |
break;
|
588 |
CASE_OP_32_64(add2): |
589 |
swap_commutative(args[0], &args[2], &args[4]); |
590 |
swap_commutative(args[1], &args[3], &args[5]); |
591 |
break;
|
592 |
CASE_OP_32_64(mulu2): |
593 |
CASE_OP_32_64(muls2): |
594 |
swap_commutative(args[0], &args[2], &args[3]); |
595 |
break;
|
596 |
case INDEX_op_brcond2_i32:
|
597 |
if (swap_commutative2(&args[0], &args[2])) { |
598 |
args[4] = tcg_swap_cond(args[4]); |
599 |
} |
600 |
break;
|
601 |
case INDEX_op_setcond2_i32:
|
602 |
if (swap_commutative2(&args[1], &args[3])) { |
603 |
args[5] = tcg_swap_cond(args[5]); |
604 |
} |
605 |
break;
|
606 |
default:
|
607 |
break;
|
608 |
} |
609 |
|
610 |
/* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
|
611 |
and "sub r, 0, a => neg r, a" case. */
|
612 |
switch (op) {
|
613 |
CASE_OP_32_64(shl): |
614 |
CASE_OP_32_64(shr): |
615 |
CASE_OP_32_64(sar): |
616 |
CASE_OP_32_64(rotl): |
617 |
CASE_OP_32_64(rotr): |
618 |
if (temps[args[1]].state == TCG_TEMP_CONST |
619 |
&& temps[args[1]].val == 0) { |
620 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
621 |
tcg_opt_gen_movi(gen_args, args[0], 0); |
622 |
args += 3;
|
623 |
gen_args += 2;
|
624 |
continue;
|
625 |
} |
626 |
break;
|
627 |
CASE_OP_32_64(sub): |
628 |
{ |
629 |
TCGOpcode neg_op; |
630 |
bool have_neg;
|
631 |
|
632 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
633 |
/* Proceed with possible constant folding. */
|
634 |
break;
|
635 |
} |
636 |
if (op == INDEX_op_sub_i32) {
|
637 |
neg_op = INDEX_op_neg_i32; |
638 |
have_neg = TCG_TARGET_HAS_neg_i32; |
639 |
} else {
|
640 |
neg_op = INDEX_op_neg_i64; |
641 |
have_neg = TCG_TARGET_HAS_neg_i64; |
642 |
} |
643 |
if (!have_neg) {
|
644 |
break;
|
645 |
} |
646 |
if (temps[args[1]].state == TCG_TEMP_CONST |
647 |
&& temps[args[1]].val == 0) { |
648 |
s->gen_opc_buf[op_index] = neg_op; |
649 |
reset_temp(args[0]);
|
650 |
gen_args[0] = args[0]; |
651 |
gen_args[1] = args[2]; |
652 |
args += 3;
|
653 |
gen_args += 2;
|
654 |
continue;
|
655 |
} |
656 |
} |
657 |
break;
|
658 |
default:
|
659 |
break;
|
660 |
} |
661 |
|
662 |
/* Simplify expression for "op r, a, 0 => mov r, a" cases */
|
663 |
switch (op) {
|
664 |
CASE_OP_32_64(add): |
665 |
CASE_OP_32_64(sub): |
666 |
CASE_OP_32_64(shl): |
667 |
CASE_OP_32_64(shr): |
668 |
CASE_OP_32_64(sar): |
669 |
CASE_OP_32_64(rotl): |
670 |
CASE_OP_32_64(rotr): |
671 |
CASE_OP_32_64(or): |
672 |
CASE_OP_32_64(xor): |
673 |
if (temps[args[1]].state == TCG_TEMP_CONST) { |
674 |
/* Proceed with possible constant folding. */
|
675 |
break;
|
676 |
} |
677 |
if (temps[args[2]].state == TCG_TEMP_CONST |
678 |
&& temps[args[2]].val == 0) { |
679 |
if (temps_are_copies(args[0], args[1])) { |
680 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
681 |
} else {
|
682 |
s->gen_opc_buf[op_index] = op_to_mov(op); |
683 |
tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
684 |
gen_args += 2;
|
685 |
} |
686 |
args += 3;
|
687 |
continue;
|
688 |
} |
689 |
break;
|
690 |
default:
|
691 |
break;
|
692 |
} |
693 |
|
694 |
/* Simplify using known-zero bits. Currently only ops with a single
|
695 |
output argument is supported. */
|
696 |
mask = -1;
|
697 |
affected = -1;
|
698 |
switch (op) {
|
699 |
CASE_OP_32_64(ext8s): |
700 |
if ((temps[args[1]].mask & 0x80) != 0) { |
701 |
break;
|
702 |
} |
703 |
CASE_OP_32_64(ext8u): |
704 |
mask = 0xff;
|
705 |
goto and_const;
|
706 |
CASE_OP_32_64(ext16s): |
707 |
if ((temps[args[1]].mask & 0x8000) != 0) { |
708 |
break;
|
709 |
} |
710 |
CASE_OP_32_64(ext16u): |
711 |
mask = 0xffff;
|
712 |
goto and_const;
|
713 |
case INDEX_op_ext32s_i64:
|
714 |
if ((temps[args[1]].mask & 0x80000000) != 0) { |
715 |
break;
|
716 |
} |
717 |
case INDEX_op_ext32u_i64:
|
718 |
mask = 0xffffffffU;
|
719 |
goto and_const;
|
720 |
|
721 |
CASE_OP_32_64(and): |
722 |
mask = temps[args[2]].mask;
|
723 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
724 |
and_const:
|
725 |
affected = temps[args[1]].mask & ~mask;
|
726 |
} |
727 |
mask = temps[args[1]].mask & mask;
|
728 |
break;
|
729 |
|
730 |
case INDEX_op_sar_i32:
|
731 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
732 |
mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val; |
733 |
} |
734 |
break;
|
735 |
case INDEX_op_sar_i64:
|
736 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
737 |
mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val; |
738 |
} |
739 |
break;
|
740 |
|
741 |
case INDEX_op_shr_i32:
|
742 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
743 |
mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val; |
744 |
} |
745 |
break;
|
746 |
case INDEX_op_shr_i64:
|
747 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
748 |
mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val; |
749 |
} |
750 |
break;
|
751 |
|
752 |
CASE_OP_32_64(shl): |
753 |
if (temps[args[2]].state == TCG_TEMP_CONST) { |
754 |
mask = temps[args[1]].mask << temps[args[2]].val; |
755 |
} |
756 |
break;
|
757 |
|
758 |
CASE_OP_32_64(neg): |
759 |
/* Set to 1 all bits to the left of the rightmost. */
|
760 |
mask = -(temps[args[1]].mask & -temps[args[1]].mask); |
761 |
break;
|
762 |
|
763 |
CASE_OP_32_64(deposit): |
764 |
tmp = ((1ull << args[4]) - 1); |
765 |
mask = ((temps[args[1]].mask & ~(tmp << args[3])) |
766 |
| ((temps[args[2]].mask & tmp) << args[3])); |
767 |
break;
|
768 |
|
769 |
CASE_OP_32_64(or): |
770 |
CASE_OP_32_64(xor): |
771 |
mask = temps[args[1]].mask | temps[args[2]].mask; |
772 |
break;
|
773 |
|
774 |
CASE_OP_32_64(setcond): |
775 |
mask = 1;
|
776 |
break;
|
777 |
|
778 |
CASE_OP_32_64(movcond): |
779 |
mask = temps[args[3]].mask | temps[args[4]].mask; |
780 |
break;
|
781 |
|
782 |
CASE_OP_32_64(ld8u): |
783 |
case INDEX_op_qemu_ld8u:
|
784 |
mask = 0xff;
|
785 |
break;
|
786 |
CASE_OP_32_64(ld16u): |
787 |
case INDEX_op_qemu_ld16u:
|
788 |
mask = 0xffff;
|
789 |
break;
|
790 |
case INDEX_op_ld32u_i64:
|
791 |
#if TCG_TARGET_REG_BITS == 64 |
792 |
case INDEX_op_qemu_ld32u:
|
793 |
#endif
|
794 |
mask = 0xffffffffu;
|
795 |
break;
|
796 |
|
797 |
CASE_OP_32_64(qemu_ld): |
798 |
{ |
799 |
TCGMemOp mop = args[def->nb_oargs + def->nb_iargs]; |
800 |
if (!(mop & MO_SIGN)) {
|
801 |
mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; |
802 |
} |
803 |
} |
804 |
break;
|
805 |
|
806 |
default:
|
807 |
break;
|
808 |
} |
809 |
|
810 |
/* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
|
811 |
results */
|
812 |
if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
|
813 |
mask &= 0xffffffffu;
|
814 |
} |
815 |
|
816 |
if (mask == 0) { |
817 |
assert(def->nb_oargs == 1);
|
818 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
819 |
tcg_opt_gen_movi(gen_args, args[0], 0); |
820 |
args += def->nb_oargs + def->nb_iargs + def->nb_cargs; |
821 |
gen_args += 2;
|
822 |
continue;
|
823 |
} |
824 |
if (affected == 0) { |
825 |
assert(def->nb_oargs == 1);
|
826 |
if (temps_are_copies(args[0], args[1])) { |
827 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
828 |
} else if (temps[args[1]].state != TCG_TEMP_CONST) { |
829 |
s->gen_opc_buf[op_index] = op_to_mov(op); |
830 |
tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
831 |
gen_args += 2;
|
832 |
} else {
|
833 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
834 |
tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); |
835 |
gen_args += 2;
|
836 |
} |
837 |
args += def->nb_iargs + 1;
|
838 |
continue;
|
839 |
} |
840 |
|
841 |
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
|
842 |
switch (op) {
|
843 |
CASE_OP_32_64(and): |
844 |
CASE_OP_32_64(mul): |
845 |
CASE_OP_32_64(muluh): |
846 |
CASE_OP_32_64(mulsh): |
847 |
if ((temps[args[2]].state == TCG_TEMP_CONST |
848 |
&& temps[args[2]].val == 0)) { |
849 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
850 |
tcg_opt_gen_movi(gen_args, args[0], 0); |
851 |
args += 3;
|
852 |
gen_args += 2;
|
853 |
continue;
|
854 |
} |
855 |
break;
|
856 |
default:
|
857 |
break;
|
858 |
} |
859 |
|
860 |
/* Simplify expression for "op r, a, a => mov r, a" cases */
|
861 |
switch (op) {
|
862 |
CASE_OP_32_64(or): |
863 |
CASE_OP_32_64(and): |
864 |
if (temps_are_copies(args[1], args[2])) { |
865 |
if (temps_are_copies(args[0], args[1])) { |
866 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
867 |
} else {
|
868 |
s->gen_opc_buf[op_index] = op_to_mov(op); |
869 |
tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
870 |
gen_args += 2;
|
871 |
} |
872 |
args += 3;
|
873 |
continue;
|
874 |
} |
875 |
break;
|
876 |
default:
|
877 |
break;
|
878 |
} |
879 |
|
880 |
/* Simplify expression for "op r, a, a => movi r, 0" cases */
|
881 |
switch (op) {
|
882 |
CASE_OP_32_64(sub): |
883 |
CASE_OP_32_64(xor): |
884 |
if (temps_are_copies(args[1], args[2])) { |
885 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
886 |
tcg_opt_gen_movi(gen_args, args[0], 0); |
887 |
gen_args += 2;
|
888 |
args += 3;
|
889 |
continue;
|
890 |
} |
891 |
break;
|
892 |
default:
|
893 |
break;
|
894 |
} |
895 |
|
896 |
/* Propagate constants through copy operations and do constant
|
897 |
folding. Constants will be substituted to arguments by register
|
898 |
allocator where needed and possible. Also detect copies. */
|
899 |
switch (op) {
|
900 |
CASE_OP_32_64(mov): |
901 |
if (temps_are_copies(args[0], args[1])) { |
902 |
args += 2;
|
903 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
904 |
break;
|
905 |
} |
906 |
if (temps[args[1]].state != TCG_TEMP_CONST) { |
907 |
tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
908 |
gen_args += 2;
|
909 |
args += 2;
|
910 |
break;
|
911 |
} |
912 |
/* Source argument is constant. Rewrite the operation and
|
913 |
let movi case handle it. */
|
914 |
op = op_to_movi(op); |
915 |
s->gen_opc_buf[op_index] = op; |
916 |
args[1] = temps[args[1]].val; |
917 |
/* fallthrough */
|
918 |
CASE_OP_32_64(movi): |
919 |
tcg_opt_gen_movi(gen_args, args[0], args[1]); |
920 |
gen_args += 2;
|
921 |
args += 2;
|
922 |
break;
|
923 |
|
924 |
CASE_OP_32_64(not): |
925 |
CASE_OP_32_64(neg): |
926 |
CASE_OP_32_64(ext8s): |
927 |
CASE_OP_32_64(ext8u): |
928 |
CASE_OP_32_64(ext16s): |
929 |
CASE_OP_32_64(ext16u): |
930 |
case INDEX_op_ext32s_i64:
|
931 |
case INDEX_op_ext32u_i64:
|
932 |
if (temps[args[1]].state == TCG_TEMP_CONST) { |
933 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
934 |
tmp = do_constant_folding(op, temps[args[1]].val, 0); |
935 |
tcg_opt_gen_movi(gen_args, args[0], tmp);
|
936 |
gen_args += 2;
|
937 |
args += 2;
|
938 |
break;
|
939 |
} |
940 |
goto do_default;
|
941 |
|
942 |
CASE_OP_32_64(add): |
943 |
CASE_OP_32_64(sub): |
944 |
CASE_OP_32_64(mul): |
945 |
CASE_OP_32_64(or): |
946 |
CASE_OP_32_64(and): |
947 |
CASE_OP_32_64(xor): |
948 |
CASE_OP_32_64(shl): |
949 |
CASE_OP_32_64(shr): |
950 |
CASE_OP_32_64(sar): |
951 |
CASE_OP_32_64(rotl): |
952 |
CASE_OP_32_64(rotr): |
953 |
CASE_OP_32_64(andc): |
954 |
CASE_OP_32_64(orc): |
955 |
CASE_OP_32_64(eqv): |
956 |
CASE_OP_32_64(nand): |
957 |
CASE_OP_32_64(nor): |
958 |
CASE_OP_32_64(muluh): |
959 |
CASE_OP_32_64(mulsh): |
960 |
CASE_OP_32_64(div): |
961 |
CASE_OP_32_64(divu): |
962 |
CASE_OP_32_64(rem): |
963 |
CASE_OP_32_64(remu): |
964 |
if (temps[args[1]].state == TCG_TEMP_CONST |
965 |
&& temps[args[2]].state == TCG_TEMP_CONST) {
|
966 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
967 |
tmp = do_constant_folding(op, temps[args[1]].val,
|
968 |
temps[args[2]].val);
|
969 |
tcg_opt_gen_movi(gen_args, args[0], tmp);
|
970 |
gen_args += 2;
|
971 |
args += 3;
|
972 |
break;
|
973 |
} |
974 |
goto do_default;
|
975 |
|
976 |
CASE_OP_32_64(deposit): |
977 |
if (temps[args[1]].state == TCG_TEMP_CONST |
978 |
&& temps[args[2]].state == TCG_TEMP_CONST) {
|
979 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
980 |
tmp = ((1ull << args[4]) - 1); |
981 |
tmp = (temps[args[1]].val & ~(tmp << args[3])) |
982 |
| ((temps[args[2]].val & tmp) << args[3]); |
983 |
tcg_opt_gen_movi(gen_args, args[0], tmp);
|
984 |
gen_args += 2;
|
985 |
args += 5;
|
986 |
break;
|
987 |
} |
988 |
goto do_default;
|
989 |
|
990 |
CASE_OP_32_64(setcond): |
991 |
tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); |
992 |
if (tmp != 2) { |
993 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
994 |
tcg_opt_gen_movi(gen_args, args[0], tmp);
|
995 |
gen_args += 2;
|
996 |
args += 4;
|
997 |
break;
|
998 |
} |
999 |
goto do_default;
|
1000 |
|
1001 |
CASE_OP_32_64(brcond): |
1002 |
tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); |
1003 |
if (tmp != 2) { |
1004 |
if (tmp) {
|
1005 |
reset_all_temps(nb_temps); |
1006 |
s->gen_opc_buf[op_index] = INDEX_op_br; |
1007 |
gen_args[0] = args[3]; |
1008 |
gen_args += 1;
|
1009 |
} else {
|
1010 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
1011 |
} |
1012 |
args += 4;
|
1013 |
break;
|
1014 |
} |
1015 |
goto do_default;
|
1016 |
|
1017 |
CASE_OP_32_64(movcond): |
1018 |
tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); |
1019 |
if (tmp != 2) { |
1020 |
if (temps_are_copies(args[0], args[4-tmp])) { |
1021 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
1022 |
} else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { |
1023 |
s->gen_opc_buf[op_index] = op_to_movi(op); |
1024 |
tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); |
1025 |
gen_args += 2;
|
1026 |
} else {
|
1027 |
s->gen_opc_buf[op_index] = op_to_mov(op); |
1028 |
tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); |
1029 |
gen_args += 2;
|
1030 |
} |
1031 |
args += 6;
|
1032 |
break;
|
1033 |
} |
1034 |
goto do_default;
|
1035 |
|
1036 |
case INDEX_op_add2_i32:
|
1037 |
case INDEX_op_sub2_i32:
|
1038 |
if (temps[args[2]].state == TCG_TEMP_CONST |
1039 |
&& temps[args[3]].state == TCG_TEMP_CONST
|
1040 |
&& temps[args[4]].state == TCG_TEMP_CONST
|
1041 |
&& temps[args[5]].state == TCG_TEMP_CONST) {
|
1042 |
uint32_t al = temps[args[2]].val;
|
1043 |
uint32_t ah = temps[args[3]].val;
|
1044 |
uint32_t bl = temps[args[4]].val;
|
1045 |
uint32_t bh = temps[args[5]].val;
|
1046 |
uint64_t a = ((uint64_t)ah << 32) | al;
|
1047 |
uint64_t b = ((uint64_t)bh << 32) | bl;
|
1048 |
TCGArg rl, rh; |
1049 |
|
1050 |
if (op == INDEX_op_add2_i32) {
|
1051 |
a += b; |
1052 |
} else {
|
1053 |
a -= b; |
1054 |
} |
1055 |
|
1056 |
/* We emit the extra nop when we emit the add2/sub2. */
|
1057 |
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
|
1058 |
|
1059 |
rl = args[0];
|
1060 |
rh = args[1];
|
1061 |
s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
1062 |
s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; |
1063 |
tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
|
1064 |
tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); |
1065 |
gen_args += 4;
|
1066 |
args += 6;
|
1067 |
break;
|
1068 |
} |
1069 |
goto do_default;
|
1070 |
|
1071 |
case INDEX_op_mulu2_i32:
|
1072 |
if (temps[args[2]].state == TCG_TEMP_CONST |
1073 |
&& temps[args[3]].state == TCG_TEMP_CONST) {
|
1074 |
uint32_t a = temps[args[2]].val;
|
1075 |
uint32_t b = temps[args[3]].val;
|
1076 |
uint64_t r = (uint64_t)a * b; |
1077 |
TCGArg rl, rh; |
1078 |
|
1079 |
/* We emit the extra nop when we emit the mulu2. */
|
1080 |
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
|
1081 |
|
1082 |
rl = args[0];
|
1083 |
rh = args[1];
|
1084 |
s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
1085 |
s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; |
1086 |
tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
|
1087 |
tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); |
1088 |
gen_args += 4;
|
1089 |
args += 4;
|
1090 |
break;
|
1091 |
} |
1092 |
goto do_default;
|
1093 |
|
1094 |
case INDEX_op_brcond2_i32:
|
1095 |
tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); |
1096 |
if (tmp != 2) { |
1097 |
if (tmp) {
|
1098 |
reset_all_temps(nb_temps); |
1099 |
s->gen_opc_buf[op_index] = INDEX_op_br; |
1100 |
gen_args[0] = args[5]; |
1101 |
gen_args += 1;
|
1102 |
} else {
|
1103 |
s->gen_opc_buf[op_index] = INDEX_op_nop; |
1104 |
} |
1105 |
} else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) |
1106 |
&& temps[args[2]].state == TCG_TEMP_CONST
|
1107 |
&& temps[args[3]].state == TCG_TEMP_CONST
|
1108 |
&& temps[args[2]].val == 0 |
1109 |
&& temps[args[3]].val == 0) { |
1110 |
/* Simplify LT/GE comparisons vs zero to a single compare
|
1111 |
vs the high word of the input. */
|
1112 |
reset_all_temps(nb_temps); |
1113 |
s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; |
1114 |
gen_args[0] = args[1]; |
1115 |
gen_args[1] = args[3]; |
1116 |
gen_args[2] = args[4]; |
1117 |
gen_args[3] = args[5]; |
1118 |
gen_args += 4;
|
1119 |
} else {
|
1120 |
goto do_default;
|
1121 |
} |
1122 |
args += 6;
|
1123 |
break;
|
1124 |
|
1125 |
case INDEX_op_setcond2_i32:
|
1126 |
tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); |
1127 |
if (tmp != 2) { |
1128 |
s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
1129 |
tcg_opt_gen_movi(gen_args, args[0], tmp);
|
1130 |
gen_args += 2;
|
1131 |
} else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) |
1132 |
&& temps[args[3]].state == TCG_TEMP_CONST
|
1133 |
&& temps[args[4]].state == TCG_TEMP_CONST
|
1134 |
&& temps[args[3]].val == 0 |
1135 |
&& temps[args[4]].val == 0) { |
1136 |
/* Simplify LT/GE comparisons vs zero to a single compare
|
1137 |
vs the high word of the input. */
|
1138 |
s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; |
1139 |
reset_temp(args[0]);
|
1140 |
gen_args[0] = args[0]; |
1141 |
gen_args[1] = args[2]; |
1142 |
gen_args[2] = args[4]; |
1143 |
gen_args[3] = args[5]; |
1144 |
gen_args += 4;
|
1145 |
} else {
|
1146 |
goto do_default;
|
1147 |
} |
1148 |
args += 6;
|
1149 |
break;
|
1150 |
|
1151 |
case INDEX_op_call:
|
1152 |
nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); |
1153 |
if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | |
1154 |
TCG_CALL_NO_WRITE_GLOBALS))) { |
1155 |
for (i = 0; i < nb_globals; i++) { |
1156 |
reset_temp(i); |
1157 |
} |
1158 |
} |
1159 |
for (i = 0; i < (args[0] >> 16); i++) { |
1160 |
reset_temp(args[i + 1]);
|
1161 |
} |
1162 |
i = nb_call_args + 3;
|
1163 |
while (i) {
|
1164 |
*gen_args = *args; |
1165 |
args++; |
1166 |
gen_args++; |
1167 |
i--; |
1168 |
} |
1169 |
break;
|
1170 |
|
1171 |
default:
|
1172 |
do_default:
|
1173 |
/* Default case: we know nothing about operation (or were unable
|
1174 |
to compute the operation result) so no propagation is done.
|
1175 |
We trash everything if the operation is the end of a basic
|
1176 |
block, otherwise we only trash the output args. "mask" is
|
1177 |
the non-zero bits mask for the first output arg. */
|
1178 |
if (def->flags & TCG_OPF_BB_END) {
|
1179 |
reset_all_temps(nb_temps); |
1180 |
} else {
|
1181 |
for (i = 0; i < def->nb_oargs; i++) { |
1182 |
reset_temp(args[i]); |
1183 |
/* Save the corresponding known-zero bits mask for the
|
1184 |
first output argument (only one supported so far). */
|
1185 |
if (i == 0) { |
1186 |
temps[args[i]].mask = mask; |
1187 |
} |
1188 |
} |
1189 |
} |
1190 |
for (i = 0; i < def->nb_args; i++) { |
1191 |
gen_args[i] = args[i]; |
1192 |
} |
1193 |
args += def->nb_args; |
1194 |
gen_args += def->nb_args; |
1195 |
break;
|
1196 |
} |
1197 |
} |
1198 |
|
1199 |
return gen_args;
|
1200 |
} |
1201 |
|
1202 |
TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, |
1203 |
TCGArg *args, TCGOpDef *tcg_op_defs) |
1204 |
{ |
1205 |
TCGArg *res; |
1206 |
res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); |
1207 |
return res;
|
1208 |
} |