/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

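/* Expand to the pair of case labels for the 32-bit and 64-bit variant of
   an opcode; e.g. CASE_OP_32_64(add) becomes
   "case INDEX_op_add_i32: case INDEX_op_add_i64".  */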
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
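
/* Optimizer state for each temp.  A temp in the TCG_TEMP_CONST state has
   its value in "val".  Temps known to hold the same value are kept in the
   TCG_TEMP_COPY state and linked into a doubly linked circular list via
   "next_copy"/"prev_copy".  "mask" tracks known-zero bits: a bit that is
   clear in "mask" is known to be zero in the temp's value.  */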

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP had exactly one other
   copy, clear the copy flag from that remaining temp as well.  */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
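
/* Pick a canonical copy of TEMP to use in its place: prefer a global,
   then a temp local, otherwise return TEMP itself.  */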
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* If we failed to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}
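
/* Emit the arguments for "mov dst, src" and record dst as a copy of src,
   linking dst into src's circular copy list when the two temps have the
   same type.  Constants never come through here; tcg_opt_gen_movi below
   handles them and also records that only the bits set in the constant
   can be nonzero.  */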
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (uint32_t)y;

    case INDEX_op_shl_i64:
        return (uint64_t)x << (uint64_t)y;

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (uint32_t)y;

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (uint64_t)y;

    case INDEX_op_sar_i32:
        return (int32_t)x >> (int32_t)y;

    case INDEX_op_sar_i64:
        return (int64_t)x >> (int64_t)y;

    case INDEX_op_rotr_i32:
        return ror32(x, y);

    case INDEX_op_rotr_i64:
        return ror64(x, y);

    case INDEX_op_rotl_i32:
        return rol32(x, y);

    case INDEX_op_rotl_i64:
        return rol64(x, y);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
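
/* Fold a constant operation and truncate the result to the width of OP,
   so that 32-bit results are stored zero-extended.  */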
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
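
/* Normalize a commutative operation: put a constant argument second and
   prefer the "op r, r, b" form.  For example, "add t0, $1, t1" becomes
   "add t0, t1, $1" and "add t0, t1, t0" becomes "add t0, t0, t1".
   Returns true if the arguments were swapped, so callers can adjust a
   condition code accordingly.  */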
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask, affected;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;

    /* The TEMPS array has an element for each temp.
       If this temp holds a constant then its value is kept in the element's
       VAL field.  If this temp is a copy of other ones then the other copies
       are available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (op == INDEX_op_call) {
            int nb_oargs = args[0] >> 16;
            int nb_iargs = args[0] & 0xffff;
            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        } else {
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        }

        /* For commutative operations, make the constant the second argument. */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case.  */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                /* Proceed with possible constant folding. */
                break;
            }
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }
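
        /* Worked example for the known-zero bits tracking below: after
           "ext8u_i32 t0, t1" only the low 8 bits of t0 can be nonzero, so
           a later "and_i32 t2, t0, $0xff" changes no bits ("affected" is 0)
           and is reduced to a mov, or to a nop if t2 already equals t0.  */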
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask << temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            tmp = ((1ull << args[4]) - 1);
            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
                    | ((temps[args[2]].mask & tmp) << args[3]));
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
        case INDEX_op_qemu_ld8u:
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
        case INDEX_op_qemu_ld16u:
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
#if TCG_TARGET_REG_BITS == 64
        case INDEX_op_qemu_ld32u:
#endif
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops (non-64-bit ops and non-load/store ops) generate
           32-bit results. */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
            mask &= 0xffffffffu;
        }

        if (mask == 0) {
            assert(def->nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
            gen_args += 2;
            continue;
        }
        if (affected == 0) {
            assert(def->nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += def->nb_iargs + 1;
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let movi case handle it. */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = ((1ull << args[4]) - 1);
                tmp = (temps[args[1]].val & ~(tmp << args[3]))
                      | ((temps[args[2]].val & tmp) << args[3]);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1]);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
                for (i = 0; i < def->nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}
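
/* Entry point for the pass: run constant folding and copy propagation over
   the generated opcode stream and return the new end of the rewritten
   argument array.  */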
TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
        TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}