Statistics
| Branch: | Revision:

root / hw / milkymist-pfpu.c @ a8170e5e

History | View | Annotate | Download (14.3 kB)

1
/*
 *  QEMU model of the Milkymist programmable FPU.
 *
 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Specification available at:
 *   http://www.milkymist.org/socdoc/pfpu.pdf
 *
 */
24

    
25
#include "hw.h"
26
#include "sysbus.h"
27
#include "trace.h"
28
#include "qemu-log.h"
29
#include "qemu-error.h"
30
#include <math.h>
31

    
32
/* #define TRACE_EXEC */
33

    
34
/*
 * D_EXEC(x) expands to its argument only when TRACE_EXEC is defined;
 * otherwise the wrapped statement is compiled out entirely.
 */
#ifndef TRACE_EXEC
#    define D_EXEC(x)
#else
#    define D_EXEC(x) x
#endif
39

    
40
/*
 * Control/status register numbers.  These are word indexes: the MMIO
 * handlers shift the byte offset right by two before matching them, and
 * they also index the regs[] array of the device state.
 */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12    /* number of registers, not a register itself */
};
55

    
56
/* Bits of the R_CTL register. */
enum {
    CTL_START_BUSY = 0x1,    /* bit 0: writing it starts a microcode run */
};
59

    
60
/* Opcodes, as found in the 4-bit opcode field of a microcode word. */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15,
};
78

    
79
/* General purpose registers with a fixed role in the model. */
enum {
    GPR_X = 0,    /* loaded with the current mesh column before each run */
    GPR_Y,        /* loaded with the current mesh row before each run */
    GPR_FLAGS,    /* condition read by OP_IF */
};
84

    
85
/*
 * Result latency of each opcode, in instruction slots.  MAX_LATENCY sizes
 * the output queue and therefore must equal the largest entry (FMUL).
 */
enum {
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,
    LATENCY_VECTOUT = 0,
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,
    MAX_LATENCY     = LATENCY_FMUL
};
103

    
104
/*
 * Word-index windows inside the MMIO region (byte offset >> 2):
 * 0x100..0x17f maps the general purpose registers, 0x200..0x3ff maps one
 * page of the microcode memory.
 */
#define GPR_BEGIN       0x100
#define GPR_END         0x17f
#define MICROCODE_BEGIN 0x200
#define MICROCODE_END   0x3ff

/* Total size of the microcode memory, in 32-bit words. */
#define MICROCODE_WORDS 2048

/* Read the storage of lvalue 'val' as if it had type 'type'. */
#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
111

    
112
#ifdef TRACE_EXEC
/* Mnemonic for each opcode; only used by the TRACE_EXEC log output. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
118

    
119
struct MilkymistPFPUState {
120
    SysBusDevice busdev;
121
    MemoryRegion regs_region;
122
    CharDriverState *chr;
123
    qemu_irq irq;
124

    
125
    uint32_t regs[R_MAX];
126
    uint32_t gp_regs[128];
127
    uint32_t microcode[MICROCODE_WORDS];
128

    
129
    int output_queue_pos;
130
    uint32_t output_queue[MAX_LATENCY];
131
};
132
typedef struct MilkymistPFPUState MilkymistPFPUState;
133

    
134
static inline hwaddr
135
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
136
{
137
    return base + 8 * (128 * y + x);
138
}
139

    
140
static inline void
141
output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
142
{
143
    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
144
}
145

    
146
static inline uint32_t
147
output_queue_remove(MilkymistPFPUState *s)
148
{
149
    return s->output_queue[s->output_queue_pos];
150
}
151

    
152
static inline void
153
output_queue_advance(MilkymistPFPUState *s)
154
{
155
    s->output_queue[s->output_queue_pos] = 0;
156
    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
157
}
158

    
159
/*
 * Decode and execute the microcode word at the current program counter.
 *
 * Instruction word layout as decoded here:
 *   bits 24..18  source register A
 *   bits 17..11  source register B
 *   bits 10..7   opcode
 *   bits  6..0   destination register (0 means "no write-back")
 *
 * Every executed instruction except VECTOUT does three things: it writes
 * the value maturing in the output queue this cycle to its destination
 * register (if non-zero), advances the queue, and - unless it is a NOP -
 * queues its own result 'latency' slots ahead.  It then increments R_PC.
 *
 * VECTOUT writes registers A and B as two big-endian words to the DMA
 * address derived from R_MESHBASE, GPR_X and GPR_Y, records the address
 * of the second word in R_LASTDMA and ends the program.
 *
 * R_PC is writable by the guest, so it is validated before it is used to
 * index the microcode memory.
 *
 * Returns 1 when the program should continue with the next instruction,
 * 0 when it terminated (VECTOUT executed, or R_PC outside the microcode
 * memory).
 */
static int pfpu_decode_insn(MilkymistPFPUState *s)
{
    uint32_t pc = s->regs[R_PC];
    uint32_t insn;
    uint32_t reg_a;
    uint32_t reg_b;
    uint32_t op;
    uint32_t reg_d;
    uint32_t r = 0;
    int latency = 0;

    /* never fetch beyond the end of the microcode array */
    if (pc >= MICROCODE_WORDS) {
        error_report("milkymist_pfpu: program counter %u is outside of the "
                "microcode memory", pc);
        return 0;
    }

    insn = s->microcode[pc];
    reg_a = (insn >> 18) & 0x7f;
    reg_b = (insn >> 11) & 0x7f;
    op = (insn >> 7) & 0xf;
    reg_d = insn & 0x7f;

    switch (op) {
    case OP_NOP:
        break;
    case OP_FADD:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a + b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FADD;
        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FSUB:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a - b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FSUB;
        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FMUL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a * b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FMUL;
        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FABS:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float t = fabsf(a);
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FABS;
        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_F2I:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        int32_t t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_F2I;
        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
    } break;
    case OP_I2F:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_I2F;
        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_VECTOUT:
    {
        /* the two output words are stored big-endian in guest memory */
        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
        hwaddr dma_ptr =
            get_dma_address(s->regs[R_MESHBASE],
                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
        cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
        cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
        s->regs[R_LASTDMA] = dma_ptr + 4;
        /* dma_ptr is a hwaddr, so it needs the matching format macro */
        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=" TARGET_FMT_plx "\n",
                    a, b, dma_ptr));
        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
    } break;
    case OP_SIN:
    {
        /* the operand is an integer angle, scaled by 1 / (pi * 4096) */
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_SIN;
        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_COS:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_COS;
        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_ABOVE:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a > b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_ABOVE;
        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_EQUAL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a == b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_EQUAL;
        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_COPY:
    {
        r = s->gp_regs[reg_a];
        latency = LATENCY_COPY;
        D_EXEC(qemu_log("COPY"));
    } break;
    case OP_IF:
    {
        /* selects A when the flags register is non-zero, B otherwise */
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        uint32_t f = s->gp_regs[GPR_FLAGS];
        float t = (f != 0) ? a : b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_IF;
        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
    } break;
    case OP_TSIGN:
    {
        /* negates A when B is negative */
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (b < 0) ? -a : a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_TSIGN;
        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_QUAKE:
    {
        /* integer-only operation on the raw bits of the operand */
        uint32_t a = s->gp_regs[reg_a];
        r = 0x5f3759df - (a >> 1);
        latency = LATENCY_QUAKE;
        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
    } break;

    default:
        error_report("milkymist_pfpu: unknown opcode %d", op);
        break;
    }

    if (!reg_d) {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency));
    } else {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency, reg_d));
    }

    /* VECTOUT ends the program; PC and output queue are left as they are */
    if (op == OP_VECTOUT) {
        return 0;
    }

    /* store output for this cycle */
    if (reg_d) {
        uint32_t val = output_queue_remove(s);
        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
        s->gp_regs[reg_d] = val;
    }

    output_queue_advance(s);

    /* store op output */
    if (op != OP_NOP) {
        output_queue_insert(s, r, latency-1);
    }

    /* advance PC */
    s->regs[R_PC]++;

    return 1;
}
342

    
343
/*
 * Run the microcode program once for every mesh position from (0, 0) to
 * (R_HMESHLAST, R_VMESHLAST) inclusive, then store the number of processed
 * vertices in R_VERTICES and pulse the interrupt line.
 *
 * Each run executes instructions until pfpu_decode_insn() reports the end
 * of the program.  A run is aborted with an error once MICROCODE_WORDS
 * instructions have been executed, so a program without VECTOUT can
 * neither loop forever nor run past the end of the microcode memory.
 *
 * NOTE(review): the mesh bounds come straight from guest-written
 * registers; a value of UINT32_MAX would keep the corresponding loop from
 * terminating.  The valid range is not visible here - confirm against the
 * specification before clamping.
 */
static void pfpu_start(MilkymistPFPUState *s)
{
    /* unsigned like the registers they are compared against */
    uint32_t x, y;
    int i;

    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
            D_EXEC(qemu_log("\nprocessing x=%u y=%u\n", x, y));

            /* set current position */
            s->gp_regs[GPR_X] = x;
            s->gp_regs[GPR_Y] = y;

            /* run microcode on this position */
            i = 0;
            while (pfpu_decode_insn(s)) {
                /*
                 * decode at most MICROCODE_WORDS instructions; the counter
                 * is incremented before the comparison so that the limit
                 * is not exceeded by one
                 */
                if (++i >= MICROCODE_WORDS) {
                    error_report("milkymist_pfpu: too many instructions "
                            "executed in microcode. No VECTOUT?");
                    break;
                }
            }

            /* reset pc for next run */
            s->regs[R_PC] = 0;
        }
    }

    /* after the loops x and y hold the column and row counts */
    s->regs[R_VERTICES] = x * y;

    trace_milkymist_pfpu_pulse_irq();
    qemu_irq_pulse(s->irq);
}
377

    
378
/*
 * Translate a word index inside the MICROCODE_BEGIN..MICROCODE_END window
 * into an index into the microcode[] array.  R_CODEPAGE selects which
 * 512-word page of the array is visible through the window.
 *
 * R_CODEPAGE is guest-writable and not range-checked when written, so the
 * result is wrapped to MICROCODE_WORDS to keep every access inside the
 * array.  In-range pages (0..3) are unaffected.
 * NOTE(review): real hardware presumably ignores the upper page bits as
 * well - confirm against the specification.
 */
static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
{
    uint32_t index = (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;

    return index % MICROCODE_WORDS;
}
382

    
383
/*
 * MMIO read handler.  The byte offset is converted to a word index and
 * dispatched to the control registers, the general purpose registers or
 * the currently selected microcode page.  Reads from any other offset are
 * reported and return 0.
 */
static uint64_t pfpu_read(void *opaque, hwaddr addr,
                          unsigned size)
{
    MilkymistPFPUState *s = opaque;
    const hwaddr word = addr >> 2;
    uint32_t result = 0;

    if (word < R_MAX) {
        /* R_CTL .. R_CODEBASE */
        result = s->regs[word];
    } else if (word >= GPR_BEGIN && word <= GPR_END) {
        result = s->gp_regs[word - GPR_BEGIN];
    } else if (word >= MICROCODE_BEGIN && word <= MICROCODE_END) {
        result = s->microcode[get_microcode_address(s, word)];
    } else {
        error_report("milkymist_pfpu: read access to unknown register 0x"
                TARGET_FMT_plx, word << 2);
    }

    trace_milkymist_pfpu_memory_read(word << 2, result);

    return result;
}
422

    
423
/*
 * MMIO write handler.  A write to R_CTL with CTL_START_BUSY set starts a
 * microcode run (the written value itself is not stored); writes to the
 * other control registers, the general purpose registers and the selected
 * microcode page are stored.  Writes to any other offset are reported and
 * ignored.
 */
static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
                       unsigned size)
{
    MilkymistPFPUState *s = opaque;
    const hwaddr word = addr >> 2;

    trace_milkymist_pfpu_memory_write(addr, value);

    if (word == R_CTL) {
        if (value & CTL_START_BUSY) {
            pfpu_start(s);
        }
    } else if (word < R_MAX) {
        /* R_MESHBASE .. R_CODEBASE */
        s->regs[word] = value;
    } else if (word >= GPR_BEGIN && word <= GPR_END) {
        s->gp_regs[word - GPR_BEGIN] = value;
    } else if (word >= MICROCODE_BEGIN && word <= MICROCODE_END) {
        s->microcode[get_microcode_address(s, word)] = value;
    } else {
        error_report("milkymist_pfpu: write access to unknown register 0x"
                TARGET_FMT_plx, word << 2);
    }
}
463

    
464
static const MemoryRegionOps pfpu_mmio_ops = {
465
    .read = pfpu_read,
466
    .write = pfpu_write,
467
    .valid = {
468
        .min_access_size = 4,
469
        .max_access_size = 4,
470
    },
471
    .endianness = DEVICE_NATIVE_ENDIAN,
472
};
473

    
474
/*
 * Device reset: zero the control registers, the general purpose registers,
 * the microcode memory and the output queue, and rewind the queue
 * position.
 */
static void milkymist_pfpu_reset(DeviceState *d)
{
    MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
    unsigned int n;

    s->output_queue_pos = 0;
    for (n = 0; n < MAX_LATENCY; n++) {
        s->output_queue[n] = 0;
    }

    for (n = 0; n < MICROCODE_WORDS; n++) {
        s->microcode[n] = 0;
    }

    for (n = 0; n < sizeof(s->gp_regs) / sizeof(s->gp_regs[0]); n++) {
        s->gp_regs[n] = 0;
    }

    for (n = 0; n < R_MAX; n++) {
        s->regs[n] = 0;
    }
}
493

    
494
/*
 * SysBus init callback: register the interrupt line and the MMIO region.
 *
 * The region has to cover every word index the read/write handlers decode,
 * i.e. 0 .. MICROCODE_END inclusive.  That is (MICROCODE_END + 1) words;
 * a size of MICROCODE_END * 4 bytes would leave the last microcode word
 * outside of the region.
 *
 * Always returns 0.
 */
static int milkymist_pfpu_init(SysBusDevice *dev)
{
    MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);

    sysbus_init_irq(dev, &s->irq);

    memory_region_init_io(&s->regs_region, &pfpu_mmio_ops, s,
            "milkymist-pfpu", (MICROCODE_END + 1) * 4);
    sysbus_init_mmio(dev, &s->regs_region);

    return 0;
}
506

    
507
static const VMStateDescription vmstate_milkymist_pfpu = {
508
    .name = "milkymist-pfpu",
509
    .version_id = 1,
510
    .minimum_version_id = 1,
511
    .minimum_version_id_old = 1,
512
    .fields      = (VMStateField[]) {
513
        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
514
        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
515
        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
516
        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
517
        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
518
        VMSTATE_END_OF_LIST()
519
    }
520
};
521

    
522
/* Class initializer: install the init, reset and migration hooks. */
static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
{
    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
    DeviceClass *devc = DEVICE_CLASS(klass);

    devc->vmsd = &vmstate_milkymist_pfpu;
    devc->reset = milkymist_pfpu_reset;
    sbc->init = milkymist_pfpu_init;
}
531

    
532
static TypeInfo milkymist_pfpu_info = {
533
    .name          = "milkymist-pfpu",
534
    .parent        = TYPE_SYS_BUS_DEVICE,
535
    .instance_size = sizeof(MilkymistPFPUState),
536
    .class_init    = milkymist_pfpu_class_init,
537
};
538

    
539
static void milkymist_pfpu_register_types(void)
540
{
541
    type_register_static(&milkymist_pfpu_info);
542
}
543

    
544
type_init(milkymist_pfpu_register_types)