Statistics
| Branch: | Revision:

root / hw / milkymist-pfpu.c @ 3e1c0c9a

History | View | Annotate | Download (14 kB)

/*
 *  QEMU model of the Milkymist programmable FPU.
 *
 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Specification available at:
 *   http://www.milkymist.org/socdoc/pfpu.pdf
 *
 */
24

    
25
#include "hw.h"
26
#include "sysbus.h"
27
#include "trace.h"
28
#include "qemu-log.h"
29
#include "qemu-error.h"
30
#include <math.h>
31

    
32
/* Uncomment to enable the per-instruction debug statements wrapped in D_EXEC(). */
/* #define TRACE_EXEC */

/*
 * D_EXEC(x) expands to x when TRACE_EXEC is defined and to nothing
 * otherwise, so debug statements vanish from normal builds.
 */
#ifndef TRACE_EXEC
#    define D_EXEC(x)
#else
#    define D_EXEC(x) x
#endif
39

    
40
/*
 * Word indices (byte offset >> 2) of the control registers, used to index
 * MilkymistPFPUState.regs[].  R_MAX is the number of registers.
 */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12
};
55

    
56
/* Bits of the R_CTL register. */
enum {
    /* Writing this bit to R_CTL starts a microcode run over the mesh. */
    CTL_START_BUSY = 1 << 0,
};
59

    
60
/* Opcodes, i.e. the values of the 4-bit opcode field of an instruction. */
enum {
    OP_NOP     = 0x0,
    OP_FADD    = 0x1,
    OP_FSUB    = 0x2,
    OP_FMUL    = 0x3,
    OP_FABS    = 0x4,
    OP_F2I     = 0x5,
    OP_I2F     = 0x6,
    OP_VECTOUT = 0x7,
    OP_SIN     = 0x8,
    OP_COS     = 0x9,
    OP_ABOVE   = 0xa,
    OP_EQUAL   = 0xb,
    OP_COPY    = 0xc,
    OP_IF      = 0xd,
    OP_TSIGN   = 0xe,
    OP_QUAKE   = 0xf,
};
78

    
79
/*
 * General purpose registers with a fixed role: X and Y are loaded with the
 * current mesh position before each microcode run, FLAGS is the condition
 * tested by OP_IF.
 */
enum {
    GPR_X,
    GPR_Y,
    GPR_FLAGS,
};
84

    
85
/*
 * Result latency of each opcode.  The latency selects the output queue slot
 * that receives the result; MAX_LATENCY is the depth of that queue and
 * equals the largest latency (that of FMUL).
 */
enum {
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,
    LATENCY_VECTOUT = 0,
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,
    MAX_LATENCY     = LATENCY_FMUL
};
103

    
104
/*
 * Layout of the register window, in word indices (byte offset >> 2):
 * first/last index of the general purpose registers and of the microcode
 * window.  MICROCODE_WORDS is the size of the whole microcode store, of
 * which the window exposes the 512-word page selected by R_CODEPAGE.
 */
enum {
    GPR_BEGIN       = 0x100,
    GPR_END         = 0x17f,
    MICROCODE_BEGIN = 0x200,
    MICROCODE_END   = 0x3ff,
    MICROCODE_WORDS = 2048
};
109

    
110
/*
 * Yield the bits of val reinterpreted as a value of the given type.
 *
 * The pun goes through a union instead of a casted pointer: reading a
 * uint32_t object through a float pointer (or vice versa) breaks the strict
 * aliasing rule, whereas reading a different union member is permitted.
 * val is evaluated once; both types are expected to have the same size.
 */
#define REINTERPRET_CAST(type, val) \
    (((union { __typeof__(val) from; type to; }){ .from = (val) }).to)
111

    
112
#ifdef TRACE_EXEC
/* Mnemonic of each opcode, indexed by the OP_* value; debug output only. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
118

    
119
struct MilkymistPFPUState {
120
    SysBusDevice busdev;
121
    CharDriverState *chr;
122
    qemu_irq irq;
123

    
124
    uint32_t regs[R_MAX];
125
    uint32_t gp_regs[128];
126
    uint32_t microcode[MICROCODE_WORDS];
127

    
128
    int output_queue_pos;
129
    uint32_t output_queue[MAX_LATENCY];
130
};
131
typedef struct MilkymistPFPUState MilkymistPFPUState;
132

    
133
static inline target_phys_addr_t
134
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
135
{
136
    return base + 8 * (128 * y + x);
137
}
138

    
139
static inline void
140
output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
141
{
142
    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
143
}
144

    
145
static inline uint32_t
146
output_queue_remove(MilkymistPFPUState *s)
147
{
148
    return s->output_queue[s->output_queue_pos];
149
}
150

    
151
static inline void
152
output_queue_advance(MilkymistPFPUState *s)
153
{
154
    s->output_queue[s->output_queue_pos] = 0;
155
    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
156
}
157

    
158
/*
 * Execute the microcode instruction at R_PC.
 *
 * Fields extracted from the instruction word:
 *   bits 24..18  index of source register A
 *   bits 17..11  index of source register B
 *   bits 10..7   opcode (one of the OP_* values)
 *   bits  6..0   index of the register that receives the value leaving the
 *                output queue in this cycle (0 means no write-back)
 *
 * The result of an operation is not stored directly.  It is placed in the
 * output queue "latency - 1" slots ahead of the current slot and reaches a
 * register only through the destination field of the instruction that is
 * executed when that slot has become the current one.
 *
 * Returns 1 when the caller should go on executing, 0 when the program has
 * ended: either a VECTOUT was executed (R_PC is not advanced and the output
 * queue is not touched) or R_PC points outside the microcode store.
 */
static int pfpu_decode_insn(MilkymistPFPUState *s)
{
    uint32_t pc = s->regs[R_PC];
    uint32_t insn;
    uint32_t reg_a;
    uint32_t reg_b;
    uint32_t op;
    uint32_t reg_d;
    uint32_t r = 0;
    int latency = 0;

    /*
     * R_PC can be written by the guest and is incremented without limit, so
     * it must be validated before it is used as an index.
     */
    if (pc >= MICROCODE_WORDS) {
        error_report("milkymist_pfpu: program counter %u is outside of the "
                "microcode", pc);
        return 0;
    }

    insn = s->microcode[pc];
    reg_a = (insn >> 18) & 0x7f;
    reg_b = (insn >> 11) & 0x7f;
    op = (insn >> 7) & 0xf;
    reg_d = insn & 0x7f;

    switch (op) {
    case OP_NOP:
        break;
    case OP_FADD:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a + b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FADD;
        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FSUB:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a - b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FSUB;
        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FMUL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a * b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FMUL;
        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FABS:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float t = fabsf(a);
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FABS;
        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_F2I:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        int32_t t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_F2I;
        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
    } break;
    case OP_I2F:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_I2F;
        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_VECTOUT:
    {
        /* Both operands are written big-endian to the slot of (X, Y). */
        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
        target_phys_addr_t dma_ptr =
            get_dma_address(s->regs[R_MESHBASE],
                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
        cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
        cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
        s->regs[R_LASTDMA] = dma_ptr + 4;
        /* The cast keeps the argument in step with the %08x conversion. */
        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b,
                    (uint32_t)dma_ptr));
        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
    } break;
    case OP_SIN:
    {
        /*
         * NOTE(review): the integer operand is scaled by 1 / (pi * 4096),
         * not by pi / 4096 -- confirm the intended period against the PFPU
         * specification.  Behaviour is kept as it was.
         */
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_SIN;
        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_COS:
    {
        /* NOTE(review): same scale factor as OP_SIN -- confirm. */
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_COS;
        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_ABOVE:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a > b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_ABOVE;
        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_EQUAL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a == b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_EQUAL;
        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_COPY:
    {
        r = s->gp_regs[reg_a];
        latency = LATENCY_COPY;
        D_EXEC(qemu_log("COPY"));
    } break;
    case OP_IF:
    {
        /* Select A when the flags register is non-zero, B otherwise. */
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        uint32_t f = s->gp_regs[GPR_FLAGS];
        float t = (f != 0) ? a : b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_IF;
        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
    } break;
    case OP_TSIGN:
    {
        /* A with its sign flipped when B is negative. */
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (b < 0) ? -a : a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_TSIGN;
        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_QUAKE:
    {
        /* Integer operation on the raw bit pattern of A. */
        uint32_t a = s->gp_regs[reg_a];
        r = 0x5f3759df - (a >> 1);
        latency = LATENCY_QUAKE;
        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
    } break;

    default:
        error_report("milkymist_pfpu: unknown opcode %d", op);
        break;
    }

    if (!reg_d) {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency));
    } else {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency, reg_d));
    }

    /* VECTOUT ends the program for this mesh position. */
    if (op == OP_VECTOUT) {
        return 0;
    }

    /* store output for this cycle */
    if (reg_d) {
        uint32_t val = output_queue_remove(s);
        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
        s->gp_regs[reg_d] = val;
    }

    output_queue_advance(s);

    /* store op output */
    if (op != OP_NOP) {
        output_queue_insert(s, r, latency-1);
    }

    /* advance PC */
    s->regs[R_PC]++;

    return 1;
}
341

    
342
/*
 * Run the microcode once for every mesh position from (0, 0) up to and
 * including (R_HMESHLAST, R_VMESHLAST), then store the number of processed
 * positions in R_VERTICES and pulse the interrupt line.
 *
 * Before each run GPR_X/GPR_Y are loaded with the position; after each run
 * R_PC is reset to 0.  A run ends when pfpu_decode_insn() returns 0, or is
 * aborted with an error report once MICROCODE_WORDS instructions have been
 * executed.
 */
static void pfpu_start(MilkymistPFPUState *s)
{
    int x, y;
    int i;

    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));

            /* set current position */
            s->gp_regs[GPR_X] = x;
            s->gp_regs[GPR_Y] = y;

            /* run microcode on this position */
            i = 0;
            while (pfpu_decode_insn(s)) {
                /*
                 * Decode at most MICROCODE_WORDS instructions.  The counter
                 * is incremented before the comparison: a post-increment
                 * would allow one decode more, with R_PC already one past
                 * the last microcode word.
                 */
                if (++i >= MICROCODE_WORDS) {
                    error_report("milkymist_pfpu: too many instructions "
                            "executed in microcode. No VECTOUT?");
                    break;
                }
            }

            /* reset pc for next run */
            s->regs[R_PC] = 0;
        }
    }

    /*
     * Both loop variables are now one past their last value, so the product
     * is the vertex count.  Multiply unsigned to avoid signed overflow.
     */
    s->regs[R_VERTICES] = (uint32_t)x * (uint32_t)y;

    trace_milkymist_pfpu_pulse_irq();
    qemu_irq_pulse(s->irq);
}
376

    
377
/*
 * Translate a word index inside the microcode window (MICROCODE_BEGIN ..
 * MICROCODE_END) into an index into s->microcode[], using the 512-word page
 * selected by R_CODEPAGE.
 *
 * R_CODEPAGE is written by the guest without validation, so the result is
 * wrapped into 0 .. MICROCODE_WORDS - 1 to keep every access inside the
 * array.  In-range pages give the same index as before.
 * NOTE(review): wrapping is a defensive choice; how the hardware treats an
 * out-of-range code page is not visible here -- confirm against the spec.
 */
static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
{
    uint32_t index = (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;

    return index % MICROCODE_WORDS;
}
381

    
382
/*
 * MMIO read handler.  addr is the byte offset inside the device window; it
 * selects a control register, a general purpose register or a word of the
 * current microcode page.  Unknown offsets are reported and read as 0.
 */
static uint32_t pfpu_read(void *opaque, target_phys_addr_t addr)
{
    MilkymistPFPUState *s = opaque;
    uint32_t r = 0;

    /* from here on addr is a word index */
    addr >>= 2;

    if (addr < R_MAX) {
        /* control registers R_CTL .. R_CODEBASE */
        r = s->regs[addr];
    } else if (addr >= GPR_BEGIN && addr <= GPR_END) {
        r = s->gp_regs[addr - GPR_BEGIN];
    } else if (addr >= MICROCODE_BEGIN && addr <= MICROCODE_END) {
        r = s->microcode[get_microcode_address(s, addr)];
    } else {
        error_report("milkymist_pfpu: read access to unknown register 0x"
                TARGET_FMT_plx, addr << 2);
    }

    trace_milkymist_pfpu_memory_read(addr << 2, r);

    return r;
}
420

    
421
static void
422
pfpu_write(void *opaque, target_phys_addr_t addr, uint32_t value)
423
{
424
    MilkymistPFPUState *s = opaque;
425

    
426
    trace_milkymist_pfpu_memory_write(addr, value);
427

    
428
    addr >>= 2;
429
    switch (addr) {
430
    case R_CTL:
431
        if (value & CTL_START_BUSY) {
432
            pfpu_start(s);
433
        }
434
        break;
435
    case R_MESHBASE:
436
    case R_HMESHLAST:
437
    case R_VMESHLAST:
438
    case R_CODEPAGE:
439
    case R_VERTICES:
440
    case R_COLLISIONS:
441
    case R_STRAYWRITES:
442
    case R_LASTDMA:
443
    case R_PC:
444
    case R_DREGBASE:
445
    case R_CODEBASE:
446
        s->regs[addr] = value;
447
        break;
448
    case GPR_BEGIN ...  GPR_END:
449
        s->gp_regs[addr - GPR_BEGIN] = value;
450
        break;
451
    case MICROCODE_BEGIN ...  MICROCODE_END:
452
        s->microcode[get_microcode_address(s, addr)] = value;
453
        break;
454

    
455
    default:
456
        error_report("milkymist_pfpu: write access to unknown register 0x"
457
                TARGET_FMT_plx, addr << 2);
458
        break;
459
    }
460
}
461

    
462
/* Read handler table: only the third entry is set, the first two are NULL. */
static CPUReadMemoryFunc * const pfpu_read_fn[] = {
    [0] = NULL,
    [1] = NULL,
    [2] = pfpu_read,
};
467

    
468
/* Write handler table: only the third entry is set, the first two are NULL. */
static CPUWriteMemoryFunc * const pfpu_write_fn[] = {
    [0] = NULL,
    [1] = NULL,
    [2] = pfpu_write,
};
473

    
474
/*
 * Device reset: clear all control registers, general purpose registers,
 * the microcode store and the output queue, and rewind the queue position.
 */
static void milkymist_pfpu_reset(DeviceState *d)
{
    MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
    unsigned int n;

    for (n = 0; n < sizeof(s->regs) / sizeof(s->regs[0]); n++) {
        s->regs[n] = 0;
    }

    for (n = 0; n < sizeof(s->gp_regs) / sizeof(s->gp_regs[0]); n++) {
        s->gp_regs[n] = 0;
    }

    for (n = 0; n < sizeof(s->microcode) / sizeof(s->microcode[0]); n++) {
        s->microcode[n] = 0;
    }

    for (n = 0; n < sizeof(s->output_queue) / sizeof(s->output_queue[0]);
            n++) {
        s->output_queue[n] = 0;
    }
    s->output_queue_pos = 0;
}
493

    
494
/*
 * Device init: set up the interrupt line and register the MMIO window that
 * is served by pfpu_read()/pfpu_write().  Always returns 0.
 */
static int milkymist_pfpu_init(SysBusDevice *dev)
{
    MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);
    int pfpu_regs;

    sysbus_init_irq(dev, &s->irq);

    pfpu_regs = cpu_register_io_memory(pfpu_read_fn, pfpu_write_fn, s,
            DEVICE_NATIVE_ENDIAN);
    /*
     * MICROCODE_END is the index of the last word of the window, so the
     * window spans MICROCODE_END + 1 words.  A size of MICROCODE_END * 4
     * would leave that last word outside the mapped region.
     */
    sysbus_init_mmio(dev, (MICROCODE_END + 1) * 4, pfpu_regs);

    return 0;
}
507

    
508
static const VMStateDescription vmstate_milkymist_pfpu = {
509
    .name = "milkymist-pfpu",
510
    .version_id = 1,
511
    .minimum_version_id = 1,
512
    .minimum_version_id_old = 1,
513
    .fields      = (VMStateField[]) {
514
        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
515
        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
516
        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
517
        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
518
        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
519
        VMSTATE_END_OF_LIST()
520
    }
521
};
522

    
523
static SysBusDeviceInfo milkymist_pfpu_info = {
524
    .init = milkymist_pfpu_init,
525
    .qdev.name  = "milkymist-pfpu",
526
    .qdev.size  = sizeof(MilkymistPFPUState),
527
    .qdev.vmsd  = &vmstate_milkymist_pfpu,
528
    .qdev.reset = milkymist_pfpu_reset,
529
};
530

    
531
static void milkymist_pfpu_register(void)
532
{
533
    sysbus_register_withprop(&milkymist_pfpu_info);
534
}
535

    
536
device_init(milkymist_pfpu_register)