Statistics
| Branch: | Revision:

root / hw / net / e1000.c @ 49ab747f

History | View | Annotate | Download (44 kB)

1
/*
2
 * QEMU e1000 emulation
3
 *
4
 * Software developer's manual:
5
 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6
 *
7
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8
 * Copyright (c) 2008 Qumranet
9
 * Based on work done by:
10
 * Copyright (c) 2007 Dan Aloni
11
 * Copyright (c) 2004 Antony T Curtis
12
 *
13
 * This library is free software; you can redistribute it and/or
14
 * modify it under the terms of the GNU Lesser General Public
15
 * License as published by the Free Software Foundation; either
16
 * version 2 of the License, or (at your option) any later version.
17
 *
18
 * This library is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21
 * Lesser General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU Lesser General Public
24
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25
 */
26

    
27

    
28
#include "hw/hw.h"
29
#include "hw/pci/pci.h"
30
#include "net/net.h"
31
#include "net/checksum.h"
32
#include "hw/loader.h"
33
#include "sysemu/sysemu.h"
34
#include "sysemu/dma.h"
35

    
36
#include "hw/e1000_hw.h"
37

    
38
#define E1000_DEBUG
39

    
40
#ifdef E1000_DEBUG
/* Debug categories; each enumerator is the bit position used by DBGBIT(). */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Mask with only the bit of debug category x set. */
#define DBGBIT(x)       (1 << DEBUG_##x)

/* Categories whose messages are printed. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

/* Print an "e1000: "-prefixed message on stderr if category `what` is on. */
#define DBGOUT(what, fmt, ...) \
    do { \
        if (debugflags & DBGBIT(what)) { \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)
#else
/* Debugging compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) \
    do { \
    } while (0)
#endif
57

    
58
#define IOPORT_SIZE       0x40
59
#define PNPMMIO_SIZE      0x20000
60
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
61

    
62
/* this is the size past which hardware will drop packets when setting LPE=0 */
63
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
64
/* this is the size past which hardware will drop packets when setting LPE=1 */
65
#define MAXIMUM_ETHERNET_LPE_SIZE 16384
66

    
67
/*
68
 * HW models:
69
 *  E1000_DEV_ID_82540EM works with Windows and Linux
70
 *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
71
 *        appears to perform better than 82540EM, but breaks with Linux 2.6.18
72
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73
 *  Others never tested
74
 */
75
enum { E1000_DEVID = E1000_DEV_ID_82540EM };
76

    
77
/*
78
 * May need to specify additional MAC-to-PHY entries --
79
 * Intel's Windows driver refuses to initialize unless they match
80
 */
81
enum {
82
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?                0xcc2 :
83
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?        0xc30 :
84
                   /* default to E1000_DEV_ID_82540EM */        0xc20
85
};
86

    
87
typedef struct E1000State_st {
88
    PCIDevice dev;
89
    NICState *nic;
90
    NICConf conf;
91
    MemoryRegion mmio;
92
    MemoryRegion io;
93

    
94
    uint32_t mac_reg[0x8000];
95
    uint16_t phy_reg[0x20];
96
    uint16_t eeprom_data[64];
97

    
98
    uint32_t rxbuf_size;
99
    uint32_t rxbuf_min_shift;
100
    struct e1000_tx {
101
        unsigned char header[256];
102
        unsigned char vlan_header[4];
103
        /* Fields vlan and data must not be reordered or separated. */
104
        unsigned char vlan[4];
105
        unsigned char data[0x10000];
106
        uint16_t size;
107
        unsigned char sum_needed;
108
        unsigned char vlan_needed;
109
        uint8_t ipcss;
110
        uint8_t ipcso;
111
        uint16_t ipcse;
112
        uint8_t tucss;
113
        uint8_t tucso;
114
        uint16_t tucse;
115
        uint8_t hdr_len;
116
        uint16_t mss;
117
        uint32_t paylen;
118
        uint16_t tso_frames;
119
        char tse;
120
        int8_t ip;
121
        int8_t tcp;
122
        char cptse;     // current packet tse bit
123
    } tx;
124

    
125
    struct {
126
        uint32_t val_in;        // shifted in from guest driver
127
        uint16_t bitnum_in;
128
        uint16_t bitnum_out;
129
        uint16_t reading;
130
        uint32_t old_eecd;
131
    } eecd_state;
132

    
133
    QEMUTimer *autoneg_timer;
134

    
135
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
136
#define E1000_FLAG_AUTONEG_BIT 0
137
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
138
    uint32_t compat_flags;
139
} E1000State;
140

    
141
#define        defreg(x)        x = (E1000_##x>>2)
142
enum {
143
    defreg(CTRL),        defreg(EECD),        defreg(EERD),        defreg(GPRC),
144
    defreg(GPTC),        defreg(ICR),        defreg(ICS),        defreg(IMC),
145
    defreg(IMS),        defreg(LEDCTL),        defreg(MANC),        defreg(MDIC),
146
    defreg(MPC),        defreg(PBA),        defreg(RCTL),        defreg(RDBAH),
147
    defreg(RDBAL),        defreg(RDH),        defreg(RDLEN),        defreg(RDT),
148
    defreg(STATUS),        defreg(SWSM),        defreg(TCTL),        defreg(TDBAH),
149
    defreg(TDBAL),        defreg(TDH),        defreg(TDLEN),        defreg(TDT),
150
    defreg(TORH),        defreg(TORL),        defreg(TOTH),        defreg(TOTL),
151
    defreg(TPR),        defreg(TPT),        defreg(TXDCTL),        defreg(WUFC),
152
    defreg(RA),                defreg(MTA),        defreg(CRCERRS),defreg(VFTA),
153
    defreg(VET),
154
};
155

    
156
static void
157
e1000_link_down(E1000State *s)
158
{
159
    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
160
    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
161
}
162

    
163
static void
164
e1000_link_up(E1000State *s)
165
{
166
    s->mac_reg[STATUS] |= E1000_STATUS_LU;
167
    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
168
}
169

    
170
static void
171
set_phy_ctrl(E1000State *s, int index, uint16_t val)
172
{
173
    /*
174
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
175
     * migrate during auto negotiation, after migration the link will be
176
     * down.
177
     */
178
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
179
        return;
180
    }
181
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
182
        e1000_link_down(s);
183
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
184
        DBGOUT(PHY, "Start link auto negotiation\n");
185
        qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
186
    }
187
}
188

    
189
static void
190
e1000_autoneg_timer(void *opaque)
191
{
192
    E1000State *s = opaque;
193
    if (!qemu_get_queue(s->nic)->link_down) {
194
        e1000_link_up(s);
195
    }
196
    s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
197
    DBGOUT(PHY, "Auto negotiation is completed\n");
198
}
199

    
200
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201
    [PHY_CTRL] = set_phy_ctrl,
202
};
203

    
204
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205

    
206
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207
static const char phy_regcap[0x20] = {
208
    [PHY_STATUS] = PHY_R,        [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209
    [PHY_ID1] = PHY_R,                [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
210
    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
211
    [PHY_LP_ABILITY] = PHY_R,        [PHY_1000T_STATUS] = PHY_R,
212
    [PHY_AUTONEG_ADV] = PHY_RW,        [M88E1000_RX_ERR_CNTR] = PHY_R,
213
    [PHY_ID2] = PHY_R,                [M88E1000_PHY_SPEC_STATUS] = PHY_R
214
};
215

    
216
/* PHY register values copied into phy_reg[] by e1000_reset(). */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]                   = 0x1140,
    [PHY_STATUS]                 = 0x794d, /* link initially up with not
                                              completed autoneg */
    [PHY_ID1]                    = 0x141,
    [PHY_ID2]                    = PHY_ID2_INIT,
    [PHY_AUTONEG_ADV]            = 0xde1,
    [PHY_LP_ABILITY]             = 0x1e0,
    [PHY_1000T_CTRL]             = 0x0e00,
    [PHY_1000T_STATUS]           = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL]     = 0x360,
    [M88E1000_PHY_SPEC_STATUS]   = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
225

    
226
static const uint32_t mac_reg_init[] = {
227
    [PBA] =     0x00100030,
228
    [LEDCTL] =  0x602,
229
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
230
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
231
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
232
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
233
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
234
                E1000_STATUS_LU,
235
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
236
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
237
                E1000_MANC_RMCP_EN,
238
};
239

    
240
static void
241
set_interrupt_cause(E1000State *s, int index, uint32_t val)
242
{
243
    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
244
        /* Only for 8257x */
245
        val |= E1000_ICR_INT_ASSERTED;
246
    }
247
    s->mac_reg[ICR] = val;
248

    
249
    /*
250
     * Make sure ICR and ICS registers have the same value.
251
     * The spec says that the ICS register is write-only.  However in practice,
252
     * on real hardware ICS is readable, and for reads it has the same value as
253
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
254
     *
255
     * The VxWorks PRO/1000 driver uses this behaviour.
256
     */
257
    s->mac_reg[ICS] = val;
258

    
259
    qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
260
}
261

    
262
static void
263
set_ics(E1000State *s, int index, uint32_t val)
264
{
265
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
266
        s->mac_reg[IMS]);
267
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
268
}
269

    
270
static int
271
rxbufsize(uint32_t v)
272
{
273
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
274
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
275
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
276
    switch (v) {
277
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
278
        return 16384;
279
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
280
        return 8192;
281
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
282
        return 4096;
283
    case E1000_RCTL_SZ_1024:
284
        return 1024;
285
    case E1000_RCTL_SZ_512:
286
        return 512;
287
    case E1000_RCTL_SZ_256:
288
        return 256;
289
    }
290
    return 2048;
291
}
292

    
293
static void e1000_reset(void *opaque)
294
{
295
    E1000State *d = opaque;
296
    uint8_t *macaddr = d->conf.macaddr.a;
297
    int i;
298

    
299
    qemu_del_timer(d->autoneg_timer);
300
    memset(d->phy_reg, 0, sizeof d->phy_reg);
301
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
302
    memset(d->mac_reg, 0, sizeof d->mac_reg);
303
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
304
    d->rxbuf_min_shift = 1;
305
    memset(&d->tx, 0, sizeof d->tx);
306

    
307
    if (qemu_get_queue(d->nic)->link_down) {
308
        e1000_link_down(d);
309
    }
310

    
311
    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
312
    d->mac_reg[RA] = 0;
313
    d->mac_reg[RA + 1] = E1000_RAH_AV;
314
    for (i = 0; i < 4; i++) {
315
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
316
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
317
    }
318
}
319

    
320
static void
321
set_ctrl(E1000State *s, int index, uint32_t val)
322
{
323
    /* RST is self clearing */
324
    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
325
}
326

    
327
static void
328
set_rx_control(E1000State *s, int index, uint32_t val)
329
{
330
    s->mac_reg[RCTL] = val;
331
    s->rxbuf_size = rxbufsize(val);
332
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
333
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
334
           s->mac_reg[RCTL]);
335
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
336
}
337

    
338
static void
339
set_mdic(E1000State *s, int index, uint32_t val)
340
{
341
    uint32_t data = val & E1000_MDIC_DATA_MASK;
342
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
343

    
344
    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
345
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
346
    else if (val & E1000_MDIC_OP_READ) {
347
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
348
        if (!(phy_regcap[addr] & PHY_R)) {
349
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
350
            val |= E1000_MDIC_ERROR;
351
        } else
352
            val = (val ^ data) | s->phy_reg[addr];
353
    } else if (val & E1000_MDIC_OP_WRITE) {
354
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
355
        if (!(phy_regcap[addr] & PHY_W)) {
356
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
357
            val |= E1000_MDIC_ERROR;
358
        } else {
359
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
360
                phyreg_writeops[addr](s, index, data);
361
            }
362
            s->phy_reg[addr] = data;
363
        }
364
    }
365
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
366

    
367
    if (val & E1000_MDIC_INT_EN) {
368
        set_ics(s, 0, E1000_ICR_MDAC);
369
    }
370
}
371

    
372
static uint32_t
373
get_eecd(E1000State *s, int index)
374
{
375
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
376

    
377
    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
378
           s->eecd_state.bitnum_out, s->eecd_state.reading);
379
    if (!s->eecd_state.reading ||
380
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
381
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
382
        ret |= E1000_EECD_DO;
383
    return ret;
384
}
385

    
386
static void
387
set_eecd(E1000State *s, int index, uint32_t val)
388
{
389
    uint32_t oldval = s->eecd_state.old_eecd;
390

    
391
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
392
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
393
    if (!(E1000_EECD_CS & val))                        // CS inactive; nothing to do
394
        return;
395
    if (E1000_EECD_CS & (val ^ oldval)) {        // CS rise edge; reset state
396
        s->eecd_state.val_in = 0;
397
        s->eecd_state.bitnum_in = 0;
398
        s->eecd_state.bitnum_out = 0;
399
        s->eecd_state.reading = 0;
400
    }
401
    if (!(E1000_EECD_SK & (val ^ oldval)))        // no clock edge
402
        return;
403
    if (!(E1000_EECD_SK & val)) {                // falling edge
404
        s->eecd_state.bitnum_out++;
405
        return;
406
    }
407
    s->eecd_state.val_in <<= 1;
408
    if (val & E1000_EECD_DI)
409
        s->eecd_state.val_in |= 1;
410
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
411
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
412
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
413
            EEPROM_READ_OPCODE_MICROWIRE);
414
    }
415
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
416
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
417
           s->eecd_state.reading);
418
}
419

    
420
static uint32_t
421
flash_eerd_read(E1000State *s, int x)
422
{
423
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
424

    
425
    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
426
        return (s->mac_reg[EERD]);
427

    
428
    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
429
        return (E1000_EEPROM_RW_REG_DONE | r);
430

    
431
    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
432
           E1000_EEPROM_RW_REG_DONE | r);
433
}
434

    
435
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, at offset `sloc`.
 *
 * data: packet bytes
 * n:    packet length in bytes
 * sloc: offset where the 16-bit checksum is written
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered, or 0 for "up to the end"
 *
 * Nothing is written if the checksum field would not fit inside the
 * covered length, or if the offsets are inconsistent with `n`.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * Guard the unsigned arithmetic below: with n == 0, "n - 1" wraps to
     * UINT32_MAX, and with css > n, "n - css" wraps to a huge length that
     * would make the checksum read far beyond the packet.
     */
    if (n == 0 || css > n) {
        return;
    }

    if (sloc < n - 1) {
        sum = net_checksum_add(n - css, data + css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}
448

    
449
static inline int
450
vlan_enabled(E1000State *s)
451
{
452
    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
453
}
454

    
455
static inline int
456
vlan_rx_filter_enabled(E1000State *s)
457
{
458
    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
459
}
460

    
461
static inline int
462
is_vlan_packet(E1000State *s, const uint8_t *buf)
463
{
464
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
465
                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
466
}
467

    
468
static inline int
469
is_vlan_txd(uint32_t txd_lower)
470
{
471
    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
472
}
473

    
474
/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
475
 * fill it in, just pad descriptor length by 4 bytes unless guest
476
 * told us to strip it off the packet. */
477
static inline int
478
fcs_len(E1000State *s)
479
{
480
    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
481
}
482

    
483
static void
484
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
485
{
486
    NetClientState *nc = qemu_get_queue(s->nic);
487
    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
488
        nc->info->receive(nc, buf, size);
489
    } else {
490
        qemu_send_packet(nc, buf, size);
491
    }
492
}
493

    
494
static void
495
xmit_seg(E1000State *s)
496
{
497
    uint16_t len, *sp;
498
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
499
    struct e1000_tx *tp = &s->tx;
500

    
501
    if (tp->tse && tp->cptse) {
502
        css = tp->ipcss;
503
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
504
               frames, tp->size, css);
505
        if (tp->ip) {                // IPv4
506
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
507
                          tp->size - css);
508
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
509
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
510
        } else                        // IPv6
511
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
512
                          tp->size - css);
513
        css = tp->tucss;
514
        len = tp->size - css;
515
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
516
        if (tp->tcp) {
517
            sofar = frames * tp->mss;
518
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),        // seq
519
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
520
            if (tp->paylen - sofar > tp->mss)
521
                tp->data[css + 13] &= ~9;                // PSH, FIN
522
        } else        // UDP
523
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
524
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
525
            unsigned int phsum;
526
            // add pseudo-header length before checksum calculation
527
            sp = (uint16_t *)(tp->data + tp->tucso);
528
            phsum = be16_to_cpup(sp) + len;
529
            phsum = (phsum >> 16) + (phsum & 0xffff);
530
            cpu_to_be16wu(sp, phsum);
531
        }
532
        tp->tso_frames++;
533
    }
534

    
535
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
536
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
537
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
538
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
539
    if (tp->vlan_needed) {
540
        memmove(tp->vlan, tp->data, 4);
541
        memmove(tp->data, tp->data + 4, 8);
542
        memcpy(tp->data + 8, tp->vlan_header, 4);
543
        e1000_send_packet(s, tp->vlan, tp->size + 4);
544
    } else
545
        e1000_send_packet(s, tp->data, tp->size);
546
    s->mac_reg[TPT]++;
547
    s->mac_reg[GPTC]++;
548
    n = s->mac_reg[TOTL];
549
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
550
        s->mac_reg[TOTH]++;
551
}
552

    
553
static void
554
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
555
{
556
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
557
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
558
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
559
    unsigned int msh = 0xfffff, hdr = 0;
560
    uint64_t addr;
561
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
562
    struct e1000_tx *tp = &s->tx;
563

    
564
    if (dtype == E1000_TXD_CMD_DEXT) {        // context descriptor
565
        op = le32_to_cpu(xp->cmd_and_length);
566
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
567
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
568
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
569
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
570
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
571
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
572
        tp->paylen = op & 0xfffff;
573
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
574
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
575
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
576
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
577
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
578
        tp->tso_frames = 0;
579
        if (tp->tucso == 0) {        // this is probably wrong
580
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
581
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
582
        }
583
        return;
584
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
585
        // data descriptor
586
        if (tp->size == 0) {
587
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
588
        }
589
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
590
    } else {
591
        // legacy descriptor
592
        tp->cptse = 0;
593
    }
594

    
595
    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
596
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
597
        tp->vlan_needed = 1;
598
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
599
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
600
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
601
                      le16_to_cpu(dp->upper.fields.special));
602
    }
603
        
604
    addr = le64_to_cpu(dp->buffer_addr);
605
    if (tp->tse && tp->cptse) {
606
        hdr = tp->hdr_len;
607
        msh = hdr + tp->mss;
608
        do {
609
            bytes = split_size;
610
            if (tp->size + bytes > msh)
611
                bytes = msh - tp->size;
612

    
613
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
614
            pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
615
            if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
616
                memmove(tp->header, tp->data, hdr);
617
            tp->size = sz;
618
            addr += bytes;
619
            if (sz == msh) {
620
                xmit_seg(s);
621
                memmove(tp->data, tp->header, hdr);
622
                tp->size = hdr;
623
            }
624
        } while (split_size -= bytes);
625
    } else if (!tp->tse && tp->cptse) {
626
        // context descriptor TSE is not set, while data descriptor TSE is set
627
        DBGOUT(TXERR, "TCP segmentation error\n");
628
    } else {
629
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
630
        pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
631
        tp->size += split_size;
632
    }
633

    
634
    if (!(txd_lower & E1000_TXD_CMD_EOP))
635
        return;
636
    if (!(tp->tse && tp->cptse && tp->size < hdr))
637
        xmit_seg(s);
638
    tp->tso_frames = 0;
639
    tp->sum_needed = 0;
640
    tp->vlan_needed = 0;
641
    tp->size = 0;
642
    tp->cptse = 0;
643
}
644

    
645
static uint32_t
646
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
647
{
648
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
649

    
650
    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
651
        return 0;
652
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
653
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
654
    dp->upper.data = cpu_to_le32(txd_upper);
655
    pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
656
                  &dp->upper, sizeof(dp->upper));
657
    return E1000_ICR_TXDW;
658
}
659

    
660
/*
 * Guest address of the transmit descriptor ring: TDBAH forms the upper
 * 32 bits, TDBAL with its low 4 bits masked off forms the lower 32 bits.
 */
static uint64_t tx_desc_base(E1000State *s)
{
    const uint64_t high = s->mac_reg[TDBAH];
    const uint64_t low = s->mac_reg[TDBAL] & ~0xf;

    return low + (high << 32);
}
667

    
668
static void
669
start_xmit(E1000State *s)
670
{
671
    dma_addr_t base;
672
    struct e1000_tx_desc desc;
673
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
674

    
675
    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
676
        DBGOUT(TX, "tx disabled\n");
677
        return;
678
    }
679

    
680
    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
681
        base = tx_desc_base(s) +
682
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
683
        pci_dma_read(&s->dev, base, &desc, sizeof(desc));
684

    
685
        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
686
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
687
               desc.upper.data);
688

    
689
        process_tx_desc(s, &desc);
690
        cause |= txdesc_writeback(s, base, &desc);
691

    
692
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
693
            s->mac_reg[TDH] = 0;
694
        /*
695
         * the following could happen only if guest sw assigns
696
         * bogus values to TDT/TDLEN.
697
         * there's nothing too intelligent we could do about this.
698
         */
699
        if (s->mac_reg[TDH] == tdh_start) {
700
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
701
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
702
            break;
703
        }
704
    }
705
    set_ics(s, 0, cause);
706
}
707

    
708
static int
709
receive_filter(E1000State *s, const uint8_t *buf, int size)
710
{
711
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
712
    static const int mta_shift[] = {4, 3, 2, 0};
713
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
714

    
715
    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
716
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
717
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
718
                                     ((vid >> 5) & 0x7f));
719
        if ((vfta & (1 << (vid & 0x1f))) == 0)
720
            return 0;
721
    }
722

    
723
    if (rctl & E1000_RCTL_UPE)                        // promiscuous
724
        return 1;
725

    
726
    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))        // promiscuous mcast
727
        return 1;
728

    
729
    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
730
        return 1;
731

    
732
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
733
        if (!(rp[1] & E1000_RAH_AV))
734
            continue;
735
        ra[0] = cpu_to_le32(rp[0]);
736
        ra[1] = cpu_to_le32(rp[1]);
737
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
738
            DBGOUT(RXFILTER,
739
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
740
                   (int)(rp - s->mac_reg - RA)/2,
741
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
742
            return 1;
743
        }
744
    }
745
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
746
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
747

    
748
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
749
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
750
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
751
        return 1;
752
    DBGOUT(RXFILTER,
753
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
754
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
755
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
756
           s->mac_reg[MTA + (f >> 5)]);
757

    
758
    return 0;
759
}
760

    
761
static void
762
e1000_set_link_status(NetClientState *nc)
763
{
764
    E1000State *s = qemu_get_nic_opaque(nc);
765
    uint32_t old_status = s->mac_reg[STATUS];
766

    
767
    if (nc->link_down) {
768
        e1000_link_down(s);
769
    } else {
770
        e1000_link_up(s);
771
    }
772

    
773
    if (s->mac_reg[STATUS] != old_status)
774
        set_ics(s, 0, E1000_ICR_LSC);
775
}
776

    
777
/*
 * Return true if the receive ring has enough free buffers to hold
 * `total_size` bytes.
 *
 * The descriptors between RDH and RDT (wrapping at the ring size given by
 * RDLEN) are counted; RDH == RDT means no buffer is available.
 */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    const uint32_t rdh = s->mac_reg[RDH];
    const uint32_t rdt = s->mac_reg[RDT];
    int bufs;

    /* Fast-path short packets: one buffer is enough. */
    if (total_size <= s->rxbuf_size) {
        return rdh != rdt;
    }

    if (rdh == rdt) {
        return false;
    }

    if (rdh < rdt) {
        bufs = rdt - rdh;
    } else {
        /* Tail has wrapped around the end of the ring. */
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
            rdt - rdh;
    }
    return total_size <= bufs * s->rxbuf_size;
}
794

    
795
static int
796
e1000_can_receive(NetClientState *nc)
797
{
798
    E1000State *s = qemu_get_nic_opaque(nc);
799

    
800
    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
801
        (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
802
}
803

    
804
/*
 * Guest address of the receive descriptor ring: RDBAH forms the upper
 * 32 bits, RDBAL with its low 4 bits masked off forms the lower 32 bits.
 */
static uint64_t rx_desc_base(E1000State *s)
{
    const uint64_t high = s->mac_reg[RDBAH];
    const uint64_t low = s->mac_reg[RDBAL] & ~0xf;

    return low + (high << 32);
}
811

    
812
static ssize_t
813
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
814
{
815
    E1000State *s = qemu_get_nic_opaque(nc);
816
    struct e1000_rx_desc desc;
817
    dma_addr_t base;
818
    unsigned int n, rdt;
819
    uint32_t rdh_start;
820
    uint16_t vlan_special = 0;
821
    uint8_t vlan_status = 0, vlan_offset = 0;
822
    uint8_t min_buf[MIN_BUF_SIZE];
823
    size_t desc_offset;
824
    size_t desc_size;
825
    size_t total_size;
826

    
827
    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
828
        return -1;
829
    }
830

    
831
    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
832
        return -1;
833
    }
834

    
835
    /* Pad to minimum Ethernet frame length */
836
    if (size < sizeof(min_buf)) {
837
        memcpy(min_buf, buf, size);
838
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
839
        buf = min_buf;
840
        size = sizeof(min_buf);
841
    }
842

    
843
    /* Discard oversized packets if !LPE and !SBP. */
844
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
845
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
846
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
847
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
848
        return size;
849
    }
850

    
851
    if (!receive_filter(s, buf, size))
852
        return size;
853

    
854
    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
855
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
856
        memmove((uint8_t *)buf + 4, buf, 12);
857
        vlan_status = E1000_RXD_STAT_VP;
858
        vlan_offset = 4;
859
        size -= 4;
860
    }
861

    
862
    rdh_start = s->mac_reg[RDH];
863
    desc_offset = 0;
864
    total_size = size + fcs_len(s);
865
    if (!e1000_has_rxbufs(s, total_size)) {
866
            set_ics(s, 0, E1000_ICS_RXO);
867
            return -1;
868
    }
869
    do {
870
        desc_size = total_size - desc_offset;
871
        if (desc_size > s->rxbuf_size) {
872
            desc_size = s->rxbuf_size;
873
        }
874
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
875
        pci_dma_read(&s->dev, base, &desc, sizeof(desc));
876
        desc.special = vlan_special;
877
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
878
        if (desc.buffer_addr) {
879
            if (desc_offset < size) {
880
                size_t copy_size = size - desc_offset;
881
                if (copy_size > s->rxbuf_size) {
882
                    copy_size = s->rxbuf_size;
883
                }
884
                pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
885
                              buf + desc_offset + vlan_offset, copy_size);
886
            }
887
            desc_offset += desc_size;
888
            desc.length = cpu_to_le16(desc_size);
889
            if (desc_offset >= total_size) {
890
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
891
            } else {
892
                /* Guest zeroing out status is not a hardware requirement.
893
                   Clear EOP in case guest didn't do it. */
894
                desc.status &= ~E1000_RXD_STAT_EOP;
895
            }
896
        } else { // as per intel docs; skip descriptors with null buf addr
897
            DBGOUT(RX, "Null RX descriptor!!\n");
898
        }
899
        pci_dma_write(&s->dev, base, &desc, sizeof(desc));
900

    
901
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
902
            s->mac_reg[RDH] = 0;
903
        /* see comment in start_xmit; same here */
904
        if (s->mac_reg[RDH] == rdh_start) {
905
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
906
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
907
            set_ics(s, 0, E1000_ICS_RXO);
908
            return -1;
909
        }
910
    } while (desc_offset < total_size);
911

    
912
    s->mac_reg[GPRC]++;
913
    s->mac_reg[TPR]++;
914
    /* TOR - Total Octets Received:
915
     * This register includes bytes received in a packet from the <Destination
916
     * Address> field through the <CRC> field, inclusively.
917
     */
918
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
919
    if (n < s->mac_reg[TORL])
920
        s->mac_reg[TORH]++;
921
    s->mac_reg[TORL] = n;
922

    
923
    n = E1000_ICS_RXT0;
924
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
925
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
926
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
927
        s->rxbuf_min_shift)
928
        n |= E1000_ICS_RXDMT0;
929

    
930
    set_ics(s, 0, n);
931

    
932
    return size;
933
}
934

    
935
static uint32_t
936
mac_readreg(E1000State *s, int index)
937
{
938
    return s->mac_reg[index];
939
}
940

    
941
static uint32_t
942
mac_icr_read(E1000State *s, int index)
943
{
944
    uint32_t ret = s->mac_reg[ICR];
945

    
946
    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
947
    set_interrupt_cause(s, 0, 0);
948
    return ret;
949
}
950

    
951
static uint32_t
952
mac_read_clr4(E1000State *s, int index)
953
{
954
    uint32_t ret = s->mac_reg[index];
955

    
956
    s->mac_reg[index] = 0;
957
    return ret;
958
}
959

    
960
static uint32_t
961
mac_read_clr8(E1000State *s, int index)
962
{
963
    uint32_t ret = s->mac_reg[index];
964

    
965
    s->mac_reg[index] = 0;
966
    s->mac_reg[index-1] = 0;
967
    return ret;
968
}
969

    
970
static void
971
mac_writereg(E1000State *s, int index, uint32_t val)
972
{
973
    s->mac_reg[index] = val;
974
}
975

    
976
static void
977
set_rdt(E1000State *s, int index, uint32_t val)
978
{
979
    s->mac_reg[index] = val & 0xffff;
980
    if (e1000_has_rxbufs(s, 1)) {
981
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
982
    }
983
}
984

    
985
static void
986
set_16bit(E1000State *s, int index, uint32_t val)
987
{
988
    s->mac_reg[index] = val & 0xffff;
989
}
990

    
991
static void
992
set_dlen(E1000State *s, int index, uint32_t val)
993
{
994
    s->mac_reg[index] = val & 0xfff80;
995
}
996

    
997
static void
998
set_tctl(E1000State *s, int index, uint32_t val)
999
{
1000
    s->mac_reg[index] = val;
1001
    s->mac_reg[TDT] &= 0xffff;
1002
    start_xmit(s);
1003
}
1004

    
1005
static void
1006
set_icr(E1000State *s, int index, uint32_t val)
1007
{
1008
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1009
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1010
}
1011

    
1012
static void
1013
set_imc(E1000State *s, int index, uint32_t val)
1014
{
1015
    s->mac_reg[IMS] &= ~val;
1016
    set_ics(s, 0, 0);
1017
}
1018

    
1019
static void
1020
set_ims(E1000State *s, int index, uint32_t val)
1021
{
1022
    s->mac_reg[IMS] |= val;
1023
    set_ics(s, 0, 0);
1024
}
1025

    
1026
#define getreg(x)        [x] = mac_readreg
1027
static uint32_t (*macreg_readops[])(E1000State *, int) = {
1028
    getreg(PBA),        getreg(RCTL),        getreg(TDH),        getreg(TXDCTL),
1029
    getreg(WUFC),        getreg(TDT),        getreg(CTRL),        getreg(LEDCTL),
1030
    getreg(MANC),        getreg(MDIC),        getreg(SWSM),        getreg(STATUS),
1031
    getreg(TORL),        getreg(TOTL),        getreg(IMS),        getreg(TCTL),
1032
    getreg(RDH),        getreg(RDT),        getreg(VET),        getreg(ICS),
1033
    getreg(TDBAL),        getreg(TDBAH),        getreg(RDBAH),        getreg(RDBAL),
1034
    getreg(TDLEN),        getreg(RDLEN),
1035

    
1036
    [TOTH] = mac_read_clr8,        [TORH] = mac_read_clr8,        [GPRC] = mac_read_clr4,
1037
    [GPTC] = mac_read_clr4,        [TPR] = mac_read_clr4,        [TPT] = mac_read_clr4,
1038
    [ICR] = mac_icr_read,        [EECD] = get_eecd,        [EERD] = flash_eerd_read,
1039
    [CRCERRS ... MPC] = &mac_readreg,
1040
    [RA ... RA+31] = &mac_readreg,
1041
    [MTA ... MTA+127] = &mac_readreg,
1042
    [VFTA ... VFTA+127] = &mac_readreg,
1043
};
1044
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1045

    
1046
#define putreg(x)        [x] = mac_writereg
1047
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1048
    putreg(PBA),        putreg(EERD),        putreg(SWSM),        putreg(WUFC),
1049
    putreg(TDBAL),        putreg(TDBAH),        putreg(TXDCTL),        putreg(RDBAH),
1050
    putreg(RDBAL),        putreg(LEDCTL), putreg(VET),
1051
    [TDLEN] = set_dlen,        [RDLEN] = set_dlen,        [TCTL] = set_tctl,
1052
    [TDT] = set_tctl,        [MDIC] = set_mdic,        [ICS] = set_ics,
1053
    [TDH] = set_16bit,        [RDH] = set_16bit,        [RDT] = set_rdt,
1054
    [IMC] = set_imc,        [IMS] = set_ims,        [ICR] = set_icr,
1055
    [EECD] = set_eecd,        [RCTL] = set_rx_control, [CTRL] = set_ctrl,
1056
    [RA ... RA+31] = &mac_writereg,
1057
    [MTA ... MTA+127] = &mac_writereg,
1058
    [VFTA ... VFTA+127] = &mac_writereg,
1059
};
1060

    
1061
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1062

    
1063
static void
1064
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1065
                 unsigned size)
1066
{
1067
    E1000State *s = opaque;
1068
    unsigned int index = (addr & 0x1ffff) >> 2;
1069

    
1070
    if (index < NWRITEOPS && macreg_writeops[index]) {
1071
        macreg_writeops[index](s, index, val);
1072
    } else if (index < NREADOPS && macreg_readops[index]) {
1073
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1074
    } else {
1075
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1076
               index<<2, val);
1077
    }
1078
}
1079

    
1080
static uint64_t
1081
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1082
{
1083
    E1000State *s = opaque;
1084
    unsigned int index = (addr & 0x1ffff) >> 2;
1085

    
1086
    if (index < NREADOPS && macreg_readops[index])
1087
    {
1088
        return macreg_readops[index](s, index);
1089
    }
1090
    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1091
    return 0;
1092
}
1093

    
1094
static const MemoryRegionOps e1000_mmio_ops = {
1095
    .read = e1000_mmio_read,
1096
    .write = e1000_mmio_write,
1097
    .endianness = DEVICE_LITTLE_ENDIAN,
1098
    .impl = {
1099
        .min_access_size = 4,
1100
        .max_access_size = 4,
1101
    },
1102
};
1103

    
1104
/* I/O port read stub: ignores all of its arguments and returns 0. */
static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    (void)opaque;
    return 0;
}
1112

    
1113
/* I/O port write stub: ignores all of its arguments and does nothing. */
static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    (void)opaque;
}
1120

    
1121
static const MemoryRegionOps e1000_io_ops = {
1122
    .read = e1000_io_read,
1123
    .write = e1000_io_write,
1124
    .endianness = DEVICE_LITTLE_ENDIAN,
1125
};
1126

    
1127
/*
 * Field-test predicate for the vmstate description: true only for
 * version 1 of the saved state.  `opaque` is not examined.
 */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;
    return version_id == 1 ? true : false;
}
1131

    
1132
static void e1000_pre_save(void *opaque)
1133
{
1134
    E1000State *s = opaque;
1135
    NetClientState *nc = qemu_get_queue(s->nic);
1136

    
1137
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1138
        return;
1139
    }
1140

    
1141
    /*
1142
     * If link is down and auto-negotiation is ongoing, complete
1143
     * auto-negotiation immediately.  This allows is to look at
1144
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1145
     */
1146
    if (nc->link_down &&
1147
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1148
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1149
         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1150
    }
1151
}
1152

    
1153
static int e1000_post_load(void *opaque, int version_id)
1154
{
1155
    E1000State *s = opaque;
1156
    NetClientState *nc = qemu_get_queue(s->nic);
1157

    
1158
    /* nc.link_down can't be migrated, so infer link_down according
1159
     * to link status bit in mac_reg[STATUS].
1160
     * Alternatively, restart link negotiation if it was in progress. */
1161
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1162

    
1163
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1164
        return 0;
1165
    }
1166

    
1167
    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1168
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1169
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1170
        nc->link_down = false;
1171
        qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
1172
    }
1173

    
1174
    return 0;
1175
}
1176

    
1177
static const VMStateDescription vmstate_e1000 = {
1178
    .name = "e1000",
1179
    .version_id = 2,
1180
    .minimum_version_id = 1,
1181
    .minimum_version_id_old = 1,
1182
    .pre_save = e1000_pre_save,
1183
    .post_load = e1000_post_load,
1184
    .fields      = (VMStateField []) {
1185
        VMSTATE_PCI_DEVICE(dev, E1000State),
1186
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1187
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1188
        VMSTATE_UINT32(rxbuf_size, E1000State),
1189
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1190
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1191
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1192
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1193
        VMSTATE_UINT16(eecd_state.reading, E1000State),
1194
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1195
        VMSTATE_UINT8(tx.ipcss, E1000State),
1196
        VMSTATE_UINT8(tx.ipcso, E1000State),
1197
        VMSTATE_UINT16(tx.ipcse, E1000State),
1198
        VMSTATE_UINT8(tx.tucss, E1000State),
1199
        VMSTATE_UINT8(tx.tucso, E1000State),
1200
        VMSTATE_UINT16(tx.tucse, E1000State),
1201
        VMSTATE_UINT32(tx.paylen, E1000State),
1202
        VMSTATE_UINT8(tx.hdr_len, E1000State),
1203
        VMSTATE_UINT16(tx.mss, E1000State),
1204
        VMSTATE_UINT16(tx.size, E1000State),
1205
        VMSTATE_UINT16(tx.tso_frames, E1000State),
1206
        VMSTATE_UINT8(tx.sum_needed, E1000State),
1207
        VMSTATE_INT8(tx.ip, E1000State),
1208
        VMSTATE_INT8(tx.tcp, E1000State),
1209
        VMSTATE_BUFFER(tx.header, E1000State),
1210
        VMSTATE_BUFFER(tx.data, E1000State),
1211
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1212
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1213
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1214
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1215
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1216
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1217
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1218
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1219
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1220
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1221
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1222
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1223
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1224
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1225
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1226
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1227
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1228
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1229
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1230
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1231
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1232
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1233
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1234
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1235
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1236
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1237
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1238
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1239
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1240
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1241
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1242
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1243
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1244
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1245
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1246
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1247
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1248
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1249
        VMSTATE_UINT32(mac_reg[VET], E1000State),
1250
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1251
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1252
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1253
        VMSTATE_END_OF_LIST()
1254
    }
1255
};
1256

    
1257
static const uint16_t e1000_eeprom_template[64] = {
1258
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1259
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1260
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1261
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1262
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1263
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1264
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1265
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1266
};
1267

    
1268
/* PCI interface */
1269

    
1270
static void
1271
e1000_mmio_setup(E1000State *d)
1272
{
1273
    int i;
1274
    const uint32_t excluded_regs[] = {
1275
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1276
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1277
    };
1278

    
1279
    memory_region_init_io(&d->mmio, &e1000_mmio_ops, d, "e1000-mmio",
1280
                          PNPMMIO_SIZE);
1281
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1282
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1283
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1284
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1285
    memory_region_init_io(&d->io, &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1286
}
1287

    
1288
static void
1289
e1000_cleanup(NetClientState *nc)
1290
{
1291
    E1000State *s = qemu_get_nic_opaque(nc);
1292

    
1293
    s->nic = NULL;
1294
}
1295

    
1296
static void
1297
pci_e1000_uninit(PCIDevice *dev)
1298
{
1299
    E1000State *d = DO_UPCAST(E1000State, dev, dev);
1300

    
1301
    qemu_del_timer(d->autoneg_timer);
1302
    qemu_free_timer(d->autoneg_timer);
1303
    memory_region_destroy(&d->mmio);
1304
    memory_region_destroy(&d->io);
1305
    qemu_del_nic(d->nic);
1306
}
1307

    
1308
static NetClientInfo net_e1000_info = {
1309
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
1310
    .size = sizeof(NICState),
1311
    .can_receive = e1000_can_receive,
1312
    .receive = e1000_receive,
1313
    .cleanup = e1000_cleanup,
1314
    .link_status_changed = e1000_set_link_status,
1315
};
1316

    
1317
/*
 * PCI init hook: set up config space, memory regions and BARs, build
 * the EEPROM image (template + MAC address + checksum), create the NIC
 * and the autonegotiation timer.  Always returns 0.
 */
static int pci_e1000_init(PCIDevice *pci_dev)
{
    E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
    uint8_t *config = d->dev.config;
    uint8_t *mac;
    uint16_t sum = 0;
    int word;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    config[PCI_CACHE_LINE_SIZE] = 0x10;

    config[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    /* Start from the template, then patch in the MAC address:
     * each of the first three words holds two address bytes, low byte
     * first. */
    memmove(d->eeprom_data, e1000_eeprom_template,
        sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    mac = d->conf.macaddr.a;
    for (word = 0; word < 3; word++) {
        d->eeprom_data[word] = (mac[2*word+1]<<8) | mac[2*word];
    }

    /* The checksum word makes all words up to and including it add up
     * to EEPROM_SUM (modulo 2^16). */
    for (word = 0; word < EEPROM_CHECKSUM_REG; word++) {
        sum += d->eeprom_data[word];
    }
    d->eeprom_data[EEPROM_CHECKSUM_REG] = (uint16_t) EEPROM_SUM - sum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), d->dev.qdev.id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), mac);

    add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0");

    d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);

    return 0;
}
1360

    
1361
/* qdev reset hook: recover the E1000State from `dev` and reset it. */
static void qdev_e1000_reset(DeviceState *dev)
{
    e1000_reset(DO_UPCAST(E1000State, dev.qdev, dev));
}
1366

    
1367
static Property e1000_properties[] = {
1368
    DEFINE_NIC_PROPERTIES(E1000State, conf),
1369
    DEFINE_PROP_BIT("autonegotiation", E1000State,
1370
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1371
    DEFINE_PROP_END_OF_LIST(),
1372
};
1373

    
1374
/*
 * Class initializer: fill in the PCI identity and hooks, then the
 * generic device description, reset hook, migration state and
 * properties.  `data` is unused.
 */
static void e1000_class_init(ObjectClass *klass, void *data)
{
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
    DeviceClass *dev_class = DEVICE_CLASS(klass);

    /* PCI identity */
    pci_class->vendor_id = PCI_VENDOR_ID_INTEL;
    pci_class->device_id = E1000_DEVID;
    pci_class->revision = 0x03;
    pci_class->class_id = PCI_CLASS_NETWORK_ETHERNET;
    pci_class->romfile = "efi-e1000.rom";

    /* PCI lifecycle hooks */
    pci_class->init = pci_e1000_init;
    pci_class->exit = pci_e1000_uninit;

    /* Generic device hooks */
    dev_class->desc = "Intel Gigabit Ethernet";
    dev_class->reset = qdev_e1000_reset;
    dev_class->vmsd = &vmstate_e1000;
    dev_class->props = e1000_properties;
}
1391

    
1392
static const TypeInfo e1000_info = {
1393
    .name          = "e1000",
1394
    .parent        = TYPE_PCI_DEVICE,
1395
    .instance_size = sizeof(E1000State),
1396
    .class_init    = e1000_class_init,
1397
};
1398

    
1399
static void e1000_register_types(void)
1400
{
1401
    type_register_static(&e1000_info);
1402
}
1403

    
1404
/*
 * Hand e1000_register_types to type_init.  The macro is defined outside
 * this file; presumably it arranges for the function to run during
 * start-up type registration -- confirm against its definition.
 */
type_init(e1000_register_types)