Statistics
| Branch: | Revision:

root / hw / virtio-blk.c @ 53c25cea

History | View | Annotate | Download (10.1 kB)

1
/*
2
 * Virtio Block Device
3
 *
4
 * Copyright IBM, Corp. 2007
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 */
13

    
14
#include <qemu-common.h>
15
#include <sysemu.h>
16
#include "virtio-blk.h"
17
#include "block_int.h"
18
#ifdef __linux__
19
# include <scsi/sg.h>
20
#endif
21

    
22
typedef struct VirtIOBlock
23
{
24
    VirtIODevice vdev;
25
    BlockDriverState *bs;
26
    VirtQueue *vq;
27
    void *rq;
28
} VirtIOBlock;
29

    
30
/*
 * Convert a generic VirtIODevice pointer into the VirtIOBlock it is cast to.
 * This relies on 'vdev' being the first member of VirtIOBlock.
 */
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    VirtIOBlock *blk = (VirtIOBlock *)vdev;

    return blk;
}
34

    
35
typedef struct VirtIOBlockReq
36
{
37
    VirtIOBlock *dev;
38
    VirtQueueElement elem;
39
    struct virtio_blk_inhdr *in;
40
    struct virtio_blk_outhdr *out;
41
    struct virtio_scsi_inhdr *scsi;
42
    QEMUIOVector qiov;
43
    struct VirtIOBlockReq *next;
44
} VirtIOBlockReq;
45

    
46
/*
 * Finish a request: record 'status' in the request's in-header, push the
 * element back onto the virtqueue, notify the guest and free the request.
 *
 * The length reported to virtqueue_push() is the payload size stored in
 * req->qiov plus the size of the in-header.
 *
 * 'req' must not be used after this call.
 */
static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
{
    VirtIOBlock *blk = req->dev;
    VirtQueue *queue = blk->vq;

    req->in->status = status;

    virtqueue_push(queue, &req->elem, req->qiov.size + sizeof(*req->in));
    virtio_notify(&blk->vdev, queue);

    qemu_free(req);
}
56

    
57
/*
 * Apply the drive's on-error policy to a failed write.
 *
 * req:   the failed request
 * error: positive errno value describing the failure
 *
 * Returns 0 when the policy is BLOCK_ERR_IGNORE; the caller then completes
 * the request itself.  Returns 1 when the request has been dealt with here:
 * either it was completed with VIRTIO_BLK_S_IOERR, or it was pushed onto the
 * device's 'rq' list and the VM was stopped.
 */
static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error)
{
    VirtIOBlock *blk = req->dev;
    BlockInterfaceErrorAction policy = drive_get_onerror(blk->bs);
    int must_stop;

    if (policy == BLOCK_ERR_IGNORE) {
        return 0;
    }

    must_stop = (policy == BLOCK_ERR_STOP_ANY) ||
                (policy == BLOCK_ERR_STOP_ENOSPC && error == ENOSPC);

    if (!must_stop) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        return 1;
    }

    /* Park the request at the head of the retry list, then stop the VM. */
    req->next = blk->rq;
    blk->rq = req;
    vm_stop(0);

    return 1;
}
76

    
77
static void virtio_blk_rw_complete(void *opaque, int ret)
78
{
79
    VirtIOBlockReq *req = opaque;
80

    
81
    if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) {
82
        if (virtio_blk_handle_write_error(req, -ret))
83
            return;
84
    }
85

    
86
    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
87
}
88

    
89
/*
 * Allocate a request with qemu_mallocz() and bind it to device 's'.
 * The caller owns the returned request.
 */
static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
    VirtIOBlockReq *fresh;

    fresh = qemu_mallocz(sizeof(VirtIOBlockReq));
    fresh->dev = s;

    return fresh;
}
95

    
96
/*
 * Allocate a request and fill its element by popping the device's virtqueue.
 *
 * Returns the new request, or NULL when allocation yielded NULL or
 * virtqueue_pop() returned zero (in which case the request is freed again).
 */
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

    if (req == NULL) {
        return NULL;
    }

    if (virtqueue_pop(s->vq, &req->elem)) {
        return req;
    }

    qemu_free(req);
    return NULL;
}
109

    
110
#ifdef __linux__
111
/*
 * Execute a SCSI command request by forwarding it to the backing device
 * through the SG_IO ioctl, then complete the request.
 *
 * Expected element layout:
 *   out_sg[0]            virtio_blk_outhdr
 *   out_sg[1]            SCSI command block
 *   out_sg[2..]          optional write payload
 *   in_sg[0..in_num-4]   optional read payload
 *   in_sg[in_num-3]      sense buffer
 *   in_sg[in_num-2]      virtio_scsi_inhdr
 *   in_sg[in_num-1]      virtio_blk_inhdr
 *
 * NOTE(review): 'size' is accumulated below but never read in this function.
 */
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    VirtQueueElement *elem = &req->elem;
    struct sg_io_hdr hdr;
    int ret, size = 0;
    int status;
    int i;

    /* Both headers, the command block, the SCSI in-header and the sense
     * buffer are mandatory. */
    if (elem->out_num < 2 || elem->in_num < 3) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        return;
    }

    /* Payload in both directions at once is not supported. */
    if (elem->out_num > 2 && elem->in_num > 3) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        return;
    }

    /* The SCSI in-header sits right before the regular in-header. */
    req->scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
    size = sizeof(*req->in) + sizeof(*req->scsi);

    memset(&hdr, 0, sizeof(hdr));
    hdr.interface_id = 'S';
    hdr.cmdp = elem->out_sg[1].iov_base;
    hdr.cmd_len = elem->out_sg[1].iov_len;
    hdr.dxfer_len = 0;

    if (elem->out_num > 2) {
        /* Write payload starts at the third output segment. */
        hdr.dxfer_direction = SG_DXFER_TO_DEV;
        hdr.iovec_count = elem->out_num - 2;
        hdr.dxferp = elem->out_sg + 2;

        for (i = 0; i < hdr.iovec_count; i++) {
            hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
        }
    } else if (elem->in_num > 3) {
        /* Read payload occupies the input segments before the last three. */
        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        hdr.iovec_count = elem->in_num - 3;
        hdr.dxferp = elem->in_sg;

        for (i = 0; i < hdr.iovec_count; i++) {
            hdr.dxfer_len += elem->in_sg[i].iov_len;
        }
        size += hdr.dxfer_len;
    } else {
        /* Command without any data transfer. */
        hdr.dxfer_direction = SG_DXFER_NONE;
    }

    hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
    hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
    size += hdr.mx_sb_len;

    ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
    if (ret) {
        /* The ioctl itself failed: report it as unsupported and mark the
         * whole transfer as residual. */
        hdr.status = ret;
        hdr.resid = hdr.dxfer_len;
        status = VIRTIO_BLK_S_UNSUPP;
    } else {
        status = hdr.status ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
    }

    req->scsi->errors = hdr.status;
    req->scsi->residual = hdr.resid;
    req->scsi->sense_len = hdr.sb_len_wr;
    req->scsi->data_len = hdr.dxfer_len;

    virtio_blk_req_complete(req, status);
}
205
#else
206
/*
 * Variant used when __linux__ is not defined: every SCSI command request
 * is completed immediately with VIRTIO_BLK_S_UNSUPP.
 */
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    const int status = VIRTIO_BLK_S_UNSUPP;

    virtio_blk_req_complete(req, status);
}
210
#endif /* __linux__ */
211

    
212
/*
 * Submit the request as an asynchronous vectored write starting at the
 * sector named in the out-header.  The sector count is the payload size
 * divided by 512; virtio_blk_rw_complete() runs on completion.
 */
static void virtio_blk_handle_write(VirtIOBlockReq *req)
{
    BlockDriverState *bs = req->dev->bs;

    bdrv_aio_writev(bs, req->out->sector, &req->qiov,
                    req->qiov.size / 512,
                    virtio_blk_rw_complete, req);
}
217

    
218
/*
 * Submit the request as an asynchronous vectored read starting at the
 * sector named in the out-header.  The sector count is the payload size
 * divided by 512; virtio_blk_rw_complete() runs on completion.
 */
static void virtio_blk_handle_read(VirtIOBlockReq *req)
{
    BlockDriverState *bs = req->dev->bs;

    bdrv_aio_readv(bs, req->out->sector, &req->qiov,
                   req->qiov.size / 512,
                   virtio_blk_rw_complete, req);
}
223

    
224
/*
 * Virtqueue handler: drain all pending requests from the queue and dispatch
 * each one to the SCSI, write or read path according to the type field of
 * its out-header.
 *
 * A request that lacks an output or input segment, or whose first output /
 * last input segment is too small for the respective header, terminates the
 * process with exit(1).
 */
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    VirtIOBlockReq *req;

    for (;;) {
        VirtQueueElement *elem;

        req = virtio_blk_get_request(s);
        if (!req) {
            break;
        }
        elem = &req->elem;

        if (elem->out_num < 1 || elem->in_num < 1) {
            fprintf(stderr, "virtio-blk missing headers\n");
            exit(1);
        }

        if (elem->out_sg[0].iov_len < sizeof(*req->out) ||
            elem->in_sg[elem->in_num - 1].iov_len < sizeof(*req->in)) {
            fprintf(stderr, "virtio-blk header not in correct element\n");
            exit(1);
        }

        /* Out-header is the first output segment, in-header the last
         * input segment. */
        req->out = (void *)elem->out_sg[0].iov_base;
        req->in = (void *)elem->in_sg[elem->in_num - 1].iov_base;

        if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
            virtio_blk_handle_scsi(req);
            continue;
        }

        if (req->out->type & VIRTIO_BLK_T_OUT) {
            /* Write payload: every output segment after the header. */
            qemu_iovec_init_external(&req->qiov, &elem->out_sg[1],
                                     elem->out_num - 1);
            virtio_blk_handle_write(req);
        } else {
            /* Read payload: every input segment before the in-header. */
            qemu_iovec_init_external(&req->qiov, &elem->in_sg[0],
                                     elem->in_num - 1);
            virtio_blk_handle_read(req);
        }
    }
    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible. But
     * that should be done in the generic block layer.
     */
}
262

    
263
static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
264
{
265
    VirtIOBlock *s = opaque;
266
    VirtIOBlockReq *req = s->rq;
267

    
268
    if (!running)
269
        return;
270

    
271
    s->rq = NULL;
272

    
273
    while (req) {
274
        virtio_blk_handle_write(req);
275
        req = req->next;
276
    }
277
}
278

    
279
/*
 * Device reset hook.
 */
static void virtio_blk_reset(VirtIODevice *vdev)
{
    /*
     * Ideally the pending requests of this device would be cancelled here;
     * that cannot be done cleanly until request lists are kept per device,
     * so all outstanding AIO is flushed instead.
     */
    qemu_aio_flush();
}
287

    
288
/*
 * Fill the device configuration space.
 *
 * vdev:   the virtio block device
 * config: destination buffer; sizeof(struct virtio_blk_config) bytes are
 *         written to it
 *
 * Capacity and geometry are queried from the backing block driver state.
 * The local config struct is zeroed first so that fields this function does
 * not assign, and any padding, are never copied to the guest with
 * uninitialised stack contents.
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int cylinders, heads, secs;

    memset(&blkcfg, 0, sizeof(blkcfg));

    bdrv_get_geometry(s->bs, &capacity);
    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);

    stq_raw(&blkcfg.capacity, capacity);
    /* NOTE(review): 128 matches the queue size passed to virtio_add_queue()
     * in virtio_blk_init(); the "- 2" presumably reserves the two header
     * segments - confirm. */
    stl_raw(&blkcfg.seg_max, 128 - 2);
    stw_raw(&blkcfg.cylinders, cylinders);
    blkcfg.heads = heads;
    blkcfg.sectors = secs;

    memcpy(config, &blkcfg, sizeof(blkcfg));
}
304

    
305
/*
 * Return the feature bits offered by the device: SEG_MAX and GEOMETRY
 * always, SCSI additionally when built with __linux__ defined.
 */
static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
{
    uint32_t features = (1 << VIRTIO_BLK_F_SEG_MAX) |
                        (1 << VIRTIO_BLK_F_GEOMETRY);

#ifdef __linux__
    features |= (1 << VIRTIO_BLK_F_SCSI);
#endif

    return features;
}
317

    
318
/*
 * Save the device state to 'f'.
 *
 * After the generic virtio state, every request parked on the 'rq' list is
 * written as a marker byte of 1 followed by the raw bytes of its virtqueue
 * element.  A marker byte of 0 terminates the list.
 */
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req;

    virtio_save(&s->vdev, f);

    for (req = s->rq; req != NULL; req = req->next) {
        qemu_put_sbyte(f, 1);
        qemu_put_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
    }

    qemu_put_sbyte(f, 0);
}
332

    
333
/*
 * Load the device state from 'f'.
 *
 * f:          migration stream positioned at this device's section
 * opaque:     the VirtIOBlock being restored
 * version_id: section version; only version 2 is accepted
 *
 * Returns 0 on success, -EINVAL for an unsupported version.
 *
 * After the generic virtio state, the stream holds a sequence of parked
 * requests, each introduced by a non-zero marker byte and followed by the
 * raw virtqueue element; a zero marker byte ends the sequence.  Each request
 * is pushed onto the head of the device's 'rq' list, so the restored list is
 * in the reverse of stream order.
 *
 * NOTE(review): only 'elem' is restored here; the 'out', 'in' and 'qiov'
 * fields that virtio_blk_handle_write() uses when the list is replayed are
 * left zeroed - confirm how restored requests are meant to be resubmitted.
 */
static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *s = opaque;

    if (version_id != 2)
        return -EINVAL;

    virtio_load(&s->vdev, f);
    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
        qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
        /* Link the new request in as the list head. */
        req->next = s->rq;
        s->rq = req;
    }

    return 0;
}
350

    
351
/*
 * Create and register a virtio block device for 'dev'.
 *
 * Sets up the common virtio state, attaches the block driver state obtained
 * from qdev, installs the config/feature/reset callbacks, seeds the geometry
 * hint, creates the 128-entry request queue and registers the VM-state
 * change handler and the savevm handlers (section version 2).
 *
 * Returns the embedded VirtIODevice of the new device.
 */
VirtIODevice *virtio_blk_init(DeviceState *dev)
{
    static int virtio_blk_id;
    VirtIOBlock *s;
    BlockDriverState *bs;
    int cylinders, heads, secs;

    s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
                                          sizeof(struct virtio_blk_config),
                                          sizeof(VirtIOBlock));
    bs = qdev_init_bdrv(dev, IF_VIRTIO);

    /* Device callbacks. */
    s->vdev.get_config = virtio_blk_update_config;
    s->vdev.get_features = virtio_blk_get_features;
    s->vdev.reset = virtio_blk_reset;

    /* Backing storage and empty retry list. */
    s->bs = bs;
    s->rq = NULL;
    bs->private = dev;

    bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
    bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);

    s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);

    qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
    register_savevm("virtio-blk", virtio_blk_id++, 2,
                    virtio_blk_save, virtio_blk_load, s);

    return &s->vdev;
}