/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 *  Contributions after 2012-01-13 are licensed under the terms of the
 *  GNU GPL, version 2 or (at your option) any later version.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include "hw.h"
#include "qemu-char.h"
#include "xen_backend.h"
#include "xen_blkif.h"
#include "blockdev.h"

/* ------------------------------------------------------------- */

static int batch_maps   = 0;

static int max_requests = 32;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

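/*
 * Per-request state.  Each ioreq cycles through three lists on its
 * XenBlkDev: freelist (idle, ready for reuse), inflight (submitted to
 * the qemu block layer) and finished (done, waiting for a response to
 * be pushed onto the shared ring).
 */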
struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;
    uint8_t             mapped;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)   list;
    BlockAcctCookie     acct;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = g_malloc0(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq, bool finish)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    /* destroy and re-init the iovec around the memset; wiping the
     * struct as-is would leak the array allocated by qemu_iovec_init() */
    qemu_iovec_destroy(&ioreq->v);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    if (finish) {
        blkdev->requests_finished--;
    } else {
        blkdev->requests_inflight--;
    }
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
        ioreq->presync = ioreq->postsync = 1;
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    }

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        /* iov_base holds the in-page offset for now; ioreq_map() turns
         * it into a proper pointer once the grants are mapped */
        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void *)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

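/*
 * Grant mapping.  The frontend passes grant references rather than
 * addresses; ioreq_map() maps them into our address space before the
 * I/O is submitted, ioreq_unmap() tears them down afterwards.  With
 * batch_maps set, all segments of a request are mapped by a single
 * xc_gnttab_map_grant_refs() call into one contiguous region;
 * otherwise every page is mapped (and unmapped) individually.
 */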
static void ioreq_unmap(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) {
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            }
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
    ioreq->mapped = 0;
}

static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
        return 0;
    }
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    ioreq->mapped = 1;
    return 0;
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq);

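/*
 * Completion runs in up to three stages: an optional presync flush,
 * the data transfer itself, and an optional postsync flush (both
 * flushes are used by BLKIF_OP_WRITE_BARRIER).  aio_inflight counts
 * the outstanding stages; the response is only sent once it drops
 * back to zero.
 */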
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->presync) {
        /* presync flush done, now submit the actual I/O */
        ioreq->presync = 0;
        ioreq_runio_qemu_aio(ioreq);
        return;
    }
    if (ioreq->aio_inflight > 0) {
        return;
    }
    if (ioreq->postsync) {
        ioreq->postsync = 0;
        ioreq->aio_inflight++;
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return 0;
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }

        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    /* drop the guard reference taken at the top of this function; if
     * no aio is still in flight this completes the request right away */
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

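/*
 * Build the response for one finished request and put it on the
 * shared ring.  Returns nonzero if the frontend needs to be notified
 * via the event channel.
 */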
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        /* unknown protocol: bail out instead of writing through NULL */
        return 0;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        blkdev->more_work++;
    }
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq, true);
    }
    if (send_notify) {
        xen_be_send_notify(&blkdev->xendev);
    }
}

static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}

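/*
 * Main dispatch loop: push out any queued responses, then consume new
 * requests from the shared ring until we catch up with the producer
 * or run out of ioreq slots.  In the latter case more_work makes sure
 * the bottom half gets rescheduled once requests complete.
 */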
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    blk_send_response_all(blkdev);
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
            break;
        }
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq)) {
                xen_be_send_notify(&blkdev->xendev);
            }
            ioreq_release(ioreq, false);
            continue;
        }

        ioreq_runio_qemu_aio(ioreq);
    }

    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

/*
 * We need to account for the grant allocations requiring contiguous
 * chunks; the worst case number would be
 *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
 * but in order to keep things simple just use
 *     2 * max_req * max_seg.
 */
#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))

static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE) {
        batch_maps = 1;
    }
    if (xc_gnttab_set_max_grants(xendev->gnttabdev,
            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
        xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
                      strerror(errno));
    }
}

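/*
 * Backend configuration is read from xenstore.  For illustration, the
 * toolstack typically writes keys like the ones below into the
 * backend directory for the device (the values here are made up):
 *
 *   params      = "raw:/srv/xen/disk.img"   -> fileproto ":" filename
 *   mode        = "w"                       -> "w" is writable, anything
 *                                              else means read-only
 *   type        = "phy"
 *   dev         = "xvda"
 *   device-type = "disk"                    -> or "cdrom"
 */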
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, info = 0;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        char *h = NULL;
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL) {
            h = strchr(blkdev->params, ':');
        }
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (!strcmp("aio", blkdev->fileproto)) {
        blkdev->fileproto = "raw";
    }
    if (blkdev->mode == NULL) {
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    }
    if (blkdev->type == NULL) {
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    }
    if (blkdev->dev == NULL) {
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    }
    if (blkdev->devtype == NULL) {
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
    }

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL) {
        goto out_error;
    }

    /* read-only ? */
    qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags |= BDRV_O_RDWR;
    } else {
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
        info  |= VDISK_CDROM;
    }

    /* init qemu block driver */
    /* virtual device numbers use the Linux xvd major (202) with 16
     * minors per disk, so this recovers the disk index */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (blkdev->bs) {
            if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                          bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
                bdrv_delete(blkdev->bs);
                blkdev->bs = NULL;
            }
        }
        if (!blkdev->bs) {
            goto out_error;
        }
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    bdrv_attach_dev_nofail(blkdev->bs, blkdev);
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      bdrv_get_format_name(blkdev->bs) ?: "-");
        blkdev->file_size = 0;
    }

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;

out_error:
    g_free(blkdev->params);
    blkdev->params = NULL;
    g_free(blkdev->mode);
    blkdev->mode = NULL;
    g_free(blkdev->type);
    blkdev->type = NULL;
    g_free(blkdev->dev);
    blkdev->dev = NULL;
    g_free(blkdev->devtype);
    blkdev->devtype = NULL;
    return -1;
}

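/*
 * Connect to the frontend: read ring-ref and event-channel from the
 * frontend's xenstore directory, map the shared ring page, pick the
 * ring ABI announced by the frontend (native, x86_32 or x86_64) and
 * bind the event channel.
 */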
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        }
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
        }
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourselves */
            bdrv_close(blkdev->bs);
            bdrv_detach_dev(blkdev->bs, blkdev);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    if (blkdev->bs || blkdev->sring) {
        blk_disconnect(xendev);
    }

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        g_free(ioreq);
    }

    g_free(blkdev->params);
    g_free(blkdev->mode);
    g_free(blkdev->type);
    g_free(blkdev->dev);
    g_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

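/*
 * Ops table for the generic xen backend driver (xen_backend.c):
 * alloc on device discovery, init once the xenstore configuration is
 * complete, initialise when the frontend connects, event on
 * event-channel notifications, then disconnect/free on teardown.
 */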
struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .initialise = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};