root / hw / xen_disk.c @ 9c17d615

/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 *  Contributions after 2012-01-13 are licensed under the terms of the
 *  GNU GPL, version 2 or (at your option) any later version.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include "hw.h"
#include "xen_backend.h"
#include "xen_blkif.h"
#include "sysemu/blockdev.h"

/* ------------------------------------------------------------- */

static int batch_maps   = 0;

static int max_requests = 32;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;
    uint8_t             mapped;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)   list;
    BlockAcctCookie     acct;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

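/*
 * grab a free request slot: reuse one from the freelist or allocate
 * a new one while we are below max_requests, else return NULL
 */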
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = g_malloc0(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq, bool finish)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    if (finish) {
        blkdev->requests_finished--;
    } else {
        blkdev->requests_inflight--;
    }
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
        ioreq->presync = ioreq->postsync = 1;
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    };

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void*)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

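/* undo the grant mappings set up by ioreq_map() */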
static void ioreq_unmap(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) {
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            }
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
    ioreq->mapped = 0;
}

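/*
 * map the frontend's grant references into our address space and
 * point the iovec entries at the mapped pages
 */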
static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
        return 0;
    }
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    ioreq->mapped = 1;
    return 0;
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq);

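/*
 * completion callback, called once per submitted aio operation (plus
 * once directly from ioreq_runio_qemu_aio); handles the barrier
 * flushes and, when the last operation is done, records the status
 * and queues the request on the finished list
 */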
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->presync) {
        ioreq->presync = 0;
        ioreq_runio_qemu_aio(ioreq);
        return;
    }
    if (ioreq->aio_inflight > 0) {
        return;
    }
    if (ioreq->postsync) {
        ioreq->postsync = 0;
        ioreq->aio_inflight++;
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

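/* submit the parsed request to the qemu block layer as asynchronous I/O */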
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return 0;
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }

        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    /* drop the reference taken above; completes the request if no aio is still in flight */
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

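/*
 * put the response for one finished request on the shared ring;
 * returns nonzero if the frontend needs to be notified
 */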
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        dst = NULL;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        blkdev->more_work++;
    }
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq, true);
    }
    if (send_notify) {
        xen_be_send_notify(&blkdev->xendev);
    }
}

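/* copy one request off the shared ring, converting from the frontend ABI if needed */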
static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}

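/*
 * main work loop: send responses for finished requests, then pull
 * new requests off the ring and submit them
 */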
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    blk_send_response_all(blkdev);
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
            break;
        }
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq)) {
                xen_be_send_notify(&blkdev->xendev);
            }
            ioreq_release(ioreq, false);
            continue;
        }

        ioreq_runio_qemu_aio(ioreq);
    }

    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

/*
 * We need to account for the grant allocations requiring contiguous
 * chunks; the worst case number would be
 *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
 * but in order to keep things simple just use
 *     2 * max_req * max_seg.
 */
#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))

static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE) {
        batch_maps = 1;
    }
    if (xc_gnttab_set_max_grants(xendev->gnttabdev,
            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
        xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
                      strerror(errno));
    }
}

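/*
 * read the backend configuration from xenstore, attach the qemu block
 * driver and publish the disk information back to xenstore
 */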
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, info = 0;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        char *h = NULL;
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL) {
            h = strchr(blkdev->params, ':');
        }
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (!strcmp("aio", blkdev->fileproto)) {
        blkdev->fileproto = "raw";
    }
    if (blkdev->mode == NULL) {
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    }
    if (blkdev->type == NULL) {
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    }
    if (blkdev->dev == NULL) {
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    }
    if (blkdev->devtype == NULL) {
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
    }

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL) {
        goto out_error;
    }

    /* read-only ? */
    qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags |= BDRV_O_RDWR;
    } else {
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
        info  |= VDISK_CDROM;
    }

    /* init qemu block driver */
    /* xvd devices use major 202; each disk spans 16 minor numbers */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (blkdev->bs) {
            if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                        bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
                bdrv_delete(blkdev->bs);
                blkdev->bs = NULL;
            }
        }
        if (!blkdev->bs) {
            goto out_error;
        }
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    bdrv_attach_dev_nofail(blkdev->bs, blkdev);
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      bdrv_get_format_name(blkdev->bs) ?: "-");
        blkdev->file_size = 0;
    }

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;

out_error:
    g_free(blkdev->params);
    blkdev->params = NULL;
    g_free(blkdev->mode);
    blkdev->mode = NULL;
    g_free(blkdev->type);
    blkdev->type = NULL;
    g_free(blkdev->dev);
    blkdev->dev = NULL;
    g_free(blkdev->devtype);
    blkdev->devtype = NULL;
    return -1;
}

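/* map the shared ring and bind the event channel once the frontend is ready */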
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        }
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
        }
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

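/* tear down: close the block device, unbind the event channel, unmap the ring */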
static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourself */
            bdrv_close(blkdev->bs);
            bdrv_detach_dev(blkdev->bs, blkdev);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    if (blkdev->bs || blkdev->sring) {
        blk_disconnect(xendev);
    }

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        g_free(ioreq);
    }

    g_free(blkdev->params);
    g_free(blkdev->mode);
    g_free(blkdev->type);
    g_free(blkdev->dev);
    g_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .initialise    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};