Statistics
| Branch: | Revision:

root / block.c @ 8fc94e5a

History | View | Annotate | Download (125.7 kB)

1 fc01f7e7 bellard
/*
2 fc01f7e7 bellard
 * QEMU System Emulator block driver
3 5fafdf24 ths
 *
4 fc01f7e7 bellard
 * Copyright (c) 2003 Fabrice Bellard
5 5fafdf24 ths
 *
6 fc01f7e7 bellard
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 fc01f7e7 bellard
 * of this software and associated documentation files (the "Software"), to deal
8 fc01f7e7 bellard
 * in the Software without restriction, including without limitation the rights
9 fc01f7e7 bellard
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 fc01f7e7 bellard
 * copies of the Software, and to permit persons to whom the Software is
11 fc01f7e7 bellard
 * furnished to do so, subject to the following conditions:
12 fc01f7e7 bellard
 *
13 fc01f7e7 bellard
 * The above copyright notice and this permission notice shall be included in
14 fc01f7e7 bellard
 * all copies or substantial portions of the Software.
15 fc01f7e7 bellard
 *
16 fc01f7e7 bellard
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 fc01f7e7 bellard
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 fc01f7e7 bellard
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 fc01f7e7 bellard
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 fc01f7e7 bellard
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 fc01f7e7 bellard
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 fc01f7e7 bellard
 * THE SOFTWARE.
23 fc01f7e7 bellard
 */
24 3990d09a blueswir1
#include "config-host.h"
25 faf07963 pbrook
#include "qemu-common.h"
26 6d519a5f Stefan Hajnoczi
#include "trace.h"
27 83c9089e Paolo Bonzini
#include "monitor/monitor.h"
28 737e150e Paolo Bonzini
#include "block/block_int.h"
29 737e150e Paolo Bonzini
#include "block/blockjob.h"
30 1de7afc9 Paolo Bonzini
#include "qemu/module.h"
31 7b1b5d19 Paolo Bonzini
#include "qapi/qmp/qjson.h"
32 9c17d615 Paolo Bonzini
#include "sysemu/sysemu.h"
33 1de7afc9 Paolo Bonzini
#include "qemu/notify.h"
34 737e150e Paolo Bonzini
#include "block/coroutine.h"
35 b2023818 Luiz Capitulino
#include "qmp-commands.h"
36 1de7afc9 Paolo Bonzini
#include "qemu/timer.h"
37 fc01f7e7 bellard
38 71e72a19 Juan Quintela
#ifdef CONFIG_BSD
39 7674e7bf bellard
#include <sys/types.h>
40 7674e7bf bellard
#include <sys/stat.h>
41 7674e7bf bellard
#include <sys/ioctl.h>
42 72cf2d4f Blue Swirl
#include <sys/queue.h>
43 c5e97233 blueswir1
#ifndef __DragonFly__
44 7674e7bf bellard
#include <sys/disk.h>
45 7674e7bf bellard
#endif
46 c5e97233 blueswir1
#endif
47 7674e7bf bellard
48 49dc768d aliguori
#ifdef _WIN32
49 49dc768d aliguori
#include <windows.h>
50 49dc768d aliguori
#endif
51 49dc768d aliguori
52 1c9805a3 Stefan Hajnoczi
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
53 1c9805a3 Stefan Hajnoczi
54 470c0504 Stefan Hajnoczi
/*
 * Bit flags accepted by the `flags` parameter of bdrv_co_do_readv() and
 * bdrv_co_do_writev().  Each flag occupies its own bit.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
58 470c0504 Stefan Hajnoczi
59 7d4b4ba5 Markus Armbruster
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60 f141eafe aliguori
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
61 f141eafe aliguori
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62 c87c0672 aliguori
        BlockDriverCompletionFunc *cb, void *opaque);
63 f141eafe aliguori
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
64 f141eafe aliguori
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65 ce1a14dc pbrook
        BlockDriverCompletionFunc *cb, void *opaque);
66 f9f05dc5 Kevin Wolf
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
67 f9f05dc5 Kevin Wolf
                                         int64_t sector_num, int nb_sectors,
68 f9f05dc5 Kevin Wolf
                                         QEMUIOVector *iov);
69 f9f05dc5 Kevin Wolf
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
70 f9f05dc5 Kevin Wolf
                                         int64_t sector_num, int nb_sectors,
71 f9f05dc5 Kevin Wolf
                                         QEMUIOVector *iov);
72 c5fbe571 Stefan Hajnoczi
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73 470c0504 Stefan Hajnoczi
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 470c0504 Stefan Hajnoczi
    BdrvRequestFlags flags);
75 1c9805a3 Stefan Hajnoczi
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76 f08f2dda Stefan Hajnoczi
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
77 f08f2dda Stefan Hajnoczi
    BdrvRequestFlags flags);
78 b2a61371 Stefan Hajnoczi
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
79 b2a61371 Stefan Hajnoczi
                                               int64_t sector_num,
80 b2a61371 Stefan Hajnoczi
                                               QEMUIOVector *qiov,
81 b2a61371 Stefan Hajnoczi
                                               int nb_sectors,
82 b2a61371 Stefan Hajnoczi
                                               BlockDriverCompletionFunc *cb,
83 b2a61371 Stefan Hajnoczi
                                               void *opaque,
84 8c5873d6 Stefan Hajnoczi
                                               bool is_write);
85 b2a61371 Stefan Hajnoczi
static void coroutine_fn bdrv_co_do_rw(void *opaque);
86 621f0589 Kevin Wolf
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
87 621f0589 Kevin Wolf
    int64_t sector_num, int nb_sectors);
88 ec530c81 bellard
89 98f90dba Zhi Yong Wu
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
90 98f90dba Zhi Yong Wu
        bool is_write, double elapsed_time, uint64_t *wait);
91 98f90dba Zhi Yong Wu
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
92 98f90dba Zhi Yong Wu
        double elapsed_time, uint64_t *wait);
93 98f90dba Zhi Yong Wu
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
94 98f90dba Zhi Yong Wu
        bool is_write, int64_t *wait);
95 98f90dba Zhi Yong Wu
96 1b7bdbc1 Stefan Hajnoczi
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
97 1b7bdbc1 Stefan Hajnoczi
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
98 7ee930d0 blueswir1
99 8a22f02a Stefan Hajnoczi
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
100 8a22f02a Stefan Hajnoczi
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
101 ea2384d3 bellard
102 f9092b10 Markus Armbruster
/* The device to use for VM snapshots */
103 f9092b10 Markus Armbruster
static BlockDriverState *bs_snapshots;
104 f9092b10 Markus Armbruster
105 eb852011 Markus Armbruster
/* If non-zero, use only whitelisted block drivers */
106 eb852011 Markus Armbruster
static int use_bdrv_whitelist;
107 eb852011 Markus Armbruster
108 9e0b22f4 Stefan Hajnoczi
#ifdef _WIN32
109 9e0b22f4 Stefan Hajnoczi
/*
 * Return 1 if filename starts with an ASCII letter followed by ':'
 * (for example "c:" or "D:\\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]; the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
115 9e0b22f4 Stefan Hajnoczi
116 9e0b22f4 Stefan Hajnoczi
/*
 * Return 1 if filename names a Windows drive: either exactly a drive
 * prefix such as "c:" with nothing after it, or a string starting with
 * "\\.\" or "//./".  Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* A drive letter and colon with nothing following */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* "\\.\" prefix, or the same thing spelled with forward slashes */
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
126 9e0b22f4 Stefan Hajnoczi
#endif
127 9e0b22f4 Stefan Hajnoczi
128 0563e191 Zhi Yong Wu
/* throttling disk I/O limits */
129 98f90dba Zhi Yong Wu
/*
 * Turn I/O throttling off for bs: clear the enabled flag, keep calling
 * qemu_co_queue_next() on the throttled-request queue until it returns
 * zero, destroy the throttling timer if one exists, and reset the slice
 * bookkeeping and io_base to zero.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs)) {
        /* nothing to do here; loop until the call reports no more work */
    }

    if (bs->block_timer != NULL) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
146 98f90dba Zhi Yong Wu
147 0563e191 Zhi Yong Wu
static void bdrv_block_timer(void *opaque)
148 0563e191 Zhi Yong Wu
{
149 0563e191 Zhi Yong Wu
    BlockDriverState *bs = opaque;
150 0563e191 Zhi Yong Wu
151 0563e191 Zhi Yong Wu
    qemu_co_queue_next(&bs->throttled_reqs);
152 0563e191 Zhi Yong Wu
}
153 0563e191 Zhi Yong Wu
154 0563e191 Zhi Yong Wu
/*
 * Turn I/O throttling on for bs: initialise the throttled-request queue,
 * create the vm_clock timer that fires bdrv_block_timer(), start a new
 * accounting slice at the current vm_clock time, zero io_base, and
 * finally set the enabled flag.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);

    /* The first slice is five BLOCK_IO_SLICE_TIME units long */
    bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;

    memset(&bs->io_base, 0, sizeof(bs->io_base));

    /* Set last, once all throttling state above is in place */
    bs->io_limits_enabled = true;
}
164 0563e191 Zhi Yong Wu
165 0563e191 Zhi Yong Wu
/*
 * Return true if any of the six configured limits in bs->io_limits
 * (bps or iops, for read, write or total) is non-zero.
 */
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    const BlockIOLimit *limits = &bs->io_limits;

    const bool any_bps = limits->bps[BLOCK_IO_LIMIT_READ]
                      || limits->bps[BLOCK_IO_LIMIT_WRITE]
                      || limits->bps[BLOCK_IO_LIMIT_TOTAL];
    const bool any_iops = limits->iops[BLOCK_IO_LIMIT_READ]
                       || limits->iops[BLOCK_IO_LIMIT_WRITE]
                       || limits->iops[BLOCK_IO_LIMIT_TOTAL];

    return any_bps || any_iops;
}
175 0563e191 Zhi Yong Wu
176 98f90dba Zhi Yong Wu
/*
 * Hold the calling request back while it would exceed the I/O limits of bs.
 *
 * bs:         device whose limits apply
 * is_write:   true for a write request, false for a read
 * nb_sectors: size of the request in sectors
 *
 * Calls the qemu_co_queue_wait*() helpers, so it is presumably meant to be
 * called from coroutine context - TODO confirm.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_ns = -1;

    /* Requests are already queued: line up behind them first */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /*
     * Requests are meant to be served in FIFO order.  While this request is
     * still over the limit, arm the timer for the computed delay and put the
     * request back at the HEAD of the queue, so the requests behind it stay
     * queued until this one has been allowed through.
     */
    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_ns)) {
        qemu_mod_timer(bs->block_timer,
                       wait_ns + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* This request may proceed; advance the queue for the next one */
    qemu_co_queue_next(&bs->throttled_reqs);
}
200 98f90dba Zhi Yong Wu
201 9e0b22f4 Stefan Hajnoczi
/*
 * Check whether path starts with "<protocol>:".
 *
 * Returns 1 if the first ':' in path comes before any directory separator,
 * 0 otherwise.  On Windows, strings recognised by is_windows_drive() or
 * is_windows_drive_prefix() never count as a protocol, and '\\' is treated
 * as a separator too.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Index of the first ':' or separator, or of the NUL if there is none */
    return path[strcspn(path, stop_chars)] == ':';
}
218 9e0b22f4 Stefan Hajnoczi
219 83f64091 bellard
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\\'
 * also counts, as does anything recognised by is_windows_drive() or
 * is_windows_drive_prefix().
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
231 3b0d4f61 bellard
232 83f64091 bellard
/*
 * Build the path of filename relative to base_path and store it in dest.
 *
 * If filename is absolute it is copied to dest unchanged.  Otherwise dest
 * receives the leading part of base_path - everything up to and including
 * the last directory separator, or up to and including the first ':' if
 * that lies further right (so URL-like bases are supported) - followed by
 * filename.
 *
 * dest_size is the capacity of dest in bytes.  The prefix copy is clamped
 * to dest_size - 1 bytes; nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator (or the start) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever of the two prefixes reaches further */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
275 3b0d4f61 bellard
276 dc5a1371 Paolo Bonzini
/*
 * Write the backing file name of bs into dest (capacity sz bytes).
 *
 * An empty backing file name, or one that carries a "<protocol>:" prefix,
 * is copied as is.  Anything else is combined with bs->filename through
 * path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;

    if (backing[0] != '\0' && !path_has_protocol(backing)) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }
    pstrcpy(dest, sz, backing);
}
284 dc5a1371 Paolo Bonzini
285 5efa9d5a Anthony Liguori
/*
 * Add bdrv to the list of known block drivers.
 *
 * A driver that provides no bdrv_co_readv gets the coroutine emulation
 * functions for both readv and writev.  Those work in terms of AIO, so a
 * driver that also provides no bdrv_aio_readv gets the AIO emulation
 * functions as well.
 */
void bdrv_register(BlockDriver *bdrv)
{
    if (bdrv->bdrv_co_readv == NULL) {
        /*
         * bdrv_co_readv_em()/bdrv_co_writev_em() are built on top of aio,
         * so make sure an aio implementation is present first.
         */
        if (bdrv->bdrv_aio_readv == NULL) {
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }

        /* Block drivers without coroutine functions need emulation */
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
304 b338082b bellard
305 b338082b bellard
/* create a new block device (by default it is empty) */
306 b338082b bellard
BlockDriverState *bdrv_new(const char *device_name)
307 b338082b bellard
{
308 1b7bdbc1 Stefan Hajnoczi
    BlockDriverState *bs;
309 b338082b bellard
310 7267c094 Anthony Liguori
    bs = g_malloc0(sizeof(BlockDriverState));
311 b338082b bellard
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
312 ea2384d3 bellard
    if (device_name[0] != '\0') {
313 1b7bdbc1 Stefan Hajnoczi
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
314 ea2384d3 bellard
    }
315 28a7282a Luiz Capitulino
    bdrv_iostatus_disable(bs);
316 d7d512f6 Paolo Bonzini
    notifier_list_init(&bs->close_notifiers);
317 d7d512f6 Paolo Bonzini
318 b338082b bellard
    return bs;
319 b338082b bellard
}
320 b338082b bellard
321 d7d512f6 Paolo Bonzini
/* Append notify to the close_notifiers list of bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    NotifierList *on_close = &bs->close_notifiers;

    notifier_list_add(on_close, notify);
}
325 d7d512f6 Paolo Bonzini
326 ea2384d3 bellard
BlockDriver *bdrv_find_format(const char *format_name)
327 ea2384d3 bellard
{
328 ea2384d3 bellard
    BlockDriver *drv1;
329 8a22f02a Stefan Hajnoczi
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
330 8a22f02a Stefan Hajnoczi
        if (!strcmp(drv1->format_name, format_name)) {
331 ea2384d3 bellard
            return drv1;
332 8a22f02a Stefan Hajnoczi
        }
333 ea2384d3 bellard
    }
334 ea2384d3 bellard
    return NULL;
335 ea2384d3 bellard
}
336 ea2384d3 bellard
337 eb852011 Markus Armbruster
/*
 * Return 1 if drv may be used according to the build-time whitelist
 * (CONFIG_BDRV_WHITELIST), 0 otherwise.  An empty whitelist permits
 * every driver.
 */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    if (whitelist[0] == NULL) {
        return 1;               /* no whitelist, anything goes */
    }

    /* The list is terminated by a NULL entry */
    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
354 eb852011 Markus Armbruster
355 eb852011 Markus Armbruster
/*
 * Like bdrv_find_format(), but return NULL when the driver that was found
 * is rejected by bdrv_is_whitelisted().
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv == NULL || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
360 eb852011 Markus Armbruster
361 5b7e1542 Zhi Yong Wu
typedef struct CreateCo {
362 5b7e1542 Zhi Yong Wu
    BlockDriver *drv;
363 5b7e1542 Zhi Yong Wu
    char *filename;
364 5b7e1542 Zhi Yong Wu
    QEMUOptionParameter *options;
365 5b7e1542 Zhi Yong Wu
    int ret;
366 5b7e1542 Zhi Yong Wu
} CreateCo;
367 5b7e1542 Zhi Yong Wu
368 5b7e1542 Zhi Yong Wu
/*
 * Coroutine entry point for bdrv_create().  opaque is the CreateCo that
 * describes the request; the return value of the driver's bdrv_create
 * callback is stored in its ret field.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    int result;

    assert(cco->drv);

    result = cco->drv->bdrv_create(cco->filename, cco->options);
    cco->ret = result;
}
375 5b7e1542 Zhi Yong Wu
376 0e7e1989 Kevin Wolf
int bdrv_create(BlockDriver *drv, const char* filename,
377 0e7e1989 Kevin Wolf
    QEMUOptionParameter *options)
378 ea2384d3 bellard
{
379 5b7e1542 Zhi Yong Wu
    int ret;
380 5b7e1542 Zhi Yong Wu
381 5b7e1542 Zhi Yong Wu
    Coroutine *co;
382 5b7e1542 Zhi Yong Wu
    CreateCo cco = {
383 5b7e1542 Zhi Yong Wu
        .drv = drv,
384 5b7e1542 Zhi Yong Wu
        .filename = g_strdup(filename),
385 5b7e1542 Zhi Yong Wu
        .options = options,
386 5b7e1542 Zhi Yong Wu
        .ret = NOT_DONE,
387 5b7e1542 Zhi Yong Wu
    };
388 5b7e1542 Zhi Yong Wu
389 5b7e1542 Zhi Yong Wu
    if (!drv->bdrv_create) {
390 80168bff Luiz Capitulino
        ret = -ENOTSUP;
391 80168bff Luiz Capitulino
        goto out;
392 5b7e1542 Zhi Yong Wu
    }
393 5b7e1542 Zhi Yong Wu
394 5b7e1542 Zhi Yong Wu
    if (qemu_in_coroutine()) {
395 5b7e1542 Zhi Yong Wu
        /* Fast-path if already in coroutine context */
396 5b7e1542 Zhi Yong Wu
        bdrv_create_co_entry(&cco);
397 5b7e1542 Zhi Yong Wu
    } else {
398 5b7e1542 Zhi Yong Wu
        co = qemu_coroutine_create(bdrv_create_co_entry);
399 5b7e1542 Zhi Yong Wu
        qemu_coroutine_enter(co, &cco);
400 5b7e1542 Zhi Yong Wu
        while (cco.ret == NOT_DONE) {
401 5b7e1542 Zhi Yong Wu
            qemu_aio_wait();
402 5b7e1542 Zhi Yong Wu
        }
403 5b7e1542 Zhi Yong Wu
    }
404 5b7e1542 Zhi Yong Wu
405 5b7e1542 Zhi Yong Wu
    ret = cco.ret;
406 0e7e1989 Kevin Wolf
407 80168bff Luiz Capitulino
out:
408 80168bff Luiz Capitulino
    g_free(cco.filename);
409 5b7e1542 Zhi Yong Wu
    return ret;
410 ea2384d3 bellard
}
411 ea2384d3 bellard
412 84a12e66 Christoph Hellwig
/*
 * Create filename using the protocol driver chosen by bdrv_find_protocol().
 * Returns -ENOENT when no driver is found, otherwise the result of
 * bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto_drv = bdrv_find_protocol(filename);

    if (!proto_drv) {
        return -ENOENT;
    }
    return bdrv_create(proto_drv, filename, options);
}
423 84a12e66 Christoph Hellwig
424 eba25057 Jim Meyering
/*
 * Create a uniquely-named empty temporary file.
 *
 * filename: buffer that receives the name of the created file
 * size:     capacity of filename in bytes (must be at least MAX_PATH on
 *           Windows, which is asserted)
 *
 * Return 0 upon success, otherwise a negative errno value.  On POSIX
 * hosts -EOVERFLOW is returned when the name does not fit into the
 * buffer.  Note that the Windows branch returns the negated
 * GetLastError() code on failure.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    /* Honour $TMPDIR and fall back to /tmp when it is not set */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself modify errno and would otherwise mask the real error.
         */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
458 fc01f7e7 bellard
459 84a12e66 Christoph Hellwig
/*
460 84a12e66 Christoph Hellwig
 * Detect host devices. By convention, /dev/cdrom[N] is always
461 84a12e66 Christoph Hellwig
 * recognized as a host CDROM.
462 84a12e66 Christoph Hellwig
 */
463 84a12e66 Christoph Hellwig
static BlockDriver *find_hdev_driver(const char *filename)
464 84a12e66 Christoph Hellwig
{
465 84a12e66 Christoph Hellwig
    int score_max = 0, score;
466 84a12e66 Christoph Hellwig
    BlockDriver *drv = NULL, *d;
467 84a12e66 Christoph Hellwig
468 84a12e66 Christoph Hellwig
    QLIST_FOREACH(d, &bdrv_drivers, list) {
469 84a12e66 Christoph Hellwig
        if (d->bdrv_probe_device) {
470 84a12e66 Christoph Hellwig
            score = d->bdrv_probe_device(filename);
471 84a12e66 Christoph Hellwig
            if (score > score_max) {
472 84a12e66 Christoph Hellwig
                score_max = score;
473 84a12e66 Christoph Hellwig
                drv = d;
474 84a12e66 Christoph Hellwig
            }
475 84a12e66 Christoph Hellwig
        }
476 84a12e66 Christoph Hellwig
    }
477 84a12e66 Christoph Hellwig
478 84a12e66 Christoph Hellwig
    return drv;
479 84a12e66 Christoph Hellwig
}
480 84a12e66 Christoph Hellwig
481 b50cbabc MORITA Kazutaka
BlockDriver *bdrv_find_protocol(const char *filename)
482 83f64091 bellard
{
483 83f64091 bellard
    BlockDriver *drv1;
484 83f64091 bellard
    char protocol[128];
485 1cec71e3 Anthony Liguori
    int len;
486 83f64091 bellard
    const char *p;
487 19cb3738 bellard
488 66f82cee Kevin Wolf
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
489 66f82cee Kevin Wolf
490 39508e7a Christoph Hellwig
    /*
491 39508e7a Christoph Hellwig
     * XXX(hch): we really should not let host device detection
492 39508e7a Christoph Hellwig
     * override an explicit protocol specification, but moving this
493 39508e7a Christoph Hellwig
     * later breaks access to device names with colons in them.
494 39508e7a Christoph Hellwig
     * Thanks to the brain-dead persistent naming schemes on udev-
495 39508e7a Christoph Hellwig
     * based Linux systems those actually are quite common.
496 39508e7a Christoph Hellwig
     */
497 39508e7a Christoph Hellwig
    drv1 = find_hdev_driver(filename);
498 39508e7a Christoph Hellwig
    if (drv1) {
499 39508e7a Christoph Hellwig
        return drv1;
500 39508e7a Christoph Hellwig
    }
501 39508e7a Christoph Hellwig
502 9e0b22f4 Stefan Hajnoczi
    if (!path_has_protocol(filename)) {
503 39508e7a Christoph Hellwig
        return bdrv_find_format("file");
504 84a12e66 Christoph Hellwig
    }
505 9e0b22f4 Stefan Hajnoczi
    p = strchr(filename, ':');
506 9e0b22f4 Stefan Hajnoczi
    assert(p != NULL);
507 1cec71e3 Anthony Liguori
    len = p - filename;
508 1cec71e3 Anthony Liguori
    if (len > sizeof(protocol) - 1)
509 1cec71e3 Anthony Liguori
        len = sizeof(protocol) - 1;
510 1cec71e3 Anthony Liguori
    memcpy(protocol, filename, len);
511 1cec71e3 Anthony Liguori
    protocol[len] = '\0';
512 8a22f02a Stefan Hajnoczi
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
513 5fafdf24 ths
        if (drv1->protocol_name &&
514 8a22f02a Stefan Hajnoczi
            !strcmp(drv1->protocol_name, protocol)) {
515 83f64091 bellard
            return drv1;
516 8a22f02a Stefan Hajnoczi
        }
517 83f64091 bellard
    }
518 83f64091 bellard
    return NULL;
519 83f64091 bellard
}
520 83f64091 bellard
521 f500a6d3 Kevin Wolf
static int find_image_format(BlockDriverState *bs, const char *filename,
522 f500a6d3 Kevin Wolf
                             BlockDriver **pdrv)
523 f3a5d3f8 Christoph Hellwig
{
524 f500a6d3 Kevin Wolf
    int score, score_max;
525 f3a5d3f8 Christoph Hellwig
    BlockDriver *drv1, *drv;
526 f3a5d3f8 Christoph Hellwig
    uint8_t buf[2048];
527 f500a6d3 Kevin Wolf
    int ret = 0;
528 f8ea0b00 Nicholas Bellinger
529 08a00559 Kevin Wolf
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
530 08a00559 Kevin Wolf
    if (bs->sg || !bdrv_is_inserted(bs)) {
531 c98ac35d Stefan Weil
        drv = bdrv_find_format("raw");
532 c98ac35d Stefan Weil
        if (!drv) {
533 c98ac35d Stefan Weil
            ret = -ENOENT;
534 c98ac35d Stefan Weil
        }
535 c98ac35d Stefan Weil
        *pdrv = drv;
536 c98ac35d Stefan Weil
        return ret;
537 1a396859 Nicholas A. Bellinger
    }
538 f8ea0b00 Nicholas Bellinger
539 83f64091 bellard
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
540 83f64091 bellard
    if (ret < 0) {
541 c98ac35d Stefan Weil
        *pdrv = NULL;
542 c98ac35d Stefan Weil
        return ret;
543 83f64091 bellard
    }
544 83f64091 bellard
545 ea2384d3 bellard
    score_max = 0;
546 84a12e66 Christoph Hellwig
    drv = NULL;
547 8a22f02a Stefan Hajnoczi
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
548 83f64091 bellard
        if (drv1->bdrv_probe) {
549 83f64091 bellard
            score = drv1->bdrv_probe(buf, ret, filename);
550 83f64091 bellard
            if (score > score_max) {
551 83f64091 bellard
                score_max = score;
552 83f64091 bellard
                drv = drv1;
553 83f64091 bellard
            }
554 0849bf08 bellard
        }
555 fc01f7e7 bellard
    }
556 c98ac35d Stefan Weil
    if (!drv) {
557 c98ac35d Stefan Weil
        ret = -ENOENT;
558 c98ac35d Stefan Weil
    }
559 c98ac35d Stefan Weil
    *pdrv = drv;
560 c98ac35d Stefan Weil
    return ret;
561 ea2384d3 bellard
}
562 ea2384d3 bellard
563 51762288 Stefan Hajnoczi
/**
564 51762288 Stefan Hajnoczi
 * Set the current 'total_sectors' value
565 51762288 Stefan Hajnoczi
 */
566 51762288 Stefan Hajnoczi
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
567 51762288 Stefan Hajnoczi
{
568 51762288 Stefan Hajnoczi
    BlockDriver *drv = bs->drv;
569 51762288 Stefan Hajnoczi
570 396759ad Nicholas Bellinger
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
571 396759ad Nicholas Bellinger
    if (bs->sg)
572 396759ad Nicholas Bellinger
        return 0;
573 396759ad Nicholas Bellinger
574 51762288 Stefan Hajnoczi
    /* query actual device if possible, otherwise just trust the hint */
575 51762288 Stefan Hajnoczi
    if (drv->bdrv_getlength) {
576 51762288 Stefan Hajnoczi
        int64_t length = drv->bdrv_getlength(bs);
577 51762288 Stefan Hajnoczi
        if (length < 0) {
578 51762288 Stefan Hajnoczi
            return length;
579 51762288 Stefan Hajnoczi
        }
580 51762288 Stefan Hajnoczi
        hint = length >> BDRV_SECTOR_BITS;
581 51762288 Stefan Hajnoczi
    }
582 51762288 Stefan Hajnoczi
583 51762288 Stefan Hajnoczi
    bs->total_sectors = hint;
584 51762288 Stefan Hajnoczi
    return 0;
585 51762288 Stefan Hajnoczi
}
586 51762288 Stefan Hajnoczi
587 c3993cdc Stefan Hajnoczi
/**
588 c3993cdc Stefan Hajnoczi
 * Set open flags for a given cache mode
589 c3993cdc Stefan Hajnoczi
 *
590 c3993cdc Stefan Hajnoczi
 * Return 0 on success, -1 if the cache mode was invalid.
591 c3993cdc Stefan Hajnoczi
 */
592 c3993cdc Stefan Hajnoczi
int bdrv_parse_cache_flags(const char *mode, int *flags)
593 c3993cdc Stefan Hajnoczi
{
594 c3993cdc Stefan Hajnoczi
    *flags &= ~BDRV_O_CACHE_MASK;
595 c3993cdc Stefan Hajnoczi
596 c3993cdc Stefan Hajnoczi
    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
597 c3993cdc Stefan Hajnoczi
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
598 92196b2f Stefan Hajnoczi
    } else if (!strcmp(mode, "directsync")) {
599 92196b2f Stefan Hajnoczi
        *flags |= BDRV_O_NOCACHE;
600 c3993cdc Stefan Hajnoczi
    } else if (!strcmp(mode, "writeback")) {
601 c3993cdc Stefan Hajnoczi
        *flags |= BDRV_O_CACHE_WB;
602 c3993cdc Stefan Hajnoczi
    } else if (!strcmp(mode, "unsafe")) {
603 c3993cdc Stefan Hajnoczi
        *flags |= BDRV_O_CACHE_WB;
604 c3993cdc Stefan Hajnoczi
        *flags |= BDRV_O_NO_FLUSH;
605 c3993cdc Stefan Hajnoczi
    } else if (!strcmp(mode, "writethrough")) {
606 c3993cdc Stefan Hajnoczi
        /* this is the default */
607 c3993cdc Stefan Hajnoczi
    } else {
608 c3993cdc Stefan Hajnoczi
        return -1;
609 c3993cdc Stefan Hajnoczi
    }
610 c3993cdc Stefan Hajnoczi
611 c3993cdc Stefan Hajnoczi
    return 0;
612 c3993cdc Stefan Hajnoczi
}
613 c3993cdc Stefan Hajnoczi
614 53fec9d3 Stefan Hajnoczi
/**
615 53fec9d3 Stefan Hajnoczi
 * The copy-on-read flag is actually a reference count so multiple users may
616 53fec9d3 Stefan Hajnoczi
 * use the feature without worrying about clobbering its previous state.
617 53fec9d3 Stefan Hajnoczi
 * Copy-on-read stays enabled until all users have called to disable it.
618 53fec9d3 Stefan Hajnoczi
 */
619 53fec9d3 Stefan Hajnoczi
void bdrv_enable_copy_on_read(BlockDriverState *bs)
620 53fec9d3 Stefan Hajnoczi
{
621 53fec9d3 Stefan Hajnoczi
    bs->copy_on_read++;
622 53fec9d3 Stefan Hajnoczi
}
623 53fec9d3 Stefan Hajnoczi
624 53fec9d3 Stefan Hajnoczi
/**
 * Drop one reference on the copy-on-read feature of @bs.
 *
 * Asserts that at least one reference is currently held.
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read -= 1;
}
629 53fec9d3 Stefan Hajnoczi
630 7b272452 Kevin Wolf
/*
 * Compute the flags that are handed to the driver when opening @bs.
 *
 * BDRV_O_CACHE_WB is always set, flags that are internal to the block layer
 * (BDRV_O_SNAPSHOT, BDRV_O_NO_BACKING) are removed, and temporary images are
 * forced to be writable.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    /* Flags internal to the block layer that must be cleared before opening */
    const int internal_flags = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int result = (flags | BDRV_O_CACHE_WB) & ~internal_flags;

    /* Snapshots should be writable. */
    if (bs->is_temporary) {
        result |= BDRV_O_RDWR;
    }

    return result;
}
649 7b272452 Kevin Wolf
650 b6ce07aa Kevin Wolf
/*
651 57915332 Kevin Wolf
 * Common part for opening disk images and files
652 57915332 Kevin Wolf
 */
653 f500a6d3 Kevin Wolf
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
654 f500a6d3 Kevin Wolf
    const char *filename,
655 57915332 Kevin Wolf
    int flags, BlockDriver *drv)
656 57915332 Kevin Wolf
{
657 57915332 Kevin Wolf
    int ret, open_flags;
658 57915332 Kevin Wolf
659 57915332 Kevin Wolf
    assert(drv != NULL);
660 6405875c Paolo Bonzini
    assert(bs->file == NULL);
661 57915332 Kevin Wolf
662 28dcee10 Stefan Hajnoczi
    trace_bdrv_open_common(bs, filename, flags, drv->format_name);
663 28dcee10 Stefan Hajnoczi
664 57915332 Kevin Wolf
    bs->open_flags = flags;
665 57915332 Kevin Wolf
    bs->buffer_alignment = 512;
666 57915332 Kevin Wolf
667 53fec9d3 Stefan Hajnoczi
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
668 53fec9d3 Stefan Hajnoczi
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
669 53fec9d3 Stefan Hajnoczi
        bdrv_enable_copy_on_read(bs);
670 53fec9d3 Stefan Hajnoczi
    }
671 53fec9d3 Stefan Hajnoczi
672 57915332 Kevin Wolf
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
673 57915332 Kevin Wolf
674 57915332 Kevin Wolf
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
675 57915332 Kevin Wolf
        return -ENOTSUP;
676 57915332 Kevin Wolf
    }
677 57915332 Kevin Wolf
678 57915332 Kevin Wolf
    bs->drv = drv;
679 7267c094 Anthony Liguori
    bs->opaque = g_malloc0(drv->instance_size);
680 57915332 Kevin Wolf
681 03f541bd Stefan Hajnoczi
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
682 7b272452 Kevin Wolf
    open_flags = bdrv_open_flags(bs, flags);
683 57915332 Kevin Wolf
684 be028adc Jeff Cody
    bs->read_only = !(open_flags & BDRV_O_RDWR);
685 e7c63796 Stefan Hajnoczi
686 66f82cee Kevin Wolf
    /* Open the image, either directly or using a protocol */
687 66f82cee Kevin Wolf
    if (drv->bdrv_file_open) {
688 f500a6d3 Kevin Wolf
        if (file != NULL) {
689 f500a6d3 Kevin Wolf
            bdrv_swap(file, bs);
690 f500a6d3 Kevin Wolf
            ret = 0;
691 f500a6d3 Kevin Wolf
        } else {
692 f500a6d3 Kevin Wolf
            ret = drv->bdrv_file_open(bs, filename, open_flags);
693 66f82cee Kevin Wolf
        }
694 f500a6d3 Kevin Wolf
    } else {
695 f500a6d3 Kevin Wolf
        assert(file != NULL);
696 f500a6d3 Kevin Wolf
        bs->file = file;
697 f500a6d3 Kevin Wolf
        ret = drv->bdrv_open(bs, open_flags);
698 66f82cee Kevin Wolf
    }
699 66f82cee Kevin Wolf
700 57915332 Kevin Wolf
    if (ret < 0) {
701 57915332 Kevin Wolf
        goto free_and_fail;
702 57915332 Kevin Wolf
    }
703 57915332 Kevin Wolf
704 51762288 Stefan Hajnoczi
    ret = refresh_total_sectors(bs, bs->total_sectors);
705 51762288 Stefan Hajnoczi
    if (ret < 0) {
706 51762288 Stefan Hajnoczi
        goto free_and_fail;
707 57915332 Kevin Wolf
    }
708 51762288 Stefan Hajnoczi
709 57915332 Kevin Wolf
#ifndef _WIN32
710 57915332 Kevin Wolf
    if (bs->is_temporary) {
711 57915332 Kevin Wolf
        unlink(filename);
712 57915332 Kevin Wolf
    }
713 57915332 Kevin Wolf
#endif
714 57915332 Kevin Wolf
    return 0;
715 57915332 Kevin Wolf
716 57915332 Kevin Wolf
free_and_fail:
717 f500a6d3 Kevin Wolf
    bs->file = NULL;
718 7267c094 Anthony Liguori
    g_free(bs->opaque);
719 57915332 Kevin Wolf
    bs->opaque = NULL;
720 57915332 Kevin Wolf
    bs->drv = NULL;
721 57915332 Kevin Wolf
    return ret;
722 57915332 Kevin Wolf
}
723 57915332 Kevin Wolf
724 57915332 Kevin Wolf
/*
725 b6ce07aa Kevin Wolf
 * Opens a file using a protocol (file, host_device, nbd, ...)
726 b6ce07aa Kevin Wolf
 */
727 83f64091 bellard
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
728 ea2384d3 bellard
{
729 83f64091 bellard
    BlockDriverState *bs;
730 6db95603 Christoph Hellwig
    BlockDriver *drv;
731 83f64091 bellard
    int ret;
732 83f64091 bellard
733 b50cbabc MORITA Kazutaka
    drv = bdrv_find_protocol(filename);
734 6db95603 Christoph Hellwig
    if (!drv) {
735 6db95603 Christoph Hellwig
        return -ENOENT;
736 6db95603 Christoph Hellwig
    }
737 6db95603 Christoph Hellwig
738 83f64091 bellard
    bs = bdrv_new("");
739 f500a6d3 Kevin Wolf
    ret = bdrv_open_common(bs, NULL, filename, flags, drv);
740 83f64091 bellard
    if (ret < 0) {
741 83f64091 bellard
        bdrv_delete(bs);
742 83f64091 bellard
        return ret;
743 3b0d4f61 bellard
    }
744 71d0770c aliguori
    bs->growable = 1;
745 83f64091 bellard
    *pbs = bs;
746 83f64091 bellard
    return 0;
747 83f64091 bellard
}
748 83f64091 bellard
749 9156df12 Paolo Bonzini
/*
 * Open the backing file of @bs, if it names one and it is not open yet.
 *
 * BDRV_O_NO_BACKING is cleared from bs->open_flags as soon as no backing
 * file is attached, and set again if opening the backing file fails.
 * The backing file is opened without BDRV_O_RDWR and BDRV_O_SNAPSHOT.
 *
 * Returns 0 on success (including when there is nothing to open), or the
 * negative value returned by bdrv_open().
 */
int bdrv_open_backing_file(BlockDriverState *bs)
{
    char path[PATH_MAX];
    BlockDriver *fmt_drv = NULL;
    int flags;
    int rc;

    if (bs->backing_hd) {
        return 0;
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (!bs->backing_file[0]) {
        return 0;
    }

    bs->backing_hd = bdrv_new("");
    bdrv_get_full_backing_filename(bs, path, sizeof(path));

    if (bs->backing_format[0]) {
        fmt_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);

    rc = bdrv_open(bs->backing_hd, path, flags, fmt_drv);
    if (rc >= 0) {
        return 0;
    }

    bdrv_delete(bs->backing_hd);
    bs->backing_hd = NULL;
    bs->open_flags |= BDRV_O_NO_BACKING;
    return rc;
}
784 9156df12 Paolo Bonzini
785 b6ce07aa Kevin Wolf
/*
786 b6ce07aa Kevin Wolf
 * Opens a disk image (raw, qcow2, vmdk, ...)
787 b6ce07aa Kevin Wolf
 */
788 d6e9098e Kevin Wolf
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
789 d6e9098e Kevin Wolf
              BlockDriver *drv)
790 ea2384d3 bellard
{
791 b6ce07aa Kevin Wolf
    int ret;
792 89c9bc3d Stefan Weil
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
793 89c9bc3d Stefan Weil
    char tmp_filename[PATH_MAX + 1];
794 f500a6d3 Kevin Wolf
    BlockDriverState *file = NULL;
795 712e7874 bellard
796 83f64091 bellard
    if (flags & BDRV_O_SNAPSHOT) {
797 ea2384d3 bellard
        BlockDriverState *bs1;
798 ea2384d3 bellard
        int64_t total_size;
799 7c96d46e aliguori
        int is_protocol = 0;
800 91a073a9 Kevin Wolf
        BlockDriver *bdrv_qcow2;
801 91a073a9 Kevin Wolf
        QEMUOptionParameter *options;
802 b6ce07aa Kevin Wolf
        char backing_filename[PATH_MAX];
803 3b46e624 ths
804 ea2384d3 bellard
        /* if snapshot, we create a temporary backing file and open it
805 ea2384d3 bellard
           instead of opening 'filename' directly */
806 33e3963e bellard
807 ea2384d3 bellard
        /* if there is a backing file, use it */
808 ea2384d3 bellard
        bs1 = bdrv_new("");
809 d6e9098e Kevin Wolf
        ret = bdrv_open(bs1, filename, 0, drv);
810 51d7c00c aliguori
        if (ret < 0) {
811 ea2384d3 bellard
            bdrv_delete(bs1);
812 51d7c00c aliguori
            return ret;
813 ea2384d3 bellard
        }
814 3e82990b Jes Sorensen
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
815 7c96d46e aliguori
816 7c96d46e aliguori
        if (bs1->drv && bs1->drv->protocol_name)
817 7c96d46e aliguori
            is_protocol = 1;
818 7c96d46e aliguori
819 ea2384d3 bellard
        bdrv_delete(bs1);
820 3b46e624 ths
821 eba25057 Jim Meyering
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
822 eba25057 Jim Meyering
        if (ret < 0) {
823 eba25057 Jim Meyering
            return ret;
824 eba25057 Jim Meyering
        }
825 7c96d46e aliguori
826 7c96d46e aliguori
        /* Real path is meaningless for protocols */
827 7c96d46e aliguori
        if (is_protocol)
828 7c96d46e aliguori
            snprintf(backing_filename, sizeof(backing_filename),
829 7c96d46e aliguori
                     "%s", filename);
830 114cdfa9 Kirill A. Shutemov
        else if (!realpath(filename, backing_filename))
831 114cdfa9 Kirill A. Shutemov
            return -errno;
832 7c96d46e aliguori
833 91a073a9 Kevin Wolf
        bdrv_qcow2 = bdrv_find_format("qcow2");
834 91a073a9 Kevin Wolf
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
835 91a073a9 Kevin Wolf
836 3e82990b Jes Sorensen
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
837 91a073a9 Kevin Wolf
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
838 91a073a9 Kevin Wolf
        if (drv) {
839 91a073a9 Kevin Wolf
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
840 91a073a9 Kevin Wolf
                drv->format_name);
841 91a073a9 Kevin Wolf
        }
842 91a073a9 Kevin Wolf
843 91a073a9 Kevin Wolf
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
844 d748768c Jan Kiszka
        free_option_parameters(options);
845 51d7c00c aliguori
        if (ret < 0) {
846 51d7c00c aliguori
            return ret;
847 ea2384d3 bellard
        }
848 91a073a9 Kevin Wolf
849 ea2384d3 bellard
        filename = tmp_filename;
850 91a073a9 Kevin Wolf
        drv = bdrv_qcow2;
851 ea2384d3 bellard
        bs->is_temporary = 1;
852 ea2384d3 bellard
    }
853 712e7874 bellard
854 f500a6d3 Kevin Wolf
    /* Open image file without format layer */
855 f500a6d3 Kevin Wolf
    if (flags & BDRV_O_RDWR) {
856 f500a6d3 Kevin Wolf
        flags |= BDRV_O_ALLOW_RDWR;
857 f500a6d3 Kevin Wolf
    }
858 f500a6d3 Kevin Wolf
859 f500a6d3 Kevin Wolf
    ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags));
860 f500a6d3 Kevin Wolf
    if (ret < 0) {
861 f500a6d3 Kevin Wolf
        return ret;
862 f500a6d3 Kevin Wolf
    }
863 f500a6d3 Kevin Wolf
864 b6ce07aa Kevin Wolf
    /* Find the right image format driver */
865 6db95603 Christoph Hellwig
    if (!drv) {
866 f500a6d3 Kevin Wolf
        ret = find_image_format(file, filename, &drv);
867 51d7c00c aliguori
    }
868 6987307c Christoph Hellwig
869 51d7c00c aliguori
    if (!drv) {
870 51d7c00c aliguori
        goto unlink_and_fail;
871 ea2384d3 bellard
    }
872 b6ce07aa Kevin Wolf
873 b6ce07aa Kevin Wolf
    /* Open the image */
874 f500a6d3 Kevin Wolf
    ret = bdrv_open_common(bs, file, filename, flags, drv);
875 b6ce07aa Kevin Wolf
    if (ret < 0) {
876 6987307c Christoph Hellwig
        goto unlink_and_fail;
877 6987307c Christoph Hellwig
    }
878 6987307c Christoph Hellwig
879 f500a6d3 Kevin Wolf
    if (bs->file != file) {
880 f500a6d3 Kevin Wolf
        bdrv_delete(file);
881 f500a6d3 Kevin Wolf
        file = NULL;
882 f500a6d3 Kevin Wolf
    }
883 f500a6d3 Kevin Wolf
884 b6ce07aa Kevin Wolf
    /* If there is a backing file, use it */
885 9156df12 Paolo Bonzini
    if ((flags & BDRV_O_NO_BACKING) == 0) {
886 9156df12 Paolo Bonzini
        ret = bdrv_open_backing_file(bs);
887 b6ce07aa Kevin Wolf
        if (ret < 0) {
888 b6ce07aa Kevin Wolf
            bdrv_close(bs);
889 b6ce07aa Kevin Wolf
            return ret;
890 b6ce07aa Kevin Wolf
        }
891 b6ce07aa Kevin Wolf
    }
892 b6ce07aa Kevin Wolf
893 b6ce07aa Kevin Wolf
    if (!bdrv_key_required(bs)) {
894 7d4b4ba5 Markus Armbruster
        bdrv_dev_change_media_cb(bs, true);
895 b6ce07aa Kevin Wolf
    }
896 b6ce07aa Kevin Wolf
897 98f90dba Zhi Yong Wu
    /* throttling disk I/O limits */
898 98f90dba Zhi Yong Wu
    if (bs->io_limits_enabled) {
899 98f90dba Zhi Yong Wu
        bdrv_io_limits_enable(bs);
900 98f90dba Zhi Yong Wu
    }
901 98f90dba Zhi Yong Wu
902 b6ce07aa Kevin Wolf
    return 0;
903 b6ce07aa Kevin Wolf
904 b6ce07aa Kevin Wolf
unlink_and_fail:
905 f500a6d3 Kevin Wolf
    if (file != NULL) {
906 f500a6d3 Kevin Wolf
        bdrv_delete(file);
907 f500a6d3 Kevin Wolf
    }
908 b6ce07aa Kevin Wolf
    if (bs->is_temporary) {
909 b6ce07aa Kevin Wolf
        unlink(filename);
910 b6ce07aa Kevin Wolf
    }
911 b6ce07aa Kevin Wolf
    return ret;
912 b6ce07aa Kevin Wolf
}
913 b6ce07aa Kevin Wolf
914 e971aa12 Jeff Cody
typedef struct BlockReopenQueueEntry {
915 e971aa12 Jeff Cody
     bool prepared;
916 e971aa12 Jeff Cody
     BDRVReopenState state;
917 e971aa12 Jeff Cody
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
918 e971aa12 Jeff Cody
} BlockReopenQueueEntry;
919 e971aa12 Jeff Cody
920 e971aa12 Jeff Cody
/*
921 e971aa12 Jeff Cody
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
922 e971aa12 Jeff Cody
 * reopen of multiple devices.
923 e971aa12 Jeff Cody
 *
924 e971aa12 Jeff Cody
 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
925 e971aa12 Jeff Cody
 * already performed, or alternatively may be NULL a new BlockReopenQueue will
926 e971aa12 Jeff Cody
 * be created and initialized. This newly created BlockReopenQueue should be
927 e971aa12 Jeff Cody
 * passed back in for subsequent calls that are intended to be of the same
928 e971aa12 Jeff Cody
 * atomic 'set'.
929 e971aa12 Jeff Cody
 *
930 e971aa12 Jeff Cody
 * bs is the BlockDriverState to add to the reopen queue.
931 e971aa12 Jeff Cody
 *
932 e971aa12 Jeff Cody
 * flags contains the open flags for the associated bs
933 e971aa12 Jeff Cody
 *
934 e971aa12 Jeff Cody
 * returns a pointer to bs_queue, which is either the newly allocated
935 e971aa12 Jeff Cody
 * bs_queue, or the existing bs_queue being used.
936 e971aa12 Jeff Cody
 *
937 e971aa12 Jeff Cody
 */
938 e971aa12 Jeff Cody
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
939 e971aa12 Jeff Cody
                                    BlockDriverState *bs, int flags)
940 e971aa12 Jeff Cody
{
941 e971aa12 Jeff Cody
    assert(bs != NULL);
942 e971aa12 Jeff Cody
943 e971aa12 Jeff Cody
    BlockReopenQueueEntry *bs_entry;
944 e971aa12 Jeff Cody
    if (bs_queue == NULL) {
945 e971aa12 Jeff Cody
        bs_queue = g_new0(BlockReopenQueue, 1);
946 e971aa12 Jeff Cody
        QSIMPLEQ_INIT(bs_queue);
947 e971aa12 Jeff Cody
    }
948 e971aa12 Jeff Cody
949 e971aa12 Jeff Cody
    if (bs->file) {
950 e971aa12 Jeff Cody
        bdrv_reopen_queue(bs_queue, bs->file, flags);
951 e971aa12 Jeff Cody
    }
952 e971aa12 Jeff Cody
953 e971aa12 Jeff Cody
    bs_entry = g_new0(BlockReopenQueueEntry, 1);
954 e971aa12 Jeff Cody
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
955 e971aa12 Jeff Cody
956 e971aa12 Jeff Cody
    bs_entry->state.bs = bs;
957 e971aa12 Jeff Cody
    bs_entry->state.flags = flags;
958 e971aa12 Jeff Cody
959 e971aa12 Jeff Cody
    return bs_queue;
960 e971aa12 Jeff Cody
}
961 e971aa12 Jeff Cody
962 e971aa12 Jeff Cody
/*
963 e971aa12 Jeff Cody
 * Reopen multiple BlockDriverStates atomically & transactionally.
964 e971aa12 Jeff Cody
 *
965 e971aa12 Jeff Cody
 * The queue passed in (bs_queue) must have been built up previous
966 e971aa12 Jeff Cody
 * via bdrv_reopen_queue().
967 e971aa12 Jeff Cody
 *
968 e971aa12 Jeff Cody
 * Reopens all BDS specified in the queue, with the appropriate
969 e971aa12 Jeff Cody
 * flags.  All devices are prepared for reopen, and failure of any
970 e971aa12 Jeff Cody
 * device will cause all device changes to be abandonded, and intermediate
971 e971aa12 Jeff Cody
 * data cleaned up.
972 e971aa12 Jeff Cody
 *
973 e971aa12 Jeff Cody
 * If all devices prepare successfully, then the changes are committed
974 e971aa12 Jeff Cody
 * to all devices.
975 e971aa12 Jeff Cody
 *
976 e971aa12 Jeff Cody
 */
977 e971aa12 Jeff Cody
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
978 e971aa12 Jeff Cody
{
979 e971aa12 Jeff Cody
    int ret = -1;
980 e971aa12 Jeff Cody
    BlockReopenQueueEntry *bs_entry, *next;
981 e971aa12 Jeff Cody
    Error *local_err = NULL;
982 e971aa12 Jeff Cody
983 e971aa12 Jeff Cody
    assert(bs_queue != NULL);
984 e971aa12 Jeff Cody
985 e971aa12 Jeff Cody
    bdrv_drain_all();
986 e971aa12 Jeff Cody
987 e971aa12 Jeff Cody
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
988 e971aa12 Jeff Cody
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
989 e971aa12 Jeff Cody
            error_propagate(errp, local_err);
990 e971aa12 Jeff Cody
            goto cleanup;
991 e971aa12 Jeff Cody
        }
992 e971aa12 Jeff Cody
        bs_entry->prepared = true;
993 e971aa12 Jeff Cody
    }
994 e971aa12 Jeff Cody
995 e971aa12 Jeff Cody
    /* If we reach this point, we have success and just need to apply the
996 e971aa12 Jeff Cody
     * changes
997 e971aa12 Jeff Cody
     */
998 e971aa12 Jeff Cody
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
999 e971aa12 Jeff Cody
        bdrv_reopen_commit(&bs_entry->state);
1000 e971aa12 Jeff Cody
    }
1001 e971aa12 Jeff Cody
1002 e971aa12 Jeff Cody
    ret = 0;
1003 e971aa12 Jeff Cody
1004 e971aa12 Jeff Cody
cleanup:
1005 e971aa12 Jeff Cody
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1006 e971aa12 Jeff Cody
        if (ret && bs_entry->prepared) {
1007 e971aa12 Jeff Cody
            bdrv_reopen_abort(&bs_entry->state);
1008 e971aa12 Jeff Cody
        }
1009 e971aa12 Jeff Cody
        g_free(bs_entry);
1010 e971aa12 Jeff Cody
    }
1011 e971aa12 Jeff Cody
    g_free(bs_queue);
1012 e971aa12 Jeff Cody
    return ret;
1013 e971aa12 Jeff Cody
}
1014 e971aa12 Jeff Cody
1015 e971aa12 Jeff Cody
1016 e971aa12 Jeff Cody
/* Reopen a single BlockDriverState with the specified flags. */
1017 e971aa12 Jeff Cody
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1018 e971aa12 Jeff Cody
{
1019 e971aa12 Jeff Cody
    int ret = -1;
1020 e971aa12 Jeff Cody
    Error *local_err = NULL;
1021 e971aa12 Jeff Cody
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1022 e971aa12 Jeff Cody
1023 e971aa12 Jeff Cody
    ret = bdrv_reopen_multiple(queue, &local_err);
1024 e971aa12 Jeff Cody
    if (local_err != NULL) {
1025 e971aa12 Jeff Cody
        error_propagate(errp, local_err);
1026 e971aa12 Jeff Cody
    }
1027 e971aa12 Jeff Cody
    return ret;
1028 e971aa12 Jeff Cody
}
1029 e971aa12 Jeff Cody
1030 e971aa12 Jeff Cody
1031 e971aa12 Jeff Cody
/*
1032 e971aa12 Jeff Cody
 * Prepares a BlockDriverState for reopen. All changes are staged in the
1033 e971aa12 Jeff Cody
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1034 e971aa12 Jeff Cody
 * the block driver layer .bdrv_reopen_prepare()
1035 e971aa12 Jeff Cody
 *
1036 e971aa12 Jeff Cody
 * bs is the BlockDriverState to reopen
1037 e971aa12 Jeff Cody
 * flags are the new open flags
1038 e971aa12 Jeff Cody
 * queue is the reopen queue
1039 e971aa12 Jeff Cody
 *
1040 e971aa12 Jeff Cody
 * Returns 0 on success, non-zero on error.  On error errp will be set
1041 e971aa12 Jeff Cody
 * as well.
1042 e971aa12 Jeff Cody
 *
1043 e971aa12 Jeff Cody
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1044 e971aa12 Jeff Cody
 * It is the responsibility of the caller to then call the abort() or
1045 e971aa12 Jeff Cody
 * commit() for any other BDS that have been left in a prepare() state
1046 e971aa12 Jeff Cody
 *
1047 e971aa12 Jeff Cody
 */
1048 e971aa12 Jeff Cody
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1049 e971aa12 Jeff Cody
                        Error **errp)
1050 e971aa12 Jeff Cody
{
1051 e971aa12 Jeff Cody
    int ret = -1;
1052 e971aa12 Jeff Cody
    Error *local_err = NULL;
1053 e971aa12 Jeff Cody
    BlockDriver *drv;
1054 e971aa12 Jeff Cody
1055 e971aa12 Jeff Cody
    assert(reopen_state != NULL);
1056 e971aa12 Jeff Cody
    assert(reopen_state->bs->drv != NULL);
1057 e971aa12 Jeff Cody
    drv = reopen_state->bs->drv;
1058 e971aa12 Jeff Cody
1059 e971aa12 Jeff Cody
    /* if we are to stay read-only, do not allow permission change
1060 e971aa12 Jeff Cody
     * to r/w */
1061 e971aa12 Jeff Cody
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1062 e971aa12 Jeff Cody
        reopen_state->flags & BDRV_O_RDWR) {
1063 e971aa12 Jeff Cody
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1064 e971aa12 Jeff Cody
                  reopen_state->bs->device_name);
1065 e971aa12 Jeff Cody
        goto error;
1066 e971aa12 Jeff Cody
    }
1067 e971aa12 Jeff Cody
1068 e971aa12 Jeff Cody
1069 e971aa12 Jeff Cody
    ret = bdrv_flush(reopen_state->bs);
1070 e971aa12 Jeff Cody
    if (ret) {
1071 e971aa12 Jeff Cody
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1072 e971aa12 Jeff Cody
                  strerror(-ret));
1073 e971aa12 Jeff Cody
        goto error;
1074 e971aa12 Jeff Cody
    }
1075 e971aa12 Jeff Cody
1076 e971aa12 Jeff Cody
    if (drv->bdrv_reopen_prepare) {
1077 e971aa12 Jeff Cody
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1078 e971aa12 Jeff Cody
        if (ret) {
1079 e971aa12 Jeff Cody
            if (local_err != NULL) {
1080 e971aa12 Jeff Cody
                error_propagate(errp, local_err);
1081 e971aa12 Jeff Cody
            } else {
1082 e971aa12 Jeff Cody
                error_set(errp, QERR_OPEN_FILE_FAILED,
1083 e971aa12 Jeff Cody
                          reopen_state->bs->filename);
1084 e971aa12 Jeff Cody
            }
1085 e971aa12 Jeff Cody
            goto error;
1086 e971aa12 Jeff Cody
        }
1087 e971aa12 Jeff Cody
    } else {
1088 e971aa12 Jeff Cody
        /* It is currently mandatory to have a bdrv_reopen_prepare()
1089 e971aa12 Jeff Cody
         * handler for each supported drv. */
1090 e971aa12 Jeff Cody
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1091 e971aa12 Jeff Cody
                  drv->format_name, reopen_state->bs->device_name,
1092 e971aa12 Jeff Cody
                 "reopening of file");
1093 e971aa12 Jeff Cody
        ret = -1;
1094 e971aa12 Jeff Cody
        goto error;
1095 e971aa12 Jeff Cody
    }
1096 e971aa12 Jeff Cody
1097 e971aa12 Jeff Cody
    ret = 0;
1098 e971aa12 Jeff Cody
1099 e971aa12 Jeff Cody
error:
1100 e971aa12 Jeff Cody
    return ret;
1101 e971aa12 Jeff Cody
}
1102 e971aa12 Jeff Cody
1103 e971aa12 Jeff Cody
/*
1104 e971aa12 Jeff Cody
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1105 e971aa12 Jeff Cody
 * makes them final by swapping the staging BlockDriverState contents into
1106 e971aa12 Jeff Cody
 * the active BlockDriverState contents.
1107 e971aa12 Jeff Cody
 */
1108 e971aa12 Jeff Cody
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1109 e971aa12 Jeff Cody
{
1110 e971aa12 Jeff Cody
    BlockDriver *drv;
1111 e971aa12 Jeff Cody
1112 e971aa12 Jeff Cody
    assert(reopen_state != NULL);
1113 e971aa12 Jeff Cody
    drv = reopen_state->bs->drv;
1114 e971aa12 Jeff Cody
    assert(drv != NULL);
1115 e971aa12 Jeff Cody
1116 e971aa12 Jeff Cody
    /* If there are any driver level actions to take */
1117 e971aa12 Jeff Cody
    if (drv->bdrv_reopen_commit) {
1118 e971aa12 Jeff Cody
        drv->bdrv_reopen_commit(reopen_state);
1119 e971aa12 Jeff Cody
    }
1120 e971aa12 Jeff Cody
1121 e971aa12 Jeff Cody
    /* set BDS specific flags now */
1122 e971aa12 Jeff Cody
    reopen_state->bs->open_flags         = reopen_state->flags;
1123 e971aa12 Jeff Cody
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1124 e971aa12 Jeff Cody
                                              BDRV_O_CACHE_WB);
1125 e971aa12 Jeff Cody
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1126 e971aa12 Jeff Cody
}
1127 e971aa12 Jeff Cody
1128 e971aa12 Jeff Cody
/*
1129 e971aa12 Jeff Cody
 * Abort the reopen, and delete and free the staged changes in
1130 e971aa12 Jeff Cody
 * reopen_state
1131 e971aa12 Jeff Cody
 */
1132 e971aa12 Jeff Cody
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1133 e971aa12 Jeff Cody
{
1134 e971aa12 Jeff Cody
    BlockDriver *drv;
1135 e971aa12 Jeff Cody
1136 e971aa12 Jeff Cody
    assert(reopen_state != NULL);
1137 e971aa12 Jeff Cody
    drv = reopen_state->bs->drv;
1138 e971aa12 Jeff Cody
    assert(drv != NULL);
1139 e971aa12 Jeff Cody
1140 e971aa12 Jeff Cody
    if (drv->bdrv_reopen_abort) {
1141 e971aa12 Jeff Cody
        drv->bdrv_reopen_abort(reopen_state);
1142 e971aa12 Jeff Cody
    }
1143 e971aa12 Jeff Cody
}
1144 e971aa12 Jeff Cody
1145 e971aa12 Jeff Cody
1146 fc01f7e7 bellard
/*
 * Close @bs: flush it, cancel its block job, drain all pending requests,
 * notify the close notifiers and, if a driver is attached, tear down the
 * driver state, the backing file and the underlying file.  The device's
 * media-change callback and I/O throttling are handled afterwards whether
 * or not a driver was attached.
 */
void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->job != NULL) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all();
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv != NULL) {
        if (bs_snapshots == bs) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd != NULL) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        /* Detach the driver and its private data */
        bs->opaque = NULL;
        bs->drv = NULL;

        /* Reset the per-image state */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;

        if (bs->file != NULL) {
            bdrv_delete(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
1194 b338082b bellard
1195 2bc93fed MORITA Kazutaka
void bdrv_close_all(void)
1196 2bc93fed MORITA Kazutaka
{
1197 2bc93fed MORITA Kazutaka
    BlockDriverState *bs;
1198 2bc93fed MORITA Kazutaka
1199 2bc93fed MORITA Kazutaka
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1200 2bc93fed MORITA Kazutaka
        bdrv_close(bs);
1201 2bc93fed MORITA Kazutaka
    }
1202 2bc93fed MORITA Kazutaka
}
1203 2bc93fed MORITA Kazutaka
1204 922453bc Stefan Hajnoczi
/*
1205 922453bc Stefan Hajnoczi
 * Wait for pending requests to complete across all BlockDriverStates
1206 922453bc Stefan Hajnoczi
 *
1207 922453bc Stefan Hajnoczi
 * This function does not flush data to disk, use bdrv_flush_all() for that
1208 922453bc Stefan Hajnoczi
 * after calling this function.
1209 4c355d53 Zhi Yong Wu
 *
1210 4c355d53 Zhi Yong Wu
 * Note that completion of an asynchronous I/O operation can trigger any
1211 4c355d53 Zhi Yong Wu
 * number of other I/O operations on other devices---for example a coroutine
1212 4c355d53 Zhi Yong Wu
 * can be arbitrarily complex and a constant flow of I/O can come until the
1213 4c355d53 Zhi Yong Wu
 * coroutine is complete.  Because of this, it is not possible to have a
1214 4c355d53 Zhi Yong Wu
 * function to drain a single device's I/O queue.
1215 922453bc Stefan Hajnoczi
 */
1216 922453bc Stefan Hajnoczi
void bdrv_drain_all(void)
1217 922453bc Stefan Hajnoczi
{
1218 922453bc Stefan Hajnoczi
    BlockDriverState *bs;
1219 4c355d53 Zhi Yong Wu
    bool busy;
1220 4c355d53 Zhi Yong Wu
1221 4c355d53 Zhi Yong Wu
    do {
1222 4c355d53 Zhi Yong Wu
        busy = qemu_aio_wait();
1223 922453bc Stefan Hajnoczi
1224 4c355d53 Zhi Yong Wu
        /* FIXME: We do not have timer support here, so this is effectively
1225 4c355d53 Zhi Yong Wu
         * a busy wait.
1226 4c355d53 Zhi Yong Wu
         */
1227 4c355d53 Zhi Yong Wu
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
1228 4c355d53 Zhi Yong Wu
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
1229 4c355d53 Zhi Yong Wu
                qemu_co_queue_restart_all(&bs->throttled_reqs);
1230 4c355d53 Zhi Yong Wu
                busy = true;
1231 4c355d53 Zhi Yong Wu
            }
1232 4c355d53 Zhi Yong Wu
        }
1233 4c355d53 Zhi Yong Wu
    } while (busy);
1234 922453bc Stefan Hajnoczi
1235 922453bc Stefan Hajnoczi
    /* If requests are still pending there is a bug somewhere */
1236 922453bc Stefan Hajnoczi
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1237 922453bc Stefan Hajnoczi
        assert(QLIST_EMPTY(&bs->tracked_requests));
1238 922453bc Stefan Hajnoczi
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
1239 922453bc Stefan Hajnoczi
    }
1240 922453bc Stefan Hajnoczi
}
1241 922453bc Stefan Hajnoczi
1242 d22b2f41 Ryan Harper
/* make a BlockDriverState anonymous by removing from bdrv_state list.
1243 d22b2f41 Ryan Harper
   Also, NULL terminate the device_name to prevent double remove */
1244 d22b2f41 Ryan Harper
void bdrv_make_anon(BlockDriverState *bs)
1245 d22b2f41 Ryan Harper
{
1246 d22b2f41 Ryan Harper
    if (bs->device_name[0] != '\0') {
1247 d22b2f41 Ryan Harper
        QTAILQ_REMOVE(&bdrv_states, bs, list);
1248 d22b2f41 Ryan Harper
    }
1249 d22b2f41 Ryan Harper
    bs->device_name[0] = '\0';
1250 d22b2f41 Ryan Harper
}
1251 d22b2f41 Ryan Harper
1252 e023b2e2 Paolo Bonzini
/*
 * Invoke the driver's optional bdrv_rebind hook for 'bs'.  Does nothing
 * when no driver is attached or the driver does not provide the hook.
 */
static void bdrv_rebind(BlockDriverState *bs)
{
    if (!bs->drv || !bs->drv->bdrv_rebind) {
        return;
    }

    bs->drv->bdrv_rebind(bs);
}
1258 e023b2e2 Paolo Bonzini
1259 4ddc07ca Paolo Bonzini
/*
 * Copy from 'bs_src' to 'bs_dest' the fields that have to stay with the
 * device rather than with the image contents.  Used by bdrv_swap() to put
 * these fields back after the two states were exchanged wholesale.
 */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* flags the state was opened with */
    bs_dest->open_flags         = bs_src->open_flags;

    /* attached device and its callbacks */
    bs_dest->dev                = bs_src->dev;
    bs_dest->dev_ops            = bs_src->dev_ops;
    bs_dest->dev_opaque         = bs_src->dev_opaque;
    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* I/O throttling state */
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
    bs_dest->io_limits          = bs_src->io_limits;
    bs_dest->io_base            = bs_src->io_base;
    bs_dest->slice_time         = bs_src->slice_time;
    bs_dest->slice_start        = bs_src->slice_start;
    bs_dest->slice_end          = bs_src->slice_end;
    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
    bs_dest->block_timer        = bs_src->block_timer;

    /* read/write error policy */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* I/O status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty tracking */
    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
    bs_dest->dirty_count        = bs_src->dirty_count;

    /* block job ownership */
    bs_dest->job                = bs_src->job;
    bs_dest->in_use             = bs_src->in_use;

    /* device name and list linkage: keep the same entry in bdrv_states */
    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
            bs_src->device_name);
    bs_dest->list = bs_src->list;
}
1305 8802d1fd Jeff Cody
1306 4ddc07ca Paolo Bonzini
/*
1307 4ddc07ca Paolo Bonzini
 * Swap bs contents for two image chains while they are live,
1308 4ddc07ca Paolo Bonzini
 * while keeping required fields on the BlockDriverState that is
1309 4ddc07ca Paolo Bonzini
 * actually attached to a device.
1310 4ddc07ca Paolo Bonzini
 *
1311 4ddc07ca Paolo Bonzini
 * This will modify the BlockDriverState fields, and swap contents
1312 4ddc07ca Paolo Bonzini
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1313 4ddc07ca Paolo Bonzini
 *
1314 4ddc07ca Paolo Bonzini
 * bs_new is required to be anonymous.
1315 4ddc07ca Paolo Bonzini
 *
1316 4ddc07ca Paolo Bonzini
 * This function does not create any image files.
1317 4ddc07ca Paolo Bonzini
 */
1318 4ddc07ca Paolo Bonzini
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1319 4ddc07ca Paolo Bonzini
{
1320 4ddc07ca Paolo Bonzini
    BlockDriverState tmp;
1321 f6801b83 Jeff Cody
1322 4ddc07ca Paolo Bonzini
    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1323 4ddc07ca Paolo Bonzini
    assert(bs_new->device_name[0] == '\0');
1324 4ddc07ca Paolo Bonzini
    assert(bs_new->dirty_bitmap == NULL);
1325 4ddc07ca Paolo Bonzini
    assert(bs_new->job == NULL);
1326 4ddc07ca Paolo Bonzini
    assert(bs_new->dev == NULL);
1327 4ddc07ca Paolo Bonzini
    assert(bs_new->in_use == 0);
1328 4ddc07ca Paolo Bonzini
    assert(bs_new->io_limits_enabled == false);
1329 4ddc07ca Paolo Bonzini
    assert(bs_new->block_timer == NULL);
1330 8802d1fd Jeff Cody
1331 4ddc07ca Paolo Bonzini
    tmp = *bs_new;
1332 4ddc07ca Paolo Bonzini
    *bs_new = *bs_old;
1333 4ddc07ca Paolo Bonzini
    *bs_old = tmp;
1334 a9fc4408 Paolo Bonzini
1335 4ddc07ca Paolo Bonzini
    /* there are some fields that should not be swapped, move them back */
1336 4ddc07ca Paolo Bonzini
    bdrv_move_feature_fields(&tmp, bs_old);
1337 4ddc07ca Paolo Bonzini
    bdrv_move_feature_fields(bs_old, bs_new);
1338 4ddc07ca Paolo Bonzini
    bdrv_move_feature_fields(bs_new, &tmp);
1339 8802d1fd Jeff Cody
1340 4ddc07ca Paolo Bonzini
    /* bs_new shouldn't be in bdrv_states even after the swap!  */
1341 4ddc07ca Paolo Bonzini
    assert(bs_new->device_name[0] == '\0');
1342 4ddc07ca Paolo Bonzini
1343 4ddc07ca Paolo Bonzini
    /* Check a few fields that should remain attached to the device */
1344 4ddc07ca Paolo Bonzini
    assert(bs_new->dev == NULL);
1345 4ddc07ca Paolo Bonzini
    assert(bs_new->job == NULL);
1346 4ddc07ca Paolo Bonzini
    assert(bs_new->in_use == 0);
1347 4ddc07ca Paolo Bonzini
    assert(bs_new->io_limits_enabled == false);
1348 4ddc07ca Paolo Bonzini
    assert(bs_new->block_timer == NULL);
1349 e023b2e2 Paolo Bonzini
1350 e023b2e2 Paolo Bonzini
    bdrv_rebind(bs_new);
1351 4ddc07ca Paolo Bonzini
    bdrv_rebind(bs_old);
1352 4ddc07ca Paolo Bonzini
}
1353 4ddc07ca Paolo Bonzini
1354 4ddc07ca Paolo Bonzini
/*
1355 4ddc07ca Paolo Bonzini
 * Add new bs contents at the top of an image chain while the chain is
1356 4ddc07ca Paolo Bonzini
 * live, while keeping required fields on the top layer.
1357 4ddc07ca Paolo Bonzini
 *
1358 4ddc07ca Paolo Bonzini
 * This will modify the BlockDriverState fields, and swap contents
1359 4ddc07ca Paolo Bonzini
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1360 4ddc07ca Paolo Bonzini
 *
1361 4ddc07ca Paolo Bonzini
 * bs_new is required to be anonymous.
1362 4ddc07ca Paolo Bonzini
 *
1363 4ddc07ca Paolo Bonzini
 * This function does not create any image files.
1364 4ddc07ca Paolo Bonzini
 */
1365 4ddc07ca Paolo Bonzini
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1366 4ddc07ca Paolo Bonzini
{
1367 4ddc07ca Paolo Bonzini
    bdrv_swap(bs_new, bs_top);
1368 4ddc07ca Paolo Bonzini
1369 4ddc07ca Paolo Bonzini
    /* The contents of 'tmp' will become bs_top, as we are
1370 4ddc07ca Paolo Bonzini
     * swapping bs_new and bs_top contents. */
1371 4ddc07ca Paolo Bonzini
    bs_top->backing_hd = bs_new;
1372 4ddc07ca Paolo Bonzini
    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1373 4ddc07ca Paolo Bonzini
    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1374 4ddc07ca Paolo Bonzini
            bs_new->filename);
1375 4ddc07ca Paolo Bonzini
    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1376 4ddc07ca Paolo Bonzini
            bs_new->drv ? bs_new->drv->format_name : "");
1377 8802d1fd Jeff Cody
}
1378 8802d1fd Jeff Cody
1379 b338082b bellard
/*
 * Close 'bs', drop it from bdrv_states if it is listed there, and free it.
 * The state must not have a device or job attached, must not be in use,
 * and must not be the cached bs_snapshots state.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->dev == NULL);
    assert(bs->job == NULL);
    assert(bs->in_use == 0);

    /* unlink from the list first (no-op for anonymous states) */
    bdrv_make_anon(bs);

    bdrv_close(bs);

    assert(bs_snapshots != bs);
    g_free(bs);
}
1393 fc01f7e7 bellard
1394 fa879d62 Markus Armbruster
/*
 * Attach device 'dev' to 'bs' and reset the I/O status.
 *
 * Returns 0 on success, -EBUSY if a device is already attached.
 */
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev != NULL) {
        return -EBUSY;
    }

    bs->dev = dev;
    bdrv_iostatus_reset(bs);

    return 0;
}
1404 18846dee Markus Armbruster
1405 fa879d62 Markus Armbruster
/* TODO qdevified devices don't use this, remove when devices are qdevified */
1406 fa879d62 Markus Armbruster
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
1407 18846dee Markus Armbruster
{
1408 fa879d62 Markus Armbruster
    if (bdrv_attach_dev(bs, dev) < 0) {
1409 fa879d62 Markus Armbruster
        abort();
1410 fa879d62 Markus Armbruster
    }
1411 fa879d62 Markus Armbruster
}
1412 fa879d62 Markus Armbruster
1413 fa879d62 Markus Armbruster
/*
 * Detach device 'dev' from 'bs'.  'dev' must be the device that is
 * currently attached.  The device callbacks are cleared and the buffer
 * alignment is set back to 512.
 */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(dev == bs->dev);

    bs->dev_opaque = NULL;
    bs->dev_ops = NULL;
    bs->dev = NULL;
    bs->buffer_alignment = 512;
}
1422 18846dee Markus Armbruster
1423 fa879d62 Markus Armbruster
/* TODO change to return DeviceState * when all users are qdevified */
1424 fa879d62 Markus Armbruster
void *bdrv_get_attached_dev(BlockDriverState *bs)
1425 18846dee Markus Armbruster
{
1426 fa879d62 Markus Armbruster
    return bs->dev;
1427 18846dee Markus Armbruster
}
1428 18846dee Markus Armbruster
1429 0e49de52 Markus Armbruster
/*
 * Install the device callback table 'ops' and its 'opaque' argument on
 * 'bs'.  If 'bs' then counts as having removable media and is the cached
 * bs_snapshots state, the cache is cleared.
 */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;

    if (bdrv_dev_has_removable_media(bs)) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
    }
}
1438 0e49de52 Markus Armbruster
1439 32c81a4a Paolo Bonzini
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1440 32c81a4a Paolo Bonzini
                               enum MonitorEvent ev,
1441 32c81a4a Paolo Bonzini
                               BlockErrorAction action, bool is_read)
1442 329c0a48 Luiz Capitulino
{
1443 329c0a48 Luiz Capitulino
    QObject *data;
1444 329c0a48 Luiz Capitulino
    const char *action_str;
1445 329c0a48 Luiz Capitulino
1446 329c0a48 Luiz Capitulino
    switch (action) {
1447 329c0a48 Luiz Capitulino
    case BDRV_ACTION_REPORT:
1448 329c0a48 Luiz Capitulino
        action_str = "report";
1449 329c0a48 Luiz Capitulino
        break;
1450 329c0a48 Luiz Capitulino
    case BDRV_ACTION_IGNORE:
1451 329c0a48 Luiz Capitulino
        action_str = "ignore";
1452 329c0a48 Luiz Capitulino
        break;
1453 329c0a48 Luiz Capitulino
    case BDRV_ACTION_STOP:
1454 329c0a48 Luiz Capitulino
        action_str = "stop";
1455 329c0a48 Luiz Capitulino
        break;
1456 329c0a48 Luiz Capitulino
    default:
1457 329c0a48 Luiz Capitulino
        abort();
1458 329c0a48 Luiz Capitulino
    }
1459 329c0a48 Luiz Capitulino
1460 329c0a48 Luiz Capitulino
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1461 329c0a48 Luiz Capitulino
                              bdrv->device_name,
1462 329c0a48 Luiz Capitulino
                              action_str,
1463 329c0a48 Luiz Capitulino
                              is_read ? "read" : "write");
1464 32c81a4a Paolo Bonzini
    monitor_protocol_event(ev, data);
1465 329c0a48 Luiz Capitulino
1466 329c0a48 Luiz Capitulino
    qobject_decref(data);
1467 329c0a48 Luiz Capitulino
}
1468 329c0a48 Luiz Capitulino
1469 6f382ed2 Luiz Capitulino
/*
 * Emit a QEVENT_DEVICE_TRAY_MOVED monitor event for 'bs'; 'ejected' is
 * reported as the event's 'tray-open' value.
 */
static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *event_data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                                             bdrv_get_device_name(bs),
                                             ejected);

    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, event_data);
    qobject_decref(event_data);
}
1479 6f382ed2 Luiz Capitulino
1480 7d4b4ba5 Markus Armbruster
/*
 * Tell the attached device, through its change_media_cb callback, that the
 * medium changed, and emit the matching tray events.  Does nothing when the
 * device has no such callback.
 */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    bool was_closed;

    if (!bs->dev_ops || !bs->dev_ops->change_media_cb) {
        return;
    }

    /* sample the tray state before the callback runs */
    was_closed = !bdrv_dev_is_tray_open(bs);
    bs->dev_ops->change_media_cb(bs->dev_opaque, load);

    if (was_closed) {
        /* tray open */
        bdrv_emit_qmp_eject_event(bs, true);
    }
    if (load) {
        /* tray close */
        bdrv_emit_qmp_eject_event(bs, false);
    }
}
1495 145feb17 Markus Armbruster
1496 2c6942fa Markus Armbruster
/*
 * Return true when no device is attached to 'bs', or when the attached
 * device provides a change_media_cb callback.
 */
bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    if (!bs->dev) {
        return true;
    }
    if (!bs->dev_ops) {
        return false;
    }
    return bs->dev_ops->change_media_cb != NULL;
}
1500 2c6942fa Markus Armbruster
1501 025ccaa7 Paolo Bonzini
/*
 * Forward an eject request (with its 'force' flag) to the attached
 * device's eject_request_cb callback, if it has one.
 */
void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (!bs->dev_ops || !bs->dev_ops->eject_request_cb) {
        return;
    }

    bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
}
1507 025ccaa7 Paolo Bonzini
1508 e4def80b Markus Armbruster
/*
 * Return what the attached device's is_tray_open callback reports, or
 * false when the device has no such callback.
 */
bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (!bs->dev_ops || !bs->dev_ops->is_tray_open) {
        return false;
    }

    return bs->dev_ops->is_tray_open(bs->dev_opaque);
}
1515 e4def80b Markus Armbruster
1516 145feb17 Markus Armbruster
/*
 * Invoke the attached device's resize_cb callback, if it has one.
 */
static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (!bs->dev_ops || !bs->dev_ops->resize_cb) {
        return;
    }

    bs->dev_ops->resize_cb(bs->dev_opaque);
}
1522 0e49de52 Markus Armbruster
1523 f107639a Markus Armbruster
/*
 * Return what the attached device's is_medium_locked callback reports, or
 * false when the device has no such callback.
 */
bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (!bs->dev_ops || !bs->dev_ops->is_medium_locked) {
        return false;
    }

    return bs->dev_ops->is_medium_locked(bs->dev_opaque);
}
1530 f107639a Markus Armbruster
1531 e97fc193 aliguori
/*
1532 e97fc193 aliguori
 * Run consistency checks on an image
1533 e97fc193 aliguori
 *
1534 e076f338 Kevin Wolf
 * Returns 0 if the check could be completed (it doesn't mean that the image is
1535 a1c7273b Stefan Weil
 * free of errors) or -errno when an internal error occurred. The results of the
1536 e076f338 Kevin Wolf
 * check are stored in res.
1537 e97fc193 aliguori
 */
1538 4534ff54 Kevin Wolf
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1539 e97fc193 aliguori
{
1540 e97fc193 aliguori
    if (bs->drv->bdrv_check == NULL) {
1541 e97fc193 aliguori
        return -ENOTSUP;
1542 e97fc193 aliguori
    }
1543 e97fc193 aliguori
1544 e076f338 Kevin Wolf
    memset(res, 0, sizeof(*res));
1545 4534ff54 Kevin Wolf
    return bs->drv->bdrv_check(bs, res, fix);
1546 e97fc193 aliguori
}
1547 e97fc193 aliguori
1548 8a426614 Kevin Wolf
#define COMMIT_BUF_SECTORS 2048
1549 8a426614 Kevin Wolf
1550 33e3963e bellard
/* commit COW file into the raw image */
1551 33e3963e bellard
int bdrv_commit(BlockDriverState *bs)
1552 33e3963e bellard
{
1553 19cb3738 bellard
    BlockDriver *drv = bs->drv;
1554 8a426614 Kevin Wolf
    int64_t sector, total_sectors;
1555 8a426614 Kevin Wolf
    int n, ro, open_flags;
1556 0bce597d Jeff Cody
    int ret = 0;
1557 8a426614 Kevin Wolf
    uint8_t *buf;
1558 c2cba3d9 Jim Meyering
    char filename[PATH_MAX];
1559 33e3963e bellard
1560 19cb3738 bellard
    if (!drv)
1561 19cb3738 bellard
        return -ENOMEDIUM;
1562 4dca4b63 Naphtali Sprei
    
1563 4dca4b63 Naphtali Sprei
    if (!bs->backing_hd) {
1564 4dca4b63 Naphtali Sprei
        return -ENOTSUP;
1565 33e3963e bellard
    }
1566 33e3963e bellard
1567 2d3735d3 Stefan Hajnoczi
    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1568 2d3735d3 Stefan Hajnoczi
        return -EBUSY;
1569 2d3735d3 Stefan Hajnoczi
    }
1570 2d3735d3 Stefan Hajnoczi
1571 4dca4b63 Naphtali Sprei
    ro = bs->backing_hd->read_only;
1572 c2cba3d9 Jim Meyering
    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
1573 c2cba3d9 Jim Meyering
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
1574 4dca4b63 Naphtali Sprei
    open_flags =  bs->backing_hd->open_flags;
1575 4dca4b63 Naphtali Sprei
1576 4dca4b63 Naphtali Sprei
    if (ro) {
1577 0bce597d Jeff Cody
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
1578 0bce597d Jeff Cody
            return -EACCES;
1579 4dca4b63 Naphtali Sprei
        }
1580 ea2384d3 bellard
    }
1581 33e3963e bellard
1582 6ea44308 Jan Kiszka
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
1583 7267c094 Anthony Liguori
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
1584 8a426614 Kevin Wolf
1585 8a426614 Kevin Wolf
    for (sector = 0; sector < total_sectors; sector += n) {
1586 05c4af54 Stefan Hajnoczi
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
1587 8a426614 Kevin Wolf
1588 8a426614 Kevin Wolf
            if (bdrv_read(bs, sector, buf, n) != 0) {
1589 8a426614 Kevin Wolf
                ret = -EIO;
1590 8a426614 Kevin Wolf
                goto ro_cleanup;
1591 8a426614 Kevin Wolf
            }
1592 8a426614 Kevin Wolf
1593 8a426614 Kevin Wolf
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1594 8a426614 Kevin Wolf
                ret = -EIO;
1595 8a426614 Kevin Wolf
                goto ro_cleanup;
1596 8a426614 Kevin Wolf
            }
1597 ea2384d3 bellard
        }
1598 33e3963e bellard
    }
1599 95389c86 bellard
1600 1d44952f Christoph Hellwig
    if (drv->bdrv_make_empty) {
1601 1d44952f Christoph Hellwig
        ret = drv->bdrv_make_empty(bs);
1602 1d44952f Christoph Hellwig
        bdrv_flush(bs);
1603 1d44952f Christoph Hellwig
    }
1604 95389c86 bellard
1605 3f5075ae Christoph Hellwig
    /*
1606 3f5075ae Christoph Hellwig
     * Make sure all data we wrote to the backing device is actually
1607 3f5075ae Christoph Hellwig
     * stable on disk.
1608 3f5075ae Christoph Hellwig
     */
1609 3f5075ae Christoph Hellwig
    if (bs->backing_hd)
1610 3f5075ae Christoph Hellwig
        bdrv_flush(bs->backing_hd);
1611 4dca4b63 Naphtali Sprei
1612 4dca4b63 Naphtali Sprei
ro_cleanup:
1613 7267c094 Anthony Liguori
    g_free(buf);
1614 4dca4b63 Naphtali Sprei
1615 4dca4b63 Naphtali Sprei
    if (ro) {
1616 0bce597d Jeff Cody
        /* ignoring error return here */
1617 0bce597d Jeff Cody
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
1618 4dca4b63 Naphtali Sprei
    }
1619 4dca4b63 Naphtali Sprei
1620 1d44952f Christoph Hellwig
    return ret;
1621 33e3963e bellard
}
1622 33e3963e bellard
1623 e8877497 Stefan Hajnoczi
int bdrv_commit_all(void)
1624 6ab4b5ab Markus Armbruster
{
1625 6ab4b5ab Markus Armbruster
    BlockDriverState *bs;
1626 6ab4b5ab Markus Armbruster
1627 6ab4b5ab Markus Armbruster
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1628 e8877497 Stefan Hajnoczi
        int ret = bdrv_commit(bs);
1629 e8877497 Stefan Hajnoczi
        if (ret < 0) {
1630 e8877497 Stefan Hajnoczi
            return ret;
1631 e8877497 Stefan Hajnoczi
        }
1632 6ab4b5ab Markus Armbruster
    }
1633 e8877497 Stefan Hajnoczi
    return 0;
1634 6ab4b5ab Markus Armbruster
}
1635 6ab4b5ab Markus Armbruster
1636 dbffbdcf Stefan Hajnoczi
struct BdrvTrackedRequest {
1637 dbffbdcf Stefan Hajnoczi
    BlockDriverState *bs;
1638 dbffbdcf Stefan Hajnoczi
    int64_t sector_num;
1639 dbffbdcf Stefan Hajnoczi
    int nb_sectors;
1640 dbffbdcf Stefan Hajnoczi
    bool is_write;
1641 dbffbdcf Stefan Hajnoczi
    QLIST_ENTRY(BdrvTrackedRequest) list;
1642 5f8b6491 Stefan Hajnoczi
    Coroutine *co; /* owner, used for deadlock detection */
1643 f4658285 Stefan Hajnoczi
    CoQueue wait_queue; /* coroutines blocked on this request */
1644 dbffbdcf Stefan Hajnoczi
};
1645 dbffbdcf Stefan Hajnoczi
1646 dbffbdcf Stefan Hajnoczi
/**
1647 dbffbdcf Stefan Hajnoczi
 * Remove an active request from the tracked requests list
1648 dbffbdcf Stefan Hajnoczi
 *
1649 dbffbdcf Stefan Hajnoczi
 * This function should be called when a tracked request is completing.
1650 dbffbdcf Stefan Hajnoczi
 */
1651 dbffbdcf Stefan Hajnoczi
static void tracked_request_end(BdrvTrackedRequest *req)
1652 dbffbdcf Stefan Hajnoczi
{
1653 dbffbdcf Stefan Hajnoczi
    QLIST_REMOVE(req, list);
1654 f4658285 Stefan Hajnoczi
    qemu_co_queue_restart_all(&req->wait_queue);
1655 dbffbdcf Stefan Hajnoczi
}
1656 dbffbdcf Stefan Hajnoczi
1657 dbffbdcf Stefan Hajnoczi
/**
1658 dbffbdcf Stefan Hajnoczi
 * Add an active request to the tracked requests list
1659 dbffbdcf Stefan Hajnoczi
 */
1660 dbffbdcf Stefan Hajnoczi
static void tracked_request_begin(BdrvTrackedRequest *req,
1661 dbffbdcf Stefan Hajnoczi
                                  BlockDriverState *bs,
1662 dbffbdcf Stefan Hajnoczi
                                  int64_t sector_num,
1663 dbffbdcf Stefan Hajnoczi
                                  int nb_sectors, bool is_write)
1664 dbffbdcf Stefan Hajnoczi
{
1665 dbffbdcf Stefan Hajnoczi
    *req = (BdrvTrackedRequest){
1666 dbffbdcf Stefan Hajnoczi
        .bs = bs,
1667 dbffbdcf Stefan Hajnoczi
        .sector_num = sector_num,
1668 dbffbdcf Stefan Hajnoczi
        .nb_sectors = nb_sectors,
1669 dbffbdcf Stefan Hajnoczi
        .is_write = is_write,
1670 5f8b6491 Stefan Hajnoczi
        .co = qemu_coroutine_self(),
1671 dbffbdcf Stefan Hajnoczi
    };
1672 dbffbdcf Stefan Hajnoczi
1673 f4658285 Stefan Hajnoczi
    qemu_co_queue_init(&req->wait_queue);
1674 f4658285 Stefan Hajnoczi
1675 dbffbdcf Stefan Hajnoczi
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1676 dbffbdcf Stefan Hajnoczi
}
1677 dbffbdcf Stefan Hajnoczi
1678 d83947ac Stefan Hajnoczi
/**
1679 d83947ac Stefan Hajnoczi
 * Round a region to cluster boundaries
1680 d83947ac Stefan Hajnoczi
 */
1681 d83947ac Stefan Hajnoczi
static void round_to_clusters(BlockDriverState *bs,
1682 d83947ac Stefan Hajnoczi
                              int64_t sector_num, int nb_sectors,
1683 d83947ac Stefan Hajnoczi
                              int64_t *cluster_sector_num,
1684 d83947ac Stefan Hajnoczi
                              int *cluster_nb_sectors)
1685 d83947ac Stefan Hajnoczi
{
1686 d83947ac Stefan Hajnoczi
    BlockDriverInfo bdi;
1687 d83947ac Stefan Hajnoczi
1688 d83947ac Stefan Hajnoczi
    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1689 d83947ac Stefan Hajnoczi
        *cluster_sector_num = sector_num;
1690 d83947ac Stefan Hajnoczi
        *cluster_nb_sectors = nb_sectors;
1691 d83947ac Stefan Hajnoczi
    } else {
1692 d83947ac Stefan Hajnoczi
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1693 d83947ac Stefan Hajnoczi
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1694 d83947ac Stefan Hajnoczi
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1695 d83947ac Stefan Hajnoczi
                                            nb_sectors, c);
1696 d83947ac Stefan Hajnoczi
    }
1697 d83947ac Stefan Hajnoczi
}
1698 d83947ac Stefan Hajnoczi
1699 f4658285 Stefan Hajnoczi
/*
 * Return true when the sector range [sector_num, sector_num + nb_sectors)
 * intersects the range covered by the tracked request 'req'.
 */
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors)
{
    /* Overlap means neither range lies completely past the other:
     * the range must begin before req ends, and req must begin before
     * the range ends. */
    return sector_num < req->sector_num + req->nb_sectors &&
           req->sector_num < sector_num + nb_sectors;
}
1711 f4658285 Stefan Hajnoczi
1712 f4658285 Stefan Hajnoczi
/*
 * Block the calling coroutine until no tracked request on 'bs' overlaps
 * the given sector range, with the range first widened to cluster
 * boundaries.
 */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t first_sector;
    int sector_count;
    bool waited;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &first_sector, &sector_count);

    do {
        waited = false;

        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (!tracked_request_overlaps(req, first_sector, sector_count)) {
                continue;
            }

            /* Hitting this means there was a reentrant request, for
             * example, a block driver issuing nested requests.  This must
             * never happen since it means deadlock.
             */
            assert(qemu_coroutine_self() != req->co);

            qemu_co_queue_wait(&req->wait_queue);

            /* the list may have changed while we waited: scan it again
             * from the head */
            waited = true;
            break;
        }
    } while (waited);
}
1747 f4658285 Stefan Hajnoczi
1748 756e6736 Kevin Wolf
/*
1749 756e6736 Kevin Wolf
 * Return values:
1750 756e6736 Kevin Wolf
 * 0        - success
1751 756e6736 Kevin Wolf
 * -EINVAL  - backing format specified, but no file
1752 756e6736 Kevin Wolf
 * -ENOSPC  - can't update the backing file because no space is left in the
1753 756e6736 Kevin Wolf
 *            image file header
1754 756e6736 Kevin Wolf
 * -ENOTSUP - format driver doesn't support changing the backing file
1755 756e6736 Kevin Wolf
 */
1756 756e6736 Kevin Wolf
int bdrv_change_backing_file(BlockDriverState *bs,
1757 756e6736 Kevin Wolf
    const char *backing_file, const char *backing_fmt)
1758 756e6736 Kevin Wolf
{
1759 756e6736 Kevin Wolf
    BlockDriver *drv = bs->drv;
1760 469ef350 Paolo Bonzini
    int ret;
1761 756e6736 Kevin Wolf
1762 5f377794 Paolo Bonzini
    /* Backing file format doesn't make sense without a backing file */
1763 5f377794 Paolo Bonzini
    if (backing_fmt && !backing_file) {
1764 5f377794 Paolo Bonzini
        return -EINVAL;
1765 5f377794 Paolo Bonzini
    }
1766 5f377794 Paolo Bonzini
1767 756e6736 Kevin Wolf
    if (drv->bdrv_change_backing_file != NULL) {
1768 469ef350 Paolo Bonzini
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1769 756e6736 Kevin Wolf
    } else {
1770 469ef350 Paolo Bonzini
        ret = -ENOTSUP;
1771 756e6736 Kevin Wolf
    }
1772 469ef350 Paolo Bonzini
1773 469ef350 Paolo Bonzini
    if (ret == 0) {
1774 469ef350 Paolo Bonzini
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1775 469ef350 Paolo Bonzini
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1776 469ef350 Paolo Bonzini
    }
1777 469ef350 Paolo Bonzini
    return ret;
1778 756e6736 Kevin Wolf
}
1779 756e6736 Kevin Wolf
1780 6ebdcee2 Jeff Cody
/*
1781 6ebdcee2 Jeff Cody
 * Finds the image layer in the chain that has 'bs' as its backing file.
1782 6ebdcee2 Jeff Cody
 *
1783 6ebdcee2 Jeff Cody
 * active is the current topmost image.
1784 6ebdcee2 Jeff Cody
 *
1785 6ebdcee2 Jeff Cody
 * Returns NULL if bs is not found in active's image chain,
1786 6ebdcee2 Jeff Cody
 * or if active == bs.
1787 6ebdcee2 Jeff Cody
 */
1788 6ebdcee2 Jeff Cody
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
1789 6ebdcee2 Jeff Cody
                                    BlockDriverState *bs)
1790 6ebdcee2 Jeff Cody
{
1791 6ebdcee2 Jeff Cody
    BlockDriverState *overlay = NULL;
1792 6ebdcee2 Jeff Cody
    BlockDriverState *intermediate;
1793 6ebdcee2 Jeff Cody
1794 6ebdcee2 Jeff Cody
    assert(active != NULL);
1795 6ebdcee2 Jeff Cody
    assert(bs != NULL);
1796 6ebdcee2 Jeff Cody
1797 6ebdcee2 Jeff Cody
    /* if bs is the same as active, then by definition it has no overlay
1798 6ebdcee2 Jeff Cody
     */
1799 6ebdcee2 Jeff Cody
    if (active == bs) {
1800 6ebdcee2 Jeff Cody
        return NULL;
1801 6ebdcee2 Jeff Cody
    }
1802 6ebdcee2 Jeff Cody
1803 6ebdcee2 Jeff Cody
    intermediate = active;
1804 6ebdcee2 Jeff Cody
    while (intermediate->backing_hd) {
1805 6ebdcee2 Jeff Cody
        if (intermediate->backing_hd == bs) {
1806 6ebdcee2 Jeff Cody
            overlay = intermediate;
1807 6ebdcee2 Jeff Cody
            break;
1808 6ebdcee2 Jeff Cody
        }
1809 6ebdcee2 Jeff Cody
        intermediate = intermediate->backing_hd;
1810 6ebdcee2 Jeff Cody
    }
1811 6ebdcee2 Jeff Cody
1812 6ebdcee2 Jeff Cody
    return overlay;
1813 6ebdcee2 Jeff Cody
}
1814 6ebdcee2 Jeff Cody
1815 6ebdcee2 Jeff Cody
typedef struct BlkIntermediateStates {
1816 6ebdcee2 Jeff Cody
    BlockDriverState *bs;
1817 6ebdcee2 Jeff Cody
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
1818 6ebdcee2 Jeff Cody
} BlkIntermediateStates;
1819 6ebdcee2 Jeff Cody
1820 6ebdcee2 Jeff Cody
1821 6ebdcee2 Jeff Cody
/*
1822 6ebdcee2 Jeff Cody
 * Drops images above 'base' up to and including 'top', and sets the image
1823 6ebdcee2 Jeff Cody
 * above 'top' to have base as its backing file.
1824 6ebdcee2 Jeff Cody
 *
1825 6ebdcee2 Jeff Cody
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
1826 6ebdcee2 Jeff Cody
 * information in 'bs' can be properly updated.
1827 6ebdcee2 Jeff Cody
 *
1828 6ebdcee2 Jeff Cody
 * E.g., this will convert the following chain:
1829 6ebdcee2 Jeff Cody
 * bottom <- base <- intermediate <- top <- active
1830 6ebdcee2 Jeff Cody
 *
1831 6ebdcee2 Jeff Cody
 * to
1832 6ebdcee2 Jeff Cody
 *
1833 6ebdcee2 Jeff Cody
 * bottom <- base <- active
1834 6ebdcee2 Jeff Cody
 *
1835 6ebdcee2 Jeff Cody
 * It is allowed for bottom==base, in which case it converts:
1836 6ebdcee2 Jeff Cody
 *
1837 6ebdcee2 Jeff Cody
 * base <- intermediate <- top <- active
1838 6ebdcee2 Jeff Cody
 *
1839 6ebdcee2 Jeff Cody
 * to
1840 6ebdcee2 Jeff Cody
 *
1841 6ebdcee2 Jeff Cody
 * base <- active
1842 6ebdcee2 Jeff Cody
 *
1843 6ebdcee2 Jeff Cody
 * Error conditions:
1844 6ebdcee2 Jeff Cody
 *  if active == top, that is considered an error
1845 6ebdcee2 Jeff Cody
 *
1846 6ebdcee2 Jeff Cody
 */
1847 6ebdcee2 Jeff Cody
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
1848 6ebdcee2 Jeff Cody
                           BlockDriverState *base)
1849 6ebdcee2 Jeff Cody
{
1850 6ebdcee2 Jeff Cody
    BlockDriverState *intermediate;
1851 6ebdcee2 Jeff Cody
    BlockDriverState *base_bs = NULL;
1852 6ebdcee2 Jeff Cody
    BlockDriverState *new_top_bs = NULL;
1853 6ebdcee2 Jeff Cody
    BlkIntermediateStates *intermediate_state, *next;
1854 6ebdcee2 Jeff Cody
    int ret = -EIO;
1855 6ebdcee2 Jeff Cody
1856 6ebdcee2 Jeff Cody
    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
1857 6ebdcee2 Jeff Cody
    QSIMPLEQ_INIT(&states_to_delete);
1858 6ebdcee2 Jeff Cody
1859 6ebdcee2 Jeff Cody
    if (!top->drv || !base->drv) {
1860 6ebdcee2 Jeff Cody
        goto exit;
1861 6ebdcee2 Jeff Cody
    }
1862 6ebdcee2 Jeff Cody
1863 6ebdcee2 Jeff Cody
    new_top_bs = bdrv_find_overlay(active, top);
1864 6ebdcee2 Jeff Cody
1865 6ebdcee2 Jeff Cody
    if (new_top_bs == NULL) {
1866 6ebdcee2 Jeff Cody
        /* we could not find the image above 'top', this is an error */
1867 6ebdcee2 Jeff Cody
        goto exit;
1868 6ebdcee2 Jeff Cody
    }
1869 6ebdcee2 Jeff Cody
1870 6ebdcee2 Jeff Cody
    /* special case of new_top_bs->backing_hd already pointing to base - nothing
1871 6ebdcee2 Jeff Cody
     * to do, no intermediate images */
1872 6ebdcee2 Jeff Cody
    if (new_top_bs->backing_hd == base) {
1873 6ebdcee2 Jeff Cody
        ret = 0;
1874 6ebdcee2 Jeff Cody
        goto exit;
1875 6ebdcee2 Jeff Cody
    }
1876 6ebdcee2 Jeff Cody
1877 6ebdcee2 Jeff Cody
    intermediate = top;
1878 6ebdcee2 Jeff Cody
1879 6ebdcee2 Jeff Cody
    /* now we will go down through the list, and add each BDS we find
1880 6ebdcee2 Jeff Cody
     * into our deletion queue, until we hit the 'base'
1881 6ebdcee2 Jeff Cody
     */
1882 6ebdcee2 Jeff Cody
    while (intermediate) {
1883 6ebdcee2 Jeff Cody
        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
1884 6ebdcee2 Jeff Cody
        intermediate_state->bs = intermediate;
1885 6ebdcee2 Jeff Cody
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
1886 6ebdcee2 Jeff Cody
1887 6ebdcee2 Jeff Cody
        if (intermediate->backing_hd == base) {
1888 6ebdcee2 Jeff Cody
            base_bs = intermediate->backing_hd;
1889 6ebdcee2 Jeff Cody
            break;
1890 6ebdcee2 Jeff Cody
        }
1891 6ebdcee2 Jeff Cody
        intermediate = intermediate->backing_hd;
1892 6ebdcee2 Jeff Cody
    }
1893 6ebdcee2 Jeff Cody
    if (base_bs == NULL) {
1894 6ebdcee2 Jeff Cody
        /* something went wrong, we did not end at the base. safely
1895 6ebdcee2 Jeff Cody
         * unravel everything, and exit with error */
1896 6ebdcee2 Jeff Cody
        goto exit;
1897 6ebdcee2 Jeff Cody
    }
1898 6ebdcee2 Jeff Cody
1899 6ebdcee2 Jeff Cody
    /* success - we can delete the intermediate states, and link top->base */
1900 6ebdcee2 Jeff Cody
    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
1901 6ebdcee2 Jeff Cody
                                   base_bs->drv ? base_bs->drv->format_name : "");
1902 6ebdcee2 Jeff Cody
    if (ret) {
1903 6ebdcee2 Jeff Cody
        goto exit;
1904 6ebdcee2 Jeff Cody
    }
1905 6ebdcee2 Jeff Cody
    new_top_bs->backing_hd = base_bs;
1906 6ebdcee2 Jeff Cody
1907 6ebdcee2 Jeff Cody
1908 6ebdcee2 Jeff Cody
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1909 6ebdcee2 Jeff Cody
        /* so that bdrv_close() does not recursively close the chain */
1910 6ebdcee2 Jeff Cody
        intermediate_state->bs->backing_hd = NULL;
1911 6ebdcee2 Jeff Cody
        bdrv_delete(intermediate_state->bs);
1912 6ebdcee2 Jeff Cody
    }
1913 6ebdcee2 Jeff Cody
    ret = 0;
1914 6ebdcee2 Jeff Cody
1915 6ebdcee2 Jeff Cody
exit:
1916 6ebdcee2 Jeff Cody
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1917 6ebdcee2 Jeff Cody
        g_free(intermediate_state);
1918 6ebdcee2 Jeff Cody
    }
1919 6ebdcee2 Jeff Cody
    return ret;
1920 6ebdcee2 Jeff Cody
}
1921 6ebdcee2 Jeff Cody
1922 6ebdcee2 Jeff Cody
1923 71d0770c aliguori
/*
 * Check that a byte range may be accessed on 'bs'.
 *
 * Returns -ENOMEDIUM when no medium is inserted, 0 unconditionally for
 * growable devices, -EIO when the range [offset, offset + size) does not lie
 * within the current device length, and 0 otherwise.
 */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t total;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    /* Growable devices may be accessed beyond their current end */
    if (bs->growable) {
        return 0;
    }

    total = bdrv_getlength(bs);

    /* 'total - offset' is only evaluated once 0 <= offset <= total holds */
    if (offset < 0 || offset > total || total - offset < size) {
        return -EIO;
    }

    return 0;
}
1944 71d0770c aliguori
1945 71d0770c aliguori
/*
 * Sector-granularity wrapper around bdrv_check_byte_request().
 *
 * Returns 0 if the request may proceed, -EIO for a negative sector count,
 * otherwise whatever bdrv_check_byte_request() reports for the equivalent
 * byte range.
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    /* A negative count is never a valid request.  Left unchecked it would be
     * converted to a huge size_t below, and bdrv_check_byte_request() does
     * not look at the size at all for growable devices. */
    if (nb_sectors < 0) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
1951 71d0770c aliguori
1952 1c9805a3 Stefan Hajnoczi
typedef struct RwCo {
1953 1c9805a3 Stefan Hajnoczi
    BlockDriverState *bs;
1954 1c9805a3 Stefan Hajnoczi
    int64_t sector_num;
1955 1c9805a3 Stefan Hajnoczi
    int nb_sectors;
1956 1c9805a3 Stefan Hajnoczi
    QEMUIOVector *qiov;
1957 1c9805a3 Stefan Hajnoczi
    bool is_write;
1958 1c9805a3 Stefan Hajnoczi
    int ret;
1959 1c9805a3 Stefan Hajnoczi
} RwCo;
1960 1c9805a3 Stefan Hajnoczi
1961 1c9805a3 Stefan Hajnoczi
/*
 * Coroutine entry point: run the read or write described by the RwCo passed
 * in 'opaque' and store the outcome in its 'ret' field.
 */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *request = opaque;

    if (request->is_write) {
        request->ret = bdrv_co_do_writev(request->bs, request->sector_num,
                                         request->nb_sectors, request->qiov,
                                         0);
    } else {
        request->ret = bdrv_co_do_readv(request->bs, request->sector_num,
                                        request->nb_sectors, request->qiov,
                                        0);
    }
}
1973 e7a8a783 Kevin Wolf
1974 1c9805a3 Stefan Hajnoczi
/*
1975 1c9805a3 Stefan Hajnoczi
 * Process a synchronous request using coroutines
1976 1c9805a3 Stefan Hajnoczi
 */
1977 1c9805a3 Stefan Hajnoczi
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1978 1c9805a3 Stefan Hajnoczi
                      int nb_sectors, bool is_write)
1979 1c9805a3 Stefan Hajnoczi
{
1980 1c9805a3 Stefan Hajnoczi
    QEMUIOVector qiov;
1981 1c9805a3 Stefan Hajnoczi
    struct iovec iov = {
1982 1c9805a3 Stefan Hajnoczi
        .iov_base = (void *)buf,
1983 1c9805a3 Stefan Hajnoczi
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1984 1c9805a3 Stefan Hajnoczi
    };
1985 1c9805a3 Stefan Hajnoczi
    Coroutine *co;
1986 1c9805a3 Stefan Hajnoczi
    RwCo rwco = {
1987 1c9805a3 Stefan Hajnoczi
        .bs = bs,
1988 1c9805a3 Stefan Hajnoczi
        .sector_num = sector_num,
1989 1c9805a3 Stefan Hajnoczi
        .nb_sectors = nb_sectors,
1990 1c9805a3 Stefan Hajnoczi
        .qiov = &qiov,
1991 1c9805a3 Stefan Hajnoczi
        .is_write = is_write,
1992 1c9805a3 Stefan Hajnoczi
        .ret = NOT_DONE,
1993 1c9805a3 Stefan Hajnoczi
    };
1994 e7a8a783 Kevin Wolf
1995 1c9805a3 Stefan Hajnoczi
    qemu_iovec_init_external(&qiov, &iov, 1);
1996 e7a8a783 Kevin Wolf
1997 498e386c Zhi Yong Wu
    /**
1998 498e386c Zhi Yong Wu
     * In sync call context, when the vcpu is blocked, this throttling timer
1999 498e386c Zhi Yong Wu
     * will not fire; so the I/O throttling function has to be disabled here
2000 498e386c Zhi Yong Wu
     * if it has been enabled.
2001 498e386c Zhi Yong Wu
     */
2002 498e386c Zhi Yong Wu
    if (bs->io_limits_enabled) {
2003 498e386c Zhi Yong Wu
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
2004 498e386c Zhi Yong Wu
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
2005 498e386c Zhi Yong Wu
        bdrv_io_limits_disable(bs);
2006 498e386c Zhi Yong Wu
    }
2007 498e386c Zhi Yong Wu
2008 1c9805a3 Stefan Hajnoczi
    if (qemu_in_coroutine()) {
2009 1c9805a3 Stefan Hajnoczi
        /* Fast-path if already in coroutine context */
2010 1c9805a3 Stefan Hajnoczi
        bdrv_rw_co_entry(&rwco);
2011 1c9805a3 Stefan Hajnoczi
    } else {
2012 1c9805a3 Stefan Hajnoczi
        co = qemu_coroutine_create(bdrv_rw_co_entry);
2013 1c9805a3 Stefan Hajnoczi
        qemu_coroutine_enter(co, &rwco);
2014 1c9805a3 Stefan Hajnoczi
        while (rwco.ret == NOT_DONE) {
2015 1c9805a3 Stefan Hajnoczi
            qemu_aio_wait();
2016 1c9805a3 Stefan Hajnoczi
        }
2017 1c9805a3 Stefan Hajnoczi
    }
2018 1c9805a3 Stefan Hajnoczi
    return rwco.ret;
2019 1c9805a3 Stefan Hajnoczi
}
2020 b338082b bellard
2021 1c9805a3 Stefan Hajnoczi
/* return < 0 if error. See bdrv_write() for the return codes */
2022 1c9805a3 Stefan Hajnoczi
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2023 1c9805a3 Stefan Hajnoczi
              uint8_t *buf, int nb_sectors)
2024 1c9805a3 Stefan Hajnoczi
{
2025 1c9805a3 Stefan Hajnoczi
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
2026 fc01f7e7 bellard
}
2027 fc01f7e7 bellard
2028 07d27a44 Markus Armbruster
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2029 07d27a44 Markus Armbruster
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2030 07d27a44 Markus Armbruster
                          uint8_t *buf, int nb_sectors)
2031 07d27a44 Markus Armbruster
{
2032 07d27a44 Markus Armbruster
    bool enabled;
2033 07d27a44 Markus Armbruster
    int ret;
2034 07d27a44 Markus Armbruster
2035 07d27a44 Markus Armbruster
    enabled = bs->io_limits_enabled;
2036 07d27a44 Markus Armbruster
    bs->io_limits_enabled = false;
2037 07d27a44 Markus Armbruster
    ret = bdrv_read(bs, 0, buf, 1);
2038 07d27a44 Markus Armbruster
    bs->io_limits_enabled = enabled;
2039 07d27a44 Markus Armbruster
    return ret;
2040 07d27a44 Markus Armbruster
}
2041 07d27a44 Markus Armbruster
2042 71df14fc Paolo Bonzini
#define BITS_PER_LONG  (sizeof(unsigned long) * 8)
2043 71df14fc Paolo Bonzini
2044 7cd1e32a lirans@il.ibm.com
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
2045 a55eb92c Jan Kiszka
                             int nb_sectors, int dirty)
2046 7cd1e32a lirans@il.ibm.com
{
2047 7cd1e32a lirans@il.ibm.com
    int64_t start, end;
2048 c6d22830 Jan Kiszka
    unsigned long val, idx, bit;
2049 a55eb92c Jan Kiszka
2050 6ea44308 Jan Kiszka
    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
2051 c6d22830 Jan Kiszka
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
2052 a55eb92c Jan Kiszka
2053 a55eb92c Jan Kiszka
    for (; start <= end; start++) {
2054 71df14fc Paolo Bonzini
        idx = start / BITS_PER_LONG;
2055 71df14fc Paolo Bonzini
        bit = start % BITS_PER_LONG;
2056 c6d22830 Jan Kiszka
        val = bs->dirty_bitmap[idx];
2057 c6d22830 Jan Kiszka
        if (dirty) {
2058 6d59fec1 Marcelo Tosatti
            if (!(val & (1UL << bit))) {
2059 aaa0eb75 Liran Schour
                bs->dirty_count++;
2060 6d59fec1 Marcelo Tosatti
                val |= 1UL << bit;
2061 aaa0eb75 Liran Schour
            }
2062 c6d22830 Jan Kiszka
        } else {
2063 6d59fec1 Marcelo Tosatti
            if (val & (1UL << bit)) {
2064 aaa0eb75 Liran Schour
                bs->dirty_count--;
2065 6d59fec1 Marcelo Tosatti
                val &= ~(1UL << bit);
2066 aaa0eb75 Liran Schour
            }
2067 c6d22830 Jan Kiszka
        }
2068 c6d22830 Jan Kiszka
        bs->dirty_bitmap[idx] = val;
2069 7cd1e32a lirans@il.ibm.com
    }
2070 7cd1e32a lirans@il.ibm.com
}
2071 7cd1e32a lirans@il.ibm.com
2072 5fafdf24 ths
/* Return < 0 if error. Important errors are:
2073 19cb3738 bellard
  -EIO         generic I/O error (may happen for all errors)
2074 19cb3738 bellard
  -ENOMEDIUM   No media inserted.
2075 19cb3738 bellard
  -EINVAL      Invalid sector number or nb_sectors
2076 19cb3738 bellard
  -EACCES      Trying to write a read-only device
2077 19cb3738 bellard
*/
2078 5fafdf24 ths
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2079 fc01f7e7 bellard
               const uint8_t *buf, int nb_sectors)
2080 fc01f7e7 bellard
{
2081 1c9805a3 Stefan Hajnoczi
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
2082 83f64091 bellard
}
2083 83f64091 bellard
2084 eda578e5 aliguori
/*
 * Byte-granularity synchronous read: copy 'count1' bytes starting at byte
 * 'offset' of the device into 'buf'.
 *
 * The range is split into an unaligned head (read via a one-sector bounce
 * buffer), a run of whole sectors (read straight into 'buf') and an unaligned
 * tail (bounce buffer again).
 *
 * Returns 'count1' on success or the negative error of the failing
 * bdrv_read() call.
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t sector_buf[BDRV_SECTOR_SIZE];
    uint8_t *dst = buf;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int remaining = count1;
    int chunk, whole_sectors;
    int ret;

    /* head: bytes from 'offset' up to the next sector boundary */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, sector_buf + (offset & (BDRV_SECTOR_SIZE - 1)), chunk);
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        dst += chunk;
    }

    /* body: whole sectors go directly into the caller's buffer */
    whole_sectors = remaining >> BDRV_SECTOR_BITS;
    if (whole_sectors > 0) {
        ret = bdrv_read(bs, sector, dst, whole_sectors);
        if (ret < 0) {
            return ret;
        }
        sector += whole_sectors;
        chunk = whole_sectors << BDRV_SECTOR_BITS;
        dst += chunk;
        remaining -= chunk;
    }

    /* tail: leading part of the final sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, sector_buf, remaining);
    }

    return count1;
}
2128 83f64091 bellard
2129 eda578e5 aliguori
/*
 * Byte-granularity synchronous write: store 'count1' bytes from 'buf' at byte
 * 'offset' of the device.
 *
 * Partially covered sectors at the start and end of the range are handled by
 * read-modify-write through a one-sector bounce buffer; whole sectors in
 * between are written straight from 'buf'.
 *
 * Returns 'count1' on success or the negative error of the failing
 * bdrv_read()/bdrv_write() call.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t sector_buf[BDRV_SECTOR_SIZE];
    const uint8_t *src = buf;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int remaining = count1;
    int chunk, whole_sectors;
    int ret;

    /* head: bytes from 'offset' up to the next sector boundary */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(sector_buf + (offset & (BDRV_SECTOR_SIZE - 1)), src, chunk);
        ret = bdrv_write(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        src += chunk;
    }

    /* body: whole sectors are written directly from the caller's buffer */
    whole_sectors = remaining >> BDRV_SECTOR_BITS;
    if (whole_sectors > 0) {
        ret = bdrv_write(bs, sector, src, whole_sectors);
        if (ret < 0) {
            return ret;
        }
        sector += whole_sectors;
        chunk = whole_sectors << BDRV_SECTOR_BITS;
        src += chunk;
        remaining -= chunk;
    }

    /* tail: overwrite only the leading part of the final sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(sector_buf, src, remaining);
        ret = bdrv_write(bs, sector, sector_buf, 1);
        if (ret < 0) {
            return ret;
        }
    }

    return count1;
}
2177 83f64091 bellard
2178 f08145fe Kevin Wolf
/*
2179 f08145fe Kevin Wolf
 * Writes to the file and ensures that no writes are reordered across this
2180 f08145fe Kevin Wolf
 * request (acts as a barrier)
2181 f08145fe Kevin Wolf
 *
2182 f08145fe Kevin Wolf
 * Returns 0 on success, -errno in error cases.
2183 f08145fe Kevin Wolf
 */
2184 f08145fe Kevin Wolf
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2185 f08145fe Kevin Wolf
    const void *buf, int count)
2186 f08145fe Kevin Wolf
{
2187 f08145fe Kevin Wolf
    int ret;
2188 f08145fe Kevin Wolf
2189 f08145fe Kevin Wolf
    ret = bdrv_pwrite(bs, offset, buf, count);
2190 f08145fe Kevin Wolf
    if (ret < 0) {
2191 f08145fe Kevin Wolf
        return ret;
2192 f08145fe Kevin Wolf
    }
2193 f08145fe Kevin Wolf
2194 f05fa4ad Paolo Bonzini
    /* No flush needed for cache modes that already do it */
2195 f05fa4ad Paolo Bonzini
    if (bs->enable_write_cache) {
2196 f08145fe Kevin Wolf
        bdrv_flush(bs);
2197 f08145fe Kevin Wolf
    }
2198 f08145fe Kevin Wolf
2199 f08145fe Kevin Wolf
    return 0;
2200 f08145fe Kevin Wolf
}
2201 f08145fe Kevin Wolf
2202 470c0504 Stefan Hajnoczi
/*
 * Copy-on-read: read the cluster-aligned region that contains the request,
 * write that region back through the driver of 'bs' (as zeroes when the data
 * is all zero and the driver offers a write-zeroes callback), and finally
 * copy the requested part into 'qiov'.
 *
 * Returns the negative error of the failing read or write, otherwise the
 * result of the write step.
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector bounce_qiov;
    struct iovec iov;
    /* All I/O goes through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    bool all_zero;
    int ret;

    /* Widen the request to whole clusters so that allocating the cluster in
     * the image file needs no additional backing file I/O.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    bounce = qemu_blockalign(bs, iov.iov_len);
    iov.iov_base = bounce;
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto out;
    }

    /* The buffer is only scanned when the driver has a write-zeroes hook */
    all_zero = drv->bdrv_co_write_zeroes &&
               buffer_is_zero(bounce, iov.iov_len);
    if (all_zero) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    /* It might be okay to ignore write errors for guest requests, but for a
     * deliberate copy-on-read the error matters, so it is simply reported in
     * all cases.
     */
    if (ret >= 0) {
        /* Hand the originally requested sub-range back to the caller */
        skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
        qemu_iovec_from_buf(qiov, 0, (uint8_t *)bounce + skip_bytes,
                            nb_sectors * BDRV_SECTOR_SIZE);
    }

out:
    qemu_vfree(bounce);
    return ret;
}
2267 ab185921 Stefan Hajnoczi
2268 c5fbe571 Stefan Hajnoczi
/*
2269 c5fbe571 Stefan Hajnoczi
 * Handle a read request in coroutine context
2270 c5fbe571 Stefan Hajnoczi
 */
2271 c5fbe571 Stefan Hajnoczi
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
2272 470c0504 Stefan Hajnoczi
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2273 470c0504 Stefan Hajnoczi
    BdrvRequestFlags flags)
2274 da1fa91d Kevin Wolf
{
2275 da1fa91d Kevin Wolf
    BlockDriver *drv = bs->drv;
2276 dbffbdcf Stefan Hajnoczi
    BdrvTrackedRequest req;
2277 dbffbdcf Stefan Hajnoczi
    int ret;
2278 da1fa91d Kevin Wolf
2279 da1fa91d Kevin Wolf
    if (!drv) {
2280 da1fa91d Kevin Wolf
        return -ENOMEDIUM;
2281 da1fa91d Kevin Wolf
    }
2282 da1fa91d Kevin Wolf
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2283 da1fa91d Kevin Wolf
        return -EIO;
2284 da1fa91d Kevin Wolf
    }
2285 da1fa91d Kevin Wolf
2286 98f90dba Zhi Yong Wu
    /* throttling disk read I/O */
2287 98f90dba Zhi Yong Wu
    if (bs->io_limits_enabled) {
2288 98f90dba Zhi Yong Wu
        bdrv_io_limits_intercept(bs, false, nb_sectors);
2289 98f90dba Zhi Yong Wu
    }
2290 98f90dba Zhi Yong Wu
2291 f4658285 Stefan Hajnoczi
    if (bs->copy_on_read) {
2292 470c0504 Stefan Hajnoczi
        flags |= BDRV_REQ_COPY_ON_READ;
2293 470c0504 Stefan Hajnoczi
    }
2294 470c0504 Stefan Hajnoczi
    if (flags & BDRV_REQ_COPY_ON_READ) {
2295 470c0504 Stefan Hajnoczi
        bs->copy_on_read_in_flight++;
2296 470c0504 Stefan Hajnoczi
    }
2297 470c0504 Stefan Hajnoczi
2298 470c0504 Stefan Hajnoczi
    if (bs->copy_on_read_in_flight) {
2299 f4658285 Stefan Hajnoczi
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2300 f4658285 Stefan Hajnoczi
    }
2301 f4658285 Stefan Hajnoczi
2302 dbffbdcf Stefan Hajnoczi
    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
2303 ab185921 Stefan Hajnoczi
2304 470c0504 Stefan Hajnoczi
    if (flags & BDRV_REQ_COPY_ON_READ) {
2305 ab185921 Stefan Hajnoczi
        int pnum;
2306 ab185921 Stefan Hajnoczi
2307 ab185921 Stefan Hajnoczi
        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
2308 ab185921 Stefan Hajnoczi
        if (ret < 0) {
2309 ab185921 Stefan Hajnoczi
            goto out;
2310 ab185921 Stefan Hajnoczi
        }
2311 ab185921 Stefan Hajnoczi
2312 ab185921 Stefan Hajnoczi
        if (!ret || pnum != nb_sectors) {
2313 470c0504 Stefan Hajnoczi
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
2314 ab185921 Stefan Hajnoczi
            goto out;
2315 ab185921 Stefan Hajnoczi
        }
2316 ab185921 Stefan Hajnoczi
    }
2317 ab185921 Stefan Hajnoczi
2318 dbffbdcf Stefan Hajnoczi
    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2319 ab185921 Stefan Hajnoczi
2320 ab185921 Stefan Hajnoczi
out:
2321 dbffbdcf Stefan Hajnoczi
    tracked_request_end(&req);
2322 470c0504 Stefan Hajnoczi
2323 470c0504 Stefan Hajnoczi
    if (flags & BDRV_REQ_COPY_ON_READ) {
2324 470c0504 Stefan Hajnoczi
        bs->copy_on_read_in_flight--;
2325 470c0504 Stefan Hajnoczi
    }
2326 470c0504 Stefan Hajnoczi
2327 dbffbdcf Stefan Hajnoczi
    return ret;
2328 da1fa91d Kevin Wolf
}
2329 da1fa91d Kevin Wolf
2330 c5fbe571 Stefan Hajnoczi
/*
 * Coroutine read entry point: emits the trace event and forwards to
 * bdrv_co_do_readv() without any request flags.
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    int ret;

    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    ret = bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
    return ret;
}
2337 470c0504 Stefan Hajnoczi
2338 470c0504 Stefan Hajnoczi
/*
 * Coroutine read entry point that forces copy-on-read for this request:
 * emits the trace event and forwards to bdrv_co_do_readv() with
 * BDRV_REQ_COPY_ON_READ set.
 */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    int ret;

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    ret = bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                           BDRV_REQ_COPY_ON_READ);
    return ret;
}
2346 c5fbe571 Stefan Hajnoczi
2347 f08f2dda Stefan Hajnoczi
/*
 * Write zeroes to 'nb_sectors' sectors starting at 'sector_num'.
 *
 * The driver's bdrv_co_write_zeroes callback is tried first when it exists;
 * any result other than -ENOTSUP is returned as-is.  Otherwise a zero-filled
 * bounce buffer is allocated and written through the driver's
 * bdrv_co_writev callback, whose result is returned.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector zero_qiov;
    struct iovec zero_iov;
    int ret = -ENOTSUP;

    /* TODO Emulate only part of misaligned requests instead of letting block
     * drivers return -ENOTSUP and emulate everything */

    /* Prefer the driver's dedicated (efficient) write-zeroes callback */
    if (drv->bdrv_co_write_zeroes) {
        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
    }
    if (ret != -ENOTSUP) {
        return ret;
    }

    /* Unsupported (or no callback): emulate with a zeroed bounce buffer */
    zero_iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
    zero_iov.iov_base = qemu_blockalign(bs, zero_iov.iov_len);
    memset(zero_iov.iov_base, 0, zero_iov.iov_len);
    qemu_iovec_init_external(&zero_qiov, &zero_iov, 1);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &zero_qiov);

    qemu_vfree(zero_iov.iov_base);
    return ret;
}
2377 f08f2dda Stefan Hajnoczi
2378 c5fbe571 Stefan Hajnoczi
/*
2379 c5fbe571 Stefan Hajnoczi
 * Handle a write request in coroutine context
2380 c5fbe571 Stefan Hajnoczi
 */
2381 c5fbe571 Stefan Hajnoczi
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
2382 f08f2dda Stefan Hajnoczi
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2383 f08f2dda Stefan Hajnoczi
    BdrvRequestFlags flags)
2384 c5fbe571 Stefan Hajnoczi
{
2385 c5fbe571 Stefan Hajnoczi
    BlockDriver *drv = bs->drv;
2386 dbffbdcf Stefan Hajnoczi
    BdrvTrackedRequest req;
2387 6b7cb247 Stefan Hajnoczi
    int ret;
2388 da1fa91d Kevin Wolf
2389 da1fa91d Kevin Wolf
    if (!bs->drv) {
2390 da1fa91d Kevin Wolf
        return -ENOMEDIUM;
2391 da1fa91d Kevin Wolf
    }
2392 da1fa91d Kevin Wolf
    if (bs->read_only) {
2393 da1fa91d Kevin Wolf
        return -EACCES;
2394 da1fa91d Kevin Wolf
    }
2395 da1fa91d Kevin Wolf
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2396 da1fa91d Kevin Wolf
        return -EIO;
2397 da1fa91d Kevin Wolf
    }
2398 da1fa91d Kevin Wolf
2399 98f90dba Zhi Yong Wu
    /* throttling disk write I/O */
2400 98f90dba Zhi Yong Wu
    if (bs->io_limits_enabled) {
2401 98f90dba Zhi Yong Wu
        bdrv_io_limits_intercept(bs, true, nb_sectors);
2402 98f90dba Zhi Yong Wu
    }
2403 98f90dba Zhi Yong Wu
2404 470c0504 Stefan Hajnoczi
    if (bs->copy_on_read_in_flight) {
2405 f4658285 Stefan Hajnoczi
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2406 f4658285 Stefan Hajnoczi
    }
2407 f4658285 Stefan Hajnoczi
2408 dbffbdcf Stefan Hajnoczi
    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
2409 dbffbdcf Stefan Hajnoczi
2410 f08f2dda Stefan Hajnoczi
    if (flags & BDRV_REQ_ZERO_WRITE) {
2411 f08f2dda Stefan Hajnoczi
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
2412 f08f2dda Stefan Hajnoczi
    } else {
2413 f08f2dda Stefan Hajnoczi
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
2414 f08f2dda Stefan Hajnoczi
    }
2415 6b7cb247 Stefan Hajnoczi
2416 f05fa4ad Paolo Bonzini
    if (ret == 0 && !bs->enable_write_cache) {
2417 f05fa4ad Paolo Bonzini
        ret = bdrv_co_flush(bs);
2418 f05fa4ad Paolo Bonzini
    }
2419 f05fa4ad Paolo Bonzini
2420 da1fa91d Kevin Wolf
    if (bs->dirty_bitmap) {
2421 1755da16 Paolo Bonzini
        bdrv_set_dirty(bs, sector_num, nb_sectors);
2422 da1fa91d Kevin Wolf
    }
2423 da1fa91d Kevin Wolf
2424 da1fa91d Kevin Wolf
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2425 da1fa91d Kevin Wolf
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
2426 da1fa91d Kevin Wolf
    }
2427 da1fa91d Kevin Wolf
2428 dbffbdcf Stefan Hajnoczi
    tracked_request_end(&req);
2429 dbffbdcf Stefan Hajnoczi
2430 6b7cb247 Stefan Hajnoczi
    return ret;
2431 da1fa91d Kevin Wolf
}
2432 da1fa91d Kevin Wolf
2433 c5fbe571 Stefan Hajnoczi
/*
 * Coroutine write entry point.
 *
 * Emits the bdrv_co_writev trace event and forwards the request to
 * bdrv_co_do_writev() with no request flags set.  The return value of
 * bdrv_co_do_writev() is passed back unchanged.
 */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
                                int nb_sectors, QEMUIOVector *qiov)
{
    int ret;

    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
    return ret;
}
2440 f08f2dda Stefan Hajnoczi
2441 f08f2dda Stefan Hajnoczi
/*
 * Coroutine entry point for writing zeroes to a sector range.
 *
 * Emits the bdrv_co_write_zeroes trace event and forwards the request to
 * bdrv_co_do_writev() with BDRV_REQ_ZERO_WRITE set and no I/O vector.
 * The return value of bdrv_co_do_writev() is passed back unchanged.
 */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors)
{
    int ret;

    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                            BDRV_REQ_ZERO_WRITE);
    return ret;
}
2449 c5fbe571 Stefan Hajnoczi
2450 83f64091 bellard
/**
2451 83f64091 bellard
 * Truncate file to 'offset' bytes (needed only for file protocols)
2452 83f64091 bellard
 */
2453 83f64091 bellard
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2454 83f64091 bellard
{
2455 83f64091 bellard
    BlockDriver *drv = bs->drv;
2456 51762288 Stefan Hajnoczi
    int ret;
2457 83f64091 bellard
    if (!drv)
2458 19cb3738 bellard
        return -ENOMEDIUM;
2459 83f64091 bellard
    if (!drv->bdrv_truncate)
2460 83f64091 bellard
        return -ENOTSUP;
2461 59f2689d Naphtali Sprei
    if (bs->read_only)
2462 59f2689d Naphtali Sprei
        return -EACCES;
2463 8591675f Marcelo Tosatti
    if (bdrv_in_use(bs))
2464 8591675f Marcelo Tosatti
        return -EBUSY;
2465 51762288 Stefan Hajnoczi
    ret = drv->bdrv_truncate(bs, offset);
2466 51762288 Stefan Hajnoczi
    if (ret == 0) {
2467 51762288 Stefan Hajnoczi
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2468 145feb17 Markus Armbruster
        bdrv_dev_resize_cb(bs);
2469 51762288 Stefan Hajnoczi
    }
2470 51762288 Stefan Hajnoczi
    return ret;
2471 83f64091 bellard
}
2472 83f64091 bellard
2473 83f64091 bellard
/**
2474 4a1d5e1f Fam Zheng
 * Length of a allocated file in bytes. Sparse files are counted by actual
2475 4a1d5e1f Fam Zheng
 * allocated space. Return < 0 if error or unknown.
2476 4a1d5e1f Fam Zheng
 */
2477 4a1d5e1f Fam Zheng
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2478 4a1d5e1f Fam Zheng
{
2479 4a1d5e1f Fam Zheng
    BlockDriver *drv = bs->drv;
2480 4a1d5e1f Fam Zheng
    if (!drv) {
2481 4a1d5e1f Fam Zheng
        return -ENOMEDIUM;
2482 4a1d5e1f Fam Zheng
    }
2483 4a1d5e1f Fam Zheng
    if (drv->bdrv_get_allocated_file_size) {
2484 4a1d5e1f Fam Zheng
        return drv->bdrv_get_allocated_file_size(bs);
2485 4a1d5e1f Fam Zheng
    }
2486 4a1d5e1f Fam Zheng
    if (bs->file) {
2487 4a1d5e1f Fam Zheng
        return bdrv_get_allocated_file_size(bs->file);
2488 4a1d5e1f Fam Zheng
    }
2489 4a1d5e1f Fam Zheng
    return -ENOTSUP;
2490 4a1d5e1f Fam Zheng
}
2491 4a1d5e1f Fam Zheng
2492 4a1d5e1f Fam Zheng
/**
2493 83f64091 bellard
 * Length of a file in bytes. Return < 0 if error or unknown.
2494 83f64091 bellard
 */
2495 83f64091 bellard
int64_t bdrv_getlength(BlockDriverState *bs)
2496 83f64091 bellard
{
2497 83f64091 bellard
    BlockDriver *drv = bs->drv;
2498 83f64091 bellard
    if (!drv)
2499 19cb3738 bellard
        return -ENOMEDIUM;
2500 51762288 Stefan Hajnoczi
2501 2c6942fa Markus Armbruster
    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
2502 46a4e4e6 Stefan Hajnoczi
        if (drv->bdrv_getlength) {
2503 46a4e4e6 Stefan Hajnoczi
            return drv->bdrv_getlength(bs);
2504 46a4e4e6 Stefan Hajnoczi
        }
2505 83f64091 bellard
    }
2506 46a4e4e6 Stefan Hajnoczi
    return bs->total_sectors * BDRV_SECTOR_SIZE;
2507 fc01f7e7 bellard
}
2508 fc01f7e7 bellard
2509 19cb3738 bellard
/* return 0 as number of sectors if no device present or error */
2510 96b8f136 ths
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2511 fc01f7e7 bellard
{
2512 19cb3738 bellard
    int64_t length;
2513 19cb3738 bellard
    length = bdrv_getlength(bs);
2514 19cb3738 bellard
    if (length < 0)
2515 19cb3738 bellard
        length = 0;
2516 19cb3738 bellard
    else
2517 6ea44308 Jan Kiszka
        length = length >> BDRV_SECTOR_BITS;
2518 19cb3738 bellard
    *nb_sectors_ptr = length;
2519 fc01f7e7 bellard
}
2520 cf98951b bellard
2521 0563e191 Zhi Yong Wu
/* throttling disk io limits */
2522 0563e191 Zhi Yong Wu
void bdrv_set_io_limits(BlockDriverState *bs,
2523 0563e191 Zhi Yong Wu
                        BlockIOLimit *io_limits)
2524 0563e191 Zhi Yong Wu
{
2525 0563e191 Zhi Yong Wu
    bs->io_limits = *io_limits;
2526 0563e191 Zhi Yong Wu
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2527 0563e191 Zhi Yong Wu
}
2528 0563e191 Zhi Yong Wu
2529 ff06f5f3 Paolo Bonzini
/*
 * Record the error policies to apply to failed reads and failed writes on
 * this device.
 */
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
2535 abd7f68d Markus Armbruster
2536 1ceee0d5 Paolo Bonzini
/*
 * Return the configured error policy: the read policy when 'is_read' is
 * true, the write policy otherwise.
 */
BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
2540 abd7f68d Markus Armbruster
2541 3e1caa5f Paolo Bonzini
/*
 * Map the device's configured error policy to the action to take for an
 * I/O failure.
 *
 * 'is_read' selects between the read and the write policy.  'error' is
 * compared against ENOSPC (a positive errno value) for the "enospc" policy:
 * ENOSPC stops, any other error is reported.  An unknown policy value
 * aborts.
 */
BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
{
    BlockdevOnError policy;

    if (is_read) {
        policy = bs->on_read_error;
    } else {
        policy = bs->on_write_error;
    }

    switch (policy) {
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BDRV_ACTION_IGNORE;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BDRV_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BDRV_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_ENOSPC:
        if (error == ENOSPC) {
            return BDRV_ACTION_STOP;
        }
        return BDRV_ACTION_REPORT;
    default:
        abort();
    }
}
2558 3e1caa5f Paolo Bonzini
2559 3e1caa5f Paolo Bonzini
/* This is done by device models because, while the block layer knows
2560 3e1caa5f Paolo Bonzini
 * about the error, it does not know whether an operation comes from
2561 3e1caa5f Paolo Bonzini
 * the device or the block layer (from a job, for example).
2562 3e1caa5f Paolo Bonzini
 */
2563 3e1caa5f Paolo Bonzini
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2564 3e1caa5f Paolo Bonzini
                       bool is_read, int error)
2565 3e1caa5f Paolo Bonzini
{
2566 3e1caa5f Paolo Bonzini
    assert(error >= 0);
2567 32c81a4a Paolo Bonzini
    bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
2568 3e1caa5f Paolo Bonzini
    if (action == BDRV_ACTION_STOP) {
2569 3e1caa5f Paolo Bonzini
        vm_stop(RUN_STATE_IO_ERROR);
2570 3e1caa5f Paolo Bonzini
        bdrv_iostatus_set_err(bs, error);
2571 3e1caa5f Paolo Bonzini
    }
2572 3e1caa5f Paolo Bonzini
}
2573 3e1caa5f Paolo Bonzini
2574 b338082b bellard
/* Return the device's read_only flag. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    int read_only = bs->read_only;

    return read_only;
}
2578 b338082b bellard
2579 985a03b0 ths
/* Return the device's sg flag. */
int bdrv_is_sg(BlockDriverState *bs)
{
    int sg = bs->sg;

    return sg;
}
2583 985a03b0 ths
2584 e900a7b7 Christoph Hellwig
/* Return the device's enable_write_cache setting. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    int wce = bs->enable_write_cache;

    return wce;
}
2588 e900a7b7 Christoph Hellwig
2589 425b0148 Paolo Bonzini
/*
 * Enable or disable the write cache for 'bs'.
 *
 * Besides storing 'wce', the BDRV_O_CACHE_WB bit in bs->open_flags is set or
 * cleared to match, so a reopen() will preserve wce.
 */
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
    bs->enable_write_cache = wce;

    if (!wce) {
        bs->open_flags &= ~BDRV_O_CACHE_WB;
        return;
    }

    bs->open_flags |= BDRV_O_CACHE_WB;
}
2600 425b0148 Paolo Bonzini
2601 ea2384d3 bellard
/*
 * Report whether the image is encrypted.
 *
 * Returns 1 when there is a backing image and it is encrypted; otherwise
 * returns the device's own 'encrypted' flag.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing && backing->encrypted) {
        return 1;
    }

    return bs->encrypted;
}
2607 ea2384d3 bellard
2608 c0f4ce77 aliguori
/*
 * Report whether an encryption key still has to be supplied.
 *
 * Returns 1 when either the backing image or the image itself is encrypted
 * and has no valid key yet, 0 otherwise.
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;
    bool backing_needs_key;
    bool image_needs_key;

    backing_needs_key = backing && backing->encrypted && !backing->valid_key;
    image_needs_key = bs->encrypted && !bs->valid_key;

    return backing_needs_key || image_needs_key;
}
2616 c0f4ce77 aliguori
2617 ea2384d3 bellard
/*
 * Supply the encryption key for 'bs'.
 *
 * If the backing image is encrypted the key is applied to it first (by
 * recursion); a failure there is returned immediately, and if 'bs' itself
 * is not encrypted the call ends with 0.  Otherwise: -EINVAL when 'bs' is
 * not encrypted, -ENOMEDIUM when there is no driver or the driver has no
 * bdrv_set_key callback.
 *
 * The driver's result is returned.  On failure valid_key is cleared.  On
 * success, if no valid key was set before, valid_key is set and the media
 * change callback is invoked.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing = bs->backing_hd;
    int ret;

    if (backing && backing->encrypted) {
        ret = bdrv_set_key(backing, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
2642 ea2384d3 bellard
2643 f8d6bba1 Markus Armbruster
/*
 * Return the format name of the driver attached to 'bs', or NULL when no
 * driver is attached.
 */
const char *bdrv_get_format_name(BlockDriverState *bs)
{
    if (!bs->drv) {
        return NULL;
    }
    return bs->drv->format_name;
}
2647 ea2384d3 bellard
2648 5fafdf24 ths
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2649 ea2384d3 bellard
                         void *opaque)
2650 ea2384d3 bellard
{
2651 ea2384d3 bellard
    BlockDriver *drv;
2652 ea2384d3 bellard
2653 8a22f02a Stefan Hajnoczi
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
2654 ea2384d3 bellard
        it(opaque, drv->format_name);
2655 ea2384d3 bellard
    }
2656 ea2384d3 bellard
}
2657 ea2384d3 bellard
2658 b338082b bellard
BlockDriverState *bdrv_find(const char *name)
2659 b338082b bellard
{
2660 b338082b bellard
    BlockDriverState *bs;
2661 b338082b bellard
2662 1b7bdbc1 Stefan Hajnoczi
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
2663 1b7bdbc1 Stefan Hajnoczi
        if (!strcmp(name, bs->device_name)) {
2664 b338082b bellard
            return bs;
2665 1b7bdbc1 Stefan Hajnoczi
        }
2666 b338082b bellard
    }
2667 b338082b bellard
    return NULL;
2668 b338082b bellard
}
2669 b338082b bellard
2670 2f399b0a Markus Armbruster
/*
 * Iterate over bdrv_states: a NULL 'bs' yields the first entry of the list,
 * any other value yields the entry following 'bs'.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (bs) {
        return QTAILQ_NEXT(bs, list);
    }
    return QTAILQ_FIRST(&bdrv_states);
}
2677 2f399b0a Markus Armbruster
2678 51de9760 aliguori
/*
 * Invoke 'it' once for every entry of bdrv_states, passing 'opaque' and the
 * entry.
 */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    QTAILQ_FOREACH(cur, &bdrv_states, list) {
        it(opaque, cur);
    }
}
2686 81d0912d bellard
2687 ea2384d3 bellard
/* Return the device name stored in 'bs' (storage owned by 'bs'). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    const char *name = bs->device_name;

    return name;
}
2691 ea2384d3 bellard
2692 c8433287 Markus Armbruster
/* Return the open flags currently recorded for 'bs'. */
int bdrv_get_flags(BlockDriverState *bs)
{
    int flags = bs->open_flags;

    return flags;
}
2696 c8433287 Markus Armbruster
2697 c6ca28d6 aliguori
void bdrv_flush_all(void)
2698 c6ca28d6 aliguori
{
2699 c6ca28d6 aliguori
    BlockDriverState *bs;
2700 c6ca28d6 aliguori
2701 1b7bdbc1 Stefan Hajnoczi
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
2702 29cdb251 Paolo Bonzini
        bdrv_flush(bs);
2703 1b7bdbc1 Stefan Hajnoczi
    }
2704 c6ca28d6 aliguori
}
2705 c6ca28d6 aliguori
2706 f2feebbd Kevin Wolf
/*
 * Ask the driver for its zero-init property.
 *
 * A driver must be attached (asserted).  Returns the result of the driver's
 * bdrv_has_zero_init callback, or 1 when the driver does not provide one.
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }

    return bs->drv->bdrv_has_zero_init(bs);
}
2716 f2feebbd Kevin Wolf
2717 376ae3f1 Stefan Hajnoczi
typedef struct BdrvCoIsAllocatedData {
2718 376ae3f1 Stefan Hajnoczi
    BlockDriverState *bs;
2719 376ae3f1 Stefan Hajnoczi
    int64_t sector_num;
2720 376ae3f1 Stefan Hajnoczi
    int nb_sectors;
2721 376ae3f1 Stefan Hajnoczi
    int *pnum;
2722 376ae3f1 Stefan Hajnoczi
    int ret;
2723 376ae3f1 Stefan Hajnoczi
    bool done;
2724 376ae3f1 Stefan Hajnoczi
} BdrvCoIsAllocatedData;
2725 376ae3f1 Stefan Hajnoczi
2726 f58c7b35 ths
/*
2727 f58c7b35 ths
 * Returns true iff the specified sector is present in the disk image. Drivers
2728 f58c7b35 ths
 * not implementing the functionality are assumed to not support backing files,
2729 f58c7b35 ths
 * hence all their sectors are reported as allocated.
2730 f58c7b35 ths
 *
2731 bd9533e3 Stefan Hajnoczi
 * If 'sector_num' is beyond the end of the disk image the return value is 0
2732 bd9533e3 Stefan Hajnoczi
 * and 'pnum' is set to 0.
2733 bd9533e3 Stefan Hajnoczi
 *
2734 f58c7b35 ths
 * 'pnum' is set to the number of sectors (including and immediately following
2735 f58c7b35 ths
 * the specified sector) that are known to be in the same
2736 f58c7b35 ths
 * allocated/unallocated state.
2737 f58c7b35 ths
 *
2738 bd9533e3 Stefan Hajnoczi
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
2739 bd9533e3 Stefan Hajnoczi
 * beyond the end of the disk image it will be clamped.
2740 f58c7b35 ths
 */
2741 060f51c9 Stefan Hajnoczi
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2742 060f51c9 Stefan Hajnoczi
                                      int nb_sectors, int *pnum)
2743 f58c7b35 ths
{
2744 bd9533e3 Stefan Hajnoczi
    int64_t n;
2745 bd9533e3 Stefan Hajnoczi
2746 bd9533e3 Stefan Hajnoczi
    if (sector_num >= bs->total_sectors) {
2747 bd9533e3 Stefan Hajnoczi
        *pnum = 0;
2748 bd9533e3 Stefan Hajnoczi
        return 0;
2749 bd9533e3 Stefan Hajnoczi
    }
2750 bd9533e3 Stefan Hajnoczi
2751 bd9533e3 Stefan Hajnoczi
    n = bs->total_sectors - sector_num;
2752 bd9533e3 Stefan Hajnoczi
    if (n < nb_sectors) {
2753 bd9533e3 Stefan Hajnoczi
        nb_sectors = n;
2754 bd9533e3 Stefan Hajnoczi
    }
2755 bd9533e3 Stefan Hajnoczi
2756 6aebab14 Stefan Hajnoczi
    if (!bs->drv->bdrv_co_is_allocated) {
2757 bd9533e3 Stefan Hajnoczi
        *pnum = nb_sectors;
2758 f58c7b35 ths
        return 1;
2759 f58c7b35 ths
    }
2760 6aebab14 Stefan Hajnoczi
2761 060f51c9 Stefan Hajnoczi
    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2762 060f51c9 Stefan Hajnoczi
}
2763 060f51c9 Stefan Hajnoczi
2764 060f51c9 Stefan Hajnoczi
/* Coroutine wrapper for bdrv_is_allocated() */
2765 060f51c9 Stefan Hajnoczi
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2766 060f51c9 Stefan Hajnoczi
{
2767 060f51c9 Stefan Hajnoczi
    BdrvCoIsAllocatedData *data = opaque;
2768 060f51c9 Stefan Hajnoczi
    BlockDriverState *bs = data->bs;
2769 060f51c9 Stefan Hajnoczi
2770 060f51c9 Stefan Hajnoczi
    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2771 060f51c9 Stefan Hajnoczi
                                     data->pnum);
2772 060f51c9 Stefan Hajnoczi
    data->done = true;
2773 060f51c9 Stefan Hajnoczi
}
2774 060f51c9 Stefan Hajnoczi
2775 060f51c9 Stefan Hajnoczi
/*
2776 060f51c9 Stefan Hajnoczi
 * Synchronous wrapper around bdrv_co_is_allocated().
2777 060f51c9 Stefan Hajnoczi
 *
2778 060f51c9 Stefan Hajnoczi
 * See bdrv_co_is_allocated() for details.
2779 060f51c9 Stefan Hajnoczi
 */
2780 060f51c9 Stefan Hajnoczi
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2781 060f51c9 Stefan Hajnoczi
                      int *pnum)
2782 060f51c9 Stefan Hajnoczi
{
2783 6aebab14 Stefan Hajnoczi
    Coroutine *co;
2784 6aebab14 Stefan Hajnoczi
    BdrvCoIsAllocatedData data = {
2785 6aebab14 Stefan Hajnoczi
        .bs = bs,
2786 6aebab14 Stefan Hajnoczi
        .sector_num = sector_num,
2787 6aebab14 Stefan Hajnoczi
        .nb_sectors = nb_sectors,
2788 6aebab14 Stefan Hajnoczi
        .pnum = pnum,
2789 6aebab14 Stefan Hajnoczi
        .done = false,
2790 6aebab14 Stefan Hajnoczi
    };
2791 6aebab14 Stefan Hajnoczi
2792 6aebab14 Stefan Hajnoczi
    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2793 6aebab14 Stefan Hajnoczi
    qemu_coroutine_enter(co, &data);
2794 6aebab14 Stefan Hajnoczi
    while (!data.done) {
2795 6aebab14 Stefan Hajnoczi
        qemu_aio_wait();
2796 6aebab14 Stefan Hajnoczi
    }
2797 6aebab14 Stefan Hajnoczi
    return data.ret;
2798 f58c7b35 ths
}
2799 f58c7b35 ths
2800 188a7bbf Paolo Bonzini
/*
2801 188a7bbf Paolo Bonzini
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2802 188a7bbf Paolo Bonzini
 *
2803 188a7bbf Paolo Bonzini
 * Return true if the given sector is allocated in any image between
2804 188a7bbf Paolo Bonzini
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
2805 188a7bbf Paolo Bonzini
 * sector is allocated in any image of the chain.  Return false otherwise.
2806 188a7bbf Paolo Bonzini
 *
2807 188a7bbf Paolo Bonzini
 * 'pnum' is set to the number of sectors (including and immediately following
2808 188a7bbf Paolo Bonzini
 *  the specified sector) that are known to be in the same
2809 188a7bbf Paolo Bonzini
 *  allocated/unallocated state.
2810 188a7bbf Paolo Bonzini
 *
2811 188a7bbf Paolo Bonzini
 */
2812 188a7bbf Paolo Bonzini
int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2813 188a7bbf Paolo Bonzini
                                            BlockDriverState *base,
2814 188a7bbf Paolo Bonzini
                                            int64_t sector_num,
2815 188a7bbf Paolo Bonzini
                                            int nb_sectors, int *pnum)
2816 188a7bbf Paolo Bonzini
{
2817 188a7bbf Paolo Bonzini
    BlockDriverState *intermediate;
2818 188a7bbf Paolo Bonzini
    int ret, n = nb_sectors;
2819 188a7bbf Paolo Bonzini
2820 188a7bbf Paolo Bonzini
    intermediate = top;
2821 188a7bbf Paolo Bonzini
    while (intermediate && intermediate != base) {
2822 188a7bbf Paolo Bonzini
        int pnum_inter;
2823 188a7bbf Paolo Bonzini
        ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
2824 188a7bbf Paolo Bonzini
                                   &pnum_inter);
2825 188a7bbf Paolo Bonzini
        if (ret < 0) {
2826 188a7bbf Paolo Bonzini
            return ret;
2827 188a7bbf Paolo Bonzini
        } else if (ret) {
2828 188a7bbf Paolo Bonzini
            *pnum = pnum_inter;
2829 188a7bbf Paolo Bonzini
            return 1;
2830 188a7bbf Paolo Bonzini
        }
2831 188a7bbf Paolo Bonzini
2832 188a7bbf Paolo Bonzini
        /*
2833 188a7bbf Paolo Bonzini
         * [sector_num, nb_sectors] is unallocated on top but intermediate
2834 188a7bbf Paolo Bonzini
         * might have
2835 188a7bbf Paolo Bonzini
         *
2836 188a7bbf Paolo Bonzini
         * [sector_num+x, nr_sectors] allocated.
2837 188a7bbf Paolo Bonzini
         */
2838 188a7bbf Paolo Bonzini
        if (n > pnum_inter) {
2839 188a7bbf Paolo Bonzini
            n = pnum_inter;
2840 188a7bbf Paolo Bonzini
        }
2841 188a7bbf Paolo Bonzini
2842 188a7bbf Paolo Bonzini
        intermediate = intermediate->backing_hd;
2843 188a7bbf Paolo Bonzini
    }
2844 188a7bbf Paolo Bonzini
2845 188a7bbf Paolo Bonzini
    *pnum = n;
2846 188a7bbf Paolo Bonzini
    return 0;
2847 188a7bbf Paolo Bonzini
}
2848 188a7bbf Paolo Bonzini
2849 ac84adac Paolo Bonzini
/*
 * Build a newly allocated BlockInfo describing 'bs'.
 *
 * Device name, type ("unknown"), lock state and removability are always
 * filled in.  The optional parts are added only when they apply: tray state
 * for removable media, I/O status when iostatus is enabled, dirty byte count
 * when a dirty bitmap exists, and the 'inserted' section (file, driver,
 * encryption, backing file, I/O limits) when a driver is attached.
 *
 * Every string and sub-structure is allocated here with g_strdup() or
 * g_malloc0().
 */
BlockInfo *bdrv_query_info(BlockDriverState *bs)
{
    BlockInfo *info = g_malloc0(sizeof(*info));

    info->device = g_strdup(bs->device_name);
    info->type = g_strdup("unknown");
    info->locked = bdrv_dev_is_medium_locked(bs);
    info->removable = bdrv_dev_has_removable_media(bs);

    if (bdrv_dev_has_removable_media(bs)) {
        info->has_tray_open = true;
        info->tray_open = bdrv_dev_is_tray_open(bs);
    }

    if (bdrv_iostatus_is_enabled(bs)) {
        info->has_io_status = true;
        info->io_status = bs->iostatus;
    }

    if (bs->dirty_bitmap) {
        info->has_dirty = true;
        info->dirty = g_malloc0(sizeof(*info->dirty));
        /* Convert the dirty chunk count into a byte count */
        info->dirty->count = bdrv_get_dirty_count(bs) *
            BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
    }

    /* Without a driver there is no medium to describe */
    if (!bs->drv) {
        return info;
    }

    info->has_inserted = true;
    info->inserted = g_malloc0(sizeof(*info->inserted));
    info->inserted->file = g_strdup(bs->filename);
    info->inserted->ro = bs->read_only;
    info->inserted->drv = g_strdup(bs->drv->format_name);
    info->inserted->encrypted = bs->encrypted;
    info->inserted->encryption_key_missing = bdrv_key_required(bs);

    if (bs->backing_file[0]) {
        info->inserted->has_backing_file = true;
        info->inserted->backing_file = g_strdup(bs->backing_file);
    }

    info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);

    if (bs->io_limits_enabled) {
        info->inserted->bps = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
        info->inserted->bps_rd = bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
        info->inserted->bps_wr = bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
        info->inserted->iops = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
        info->inserted->iops_rd = bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
        info->inserted->iops_wr = bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
    }

    return info;
}
2907 f04ef601 Luiz Capitulino
2908 ac84adac Paolo Bonzini
/*
 * Build a list with one BlockInfo (from bdrv_query_info()) per entry of
 * bdrv_states, in list order.  Returns NULL when bdrv_states is empty.
 * 'errp' is not written by this function.
 */
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL;
    BlockInfoList **tail = &head;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *node = g_malloc0(sizeof(*node));

        node->value = bdrv_query_info(bs);

        /* Append at the tail so the result keeps the bdrv_states order */
        *tail = node;
        tail = &node->next;
    }

    return head;
}
2923 a36e69dd ths
2924 9887b616 Paolo Bonzini
BlockStats *bdrv_query_stats(const BlockDriverState *bs)
2925 f11f57e4 Luiz Capitulino
{
2926 f11f57e4 Luiz Capitulino
    BlockStats *s;
2927 f11f57e4 Luiz Capitulino
2928 f11f57e4 Luiz Capitulino
    s = g_malloc0(sizeof(*s));
2929 f11f57e4 Luiz Capitulino
2930 f11f57e4 Luiz Capitulino
    if (bs->device_name[0]) {
2931 f11f57e4 Luiz Capitulino
        s->has_device = true;
2932 f11f57e4 Luiz Capitulino
        s->device = g_strdup(bs->device_name);
2933 294cc35f Kevin Wolf
    }
2934 294cc35f Kevin Wolf
2935 f11f57e4 Luiz Capitulino
    s->stats = g_malloc0(sizeof(*s->stats));
2936 f11f57e4 Luiz Capitulino
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2937 f11f57e4 Luiz Capitulino
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2938 f11f57e4 Luiz Capitulino
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2939 f11f57e4 Luiz Capitulino
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2940 f11f57e4 Luiz Capitulino
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2941 f11f57e4 Luiz Capitulino
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2942 f11f57e4 Luiz Capitulino
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2943 f11f57e4 Luiz Capitulino
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2944 f11f57e4 Luiz Capitulino
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2945 f11f57e4 Luiz Capitulino
2946 294cc35f Kevin Wolf
    if (bs->file) {
2947 f11f57e4 Luiz Capitulino
        s->has_parent = true;
2948 9887b616 Paolo Bonzini
        s->parent = bdrv_query_stats(bs->file);
2949 294cc35f Kevin Wolf
    }
2950 294cc35f Kevin Wolf
2951 f11f57e4 Luiz Capitulino
    return s;
2952 294cc35f Kevin Wolf
}
2953 294cc35f Kevin Wolf
2954 f11f57e4 Luiz Capitulino
/*
 * Build a list with one BlockStats (from bdrv_query_stats()) per entry of
 * bdrv_states, in list order.  Returns NULL when bdrv_states is empty.
 * 'errp' is not written by this function.
 */
BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL;
    BlockStatsList **tail = &head;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *node = g_malloc0(sizeof(*node));

        node->value = bdrv_query_stats(bs);

        /* Append at the tail so the result keeps the bdrv_states order */
        *tail = node;
        tail = &node->next;
    }

    return head;
}
2969 ea2384d3 bellard
2970 045df330 aliguori
/*
 * Return the file name of the encrypted image associated with bs.
 *
 * An encrypted backing image takes precedence: in that case
 * bs->backing_file is returned.  Otherwise bs->filename is returned when
 * bs itself is encrypted, and NULL when neither is encrypted.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }
    return NULL;
}
2979 045df330 aliguori
2980 5fafdf24 ths
/*
 * Copy bs->backing_file into the caller-supplied buffer.
 *
 * filename:      destination buffer
 * filename_size: size of the destination buffer, passed on to pstrcpy()
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    const char *src = bs->backing_file;

    pstrcpy(filename, filename_size, src);
}
2985 83f64091 bellard
2986 5fafdf24 ths
/*
 * Hand a compressed write over to the block driver.
 *
 * Returns -ENOMEDIUM when bs has no driver, -ENOTSUP when the driver has
 * no bdrv_write_compressed callback, -EIO when bdrv_check_request()
 * rejects the request, and otherwise whatever the driver callback returns.
 */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_write_compressed == NULL) {
        return -ENOTSUP;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    /* This path must not be used while a dirty bitmap is attached. */
    assert(!bs->dirty_bitmap);

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
3001 3b46e624 ths
3002 faea38e7 bellard
/*
 * Fill *bdi with driver-provided information about bs.
 *
 * *bdi is zeroed before the driver callback runs.  It is left untouched
 * on the two early error returns: -ENOMEDIUM (no driver) and -ENOTSUP
 * (driver has no bdrv_get_info callback).  Otherwise the callback's
 * return value is passed through.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_info == NULL) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
3012 faea38e7 bellard
3013 45566e9c Christoph Hellwig
/*
 * Save size bytes of VM state from buf at position pos.
 *
 * The driver's bdrv_save_vmstate callback is used when present.  If the
 * driver has none, the request is retried on bs->file.
 *
 * Returns -ENOMEDIUM when bs has no driver, -ENOTSUP when neither a
 * callback nor bs->file is available, otherwise the callee's result.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_save_vmstate != NULL) {
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    }

    return bs->file ? bdrv_save_vmstate(bs->file, buf, pos, size) : -ENOTSUP;
}
3025 178e08a5 aliguori
3026 45566e9c Christoph Hellwig
/*
 * Load size bytes of VM state from position pos into buf.
 *
 * The driver's bdrv_load_vmstate callback is used when present.  If the
 * driver has none, the request is retried on bs->file.
 *
 * Returns -ENOMEDIUM when bs has no driver, -ENOTSUP when neither a
 * callback nor bs->file is available, otherwise the callee's result.
 */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_load_vmstate != NULL) {
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    }

    return bs->file ? bdrv_load_vmstate(bs->file, buf, pos, size) : -ENOTSUP;
}
3038 178e08a5 aliguori
3039 8b9b0cc2 Kevin Wolf
/*
 * Forward a debug event to the driver of bs.
 *
 * Does nothing when bs has no driver or the driver does not implement
 * bdrv_debug_event.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_debug_event) {
        drv->bdrv_debug_event(bs, event);
    }
}
3049 41c695c7 Kevin Wolf
3050 41c695c7 Kevin Wolf
/*
 * Invoke bdrv_debug_breakpoint on the first node that implements it.
 *
 * The search starts at bs and follows the bs->file links.  It stops at
 * the first node that is NULL or has no driver.
 *
 * Returns the callback's result, or -ENOTSUP when no node in the chain
 * provides the callback.
 */
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    for (; bs && bs->drv; bs = bs->file) {
        if (bs->drv->bdrv_debug_breakpoint) {
            return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
        }
    }

    return -ENOTSUP;
}
3063 41c695c7 Kevin Wolf
3064 41c695c7 Kevin Wolf
/*
 * Invoke bdrv_debug_resume on the first node that implements it.
 *
 * The search starts at bs and follows the bs->file links.  It stops at
 * the first node that is NULL or has no driver.
 *
 * Returns the callback's result, or -ENOTSUP when no node in the chain
 * provides the callback.
 */
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    for (; bs && bs->drv; bs = bs->file) {
        if (bs->drv->bdrv_debug_resume) {
            return bs->drv->bdrv_debug_resume(bs, tag);
        }
    }

    return -ENOTSUP;
}
3076 41c695c7 Kevin Wolf
3077 41c695c7 Kevin Wolf
/*
 * Ask the first node that implements bdrv_debug_is_suspended about tag.
 *
 * The search starts at bs and follows the bs->file links.  It stops at
 * the first node that is NULL or has no driver.
 *
 * Returns the callback's result, or false when no node in the chain
 * provides the callback.
 */
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    for (; bs && bs->drv; bs = bs->file) {
        if (bs->drv->bdrv_debug_is_suspended) {
            return bs->drv->bdrv_debug_is_suspended(bs, tag);
        }
    }

    return false;
}
3089 8b9b0cc2 Kevin Wolf
3090 faea38e7 bellard
/**************************************************************/
3091 faea38e7 bellard
/* handling of snapshots */
3092 faea38e7 bellard
3093 feeee5ac Miguel Di Ciurcio Filho
/*
 * Tell whether internal snapshots can be taken on bs.
 *
 * Returns 0 when bs has no driver, is not inserted, or is read-only.
 * Returns 1 when the driver implements bdrv_snapshot_create.  Otherwise
 * the answer is taken from bs->file, or is 0 when there is no bs->file.
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (drv->bdrv_snapshot_create) {
        return 1;
    }

    /* The driver cannot snapshot on its own; defer to the file below it. */
    return bs->file != NULL ? bdrv_can_snapshot(bs->file) : 0;
}
3109 feeee5ac Miguel Di Ciurcio Filho
3110 199630b6 Blue Swirl
/*
 * Return 1 when BDRV_O_SNAPSHOT is set in bs->open_flags, 0 otherwise.
 */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) != 0;
}
3114 199630b6 Blue Swirl
3115 f9092b10 Markus Armbruster
BlockDriverState *bdrv_snapshots(void)
3116 f9092b10 Markus Armbruster
{
3117 f9092b10 Markus Armbruster
    BlockDriverState *bs;
3118 f9092b10 Markus Armbruster
3119 3ac906f7 Markus Armbruster
    if (bs_snapshots) {
3120 f9092b10 Markus Armbruster
        return bs_snapshots;
3121 3ac906f7 Markus Armbruster
    }
3122 f9092b10 Markus Armbruster
3123 f9092b10 Markus Armbruster
    bs = NULL;
3124 f9092b10 Markus Armbruster
    while ((bs = bdrv_next(bs))) {
3125 f9092b10 Markus Armbruster
        if (bdrv_can_snapshot(bs)) {
3126 3ac906f7 Markus Armbruster
            bs_snapshots = bs;
3127 3ac906f7 Markus Armbruster
            return bs;
3128 f9092b10 Markus Armbruster
        }
3129 f9092b10 Markus Armbruster
    }
3130 f9092b10 Markus Armbruster
    return NULL;
3131 f9092b10 Markus Armbruster
}
3132 f9092b10 Markus Armbruster
3133 5fafdf24 ths
int bdrv_snapshot_create(BlockDriverState *bs,
3134 faea38e7 bellard
                         QEMUSnapshotInfo *sn_info)
3135 faea38e7 bellard
{
3136 faea38e7 bellard
    BlockDriver *drv = bs->drv;
3137 faea38e7 bellard
    if (!drv)
3138 19cb3738 bellard
        return -ENOMEDIUM;
3139 7cdb1f6d MORITA Kazutaka
    if (drv->bdrv_snapshot_create)
3140 7cdb1f6d MORITA Kazutaka
        return drv->bdrv_snapshot_create(bs, sn_info);
3141 7cdb1f6d MORITA Kazutaka
    if (bs->file)
3142 7cdb1f6d MORITA Kazutaka
        return bdrv_snapshot_create(bs->file, sn_info);
3143 7cdb1f6d MORITA Kazutaka
    return -ENOTSUP;
3144 faea38e7 bellard
}
3145 faea38e7 bellard
3146 5fafdf24 ths
/*
 * Revert bs to the internal snapshot identified by snapshot_id.
 *
 * The driver's bdrv_snapshot_goto callback is used when present.
 * Otherwise, if bs has an underlying bs->file, the driver is closed, the
 * snapshot is applied to bs->file, and the driver is re-opened with the
 * original open flags.
 *
 * Returns:
 *   -ENOMEDIUM  bs has no driver
 *   -ENOTSUP    the driver has no callback and there is no bs->file
 *   open error  re-opening the driver failed; bs is then left without a
 *               driver and without a file
 *   otherwise   the result of the callback or of the recursive call
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            /*
             * bdrv_delete() is expected to destroy the child, so clear the
             * pointer; code that follows bs->file without checking bs->drv
             * must not see a stale pointer.
             */
            bs->file = NULL;
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}
3171 faea38e7 bellard
3172 faea38e7 bellard
/*
 * Delete the internal snapshot identified by snapshot_id.
 *
 * The driver's bdrv_snapshot_delete callback is used when present.  If
 * the driver has none, the request is retried on bs->file.
 *
 * Returns -ENOMEDIUM when bs has no driver, -ENOTSUP when neither a
 * callback nor bs->file is available, otherwise the callee's result.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_delete != NULL) {
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    }

    return bs->file ? bdrv_snapshot_delete(bs->file, snapshot_id) : -ENOTSUP;
}
3183 faea38e7 bellard
3184 5fafdf24 ths
int bdrv_snapshot_list(BlockDriverState *bs,
3185 faea38e7 bellard
                       QEMUSnapshotInfo **psn_info)
3186 faea38e7 bellard
{
3187 faea38e7 bellard
    BlockDriver *drv = bs->drv;
3188 faea38e7 bellard
    if (!drv)
3189 19cb3738 bellard
        return -ENOMEDIUM;
3190 7cdb1f6d MORITA Kazutaka
    if (drv->bdrv_snapshot_list)
3191 7cdb1f6d MORITA Kazutaka
        return drv->bdrv_snapshot_list(bs, psn_info);
3192 7cdb1f6d MORITA Kazutaka
    if (bs->file)
3193 7cdb1f6d MORITA Kazutaka
        return bdrv_snapshot_list(bs->file, psn_info);
3194 7cdb1f6d MORITA Kazutaka
    return -ENOTSUP;
3195 faea38e7 bellard
}
3196 faea38e7 bellard
3197 51ef6727 edison
/*
 * Ask the driver to load the snapshot named snapshot_name via its
 * bdrv_snapshot_load_tmp callback.
 *
 * Only allowed on read-only devices.  Unlike the other snapshot helpers
 * this one does not fall back to bs->file.
 *
 * Returns -ENOMEDIUM when bs has no driver, -EINVAL when bs is not
 * read-only, -ENOTSUP when the driver lacks the callback, otherwise the
 * callback's result.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp == NULL) {
        return -ENOTSUP;
    }

    return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
3212 51ef6727 edison
3213 b1b1d783 Jeff Cody
/* backing_file can either be relative, or absolute, or a protocol.  If it is
3214 b1b1d783 Jeff Cody
 * relative, it must be relative to the chain.  So, passing in bs->filename
3215 b1b1d783 Jeff Cody
 * from a BDS as backing_file should not be done, as that may be relative to
3216 b1b1d783 Jeff Cody
 * the CWD rather than the chain. */
3217 e8a6bb9c Marcelo Tosatti
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3218 e8a6bb9c Marcelo Tosatti
        const char *backing_file)
3219 e8a6bb9c Marcelo Tosatti
{
3220 b1b1d783 Jeff Cody
    char *filename_full = NULL;
3221 b1b1d783 Jeff Cody
    char *backing_file_full = NULL;
3222 b1b1d783 Jeff Cody
    char *filename_tmp = NULL;
3223 b1b1d783 Jeff Cody
    int is_protocol = 0;
3224 b1b1d783 Jeff Cody
    BlockDriverState *curr_bs = NULL;
3225 b1b1d783 Jeff Cody
    BlockDriverState *retval = NULL;
3226 b1b1d783 Jeff Cody
3227 b1b1d783 Jeff Cody
    if (!bs || !bs->drv || !backing_file) {
3228 e8a6bb9c Marcelo Tosatti
        return NULL;
3229 e8a6bb9c Marcelo Tosatti
    }
3230 e8a6bb9c Marcelo Tosatti
3231 b1b1d783 Jeff Cody
    filename_full     = g_malloc(PATH_MAX);
3232 b1b1d783 Jeff Cody
    backing_file_full = g_malloc(PATH_MAX);
3233 b1b1d783 Jeff Cody
    filename_tmp      = g_malloc(PATH_MAX);
3234 b1b1d783 Jeff Cody
3235 b1b1d783 Jeff Cody
    is_protocol = path_has_protocol(backing_file);
3236 b1b1d783 Jeff Cody
3237 b1b1d783 Jeff Cody
    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3238 b1b1d783 Jeff Cody
3239 b1b1d783 Jeff Cody
        /* If either of the filename paths is actually a protocol, then
3240 b1b1d783 Jeff Cody
         * compare unmodified paths; otherwise make paths relative */
3241 b1b1d783 Jeff Cody
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3242 b1b1d783 Jeff Cody
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3243 b1b1d783 Jeff Cody
                retval = curr_bs->backing_hd;
3244 b1b1d783 Jeff Cody
                break;
3245 b1b1d783 Jeff Cody
            }
3246 e8a6bb9c Marcelo Tosatti
        } else {
3247 b1b1d783 Jeff Cody
            /* If not an absolute filename path, make it relative to the current
3248 b1b1d783 Jeff Cody
             * image's filename path */
3249 b1b1d783 Jeff Cody
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3250 b1b1d783 Jeff Cody
                         backing_file);
3251 b1b1d783 Jeff Cody
3252 b1b1d783 Jeff Cody
            /* We are going to compare absolute pathnames */
3253 b1b1d783 Jeff Cody
            if (!realpath(filename_tmp, filename_full)) {
3254 b1b1d783 Jeff Cody
                continue;
3255 b1b1d783 Jeff Cody
            }
3256 b1b1d783 Jeff Cody
3257 b1b1d783 Jeff Cody
            /* We need to make sure the backing filename we are comparing against
3258 b1b1d783 Jeff Cody
             * is relative to the current image filename (or absolute) */
3259 b1b1d783 Jeff Cody
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3260 b1b1d783 Jeff Cody
                         curr_bs->backing_file);
3261 b1b1d783 Jeff Cody
3262 b1b1d783 Jeff Cody
            if (!realpath(filename_tmp, backing_file_full)) {
3263 b1b1d783 Jeff Cody
                continue;
3264 b1b1d783 Jeff Cody
            }
3265 b1b1d783 Jeff Cody
3266 b1b1d783 Jeff Cody
            if (strcmp(backing_file_full, filename_full) == 0) {
3267 b1b1d783 Jeff Cody
                retval = curr_bs->backing_hd;
3268 b1b1d783 Jeff Cody
                break;
3269 b1b1d783 Jeff Cody
            }
3270 e8a6bb9c Marcelo Tosatti
        }
3271 e8a6bb9c Marcelo Tosatti
    }
3272 e8a6bb9c Marcelo Tosatti
3273 b1b1d783 Jeff Cody
    g_free(filename_full);
3274 b1b1d783 Jeff Cody
    g_free(backing_file_full);
3275 b1b1d783 Jeff Cody
    g_free(filename_tmp);
3276 b1b1d783 Jeff Cody
    return retval;
3277 e8a6bb9c Marcelo Tosatti
}
3278 e8a6bb9c Marcelo Tosatti
3279 f198fd1c Benoît Canet
/*
 * Count the backing images below bs.
 *
 * Follows bs->backing_hd links, counting one per link, and stops at the
 * first node that has no driver or no backing image.  Returns 0 when bs
 * itself has no driver or no backing image.
 */
int bdrv_get_backing_file_depth(BlockDriverState *bs)
{
    int depth = 0;

    while (bs->drv && bs->backing_hd) {
        depth++;
        bs = bs->backing_hd;
    }

    return depth;
}
3291 f198fd1c Benoît Canet
3292 79fac568 Jeff Cody
/*
 * Return the last image in the backing chain of bs, i.e. the first node
 * reached through backing_hd links that has no backing image itself.
 *
 * Returns NULL when bs is NULL, and bs itself when it has no backing
 * image.
 */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    if (bs == NULL) {
        return NULL;
    }

    while (bs->backing_hd != NULL) {
        bs = bs->backing_hd;
    }

    return bs;
}
3307 79fac568 Jeff Cody
3308 faea38e7 bellard
#define NB_SUFFIXES 4

/*
 * Format size as a short human-readable string in buf.
 *
 * Values up to 999 (including negative values) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and given a K, M,
 * G or T suffix: one decimal digit while the scaled value is below 10,
 * otherwise a rounded integer.  Values too large for the T range are
 * still printed with the T suffix.
 *
 * buf:      destination buffer
 * buf_size: size of the destination buffer, passed on to snprintf()
 *
 * Returns buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            /* Small multiples keep one fractional digit, e.g. "1.5K". */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to the nearest whole unit; the last suffix takes all. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
3337 faea38e7 bellard
3338 faea38e7 bellard
/*
 * Format one line of the snapshot table into buf.
 *
 * With sn == NULL the column header line is produced.  Otherwise the line
 * shows the snapshot's id, name, VM state size (human readable), creation
 * date in local time, and VM clock as HH:MM:SS.mmm.
 *
 * buf:      destination buffer
 * buf_size: size of the destination buffer, passed on to snprintf()
 *
 * Returns buf.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_str[128], date_str[128], clock_str[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t stamp;
    int64_t total_secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    /* Creation date, rendered in local time. */
    stamp = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&stamp);
    strftime(date_str, sizeof(date_str),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&stamp, &tm);
    strftime(date_str, sizeof(date_str),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    /* VM clock: whole seconds split into h/m/s, plus milliseconds. */
    total_secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_str, sizeof(clock_str),
             "%02d:%02d:%02d.%03d",
             (int)(total_secs / 3600),
             (int)((total_secs / 60) % 60),
             (int)(total_secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    get_human_readable_size(size_str, sizeof(size_str), sn->vm_state_size);

    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             size_str,
             date_str,
             clock_str);
    return buf;
}
3380 faea38e7 bellard
3381 ea2384d3 bellard
/**************************************************************/
3382 83f64091 bellard
/* async I/Os */
3383 ea2384d3 bellard
3384 3b69e4b9 aliguori
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
3385 f141eafe aliguori
                                 QEMUIOVector *qiov, int nb_sectors,
3386 3b69e4b9 aliguori
                                 BlockDriverCompletionFunc *cb, void *opaque)
3387 3b69e4b9 aliguori
{
3388 bbf0a440 Stefan Hajnoczi
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3389 bbf0a440 Stefan Hajnoczi
3390 b2a61371 Stefan Hajnoczi
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
3391 8c5873d6 Stefan Hajnoczi
                                 cb, opaque, false);
3392 ea2384d3 bellard
}
3393 ea2384d3 bellard
3394 f141eafe aliguori
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3395 f141eafe aliguori
                                  QEMUIOVector *qiov, int nb_sectors,
3396 f141eafe aliguori
                                  BlockDriverCompletionFunc *cb, void *opaque)
3397 ea2384d3 bellard
{
3398 bbf0a440 Stefan Hajnoczi
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3399 bbf0a440 Stefan Hajnoczi
3400 1a6e115b Stefan Hajnoczi
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
3401 8c5873d6 Stefan Hajnoczi
                                 cb, opaque, true);
3402 83f64091 bellard
}
3403 83f64091 bellard
3404 40b4f539 Kevin Wolf
3405 40b4f539 Kevin Wolf
typedef struct MultiwriteCB {
3406 40b4f539 Kevin Wolf
    int error;
3407 40b4f539 Kevin Wolf
    int num_requests;
3408 40b4f539 Kevin Wolf
    int num_callbacks;
3409 40b4f539 Kevin Wolf
    struct {
3410 40b4f539 Kevin Wolf
        BlockDriverCompletionFunc *cb;
3411 40b4f539 Kevin Wolf
        void *opaque;
3412 40b4f539 Kevin Wolf
        QEMUIOVector *free_qiov;
3413 40b4f539 Kevin Wolf
    } callbacks[];
3414 40b4f539 Kevin Wolf
} MultiwriteCB;
3415 40b4f539 Kevin Wolf
3416 40b4f539 Kevin Wolf
/*
 * Run every caller-supplied completion function of the batch with the
 * batch's error status, and release any merged I/O vector attached to the
 * corresponding entry.
 */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int idx;

    for (idx = 0; idx < mcb->num_callbacks; idx++) {
        QEMUIOVector *merged;

        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);

        /* Only entries that took part in a merge own an I/O vector. */
        merged = mcb->callbacks[idx].free_qiov;
        if (merged) {
            qemu_iovec_destroy(merged);
            g_free(merged);
        }
    }
}
3428 40b4f539 Kevin Wolf
3429 40b4f539 Kevin Wolf
static void multiwrite_cb(void *opaque, int ret)
3430 40b4f539 Kevin Wolf
{
3431 40b4f539 Kevin Wolf
    MultiwriteCB *mcb = opaque;
3432 40b4f539 Kevin Wolf
3433 6d519a5f Stefan Hajnoczi
    trace_multiwrite_cb(mcb, ret);
3434 6d519a5f Stefan Hajnoczi
3435 cb6d3ca0 Kevin Wolf
    if (ret < 0 && !mcb->error) {
3436 40b4f539 Kevin Wolf
        mcb->error = ret;
3437 40b4f539 Kevin Wolf
    }
3438 40b4f539 Kevin Wolf
3439 40b4f539 Kevin Wolf
    mcb->num_requests--;
3440 40b4f539 Kevin Wolf
    if (mcb->num_requests == 0) {
3441 de189a1b Kevin Wolf
        multiwrite_user_cb(mcb);
3442 7267c094 Anthony Liguori
        g_free(mcb);
3443 40b4f539 Kevin Wolf
    }
3444 40b4f539 Kevin Wolf
}
3445 40b4f539 Kevin Wolf
3446 40b4f539 Kevin Wolf
static int multiwrite_req_compare(const void *a, const void *b)
3447 40b4f539 Kevin Wolf
{
3448 77be4366 Christoph Hellwig
    const BlockRequest *req1 = a, *req2 = b;
3449 77be4366 Christoph Hellwig
3450 77be4366 Christoph Hellwig
    /*
3451 77be4366 Christoph Hellwig
     * Note that we can't simply subtract req2->sector from req1->sector
3452 77be4366 Christoph Hellwig
     * here as that could overflow the return value.
3453 77be4366 Christoph Hellwig
     */
3454 77be4366 Christoph Hellwig
    if (req1->sector > req2->sector) {
3455 77be4366 Christoph Hellwig
        return 1;
3456 77be4366 Christoph Hellwig
    } else if (req1->sector < req2->sector) {
3457 77be4366 Christoph Hellwig
        return -1;
3458 77be4366 Christoph Hellwig
    } else {
3459 77be4366 Christoph Hellwig
        return 0;
3460 77be4366 Christoph Hellwig
    }
3461 40b4f539 Kevin Wolf
}
3462 40b4f539 Kevin Wolf
3463 40b4f539 Kevin Wolf
/*
3464 40b4f539 Kevin Wolf
 * Takes a bunch of requests and tries to merge them. Returns the number of
3465 40b4f539 Kevin Wolf
 * requests that remain after merging.
3466 40b4f539 Kevin Wolf
 */
3467 40b4f539 Kevin Wolf
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3468 40b4f539 Kevin Wolf
    int num_reqs, MultiwriteCB *mcb)
3469 40b4f539 Kevin Wolf
{
3470 40b4f539 Kevin Wolf
    int i, outidx;
3471 40b4f539 Kevin Wolf
3472 40b4f539 Kevin Wolf
    // Sort requests by start sector
3473 40b4f539 Kevin Wolf
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3474 40b4f539 Kevin Wolf
3475 40b4f539 Kevin Wolf
    // Check if adjacent requests touch the same clusters. If so, combine them,
3476 40b4f539 Kevin Wolf
    // filling up gaps with zero sectors.
3477 40b4f539 Kevin Wolf
    outidx = 0;
3478 40b4f539 Kevin Wolf
    for (i = 1; i < num_reqs; i++) {
3479 40b4f539 Kevin Wolf
        int merge = 0;
3480 40b4f539 Kevin Wolf
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3481 40b4f539 Kevin Wolf
3482 b6a127a1 Paolo Bonzini
        // Handle exactly sequential writes and overlapping writes.
3483 40b4f539 Kevin Wolf
        if (reqs[i].sector <= oldreq_last) {
3484 40b4f539 Kevin Wolf
            merge = 1;
3485 40b4f539 Kevin Wolf
        }
3486 40b4f539 Kevin Wolf
3487 e2a305fb Christoph Hellwig
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3488 e2a305fb Christoph Hellwig
            merge = 0;
3489 e2a305fb Christoph Hellwig
        }
3490 e2a305fb Christoph Hellwig
3491 40b4f539 Kevin Wolf
        if (merge) {
3492 40b4f539 Kevin Wolf
            size_t size;
3493 7267c094 Anthony Liguori
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
3494 40b4f539 Kevin Wolf
            qemu_iovec_init(qiov,
3495 40b4f539 Kevin Wolf
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3496 40b4f539 Kevin Wolf
3497 40b4f539 Kevin Wolf
            // Add the first request to the merged one. If the requests are
3498 40b4f539 Kevin Wolf
            // overlapping, drop the last sectors of the first request.
3499 40b4f539 Kevin Wolf
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
3500 1b093c48 Michael Tokarev
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
3501 40b4f539 Kevin Wolf
3502 b6a127a1 Paolo Bonzini
            // We should need to add any zeros between the two requests
3503 b6a127a1 Paolo Bonzini
            assert (reqs[i].sector <= oldreq_last);
3504 40b4f539 Kevin Wolf
3505 40b4f539 Kevin Wolf
            // Add the second request
3506 1b093c48 Michael Tokarev
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
3507 40b4f539 Kevin Wolf
3508 cbf1dff2 Kevin Wolf
            reqs[outidx].nb_sectors = qiov->size >> 9;
3509 40b4f539 Kevin Wolf
            reqs[outidx].qiov = qiov;
3510 40b4f539 Kevin Wolf
3511 40b4f539 Kevin Wolf
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3512 40b4f539 Kevin Wolf
        } else {
3513 40b4f539 Kevin Wolf
            outidx++;
3514 40b4f539 Kevin Wolf
            reqs[outidx].sector     = reqs[i].sector;
3515 40b4f539 Kevin Wolf
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3516 40b4f539 Kevin Wolf
            reqs[outidx].qiov       = reqs[i].qiov;
3517 40b4f539 Kevin Wolf
        }
3518 40b4f539 Kevin Wolf
    }
3519 40b4f539 Kevin Wolf
3520 40b4f539 Kevin Wolf
    return outidx + 1;
3521 40b4f539 Kevin Wolf
}
3522 40b4f539 Kevin Wolf
3523 40b4f539 Kevin Wolf
/*
3524 40b4f539 Kevin Wolf
 * Submit multiple AIO write requests at once.
3525 40b4f539 Kevin Wolf
 *
3526 40b4f539 Kevin Wolf
 * On success, the function returns 0 and all requests in the reqs array have
3527 40b4f539 Kevin Wolf
 * been submitted. In error case this function returns -1, and any of the
3528 40b4f539 Kevin Wolf
 * requests may or may not be submitted yet. In particular, this means that the
3529 40b4f539 Kevin Wolf
 * callback will be called for some of the requests, for others it won't. The
3530 40b4f539 Kevin Wolf
 * caller must check the error field of the BlockRequest to wait for the right
3531 40b4f539 Kevin Wolf
 * callbacks (if error != 0, no callback will be called).
3532 40b4f539 Kevin Wolf
 *
3533 40b4f539 Kevin Wolf
 * The implementation may modify the contents of the reqs array, e.g. to merge
3534 40b4f539 Kevin Wolf
 * requests. However, the fields opaque and error are left unmodified as they
3535 40b4f539 Kevin Wolf
 * are used to signal failure for a single request to the caller.
3536 40b4f539 Kevin Wolf
 */
3537 40b4f539 Kevin Wolf
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3538 40b4f539 Kevin Wolf
{
3539 40b4f539 Kevin Wolf
    MultiwriteCB *mcb;
3540 40b4f539 Kevin Wolf
    int i;
3541 40b4f539 Kevin Wolf
3542 301db7c2 Ryan Harper
    /* don't submit writes if we don't have a medium */
3543 301db7c2 Ryan Harper
    if (bs->drv == NULL) {
3544 301db7c2 Ryan Harper
        for (i = 0; i < num_reqs; i++) {
3545 301db7c2 Ryan Harper
            reqs[i].error = -ENOMEDIUM;
3546 301db7c2 Ryan Harper
        }
3547 301db7c2 Ryan Harper
        return -1;
3548 301db7c2 Ryan Harper
    }
3549 301db7c2 Ryan Harper
3550 40b4f539 Kevin Wolf
    if (num_reqs == 0) {
3551 40b4f539 Kevin Wolf
        return 0;
3552 40b4f539 Kevin Wolf
    }
3553 40b4f539 Kevin Wolf
3554 40b4f539 Kevin Wolf
    // Create MultiwriteCB structure
3555 7267c094 Anthony Liguori
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
3556 40b4f539 Kevin Wolf
    mcb->num_requests = 0;
3557 40b4f539 Kevin Wolf
    mcb->num_callbacks = num_reqs;
3558 40b4f539 Kevin Wolf
3559 40b4f539 Kevin Wolf
    for (i = 0; i < num_reqs; i++) {
3560 40b4f539 Kevin Wolf
        mcb->callbacks[i].cb = reqs[i].cb;
3561 40b4f539 Kevin Wolf
        mcb->callbacks[i].opaque = reqs[i].opaque;
3562 40b4f539 Kevin Wolf
    }
3563 40b4f539 Kevin Wolf
3564 40b4f539 Kevin Wolf
    // Check for mergable requests
3565 40b4f539 Kevin Wolf
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3566 40b4f539 Kevin Wolf
3567 6d519a5f Stefan Hajnoczi
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3568 6d519a5f Stefan Hajnoczi
3569 df9309fb Paolo Bonzini
    /* Run the aio requests. */
3570 df9309fb Paolo Bonzini
    mcb->num_requests = num_reqs;
3571 40b4f539 Kevin Wolf
    for (i = 0; i < num_reqs; i++) {
3572 ad54ae80 Paolo Bonzini
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
3573 40b4f539 Kevin Wolf
            reqs[i].nb_sectors, multiwrite_cb, mcb);
3574 40b4f539 Kevin Wolf
    }
3575 40b4f539 Kevin Wolf
3576 40b4f539 Kevin Wolf
    return 0;
3577 40b4f539 Kevin Wolf
}
3578 40b4f539 Kevin Wolf
3579 83f64091 bellard
/*
 * Cancel an asynchronous request by dispatching to the cancel hook that is
 * registered in the request's AIOCBInfo.
 */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    const AIOCBInfo *info = acb->aiocb_info;

    info->cancel(acb);
}
3583 83f64091 bellard
3584 98f90dba Zhi Yong Wu
/* block I/O throttling */
3585 98f90dba Zhi Yong Wu
/*
 * Decide whether a request of nb_sectors sectors would exceed the
 * bytes-per-second limit within the current accounting slice.
 *
 * The total limit (io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) takes precedence;
 * when it is zero the per-direction limit selected by is_write is used.
 * elapsed_time is the time already spent in the slice, in seconds.
 *
 * Returns false and stores 0 in *wait (if wait is non-NULL) when no limit
 * is configured or the request still fits into the slice budget.
 * Otherwise returns true, updates bs->slice_time and bs->slice_end, and
 * stores the estimated delay before the request may be dispatched in *wait.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    const bool total_limited = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL] != 0;
    uint64_t limit;
    double   window, budget;
    double   consumed, requested;
    double   delay;

    if (total_limited) {
        limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        limit = bs->io_limits.bps[is_write];
    } else {
        /* Neither a total nor a per-direction limit is set */
        goto within_limits;
    }

    /* Length of the current slice in seconds, and the bytes it allows */
    window = bs->slice_end - bs->slice_start;
    window /= (NANOSECONDS_PER_SECOND);
    budget = limit * window;

    /*
     * consumed:  bytes already transferred since the slice base was
     *            recorded; both directions count against a total limit.
     * requested: bytes this request still needs to transfer.
     */
    consumed = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (total_limited) {
        consumed += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }
    requested = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (consumed + requested <= budget) {
        goto within_limits;
    }

    /*
     * (consumed + requested) / limit is the time needed to transfer all
     * of the data at the limited rate; the part of it that has not yet
     * elapsed is the approximate time until dispatch.
     */
    delay = (consumed + requested) / limit - elapsed_time;

    /*
     * The limit is exceeded, so the slice end is pushed out to keep the
     * current statistics alive until the timer fires.  The scaling factors
     * were tuned experimentally.
     */
    bs->slice_time = delay * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = delay * BLOCK_IO_SLICE_TIME * 10;
    }
    return true;

within_limits:
    if (wait) {
        *wait = 0;
    }
    return false;
}
3644 98f90dba Zhi Yong Wu
3645 98f90dba Zhi Yong Wu
/*
 * Decide whether one more request would exceed the operations-per-second
 * limit within the current accounting slice.
 *
 * The total limit (io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) takes precedence;
 * when it is zero the per-direction limit selected by is_write is used.
 * elapsed_time is the time already spent in the slice, in seconds.
 *
 * Returns false and stores 0 in *wait (if wait is non-NULL) when no limit
 * is configured or the request still fits into the slice budget.
 * Otherwise returns true, updates bs->slice_time and bs->slice_end, and
 * stores the estimated delay before the request may be dispatched in *wait.
 */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    const bool total_limited = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL] != 0;
    uint64_t limit;
    double   window, budget;
    double   issued;
    double   delay;

    if (total_limited) {
        limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        limit = bs->io_limits.iops[is_write];
    } else {
        /* Neither a total nor a per-direction limit is set */
        goto within_limits;
    }

    /* Length of the current slice in seconds, and the requests it allows */
    window = bs->slice_end - bs->slice_start;
    window /= (NANOSECONDS_PER_SECOND);
    budget = limit * window;

    /* Requests issued since the slice base; both directions for a total */
    issued = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (total_limited) {
        issued += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* The request being checked counts as one more operation */
    if (issued + 1 <= budget) {
        goto within_limits;
    }

    /* Approximate time until dispatch, never negative */
    delay = (issued + 1) / limit;
    delay = (delay > elapsed_time) ? delay - elapsed_time : 0;

    bs->slice_time = delay * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = delay * BLOCK_IO_SLICE_TIME * 10;
    }
    return true;

within_limits:
    if (wait) {
        *wait = 0;
    }
    return false;
}
3696 98f90dba Zhi Yong Wu
3697 98f90dba Zhi Yong Wu
/*
 * Check a request against both the bandwidth and the request-rate limits.
 *
 * If the current time lies outside the active slice, a fresh slice is
 * started and the byte/operation counters are snapshotted as its base;
 * otherwise the end of the active slice is moved to now + slice_time.
 *
 * Returns true when either limit is exceeded; *wait (if non-NULL) then
 * receives the larger of the two delays and the slice is extended to cover
 * it.  Returns false with *wait set to 0 otherwise.
 */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    uint64_t bps_delay = 0, iops_delay = 0;
    int64_t  now, longest;
    double   elapsed_time;
    bool     in_slice;
    int      over_bps, over_iops;

    now = qemu_get_clock_ns(vm_clock);
    in_slice = (bs->slice_start < now) && (bs->slice_end > now);

    if (!in_slice) {
        /* Open a new slice and record the counters as its baseline */
        bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    } else {
        bs->slice_end = now + bs->slice_time;
    }

    /* Seconds elapsed since the slice started */
    elapsed_time  = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    /* Both checks always run; each of them may adjust the slice */
    over_bps  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                       is_write, elapsed_time, &bps_delay);
    over_iops = bdrv_exceed_iops_limits(bs, is_write,
                                        elapsed_time, &iops_delay);

    if (!over_bps && !over_iops) {
        if (wait) {
            *wait = 0;
        }
        return false;
    }

    longest = bps_delay > iops_delay ? bps_delay : iops_delay;
    if (wait) {
        *wait = longest;
    }

    /* Make sure the slice lasts at least until the request can be retried */
    now = qemu_get_clock_ns(vm_clock);
    if (bs->slice_end < now + longest) {
        bs->slice_end = now + longest;
    }

    return true;
}
3748 ce1a14dc pbrook
3749 83f64091 bellard
/**************************************************************/
3750 83f64091 bellard
/* async block device emulation */
3751 83f64091 bellard
3752 c16b5a2c Christoph Hellwig
typedef struct BlockDriverAIOCBSync {
3753 c16b5a2c Christoph Hellwig
    BlockDriverAIOCB common;
3754 c16b5a2c Christoph Hellwig
    QEMUBH *bh;
3755 c16b5a2c Christoph Hellwig
    int ret;
3756 c16b5a2c Christoph Hellwig
    /* vector translation state */
3757 c16b5a2c Christoph Hellwig
    QEMUIOVector *qiov;
3758 c16b5a2c Christoph Hellwig
    uint8_t *bounce;
3759 c16b5a2c Christoph Hellwig
    int is_write;
3760 c16b5a2c Christoph Hellwig
} BlockDriverAIOCBSync;
3761 c16b5a2c Christoph Hellwig
3762 c16b5a2c Christoph Hellwig
/*
 * Cancel a request created by bdrv_aio_rw_vector() before its completion
 * bottom half has run.
 *
 * The bottom half is deleted, so the completion callback is never invoked.
 * The bounce buffer is normally released by bdrv_aio_bh_cb(); because that
 * handler will no longer run, the buffer must be freed here or it leaks.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);

    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    /* Release the bounce buffer that bdrv_aio_bh_cb() would have freed */
    qemu_vfree(acb->bounce);
    acb->bounce = NULL;

    qemu_aio_release(acb);
}
3770 c16b5a2c Christoph Hellwig
3771 d7331bed Stefan Hajnoczi
static const AIOCBInfo bdrv_em_aiocb_info = {
3772 c16b5a2c Christoph Hellwig
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
3773 c16b5a2c Christoph Hellwig
    .cancel             = bdrv_aio_cancel_em,
3774 c16b5a2c Christoph Hellwig
};
3775 c16b5a2c Christoph Hellwig
3776 ce1a14dc pbrook
static void bdrv_aio_bh_cb(void *opaque)
3777 83f64091 bellard
{
3778 ce1a14dc pbrook
    BlockDriverAIOCBSync *acb = opaque;
3779 f141eafe aliguori
3780 f141eafe aliguori
    if (!acb->is_write)
3781 03396148 Michael Tokarev
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
3782 ceb42de8 aliguori
    qemu_vfree(acb->bounce);
3783 ce1a14dc pbrook
    acb->common.cb(acb->common.opaque, acb->ret);
3784 6a7ad299 Dor Laor
    qemu_bh_delete(acb->bh);
3785 36afc451 Avi Kivity
    acb->bh = NULL;
3786 ce1a14dc pbrook
    qemu_aio_release(acb);
3787 83f64091 bellard
}
3788 beac80cd bellard
3789 f141eafe aliguori
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3790 f141eafe aliguori
                                            int64_t sector_num,
3791 f141eafe aliguori
                                            QEMUIOVector *qiov,
3792 f141eafe aliguori
                                            int nb_sectors,
3793 f141eafe aliguori
                                            BlockDriverCompletionFunc *cb,
3794 f141eafe aliguori
                                            void *opaque,
3795 f141eafe aliguori
                                            int is_write)
3796 f141eafe aliguori
3797 83f64091 bellard
{
3798 ce1a14dc pbrook
    BlockDriverAIOCBSync *acb;
3799 ce1a14dc pbrook
3800 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
3801 f141eafe aliguori
    acb->is_write = is_write;
3802 f141eafe aliguori
    acb->qiov = qiov;
3803 e268ca52 aliguori
    acb->bounce = qemu_blockalign(bs, qiov->size);
3804 3f3aace8 Paolo Bonzini
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
3805 f141eafe aliguori
3806 f141eafe aliguori
    if (is_write) {
3807 d5e6b161 Michael Tokarev
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
3808 1ed20acf Stefan Hajnoczi
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
3809 f141eafe aliguori
    } else {
3810 1ed20acf Stefan Hajnoczi
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
3811 f141eafe aliguori
    }
3812 f141eafe aliguori
3813 ce1a14dc pbrook
    qemu_bh_schedule(acb->bh);
3814 f141eafe aliguori
3815 ce1a14dc pbrook
    return &acb->common;
3816 beac80cd bellard
}
3817 beac80cd bellard
3818 f141eafe aliguori
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3819 f141eafe aliguori
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3820 ce1a14dc pbrook
        BlockDriverCompletionFunc *cb, void *opaque)
3821 beac80cd bellard
{
3822 f141eafe aliguori
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3823 f141eafe aliguori
}
3824 83f64091 bellard
3825 f141eafe aliguori
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3826 f141eafe aliguori
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3827 f141eafe aliguori
        BlockDriverCompletionFunc *cb, void *opaque)
3828 f141eafe aliguori
{
3829 f141eafe aliguori
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3830 beac80cd bellard
}
3831 beac80cd bellard
3832 68485420 Kevin Wolf
3833 68485420 Kevin Wolf
typedef struct BlockDriverAIOCBCoroutine {
3834 68485420 Kevin Wolf
    BlockDriverAIOCB common;
3835 68485420 Kevin Wolf
    BlockRequest req;
3836 68485420 Kevin Wolf
    bool is_write;
3837 d318aea9 Kevin Wolf
    bool *done;
3838 68485420 Kevin Wolf
    QEMUBH* bh;
3839 68485420 Kevin Wolf
} BlockDriverAIOCBCoroutine;
3840 68485420 Kevin Wolf
3841 68485420 Kevin Wolf
/*
 * "Cancel" a coroutine-based request by waiting for it to finish.
 *
 * A local flag is published through acb->done; bdrv_co_em_bh() sets it once
 * the completion callback has run.  Until then qemu_aio_wait() is called
 * repeatedly.
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBCoroutine *co_acb;
    bool finished = false;

    co_acb = container_of(blockacb, BlockDriverAIOCBCoroutine, common);
    co_acb->done = &finished;

    while (finished == false) {
        qemu_aio_wait();
    }
}
3852 68485420 Kevin Wolf
3853 d7331bed Stefan Hajnoczi
static const AIOCBInfo bdrv_em_co_aiocb_info = {
3854 68485420 Kevin Wolf
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
3855 68485420 Kevin Wolf
    .cancel             = bdrv_aio_co_cancel_em,
3856 68485420 Kevin Wolf
};
3857 68485420 Kevin Wolf
3858 35246a68 Paolo Bonzini
static void bdrv_co_em_bh(void *opaque)
3859 68485420 Kevin Wolf
{
3860 68485420 Kevin Wolf
    BlockDriverAIOCBCoroutine *acb = opaque;
3861 68485420 Kevin Wolf
3862 68485420 Kevin Wolf
    acb->common.cb(acb->common.opaque, acb->req.error);
3863 d318aea9 Kevin Wolf
3864 d318aea9 Kevin Wolf
    if (acb->done) {
3865 d318aea9 Kevin Wolf
        *acb->done = true;
3866 d318aea9 Kevin Wolf
    }
3867 d318aea9 Kevin Wolf
3868 68485420 Kevin Wolf
    qemu_bh_delete(acb->bh);
3869 68485420 Kevin Wolf
    qemu_aio_release(acb);
3870 68485420 Kevin Wolf
}
3871 68485420 Kevin Wolf
3872 b2a61371 Stefan Hajnoczi
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3873 b2a61371 Stefan Hajnoczi
static void coroutine_fn bdrv_co_do_rw(void *opaque)
3874 b2a61371 Stefan Hajnoczi
{
3875 b2a61371 Stefan Hajnoczi
    BlockDriverAIOCBCoroutine *acb = opaque;
3876 b2a61371 Stefan Hajnoczi
    BlockDriverState *bs = acb->common.bs;
3877 b2a61371 Stefan Hajnoczi
3878 b2a61371 Stefan Hajnoczi
    if (!acb->is_write) {
3879 b2a61371 Stefan Hajnoczi
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3880 470c0504 Stefan Hajnoczi
            acb->req.nb_sectors, acb->req.qiov, 0);
3881 b2a61371 Stefan Hajnoczi
    } else {
3882 b2a61371 Stefan Hajnoczi
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3883 f08f2dda Stefan Hajnoczi
            acb->req.nb_sectors, acb->req.qiov, 0);
3884 b2a61371 Stefan Hajnoczi
    }
3885 b2a61371 Stefan Hajnoczi
3886 35246a68 Paolo Bonzini
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3887 b2a61371 Stefan Hajnoczi
    qemu_bh_schedule(acb->bh);
3888 b2a61371 Stefan Hajnoczi
}
3889 b2a61371 Stefan Hajnoczi
3890 68485420 Kevin Wolf
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3891 68485420 Kevin Wolf
                                               int64_t sector_num,
3892 68485420 Kevin Wolf
                                               QEMUIOVector *qiov,
3893 68485420 Kevin Wolf
                                               int nb_sectors,
3894 68485420 Kevin Wolf
                                               BlockDriverCompletionFunc *cb,
3895 68485420 Kevin Wolf
                                               void *opaque,
3896 8c5873d6 Stefan Hajnoczi
                                               bool is_write)
3897 68485420 Kevin Wolf
{
3898 68485420 Kevin Wolf
    Coroutine *co;
3899 68485420 Kevin Wolf
    BlockDriverAIOCBCoroutine *acb;
3900 68485420 Kevin Wolf
3901 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
3902 68485420 Kevin Wolf
    acb->req.sector = sector_num;
3903 68485420 Kevin Wolf
    acb->req.nb_sectors = nb_sectors;
3904 68485420 Kevin Wolf
    acb->req.qiov = qiov;
3905 68485420 Kevin Wolf
    acb->is_write = is_write;
3906 d318aea9 Kevin Wolf
    acb->done = NULL;
3907 68485420 Kevin Wolf
3908 8c5873d6 Stefan Hajnoczi
    co = qemu_coroutine_create(bdrv_co_do_rw);
3909 68485420 Kevin Wolf
    qemu_coroutine_enter(co, acb);
3910 68485420 Kevin Wolf
3911 68485420 Kevin Wolf
    return &acb->common;
3912 68485420 Kevin Wolf
}
3913 68485420 Kevin Wolf
3914 07f07615 Paolo Bonzini
/*
 * Coroutine entry point for bdrv_aio_flush(): runs bdrv_co_flush(), stores
 * its result in req.error and schedules bdrv_co_em_bh() to report it.
 */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *co_acb = opaque;

    co_acb->req.error = bdrv_co_flush(co_acb->common.bs);

    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
    qemu_bh_schedule(co_acb->bh);
}
3923 b2e12bc6 Christoph Hellwig
3924 07f07615 Paolo Bonzini
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
3925 016f5cf6 Alexander Graf
        BlockDriverCompletionFunc *cb, void *opaque)
3926 016f5cf6 Alexander Graf
{
3927 07f07615 Paolo Bonzini
    trace_bdrv_aio_flush(bs, opaque);
3928 016f5cf6 Alexander Graf
3929 07f07615 Paolo Bonzini
    Coroutine *co;
3930 07f07615 Paolo Bonzini
    BlockDriverAIOCBCoroutine *acb;
3931 016f5cf6 Alexander Graf
3932 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
3933 d318aea9 Kevin Wolf
    acb->done = NULL;
3934 d318aea9 Kevin Wolf
3935 07f07615 Paolo Bonzini
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3936 07f07615 Paolo Bonzini
    qemu_coroutine_enter(co, acb);
3937 016f5cf6 Alexander Graf
3938 016f5cf6 Alexander Graf
    return &acb->common;
3939 016f5cf6 Alexander Graf
}
3940 016f5cf6 Alexander Graf
3941 4265d620 Paolo Bonzini
/*
 * Coroutine entry point for bdrv_aio_discard(): runs bdrv_co_discard() on
 * the stored sector range, records the result in req.error and schedules
 * bdrv_co_em_bh() to report it.
 */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *co_acb = opaque;

    co_acb->req.error = bdrv_co_discard(co_acb->common.bs,
                                        co_acb->req.sector,
                                        co_acb->req.nb_sectors);

    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
    qemu_bh_schedule(co_acb->bh);
}
3950 4265d620 Paolo Bonzini
3951 4265d620 Paolo Bonzini
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3952 4265d620 Paolo Bonzini
        int64_t sector_num, int nb_sectors,
3953 4265d620 Paolo Bonzini
        BlockDriverCompletionFunc *cb, void *opaque)
3954 4265d620 Paolo Bonzini
{
3955 4265d620 Paolo Bonzini
    Coroutine *co;
3956 4265d620 Paolo Bonzini
    BlockDriverAIOCBCoroutine *acb;
3957 4265d620 Paolo Bonzini
3958 4265d620 Paolo Bonzini
    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3959 4265d620 Paolo Bonzini
3960 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
3961 4265d620 Paolo Bonzini
    acb->req.sector = sector_num;
3962 4265d620 Paolo Bonzini
    acb->req.nb_sectors = nb_sectors;
3963 d318aea9 Kevin Wolf
    acb->done = NULL;
3964 4265d620 Paolo Bonzini
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3965 4265d620 Paolo Bonzini
    qemu_coroutine_enter(co, acb);
3966 4265d620 Paolo Bonzini
3967 4265d620 Paolo Bonzini
    return &acb->common;
3968 4265d620 Paolo Bonzini
}
3969 4265d620 Paolo Bonzini
3970 ea2384d3 bellard
void bdrv_init(void)
3971 ea2384d3 bellard
{
3972 5efa9d5a Anthony Liguori
    module_call_init(MODULE_INIT_BLOCK);
3973 ea2384d3 bellard
}
3974 ce1a14dc pbrook
3975 eb852011 Markus Armbruster
void bdrv_init_with_whitelist(void)
3976 eb852011 Markus Armbruster
{
3977 eb852011 Markus Armbruster
    use_bdrv_whitelist = 1;
3978 eb852011 Markus Armbruster
    bdrv_init();
3979 eb852011 Markus Armbruster
}
3980 eb852011 Markus Armbruster
3981 d7331bed Stefan Hajnoczi
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
3982 c16b5a2c Christoph Hellwig
                   BlockDriverCompletionFunc *cb, void *opaque)
3983 ce1a14dc pbrook
{
3984 ce1a14dc pbrook
    BlockDriverAIOCB *acb;
3985 ce1a14dc pbrook
3986 d7331bed Stefan Hajnoczi
    acb = g_slice_alloc(aiocb_info->aiocb_size);
3987 d7331bed Stefan Hajnoczi
    acb->aiocb_info = aiocb_info;
3988 ce1a14dc pbrook
    acb->bs = bs;
3989 ce1a14dc pbrook
    acb->cb = cb;
3990 ce1a14dc pbrook
    acb->opaque = opaque;
3991 ce1a14dc pbrook
    return acb;
3992 ce1a14dc pbrook
}
3993 ce1a14dc pbrook
3994 ce1a14dc pbrook
void qemu_aio_release(void *p)
3995 ce1a14dc pbrook
{
3996 d37c975f Stefan Hajnoczi
    BlockDriverAIOCB *acb = p;
3997 d7331bed Stefan Hajnoczi
    g_slice_free1(acb->aiocb_info->aiocb_size, acb);
3998 ce1a14dc pbrook
}
3999 19cb3738 bellard
4000 19cb3738 bellard
/**************************************************************/
4001 f9f05dc5 Kevin Wolf
/* Coroutine block device emulation */
4002 f9f05dc5 Kevin Wolf
4003 f9f05dc5 Kevin Wolf
typedef struct CoroutineIOCompletion {
4004 f9f05dc5 Kevin Wolf
    Coroutine *coroutine;
4005 f9f05dc5 Kevin Wolf
    int ret;
4006 f9f05dc5 Kevin Wolf
} CoroutineIOCompletion;
4007 f9f05dc5 Kevin Wolf
4008 f9f05dc5 Kevin Wolf
static void bdrv_co_io_em_complete(void *opaque, int ret)
4009 f9f05dc5 Kevin Wolf
{
4010 f9f05dc5 Kevin Wolf
    CoroutineIOCompletion *co = opaque;
4011 f9f05dc5 Kevin Wolf
4012 f9f05dc5 Kevin Wolf
    co->ret = ret;
4013 f9f05dc5 Kevin Wolf
    qemu_coroutine_enter(co->coroutine, NULL);
4014 f9f05dc5 Kevin Wolf
}
4015 f9f05dc5 Kevin Wolf
4016 f9f05dc5 Kevin Wolf
/*
 * Perform a vectored read or write from coroutine context using the
 * driver's bdrv_aio_readv/bdrv_aio_writev callbacks.
 *
 * The request is submitted with bdrv_co_io_em_complete() as completion
 * callback, then the coroutine yields until that callback re-enters it.
 *
 * Returns the result reported by the driver, or -EIO if the driver did not
 * return an AIOCB.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion completion = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    acb = is_write
        ? bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                   bdrv_co_io_em_complete, &completion)
        : bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                  bdrv_co_io_em_complete, &completion);

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);

    if (acb == NULL) {
        return -EIO;
    }

    /* Resumed by bdrv_co_io_em_complete() once the request has finished */
    qemu_coroutine_yield();

    return completion.ret;
}
4041 f9f05dc5 Kevin Wolf
4042 f9f05dc5 Kevin Wolf
/* Coroutine read emulated on top of AIO: bdrv_co_io_em() with is_write off */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = false;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
4048 f9f05dc5 Kevin Wolf
4049 f9f05dc5 Kevin Wolf
/* Coroutine write emulated on top of AIO: bdrv_co_io_em() with is_write on */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = true;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
4055 f9f05dc5 Kevin Wolf
4056 07f07615 Paolo Bonzini
/* Coroutine entry point for bdrv_flush(): stores the flush result in RwCo */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *flush_req = opaque;

    flush_req->ret = bdrv_co_flush(flush_req->bs);
}
4062 07f07615 Paolo Bonzini
4063 07f07615 Paolo Bonzini
/*
 * Flush bs from coroutine context, then flush bs->file recursively.
 *
 * Returns 0 immediately when bs is NULL, has no medium inserted or is
 * read-only.  Otherwise returns the first negative error reported by a
 * driver callback, or the result of flushing bs->file.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (bs == NULL || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Cached data is written back to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* With cache=unsafe (BDRV_O_NO_FLUSH) nothing is forced to the disk */
    if (!(bs->open_flags & BDRV_O_NO_FLUSH)) {
        if (bs->drv->bdrv_co_flush_to_disk) {
            ret = bs->drv->bdrv_co_flush_to_disk(bs);
        } else if (bs->drv->bdrv_aio_flush) {
            CoroutineIOCompletion completion = {
                .coroutine = qemu_coroutine_self(),
            };
            BlockDriverAIOCB *acb;

            acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete,
                                          &completion);
            if (acb == NULL) {
                ret = -EIO;
            } else {
                /* Resumed by bdrv_co_io_em_complete() */
                qemu_coroutine_yield();
                ret = completion.ret;
            }
        } else {
            /*
             * Some block drivers always operate in either writethrough or
             * unsafe mode and therefore provide no flush callback.  Usually
             * qemu does not know how the server behaves (it is hardcoded or
             * depends on server-side configuration), so it cannot ensure
             * that everything is safe on disk.  Returning an error is not
             * an option because that would break guests even when the
             * server operates in writethrough mode.
             *
             * The user is trusted to know what they are doing.
             */
            ret = 0;
        }

        if (ret < 0) {
            return ret;
        }
    }

    /*
     * Now flush the underlying protocol.  It will also have
     * BDRV_O_NO_FLUSH in the case of cache=unsafe, so there are no useless
     * flushes.
     */
    return bdrv_co_flush(bs->file);
}
4123 07f07615 Paolo Bonzini
4124 0f15423c Anthony Liguori
/*
 * Call the driver's bdrv_invalidate_cache hook for bs.  Does nothing when
 * bs has no driver or the driver does not provide the hook.
 */
void bdrv_invalidate_cache(BlockDriverState *bs)
{
    if (!bs->drv || !bs->drv->bdrv_invalidate_cache) {
        return;
    }

    bs->drv->bdrv_invalidate_cache(bs);
}
4130 0f15423c Anthony Liguori
4131 0f15423c Anthony Liguori
void bdrv_invalidate_cache_all(void)
4132 0f15423c Anthony Liguori
{
4133 0f15423c Anthony Liguori
    BlockDriverState *bs;
4134 0f15423c Anthony Liguori
4135 0f15423c Anthony Liguori
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
4136 0f15423c Anthony Liguori
        bdrv_invalidate_cache(bs);
4137 0f15423c Anthony Liguori
    }
4138 0f15423c Anthony Liguori
}
4139 0f15423c Anthony Liguori
4140 07789269 Benoît Canet
void bdrv_clear_incoming_migration_all(void)
4141 07789269 Benoรฎt Canet
{
4142 07789269 Benoรฎt Canet
    BlockDriverState *bs;
4143 07789269 Benoรฎt Canet
4144 07789269 Benoรฎt Canet
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
4145 07789269 Benoรฎt Canet
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4146 07789269 Benoรฎt Canet
    }
4147 07789269 Benoรฎt Canet
}
4148 07789269 Benoît Canet
4149 07f07615 Paolo Bonzini
/*
 * Synchronous flush of bs.
 *
 * Inside a coroutine the flush entry point is called directly.  Otherwise
 * a coroutine is created for it and qemu_aio_wait() is called until the
 * result is no longer NOT_DONE.  Returns the result of bdrv_co_flush().
 */
int bdrv_flush(BlockDriverState *bs)
{
    RwCo flush_req = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (!qemu_in_coroutine()) {
        Coroutine *worker = qemu_coroutine_create(bdrv_flush_co_entry);

        qemu_coroutine_enter(worker, &flush_req);
        while (flush_req.ret == NOT_DONE) {
            qemu_aio_wait();
        }
        return flush_req.ret;
    }

    /* Fast-path if already in coroutine context */
    bdrv_flush_co_entry(&flush_req);
    return flush_req.ret;
}
4170 e7a8a783 Kevin Wolf
4171 4265d620 Paolo Bonzini
/*
 * Coroutine entry point for bdrv_discard(): unpacks the RwCo passed as
 * @opaque, runs bdrv_co_discard() on it and stores the result in its
 * ret field.
 */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *request = opaque;

    request->ret = bdrv_co_discard(request->bs,
                                   request->sector_num,
                                   request->nb_sectors);
}
4177 4265d620 Paolo Bonzini
4178 4265d620 Paolo Bonzini
/*
 * Discard @nb_sectors sectors starting at @sector_num (coroutine context).
 *
 * Returns:
 *   -ENOMEDIUM  if @bs has no driver,
 *   -EIO        if bdrv_check_request() rejects the range, or the driver's
 *               AIO discard callback returns NULL,
 *   -EROFS      if @bs is read-only,
 *   the driver's result when it provides bdrv_co_discard or
 *   bdrv_aio_discard,
 *   0           when the driver implements neither callback.
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }
    if (bs->read_only) {
        return -EROFS;
    }

    /* Prefer the driver's native coroutine implementation */
    if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    }

    /* Otherwise emulate it on top of the AIO callback, if there is one */
    if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *aiocb;
        CoroutineIOCompletion completion = {
            .coroutine = qemu_coroutine_self(),
        };

        aiocb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                          bdrv_co_io_em_complete,
                                          &completion);
        if (aiocb == NULL) {
            return -EIO;
        }

        /* Yield; the result is read from completion.ret once resumed */
        qemu_coroutine_yield();
        return completion.ret;
    }

    /* Driver has no discard support: treated as success */
    return 0;
}
4207 4265d620 Paolo Bonzini
4208 4265d620 Paolo Bonzini
/*
 * Synchronous discard wrapper around bdrv_discard_co_entry().
 *
 * When already running inside a coroutine the entry function is invoked
 * directly.  Otherwise a new coroutine is created and entered, and
 * qemu_aio_wait() is polled until the entry function has replaced the
 * NOT_DONE placeholder in the request's ret field.
 *
 * Returns the ret value stored by bdrv_discard_co_entry().
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *discard_co;
    RwCo request = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Already in coroutine context: no need to spawn another one */
        bdrv_discard_co_entry(&request);
        return request.ret;
    }

    discard_co = qemu_coroutine_create(bdrv_discard_co_entry);
    qemu_coroutine_enter(discard_co, &request);
    while (request.ret == NOT_DONE) {
        qemu_aio_wait();
    }

    return request.ret;
}
4231 4265d620 Paolo Bonzini
4232 f9f05dc5 Kevin Wolf
/**************************************************************/
4233 19cb3738 bellard
/* removable device support */
4234 19cb3738 bellard
4235 19cb3738 bellard
/**
4236 19cb3738 bellard
 * Return TRUE if the media is present
4237 19cb3738 bellard
 */
4238 19cb3738 bellard
int bdrv_is_inserted(BlockDriverState *bs)
4239 19cb3738 bellard
{
4240 19cb3738 bellard
    BlockDriver *drv = bs->drv;
4241 a1aff5bf Markus Armbruster
4242 19cb3738 bellard
    if (!drv)
4243 19cb3738 bellard
        return 0;
4244 19cb3738 bellard
    if (!drv->bdrv_is_inserted)
4245 a1aff5bf Markus Armbruster
        return 1;
4246 a1aff5bf Markus Armbruster
    return drv->bdrv_is_inserted(bs);
4247 19cb3738 bellard
}
4248 19cb3738 bellard
4249 19cb3738 bellard
/**
4250 8e49ca46 Markus Armbruster
 * Return whether the media changed since the last call to this
4251 8e49ca46 Markus Armbruster
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
4252 19cb3738 bellard
 */
4253 19cb3738 bellard
int bdrv_media_changed(BlockDriverState *bs)
4254 19cb3738 bellard
{
4255 19cb3738 bellard
    BlockDriver *drv = bs->drv;
4256 19cb3738 bellard
4257 8e49ca46 Markus Armbruster
    if (drv && drv->bdrv_media_changed) {
4258 8e49ca46 Markus Armbruster
        return drv->bdrv_media_changed(bs);
4259 8e49ca46 Markus Armbruster
    }
4260 8e49ca46 Markus Armbruster
    return -ENOTSUP;
4261 19cb3738 bellard
}
4262 19cb3738 bellard
4263 19cb3738 bellard
/**
4264 19cb3738 bellard
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4265 19cb3738 bellard
 */
4266 f36f3949 Luiz Capitulino
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
4267 19cb3738 bellard
{
4268 19cb3738 bellard
    BlockDriver *drv = bs->drv;
4269 19cb3738 bellard
4270 822e1cd1 Markus Armbruster
    if (drv && drv->bdrv_eject) {
4271 822e1cd1 Markus Armbruster
        drv->bdrv_eject(bs, eject_flag);
4272 19cb3738 bellard
    }
4273 6f382ed2 Luiz Capitulino
4274 6f382ed2 Luiz Capitulino
    if (bs->device_name[0] != '\0') {
4275 6f382ed2 Luiz Capitulino
        bdrv_emit_qmp_eject_event(bs, eject_flag);
4276 6f382ed2 Luiz Capitulino
    }
4277 19cb3738 bellard
}
4278 19cb3738 bellard
4279 19cb3738 bellard
/**
4280 19cb3738 bellard
 * Lock or unlock the media (if it is locked, the user won't be able
4281 19cb3738 bellard
 * to eject it manually).
4282 19cb3738 bellard
 */
4283 025e849a Markus Armbruster
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
4284 19cb3738 bellard
{
4285 19cb3738 bellard
    BlockDriver *drv = bs->drv;
4286 19cb3738 bellard
4287 025e849a Markus Armbruster
    trace_bdrv_lock_medium(bs, locked);
4288 b8c6d095 Stefan Hajnoczi
4289 025e849a Markus Armbruster
    if (drv && drv->bdrv_lock_medium) {
4290 025e849a Markus Armbruster
        drv->bdrv_lock_medium(bs, locked);
4291 19cb3738 bellard
    }
4292 19cb3738 bellard
}
4293 985a03b0 ths
4294 985a03b0 ths
/* needed for generic scsi interface */
4295 985a03b0 ths
4296 985a03b0 ths
/*
 * Forward an ioctl request to the driver of @bs.
 *
 * Returns the driver callback's result, or -ENOTSUP when @bs has no
 * driver or the driver lacks a bdrv_ioctl callback.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *driver = bs->drv;

    if (!driver || !driver->bdrv_ioctl) {
        return -ENOTSUP;
    }
    return driver->bdrv_ioctl(bs, req, buf);
}
4304 7d780669 aliguori
4305 221f715d aliguori
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
4306 221f715d aliguori
        unsigned long int req, void *buf,
4307 221f715d aliguori
        BlockDriverCompletionFunc *cb, void *opaque)
4308 7d780669 aliguori
{
4309 221f715d aliguori
    BlockDriver *drv = bs->drv;
4310 7d780669 aliguori
4311 221f715d aliguori
    if (drv && drv->bdrv_aio_ioctl)
4312 221f715d aliguori
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
4313 221f715d aliguori
    return NULL;
4314 7d780669 aliguori
}
4315 e268ca52 aliguori
4316 7b6f9300 Markus Armbruster
/*
 * Record @align as the buffer alignment of @bs.  The stored value is the
 * one qemu_blockalign() consults when allocating for this device.
 */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
4320 7cd1e32a lirans@il.ibm.com
4321 e268ca52 aliguori
/*
 * Allocate @size bytes through qemu_memalign(), aligned to the buffer
 * alignment of @bs.  An alignment of 512 is used when @bs is NULL or its
 * buffer alignment is zero.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    if (bs && bs->buffer_alignment) {
        return qemu_memalign(bs->buffer_alignment, size);
    }
    return qemu_memalign(512, size);
}
4325 7cd1e32a lirans@il.ibm.com
4326 7cd1e32a lirans@il.ibm.com
/*
 * Turn dirty tracking for @bs on (@enable != 0) or off (@enable == 0).
 *
 * The dirty counter is reset to zero in both cases.  Enabling allocates a
 * zeroed bitmap unless one already exists; the bitmap has one bit per
 * chunk of BDRV_SECTORS_PER_DIRTY_CHUNK sectors, rounded up to whole
 * unsigned longs.  Disabling frees the bitmap and clears the pointer.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t nb_words;

    bs->dirty_count = 0;

    if (!enable) {
        /* g_free() accepts NULL, so no need to test the pointer first */
        g_free(bs->dirty_bitmap);
        bs->dirty_bitmap = NULL;
        return;
    }

    if (bs->dirty_bitmap) {
        /* Tracking is already active: keep the existing bitmap */
        return;
    }

    /* Number of unsigned longs needed, rounding up */
    nb_words = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
    nb_words /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;

    bs->dirty_bitmap = g_new0(unsigned long, nb_words);
}
4346 7cd1e32a lirans@il.ibm.com
4347 7cd1e32a lirans@il.ibm.com
/*
 * Return 1 if the dirty chunk containing @sector is marked in the bitmap
 * of @bs, 0 otherwise.  Also returns 0 when no bitmap exists or when
 * @sector lies at or beyond the length reported by bdrv_getlength().
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk_idx = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (!bs->dirty_bitmap) {
        return 0;
    }
    if ((sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    return (bs->dirty_bitmap[chunk_idx / BITS_PER_LONG] &
            (1UL << (chunk_idx % BITS_PER_LONG))) != 0;
}
4359 7cd1e32a lirans@il.ibm.com
4360 1755da16 Paolo Bonzini
/*
 * Find the next dirty chunk after the one containing @sector and return
 * its first sector number.  The search wraps around to sector 0 once it
 * reaches bs->total_sectors.
 *
 * The caller must ensure at least one chunk is dirty (asserted), since
 * the scan would otherwise never terminate.
 */
int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t start_chunk;
    int word, pos;

    /* Guard against scanning an all-clean bitmap forever */
    assert(bs->dirty_count > 0);

    /* Advance to the first sector of the following chunk */
    sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
    start_chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
    word = start_chunk / BITS_PER_LONG;
    pos = start_chunk % BITS_PER_LONG;

    while (1) {
        if (sector >= bs->total_sectors) {
            /* Ran off the end: restart from the beginning */
            sector = 0;
            word = 0;
            pos = 0;
        }

        if (pos == 0 && bs->dirty_bitmap[word] == 0) {
            /* Whole word is clean: skip all of its chunks at once */
            sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
            word++;
            continue;
        }

        if (bs->dirty_bitmap[word] & (1UL << pos)) {
            return sector;
        }

        /* Step to the next chunk, moving to the next word when needed */
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        pos++;
        if (pos == BITS_PER_LONG) {
            pos = 0;
            word++;
        }
    }
}
4394 1755da16 Paolo Bonzini
4395 1755da16 Paolo Bonzini
/*
 * Mark @nr_sectors sectors starting at @cur_sector as dirty by calling
 * set_dirty_bitmap() with a dirty flag of 1.
 */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
}
4400 1755da16 Paolo Bonzini
4401 a55eb92c Jan Kiszka
/*
 * Mark @nr_sectors sectors starting at @cur_sector as clean by calling
 * set_dirty_bitmap() with a dirty flag of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
4406 aaa0eb75 Liran Schour
4407 aaa0eb75 Liran Schour
/* Return the current value of the dirty counter kept in @bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
4411 f88e1a42 Jes Sorensen
4412 db593f25 Marcelo Tosatti
/*
 * Set the in_use flag of @bs to @in_use.  The new value must differ from
 * the current one; setting the flag to the value it already has trips
 * the assertion.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
4417 db593f25 Marcelo Tosatti
4418 db593f25 Marcelo Tosatti
/* Return the in_use flag of @bs as last set by bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
4422 db593f25 Marcelo Tosatti
4423 28a7282a Luiz Capitulino
/*
 * Enable I/O status tracking on @bs and start it from
 * BLOCK_DEVICE_IO_STATUS_OK.
 */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    bs->iostatus_enabled = true;
}
4428 28a7282a Luiz Capitulino
4429 28a7282a Luiz Capitulino
/* The I/O status is only enabled if the drive explicitly
4430 28a7282a Luiz Capitulino
 * enables it _and_ the VM is configured to stop on errors */
4431 28a7282a Luiz Capitulino
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
4432 28a7282a Luiz Capitulino
{
4433 d6bf279e Luiz Capitulino
    return (bs->iostatus_enabled &&
4434 92aa5c6d Paolo Bonzini
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
4435 92aa5c6d Paolo Bonzini
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
4436 92aa5c6d Paolo Bonzini
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
4437 28a7282a Luiz Capitulino
}
4438 28a7282a Luiz Capitulino
4439 28a7282a Luiz Capitulino
/*
 * Disable I/O status tracking on @bs.  The stored iostatus value itself
 * is left untouched.
 */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
4443 28a7282a Luiz Capitulino
4444 28a7282a Luiz Capitulino
/*
 * Reset the I/O status of @bs to BLOCK_DEVICE_IO_STATUS_OK and, when a
 * block job is attached, reset that job's I/O status as well.  Does
 * nothing unless bdrv_iostatus_is_enabled() holds for @bs.
 */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (!bdrv_iostatus_is_enabled(bs)) {
        return;
    }

    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    if (bs->job) {
        block_job_iostatus_reset(bs->job);
    }
}
4453 28a7282a Luiz Capitulino
4454 28a7282a Luiz Capitulino
/*
 * Record an I/O error on @bs.  I/O status must be enabled (asserted).
 *
 * Only the first error sticks: the status changes only while it is still
 * BLOCK_DEVICE_IO_STATUS_OK.  @error is compared against the positive
 * errno value ENOSPC to choose between the NOSPACE and FAILED states.
 */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));

    if (bs->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
        /* An earlier error is already recorded; keep it */
        return;
    }

    if (error == ENOSPC) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_NOSPACE;
    } else {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
4462 28a7282a Luiz Capitulino
4463 a597e79c Christoph Hellwig
void
4464 a597e79c Christoph Hellwig
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4465 a597e79c Christoph Hellwig
        enum BlockAcctType type)
4466 a597e79c Christoph Hellwig
{
4467 a597e79c Christoph Hellwig
    assert(type < BDRV_MAX_IOTYPE);
4468 a597e79c Christoph Hellwig
4469 a597e79c Christoph Hellwig
    cookie->bytes = bytes;
4470 c488c7f6 Christoph Hellwig
    cookie->start_time_ns = get_clock();
4471 a597e79c Christoph Hellwig
    cookie->type = type;
4472 a597e79c Christoph Hellwig
}
4473 a597e79c Christoph Hellwig
4474 a597e79c Christoph Hellwig
void
4475 a597e79c Christoph Hellwig
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4476 a597e79c Christoph Hellwig
{
4477 a597e79c Christoph Hellwig
    assert(cookie->type < BDRV_MAX_IOTYPE);
4478 a597e79c Christoph Hellwig
4479 a597e79c Christoph Hellwig
    bs->nr_bytes[cookie->type] += cookie->bytes;
4480 a597e79c Christoph Hellwig
    bs->nr_ops[cookie->type]++;
4481 c488c7f6 Christoph Hellwig
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
4482 a597e79c Christoph Hellwig
}
4483 a597e79c Christoph Hellwig
4484 d92ada22 Luiz Capitulino
/*
 * Create a new image file.
 *
 * @filename:      path of the image to create; also used to look up the
 *                 protocol driver
 * @fmt:           name of the image format driver
 * @base_filename: backing file name, or NULL for none
 * @base_fmt:      backing file format name, or NULL for none
 * @options:       additional creation options string to parse, or NULL
 * @img_size:      value stored as the initial BLOCK_OPT_SIZE option
 * @flags:         open flags; used (minus BDRV_O_RDWR, BDRV_O_SNAPSHOT and
 *                 BDRV_O_NO_BACKING) when the backing file has to be opened
 * @errp:          receives a description of the failure, if any
 *
 * When the size option is still -1 after option parsing, the size is taken
 * from the backing file; with no backing file this is an error.
 *
 * The function prints a "Formatting ..." line to stdout before calling
 * bdrv_create().  Nothing is returned; failures are reported via @errp.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags, Error **errp)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
    }

    /* Accepted options are those of the format plus those of the protocol */
    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_setg(errp, "Invalid options for file format '%s'.", fmt);
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* An image must not name itself as its backing file */
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt->value.s);
            goto out;
        }
    }

    /*
     * The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there
     */
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            /* Named distinctly so it does not shadow the 'size' option */
            uint64_t backing_size;
            char buf[32];
            int back_flags;

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Could not open '%s'",
                                 backing_file->value.s);
                goto out;
            }

            /*
             * Presumably bdrv_get_geometry() reports a count of 512-byte
             * sectors; convert to bytes.  TODO confirm against its
             * definition.
             */
            bdrv_get_geometry(bs, &backing_size);
            backing_size *= 512;

            /* The value is unsigned, so it needs the unsigned conversion */
            snprintf(buf, sizeof(buf), "%" PRIu64, backing_size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);
    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_setg(errp, "Formatting or formatting option not supported for "
                             "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_setg(errp, "The image size is too large for file format '%s'",
                       fmt);
        } else {
            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
                       strerror(-ret));
        }
    }

out:
    /* Common cleanup for both the success and the failure paths */
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }
}