block/qed.c @ cc84d90f
/*
 * QEMU Enhanced Disk Format
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/timer.h"
#include "trace.h"
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"

static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
{
    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
    bool finished = false;

    /* Wait for the request to finish */
    acb->finished = &finished;
    while (!finished) {
        qemu_aio_wait();
    }
}

static const AIOCBInfo qed_aiocb_info = {
    .aiocb_size         = sizeof(QEDAIOCB),
    .cancel             = qed_aio_cancel,
};

static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
                          const char *filename)
{
    const QEDHeader *header = (const QEDHeader *)buf;

    if (buf_size < sizeof(*header)) {
        return 0;
    }
    if (le32_to_cpu(header->magic) != QED_MAGIC) {
        return 0;
    }
    return 100;
}

/**
 * Check whether an image format is raw
 *
 * @fmt:    Backing file format, may be NULL
 */
static bool qed_fmt_is_raw(const char *fmt)
{
    return fmt && strcmp(fmt, "raw") == 0;
}

static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
{
    cpu->magic = le32_to_cpu(le->magic);
    cpu->cluster_size = le32_to_cpu(le->cluster_size);
    cpu->table_size = le32_to_cpu(le->table_size);
    cpu->header_size = le32_to_cpu(le->header_size);
    cpu->features = le64_to_cpu(le->features);
    cpu->compat_features = le64_to_cpu(le->compat_features);
    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
    cpu->image_size = le64_to_cpu(le->image_size);
    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
}

static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
{
    le->magic = cpu_to_le32(cpu->magic);
    le->cluster_size = cpu_to_le32(cpu->cluster_size);
    le->table_size = cpu_to_le32(cpu->table_size);
    le->header_size = cpu_to_le32(cpu->header_size);
    le->features = cpu_to_le64(cpu->features);
    le->compat_features = cpu_to_le64(cpu->compat_features);
    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
    le->image_size = cpu_to_le64(cpu->image_size);
    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
}

int qed_write_header_sync(BDRVQEDState *s)
{
    QEDHeader le;
    int ret;

    qed_header_cpu_to_le(&s->header, &le);
    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
    if (ret != sizeof(le)) {
        return ret;
    }
    return 0;
}

typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    struct iovec iov;
    QEMUIOVector qiov;
    int nsectors;
    uint8_t *buf;
} QEDWriteHeaderCB;

static void qed_write_header_cb(void *opaque, int ret)
{
    QEDWriteHeaderCB *write_header_cb = opaque;

    qemu_vfree(write_header_cb->buf);
    gencb_complete(write_header_cb, ret);
}

static void qed_write_header_read_cb(void *opaque, int ret)
{
    QEDWriteHeaderCB *write_header_cb = opaque;
    BDRVQEDState *s = write_header_cb->s;

    if (ret) {
        qed_write_header_cb(write_header_cb, ret);
        return;
    }

    /* Update header */
    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);

    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
                    write_header_cb->nsectors, qed_write_header_cb,
                    write_header_cb);
}

/**
 * Update header in-place (does not rewrite backing filename or other strings)
 *
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
                             void *opaque)
{
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
     * the data following the header if an unrecognized compat feature is
     * active.  Therefore, first read the sectors containing the header, update
     * them, and write back.
     */

    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
                   BDRV_SECTOR_SIZE;
    size_t len = nsectors * BDRV_SECTOR_SIZE;
    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
                                                    cb, opaque);

    write_header_cb->s = s;
    write_header_cb->nsectors = nsectors;
    write_header_cb->buf = qemu_blockalign(s->bs, len);
    write_header_cb->iov.iov_base = write_header_cb->buf;
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
                   qed_write_header_read_cb, write_header_cb);
}

static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
{
    uint64_t table_entries;
    uint64_t l2_size;

    table_entries = (table_size * cluster_size) / sizeof(uint64_t);
    l2_size = table_entries * cluster_size;

    return l2_size * table_entries;
}
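
/* Worked example (assuming the defaults behind QED_DEFAULT_CLUSTER_SIZE and
 * QED_DEFAULT_TABLE_SIZE, i.e. 64 KB clusters and table_size 4):
 * table_entries = (4 * 65536) / 8 = 32768, so one L2 table maps
 * 32768 * 64 KB = 2 GB, and 32768 L1 entries give a maximum image size of
 * 2 GB * 32768 = 64 TB.
 */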

static bool qed_is_cluster_size_valid(uint32_t cluster_size)
{
    if (cluster_size < QED_MIN_CLUSTER_SIZE ||
        cluster_size > QED_MAX_CLUSTER_SIZE) {
        return false;
    }
    if (cluster_size & (cluster_size - 1)) {
        return false; /* not power of 2 */
    }
    return true;
}

static bool qed_is_table_size_valid(uint32_t table_size)
{
    if (table_size < QED_MIN_TABLE_SIZE ||
        table_size > QED_MAX_TABLE_SIZE) {
        return false;
    }
    if (table_size & (table_size - 1)) {
        return false; /* not power of 2 */
    }
    return true;
}

static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
                                    uint32_t table_size)
{
    if (image_size % BDRV_SECTOR_SIZE != 0) {
        return false; /* not multiple of sector size */
    }
    if (image_size > qed_max_image_size(cluster_size, table_size)) {
        return false; /* image is too large */
    }
    return true;
}

/**
 * Read a string of known length from the image file
 *
 * @file:       Image file
 * @offset:     File offset to start of string, in bytes
 * @n:          String length in bytes
 * @buf:        Destination buffer
 * @buflen:     Destination buffer length in bytes
 * @ret:        0 on success, -errno on failure
 *
 * The string is NUL-terminated.
 */
static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
                           char *buf, size_t buflen)
{
    int ret;
    if (n >= buflen) {
        return -EINVAL;
    }
    ret = bdrv_pread(file, offset, buf, n);
    if (ret < 0) {
        return ret;
    }
    buf[n] = '\0';
    return 0;
}

/**
 * Allocate new clusters
 *
 * @s:          QED state
 * @n:          Number of contiguous clusters to allocate
 * @ret:        Offset of first allocated cluster
 *
 * This function only produces the offset where the new clusters should be
 * written.  It updates BDRVQEDState but does not make any changes to the image
 * file.
 */
static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
{
    uint64_t offset = s->file_size;
    s->file_size += n * s->header.cluster_size;
    return offset;
}

QEDTable *qed_alloc_table(BDRVQEDState *s)
{
    /* Honor O_DIRECT memory alignment requirements */
    return qemu_blockalign(s->bs,
                           s->header.cluster_size * s->header.table_size);
}

/**
 * Allocate a new zeroed L2 table
 */
static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
{
    CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);

    l2_table->table = qed_alloc_table(s);
    l2_table->offset = qed_alloc_clusters(s, s->header.table_size);

    memset(l2_table->table->offsets, 0,
           s->header.cluster_size * s->header.table_size);
    return l2_table;
}

static void qed_aio_next_io(void *opaque, int ret);

static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
    assert(!s->allocating_write_reqs_plugged);

    s->allocating_write_reqs_plugged = true;
}

static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
{
    QEDAIOCB *acb;

    assert(s->allocating_write_reqs_plugged);

    s->allocating_write_reqs_plugged = false;

    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
    if (acb) {
        qed_aio_next_io(acb, 0);
    }
}

static void qed_finish_clear_need_check(void *opaque, int ret)
{
    /* Do nothing */
}

static void qed_flush_after_clear_need_check(void *opaque, int ret)
{
    BDRVQEDState *s = opaque;

    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);

    /* No need to wait until flush completes */
    qed_unplug_allocating_write_reqs(s);
}

static void qed_clear_need_check(void *opaque, int ret)
{
    BDRVQEDState *s = opaque;

    if (ret) {
        qed_unplug_allocating_write_reqs(s);
        return;
    }

    s->header.features &= ~QED_F_NEED_CHECK;
    qed_write_header(s, qed_flush_after_clear_need_check, s);
}

static void qed_need_check_timer_cb(void *opaque)
{
    BDRVQEDState *s = opaque;

    /* The timer should only fire when allocating writes have drained */
    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));

    trace_qed_need_check_timer_cb(s);

    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
}
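
/* Note: the sequence the timer triggers is: plug allocating writes ->
 * flush -> clear QED_F_NEED_CHECK -> rewrite header -> flush -> unplug.
 * If the initial flush fails, the flag is left set and a consistency check
 * will run on the next open.
 */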

static void qed_start_need_check_timer(BDRVQEDState *s)
{
    trace_qed_start_need_check_timer(s);

    /* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended
     * for migration.
     */
    timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
}

/* It's okay to call this multiple times or when no timer is started */
static void qed_cancel_need_check_timer(BDRVQEDState *s)
{
    trace_qed_cancel_need_check_timer(s);
    timer_del(s->need_check_timer);
}

static void bdrv_qed_rebind(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;
    s->bs = bs;
}

static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
{
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
    int64_t file_size;
    int ret;

    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
        return -EMEDIUMTYPE;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
                 s->header.features & ~QED_FEATURE_MASK);
        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
                      bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
        return -EINVAL;
    }

    /* Round down file size to the last cluster */
    file_size = bdrv_getlength(bs->file);
    if (file_size < 0) {
        return file_size;
    }
    s->file_size = qed_start_of_cluster(s, file_size);

    if (!qed_is_table_size_valid(s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(s->header.image_size,
                                 s->header.cluster_size,
                                 s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
        return -EINVAL;
    }

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
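
    /* Worked example (assuming 64 KB clusters and table_size 4):
     * table_nelems = (65536 * 4) / 8 = 32768, l2_shift = 16,
     * l2_mask = 0x7fff, l1_shift = 31, i.e. each L1 entry covers 2 GB of
     * virtual disk.
     */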

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
            s->header.backing_filename_size >
            s->header.cluster_size * s->header.header_size) {
            return -EINVAL;
        }

        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
            return ret;
        }

        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
        }
    }

    /* Reset unknown autoclear feature bits.  This is a backwards
     * compatibility mechanism that allows images to be opened by older
     * programs, which "knock out" unknown feature bits.  When an image is
     * opened by a newer program again it can detect that the autoclear
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
        if (ret) {
            return ret;
        }

        /* From here on only known autoclear feature bits are valid */
        bdrv_flush(bs->file);
    }

    s->l1_table = qed_alloc_table(s);
    qed_init_l2_cache(&s->l2_cache);

    ret = qed_read_l1_table_sync(s);
    if (ret) {
        goto out;
    }

    /* If image was not closed cleanly, check consistency */
    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
        /* Read-only images cannot be fixed.  There is no risk of corruption
         * since write operations are not possible.  Therefore, allow
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
        if (!bdrv_is_read_only(bs->file) &&
            !(flags & BDRV_O_INCOMING)) {
            BdrvCheckResult result = {0};

            ret = qed_check(s, &result, true);
            if (ret) {
                goto out;
            }
        }
    }

    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                       qed_need_check_timer_cb, s);

out:
    if (ret) {
        qed_free_l2_cache(&s->l2_cache);
        qemu_vfree(s->l1_table);
    }
    return ret;
}

/* We have nothing to do for QED reopen, stubs just return
 * success */
static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
                                   BlockReopenQueue *queue, Error **errp)
{
    return 0;
}

static void bdrv_qed_close(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;

    qed_cancel_need_check_timer(s);
    timer_free(s->need_check_timer);

    /* Ensure writes reach stable storage */
    bdrv_flush(bs->file);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
        s->header.features &= ~QED_F_NEED_CHECK;
        qed_write_header_sync(s);
    }

    qed_free_l2_cache(&s->l2_cache);
    qemu_vfree(s->l1_table);
}

static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
                      const char *backing_file, const char *backing_fmt)
{
    QEDHeader header = {
        .magic = QED_MAGIC,
        .cluster_size = cluster_size,
        .table_size = table_size,
        .header_size = 1,
        .features = 0,
        .compat_features = 0,
        .l1_table_offset = cluster_size,
        .image_size = image_size,
    };
    QEDHeader le_header;
    uint8_t *l1_table = NULL;
    size_t l1_size = header.cluster_size * header.table_size;
    Error *local_err = NULL;
    int ret = 0;
    BlockDriverState *bs = NULL;

    ret = bdrv_create_file(filename, NULL, &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
        return ret;
    }

    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB,
                         &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
        return ret;
    }

    /* File must start empty and grow, check truncate is supported */
    ret = bdrv_truncate(bs, 0);
    if (ret < 0) {
        goto out;
    }

    if (backing_file) {
        header.features |= QED_F_BACKING_FILE;
        header.backing_filename_offset = sizeof(le_header);
        header.backing_filename_size = strlen(backing_file);

        if (qed_fmt_is_raw(backing_fmt)) {
            header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
        }
    }

    qed_header_cpu_to_le(&header, &le_header);
    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        goto out;
    }
    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
                      header.backing_filename_size);
    if (ret < 0) {
        goto out;
    }

    l1_table = g_malloc0(l1_size);
    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
    if (ret < 0) {
        goto out;
    }

    ret = 0; /* success */
out:
    g_free(l1_table);
    bdrv_unref(bs);
    return ret;
}
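
/* Resulting file layout (header_size == 1, so the header occupies one
 * cluster): byte 0 holds the little-endian QEDHeader, immediately followed
 * by the backing filename string (if any); the zeroed L1 table starts at
 * l1_table_offset == cluster_size and spans table_size clusters.
 */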

static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
                           Error **errp)
{
    uint64_t image_size = 0;
    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
    const char *backing_file = NULL;
    const char *backing_fmt = NULL;

    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            image_size = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
            backing_fmt = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
            if (options->value.n) {
                cluster_size = options->value.n;
            }
        } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
            if (options->value.n) {
                table_size = options->value.n;
            }
        }
        options++;
    }

    if (!qed_is_cluster_size_valid(cluster_size)) {
        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
        return -EINVAL;
    }
    if (!qed_is_table_size_valid(table_size)) {
        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
        fprintf(stderr, "QED image size must be a non-zero multiple of "
                        "cluster size and less than %" PRIu64 " bytes\n",
                qed_max_image_size(cluster_size, table_size));
        return -EINVAL;
    }

    return qed_create(filename, cluster_size, image_size, table_size,
                      backing_file, backing_fmt);
}

typedef struct {
    BlockDriverState *bs;
    Coroutine *co;
    uint64_t pos;
    int64_t status;
    int *pnum;
} QEDIsAllocatedCB;

static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
    QEDIsAllocatedCB *cb = opaque;
    BDRVQEDState *s = cb->bs->opaque;
    *cb->pnum = len / BDRV_SECTOR_SIZE;
    switch (ret) {
    case QED_CLUSTER_FOUND:
        offset |= qed_offset_into_cluster(s, cb->pos);
        cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
        break;
    case QED_CLUSTER_ZERO:
        cb->status = BDRV_BLOCK_ZERO;
        break;
    case QED_CLUSTER_L2:
    case QED_CLUSTER_L1:
        cb->status = 0;
        break;
    default:
        assert(ret < 0);
        cb->status = ret;
        break;
    }

    if (cb->co) {
        qemu_coroutine_enter(cb->co, NULL);
    }
}

static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
                                                         int64_t sector_num,
                                                         int nb_sectors, int *pnum)
{
    BDRVQEDState *s = bs->opaque;
    size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
    QEDIsAllocatedCB cb = {
        .bs = bs,
        .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
        .status = BDRV_BLOCK_OFFSET_MASK,
        .pnum = pnum,
    };
    QEDRequest request = { .l2_table = NULL };

    qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);

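    /* Note: cb.status was initialized to BDRV_BLOCK_OFFSET_MASK, a value
     * qed_is_allocated_cb never produces, so it doubles as a "callback not
     * yet invoked" sentinel for the wait loop below.
     */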
    /* Now sleep if the callback wasn't invoked immediately */
    while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
        cb.co = qemu_coroutine_self();
        qemu_coroutine_yield();
    }

    qed_unref_l2_cache_entry(request.l2_table);

    return cb.status;
}

static int bdrv_qed_make_empty(BlockDriverState *bs)
{
    return -ENOTSUP;
}

static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
{
    return acb->common.bs->opaque;
}

/**
 * Read from the backing file or zero-fill if no backing file
 *
 * @s:          QED state
 * @pos:        Byte position in device
 * @qiov:       Destination I/O vector
 * @cb:         Completion function
 * @opaque:     User data for completion function
 *
 * This function reads qiov->size bytes starting at pos from the backing file.
 * If there is no backing file then zeroes are read.
 */
static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    uint64_t backing_length = 0;
    size_t size;

    /* If there is a backing file, get its length.  Treat the absence of a
     * backing file like a zero length backing file.
     */
    if (s->bs->backing_hd) {
        int64_t l = bdrv_getlength(s->bs->backing_hd);
        if (l < 0) {
            cb(opaque, l);
            return;
        }
        backing_length = l;
    }

    /* Zero all sectors if reading beyond the end of the backing file */
    if (pos >= backing_length ||
        pos + qiov->size > backing_length) {
        qemu_iovec_memset(qiov, 0, 0, qiov->size);
    }

    /* Complete now if there are no backing file sectors to read */
    if (pos >= backing_length) {
        cb(opaque, 0);
        return;
    }

    /* If the read straddles the end of the backing file, shorten it */
    size = MIN((uint64_t)backing_length - pos, qiov->size);

    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
                   qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
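
/* Example of the straddle case: with a 1 MB backing file, a read of 8 KB at
 * pos = 1 MB - 4 KB first zero-fills the whole qiov, then reads only the
 * remaining 4 KB from the backing file; the tail stays zero.
 */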

typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    QEMUIOVector qiov;
    struct iovec iov;
    uint64_t offset;
} CopyFromBackingFileCB;

static void qed_copy_from_backing_file_cb(void *opaque, int ret)
{
    CopyFromBackingFileCB *copy_cb = opaque;
    qemu_vfree(copy_cb->iov.iov_base);
    gencb_complete(&copy_cb->gencb, ret);
}

static void qed_copy_from_backing_file_write(void *opaque, int ret)
{
    CopyFromBackingFileCB *copy_cb = opaque;
    BDRVQEDState *s = copy_cb->s;

    if (ret) {
        qed_copy_from_backing_file_cb(copy_cb, ret);
        return;
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
                    &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_copy_from_backing_file_cb, copy_cb);
}

/**
 * Copy data from backing file into the image
 *
 * @s:          QED state
 * @pos:        Byte position in device
 * @len:        Number of bytes
 * @offset:     Byte offset in image file
 * @cb:         Completion function
 * @opaque:     User data for completion function
 */
static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
                                       uint64_t len, uint64_t offset,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque)
{
    CopyFromBackingFileCB *copy_cb;

    /* Skip copy entirely if there is no work to do */
    if (len == 0) {
        cb(opaque, 0);
        return;
    }

    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
    copy_cb->s = s;
    copy_cb->offset = offset;
    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
    copy_cb->iov.iov_len = len;
    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);

    qed_read_backing_file(s, pos, &copy_cb->qiov,
                          qed_copy_from_backing_file_write, copy_cb);
}

/**
 * Link one or more contiguous clusters into a table
 *
 * @s:          QED state
 * @table:      L2 table
 * @index:      First cluster index
 * @n:          Number of contiguous clusters
 * @cluster:    First cluster offset
 *
 * The cluster offset may be an allocated byte offset in the image file, the
 * zero cluster marker, or the unallocated cluster marker.
 */
static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
                                unsigned int n, uint64_t cluster)
{
    int i;
    for (i = index; i < index + n; i++) {
        table->offsets[i] = cluster;
        if (!qed_offset_is_unalloc_cluster(cluster) &&
            !qed_offset_is_zero_cluster(cluster)) {
            cluster += s->header.cluster_size;
        }
    }
}
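
/* For example, linking n = 3 allocated clusters starting at offset X stores
 * X, X + cluster_size, X + 2 * cluster_size; linking the zero or unallocated
 * marker stores the same marker value in all n entries.
 */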
875 | eabba580 | Stefan Hajnoczi | |
876 | eabba580 | Stefan Hajnoczi | static void qed_aio_complete_bh(void *opaque) |
877 | eabba580 | Stefan Hajnoczi | { |
878 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
879 | eabba580 | Stefan Hajnoczi | BlockDriverCompletionFunc *cb = acb->common.cb; |
880 | eabba580 | Stefan Hajnoczi | void *user_opaque = acb->common.opaque;
|
881 | eabba580 | Stefan Hajnoczi | int ret = acb->bh_ret;
|
882 | eabba580 | Stefan Hajnoczi | bool *finished = acb->finished;
|
883 | eabba580 | Stefan Hajnoczi | |
884 | eabba580 | Stefan Hajnoczi | qemu_bh_delete(acb->bh); |
885 | eabba580 | Stefan Hajnoczi | qemu_aio_release(acb); |
886 | eabba580 | Stefan Hajnoczi | |
887 | eabba580 | Stefan Hajnoczi | /* Invoke callback */
|
888 | eabba580 | Stefan Hajnoczi | cb(user_opaque, ret); |
889 | eabba580 | Stefan Hajnoczi | |
890 | eabba580 | Stefan Hajnoczi | /* Signal cancel completion */
|
891 | eabba580 | Stefan Hajnoczi | if (finished) {
|
892 | eabba580 | Stefan Hajnoczi | *finished = true;
|
893 | eabba580 | Stefan Hajnoczi | } |
894 | eabba580 | Stefan Hajnoczi | } |
895 | eabba580 | Stefan Hajnoczi | |
896 | eabba580 | Stefan Hajnoczi | static void qed_aio_complete(QEDAIOCB *acb, int ret) |
897 | eabba580 | Stefan Hajnoczi | { |
898 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
899 | eabba580 | Stefan Hajnoczi | |
900 | eabba580 | Stefan Hajnoczi | trace_qed_aio_complete(s, acb, ret); |
901 | eabba580 | Stefan Hajnoczi | |
902 | eabba580 | Stefan Hajnoczi | /* Free resources */
|
903 | eabba580 | Stefan Hajnoczi | qemu_iovec_destroy(&acb->cur_qiov); |
904 | eabba580 | Stefan Hajnoczi | qed_unref_l2_cache_entry(acb->request.l2_table); |
905 | eabba580 | Stefan Hajnoczi | |
906 | 0e71be19 | Stefan Hajnoczi | /* Free the buffer we may have allocated for zero writes */
|
907 | 0e71be19 | Stefan Hajnoczi | if (acb->flags & QED_AIOCB_ZERO) {
|
908 | 0e71be19 | Stefan Hajnoczi | qemu_vfree(acb->qiov->iov[0].iov_base);
|
909 | 0e71be19 | Stefan Hajnoczi | acb->qiov->iov[0].iov_base = NULL; |
910 | 0e71be19 | Stefan Hajnoczi | } |
911 | 0e71be19 | Stefan Hajnoczi | |
912 | eabba580 | Stefan Hajnoczi | /* Arrange for a bh to invoke the completion function */ |
913 | eabba580 | Stefan Hajnoczi | acb->bh_ret = ret; |
914 | eabba580 | Stefan Hajnoczi | acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); |
915 | eabba580 | Stefan Hajnoczi | qemu_bh_schedule(acb->bh); |
916 | eabba580 | Stefan Hajnoczi | |
917 | eabba580 | Stefan Hajnoczi | /* Start next allocating write request waiting behind this one. Note that |
918 | eabba580 | Stefan Hajnoczi | * requests enqueue themselves when they first hit an unallocated cluster |
919 | eabba580 | Stefan Hajnoczi | * but they wait until the entire request is finished before waking up the |
920 | eabba580 | Stefan Hajnoczi | * next request in the queue. This ensures that we don't cycle through |
921 | eabba580 | Stefan Hajnoczi | * requests multiple times but rather finish one at a time completely. |
922 | eabba580 | Stefan Hajnoczi | */ |
923 | eabba580 | Stefan Hajnoczi | if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { |
924 | eabba580 | Stefan Hajnoczi | QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next); |
925 | eabba580 | Stefan Hajnoczi | acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); |
926 | eabba580 | Stefan Hajnoczi | if (acb) { |
927 | eabba580 | Stefan Hajnoczi | qed_aio_next_io(acb, 0); |
928 | 6f321e93 | Stefan Hajnoczi | } else if (s->header.features & QED_F_NEED_CHECK) { |
929 | 6f321e93 | Stefan Hajnoczi | qed_start_need_check_timer(s); |
930 | eabba580 | Stefan Hajnoczi | } |
931 | eabba580 | Stefan Hajnoczi | } |
932 | eabba580 | Stefan Hajnoczi | } |
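The comment in qed_aio_complete() above describes how allocating writes are serialized: requests append themselves to allocating_write_reqs, only the head runs, and its completion wakes the next one. A standalone sketch of that FIFO hand-off (illustrative C, not QEMU code; Req, submit() and complete() are invented for the example):

```c
#include <stdio.h>
#include <stddef.h>

typedef struct Req {
    const char *name;
    struct Req *next;
} Req;

static Req *head, *tail;            /* FIFO of allocating requests */

static void start(Req *r) { printf("start %s\n", r->name); }

static void submit(Req *r)
{
    r->next = NULL;
    if (tail) {
        tail->next = r;             /* someone is ahead: just wait */
    } else {
        head = r;
        start(r);                   /* queue empty: run immediately */
    }
    tail = r;
}

static void complete(void)
{
    head = head->next;              /* pop the finished head... */
    if (!head) {
        tail = NULL;
    } else {
        start(head);                /* ...and wake the next in line */
    }
}

int main(void)
{
    Req a = { "A", NULL }, b = { "B", NULL };
    submit(&a);                     /* runs immediately */
    submit(&b);                     /* waits behind A */
    complete();                     /* A done -> B starts */
    complete();
    return 0;
}
```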
933 | eabba580 | Stefan Hajnoczi | |
934 | eabba580 | Stefan Hajnoczi | /** |
935 | eabba580 | Stefan Hajnoczi | * Commit the current L2 table to the cache |
936 | eabba580 | Stefan Hajnoczi | */ |
937 | eabba580 | Stefan Hajnoczi | static void qed_commit_l2_update(void *opaque, int ret) |
938 | eabba580 | Stefan Hajnoczi | { |
939 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
940 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
941 | eabba580 | Stefan Hajnoczi | CachedL2Table *l2_table = acb->request.l2_table; |
942 | e4fc8781 | Stefan Hajnoczi | uint64_t l2_offset = l2_table->offset; |
943 | eabba580 | Stefan Hajnoczi | |
944 | eabba580 | Stefan Hajnoczi | qed_commit_l2_cache_entry(&s->l2_cache, l2_table); |
945 | eabba580 | Stefan Hajnoczi | |
946 | eabba580 | Stefan Hajnoczi | /* This is guaranteed to succeed because we just committed the entry to the |
947 | eabba580 | Stefan Hajnoczi | * cache. |
948 | eabba580 | Stefan Hajnoczi | */ |
949 | e4fc8781 | Stefan Hajnoczi | acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); |
950 | eabba580 | Stefan Hajnoczi | assert(acb->request.l2_table != NULL); |
951 | eabba580 | Stefan Hajnoczi | |
952 | eabba580 | Stefan Hajnoczi | qed_aio_next_io(opaque, ret); |
953 | eabba580 | Stefan Hajnoczi | } |
954 | eabba580 | Stefan Hajnoczi | |
955 | eabba580 | Stefan Hajnoczi | /** |
956 | eabba580 | Stefan Hajnoczi | * Update L1 table with new L2 table offset and write it out |
957 | eabba580 | Stefan Hajnoczi | */ |
958 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_l1_update(void *opaque, int ret) |
959 | eabba580 | Stefan Hajnoczi | { |
960 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
961 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
962 | eabba580 | Stefan Hajnoczi | int index; |
963 | eabba580 | Stefan Hajnoczi | |
964 | eabba580 | Stefan Hajnoczi | if (ret) { |
965 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
966 | eabba580 | Stefan Hajnoczi | return; |
967 | eabba580 | Stefan Hajnoczi | } |
968 | eabba580 | Stefan Hajnoczi | |
969 | eabba580 | Stefan Hajnoczi | index = qed_l1_index(s, acb->cur_pos); |
970 | eabba580 | Stefan Hajnoczi | s->l1_table->offsets[index] = acb->request.l2_table->offset; |
971 | eabba580 | Stefan Hajnoczi | |
972 | eabba580 | Stefan Hajnoczi | qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb); |
973 | eabba580 | Stefan Hajnoczi | } |
974 | eabba580 | Stefan Hajnoczi | |
975 | eabba580 | Stefan Hajnoczi | /** |
976 | eabba580 | Stefan Hajnoczi | * Update L2 table with new cluster offsets and write them out |
977 | eabba580 | Stefan Hajnoczi | */ |
978 | 0e71be19 | Stefan Hajnoczi | static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) |
979 | eabba580 | Stefan Hajnoczi | { |
980 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
981 | eabba580 | Stefan Hajnoczi | bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; |
982 | eabba580 | Stefan Hajnoczi | int index; |
983 | eabba580 | Stefan Hajnoczi | |
984 | eabba580 | Stefan Hajnoczi | if (ret) { |
985 | eabba580 | Stefan Hajnoczi | goto err; |
986 | eabba580 | Stefan Hajnoczi | } |
987 | eabba580 | Stefan Hajnoczi | |
988 | eabba580 | Stefan Hajnoczi | if (need_alloc) { |
989 | eabba580 | Stefan Hajnoczi | qed_unref_l2_cache_entry(acb->request.l2_table); |
990 | eabba580 | Stefan Hajnoczi | acb->request.l2_table = qed_new_l2_table(s); |
991 | eabba580 | Stefan Hajnoczi | } |
992 | eabba580 | Stefan Hajnoczi | |
993 | eabba580 | Stefan Hajnoczi | index = qed_l2_index(s, acb->cur_pos); |
994 | eabba580 | Stefan Hajnoczi | qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters, |
995 | 0e71be19 | Stefan Hajnoczi | offset); |
996 | eabba580 | Stefan Hajnoczi | |
997 | eabba580 | Stefan Hajnoczi | if (need_alloc) { |
998 | eabba580 | Stefan Hajnoczi | /* Write out the whole new L2 table */ |
999 | eabba580 | Stefan Hajnoczi | qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true, |
1000 | eabba580 | Stefan Hajnoczi | qed_aio_write_l1_update, acb); |
1001 | eabba580 | Stefan Hajnoczi | } else { |
1002 | eabba580 | Stefan Hajnoczi | /* Write out only the updated part of the L2 table */ |
1003 | eabba580 | Stefan Hajnoczi | qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false, |
1004 | eabba580 | Stefan Hajnoczi | qed_aio_next_io, acb); |
1005 | eabba580 | Stefan Hajnoczi | } |
1006 | eabba580 | Stefan Hajnoczi | return; |
1007 | eabba580 | Stefan Hajnoczi | |
1008 | eabba580 | Stefan Hajnoczi | err: |
1009 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1010 | eabba580 | Stefan Hajnoczi | } |
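qed_aio_write_l2_update() indexes into a two-level table via qed_l1_index() and qed_l2_index(), whose exact (shift-based) implementations live in qed.h. The arithmetic below is an illustrative stand-in showing how a byte position splits into an L1 slot and an L2 slot; CLUSTER_SIZE and TABLE_NELEMS are example values, not the image's actual parameters:

```c
#include <stdint.h>
#include <stdio.h>

#define CLUSTER_SIZE  65536ULL      /* QED default cluster size */
#define TABLE_NELEMS  8192ULL       /* entries per table (example value) */

int main(void)
{
    uint64_t pos = 5ULL * 1024 * 1024 * 1024;   /* byte offset: 5 GiB */

    /* Each L2 table maps TABLE_NELEMS clusters of CLUSTER_SIZE bytes */
    uint64_t l1_index = pos / (TABLE_NELEMS * CLUSTER_SIZE);
    uint64_t l2_index = (pos / CLUSTER_SIZE) % TABLE_NELEMS;

    printf("pos=%llu -> L1[%llu], L2[%llu]\n",
           (unsigned long long)pos,
           (unsigned long long)l1_index,
           (unsigned long long)l2_index);
    return 0;
}
```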
1011 | eabba580 | Stefan Hajnoczi | |
1012 | 0e71be19 | Stefan Hajnoczi | static void qed_aio_write_l2_update_cb(void *opaque, int ret) |
1013 | 0e71be19 | Stefan Hajnoczi | { |
1014 | 0e71be19 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1015 | 0e71be19 | Stefan Hajnoczi | qed_aio_write_l2_update(acb, ret, acb->cur_cluster); |
1016 | 0e71be19 | Stefan Hajnoczi | } |
1017 | 0e71be19 | Stefan Hajnoczi | |
1018 | eabba580 | Stefan Hajnoczi | /** |
1019 | eabba580 | Stefan Hajnoczi | * Flush new data clusters before updating the L2 table |
1020 | eabba580 | Stefan Hajnoczi | * |
1021 | eabba580 | Stefan Hajnoczi | * This flush is necessary when a backing file is in use. A crash during an |
1022 | eabba580 | Stefan Hajnoczi | * allocating write could result in empty clusters in the image. If the write |
1023 | eabba580 | Stefan Hajnoczi | * only touched a subregion of the cluster, then backing image sectors have |
1024 | eabba580 | Stefan Hajnoczi | * been lost in the untouched region. The solution is to flush after writing a |
1025 | eabba580 | Stefan Hajnoczi | * new data cluster and before updating the L2 table. |
1026 | eabba580 | Stefan Hajnoczi | */ |
1027 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) |
1028 | eabba580 | Stefan Hajnoczi | { |
1029 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1030 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1031 | eabba580 | Stefan Hajnoczi | |
1032 | 0e71be19 | Stefan Hajnoczi | if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) { |
1033 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, -EIO); |
1034 | eabba580 | Stefan Hajnoczi | } |
1035 | eabba580 | Stefan Hajnoczi | } |
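The doc comment above spells out the crash-consistency argument: a data cluster must be durable before the L2 entry that points at it is written, otherwise a crash can leave the entry pointing at a partially filled cluster and silently drop backing-file data. A minimal synchronous POSIX sketch of that ordering (hypothetical helper with arbitrary offsets; qed.c itself does this asynchronously via bdrv_aio_flush()):

```c
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>

/* Write the data cluster, fence it, then publish the L2 entry. */
int write_cluster_then_metadata(int fd, const void *data, size_t len,
                                off_t data_off, uint64_t l2_entry,
                                off_t l2_off)
{
    if (pwrite(fd, data, len, data_off) != (ssize_t)len) {
        return -1;
    }
    if (fsync(fd) != 0) {           /* barrier: data before L2 update */
        return -1;
    }
    if (pwrite(fd, &l2_entry, sizeof(l2_entry), l2_off)
            != (ssize_t)sizeof(l2_entry)) {
        return -1;
    }
    return 0;
}
```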
1036 | eabba580 | Stefan Hajnoczi | |
1037 | eabba580 | Stefan Hajnoczi | /** |
1038 | eabba580 | Stefan Hajnoczi | * Write data to the image file |
1039 | eabba580 | Stefan Hajnoczi | */ |
1040 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_main(void *opaque, int ret) |
1041 | eabba580 | Stefan Hajnoczi | { |
1042 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1043 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1044 | eabba580 | Stefan Hajnoczi | uint64_t offset = acb->cur_cluster + |
1045 | eabba580 | Stefan Hajnoczi | qed_offset_into_cluster(s, acb->cur_pos); |
1046 | eabba580 | Stefan Hajnoczi | BlockDriverCompletionFunc *next_fn; |
1047 | eabba580 | Stefan Hajnoczi | |
1048 | eabba580 | Stefan Hajnoczi | trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); |
1049 | eabba580 | Stefan Hajnoczi | |
1050 | eabba580 | Stefan Hajnoczi | if (ret) { |
1051 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1052 | eabba580 | Stefan Hajnoczi | return; |
1053 | eabba580 | Stefan Hajnoczi | } |
1054 | eabba580 | Stefan Hajnoczi | |
1055 | eabba580 | Stefan Hajnoczi | if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { |
1056 | eabba580 | Stefan Hajnoczi | next_fn = qed_aio_next_io; |
1057 | eabba580 | Stefan Hajnoczi | } else { |
1058 | eabba580 | Stefan Hajnoczi | if (s->bs->backing_hd) { |
1059 | eabba580 | Stefan Hajnoczi | next_fn = qed_aio_write_flush_before_l2_update; |
1060 | eabba580 | Stefan Hajnoczi | } else { |
1061 | 0e71be19 | Stefan Hajnoczi | next_fn = qed_aio_write_l2_update_cb; |
1062 | eabba580 | Stefan Hajnoczi | } |
1063 | eabba580 | Stefan Hajnoczi | } |
1064 | eabba580 | Stefan Hajnoczi | |
1065 | eabba580 | Stefan Hajnoczi | BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); |
1066 | ad54ae80 | Paolo Bonzini | bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, |
1067 | ad54ae80 | Paolo Bonzini | &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, |
1068 | ad54ae80 | Paolo Bonzini | next_fn, acb); |
1069 | eabba580 | Stefan Hajnoczi | } |
1070 | eabba580 | Stefan Hajnoczi | |
1071 | eabba580 | Stefan Hajnoczi | /** |
1072 | eabba580 | Stefan Hajnoczi | * Populate back untouched region of new data cluster |
1073 | eabba580 | Stefan Hajnoczi | */ |
1074 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_postfill(void *opaque, int ret) |
1075 | eabba580 | Stefan Hajnoczi | { |
1076 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1077 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1078 | eabba580 | Stefan Hajnoczi | uint64_t start = acb->cur_pos + acb->cur_qiov.size; |
1079 | eabba580 | Stefan Hajnoczi | uint64_t len = |
1080 | eabba580 | Stefan Hajnoczi | qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; |
1081 | eabba580 | Stefan Hajnoczi | uint64_t offset = acb->cur_cluster + |
1082 | eabba580 | Stefan Hajnoczi | qed_offset_into_cluster(s, acb->cur_pos) + |
1083 | eabba580 | Stefan Hajnoczi | acb->cur_qiov.size; |
1084 | eabba580 | Stefan Hajnoczi | |
1085 | eabba580 | Stefan Hajnoczi | if (ret) { |
1086 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1087 | eabba580 | Stefan Hajnoczi | return; |
1088 | eabba580 | Stefan Hajnoczi | } |
1089 | eabba580 | Stefan Hajnoczi | |
1090 | eabba580 | Stefan Hajnoczi | trace_qed_aio_write_postfill(s, acb, start, len, offset); |
1091 | eabba580 | Stefan Hajnoczi | qed_copy_from_backing_file(s, start, len, offset, |
1092 | eabba580 | Stefan Hajnoczi | qed_aio_write_main, acb); |
1093 | eabba580 | Stefan Hajnoczi | } |
1094 | eabba580 | Stefan Hajnoczi | |
1095 | eabba580 | Stefan Hajnoczi | /** |
1096 | eabba580 | Stefan Hajnoczi | * Populate front untouched region of new data cluster |
1097 | eabba580 | Stefan Hajnoczi | */ |
1098 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_prefill(void *opaque, int ret) |
1099 | eabba580 | Stefan Hajnoczi | { |
1100 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1101 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1102 | eabba580 | Stefan Hajnoczi | uint64_t start = qed_start_of_cluster(s, acb->cur_pos); |
1103 | eabba580 | Stefan Hajnoczi | uint64_t len = qed_offset_into_cluster(s, acb->cur_pos); |
1104 | eabba580 | Stefan Hajnoczi | |
1105 | eabba580 | Stefan Hajnoczi | trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); |
1106 | eabba580 | Stefan Hajnoczi | qed_copy_from_backing_file(s, start, len, acb->cur_cluster, |
1107 | eabba580 | Stefan Hajnoczi | qed_aio_write_postfill, acb); |
1108 | eabba580 | Stefan Hajnoczi | } |
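qed_aio_write_prefill() and qed_aio_write_postfill() implement copy-on-write for a sub-cluster write: the untouched head and tail of the newly allocated cluster are populated from the backing file before the guest data lands in the middle. A worked example of the same arithmetic (standalone, 64 KiB clusters assumed):

```c
#include <stdint.h>
#include <stdio.h>

#define CLUSTER_SIZE 65536ULL

static uint64_t start_of_cluster(uint64_t pos)
{
    return pos & ~(CLUSTER_SIZE - 1);
}

int main(void)
{
    uint64_t cur_pos = 70000, write_len = 1000;  /* example request */

    /* prefill: [start of cluster, start of write) */
    uint64_t pre_start = start_of_cluster(cur_pos);
    uint64_t pre_len   = cur_pos - pre_start;

    /* postfill: [end of write, end of last touched cluster) */
    uint64_t end       = cur_pos + write_len;
    uint64_t post_len  = start_of_cluster(end + CLUSTER_SIZE - 1) - end;

    printf("prefill  %llu bytes at %llu\n",
           (unsigned long long)pre_len, (unsigned long long)pre_start);
    printf("postfill %llu bytes at %llu\n",
           (unsigned long long)post_len, (unsigned long long)end);
    return 0;
}
```

With these inputs the prefill covers bytes 65536-69999 and the postfill covers 71000-131071, mirroring the `qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start` expression used in qed_aio_write_postfill().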
1109 | eabba580 | Stefan Hajnoczi | |
1110 | eabba580 | Stefan Hajnoczi | /** |
1111 | 0d09c797 | Stefan Hajnoczi | * Check if the QED_F_NEED_CHECK bit should be set during allocating write |
1112 | 0d09c797 | Stefan Hajnoczi | */ |
1113 | 0d09c797 | Stefan Hajnoczi | static bool qed_should_set_need_check(BDRVQEDState *s) |
1114 | 0d09c797 | Stefan Hajnoczi | { |
1115 | 0d09c797 | Stefan Hajnoczi | /* The flush before L2 update path ensures consistency */ |
1116 | 0d09c797 | Stefan Hajnoczi | if (s->bs->backing_hd) { |
1117 | 0d09c797 | Stefan Hajnoczi | return false; |
1118 | 0d09c797 | Stefan Hajnoczi | } |
1119 | 0d09c797 | Stefan Hajnoczi | |
1120 | 0d09c797 | Stefan Hajnoczi | return !(s->header.features & QED_F_NEED_CHECK); |
1121 | 0d09c797 | Stefan Hajnoczi | } |
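A standalone model of the policy above: the dirty bit is unnecessary when a backing file forces the flush-before-L2-update path, and the header only needs rewriting when the bit is not already set, so steady-state allocating writes pay no extra header I/O. (Image is an invented stand-in for BDRVQEDState; the flag value follows the QED feature-bit layout.)

```c
#include <stdbool.h>
#include <stdio.h>

#define QED_F_NEED_CHECK 0x02       /* feature bit, per the QED spec */

typedef struct {
    unsigned features;
    bool has_backing_file;
} Image;

static bool should_set_need_check(const Image *img)
{
    if (img->has_backing_file) {
        return false;               /* flush path ensures consistency */
    }
    return !(img->features & QED_F_NEED_CHECK);  /* only on 0 -> 1 */
}

int main(void)
{
    Image img = { 0, false };
    printf("first allocating write sets the flag: %d\n",
           should_set_need_check(&img));
    img.features |= QED_F_NEED_CHECK;
    printf("later ones do not rewrite the header: %d\n",
           should_set_need_check(&img));
    return 0;
}
```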
1122 | 0d09c797 | Stefan Hajnoczi | |
1123 | 0e71be19 | Stefan Hajnoczi | static void qed_aio_write_zero_cluster(void *opaque, int ret) |
1124 | 0e71be19 | Stefan Hajnoczi | { |
1125 | 0e71be19 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1126 | 0e71be19 | Stefan Hajnoczi | |
1127 | 0e71be19 | Stefan Hajnoczi | if (ret) { |
1128 | 0e71be19 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1129 | 0e71be19 | Stefan Hajnoczi | return; |
1130 | 0e71be19 | Stefan Hajnoczi | } |
1131 | 0e71be19 | Stefan Hajnoczi | |
1132 | 0e71be19 | Stefan Hajnoczi | qed_aio_write_l2_update(acb, 0, 1); |
1133 | 0e71be19 | Stefan Hajnoczi | } |
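Note the final call above: qed_aio_write_zero_cluster() stores the magic offset 1 in the L2 table instead of allocating space. Because real cluster offsets are cluster-aligned, the values 0 and 1 can never point at actual data, so they are free to act as markers. An illustrative decoder (enum names invented here):

```c
#include <stdint.h>
#include <stdio.h>

enum { ENTRY_UNALLOCATED, ENTRY_ZERO, ENTRY_DATA };

static int classify_l2_entry(uint64_t entry)
{
    if (entry == 0) {
        return ENTRY_UNALLOCATED;   /* read falls through to backing file */
    }
    if (entry == 1) {
        return ENTRY_ZERO;          /* reads as zeroes, no space used */
    }
    return ENTRY_DATA;              /* byte offset of the data cluster */
}

int main(void)
{
    printf("%d %d %d\n", classify_l2_entry(0), classify_l2_entry(1),
           classify_l2_entry(0x50000));
    return 0;
}
```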
1134 | 0e71be19 | Stefan Hajnoczi | |
1135 | 0d09c797 | Stefan Hajnoczi | /** |
1136 | eabba580 | Stefan Hajnoczi | * Write new data cluster |
1137 | eabba580 | Stefan Hajnoczi | * |
1138 | eabba580 | Stefan Hajnoczi | * @acb: Write request |
1139 | eabba580 | Stefan Hajnoczi | * @len: Length in bytes |
1140 | eabba580 | Stefan Hajnoczi | * |
1141 | eabba580 | Stefan Hajnoczi | * This path is taken when writing to previously unallocated clusters. |
1142 | eabba580 | Stefan Hajnoczi | */ |
1143 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) |
1144 | eabba580 | Stefan Hajnoczi | { |
1145 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1146 | 0e71be19 | Stefan Hajnoczi | BlockDriverCompletionFunc *cb; |
1147 | eabba580 | Stefan Hajnoczi | |
1148 | 6f321e93 | Stefan Hajnoczi | /* Cancel timer when the first allocating request comes in */ |
1149 | 6f321e93 | Stefan Hajnoczi | if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { |
1150 | 6f321e93 | Stefan Hajnoczi | qed_cancel_need_check_timer(s); |
1151 | 6f321e93 | Stefan Hajnoczi | } |
1152 | 6f321e93 | Stefan Hajnoczi | |
1153 | eabba580 | Stefan Hajnoczi | /* Freeze this request if another allocating write is in progress */ |
1154 | eabba580 | Stefan Hajnoczi | if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { |
1155 | eabba580 | Stefan Hajnoczi | QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); |
1156 | eabba580 | Stefan Hajnoczi | } |
1157 | 6f321e93 | Stefan Hajnoczi | if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || |
1158 | 6f321e93 | Stefan Hajnoczi | s->allocating_write_reqs_plugged) { |
1159 | eabba580 | Stefan Hajnoczi | return; /* wait for existing request to finish */ |
1160 | eabba580 | Stefan Hajnoczi | } |
1161 | eabba580 | Stefan Hajnoczi | |
1162 | eabba580 | Stefan Hajnoczi | acb->cur_nclusters = qed_bytes_to_clusters(s, |
1163 | eabba580 | Stefan Hajnoczi | qed_offset_into_cluster(s, acb->cur_pos) + len); |
1164 | 1b093c48 | Michael Tokarev | qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); |
1165 | eabba580 | Stefan Hajnoczi | |
1166 | 0e71be19 | Stefan Hajnoczi | if (acb->flags & QED_AIOCB_ZERO) { |
1167 | 0e71be19 | Stefan Hajnoczi | /* Skip ahead if the clusters are already zero */ |
1168 | 0e71be19 | Stefan Hajnoczi | if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { |
1169 | 0e71be19 | Stefan Hajnoczi | qed_aio_next_io(acb, 0); |
1170 | 0e71be19 | Stefan Hajnoczi | return; |
1171 | 0e71be19 | Stefan Hajnoczi | } |
1172 | 0e71be19 | Stefan Hajnoczi | |
1173 | 0e71be19 | Stefan Hajnoczi | cb = qed_aio_write_zero_cluster; |
1174 | 0e71be19 | Stefan Hajnoczi | } else { |
1175 | 0e71be19 | Stefan Hajnoczi | cb = qed_aio_write_prefill; |
1176 | 0e71be19 | Stefan Hajnoczi | acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); |
1177 | 0e71be19 | Stefan Hajnoczi | } |
1178 | 0e71be19 | Stefan Hajnoczi | |
1179 | 0d09c797 | Stefan Hajnoczi | if (qed_should_set_need_check(s)) { |
1180 | 0d09c797 | Stefan Hajnoczi | s->header.features |= QED_F_NEED_CHECK; |
1181 | 0e71be19 | Stefan Hajnoczi | qed_write_header(s, cb, acb); |
1182 | 0d09c797 | Stefan Hajnoczi | } else { |
1183 | 0e71be19 | Stefan Hajnoczi | cb(acb, 0); |
1184 | 01979a98 | Stefan Hajnoczi | } |
1185 | eabba580 | Stefan Hajnoczi | } |
1186 | eabba580 | Stefan Hajnoczi | |
1187 | eabba580 | Stefan Hajnoczi | /** |
1188 | eabba580 | Stefan Hajnoczi | * Write data cluster in place |
1189 | eabba580 | Stefan Hajnoczi | * |
1190 | eabba580 | Stefan Hajnoczi | * @acb: Write request |
1191 | eabba580 | Stefan Hajnoczi | * @offset: Cluster offset in bytes |
1192 | eabba580 | Stefan Hajnoczi | * @len: Length in bytes |
1193 | eabba580 | Stefan Hajnoczi | * |
1194 | eabba580 | Stefan Hajnoczi | * This path is taken when writing to already allocated clusters. |
1195 | eabba580 | Stefan Hajnoczi | */ |
1196 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) |
1197 | eabba580 | Stefan Hajnoczi | { |
1198 | 0e71be19 | Stefan Hajnoczi | /* Allocate buffer for zero writes */ |
1199 | 0e71be19 | Stefan Hajnoczi | if (acb->flags & QED_AIOCB_ZERO) { |
1200 | 0e71be19 | Stefan Hajnoczi | struct iovec *iov = acb->qiov->iov; |
1201 | 0e71be19 | Stefan Hajnoczi | |
1202 | 0e71be19 | Stefan Hajnoczi | if (!iov->iov_base) { |
1203 | 0e71be19 | Stefan Hajnoczi | iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); |
1204 | 0e71be19 | Stefan Hajnoczi | memset(iov->iov_base, 0, iov->iov_len); |
1205 | 0e71be19 | Stefan Hajnoczi | } |
1206 | 0e71be19 | Stefan Hajnoczi | } |
1207 | 0e71be19 | Stefan Hajnoczi | |
1208 | eabba580 | Stefan Hajnoczi | /* Calculate the I/O vector */ |
1209 | eabba580 | Stefan Hajnoczi | acb->cur_cluster = offset; |
1210 | 1b093c48 | Michael Tokarev | qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); |
1211 | eabba580 | Stefan Hajnoczi | |
1212 | eabba580 | Stefan Hajnoczi | /* Do the actual write */ |
1213 | eabba580 | Stefan Hajnoczi | qed_aio_write_main(acb, 0); |
1214 | eabba580 | Stefan Hajnoczi | } |
1215 | eabba580 | Stefan Hajnoczi | |
1216 | eabba580 | Stefan Hajnoczi | /** |
1217 | eabba580 | Stefan Hajnoczi | * Write data cluster |
1218 | eabba580 | Stefan Hajnoczi | * |
1219 | eabba580 | Stefan Hajnoczi | * @opaque: Write request |
1220 | eabba580 | Stefan Hajnoczi | * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, |
1221 | eabba580 | Stefan Hajnoczi | * or -errno |
1222 | eabba580 | Stefan Hajnoczi | * @offset: Cluster offset in bytes |
1223 | eabba580 | Stefan Hajnoczi | * @len: Length in bytes |
1224 | eabba580 | Stefan Hajnoczi | * |
1225 | eabba580 | Stefan Hajnoczi | * Callback from qed_find_cluster(). |
1226 | eabba580 | Stefan Hajnoczi | */ |
1227 | eabba580 | Stefan Hajnoczi | static void qed_aio_write_data(void *opaque, int ret, |
1228 | eabba580 | Stefan Hajnoczi | uint64_t offset, size_t len) |
1229 | eabba580 | Stefan Hajnoczi | { |
1230 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1231 | eabba580 | Stefan Hajnoczi | |
1232 | eabba580 | Stefan Hajnoczi | trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len); |
1233 | eabba580 | Stefan Hajnoczi | |
1234 | eabba580 | Stefan Hajnoczi | acb->find_cluster_ret = ret; |
1235 | eabba580 | Stefan Hajnoczi | |
1236 | eabba580 | Stefan Hajnoczi | switch (ret) { |
1237 | eabba580 | Stefan Hajnoczi | case QED_CLUSTER_FOUND: |
1238 | eabba580 | Stefan Hajnoczi | qed_aio_write_inplace(acb, offset, len); |
1239 | eabba580 | Stefan Hajnoczi | break; |
1240 | eabba580 | Stefan Hajnoczi | |
1241 | eabba580 | Stefan Hajnoczi | case QED_CLUSTER_L2: |
1242 | eabba580 | Stefan Hajnoczi | case QED_CLUSTER_L1: |
1243 | 21df65b6 | Anthony Liguori | case QED_CLUSTER_ZERO: |
1244 | eabba580 | Stefan Hajnoczi | qed_aio_write_alloc(acb, len); |
1245 | eabba580 | Stefan Hajnoczi | break; |
1246 | eabba580 | Stefan Hajnoczi | |
1247 | eabba580 | Stefan Hajnoczi | default: |
1248 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1249 | eabba580 | Stefan Hajnoczi | break; |
1250 | eabba580 | Stefan Hajnoczi | } |
1251 | eabba580 | Stefan Hajnoczi | } |
1252 | eabba580 | Stefan Hajnoczi | |
1253 | eabba580 | Stefan Hajnoczi | /** |
1254 | eabba580 | Stefan Hajnoczi | * Read data cluster |
1255 | eabba580 | Stefan Hajnoczi | * |
1256 | eabba580 | Stefan Hajnoczi | * @opaque: Read request |
1257 | eabba580 | Stefan Hajnoczi | * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, |
1258 | eabba580 | Stefan Hajnoczi | * or -errno |
1259 | eabba580 | Stefan Hajnoczi | * @offset: Cluster offset in bytes |
1260 | eabba580 | Stefan Hajnoczi | * @len: Length in bytes |
1261 | eabba580 | Stefan Hajnoczi | * |
1262 | eabba580 | Stefan Hajnoczi | * Callback from qed_find_cluster(). |
1263 | eabba580 | Stefan Hajnoczi | */ |
1264 | eabba580 | Stefan Hajnoczi | static void qed_aio_read_data(void *opaque, int ret, |
1265 | eabba580 | Stefan Hajnoczi | uint64_t offset, size_t len) |
1266 | eabba580 | Stefan Hajnoczi | { |
1267 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1268 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1269 | eabba580 | Stefan Hajnoczi | BlockDriverState *bs = acb->common.bs; |
1270 | eabba580 | Stefan Hajnoczi | |
1271 | eabba580 | Stefan Hajnoczi | /* Adjust offset into cluster */ |
1272 | eabba580 | Stefan Hajnoczi | offset += qed_offset_into_cluster(s, acb->cur_pos); |
1273 | eabba580 | Stefan Hajnoczi | |
1274 | eabba580 | Stefan Hajnoczi | trace_qed_aio_read_data(s, acb, ret, offset, len); |
1275 | eabba580 | Stefan Hajnoczi | |
1276 | eabba580 | Stefan Hajnoczi | if (ret < 0) { |
1277 | eabba580 | Stefan Hajnoczi | goto err; |
1278 | eabba580 | Stefan Hajnoczi | } |
1279 | eabba580 | Stefan Hajnoczi | |
1280 | 1b093c48 | Michael Tokarev | qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); |
1281 | eabba580 | Stefan Hajnoczi | |
1282 | 21df65b6 | Anthony Liguori | /* Handle zero cluster and backing file reads */ |
1283 | 21df65b6 | Anthony Liguori | if (ret == QED_CLUSTER_ZERO) { |
1284 | 3d9b4925 | Michael Tokarev | qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); |
1285 | 21df65b6 | Anthony Liguori | qed_aio_next_io(acb, 0); |
1286 | 21df65b6 | Anthony Liguori | return; |
1287 | 21df65b6 | Anthony Liguori | } else if (ret != QED_CLUSTER_FOUND) { |
1288 | eabba580 | Stefan Hajnoczi | qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, |
1289 | eabba580 | Stefan Hajnoczi | qed_aio_next_io, acb); |
1290 | eabba580 | Stefan Hajnoczi | return; |
1291 | eabba580 | Stefan Hajnoczi | } |
1292 | eabba580 | Stefan Hajnoczi | |
1293 | eabba580 | Stefan Hajnoczi | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); |
1294 | ad54ae80 | Paolo Bonzini | bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE, |
1295 | ad54ae80 | Paolo Bonzini | &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, |
1296 | ad54ae80 | Paolo Bonzini | qed_aio_next_io, acb); |
1297 | eabba580 | Stefan Hajnoczi | return; |
1298 | eabba580 | Stefan Hajnoczi | |
1299 | eabba580 | Stefan Hajnoczi | err: |
1300 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1301 | eabba580 | Stefan Hajnoczi | } |
1302 | eabba580 | Stefan Hajnoczi | |
1303 | eabba580 | Stefan Hajnoczi | /** |
1304 | eabba580 | Stefan Hajnoczi | * Begin next I/O or complete the request |
1305 | eabba580 | Stefan Hajnoczi | */ |
1306 | eabba580 | Stefan Hajnoczi | static void qed_aio_next_io(void *opaque, int ret) |
1307 | eabba580 | Stefan Hajnoczi | { |
1308 | eabba580 | Stefan Hajnoczi | QEDAIOCB *acb = opaque; |
1309 | eabba580 | Stefan Hajnoczi | BDRVQEDState *s = acb_to_s(acb); |
1310 | 6e4f59bd | Stefan Hajnoczi | QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? |
1311 | 6e4f59bd | Stefan Hajnoczi | qed_aio_write_data : qed_aio_read_data; |
1312 | eabba580 | Stefan Hajnoczi | |
1313 | eabba580 | Stefan Hajnoczi | trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); |
1314 | eabba580 | Stefan Hajnoczi | |
1315 | eabba580 | Stefan Hajnoczi | /* Handle I/O error */ |
1316 | eabba580 | Stefan Hajnoczi | if (ret) { |
1317 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, ret); |
1318 | eabba580 | Stefan Hajnoczi | return; |
1319 | eabba580 | Stefan Hajnoczi | } |
1320 | eabba580 | Stefan Hajnoczi | |
1321 | eabba580 | Stefan Hajnoczi | acb->qiov_offset += acb->cur_qiov.size; |
1322 | eabba580 | Stefan Hajnoczi | acb->cur_pos += acb->cur_qiov.size; |
1323 | eabba580 | Stefan Hajnoczi | qemu_iovec_reset(&acb->cur_qiov); |
1324 | eabba580 | Stefan Hajnoczi | |
1325 | eabba580 | Stefan Hajnoczi | /* Complete request */ |
1326 | eabba580 | Stefan Hajnoczi | if (acb->cur_pos >= acb->end_pos) { |
1327 | eabba580 | Stefan Hajnoczi | qed_aio_complete(acb, 0); |
1328 | eabba580 | Stefan Hajnoczi | return; |
1329 | eabba580 | Stefan Hajnoczi | } |
1330 | eabba580 | Stefan Hajnoczi | |
1331 | eabba580 | Stefan Hajnoczi | /* Find next cluster and start I/O */
|
1332 | eabba580 | Stefan Hajnoczi | qed_find_cluster(s, &acb->request, |
1333 | eabba580 | Stefan Hajnoczi | acb->cur_pos, acb->end_pos - acb->cur_pos, |
1334 | eabba580 | Stefan Hajnoczi | io_fn, acb); |
1335 | eabba580 | Stefan Hajnoczi | } |
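qed_aio_next_io() drives the whole request as a loop: consume the bytes just finished, stop once cur_pos reaches end_pos, otherwise look up the next cluster run and issue more I/O. A standalone model of that loop (find_run_len() is an invented stand-in for qed_find_cluster(), which can return longer contiguous runs spanning several clusters):

```c
#include <stdint.h>
#include <stdio.h>

#define CLUSTER_SIZE 65536ULL

/* Stand-in lookup: here every cluster boundary ends a run */
static uint64_t find_run_len(uint64_t pos, uint64_t remaining)
{
    uint64_t to_boundary = CLUSTER_SIZE - (pos % CLUSTER_SIZE);
    return remaining < to_boundary ? remaining : to_boundary;
}

int main(void)
{
    uint64_t cur_pos = 60000, end_pos = 200000;   /* example request */

    while (cur_pos < end_pos) {
        uint64_t len = find_run_len(cur_pos, end_pos - cur_pos);
        printf("I/O: %llu bytes at %llu\n",
               (unsigned long long)len, (unsigned long long)cur_pos);
        cur_pos += len;             /* like acb->cur_pos += cur_qiov.size */
    }
    return 0;                       /* request complete */
}
```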
1336 | eabba580 | Stefan Hajnoczi | |
1337 | eabba580 | Stefan Hajnoczi | static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, |
1338 | eabba580 | Stefan Hajnoczi | int64_t sector_num, |
1339 | eabba580 | Stefan Hajnoczi | QEMUIOVector *qiov, int nb_sectors, |
1340 | eabba580 | Stefan Hajnoczi | BlockDriverCompletionFunc *cb, |
1341 | 6e4f59bd | Stefan Hajnoczi | void *opaque, int flags) |
1342 | eabba580 | Stefan Hajnoczi | { |
1343 | d7331bed | Stefan Hajnoczi | QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); |
1344 | eabba580 | Stefan Hajnoczi | |
1345 | eabba580 | Stefan Hajnoczi | trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, |
1346 | 6e4f59bd | Stefan Hajnoczi | opaque, flags); |
1347 | eabba580 | Stefan Hajnoczi | |
1348 | 6e4f59bd | Stefan Hajnoczi | acb->flags = flags; |
1349 | eabba580 | Stefan Hajnoczi | acb->finished = NULL; |
1350 | eabba580 | Stefan Hajnoczi | acb->qiov = qiov; |
1351 | eabba580 | Stefan Hajnoczi | acb->qiov_offset = 0; |
1352 | eabba580 | Stefan Hajnoczi | acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; |
1353 | eabba580 | Stefan Hajnoczi | acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; |
1354 | eabba580 | Stefan Hajnoczi | acb->request.l2_table = NULL; |
1355 | eabba580 | Stefan Hajnoczi | qemu_iovec_init(&acb->cur_qiov, qiov->niov); |
1356 | eabba580 | Stefan Hajnoczi | |
1357 | eabba580 | Stefan Hajnoczi | /* Start request */ |
1358 | eabba580 | Stefan Hajnoczi | qed_aio_next_io(acb, 0); |
1359 | eabba580 | Stefan Hajnoczi | return &acb->common; |
1360 | eabba580 | Stefan Hajnoczi | } |
1361 | eabba580 | Stefan Hajnoczi | |
1362 | 75411d23 | Stefan Hajnoczi | static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, |
1363 | 75411d23 | Stefan Hajnoczi | int64_t sector_num, |
1364 | 75411d23 | Stefan Hajnoczi | QEMUIOVector *qiov, int nb_sectors, |
1365 | 75411d23 | Stefan Hajnoczi | BlockDriverCompletionFunc *cb, |
1366 | 75411d23 | Stefan Hajnoczi | void *opaque) |
1367 | 75411d23 | Stefan Hajnoczi | { |
1368 | 6e4f59bd | Stefan Hajnoczi | return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); |
1369 | 75411d23 | Stefan Hajnoczi | } |
1370 | 75411d23 | Stefan Hajnoczi | |
1371 | 75411d23 | Stefan Hajnoczi | static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, |
1372 | 75411d23 | Stefan Hajnoczi | int64_t sector_num, |
1373 | 75411d23 | Stefan Hajnoczi | QEMUIOVector *qiov, int nb_sectors, |
1374 | 75411d23 | Stefan Hajnoczi | BlockDriverCompletionFunc *cb, |
1375 | 75411d23 | Stefan Hajnoczi | void *opaque) |
1376 | 75411d23 | Stefan Hajnoczi | { |
1377 | 6e4f59bd | Stefan Hajnoczi | return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, |
1378 | 6e4f59bd | Stefan Hajnoczi | opaque, QED_AIOCB_WRITE); |
1379 | 75411d23 | Stefan Hajnoczi | } |
1380 | 75411d23 | Stefan Hajnoczi | |
1381 | 0e71be19 | Stefan Hajnoczi | typedef struct { |
1382 | 0e71be19 | Stefan Hajnoczi | Coroutine *co; |
1383 | 0e71be19 | Stefan Hajnoczi | int ret; |
1384 | 0e71be19 | Stefan Hajnoczi | bool done; |
1385 | 0e71be19 | Stefan Hajnoczi | } QEDWriteZeroesCB; |
1386 | 0e71be19 | Stefan Hajnoczi | |
1387 | 0e71be19 | Stefan Hajnoczi | static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret) |
1388 | 0e71be19 | Stefan Hajnoczi | { |
1389 | 0e71be19 | Stefan Hajnoczi | QEDWriteZeroesCB *cb = opaque; |
1390 | 0e71be19 | Stefan Hajnoczi | |
1391 | 0e71be19 | Stefan Hajnoczi | cb->done = true; |
1392 | 0e71be19 | Stefan Hajnoczi | cb->ret = ret; |
1393 | 0e71be19 | Stefan Hajnoczi | if (cb->co) { |
1394 | 0e71be19 | Stefan Hajnoczi | qemu_coroutine_enter(cb->co, NULL); |
1395 | 0e71be19 | Stefan Hajnoczi | } |
1396 | 0e71be19 | Stefan Hajnoczi | } |
1397 | 0e71be19 | Stefan Hajnoczi | |
1398 | 0e71be19 | Stefan Hajnoczi | static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, |
1399 | 0e71be19 | Stefan Hajnoczi | int64_t sector_num, |
1400 | 0e71be19 | Stefan Hajnoczi | int nb_sectors) |
1401 | 0e71be19 | Stefan Hajnoczi | { |
1402 | 0e71be19 | Stefan Hajnoczi | BlockDriverAIOCB *blockacb; |
1403 | ef72f76e | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1404 | 0e71be19 | Stefan Hajnoczi | QEDWriteZeroesCB cb = { .done = false }; |
1405 | 0e71be19 | Stefan Hajnoczi | QEMUIOVector qiov; |
1406 | 0e71be19 | Stefan Hajnoczi | struct iovec iov; |
1407 | 0e71be19 | Stefan Hajnoczi | |
1408 | ef72f76e | Stefan Hajnoczi | /* Refuse if there are untouched backing file sectors */ |
1409 | ef72f76e | Stefan Hajnoczi | if (bs->backing_hd) { |
1410 | ef72f76e | Stefan Hajnoczi | if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) { |
1411 | ef72f76e | Stefan Hajnoczi | return -ENOTSUP; |
1412 | ef72f76e | Stefan Hajnoczi | } |
1413 | ef72f76e | Stefan Hajnoczi | if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) { |
1414 | ef72f76e | Stefan Hajnoczi | return -ENOTSUP; |
1415 | ef72f76e | Stefan Hajnoczi | } |
1416 | ef72f76e | Stefan Hajnoczi | } |
1417 | ef72f76e | Stefan Hajnoczi | |
1418 | 0e71be19 | Stefan Hajnoczi | /* Zero writes start without an I/O buffer. If a buffer becomes necessary |
1419 | 0e71be19 | Stefan Hajnoczi | * then it will be allocated during request processing. |
1420 | 0e71be19 | Stefan Hajnoczi | */ |
1421 | 0e71be19 | Stefan Hajnoczi | iov.iov_base = NULL, |
1422 | 0e71be19 | Stefan Hajnoczi | iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE, |
1423 | 0e71be19 | Stefan Hajnoczi | |
1424 | 0e71be19 | Stefan Hajnoczi | qemu_iovec_init_external(&qiov, &iov, 1); |
1425 | 0e71be19 | Stefan Hajnoczi | blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors, |
1426 | 0e71be19 | Stefan Hajnoczi | qed_co_write_zeroes_cb, &cb, |
1427 | 0e71be19 | Stefan Hajnoczi | QED_AIOCB_WRITE | QED_AIOCB_ZERO); |
1428 | 0e71be19 | Stefan Hajnoczi | if (!blockacb) { |
1429 | 0e71be19 | Stefan Hajnoczi | return -EIO; |
1430 | 0e71be19 | Stefan Hajnoczi | } |
1431 | 0e71be19 | Stefan Hajnoczi | if (!cb.done) { |
1432 | 0e71be19 | Stefan Hajnoczi | cb.co = qemu_coroutine_self(); |
1433 | 0e71be19 | Stefan Hajnoczi | qemu_coroutine_yield(); |
1434 | 0e71be19 | Stefan Hajnoczi | } |
1435 | 0e71be19 | Stefan Hajnoczi | assert(cb.done); |
1436 | 0e71be19 | Stefan Hajnoczi | return cb.ret; |
1437 | 0e71be19 | Stefan Hajnoczi | } |
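bdrv_qed_co_write_zeroes() bridges callback-style AIO into a coroutine: if the completion callback has not fired by the time setup returns, the coroutine records itself in cb.co and yields; the callback later re-enters it. The sketch below models the same handshake with POSIX ucontext, since QEMU's qemu_coroutine_* primitives are internal; every name here is invented for the illustration:

```c
#include <stdbool.h>
#include <stdio.h>
#include <ucontext.h>

static ucontext_t main_ctx, co_ctx;

typedef struct {
    int ret;
    bool done;
} CompletionState;

static CompletionState pending;     /* what the coroutine waits on */

static void completion_cb(int ret)
{
    pending.done = true;
    pending.ret = ret;
    swapcontext(&main_ctx, &co_ctx);   /* like qemu_coroutine_enter() */
}

static void coroutine_body(void)
{
    pending.done = false;
    /* ...async work would be submitted here; completion_cb fires later... */
    if (!pending.done) {
        swapcontext(&co_ctx, &main_ctx);   /* like qemu_coroutine_yield() */
    }
    printf("completion ret=%d\n", pending.ret);
}

int main(void)
{
    static char stack[64 * 1024];

    getcontext(&co_ctx);
    co_ctx.uc_stack.ss_sp = stack;
    co_ctx.uc_stack.ss_size = sizeof(stack);
    co_ctx.uc_link = &main_ctx;
    makecontext(&co_ctx, coroutine_body, 0);

    swapcontext(&main_ctx, &co_ctx);   /* start coroutine; it yields */
    completion_cb(0);                  /* "event loop" delivers completion */
    return 0;
}
```

The `if (!cb.done)` test in the real code matters because the callback may run synchronously before qed_aio_setup() returns, in which case yielding would deadlock.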
1438 | 0e71be19 | Stefan Hajnoczi | |
1439 | 75411d23 | Stefan Hajnoczi | static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset) |
1440 | 75411d23 | Stefan Hajnoczi | { |
1441 | 77a5a000 | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1442 | 77a5a000 | Stefan Hajnoczi | uint64_t old_image_size; |
1443 | 77a5a000 | Stefan Hajnoczi | int ret; |
1444 | 77a5a000 | Stefan Hajnoczi | |
1445 | 77a5a000 | Stefan Hajnoczi | if (!qed_is_image_size_valid(offset, s->header.cluster_size, |
1446 | 77a5a000 | Stefan Hajnoczi | s->header.table_size)) { |
1447 | 77a5a000 | Stefan Hajnoczi | return -EINVAL; |
1448 | 77a5a000 | Stefan Hajnoczi | } |
1449 | 77a5a000 | Stefan Hajnoczi | |
1450 | 77a5a000 | Stefan Hajnoczi | /* Shrinking is currently not supported */ |
1451 | 77a5a000 | Stefan Hajnoczi | if ((uint64_t)offset < s->header.image_size) { |
1452 | 77a5a000 | Stefan Hajnoczi | return -ENOTSUP; |
1453 | 77a5a000 | Stefan Hajnoczi | } |
1454 | 77a5a000 | Stefan Hajnoczi | |
1455 | 77a5a000 | Stefan Hajnoczi | old_image_size = s->header.image_size; |
1456 | 77a5a000 | Stefan Hajnoczi | s->header.image_size = offset; |
1457 | 77a5a000 | Stefan Hajnoczi | ret = qed_write_header_sync(s); |
1458 | 77a5a000 | Stefan Hajnoczi | if (ret < 0) { |
1459 | 77a5a000 | Stefan Hajnoczi | s->header.image_size = old_image_size; |
1460 | 77a5a000 | Stefan Hajnoczi | } |
1461 | 77a5a000 | Stefan Hajnoczi | return ret; |
1462 | 75411d23 | Stefan Hajnoczi | } |
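bdrv_qed_truncate() grows the image by updating the in-memory header, persisting it synchronously, and rolling the field back if the write fails, so memory never disagrees with disk. The same pattern in miniature (write_header_sync() is a stub standing in for qed_write_header_sync()):

```c
#include <stdint.h>

typedef struct {
    uint64_t image_size;
} Header;

/* Stub for qed_write_header_sync(); returns 0 or -errno */
static int write_header_sync(const Header *h)
{
    (void)h;
    return 0;
}

static int truncate_grow(Header *h, uint64_t new_size)
{
    uint64_t old_size = h->image_size;
    int ret;

    h->image_size = new_size;
    ret = write_header_sync(h);
    if (ret < 0) {
        h->image_size = old_size;   /* roll back on I/O failure */
    }
    return ret;
}

int main(void)
{
    Header h = { .image_size = 1 << 20 };
    return truncate_grow(&h, 2 << 20);
}
```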
1463 | 75411d23 | Stefan Hajnoczi | |
1464 | 75411d23 | Stefan Hajnoczi | static int64_t bdrv_qed_getlength(BlockDriverState *bs) |
1465 | 75411d23 | Stefan Hajnoczi | { |
1466 | 75411d23 | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1467 | 75411d23 | Stefan Hajnoczi | return s->header.image_size; |
1468 | 75411d23 | Stefan Hajnoczi | } |
1469 | 75411d23 | Stefan Hajnoczi | |
1470 | 75411d23 | Stefan Hajnoczi | static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) |
1471 | 75411d23 | Stefan Hajnoczi | { |
1472 | 75411d23 | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1473 | 75411d23 | Stefan Hajnoczi | |
1474 | 75411d23 | Stefan Hajnoczi | memset(bdi, 0, sizeof(*bdi)); |
1475 | 75411d23 | Stefan Hajnoczi | bdi->cluster_size = s->header.cluster_size; |
1476 | d68dbee8 | Dong Xu Wang | bdi->is_dirty = s->header.features & QED_F_NEED_CHECK; |
1477 | 75411d23 | Stefan Hajnoczi | return 0; |
1478 | 75411d23 | Stefan Hajnoczi | } |
1479 | 75411d23 | Stefan Hajnoczi | |
1480 | 75411d23 | Stefan Hajnoczi | static int bdrv_qed_change_backing_file(BlockDriverState *bs, |
1481 | 75411d23 | Stefan Hajnoczi | const char *backing_file, |
1482 | 75411d23 | Stefan Hajnoczi | const char *backing_fmt) |
1483 | 75411d23 | Stefan Hajnoczi | { |
1484 | 75411d23 | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1485 | 75411d23 | Stefan Hajnoczi | QEDHeader new_header, le_header; |
1486 | 75411d23 | Stefan Hajnoczi | void *buffer; |
1487 | 75411d23 | Stefan Hajnoczi | size_t buffer_len, backing_file_len; |
1488 | 75411d23 | Stefan Hajnoczi | int ret; |
1489 | 75411d23 | Stefan Hajnoczi | |
1490 | 75411d23 | Stefan Hajnoczi | /* Refuse to set backing filename if unknown compat feature bits are |
1491 | 75411d23 | Stefan Hajnoczi | * active. If the image uses an unknown compat feature then we may not |
1492 | 75411d23 | Stefan Hajnoczi | * know the layout of data following the header structure and cannot safely |
1493 | 75411d23 | Stefan Hajnoczi | * add a new string. |
1494 | 75411d23 | Stefan Hajnoczi | */ |
1495 | 75411d23 | Stefan Hajnoczi | if (backing_file && (s->header.compat_features & |
1496 | 75411d23 | Stefan Hajnoczi | ~QED_COMPAT_FEATURE_MASK)) { |
1497 | 75411d23 | Stefan Hajnoczi | return -ENOTSUP; |
1498 | 75411d23 | Stefan Hajnoczi | } |
1499 | 75411d23 | Stefan Hajnoczi | |
1500 | 75411d23 | Stefan Hajnoczi | memcpy(&new_header, &s->header, sizeof(new_header)); |
1501 | 75411d23 | Stefan Hajnoczi | |
1502 | 75411d23 | Stefan Hajnoczi | new_header.features &= ~(QED_F_BACKING_FILE | |
1503 | 75411d23 | Stefan Hajnoczi | QED_F_BACKING_FORMAT_NO_PROBE); |
1504 | 75411d23 | Stefan Hajnoczi | |
1505 | 75411d23 | Stefan Hajnoczi | /* Adjust feature flags */ |
1506 | 75411d23 | Stefan Hajnoczi | if (backing_file) { |
1507 | 75411d23 | Stefan Hajnoczi | new_header.features |= QED_F_BACKING_FILE; |
1508 | 75411d23 | Stefan Hajnoczi | |
1509 | 75411d23 | Stefan Hajnoczi | if (qed_fmt_is_raw(backing_fmt)) { |
1510 | 75411d23 | Stefan Hajnoczi | new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE; |
1511 | 75411d23 | Stefan Hajnoczi | } |
1512 | 75411d23 | Stefan Hajnoczi | } |
1513 | 75411d23 | Stefan Hajnoczi | |
1514 | 75411d23 | Stefan Hajnoczi | /* Calculate new header size */ |
1515 | 75411d23 | Stefan Hajnoczi | backing_file_len = 0; |
1516 | 75411d23 | Stefan Hajnoczi | |
1517 | 75411d23 | Stefan Hajnoczi | if (backing_file) { |
1518 | 75411d23 | Stefan Hajnoczi | backing_file_len = strlen(backing_file); |
1519 | 75411d23 | Stefan Hajnoczi | } |
1520 | 75411d23 | Stefan Hajnoczi | |
1521 | 75411d23 | Stefan Hajnoczi | buffer_len = sizeof(new_header); |
1522 | 75411d23 | Stefan Hajnoczi | new_header.backing_filename_offset = buffer_len; |
1523 | 75411d23 | Stefan Hajnoczi | new_header.backing_filename_size = backing_file_len; |
1524 | 75411d23 | Stefan Hajnoczi | buffer_len += backing_file_len; |
1525 | 75411d23 | Stefan Hajnoczi | |
1526 | 75411d23 | Stefan Hajnoczi | /* Make sure we can rewrite header without failing */ |
1527 | 75411d23 | Stefan Hajnoczi | if (buffer_len > new_header.header_size * new_header.cluster_size) { |
1528 | 75411d23 | Stefan Hajnoczi | return -ENOSPC; |
1529 | 75411d23 | Stefan Hajnoczi | } |
1530 | 75411d23 | Stefan Hajnoczi | |
1531 | 75411d23 | Stefan Hajnoczi | /* Prepare new header */ |
1532 | 7267c094 | Anthony Liguori | buffer = g_malloc(buffer_len); |
1533 | 75411d23 | Stefan Hajnoczi | |
1534 | 75411d23 | Stefan Hajnoczi | qed_header_cpu_to_le(&new_header, &le_header); |
1535 | 75411d23 | Stefan Hajnoczi | memcpy(buffer, &le_header, sizeof(le_header)); |
1536 | 75411d23 | Stefan Hajnoczi | buffer_len = sizeof(le_header); |
1537 | 75411d23 | Stefan Hajnoczi | |
1538 | feba23b1 | Pavel Borzenkov | if (backing_file) { |
1539 | feba23b1 | Pavel Borzenkov | memcpy(buffer + buffer_len, backing_file, backing_file_len); |
1540 | feba23b1 | Pavel Borzenkov | buffer_len += backing_file_len; |
1541 | feba23b1 | Pavel Borzenkov | } |
1542 | 75411d23 | Stefan Hajnoczi | |
1543 | 75411d23 | Stefan Hajnoczi | /* Write new header */ |
1544 | 75411d23 | Stefan Hajnoczi | ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); |
1545 | 7267c094 | Anthony Liguori | g_free(buffer); |
1546 | 75411d23 | Stefan Hajnoczi | if (ret == 0) { |
1547 | 75411d23 | Stefan Hajnoczi | memcpy(&s->header, &new_header, sizeof(new_header)); |
1548 | 75411d23 | Stefan Hajnoczi | } |
1549 | 75411d23 | Stefan Hajnoczi | return ret; |
1550 | 75411d23 | Stefan Hajnoczi | } |
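bdrv_qed_change_backing_file() serializes the header and the backing filename into one contiguous buffer so that a single synchronous write replaces both atomically with respect to ordering. A reduced sketch of that layout (MiniHeader keeps only the two fields that matter here; the real QEDHeader has many more):

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    uint32_t backing_filename_offset;
    uint32_t backing_filename_size;
    /* ...the real QEDHeader has many more fields... */
} MiniHeader;

int main(void)
{
    const char *backing_file = "base.img";   /* example name */
    size_t name_len = strlen(backing_file);

    MiniHeader h = {
        .backing_filename_offset = sizeof(MiniHeader),
        .backing_filename_size   = (uint32_t)name_len,
    };

    size_t buffer_len = sizeof(h) + name_len;
    unsigned char *buffer = malloc(buffer_len);
    if (!buffer) {
        return 1;
    }

    memcpy(buffer, &h, sizeof(h));                        /* header first */
    memcpy(buffer + sizeof(h), backing_file, name_len);   /* then the name */

    /* one bdrv_pwrite_sync()-style call would now persist the buffer */
    free(buffer);
    return 0;
}
```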
1551 | 75411d23 | Stefan Hajnoczi | |
1552 | c82954e5 | Benoît Canet | static void bdrv_qed_invalidate_cache(BlockDriverState *bs) |
1553 | c82954e5 | Benoît Canet | { |
1554 | c82954e5 | Benoît Canet | BDRVQEDState *s = bs->opaque; |
1555 | c82954e5 | Benoît Canet | |
1556 | c82954e5 | Benoît Canet | bdrv_qed_close(bs); |
1557 | c82954e5 | Benoît Canet | memset(s, 0, sizeof(BDRVQEDState)); |
1558 | 015a1036 | Max Reitz | bdrv_qed_open(bs, NULL, bs->open_flags, NULL); |
1559 | c82954e5 | Benoît Canet | } |
1560 | c82954e5 | Benoît Canet | |
1561 | 4534ff54 | Kevin Wolf | static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result, |
1562 | 4534ff54 | Kevin Wolf | BdrvCheckMode fix) |
1563 | 75411d23 | Stefan Hajnoczi | { |
1564 | 01979a98 | Stefan Hajnoczi | BDRVQEDState *s = bs->opaque; |
1565 | 01979a98 | Stefan Hajnoczi | |
1566 | 4534ff54 | Kevin Wolf | return qed_check(s, result, !!fix); |
1567 | 75411d23 | Stefan Hajnoczi | } |
1568 | 75411d23 | Stefan Hajnoczi | |
1569 | 75411d23 | Stefan Hajnoczi | static QEMUOptionParameter qed_create_options[] = { |
1570 | 75411d23 | Stefan Hajnoczi | { |
1571 | 75411d23 | Stefan Hajnoczi | .name = BLOCK_OPT_SIZE, |
1572 | 75411d23 | Stefan Hajnoczi | .type = OPT_SIZE, |
1573 | 75411d23 | Stefan Hajnoczi | .help = "Virtual disk size (in bytes)" |
1574 | 75411d23 | Stefan Hajnoczi | }, { |
1575 | 75411d23 | Stefan Hajnoczi | .name = BLOCK_OPT_BACKING_FILE, |
1576 | 75411d23 | Stefan Hajnoczi | .type = OPT_STRING, |
1577 | 75411d23 | Stefan Hajnoczi | .help = "File name of a base image" |
1578 | 75411d23 | Stefan Hajnoczi | }, { |
1579 | 75411d23 | Stefan Hajnoczi | .name = BLOCK_OPT_BACKING_FMT, |
1580 | 75411d23 | Stefan Hajnoczi | .type = OPT_STRING, |
1581 | 75411d23 | Stefan Hajnoczi | .help = "Image format of the base image" |
1582 | 75411d23 | Stefan Hajnoczi | }, { |
1583 | 75411d23 | Stefan Hajnoczi | .name = BLOCK_OPT_CLUSTER_SIZE, |
1584 | 75411d23 | Stefan Hajnoczi | .type = OPT_SIZE, |
1585 | 99cce9fa | Kevin Wolf | .help = "Cluster size (in bytes)", |
1586 | 99cce9fa | Kevin Wolf | .value = { .n = QED_DEFAULT_CLUSTER_SIZE }, |
1587 | 75411d23 | Stefan Hajnoczi | }, { |
1588 | 75411d23 | Stefan Hajnoczi | .name = BLOCK_OPT_TABLE_SIZE, |
1589 | 75411d23 | Stefan Hajnoczi | .type = OPT_SIZE, |
1590 | 75411d23 | Stefan Hajnoczi | .help = "L1/L2 table size (in clusters)" |
1591 | 75411d23 | Stefan Hajnoczi | }, |
1592 | 75411d23 | Stefan Hajnoczi | { /* end of list */ } |
1593 | 75411d23 | Stefan Hajnoczi | }; |
1594 | 75411d23 | Stefan Hajnoczi | |
1595 | 75411d23 | Stefan Hajnoczi | static BlockDriver bdrv_qed = { |
1596 | 75411d23 | Stefan Hajnoczi | .format_name = "qed", |
1597 | 75411d23 | Stefan Hajnoczi | .instance_size = sizeof(BDRVQEDState), |
1598 | 75411d23 | Stefan Hajnoczi | .create_options = qed_create_options, |
1599 | 75411d23 | Stefan Hajnoczi | |
1600 | 75411d23 | Stefan Hajnoczi | .bdrv_probe = bdrv_qed_probe, |
1601 | e023b2e2 | Paolo Bonzini | .bdrv_rebind = bdrv_qed_rebind, |
1602 | 75411d23 | Stefan Hajnoczi | .bdrv_open = bdrv_qed_open, |
1603 | 75411d23 | Stefan Hajnoczi | .bdrv_close = bdrv_qed_close, |
1604 | f9cb20f1 | Jeff Cody | .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, |
1605 | 75411d23 | Stefan Hajnoczi | .bdrv_create = bdrv_qed_create, |
1606 | 3ac21627 | Peter Lieven | .bdrv_has_zero_init = bdrv_has_zero_init_1, |
1607 | b6b8a333 | Paolo Bonzini | .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, |
1608 | 75411d23 | Stefan Hajnoczi | .bdrv_make_empty = bdrv_qed_make_empty, |
1609 | 75411d23 | Stefan Hajnoczi | .bdrv_aio_readv = bdrv_qed_aio_readv, |
1610 | 75411d23 | Stefan Hajnoczi | .bdrv_aio_writev = bdrv_qed_aio_writev, |
1611 | 0e71be19 | Stefan Hajnoczi | .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes, |
1612 | 75411d23 | Stefan Hajnoczi | .bdrv_truncate = bdrv_qed_truncate, |
1613 | 75411d23 | Stefan Hajnoczi | .bdrv_getlength = bdrv_qed_getlength, |
1614 | 75411d23 | Stefan Hajnoczi | .bdrv_get_info = bdrv_qed_get_info, |
1615 | 75411d23 | Stefan Hajnoczi | .bdrv_change_backing_file = bdrv_qed_change_backing_file, |
1616 | c82954e5 | Benoît Canet | .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, |
1617 | 75411d23 | Stefan Hajnoczi | .bdrv_check = bdrv_qed_check, |
1618 | 75411d23 | Stefan Hajnoczi | }; |
1619 | 75411d23 | Stefan Hajnoczi | |
1620 | 75411d23 | Stefan Hajnoczi | static void bdrv_qed_init(void) |
1621 | 75411d23 | Stefan Hajnoczi | { |
1622 | 75411d23 | Stefan Hajnoczi | bdrv_register(&bdrv_qed); |
1623 | 75411d23 | Stefan Hajnoczi | } |
1624 | 75411d23 | Stefan Hajnoczi | |
1625 | 75411d23 | Stefan Hajnoczi | block_init(bdrv_qed_init); |