root / drivers / block-vhd.c @ abdb293f
History | View | Annotate | Download (56.5 kB)
1 |
/*
|
---|---|
2 |
*
|
3 |
* Copyright (c) 2007, XenSource Inc.
|
4 |
* Copyright (c) 2010, Citrix Systems, Inc.
|
5 |
*
|
6 |
* All rights reserved.
|
7 |
*
|
8 |
* Redistribution and use in source and binary forms, with or without
|
9 |
* modification, are permitted provided that the following conditions are met:
|
10 |
* * Redistributions of source code must retain the above copyright
|
11 |
* notice, this list of conditions and the following disclaimer.
|
12 |
* * Redistributions in binary form must reproduce the above copyright
|
13 |
* notice, this list of conditions and the following disclaimer in the
|
14 |
* documentation and/or other materials provided with the distribution.
|
15 |
* * Neither the name of XenSource Inc. nor the names of its contributors
|
16 |
* may be used to endorse or promote products derived from this software
|
17 |
* without specific prior written permission.
|
18 |
*
|
19 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20 |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
23 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
24 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
25 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
26 |
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
27 |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
28 |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
29 |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30 |
*/
|
31 |
|
32 |
/*
|
33 |
* block-vhd.c: asynchronous vhd implementation.
|
34 |
*
|
35 |
* A note on write transactions:
|
36 |
* Writes that require updating the BAT or bitmaps cannot be signaled
|
37 |
* as complete until all updates have reached disk. Transactions are
|
38 |
* used to ensure proper ordering in these cases. The two types of
|
39 |
* transactions are as follows:
|
40 |
* - Bitmap updates only: data writes that require updates to the same
|
41 |
* bitmap are grouped in a transaction. Only after all data writes
|
42 |
* in a transaction complete does the bitmap write commence. Only
|
43 |
* after the bitmap write finishes are the data writes signalled as
|
44 |
* complete.
|
45 |
* - BAT and bitmap updates: data writes are grouped in transactions
|
46 |
* as above, but a special extra write is included in the transaction,
|
47 |
* which zeros out the newly allocated bitmap on disk. When the data
|
48 |
* writes and the zero-bitmap write complete, the BAT and bitmap writes
|
49 |
* are started in parallel. The transaction is completed only after both
|
50 |
* the BAT and bitmap writes successfully return.
|
51 |
*/
|
52 |
|
53 |
#ifdef HAVE_CONFIG_H
|
54 |
#include "config.h" |
55 |
#endif
|
56 |
|
57 |
#include <errno.h> |
58 |
#include <fcntl.h> |
59 |
#include <stdio.h> |
60 |
#include <stdlib.h> |
61 |
#include <unistd.h> |
62 |
#include <sys/stat.h> |
63 |
#include <sys/ioctl.h> |
64 |
#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */ |
65 |
/* e2fsprogs-devel. */
|
66 |
#include <string.h> /* for memset. */ |
67 |
#include <libaio.h> |
68 |
#include <sys/mman.h> |
69 |
|
70 |
#include "libvhd.h" |
71 |
#include "tapdisk.h" |
72 |
#include "tapdisk-driver.h" |
73 |
#include "tapdisk-interface.h" |
74 |
#include "tapdisk-disktype.h" |
75 |
#include "tapdisk-storage.h" |
76 |
|
77 |
unsigned int SPB; |
78 |
|
79 |
#define DEBUGGING 2 |
80 |
#define ASSERTING 1 |
81 |
#define MICROSOFT_COMPAT
|
82 |
|
83 |
#define VHD_BATMAP_MAX_RETRIES 10 |
84 |
|
85 |
/*
 * __TRACE: log the request counters of vhd_state @s (queued, completed,
 * returned), the number of data requests currently allocated and the
 * block number of the pending BAT write.
 *
 * The argument is parenthesized so that any pointer expression, not just
 * a plain identifier, can be passed safely.
 */
#define __TRACE(s)							\
	do {								\
		DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %"	\
		    PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: "	\
		    "%u, BBLK: 0x%04x\n",				\
		    (s)->vhd.file, (s)->queued, (s)->completed,		\
		    (s)->returned,					\
		    VHD_REQS_DATA - (s)->vreq_free_count,		\
		    (s)->bat.pbw_blk);					\
	} while(0)
94 |
|
95 |
/*
 * __ASSERT: if @_p is false, report the failed expression together with
 * file and line through both DPRINTF and the tlog warning channel, then
 * call td_panic().
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement; a bare "if (...) { }" would capture a following "else"
 * when used inside an unbraced if/else.
 */
#define __ASSERT(_p)							\
	do {								\
		if (!(_p)) {						\
			DPRINTF("%s:%d: FAILED ASSERTION: '%s'\n",	\
				__FILE__, __LINE__, #_p);		\
			DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n", \
			    __FILE__, __LINE__, #_p);			\
			td_panic();					\
		}							\
	} while (0)
103 |
|
104 |
#if (DEBUGGING == 1) |
105 |
#define DBG(level, _f, _a...) DPRINTF(_f, ##_a) |
106 |
#define ERR(_s, err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a) |
107 |
#define TRACE(s) ((void)0) |
108 |
#elif (DEBUGGING == 2) |
109 |
#define DBG(level, _f, _a...) tlog_write(level, _f, ##_a) |
110 |
#define ERR(_s, _err, _f, _a...) tlog_drv_error((_s)->driver, _err, _f, ##_a) |
111 |
#define TRACE(s) __TRACE(s)
|
112 |
#else
|
113 |
#define DBG(level, _f, _a...) ((void)0) |
114 |
#define ERR(_s, err, _f, _a...) ((void)0) |
115 |
#define TRACE(s) ((void)0) |
116 |
#endif
|
117 |
|
118 |
#if (ASSERTING == 1) |
119 |
#define ASSERT(_p) __ASSERT(_p)
|
120 |
#else
|
121 |
#define ASSERT(_p) ((void)0) |
122 |
#endif
|
123 |
|
124 |
/******VHD DEFINES******/
|
125 |
#define VHD_CACHE_SIZE 32 |
126 |
|
127 |
#define VHD_REQS_DATA TAPDISK_DATA_REQUESTS
|
128 |
#define VHD_REQS_META (VHD_CACHE_SIZE + 2) |
129 |
#define VHD_REQS_TOTAL (VHD_REQS_DATA + VHD_REQS_META)
|
130 |
|
131 |
#define VHD_OP_BAT_WRITE 0 |
132 |
#define VHD_OP_DATA_READ 1 |
133 |
#define VHD_OP_DATA_WRITE 2 |
134 |
#define VHD_OP_BITMAP_READ 3 |
135 |
#define VHD_OP_BITMAP_WRITE 4 |
136 |
#define VHD_OP_ZERO_BM_WRITE 5 |
137 |
#define VHD_OP_REDUNDANT_BM_WRITE 6 |
138 |
|
139 |
#define VHD_BM_BAT_LOCKED 0 |
140 |
#define VHD_BM_BAT_CLEAR 1 |
141 |
#define VHD_BM_BIT_CLEAR 2 |
142 |
#define VHD_BM_BIT_SET 3 |
143 |
#define VHD_BM_NOT_CACHED 4 |
144 |
#define VHD_BM_READ_PENDING 5 |
145 |
|
146 |
#define VHD_FLAG_OPEN_RDONLY 1 |
147 |
#define VHD_FLAG_OPEN_NO_CACHE 2 |
148 |
#define VHD_FLAG_OPEN_QUIET 4 |
149 |
#define VHD_FLAG_OPEN_STRICT 8 |
150 |
#define VHD_FLAG_OPEN_QUERY 16 |
151 |
#define VHD_FLAG_OPEN_PREALLOCATE 32 |
152 |
|
153 |
#define VHD_FLAG_BAT_LOCKED 1 |
154 |
#define VHD_FLAG_BAT_WRITE_STARTED 2 |
155 |
|
156 |
#define VHD_FLAG_BM_UPDATE_BAT 1 |
157 |
#define VHD_FLAG_BM_WRITE_PENDING 2 |
158 |
#define VHD_FLAG_BM_READ_PENDING 4 |
159 |
#define VHD_FLAG_BM_LOCKED 8 |
160 |
|
161 |
#define VHD_FLAG_REQ_UPDATE_BAT 1 |
162 |
#define VHD_FLAG_REQ_UPDATE_BITMAP 2 |
163 |
#define VHD_FLAG_REQ_QUEUED 4 |
164 |
#define VHD_FLAG_REQ_FINISHED 8 |
165 |
|
166 |
#define VHD_FLAG_TX_LIVE 1 |
167 |
#define VHD_FLAG_TX_UPDATE_BAT 2 |
168 |
|
169 |
typedef uint8_t vhd_flag_t;
|
170 |
|
171 |
struct vhd_state;
|
172 |
struct vhd_request;
|
173 |
|
174 |
/* Singly linked list of requests, tracked by its first and last element. */
struct vhd_req_list {
	struct vhd_request *head;	/* first request, NULL when empty */
	struct vhd_request *tail;	/* last request, NULL when empty */
};
178 |
|
179 |
struct vhd_transaction {
|
180 |
int error;
|
181 |
int closed;
|
182 |
int started;
|
183 |
int finished;
|
184 |
vhd_flag_t status; |
185 |
struct vhd_req_list requests;
|
186 |
}; |
187 |
|
188 |
struct vhd_request {
|
189 |
int error;
|
190 |
uint8_t op; |
191 |
vhd_flag_t flags; |
192 |
td_request_t treq; |
193 |
struct tiocb tiocb;
|
194 |
struct vhd_state *state;
|
195 |
struct vhd_request *next;
|
196 |
struct vhd_transaction *tx;
|
197 |
}; |
198 |
|
199 |
struct vhd_bat_state {
|
200 |
vhd_bat_t bat; |
201 |
vhd_batmap_t batmap; |
202 |
vhd_flag_t status; |
203 |
uint32_t pbw_blk; /* blk num of pending write */
|
204 |
uint64_t pbw_offset; /* file offset of same */
|
205 |
struct vhd_request req; /* for writing bat table */ |
206 |
struct vhd_request zero_req; /* for initializing bitmaps */ |
207 |
char *bat_buf;
|
208 |
}; |
209 |
|
210 |
struct vhd_bitmap {
|
211 |
uint32_t blk; |
212 |
uint64_t seqno; /* lru sequence number */
|
213 |
vhd_flag_t status; |
214 |
|
215 |
char *map; /* map should only be modified |
216 |
* in finish_bitmap_write */
|
217 |
char *shadow; /* in-memory bitmap changes are |
218 |
* made to shadow and copied to
|
219 |
* map only after having been
|
220 |
* flushed to disk */
|
221 |
struct vhd_transaction tx; /* transaction data structure |
222 |
* encapsulating data, bitmap,
|
223 |
* and bat writes */
|
224 |
struct vhd_req_list queue; /* data writes waiting for next |
225 |
* transaction */
|
226 |
struct vhd_req_list waiting; /* pending requests that cannot |
227 |
* be serviced until this bitmap
|
228 |
* is read from disk */
|
229 |
struct vhd_request req;
|
230 |
}; |
231 |
|
232 |
struct vhd_state {
|
233 |
vhd_flag_t flags; |
234 |
|
235 |
/* VHD stuff */
|
236 |
vhd_context_t vhd; |
237 |
uint32_t spp; /* sectors per page */
|
238 |
uint32_t spb; /* sectors per block */
|
239 |
uint64_t first_db; /* pointer to datablock 0 */
|
240 |
uint64_t next_db; /* pointer to the next
|
241 |
* (unallocated) datablock */
|
242 |
|
243 |
struct vhd_bat_state bat;
|
244 |
|
245 |
uint64_t bm_lru; /* lru sequence number */
|
246 |
uint32_t bm_secs; /* size of bitmap, in sectors */
|
247 |
struct vhd_bitmap *bitmap[VHD_CACHE_SIZE];
|
248 |
|
249 |
int bm_free_count;
|
250 |
struct vhd_bitmap *bitmap_free[VHD_CACHE_SIZE];
|
251 |
struct vhd_bitmap bitmap_list[VHD_CACHE_SIZE];
|
252 |
|
253 |
int vreq_free_count;
|
254 |
struct vhd_request *vreq_free[VHD_REQS_DATA];
|
255 |
struct vhd_request vreq_list[VHD_REQS_DATA];
|
256 |
|
257 |
/* for redundant bitmap writes */
|
258 |
int padbm_size;
|
259 |
char *padbm_buf;
|
260 |
long int debug_skipped_redundant_writes; |
261 |
long int debug_done_redundant_writes; |
262 |
|
263 |
td_driver_t *driver; |
264 |
|
265 |
uint64_t queued; |
266 |
uint64_t completed; |
267 |
uint64_t returned; |
268 |
uint64_t reads; |
269 |
uint64_t read_size; |
270 |
uint64_t writes; |
271 |
uint64_t write_size; |
272 |
}; |
273 |
|
274 |
#define test_vhd_flag(word, flag) ((word) & (flag))
|
275 |
#define set_vhd_flag(word, flag) ((word) |= (flag))
|
276 |
#define clear_vhd_flag(word, flag) ((word) &= ~(flag))
|
277 |
|
278 |
#define bat_entry(s, blk) ((s)->bat.bat.bat[(blk)])
|
279 |
|
280 |
static void vhd_complete(void *, struct tiocb *, int); |
281 |
static void finish_data_transaction(struct vhd_state *, struct vhd_bitmap *); |
282 |
|
283 |
static struct vhd_state *_vhd_master; |
284 |
static unsigned long _vhd_zsize; |
285 |
static char *_vhd_zeros; |
286 |
|
287 |
static int |
288 |
vhd_initialize(struct vhd_state *s)
|
289 |
{ |
290 |
if (_vhd_zeros)
|
291 |
return 0; |
292 |
|
293 |
_vhd_zsize = 2 * getpagesize();
|
294 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
|
295 |
_vhd_zsize += VHD_BLOCK_SIZE; |
296 |
|
297 |
_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
|
298 |
MAP_SHARED | MAP_ANONYMOUS, -1, 0); |
299 |
if (_vhd_zeros == MAP_FAILED) {
|
300 |
EPRINTF("vhd_initialize failed: %d\n", -errno);
|
301 |
_vhd_zeros = NULL;
|
302 |
_vhd_zsize = 0;
|
303 |
return -errno;
|
304 |
} |
305 |
|
306 |
_vhd_master = s; |
307 |
return 0; |
308 |
} |
309 |
|
310 |
static void |
311 |
vhd_free(struct vhd_state *s)
|
312 |
{ |
313 |
if (_vhd_master != s || !_vhd_zeros)
|
314 |
return;
|
315 |
|
316 |
munmap(_vhd_zeros, _vhd_zsize); |
317 |
_vhd_zsize = 0;
|
318 |
_vhd_zeros = NULL;
|
319 |
_vhd_master = NULL;
|
320 |
} |
321 |
|
322 |
static char * |
323 |
_get_vhd_zeros(const char *func, unsigned long size) |
324 |
{ |
325 |
if (!_vhd_zeros || _vhd_zsize < size) {
|
326 |
EPRINTF("invalid zero request from %s: %lu, %lu, %p\n",
|
327 |
func, size, _vhd_zsize, _vhd_zeros); |
328 |
ASSERT(0);
|
329 |
} |
330 |
|
331 |
return _vhd_zeros;
|
332 |
} |
333 |
|
334 |
#define vhd_zeros(size) _get_vhd_zeros(__func__, size)
|
335 |
|
336 |
static inline void |
337 |
set_batmap(struct vhd_state *s, uint32_t blk)
|
338 |
{ |
339 |
if (s->bat.batmap.map) {
|
340 |
vhd_batmap_set(&s->vhd, &s->bat.batmap, blk); |
341 |
DBG(TLOG_DBG, "block 0x%x completely full\n", blk);
|
342 |
} |
343 |
} |
344 |
|
345 |
static inline int |
346 |
test_batmap(struct vhd_state *s, uint32_t blk)
|
347 |
{ |
348 |
if (!s->bat.batmap.map)
|
349 |
return 0; |
350 |
return vhd_batmap_test(&s->vhd, &s->bat.batmap, blk);
|
351 |
} |
352 |
|
353 |
static int |
354 |
vhd_kill_footer(struct vhd_state *s)
|
355 |
{ |
356 |
int err;
|
357 |
off64_t end; |
358 |
void *zeros;
|
359 |
|
360 |
if (s->vhd.footer.type == HD_TYPE_FIXED)
|
361 |
return 0; |
362 |
|
363 |
err = posix_memalign(&zeros, 512, 512); |
364 |
if (err)
|
365 |
return -err;
|
366 |
|
367 |
err = 1;
|
368 |
memset(zeros, 0xc7c7c7c7, 512); |
369 |
|
370 |
if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1) |
371 |
goto fail;
|
372 |
|
373 |
if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1) |
374 |
goto fail;
|
375 |
|
376 |
if (write(s->vhd.fd, zeros, 512) != 512) |
377 |
goto fail;
|
378 |
|
379 |
err = 0;
|
380 |
|
381 |
fail:
|
382 |
free(zeros); |
383 |
if (err)
|
384 |
return (errno ? -errno : -EIO);
|
385 |
return 0; |
386 |
} |
387 |
|
388 |
static inline int |
389 |
find_next_free_block(struct vhd_state *s)
|
390 |
{ |
391 |
int err;
|
392 |
off64_t eom; |
393 |
uint32_t i, entry; |
394 |
|
395 |
err = vhd_end_of_headers(&s->vhd, &eom); |
396 |
if (err)
|
397 |
return err;
|
398 |
|
399 |
s->next_db = secs_round_up(eom); |
400 |
s->first_db = s->next_db; |
401 |
if ((s->first_db + s->bm_secs) % s->spp)
|
402 |
s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp)); |
403 |
|
404 |
for (i = 0; i < s->bat.bat.entries; i++) { |
405 |
entry = bat_entry(s, i); |
406 |
if (entry != DD_BLK_UNUSED && entry >= s->next_db)
|
407 |
s->next_db = entry + s->spb + s->bm_secs; |
408 |
} |
409 |
|
410 |
return 0; |
411 |
} |
412 |
|
413 |
static void |
414 |
vhd_free_bat(struct vhd_state *s)
|
415 |
{ |
416 |
free(s->bat.bat.bat); |
417 |
free(s->bat.batmap.map); |
418 |
free(s->bat.bat_buf); |
419 |
memset(&s->bat, 0, sizeof(struct vhd_bat)); |
420 |
} |
421 |
|
422 |
static int |
423 |
vhd_initialize_bat(struct vhd_state *s)
|
424 |
{ |
425 |
int err, batmap_required, i;
|
426 |
void *buf;
|
427 |
|
428 |
memset(&s->bat, 0, sizeof(struct vhd_bat)); |
429 |
|
430 |
err = vhd_read_bat(&s->vhd, &s->bat.bat); |
431 |
if (err) {
|
432 |
EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
|
433 |
return err;
|
434 |
} |
435 |
|
436 |
batmap_required = 1;
|
437 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY)) {
|
438 |
batmap_required = 0;
|
439 |
} else {
|
440 |
err = find_next_free_block(s); |
441 |
if (err)
|
442 |
goto fail;
|
443 |
} |
444 |
|
445 |
if (vhd_has_batmap(&s->vhd)) {
|
446 |
for (i = 0; i < VHD_BATMAP_MAX_RETRIES; i++) { |
447 |
err = vhd_read_batmap(&s->vhd, &s->bat.batmap); |
448 |
if (err) {
|
449 |
EPRINTF("%s: reading batmap: %d\n",
|
450 |
s->vhd.file, err); |
451 |
if (batmap_required)
|
452 |
goto fail;
|
453 |
} else {
|
454 |
break;
|
455 |
} |
456 |
} |
457 |
if (err)
|
458 |
EPRINTF("%s: ignoring non-critical batmap error\n",
|
459 |
s->vhd.file); |
460 |
} |
461 |
|
462 |
err = posix_memalign(&buf, VHD_SECTOR_SIZE, VHD_SECTOR_SIZE); |
463 |
if (err)
|
464 |
goto fail;
|
465 |
|
466 |
s->bat.bat_buf = buf; |
467 |
|
468 |
return 0; |
469 |
|
470 |
fail:
|
471 |
vhd_free_bat(s); |
472 |
return err;
|
473 |
} |
474 |
|
475 |
static void |
476 |
vhd_free_bitmap_cache(struct vhd_state *s)
|
477 |
{ |
478 |
int i;
|
479 |
struct vhd_bitmap *bm;
|
480 |
|
481 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
482 |
bm = s->bitmap_list + i; |
483 |
free(bm->map); |
484 |
free(bm->shadow); |
485 |
s->bitmap_free[i] = NULL;
|
486 |
} |
487 |
|
488 |
memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE); |
489 |
} |
490 |
|
491 |
static int |
492 |
vhd_initialize_bitmap_cache(struct vhd_state *s)
|
493 |
{ |
494 |
int i, err, map_size;
|
495 |
struct vhd_bitmap *bm;
|
496 |
void *map, *shadow;
|
497 |
|
498 |
memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE); |
499 |
|
500 |
s->bm_lru = 0;
|
501 |
map_size = vhd_sectors_to_bytes(s->bm_secs); |
502 |
s->bm_free_count = VHD_CACHE_SIZE; |
503 |
|
504 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
505 |
bm = s->bitmap_list + i; |
506 |
|
507 |
err = posix_memalign(&map, 512, map_size);
|
508 |
if (err)
|
509 |
goto fail;
|
510 |
|
511 |
bm->map = map; |
512 |
|
513 |
err = posix_memalign(&shadow, 512, map_size);
|
514 |
if (err)
|
515 |
goto fail;
|
516 |
|
517 |
bm->shadow = shadow; |
518 |
|
519 |
memset(bm->map, 0, map_size);
|
520 |
memset(bm->shadow, 0, map_size);
|
521 |
s->bitmap_free[i] = bm; |
522 |
} |
523 |
|
524 |
return 0; |
525 |
|
526 |
fail:
|
527 |
vhd_free_bitmap_cache(s); |
528 |
return err;
|
529 |
} |
530 |
|
531 |
static int |
532 |
vhd_initialize_dynamic_disk(struct vhd_state *s)
|
533 |
{ |
534 |
uint32_t bm_size; |
535 |
void *buf;
|
536 |
int err;
|
537 |
|
538 |
err = vhd_get_header(&s->vhd); |
539 |
if (err) {
|
540 |
if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
|
541 |
EPRINTF("Error reading VHD DD header.\n");
|
542 |
return err;
|
543 |
} |
544 |
|
545 |
if (s->vhd.header.hdr_ver != 0x00010000) { |
546 |
EPRINTF("unsupported header version! (0x%x)\n",
|
547 |
s->vhd.header.hdr_ver); |
548 |
return -EINVAL;
|
549 |
} |
550 |
|
551 |
s->spp = getpagesize() >> VHD_SECTOR_SHIFT; |
552 |
s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT; |
553 |
s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
|
554 |
|
555 |
s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize(); |
556 |
if (s->bm_secs % getpagesize())
|
557 |
s->padbm_size += getpagesize(); |
558 |
|
559 |
err = posix_memalign(&buf, 512, s->padbm_size);
|
560 |
if (err)
|
561 |
return -err;
|
562 |
|
563 |
s->padbm_buf = buf; |
564 |
bm_size = s->bm_secs << VHD_SECTOR_SHIFT; |
565 |
memset(s->padbm_buf, 0, s->padbm_size - bm_size);
|
566 |
memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
|
567 |
s->debug_skipped_redundant_writes = 0;
|
568 |
s->debug_done_redundant_writes = 0;
|
569 |
|
570 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
|
571 |
return 0; |
572 |
|
573 |
err = vhd_initialize_bat(s); |
574 |
if (err)
|
575 |
return err;
|
576 |
|
577 |
err = vhd_initialize_bitmap_cache(s); |
578 |
if (err) {
|
579 |
vhd_free_bat(s); |
580 |
return err;
|
581 |
} |
582 |
|
583 |
return 0; |
584 |
} |
585 |
|
586 |
static int |
587 |
vhd_check_version(struct vhd_state *s)
|
588 |
{ |
589 |
if (strncmp(s->vhd.footer.crtr_app, "tap", 3)) |
590 |
return 0; |
591 |
|
592 |
if (s->vhd.footer.crtr_ver > VHD_CURRENT_VERSION) {
|
593 |
if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
|
594 |
EPRINTF("WARNING: %s vhd creator version 0x%08x, "
|
595 |
"but only versions up to 0x%08x are "
|
596 |
"supported for IO\n", s->vhd.file,
|
597 |
s->vhd.footer.crtr_ver, VHD_CURRENT_VERSION); |
598 |
|
599 |
return -EINVAL;
|
600 |
} |
601 |
|
602 |
return 0; |
603 |
} |
604 |
|
605 |
static void |
606 |
vhd_log_open(struct vhd_state *s)
|
607 |
{ |
608 |
char buf[5]; |
609 |
uint32_t i, allocated, full; |
610 |
|
611 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
|
612 |
return;
|
613 |
|
614 |
snprintf(buf, sizeof(buf), "%s", s->vhd.footer.crtr_app); |
615 |
if (!vhd_type_dynamic(&s->vhd)) {
|
616 |
DPRINTF("%s version: %s 0x%08x\n",
|
617 |
s->vhd.file, buf, s->vhd.footer.crtr_ver); |
618 |
return;
|
619 |
} |
620 |
|
621 |
allocated = 0;
|
622 |
full = 0;
|
623 |
|
624 |
for (i = 0; i < s->bat.bat.entries; i++) { |
625 |
if (bat_entry(s, i) != DD_BLK_UNUSED)
|
626 |
allocated++; |
627 |
if (test_batmap(s, i))
|
628 |
full++; |
629 |
} |
630 |
|
631 |
DPRINTF("%s version: %s 0x%08x, b: %u, a: %u, f: %u, n: %"PRIu64"\n", |
632 |
s->vhd.file, buf, s->vhd.footer.crtr_ver, s->bat.bat.entries, |
633 |
allocated, full, s->next_db); |
634 |
} |
635 |
|
636 |
static int |
637 |
__vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) |
638 |
{ |
639 |
int i, o_flags, err;
|
640 |
struct vhd_state *s;
|
641 |
|
642 |
DBG(TLOG_INFO, "vhd_open: %s\n", name);
|
643 |
if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
|
644 |
libvhd_set_log_level(1);
|
645 |
|
646 |
s = (struct vhd_state *)driver->data;
|
647 |
memset(s, 0, sizeof(struct vhd_state)); |
648 |
|
649 |
s->flags = flags; |
650 |
s->driver = driver; |
651 |
|
652 |
err = vhd_initialize(s); |
653 |
if (err)
|
654 |
return err;
|
655 |
|
656 |
o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ? |
657 |
VHD_OPEN_RDONLY : VHD_OPEN_RDWR); |
658 |
|
659 |
if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
|
660 |
set_vhd_flag(o_flags, VHD_OPEN_STRICT); |
661 |
|
662 |
err = vhd_open(&s->vhd, name, o_flags); |
663 |
if (err) {
|
664 |
libvhd_set_log_level(1);
|
665 |
err = vhd_open(&s->vhd, name, o_flags); |
666 |
if (err) {
|
667 |
EPRINTF("Unable to open [%s] (%d)!\n", name, err);
|
668 |
return err;
|
669 |
} |
670 |
} |
671 |
|
672 |
err = vhd_check_version(s); |
673 |
if (err)
|
674 |
goto fail;
|
675 |
|
676 |
s->spb = s->spp = 1;
|
677 |
|
678 |
if (vhd_type_dynamic(&s->vhd)) {
|
679 |
err = vhd_initialize_dynamic_disk(s); |
680 |
if (err)
|
681 |
goto fail;
|
682 |
} |
683 |
|
684 |
vhd_log_open(s); |
685 |
|
686 |
SPB = s->spb; |
687 |
|
688 |
s->vreq_free_count = VHD_REQS_DATA; |
689 |
for (i = 0; i < VHD_REQS_DATA; i++) |
690 |
s->vreq_free[i] = s->vreq_list + i; |
691 |
|
692 |
driver->info.size = s->vhd.footer.curr_size >> VHD_SECTOR_SHIFT; |
693 |
driver->info.sector_size = VHD_SECTOR_SIZE; |
694 |
driver->info.info = 0;
|
695 |
|
696 |
DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n", |
697 |
driver->info.size, driver->info.sector_size, driver->info.info); |
698 |
|
699 |
if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) &&
|
700 |
!test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) { |
701 |
err = vhd_kill_footer(s); |
702 |
if (err) {
|
703 |
DPRINTF("ERROR killing footer: %d\n", err);
|
704 |
goto fail;
|
705 |
} |
706 |
s->writes++; |
707 |
} |
708 |
|
709 |
return 0; |
710 |
|
711 |
fail:
|
712 |
vhd_free_bat(s); |
713 |
vhd_free_bitmap_cache(s); |
714 |
vhd_close(&s->vhd); |
715 |
vhd_free(s); |
716 |
return err;
|
717 |
} |
718 |
|
719 |
static int |
720 |
_vhd_open(td_driver_t *driver, const char *name, td_flag_t flags) |
721 |
{ |
722 |
vhd_flag_t vhd_flags = 0;
|
723 |
|
724 |
if (flags & TD_OPEN_RDONLY)
|
725 |
vhd_flags |= VHD_FLAG_OPEN_RDONLY; |
726 |
if (flags & TD_OPEN_QUIET)
|
727 |
vhd_flags |= VHD_FLAG_OPEN_QUIET; |
728 |
if (flags & TD_OPEN_STRICT)
|
729 |
vhd_flags |= VHD_FLAG_OPEN_STRICT; |
730 |
if (flags & TD_OPEN_QUERY)
|
731 |
vhd_flags |= (VHD_FLAG_OPEN_QUERY | |
732 |
VHD_FLAG_OPEN_QUIET | |
733 |
VHD_FLAG_OPEN_RDONLY | |
734 |
VHD_FLAG_OPEN_NO_CACHE); |
735 |
|
736 |
/* pre-allocate for all but NFS and LVM storage */
|
737 |
driver->storage = tapdisk_storage_type(name); |
738 |
|
739 |
if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
|
740 |
driver->storage != TAPDISK_STORAGE_TYPE_LVM) |
741 |
vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE; |
742 |
|
743 |
return __vhd_open(driver, name, vhd_flags);
|
744 |
} |
745 |
|
746 |
static void |
747 |
vhd_log_close(struct vhd_state *s)
|
748 |
{ |
749 |
uint32_t i, allocated, full; |
750 |
|
751 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
|
752 |
return;
|
753 |
|
754 |
allocated = 0;
|
755 |
full = 0;
|
756 |
|
757 |
for (i = 0; i < s->bat.bat.entries; i++) { |
758 |
if (bat_entry(s, i) != DD_BLK_UNUSED)
|
759 |
allocated++; |
760 |
if (test_batmap(s, i))
|
761 |
full++; |
762 |
} |
763 |
|
764 |
DPRINTF("%s: b: %u, a: %u, f: %u, n: %"PRIu64"\n", |
765 |
s->vhd.file, s->bat.bat.entries, allocated, full, s->next_db); |
766 |
} |
767 |
|
768 |
static int |
769 |
_vhd_close(td_driver_t *driver) |
770 |
{ |
771 |
int err;
|
772 |
struct vhd_state *s;
|
773 |
|
774 |
DBG(TLOG_WARN, "vhd_close\n");
|
775 |
s = (struct vhd_state *)driver->data;
|
776 |
|
777 |
DPRINTF("gaps written/skipped: %ld/%ld\n",
|
778 |
s->debug_done_redundant_writes, |
779 |
s->debug_skipped_redundant_writes); |
780 |
|
781 |
/* don't write footer if tapdisk is read-only */
|
782 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
|
783 |
goto free;
|
784 |
|
785 |
/*
|
786 |
* write footer if:
|
787 |
* - we killed it on open (opened with strict)
|
788 |
* - we've written data since opening
|
789 |
*/
|
790 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_STRICT) || s->writes) {
|
791 |
memcpy(&s->vhd.bat, &s->bat.bat, sizeof(vhd_bat_t));
|
792 |
err = vhd_write_footer(&s->vhd, &s->vhd.footer); |
793 |
memset(&s->vhd.bat, 0, sizeof(vhd_bat_t)); |
794 |
|
795 |
if (err)
|
796 |
EPRINTF("writing %s footer: %d\n", s->vhd.file, err);
|
797 |
|
798 |
if (!vhd_has_batmap(&s->vhd))
|
799 |
goto free;
|
800 |
|
801 |
err = vhd_write_batmap(&s->vhd, &s->bat.batmap); |
802 |
if (err)
|
803 |
EPRINTF("writing %s batmap: %d\n", s->vhd.file, err);
|
804 |
} |
805 |
|
806 |
free:
|
807 |
vhd_log_close(s); |
808 |
vhd_free_bat(s); |
809 |
vhd_free_bitmap_cache(s); |
810 |
vhd_close(&s->vhd); |
811 |
vhd_free(s); |
812 |
|
813 |
memset(s, 0, sizeof(struct vhd_state)); |
814 |
|
815 |
return 0; |
816 |
} |
817 |
|
818 |
int
|
819 |
vhd_validate_parent(td_driver_t *child_driver, |
820 |
td_driver_t *parent_driver, td_flag_t flags) |
821 |
{ |
822 |
struct vhd_state *child = (struct vhd_state *)child_driver->data; |
823 |
struct vhd_state *parent;
|
824 |
|
825 |
if (parent_driver->type != DISK_TYPE_VHD) {
|
826 |
if (child_driver->type != DISK_TYPE_VHD)
|
827 |
return -EINVAL;
|
828 |
if (child->vhd.footer.type != HD_TYPE_DIFF)
|
829 |
return -EINVAL;
|
830 |
if (!vhd_parent_raw(&child->vhd))
|
831 |
return -EINVAL;
|
832 |
return 0; |
833 |
} |
834 |
|
835 |
parent = (struct vhd_state *)parent_driver->data;
|
836 |
|
837 |
/*
|
838 |
* This check removed because of cases like:
|
839 |
* - parent VHD marked as 'hidden'
|
840 |
* - parent VHD modified during coalesce
|
841 |
*/
|
842 |
/*
|
843 |
if (stat(parent->vhd.file, &stats)) {
|
844 |
DPRINTF("ERROR stating parent file %s\n", parent->vhd.file);
|
845 |
return -errno;
|
846 |
}
|
847 |
|
848 |
if (child->hdr.prt_ts != vhd_time(stats.st_mtime)) {
|
849 |
DPRINTF("ERROR: parent file has been modified since "
|
850 |
"snapshot. Child image no longer valid.\n");
|
851 |
return -EINVAL;
|
852 |
}
|
853 |
*/
|
854 |
|
855 |
if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
|
856 |
DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
|
857 |
"snapshot. Child image no longer valid.\n",
|
858 |
__func__, child->vhd.file, parent->vhd.file); |
859 |
return -EINVAL;
|
860 |
} |
861 |
|
862 |
/* TODO: compare sizes */
|
863 |
|
864 |
return 0; |
865 |
} |
866 |
|
867 |
int
|
868 |
vhd_get_parent_id(td_driver_t *driver, td_disk_id_t *id) |
869 |
{ |
870 |
int err;
|
871 |
char *parent;
|
872 |
struct vhd_state *s;
|
873 |
|
874 |
DBG(TLOG_DBG, "\n");
|
875 |
memset(id, 0, sizeof(td_disk_id_t)); |
876 |
|
877 |
s = (struct vhd_state *)driver->data;
|
878 |
|
879 |
if (s->vhd.footer.type != HD_TYPE_DIFF)
|
880 |
return TD_NO_PARENT;
|
881 |
|
882 |
err = vhd_parent_locator_get(&s->vhd, &parent); |
883 |
if (err)
|
884 |
return err;
|
885 |
|
886 |
id->name = parent; |
887 |
id->type = vhd_parent_raw(&s->vhd) ? DISK_TYPE_AIO : DISK_TYPE_VHD; |
888 |
id->flags |= TD_OPEN_SHAREABLE|TD_OPEN_RDONLY; |
889 |
|
890 |
return 0; |
891 |
} |
892 |
|
893 |
static inline void |
894 |
clear_req_list(struct vhd_req_list *list)
|
895 |
{ |
896 |
list->head = list->tail = NULL;
|
897 |
} |
898 |
|
899 |
static inline void |
900 |
add_to_tail(struct vhd_req_list *list, struct vhd_request *e) |
901 |
{ |
902 |
if (!list->head)
|
903 |
list->head = list->tail = e; |
904 |
else
|
905 |
list->tail = list->tail->next = e; |
906 |
} |
907 |
|
908 |
static inline int |
909 |
remove_from_req_list(struct vhd_req_list *list, struct vhd_request *e) |
910 |
{ |
911 |
struct vhd_request *i = list->head;
|
912 |
|
913 |
if (list->head == e) {
|
914 |
if (list->tail == e)
|
915 |
clear_req_list(list); |
916 |
else
|
917 |
list->head = list->head->next; |
918 |
return 0; |
919 |
} |
920 |
|
921 |
while (i->next) {
|
922 |
if (i->next == e) {
|
923 |
if (list->tail == e) {
|
924 |
i->next = NULL;
|
925 |
list->tail = i; |
926 |
} else
|
927 |
i->next = i->next->next; |
928 |
return 0; |
929 |
} |
930 |
i = i->next; |
931 |
} |
932 |
|
933 |
return -EINVAL;
|
934 |
} |
935 |
|
936 |
static inline void |
937 |
init_vhd_request(struct vhd_state *s, struct vhd_request *req) |
938 |
{ |
939 |
memset(req, 0, sizeof(struct vhd_request)); |
940 |
req->state = s; |
941 |
} |
942 |
|
943 |
static inline void |
944 |
init_tx(struct vhd_transaction *tx)
|
945 |
{ |
946 |
memset(tx, 0, sizeof(struct vhd_transaction)); |
947 |
} |
948 |
|
949 |
static inline void |
950 |
add_to_transaction(struct vhd_transaction *tx, struct vhd_request *r) |
951 |
{ |
952 |
ASSERT(!tx->closed); |
953 |
|
954 |
r->tx = tx; |
955 |
tx->started++; |
956 |
add_to_tail(&tx->requests, r); |
957 |
set_vhd_flag(tx->status, VHD_FLAG_TX_LIVE); |
958 |
|
959 |
DBG(TLOG_DBG, "blk: 0x%04"PRIx64", lsec: 0x%08"PRIx64", tx: %p, " |
960 |
"started: %d, finished: %d, status: %u\n",
|
961 |
r->treq.sec / SPB, r->treq.sec, tx, |
962 |
tx->started, tx->finished, tx->status); |
963 |
} |
964 |
|
965 |
static inline int |
966 |
transaction_completed(struct vhd_transaction *tx)
|
967 |
{ |
968 |
return (tx->started == tx->finished);
|
969 |
} |
970 |
|
971 |
static inline void |
972 |
init_bat(struct vhd_state *s)
|
973 |
{ |
974 |
s->bat.req.tx = NULL;
|
975 |
s->bat.req.next = NULL;
|
976 |
s->bat.req.error = 0;
|
977 |
s->bat.pbw_blk = 0;
|
978 |
s->bat.pbw_offset = 0;
|
979 |
s->bat.status = 0;
|
980 |
} |
981 |
|
982 |
static inline void |
983 |
lock_bat(struct vhd_state *s)
|
984 |
{ |
985 |
set_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED); |
986 |
} |
987 |
|
988 |
static inline void |
989 |
unlock_bat(struct vhd_state *s)
|
990 |
{ |
991 |
clear_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED); |
992 |
} |
993 |
|
994 |
static inline int |
995 |
bat_locked(struct vhd_state *s)
|
996 |
{ |
997 |
return test_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
|
998 |
} |
999 |
|
1000 |
static inline void |
1001 |
init_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm) |
1002 |
{ |
1003 |
bm->blk = 0;
|
1004 |
bm->seqno = 0;
|
1005 |
bm->status = 0;
|
1006 |
init_tx(&bm->tx); |
1007 |
clear_req_list(&bm->queue); |
1008 |
clear_req_list(&bm->waiting); |
1009 |
memset(bm->map, 0, vhd_sectors_to_bytes(s->bm_secs));
|
1010 |
memset(bm->shadow, 0, vhd_sectors_to_bytes(s->bm_secs));
|
1011 |
init_vhd_request(s, &bm->req); |
1012 |
} |
1013 |
|
1014 |
static inline struct vhd_bitmap * |
1015 |
get_bitmap(struct vhd_state *s, uint32_t block)
|
1016 |
{ |
1017 |
int i;
|
1018 |
struct vhd_bitmap *bm;
|
1019 |
|
1020 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
1021 |
bm = s->bitmap[i]; |
1022 |
if (bm && bm->blk == block)
|
1023 |
return bm;
|
1024 |
} |
1025 |
|
1026 |
return NULL; |
1027 |
} |
1028 |
|
1029 |
static inline void |
1030 |
lock_bitmap(struct vhd_bitmap *bm)
|
1031 |
{ |
1032 |
set_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED); |
1033 |
} |
1034 |
|
1035 |
static inline void |
1036 |
unlock_bitmap(struct vhd_bitmap *bm)
|
1037 |
{ |
1038 |
clear_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED); |
1039 |
} |
1040 |
|
1041 |
static inline int |
1042 |
bitmap_locked(struct vhd_bitmap *bm)
|
1043 |
{ |
1044 |
return test_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
|
1045 |
} |
1046 |
|
1047 |
static inline int |
1048 |
bitmap_valid(struct vhd_bitmap *bm)
|
1049 |
{ |
1050 |
return !test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
|
1051 |
} |
1052 |
|
1053 |
static inline int |
1054 |
bitmap_in_use(struct vhd_bitmap *bm)
|
1055 |
{ |
1056 |
return (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING) ||
|
1057 |
test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING) || |
1058 |
test_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT) || |
1059 |
bm->waiting.head || bm->tx.requests.head || bm->queue.head); |
1060 |
} |
1061 |
|
1062 |
static inline int |
1063 |
bitmap_full(struct vhd_state *s, struct vhd_bitmap *bm) |
1064 |
{ |
1065 |
int i, n;
|
1066 |
|
1067 |
n = s->spb >> 3;
|
1068 |
for (i = 0; i < n; i++) |
1069 |
if (bm->map[i] != (char)0xFF) |
1070 |
return 0; |
1071 |
|
1072 |
DBG(TLOG_DBG, "bitmap 0x%04x full\n", bm->blk);
|
1073 |
return 1; |
1074 |
} |
1075 |
|
1076 |
static struct vhd_bitmap * |
1077 |
remove_lru_bitmap(struct vhd_state *s)
|
1078 |
{ |
1079 |
int i, idx = 0; |
1080 |
uint64_t seq = s->bm_lru; |
1081 |
struct vhd_bitmap *bm, *lru = NULL; |
1082 |
|
1083 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
1084 |
bm = s->bitmap[i]; |
1085 |
if (bm && bm->seqno < seq && !bitmap_locked(bm)) {
|
1086 |
idx = i; |
1087 |
lru = bm; |
1088 |
seq = lru->seqno; |
1089 |
} |
1090 |
} |
1091 |
|
1092 |
if (lru) {
|
1093 |
s->bitmap[idx] = NULL;
|
1094 |
ASSERT(!bitmap_in_use(lru)); |
1095 |
} |
1096 |
|
1097 |
return lru;
|
1098 |
} |
1099 |
|
1100 |
static int |
1101 |
alloc_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap **bitmap, uint32_t blk) |
1102 |
{ |
1103 |
struct vhd_bitmap *bm;
|
1104 |
|
1105 |
*bitmap = NULL;
|
1106 |
|
1107 |
if (s->bm_free_count > 0) { |
1108 |
bm = s->bitmap_free[--s->bm_free_count]; |
1109 |
} else {
|
1110 |
bm = remove_lru_bitmap(s); |
1111 |
if (!bm)
|
1112 |
return -EBUSY;
|
1113 |
} |
1114 |
|
1115 |
init_vhd_bitmap(s, bm); |
1116 |
bm->blk = blk; |
1117 |
*bitmap = bm; |
1118 |
|
1119 |
return 0; |
1120 |
} |
1121 |
|
1122 |
static inline uint64_t |
1123 |
__bitmap_lru_seqno(struct vhd_state *s)
|
1124 |
{ |
1125 |
int i;
|
1126 |
struct vhd_bitmap *bm;
|
1127 |
|
1128 |
if (s->bm_lru == 0xffffffff) { |
1129 |
s->bm_lru = 0;
|
1130 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
1131 |
bm = s->bitmap[i]; |
1132 |
if (bm) {
|
1133 |
bm->seqno >>= 1;
|
1134 |
if (bm->seqno > s->bm_lru)
|
1135 |
s->bm_lru = bm->seqno; |
1136 |
} |
1137 |
} |
1138 |
} |
1139 |
|
1140 |
return ++s->bm_lru;
|
1141 |
} |
1142 |
|
1143 |
static inline void |
1144 |
touch_bitmap(struct vhd_state *s, struct vhd_bitmap *bm) |
1145 |
{ |
1146 |
bm->seqno = __bitmap_lru_seqno(s); |
1147 |
} |
1148 |
|
1149 |
static inline void |
1150 |
install_bitmap(struct vhd_state *s, struct vhd_bitmap *bm) |
1151 |
{ |
1152 |
int i;
|
1153 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
1154 |
if (!s->bitmap[i]) {
|
1155 |
touch_bitmap(s, bm); |
1156 |
s->bitmap[i] = bm; |
1157 |
return;
|
1158 |
} |
1159 |
} |
1160 |
|
1161 |
ASSERT(0);
|
1162 |
} |
1163 |
|
1164 |
static inline void |
1165 |
free_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm) |
1166 |
{ |
1167 |
int i;
|
1168 |
|
1169 |
for (i = 0; i < VHD_CACHE_SIZE; i++) |
1170 |
if (s->bitmap[i] == bm)
|
1171 |
break;
|
1172 |
|
1173 |
ASSERT(!bitmap_locked(bm)); |
1174 |
ASSERT(!bitmap_in_use(bm)); |
1175 |
ASSERT(i < VHD_CACHE_SIZE); |
1176 |
|
1177 |
s->bitmap[i] = NULL;
|
1178 |
s->bitmap_free[s->bm_free_count++] = bm; |
1179 |
} |
1180 |
|
1181 |
static int |
1182 |
read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
|
1183 |
{ |
1184 |
uint32_t blk, sec; |
1185 |
struct vhd_bitmap *bm;
|
1186 |
|
1187 |
/* in fixed disks, every block is present */
|
1188 |
if (s->vhd.footer.type == HD_TYPE_FIXED)
|
1189 |
return VHD_BM_BIT_SET;
|
1190 |
|
1191 |
blk = sector / s->spb; |
1192 |
sec = sector % s->spb; |
1193 |
|
1194 |
if (blk > s->vhd.header.max_bat_size) {
|
1195 |
DPRINTF("ERROR: sec %"PRIu64" out of range, op = %d\n", |
1196 |
sector, op); |
1197 |
return -EINVAL;
|
1198 |
} |
1199 |
|
1200 |
if (bat_entry(s, blk) == DD_BLK_UNUSED) {
|
1201 |
if (op == VHD_OP_DATA_WRITE &&
|
1202 |
s->bat.pbw_blk != blk && bat_locked(s)) |
1203 |
return VHD_BM_BAT_LOCKED;
|
1204 |
|
1205 |
return VHD_BM_BAT_CLEAR;
|
1206 |
} |
1207 |
|
1208 |
if (test_batmap(s, blk)) {
|
1209 |
DBG(TLOG_DBG, "batmap set for 0x%04x\n", blk);
|
1210 |
return VHD_BM_BIT_SET;
|
1211 |
} |
1212 |
|
1213 |
bm = get_bitmap(s, blk); |
1214 |
if (!bm)
|
1215 |
return VHD_BM_NOT_CACHED;
|
1216 |
|
1217 |
/* bump lru count */
|
1218 |
touch_bitmap(s, bm); |
1219 |
|
1220 |
if (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING))
|
1221 |
return VHD_BM_READ_PENDING;
|
1222 |
|
1223 |
return ((vhd_bitmap_test(&s->vhd, bm->map, sec)) ?
|
1224 |
VHD_BM_BIT_SET : VHD_BM_BIT_CLEAR); |
1225 |
} |
1226 |
|
1227 |
static int |
1228 |
read_bitmap_cache_span(struct vhd_state *s,
|
1229 |
uint64_t sector, int nr_secs, int value) |
1230 |
{ |
1231 |
int ret;
|
1232 |
uint32_t blk, sec; |
1233 |
struct vhd_bitmap *bm;
|
1234 |
|
1235 |
/* in fixed disks, every block is present */
|
1236 |
if (s->vhd.footer.type == HD_TYPE_FIXED)
|
1237 |
return nr_secs;
|
1238 |
|
1239 |
sec = sector % s->spb; |
1240 |
blk = sector / s->spb; |
1241 |
|
1242 |
if (test_batmap(s, blk))
|
1243 |
return MIN(nr_secs, s->spb - sec);
|
1244 |
|
1245 |
bm = get_bitmap(s, blk); |
1246 |
|
1247 |
ASSERT(bm && bitmap_valid(bm)); |
1248 |
|
1249 |
for (ret = 0; sec < s->spb && ret < nr_secs; sec++, ret++) |
1250 |
if (vhd_bitmap_test(&s->vhd, bm->map, sec) != value)
|
1251 |
break;
|
1252 |
|
1253 |
return ret;
|
1254 |
} |
1255 |
|
1256 |
static inline struct vhd_request * |
1257 |
alloc_vhd_request(struct vhd_state *s)
|
1258 |
{ |
1259 |
struct vhd_request *req = NULL; |
1260 |
|
1261 |
if (s->vreq_free_count > 0) { |
1262 |
req = s->vreq_free[--s->vreq_free_count]; |
1263 |
ASSERT(req->treq.secs == 0);
|
1264 |
init_vhd_request(s, req); |
1265 |
return req;
|
1266 |
} |
1267 |
|
1268 |
return NULL; |
1269 |
} |
1270 |
|
1271 |
static inline void |
1272 |
free_vhd_request(struct vhd_state *s, struct vhd_request *req) |
1273 |
{ |
1274 |
memset(req, 0, sizeof(struct vhd_request)); |
1275 |
s->vreq_free[s->vreq_free_count++] = req; |
1276 |
} |
1277 |
|
1278 |
static inline void |
1279 |
aio_read(struct vhd_state *s, struct vhd_request *req, uint64_t offset) |
1280 |
{ |
1281 |
struct tiocb *tiocb = &req->tiocb;
|
1282 |
|
1283 |
td_prep_read(tiocb, s->vhd.fd, req->treq.buf, |
1284 |
vhd_sectors_to_bytes(req->treq.secs), |
1285 |
offset, vhd_complete, req); |
1286 |
td_queue_tiocb(s->driver, tiocb); |
1287 |
|
1288 |
s->queued++; |
1289 |
s->reads++; |
1290 |
s->read_size += req->treq.secs; |
1291 |
TRACE(s); |
1292 |
} |
1293 |
|
1294 |
static inline void |
1295 |
aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset) |
1296 |
{ |
1297 |
struct tiocb *tiocb = &req->tiocb;
|
1298 |
|
1299 |
td_prep_write(tiocb, s->vhd.fd, req->treq.buf, |
1300 |
vhd_sectors_to_bytes(req->treq.secs), |
1301 |
offset, vhd_complete, req); |
1302 |
td_queue_tiocb(s->driver, tiocb); |
1303 |
|
1304 |
s->queued++; |
1305 |
s->writes++; |
1306 |
s->write_size += req->treq.secs; |
1307 |
TRACE(s); |
1308 |
} |
1309 |
|
1310 |
static inline uint64_t |
1311 |
reserve_new_block(struct vhd_state *s, uint32_t blk)
|
1312 |
{ |
1313 |
int gap = 0; |
1314 |
|
1315 |
ASSERT(!test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED)); |
1316 |
|
1317 |
/* data region of segment should begin on page boundary */
|
1318 |
if ((s->next_db + s->bm_secs) % s->spp)
|
1319 |
gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp)); |
1320 |
|
1321 |
s->bat.pbw_blk = blk; |
1322 |
s->bat.pbw_offset = s->next_db + gap; |
1323 |
|
1324 |
return s->next_db;
|
1325 |
} |
1326 |
|
1327 |
static int |
1328 |
schedule_bat_write(struct vhd_state *s)
|
1329 |
{ |
1330 |
int i;
|
1331 |
uint32_t blk; |
1332 |
char *buf;
|
1333 |
uint64_t offset; |
1334 |
struct vhd_request *req;
|
1335 |
|
1336 |
ASSERT(bat_locked(s)); |
1337 |
|
1338 |
req = &s->bat.req; |
1339 |
buf = s->bat.bat_buf; |
1340 |
blk = s->bat.pbw_blk; |
1341 |
|
1342 |
init_vhd_request(s, req); |
1343 |
memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512); |
1344 |
|
1345 |
((uint32_t *)buf)[blk % 128] = s->bat.pbw_offset;
|
1346 |
|
1347 |
for (i = 0; i < 128; i++) |
1348 |
BE32_OUT(&((uint32_t *)buf)[i]); |
1349 |
|
1350 |
offset = s->vhd.header.table_offset + (blk - (blk % 128)) * 4; |
1351 |
req->treq.secs = 1;
|
1352 |
req->treq.buf = buf; |
1353 |
req->op = VHD_OP_BAT_WRITE; |
1354 |
req->next = NULL;
|
1355 |
|
1356 |
aio_write(s, req, offset); |
1357 |
set_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED); |
1358 |
|
1359 |
DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64", " |
1360 |
"table_offset: 0x%08"PRIx64"\n", blk, s->bat.pbw_offset, offset); |
1361 |
|
1362 |
return 0; |
1363 |
} |
1364 |
|
1365 |
static void |
1366 |
schedule_zero_bm_write(struct vhd_state *s,
|
1367 |
struct vhd_bitmap *bm, uint64_t lb_end)
|
1368 |
{ |
1369 |
uint64_t offset; |
1370 |
struct vhd_request *req = &s->bat.zero_req;
|
1371 |
|
1372 |
init_vhd_request(s, req); |
1373 |
|
1374 |
offset = vhd_sectors_to_bytes(lb_end); |
1375 |
req->op = VHD_OP_ZERO_BM_WRITE; |
1376 |
req->treq.sec = s->bat.pbw_blk * s->spb; |
1377 |
req->treq.secs = (s->bat.pbw_offset - lb_end) + s->bm_secs; |
1378 |
req->treq.buf = vhd_zeros(vhd_sectors_to_bytes(req->treq.secs)); |
1379 |
req->next = NULL;
|
1380 |
|
1381 |
DBG(TLOG_DBG, "blk: 0x%04x, writing zero bitmap at 0x%08"PRIx64"\n", |
1382 |
s->bat.pbw_blk, offset); |
1383 |
|
1384 |
lock_bitmap(bm); |
1385 |
add_to_transaction(&bm->tx, req); |
1386 |
aio_write(s, req, offset); |
1387 |
} |
1388 |
|
1389 |
/* This is a performance optimization. When writing sequentially into full
|
1390 |
* blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in
|
1391 |
* throughput. To prevent skipping, we issue redundant writes into the (padded)
|
1392 |
* bitmap area just to make all writes sequential. This will help VHDs on raw
|
1393 |
* block devices, while the FS-based VHDs shouldn't suffer much.
|
1394 |
*
|
1395 |
* Note that it only makes sense to perform this redundant bitmap write if the
|
1396 |
* block is completely full (i.e. the batmap entry is set). If the block is not
|
1397 |
* completely full then one of the following two things will be true:
|
1398 |
* 1. we'll either be allocating new sectors in this block and writing its
|
1399 |
* bitmap transactionally, which will be slow anyways; or
|
1400 |
* 2. the IO will be skipping over the unallocated sectors again, so the
|
1401 |
* pattern will not be sequential anyways
|
1402 |
* In either case a redundant bitmap write becomes pointless. This fact
|
1403 |
* simplifies the implementation of redundant writes: since we know the bitmap
|
1404 |
* cannot be updated by anyone else, we don't have to worry about transactions
|
1405 |
* or potential write conflicts.
|
1406 |
* */
|
1407 |
static void |
1408 |
schedule_redundant_bm_write(struct vhd_state *s, uint32_t blk)
|
1409 |
{ |
1410 |
uint64_t offset; |
1411 |
struct vhd_request *req;
|
1412 |
|
1413 |
ASSERT(s->vhd.footer.type != HD_TYPE_FIXED); |
1414 |
ASSERT(test_batmap(s, blk)); |
1415 |
|
1416 |
req = alloc_vhd_request(s); |
1417 |
if (!req)
|
1418 |
return;
|
1419 |
|
1420 |
req->treq.buf = s->padbm_buf; |
1421 |
|
1422 |
offset = bat_entry(s, blk); |
1423 |
ASSERT(offset != DD_BLK_UNUSED); |
1424 |
offset <<= VHD_SECTOR_SHIFT; |
1425 |
offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT); |
1426 |
|
1427 |
req->op = VHD_OP_REDUNDANT_BM_WRITE; |
1428 |
req->treq.sec = blk * s->spb; |
1429 |
req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT; |
1430 |
req->next = NULL;
|
1431 |
|
1432 |
DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n", |
1433 |
blk, offset); |
1434 |
|
1435 |
aio_write(s, req, offset); |
1436 |
} |
1437 |
|
1438 |
static int |
1439 |
update_bat(struct vhd_state *s, uint32_t blk)
|
1440 |
{ |
1441 |
int err;
|
1442 |
uint64_t lb_end; |
1443 |
struct vhd_bitmap *bm;
|
1444 |
|
1445 |
ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED); |
1446 |
|
1447 |
if (bat_locked(s)) {
|
1448 |
ASSERT(s->bat.pbw_blk == blk); |
1449 |
return 0; |
1450 |
} |
1451 |
|
1452 |
/* empty bitmap could already be in
|
1453 |
* cache if earlier bat update failed */
|
1454 |
bm = get_bitmap(s, blk); |
1455 |
if (!bm) {
|
1456 |
/* install empty bitmap in cache */
|
1457 |
err = alloc_vhd_bitmap(s, &bm, blk); |
1458 |
if (err)
|
1459 |
return err;
|
1460 |
|
1461 |
install_bitmap(s, bm); |
1462 |
} |
1463 |
|
1464 |
lock_bat(s); |
1465 |
lb_end = reserve_new_block(s, blk); |
1466 |
schedule_zero_bm_write(s, bm, lb_end); |
1467 |
set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT); |
1468 |
|
1469 |
return 0; |
1470 |
} |
1471 |
|
1472 |
static int |
1473 |
allocate_block(struct vhd_state *s, uint32_t blk)
|
1474 |
{ |
1475 |
int err, gap;
|
1476 |
uint64_t offset, size; |
1477 |
struct vhd_bitmap *bm;
|
1478 |
ssize_t count; |
1479 |
|
1480 |
ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED); |
1481 |
|
1482 |
if (bat_locked(s)) {
|
1483 |
ASSERT(s->bat.pbw_blk == blk); |
1484 |
if (s->bat.req.error)
|
1485 |
return -EBUSY;
|
1486 |
return 0; |
1487 |
} |
1488 |
|
1489 |
gap = 0;
|
1490 |
s->bat.pbw_blk = blk; |
1491 |
offset = vhd_sectors_to_bytes(s->next_db); |
1492 |
|
1493 |
/* data region of segment should begin on page boundary */
|
1494 |
if ((s->next_db + s->bm_secs) % s->spp) {
|
1495 |
gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp)); |
1496 |
s->next_db += gap; |
1497 |
} |
1498 |
|
1499 |
s->bat.pbw_offset = s->next_db; |
1500 |
|
1501 |
DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64"\n", |
1502 |
blk, s->bat.pbw_offset); |
1503 |
|
1504 |
if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) { |
1505 |
ERR(s, -errno, "lseek failed\n");
|
1506 |
return -errno;
|
1507 |
} |
1508 |
|
1509 |
size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap); |
1510 |
count = write(s->vhd.fd, vhd_zeros(size), size); |
1511 |
if (count != size) {
|
1512 |
err = count < 0 ? -errno : -ENOSPC;
|
1513 |
ERR(s, -errno, |
1514 |
"write failed (%zd, offset %"PRIu64")\n", count, offset); |
1515 |
return err;
|
1516 |
} |
1517 |
|
1518 |
/* empty bitmap could already be in
|
1519 |
* cache if earlier bat update failed */
|
1520 |
bm = get_bitmap(s, blk); |
1521 |
if (!bm) {
|
1522 |
/* install empty bitmap in cache */
|
1523 |
err = alloc_vhd_bitmap(s, &bm, blk); |
1524 |
if (err)
|
1525 |
return err;
|
1526 |
|
1527 |
install_bitmap(s, bm); |
1528 |
} |
1529 |
|
1530 |
lock_bat(s); |
1531 |
lock_bitmap(bm); |
1532 |
schedule_bat_write(s); |
1533 |
add_to_transaction(&bm->tx, &s->bat.req); |
1534 |
|
1535 |
return 0; |
1536 |
} |
1537 |
|
1538 |
static int |
1539 |
schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
|
1540 |
{ |
1541 |
uint64_t offset; |
1542 |
uint32_t blk = 0, sec = 0; |
1543 |
struct vhd_bitmap *bm;
|
1544 |
struct vhd_request *req;
|
1545 |
|
1546 |
if (s->vhd.footer.type == HD_TYPE_FIXED) {
|
1547 |
offset = vhd_sectors_to_bytes(treq.sec); |
1548 |
goto make_request;
|
1549 |
} |
1550 |
|
1551 |
blk = treq.sec / s->spb; |
1552 |
sec = treq.sec % s->spb; |
1553 |
bm = get_bitmap(s, blk); |
1554 |
offset = bat_entry(s, blk); |
1555 |
|
1556 |
ASSERT(offset != DD_BLK_UNUSED); |
1557 |
ASSERT(test_batmap(s, blk) || (bm && bitmap_valid(bm))); |
1558 |
|
1559 |
offset += s->bm_secs + sec; |
1560 |
offset = vhd_sectors_to_bytes(offset); |
1561 |
|
1562 |
make_request:
|
1563 |
req = alloc_vhd_request(s); |
1564 |
if (!req)
|
1565 |
return -EBUSY;
|
1566 |
|
1567 |
req->treq = treq; |
1568 |
req->flags = flags; |
1569 |
req->op = VHD_OP_DATA_READ; |
1570 |
req->next = NULL;
|
1571 |
|
1572 |
aio_read(s, req, offset); |
1573 |
|
1574 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, " |
1575 |
"nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x, buf: %p\n", |
1576 |
s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags, |
1577 |
treq.buf); |
1578 |
|
1579 |
return 0; |
1580 |
} |
1581 |
|
1582 |
static int |
1583 |
schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
|
1584 |
{ |
1585 |
int err;
|
1586 |
uint64_t offset; |
1587 |
uint32_t blk = 0, sec = 0; |
1588 |
struct vhd_bitmap *bm = NULL; |
1589 |
struct vhd_request *req;
|
1590 |
|
1591 |
if (s->vhd.footer.type == HD_TYPE_FIXED) {
|
1592 |
offset = vhd_sectors_to_bytes(treq.sec); |
1593 |
goto make_request;
|
1594 |
} |
1595 |
|
1596 |
blk = treq.sec / s->spb; |
1597 |
sec = treq.sec % s->spb; |
1598 |
offset = bat_entry(s, blk); |
1599 |
|
1600 |
if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BAT)) {
|
1601 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
|
1602 |
err = allocate_block(s, blk); |
1603 |
else
|
1604 |
err = update_bat(s, blk); |
1605 |
|
1606 |
if (err)
|
1607 |
return err;
|
1608 |
|
1609 |
offset = s->bat.pbw_offset; |
1610 |
} |
1611 |
|
1612 |
offset += s->bm_secs + sec; |
1613 |
offset = vhd_sectors_to_bytes(offset); |
1614 |
|
1615 |
make_request:
|
1616 |
req = alloc_vhd_request(s); |
1617 |
if (!req)
|
1618 |
return -EBUSY;
|
1619 |
|
1620 |
req->treq = treq; |
1621 |
req->flags = flags; |
1622 |
req->op = VHD_OP_DATA_WRITE; |
1623 |
req->next = NULL;
|
1624 |
|
1625 |
if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BITMAP)) {
|
1626 |
bm = get_bitmap(s, blk); |
1627 |
ASSERT(bm && bitmap_valid(bm)); |
1628 |
lock_bitmap(bm); |
1629 |
|
1630 |
if (bm->tx.closed) {
|
1631 |
add_to_tail(&bm->queue, req); |
1632 |
set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED); |
1633 |
} else
|
1634 |
add_to_transaction(&bm->tx, req); |
1635 |
} else if (sec == 0 && /* first sector inside data block */ |
1636 |
s->vhd.footer.type != HD_TYPE_FIXED && |
1637 |
bat_entry(s, blk) != s->first_db && |
1638 |
test_batmap(s, blk)) |
1639 |
schedule_redundant_bm_write(s, blk); |
1640 |
|
1641 |
aio_write(s, req, offset); |
1642 |
|
1643 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, " |
1644 |
"nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x\n", |
1645 |
s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags); |
1646 |
|
1647 |
return 0; |
1648 |
} |
1649 |
|
1650 |
static int |
1651 |
schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
|
1652 |
{ |
1653 |
int err;
|
1654 |
uint64_t offset; |
1655 |
struct vhd_bitmap *bm;
|
1656 |
struct vhd_request *req = NULL; |
1657 |
|
1658 |
ASSERT(vhd_type_dynamic(&s->vhd)); |
1659 |
|
1660 |
offset = bat_entry(s, blk); |
1661 |
|
1662 |
ASSERT(offset != DD_BLK_UNUSED); |
1663 |
ASSERT(!get_bitmap(s, blk)); |
1664 |
|
1665 |
offset = vhd_sectors_to_bytes(offset); |
1666 |
|
1667 |
err = alloc_vhd_bitmap(s, &bm, blk); |
1668 |
if (err)
|
1669 |
return err;
|
1670 |
|
1671 |
req = &bm->req; |
1672 |
init_vhd_request(s, req); |
1673 |
|
1674 |
req->treq.sec = blk * s->spb; |
1675 |
req->treq.secs = s->bm_secs; |
1676 |
req->treq.buf = bm->map; |
1677 |
req->treq.cb = NULL;
|
1678 |
req->op = VHD_OP_BITMAP_READ; |
1679 |
req->next = NULL;
|
1680 |
|
1681 |
aio_read(s, req, offset); |
1682 |
lock_bitmap(bm); |
1683 |
install_bitmap(s, bm); |
1684 |
set_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING); |
1685 |
|
1686 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, nr_secs: 0x%04x, " |
1687 |
"offset: 0x%08"PRIx64"\n", s->vhd.file, req->treq.sec, blk, |
1688 |
req->treq.secs, offset); |
1689 |
|
1690 |
return 0; |
1691 |
} |
1692 |
|
1693 |
static void |
1694 |
schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
|
1695 |
{ |
1696 |
uint64_t offset; |
1697 |
struct vhd_bitmap *bm;
|
1698 |
struct vhd_request *req;
|
1699 |
|
1700 |
bm = get_bitmap(s, blk); |
1701 |
offset = bat_entry(s, blk); |
1702 |
|
1703 |
ASSERT(vhd_type_dynamic(&s->vhd)); |
1704 |
ASSERT(bm && bitmap_valid(bm) && |
1705 |
!test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING)); |
1706 |
|
1707 |
if (offset == DD_BLK_UNUSED) {
|
1708 |
ASSERT(bat_locked(s) && s->bat.pbw_blk == blk); |
1709 |
offset = s->bat.pbw_offset; |
1710 |
} |
1711 |
|
1712 |
offset = vhd_sectors_to_bytes(offset); |
1713 |
|
1714 |
req = &bm->req; |
1715 |
init_vhd_request(s, req); |
1716 |
|
1717 |
req->treq.sec = blk * s->spb; |
1718 |
req->treq.secs = s->bm_secs; |
1719 |
req->treq.buf = bm->shadow; |
1720 |
req->treq.cb = NULL;
|
1721 |
req->op = VHD_OP_BITMAP_WRITE; |
1722 |
req->next = NULL;
|
1723 |
|
1724 |
aio_write(s, req, offset); |
1725 |
lock_bitmap(bm); |
1726 |
touch_bitmap(s, bm); /* bump lru count */
|
1727 |
set_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING); |
1728 |
|
1729 |
DBG(TLOG_DBG, "%s: blk: 0x%04x, sec: 0x%08"PRIx64", nr_secs: 0x%04x, " |
1730 |
"offset: 0x%"PRIx64"\n", s->vhd.file, blk, req->treq.sec, |
1731 |
req->treq.secs, offset); |
1732 |
} |
1733 |
|
1734 |
/*
|
1735 |
* queued requests will be submitted once the bitmap
|
1736 |
* describing them is read and the requests are validated.
|
1737 |
*/
|
1738 |
static int |
1739 |
__vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
|
1740 |
{ |
1741 |
uint32_t blk; |
1742 |
struct vhd_bitmap *bm;
|
1743 |
struct vhd_request *req;
|
1744 |
|
1745 |
ASSERT(vhd_type_dynamic(&s->vhd)); |
1746 |
|
1747 |
blk = treq.sec / s->spb; |
1748 |
bm = get_bitmap(s, blk); |
1749 |
|
1750 |
ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING)); |
1751 |
|
1752 |
req = alloc_vhd_request(s); |
1753 |
if (!req)
|
1754 |
return -EBUSY;
|
1755 |
|
1756 |
req->treq = treq; |
1757 |
req->op = op; |
1758 |
req->next = NULL;
|
1759 |
|
1760 |
add_to_tail(&bm->waiting, req); |
1761 |
lock_bitmap(bm); |
1762 |
|
1763 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x nr_secs: 0x%04x, " |
1764 |
"op: %u\n", s->vhd.file, treq.sec, blk, treq.secs, op);
|
1765 |
|
1766 |
TRACE(s); |
1767 |
return 0; |
1768 |
} |
1769 |
|
1770 |
static void |
1771 |
vhd_queue_read(td_driver_t *driver, td_request_t treq) |
1772 |
{ |
1773 |
struct vhd_state *s = (struct vhd_state *)driver->data; |
1774 |
|
1775 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x (seg: %d)\n", |
1776 |
s->vhd.file, treq.sec, treq.secs, treq.sidx); |
1777 |
|
1778 |
while (treq.secs) {
|
1779 |
int err;
|
1780 |
td_request_t clone; |
1781 |
|
1782 |
err = 0;
|
1783 |
clone = treq; |
1784 |
|
1785 |
switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_READ)) {
|
1786 |
case -EINVAL:
|
1787 |
err = -EINVAL; |
1788 |
goto fail;
|
1789 |
|
1790 |
case VHD_BM_BAT_CLEAR:
|
1791 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1792 |
td_forward_request(clone); |
1793 |
break;
|
1794 |
|
1795 |
case VHD_BM_BIT_CLEAR:
|
1796 |
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
|
1797 |
td_forward_request(clone); |
1798 |
break;
|
1799 |
|
1800 |
case VHD_BM_BIT_SET:
|
1801 |
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
|
1802 |
err = schedule_data_read(s, clone, 0);
|
1803 |
if (err)
|
1804 |
goto fail;
|
1805 |
break;
|
1806 |
|
1807 |
case VHD_BM_NOT_CACHED:
|
1808 |
err = schedule_bitmap_read(s, clone.sec / s->spb); |
1809 |
if (err)
|
1810 |
goto fail;
|
1811 |
|
1812 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1813 |
err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone); |
1814 |
if (err)
|
1815 |
goto fail;
|
1816 |
break;
|
1817 |
|
1818 |
case VHD_BM_READ_PENDING:
|
1819 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1820 |
err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone); |
1821 |
if (err)
|
1822 |
goto fail;
|
1823 |
break;
|
1824 |
|
1825 |
case VHD_BM_BAT_LOCKED:
|
1826 |
default:
|
1827 |
ASSERT(0);
|
1828 |
break;
|
1829 |
} |
1830 |
|
1831 |
treq.sec += clone.secs; |
1832 |
treq.secs -= clone.secs; |
1833 |
treq.buf += vhd_sectors_to_bytes(clone.secs); |
1834 |
continue;
|
1835 |
|
1836 |
fail:
|
1837 |
clone.secs = treq.secs; |
1838 |
td_complete_request(clone, err); |
1839 |
break;
|
1840 |
} |
1841 |
} |
1842 |
|
1843 |
static void |
1844 |
vhd_queue_write(td_driver_t *driver, td_request_t treq) |
1845 |
{ |
1846 |
struct vhd_state *s = (struct vhd_state *)driver->data; |
1847 |
|
1848 |
DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x, (seg: %d)\n", |
1849 |
s->vhd.file, treq.sec, treq.secs, treq.sidx); |
1850 |
|
1851 |
while (treq.secs) {
|
1852 |
int err;
|
1853 |
uint8_t flags; |
1854 |
td_request_t clone; |
1855 |
|
1856 |
err = 0;
|
1857 |
flags = 0;
|
1858 |
clone = treq; |
1859 |
|
1860 |
switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_WRITE)) {
|
1861 |
case -EINVAL:
|
1862 |
err = -EINVAL; |
1863 |
goto fail;
|
1864 |
|
1865 |
case VHD_BM_BAT_LOCKED:
|
1866 |
err = -EBUSY; |
1867 |
goto fail;
|
1868 |
|
1869 |
case VHD_BM_BAT_CLEAR:
|
1870 |
flags = (VHD_FLAG_REQ_UPDATE_BAT | |
1871 |
VHD_FLAG_REQ_UPDATE_BITMAP); |
1872 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1873 |
err = schedule_data_write(s, clone, flags); |
1874 |
if (err)
|
1875 |
goto fail;
|
1876 |
break;
|
1877 |
|
1878 |
case VHD_BM_BIT_CLEAR:
|
1879 |
flags = VHD_FLAG_REQ_UPDATE_BITMAP; |
1880 |
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
|
1881 |
err = schedule_data_write(s, clone, flags); |
1882 |
if (err)
|
1883 |
goto fail;
|
1884 |
break;
|
1885 |
|
1886 |
case VHD_BM_BIT_SET:
|
1887 |
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
|
1888 |
err = schedule_data_write(s, clone, 0);
|
1889 |
if (err)
|
1890 |
goto fail;
|
1891 |
break;
|
1892 |
|
1893 |
case VHD_BM_NOT_CACHED:
|
1894 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1895 |
err = schedule_bitmap_read(s, clone.sec / s->spb); |
1896 |
if (err)
|
1897 |
goto fail;
|
1898 |
|
1899 |
err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone); |
1900 |
if (err)
|
1901 |
goto fail;
|
1902 |
break;
|
1903 |
|
1904 |
case VHD_BM_READ_PENDING:
|
1905 |
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb)); |
1906 |
err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone); |
1907 |
if (err)
|
1908 |
goto fail;
|
1909 |
break;
|
1910 |
|
1911 |
default:
|
1912 |
ASSERT(0);
|
1913 |
break;
|
1914 |
} |
1915 |
|
1916 |
treq.sec += clone.secs; |
1917 |
treq.secs -= clone.secs; |
1918 |
treq.buf += vhd_sectors_to_bytes(clone.secs); |
1919 |
continue;
|
1920 |
|
1921 |
fail:
|
1922 |
clone.secs = treq.secs; |
1923 |
td_complete_request(clone, err); |
1924 |
break;
|
1925 |
} |
1926 |
} |
1927 |
|
1928 |
static inline void |
1929 |
signal_completion(struct vhd_request *list, int error) |
1930 |
{ |
1931 |
struct vhd_state *s;
|
1932 |
struct vhd_request *r, *next;
|
1933 |
|
1934 |
if (!list)
|
1935 |
return;
|
1936 |
|
1937 |
r = list; |
1938 |
s = list->state; |
1939 |
|
1940 |
while (r) {
|
1941 |
int err;
|
1942 |
|
1943 |
err = (error ? error : r->error); |
1944 |
next = r->next; |
1945 |
td_complete_request(r->treq, err); |
1946 |
DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64", " |
1947 |
"err: %d\n", r->treq.sec, r->treq.sec / s->spb, err);
|
1948 |
free_vhd_request(s, r); |
1949 |
r = next; |
1950 |
|
1951 |
s->returned++; |
1952 |
TRACE(s); |
1953 |
} |
1954 |
} |
1955 |
|
1956 |
static void |
1957 |
start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm) |
1958 |
{ |
1959 |
struct vhd_transaction *tx;
|
1960 |
struct vhd_request *r, *next;
|
1961 |
int i;
|
1962 |
|
1963 |
if (!bm->queue.head)
|
1964 |
return;
|
1965 |
|
1966 |
DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
|
1967 |
|
1968 |
r = bm->queue.head; |
1969 |
tx = &bm->tx; |
1970 |
clear_req_list(&bm->queue); |
1971 |
|
1972 |
if (r && bat_entry(s, bm->blk) == DD_BLK_UNUSED)
|
1973 |
tx->error = -EIO; |
1974 |
|
1975 |
while (r) {
|
1976 |
next = r->next; |
1977 |
r->next = NULL;
|
1978 |
clear_vhd_flag(r->flags, VHD_FLAG_REQ_QUEUED); |
1979 |
|
1980 |
add_to_transaction(tx, r); |
1981 |
if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
|
1982 |
tx->finished++; |
1983 |
if (!r->error) {
|
1984 |
uint32_t sec = r->treq.sec % s->spb; |
1985 |
for (i = 0; i < r->treq.secs; i++) |
1986 |
vhd_bitmap_set(&s->vhd, |
1987 |
bm->shadow, sec + i); |
1988 |
} |
1989 |
} |
1990 |
r = next; |
1991 |
} |
1992 |
|
1993 |
/* perhaps all the queued writes already completed? */
|
1994 |
if (tx->started && transaction_completed(tx))
|
1995 |
finish_data_transaction(s, bm); |
1996 |
} |
1997 |
|
1998 |
static void |
1999 |
finish_bat_transaction(struct vhd_state *s, struct vhd_bitmap *bm) |
2000 |
{ |
2001 |
struct vhd_transaction *tx = &bm->tx;
|
2002 |
|
2003 |
if (!bat_locked(s))
|
2004 |
return;
|
2005 |
|
2006 |
if (s->bat.pbw_blk != bm->blk)
|
2007 |
return;
|
2008 |
|
2009 |
if (!s->bat.req.error)
|
2010 |
goto release;
|
2011 |
|
2012 |
if (!test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE))
|
2013 |
goto release;
|
2014 |
|
2015 |
tx->closed = 1;
|
2016 |
return;
|
2017 |
|
2018 |
release:
|
2019 |
DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
|
2020 |
unlock_bat(s); |
2021 |
init_bat(s); |
2022 |
} |
2023 |
|
2024 |
static void |
2025 |
finish_bitmap_transaction(struct vhd_state *s,
|
2026 |
struct vhd_bitmap *bm, int error) |
2027 |
{ |
2028 |
int map_size;
|
2029 |
struct vhd_transaction *tx = &bm->tx;
|
2030 |
|
2031 |
DBG(TLOG_DBG, "blk: 0x%04x, err: %d\n", bm->blk, error);
|
2032 |
tx->error = (tx->error ? tx->error : error); |
2033 |
map_size = vhd_sectors_to_bytes(s->bm_secs); |
2034 |
|
2035 |
if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
|
2036 |
if (test_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT)) {
|
2037 |
/* still waiting for bat write */
|
2038 |
ASSERT(bm->blk == s->bat.pbw_blk); |
2039 |
ASSERT(test_vhd_flag(s->bat.status, |
2040 |
VHD_FLAG_BAT_WRITE_STARTED)); |
2041 |
s->bat.req.tx = tx; |
2042 |
return;
|
2043 |
} |
2044 |
} |
2045 |
|
2046 |
if (tx->error) {
|
2047 |
/* undo changes to shadow */
|
2048 |
memcpy(bm->shadow, bm->map, map_size); |
2049 |
} else {
|
2050 |
/* complete atomic write */
|
2051 |
memcpy(bm->map, bm->shadow, map_size); |
2052 |
if (!test_batmap(s, bm->blk) && bitmap_full(s, bm))
|
2053 |
set_batmap(s, bm->blk); |
2054 |
} |
2055 |
|
2056 |
/* transaction done; signal completions */
|
2057 |
signal_completion(tx->requests.head, tx->error); |
2058 |
init_tx(tx); |
2059 |
start_new_bitmap_transaction(s, bm); |
2060 |
|
2061 |
if (!bitmap_in_use(bm))
|
2062 |
unlock_bitmap(bm); |
2063 |
|
2064 |
finish_bat_transaction(s, bm); |
2065 |
} |
2066 |
|
2067 |
static void |
2068 |
finish_data_transaction(struct vhd_state *s, struct vhd_bitmap *bm) |
2069 |
{ |
2070 |
struct vhd_transaction *tx = &bm->tx;
|
2071 |
|
2072 |
DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
|
2073 |
|
2074 |
tx->closed = 1;
|
2075 |
|
2076 |
if (!tx->error)
|
2077 |
return schedule_bitmap_write(s, bm->blk);
|
2078 |
|
2079 |
return finish_bitmap_transaction(s, bm, 0); |
2080 |
} |
2081 |
|
2082 |
static void |
2083 |
finish_bat_write(struct vhd_request *req)
|
2084 |
{ |
2085 |
struct vhd_bitmap *bm;
|
2086 |
struct vhd_transaction *tx;
|
2087 |
struct vhd_state *s = req->state;
|
2088 |
|
2089 |
s->returned++; |
2090 |
TRACE(s); |
2091 |
|
2092 |
bm = get_bitmap(s, s->bat.pbw_blk); |
2093 |
|
2094 |
DBG(TLOG_DBG, "blk 0x%04x, pbwo: 0x%08"PRIx64", err %d\n", |
2095 |
s->bat.pbw_blk, s->bat.pbw_offset, req->error); |
2096 |
ASSERT(bm && bitmap_valid(bm)); |
2097 |
ASSERT(bat_locked(s) && |
2098 |
test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED)); |
2099 |
|
2100 |
tx = &bm->tx; |
2101 |
ASSERT(test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE)); |
2102 |
|
2103 |
if (!req->error) {
|
2104 |
bat_entry(s, s->bat.pbw_blk) = s->bat.pbw_offset; |
2105 |
s->next_db = s->bat.pbw_offset + s->spb + s->bm_secs; |
2106 |
} else
|
2107 |
tx->error = req->error; |
2108 |
|
2109 |
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
|
2110 |
tx->finished++; |
2111 |
remove_from_req_list(&tx->requests, req); |
2112 |
if (transaction_completed(tx))
|
2113 |
finish_data_transaction(s, bm); |
2114 |
} else {
|
2115 |
clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT); |
2116 |
if (s->bat.req.tx)
|
2117 |
finish_bitmap_transaction(s, bm, req->error); |
2118 |
} |
2119 |
|
2120 |
finish_bat_transaction(s, bm); |
2121 |
} |
2122 |
|
2123 |
static void |
2124 |
finish_zero_bm_write(struct vhd_request *req)
|
2125 |
{ |
2126 |
uint32_t blk; |
2127 |
struct vhd_bitmap *bm;
|
2128 |
struct vhd_transaction *tx = req->tx;
|
2129 |
struct vhd_state *s = req->state;
|
2130 |
|
2131 |
s->returned++; |
2132 |
TRACE(s); |
2133 |
|
2134 |
blk = req->treq.sec / s->spb; |
2135 |
bm = get_bitmap(s, blk); |
2136 |
|
2137 |
DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
|
2138 |
ASSERT(bat_locked(s)); |
2139 |
ASSERT(s->bat.pbw_blk == blk); |
2140 |
ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm)); |
2141 |
|
2142 |
tx->finished++; |
2143 |
remove_from_req_list(&tx->requests, req); |
2144 |
|
2145 |
if (req->error) {
|
2146 |
unlock_bat(s); |
2147 |
init_bat(s); |
2148 |
tx->error = req->error; |
2149 |
clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT); |
2150 |
} else
|
2151 |
schedule_bat_write(s); |
2152 |
|
2153 |
if (transaction_completed(tx))
|
2154 |
finish_data_transaction(s, bm); |
2155 |
} |
2156 |
|
2157 |
static int |
2158 |
finish_redundant_bm_write(struct vhd_request *req)
|
2159 |
{ |
2160 |
/* uint32_t blk; */
|
2161 |
struct vhd_state *s = (struct vhd_state *) req->state; |
2162 |
|
2163 |
s->returned++; |
2164 |
TRACE(s); |
2165 |
/* blk = req->treq.sec / s->spb;
|
2166 |
DBG(TLOG_DBG, "blk: %u\n", blk); */
|
2167 |
|
2168 |
if (req->error) {
|
2169 |
ERR(s, req->error, "lsec: 0x%08"PRIx64, req->treq.sec);
|
2170 |
} |
2171 |
free_vhd_request(s, req); |
2172 |
s->debug_done_redundant_writes++; |
2173 |
return 0; |
2174 |
} |
2175 |
|
2176 |
|
2177 |
static void |
2178 |
finish_bitmap_read(struct vhd_request *req)
|
2179 |
{ |
2180 |
uint32_t blk; |
2181 |
struct vhd_bitmap *bm;
|
2182 |
struct vhd_request *r, *next;
|
2183 |
struct vhd_state *s = req->state;
|
2184 |
|
2185 |
s->returned++; |
2186 |
TRACE(s); |
2187 |
|
2188 |
blk = req->treq.sec / s->spb; |
2189 |
bm = get_bitmap(s, blk); |
2190 |
|
2191 |
DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
|
2192 |
ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING)); |
2193 |
|
2194 |
r = bm->waiting.head; |
2195 |
clear_req_list(&bm->waiting); |
2196 |
clear_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING); |
2197 |
|
2198 |
if (!req->error) {
|
2199 |
memcpy(bm->shadow, bm->map, vhd_sectors_to_bytes(s->bm_secs)); |
2200 |
|
2201 |
while (r) {
|
2202 |
struct vhd_request tmp;
|
2203 |
|
2204 |
tmp = *r; |
2205 |
next = r->next; |
2206 |
free_vhd_request(s, r); |
2207 |
|
2208 |
ASSERT(tmp.op == VHD_OP_DATA_READ || |
2209 |
tmp.op == VHD_OP_DATA_WRITE); |
2210 |
|
2211 |
if (tmp.op == VHD_OP_DATA_READ)
|
2212 |
vhd_queue_read(s->driver, tmp.treq); |
2213 |
else if (tmp.op == VHD_OP_DATA_WRITE) |
2214 |
vhd_queue_write(s->driver, tmp.treq); |
2215 |
|
2216 |
r = next; |
2217 |
} |
2218 |
} else {
|
2219 |
int err = req->error;
|
2220 |
unlock_bitmap(bm); |
2221 |
free_vhd_bitmap(s, bm); |
2222 |
return signal_completion(r, err);
|
2223 |
} |
2224 |
|
2225 |
if (!bitmap_in_use(bm))
|
2226 |
unlock_bitmap(bm); |
2227 |
} |
2228 |
|
2229 |
static void |
2230 |
finish_bitmap_write(struct vhd_request *req)
|
2231 |
{ |
2232 |
uint32_t blk; |
2233 |
struct vhd_bitmap *bm;
|
2234 |
struct vhd_transaction *tx;
|
2235 |
struct vhd_state *s = req->state;
|
2236 |
|
2237 |
s->returned++; |
2238 |
TRACE(s); |
2239 |
|
2240 |
blk = req->treq.sec / s->spb; |
2241 |
bm = get_bitmap(s, blk); |
2242 |
tx = &bm->tx; |
2243 |
|
2244 |
DBG(TLOG_DBG, "blk: 0x%04x, started: %d, finished: %d\n",
|
2245 |
blk, tx->started, tx->finished); |
2246 |
ASSERT(tx->closed); |
2247 |
ASSERT(bm && bitmap_valid(bm)); |
2248 |
ASSERT(test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING)); |
2249 |
|
2250 |
clear_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING); |
2251 |
|
2252 |
finish_bitmap_transaction(s, bm, req->error); |
2253 |
} |
2254 |
|
2255 |
static void |
2256 |
finish_data_read(struct vhd_request *req)
|
2257 |
{ |
2258 |
struct vhd_state *s = req->state;
|
2259 |
|
2260 |
DBG(TLOG_DBG, "lsec 0x%08"PRIx64", blk: 0x%04"PRIx64"\n", |
2261 |
req->treq.sec, req->treq.sec / s->spb); |
2262 |
signal_completion(req, 0);
|
2263 |
} |
2264 |
|
2265 |
static void |
2266 |
finish_data_write(struct vhd_request *req)
|
2267 |
{ |
2268 |
int i;
|
2269 |
struct vhd_transaction *tx = req->tx;
|
2270 |
struct vhd_state *s = (struct vhd_state *)req->state; |
2271 |
|
2272 |
set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED); |
2273 |
|
2274 |
if (tx) {
|
2275 |
uint32_t blk, sec; |
2276 |
struct vhd_bitmap *bm;
|
2277 |
|
2278 |
blk = req->treq.sec / s->spb; |
2279 |
sec = req->treq.sec % s->spb; |
2280 |
bm = get_bitmap(s, blk); |
2281 |
|
2282 |
ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm)); |
2283 |
|
2284 |
tx->finished++; |
2285 |
|
2286 |
DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x04%"PRIx64", " |
2287 |
"tx->started: %d, tx->finished: %d\n", req->treq.sec,
|
2288 |
req->treq.sec / s->spb, tx->started, tx->finished); |
2289 |
|
2290 |
if (!req->error)
|
2291 |
for (i = 0; i < req->treq.secs; i++) |
2292 |
vhd_bitmap_set(&s->vhd, bm->shadow, sec + i); |
2293 |
|
2294 |
if (transaction_completed(tx))
|
2295 |
finish_data_transaction(s, bm); |
2296 |
|
2297 |
} else if (!test_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED)) { |
2298 |
ASSERT(!req->next); |
2299 |
DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64"\n", |
2300 |
req->treq.sec, req->treq.sec / s->spb); |
2301 |
signal_completion(req, 0);
|
2302 |
} |
2303 |
} |
2304 |
|
2305 |
void
|
2306 |
vhd_complete(void *arg, struct tiocb *tiocb, int err) |
2307 |
{ |
2308 |
struct vhd_request *req = (struct vhd_request *)arg; |
2309 |
struct vhd_state *s = req->state;
|
2310 |
struct iocb *io = &tiocb->iocb;
|
2311 |
|
2312 |
s->completed++; |
2313 |
TRACE(s); |
2314 |
|
2315 |
req->error = err; |
2316 |
|
2317 |
if (req->error)
|
2318 |
ERR(s, req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, " |
2319 |
"nbytes: %lu, blk: %"PRIu64", blk_offset: %u", |
2320 |
s->vhd.file, req->op, req->treq.sec, req->treq.secs, |
2321 |
io->u.c.nbytes, req->treq.sec / s->spb, |
2322 |
bat_entry(s, req->treq.sec / s->spb)); |
2323 |
|
2324 |
switch (req->op) {
|
2325 |
case VHD_OP_DATA_READ:
|
2326 |
finish_data_read(req); |
2327 |
break;
|
2328 |
|
2329 |
case VHD_OP_DATA_WRITE:
|
2330 |
finish_data_write(req); |
2331 |
break;
|
2332 |
|
2333 |
case VHD_OP_BITMAP_READ:
|
2334 |
finish_bitmap_read(req); |
2335 |
break;
|
2336 |
|
2337 |
case VHD_OP_BITMAP_WRITE:
|
2338 |
finish_bitmap_write(req); |
2339 |
break;
|
2340 |
|
2341 |
case VHD_OP_ZERO_BM_WRITE:
|
2342 |
finish_zero_bm_write(req); |
2343 |
break;
|
2344 |
|
2345 |
case VHD_OP_REDUNDANT_BM_WRITE:
|
2346 |
finish_redundant_bm_write(req); |
2347 |
break;
|
2348 |
|
2349 |
case VHD_OP_BAT_WRITE:
|
2350 |
finish_bat_write(req); |
2351 |
break;
|
2352 |
|
2353 |
default:
|
2354 |
ASSERT(0);
|
2355 |
break;
|
2356 |
} |
2357 |
} |
2358 |
|
2359 |
void
|
2360 |
vhd_debug(td_driver_t *driver) |
2361 |
{ |
2362 |
int i;
|
2363 |
struct vhd_state *s = (struct vhd_state *)driver->data; |
2364 |
|
2365 |
DBG(TLOG_WARN, "%s: QUEUED: 0x%08"PRIx64", COMPLETED: 0x%08"PRIx64", " |
2366 |
"RETURNED: 0x%08"PRIx64"\n", s->vhd.file, s->queued, s->completed, |
2367 |
s->returned); |
2368 |
DBG(TLOG_WARN, "WRITES: 0x%08"PRIx64", AVG_WRITE_SIZE: %f\n", |
2369 |
s->writes, (s->writes ? ((float)s->write_size / s->writes) : 0.0)); |
2370 |
DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n", |
2371 |
s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0)); |
2372 |
|
2373 |
DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%u total)\n", VHD_REQS_DATA);
|
2374 |
for (i = 0; i < VHD_REQS_DATA; i++) { |
2375 |
struct vhd_request *r = &s->vreq_list[i];
|
2376 |
td_request_t *t = &r->treq; |
2377 |
const char *vname = t->vreq ? t->vreq->name: NULL; |
2378 |
if (t->secs)
|
2379 |
DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
|
2380 |
" lsec: 0x%08"PRIx64", flags: %d, this: %p, " |
2381 |
"next: %p, tx: %p\n", i, vname, t->sidx, r->error, r->op,
|
2382 |
t->sec, r->flags, r, r->next, r->tx); |
2383 |
} |
2384 |
|
2385 |
DBG(TLOG_WARN, "BITMAP CACHE:\n");
|
2386 |
for (i = 0; i < VHD_CACHE_SIZE; i++) { |
2387 |
int qnum = 0, wnum = 0, rnum = 0; |
2388 |
struct vhd_bitmap *bm = s->bitmap[i];
|
2389 |
struct vhd_transaction *tx;
|
2390 |
struct vhd_request *r;
|
2391 |
|
2392 |
if (!bm)
|
2393 |
continue;
|
2394 |
|
2395 |
tx = &bm->tx; |
2396 |
r = bm->queue.head; |
2397 |
while (r) {
|
2398 |
qnum++; |
2399 |
r = r->next; |
2400 |
} |
2401 |
|
2402 |
r = bm->waiting.head; |
2403 |
while (r) {
|
2404 |
wnum++; |
2405 |
r = r->next; |
2406 |
} |
2407 |
|
2408 |
r = tx->requests.head; |
2409 |
while (r) {
|
2410 |
rnum++; |
2411 |
r = r->next; |
2412 |
} |
2413 |
|
2414 |
DBG(TLOG_WARN, "%d: blk: 0x%04x, status: 0x%08x, q: %p, qnum: %d, w: %p, "
|
2415 |
"wnum: %d, locked: %d, in use: %d, tx: %p, tx_error: %d, "
|
2416 |
"started: %d, finished: %d, status: %u, reqs: %p, nreqs: %d\n",
|
2417 |
i, bm->blk, bm->status, bm->queue.head, qnum, bm->waiting.head, |
2418 |
wnum, bitmap_locked(bm), bitmap_in_use(bm), tx, tx->error, |
2419 |
tx->started, tx->finished, tx->status, tx->requests.head, rnum); |
2420 |
} |
2421 |
|
2422 |
DBG(TLOG_WARN, "BAT: status: 0x%08x, pbw_blk: 0x%04x, "
|
2423 |
"pbw_off: 0x%08"PRIx64", tx: %p\n", s->bat.status, s->bat.pbw_blk, |
2424 |
s->bat.pbw_offset, s->bat.req.tx); |
2425 |
|
2426 |
/*
|
2427 |
for (i = 0; i < s->hdr.max_bat_size; i++)
|
2428 |
DPRINTF("%d: %u\n", i, s->bat.bat[i]);
|
2429 |
*/
|
2430 |
} |
2431 |
|
2432 |
struct tap_disk tapdisk_vhd = {
|
2433 |
.disk_type = "tapdisk_vhd",
|
2434 |
.flags = 0,
|
2435 |
.private_data_size = sizeof(struct vhd_state), |
2436 |
.td_open = _vhd_open, |
2437 |
.td_close = _vhd_close, |
2438 |
.td_queue_read = vhd_queue_read, |
2439 |
.td_queue_write = vhd_queue_write, |
2440 |
.td_get_parent_id = vhd_get_parent_id, |
2441 |
.td_validate_parent = vhd_validate_parent, |
2442 |
.td_debug = vhd_debug, |
2443 |
}; |