Statistics
| Branch: | Revision:

root / block / vmdk.c @ 89ac8480

History | View | Annotate | Download (63.5 kB)

1
/*
2
 * Block driver for the VMDK format
3
 *
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "qemu-common.h"
27
#include "block/block_int.h"
28
#include "qemu/module.h"
29
#include "migration/migration.h"
30
#include <zlib.h>
31

    
32
/* Extent magic numbers, as obtained from a big-endian read of the first
 * four bytes of the file: "COWD" for VMDK3 and "KDMV" for VMDK4. */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

/* Value of VMDK4Header.compressAlgorithm that marks a compressed extent */
#define VMDK4_COMPRESSION_DEFLATE 1

/* Bits of VMDK4Header.flags */
#define VMDK4_FLAG_NL_DETECT    (1 << 0)
/* A backup (redundant) grain directory is present at rgd_offset */
#define VMDK4_FLAG_RGD          (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
#define VMDK4_FLAG_COMPRESS     (1 << 16)
#define VMDK4_FLAG_MARKER       (1 << 17)

/* gd_offset value telling the reader to take the header from the footer */
#define VMDK4_GD_AT_END 0xffffffffffffffffULL

#define VMDK_GTE_ZEROED 0x1

/* VMDK internal error codes */
#define VMDK_OK      0
#define VMDK_ERROR   (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
#define VMDK_ZEROED  (-3)

#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
53

    
54
/*
 * On-disk header of a VMDK3 ("COWD") sparse extent.  It is read from the
 * file right after the 4-byte magic and its fields are decoded with
 * le32_to_cpu(), so the layout must not change.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* used as the extent size in sectors */
    uint32_t granularity;       /* used as sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in 512-byte sectors */
    uint32_t l1dir_size;        /* number of L1 entries */
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} QEMU_PACKED VMDK3Header;
66

    
67
/*
 * On-disk header (and footer copy) of a VMDK4 ("KDMV") sparse extent.  It is
 * read from the file right after the 4-byte magic; multi-byte fields are
 * decoded with the le*_to_cpu() helpers, so the layout must not change.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;             /* VMDK4_FLAG_* bits */
    uint64_t capacity;          /* used as the extent size in sectors */
    uint64_t granularity;       /* used as sectors per cluster */
    uint64_t desc_offset;       /* embedded descriptor position, in sectors */
    uint64_t desc_size;
    /* Number of GrainTableEntries per GrainTable */
    uint32_t num_gtes_per_gt;
    uint64_t rgd_offset;        /* backup L1 table position, in sectors */
    uint64_t gd_offset;         /* L1 table position, or VMDK4_GD_AT_END */
    uint64_t grain_offset;      /* in sectors; the file must reach this far */
    char filler[1];
    char check_bytes[4];
    uint16_t compressAlgorithm; /* VMDK4_COMPRESSION_DEFLATE if compressed */
} QEMU_PACKED VMDK4Header;
83

    
84
#define L2_CACHE_SIZE 16
85

    
86
typedef struct VmdkExtent {
87
    BlockDriverState *file;
88
    bool flat;
89
    bool compressed;
90
    bool has_marker;
91
    bool has_zero_grain;
92
    int version;
93
    int64_t sectors;
94
    int64_t end_sector;
95
    int64_t flat_start_offset;
96
    int64_t l1_table_offset;
97
    int64_t l1_backup_table_offset;
98
    uint32_t *l1_table;
99
    uint32_t *l1_backup_table;
100
    unsigned int l1_size;
101
    uint32_t l1_entry_sectors;
102

    
103
    unsigned int l2_size;
104
    uint32_t *l2_cache;
105
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
106
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
107

    
108
    int64_t cluster_sectors;
109
    char *type;
110
} VmdkExtent;
111

    
112
typedef struct BDRVVmdkState {
113
    CoMutex lock;
114
    uint64_t desc_offset;
115
    bool cid_updated;
116
    bool cid_checked;
117
    uint32_t cid;
118
    uint32_t parent_cid;
119
    int num_extents;
120
    /* Extent array with num_extents entries, ascend ordered by address */
121
    VmdkExtent *extents;
122
    Error *migration_blocker;
123
    char *create_type;
124
} BDRVVmdkState;
125

    
126
/* Bookkeeping for a pending L2 table update (consumed by vmdk_L2update) */
typedef struct VmdkMetaData {
    uint32_t offset;            /* value to store in the L2 entry */
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    int valid;
    uint32_t *l2_cache_entry;
} VmdkMetaData;
134

    
135
/*
 * Header that precedes a grain's payload on disk.  The payload follows the
 * fixed fields directly; it is declared as a C99 flexible array member
 * rather than the GNU zero-length array extension.  sizeof() still covers
 * only the fixed fields.
 */
typedef struct VmdkGrainMarker {
    uint64_t lba;
    uint32_t size;
    uint8_t  data[];
} QEMU_PACKED VmdkGrainMarker;
140

    
141
/* Values of the 'type' field of a metadata marker; the footer check in
 * this file expects MARKER_FOOTER followed by MARKER_END_OF_STREAM. */
enum {
    MARKER_END_OF_STREAM    = 0,
    MARKER_GRAIN_TABLE      = 1,
    MARKER_GRAIN_DIRECTORY  = 2,
    MARKER_FOOTER           = 3,
};
147

    
148
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
149
{
150
    uint32_t magic;
151

    
152
    if (buf_size < 4) {
153
        return 0;
154
    }
155
    magic = be32_to_cpu(*(uint32_t *)buf);
156
    if (magic == VMDK3_MAGIC ||
157
        magic == VMDK4_MAGIC) {
158
        return 100;
159
    } else {
160
        const char *p = (const char *)buf;
161
        const char *end = p + buf_size;
162
        while (p < end) {
163
            if (*p == '#') {
164
                /* skip comment line */
165
                while (p < end && *p != '\n') {
166
                    p++;
167
                }
168
                p++;
169
                continue;
170
            }
171
            if (*p == ' ') {
172
                while (p < end && *p == ' ') {
173
                    p++;
174
                }
175
                /* skip '\r' if windows line endings used. */
176
                if (p < end && *p == '\r') {
177
                    p++;
178
                }
179
                /* only accept blank lines before 'version=' line */
180
                if (p == end || *p != '\n') {
181
                    return 0;
182
                }
183
                p++;
184
                continue;
185
            }
186
            if (end - p >= strlen("version=X\n")) {
187
                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
188
                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
189
                    return 100;
190
                }
191
            }
192
            if (end - p >= strlen("version=X\r\n")) {
193
                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
194
                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
195
                    return 100;
196
                }
197
            }
198
            return 0;
199
        }
200
        return 0;
201
    }
202
}
203

    
204
/* Sizes used when reading the descriptor and header */
#define SECTOR_SIZE 512
/* 20 sectors of 512 bytes each */
#define DESC_SIZE   (20 * SECTOR_SIZE)
#define BUF_SIZE    4096
/* first sector of 512 bytes */
#define HEADER_SIZE 512
208

    
209
/*
 * Release everything owned by the extents of @bs and then the extent array
 * itself.  An extent's file is only unreferenced when it is a separate file,
 * i.e. not bs->file.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx;

    for (idx = 0; idx < s->num_extents; idx++) {
        VmdkExtent *ext = &s->extents[idx];

        g_free(ext->l1_table);
        g_free(ext->l2_cache);
        g_free(ext->l1_backup_table);
        g_free(ext->type);
        if (ext->file != bs->file) {
            bdrv_unref(ext->file);
        }
    }
    g_free(s->extents);
}
227

    
228
/*
 * Drop the most recently added extent by shrinking the extent array by one
 * entry.  Nothing the extent points to is freed here.  Does nothing when
 * there are no extents.
 */
static void vmdk_free_last_extent(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    if (s->num_extents != 0) {
        s->num_extents -= 1;
        s->extents = g_realloc(s->extents,
                               s->num_extents * sizeof(VmdkExtent));
    }
}
238

    
239
/*
 * Read a content ID from the descriptor of @bs.
 *
 * @parent: non-zero to read "parentCID", zero to read "CID".
 *
 * Returns the hexadecimal value found after the key, 0xffffffff when the key
 * is missing or unparsable, and 0 when the descriptor cannot be read.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE];
    const char *key = parent ? "parentCID" : "CID";
    /* sizeof() counts the terminating NUL, which here accounts for the
     * separator character that follows the key in the descriptor */
    size_t skip = parent ? sizeof("parentCID") : sizeof("CID");
    uint32_t cid = 0xffffffff;
    const char *hit;

    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
        return 0;
    }

    desc[DESC_SIZE - 1] = '\0';
    hit = strstr(desc, key);
    if (hit) {
        sscanf(hit + skip, "%x", &cid);
    }
    return cid;
}
270

    
271
/*
 * Rewrite the "CID" value in the descriptor of @bs with @cid.
 *
 * The descriptor text from "parentCID" onwards is saved, the new CID line is
 * printed over the old one (which terminates the string there), and the
 * saved tail is appended again before the descriptor is written back.
 *
 * Returns 0 on success, -EINVAL when the descriptor has no "parentCID", or
 * the negative error from reading/writing the descriptor.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE], tail[DESC_SIZE];
    char *parent_line, *cid_val;
    int ret;

    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
    desc[DESC_SIZE - 1] = '\0';

    parent_line = strstr(desc, "parentCID");
    if (!parent_line) {
        return -EINVAL;
    }
    pstrcpy(tail, sizeof(tail), parent_line);

    cid_val = strstr(desc, "CID");
    if (cid_val) {
        /* sizeof("CID") also steps over the separator after the key */
        cid_val += sizeof("CID");
        snprintf(cid_val, sizeof(desc) - (cid_val - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tail);
    }

    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
    return ret < 0 ? ret : 0;
}
304

    
305
/*
 * Check that the CID of the backing image still matches the parent CID
 * recorded for @bs.
 *
 * The comparison is made only while it has not succeeded before and a
 * backing image exists.  Returns 1 when valid (and remembers that in
 * s->cid_checked), 0 on mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *parent = bs->backing_hd;

    if (parent && !s->cid_checked) {
        uint32_t parent_now = vmdk_read_cid(parent, 0);

        if (parent_now != s->parent_cid) {
            /* CID not valid */
            return 0;
        }
    }
    /* CID valid */
    s->cid_checked = true;
    return 1;
}
322

    
323
/* Queue extents, if any, for reopen() */
324
static int vmdk_reopen_prepare(BDRVReopenState *state,
325
                               BlockReopenQueue *queue, Error **errp)
326
{
327
    BDRVVmdkState *s;
328
    int ret = -1;
329
    int i;
330
    VmdkExtent *e;
331

    
332
    assert(state != NULL);
333
    assert(state->bs != NULL);
334

    
335
    if (queue == NULL) {
336
        error_setg(errp, "No reopen queue for VMDK extents");
337
        goto exit;
338
    }
339

    
340
    s = state->bs->opaque;
341

    
342
    assert(s != NULL);
343

    
344
    for (i = 0; i < s->num_extents; i++) {
345
        e = &s->extents[i];
346
        if (e->file != state->bs->file) {
347
            bdrv_reopen_queue(queue, e->file, state->flags);
348
        }
349
    }
350
    ret = 0;
351

    
352
exit:
353
    return ret;
354
}
355

    
356
/*
 * Look for a parentFileNameHint="..." entry in the descriptor of @bs and, if
 * present, copy the quoted file name into bs->backing_file.
 *
 * Returns 0 on success (including when no hint is present), -EINVAL when the
 * hint is truncated, unterminated or too long for bs->backing_file, or the
 * negative error from reading the descriptor.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    static const char hint_key[] = "parentFileNameHint";
    char *p_name;
    char desc[DESC_SIZE + 1];
    BDRVVmdkState *s = bs->opaque;
    int ret;

    desc[DESC_SIZE] = '\0';
    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }

    p_name = strstr(desc, hint_key);
    if (p_name != NULL) {
        char *end_name;

        /* The key is followed by two characters (the '=' and the opening
         * quote) that get skipped.  Make sure both exist first, otherwise
         * the skip would land beyond the string terminator and the search
         * below would read outside 'desc'. */
        if (p_name[sizeof(hint_key) - 1] == '\0' ||
            p_name[sizeof(hint_key)] == '\0') {
            return -EINVAL;
        }
        p_name += sizeof(hint_key) + 1;
        end_name = strchr(p_name, '\"');
        if (end_name == NULL) {
            return -EINVAL;
        }
        if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
            return -EINVAL;
        }

        /* size is name length + 1 so that pstrcpy() stops at the quote */
        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}
387

    
388
/* Create and append extent to the extent array. Return the added VmdkExtent
389
 * address. return NULL if allocation failed. */
390
static int vmdk_add_extent(BlockDriverState *bs,
391
                           BlockDriverState *file, bool flat, int64_t sectors,
392
                           int64_t l1_offset, int64_t l1_backup_offset,
393
                           uint32_t l1_size,
394
                           int l2_size, uint64_t cluster_sectors,
395
                           VmdkExtent **new_extent,
396
                           Error **errp)
397
{
398
    VmdkExtent *extent;
399
    BDRVVmdkState *s = bs->opaque;
400

    
401
    if (cluster_sectors > 0x200000) {
402
        /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
403
        error_setg(errp, "Invalid granularity, image may be corrupt");
404
        return -EFBIG;
405
    }
406
    if (l1_size > 512 * 1024 * 1024) {
407
        /* Although with big capacity and small l1_entry_sectors, we can get a
408
         * big l1_size, we don't want unbounded value to allocate the table.
409
         * Limit it to 512M, which is 16PB for default cluster and L2 table
410
         * size */
411
        error_setg(errp, "L1 size too big");
412
        return -EFBIG;
413
    }
414

    
415
    s->extents = g_realloc(s->extents,
416
                              (s->num_extents + 1) * sizeof(VmdkExtent));
417
    extent = &s->extents[s->num_extents];
418
    s->num_extents++;
419

    
420
    memset(extent, 0, sizeof(VmdkExtent));
421
    extent->file = file;
422
    extent->flat = flat;
423
    extent->sectors = sectors;
424
    extent->l1_table_offset = l1_offset;
425
    extent->l1_backup_table_offset = l1_backup_offset;
426
    extent->l1_size = l1_size;
427
    extent->l1_entry_sectors = l2_size * cluster_sectors;
428
    extent->l2_size = l2_size;
429
    extent->cluster_sectors = flat ? sectors : cluster_sectors;
430

    
431
    if (s->num_extents > 1) {
432
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
433
    } else {
434
        extent->end_sector = extent->sectors;
435
    }
436
    bs->total_sectors = extent->end_sector;
437
    if (new_extent) {
438
        *new_extent = extent;
439
    }
440
    return 0;
441
}
442

    
443
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
444
                            Error **errp)
445
{
446
    int ret;
447
    int l1_size, i;
448

    
449
    /* read the L1 table */
450
    l1_size = extent->l1_size * sizeof(uint32_t);
451
    extent->l1_table = g_malloc(l1_size);
452
    ret = bdrv_pread(extent->file,
453
                     extent->l1_table_offset,
454
                     extent->l1_table,
455
                     l1_size);
456
    if (ret < 0) {
457
        error_setg_errno(errp, -ret,
458
                         "Could not read l1 table from extent '%s'",
459
                         extent->file->filename);
460
        goto fail_l1;
461
    }
462
    for (i = 0; i < extent->l1_size; i++) {
463
        le32_to_cpus(&extent->l1_table[i]);
464
    }
465

    
466
    if (extent->l1_backup_table_offset) {
467
        extent->l1_backup_table = g_malloc(l1_size);
468
        ret = bdrv_pread(extent->file,
469
                         extent->l1_backup_table_offset,
470
                         extent->l1_backup_table,
471
                         l1_size);
472
        if (ret < 0) {
473
            error_setg_errno(errp, -ret,
474
                             "Could not read l1 backup table from extent '%s'",
475
                             extent->file->filename);
476
            goto fail_l1b;
477
        }
478
        for (i = 0; i < extent->l1_size; i++) {
479
            le32_to_cpus(&extent->l1_backup_table[i]);
480
        }
481
    }
482

    
483
    extent->l2_cache =
484
        g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
485
    return 0;
486
 fail_l1b:
487
    g_free(extent->l1_backup_table);
488
 fail_l1:
489
    g_free(extent->l1_table);
490
    return ret;
491
}
492

    
493
/*
 * Open a VMDK3 ("COWD") sparse extent stored in @file and append it to the
 * extent array of @bs.
 *
 * The header is read from right after the 4-byte magic.  Returns 0 on
 * success, or a negative errno with @errp set when the header cannot be
 * read, the extent is rejected by vmdk_add_extent(), or its tables cannot be
 * loaded (in which case the extent just added is dropped again).
 */
static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
                                 BlockDriverState *file,
                                 int flags, Error **errp)
{
    int ret;
    uint32_t magic;
    VMDK3Header header;
    VmdkExtent *extent;

    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
                         file->filename);
        return ret;
    }
    /* Widen l1dir_offset before converting sectors to bytes; shifting the
     * 32-bit value would silently drop the high bits of large offsets. */
    ret = vmdk_add_extent(bs, file, false,
                          le32_to_cpu(header.disk_sectors),
                          (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
                          0,
                          le32_to_cpu(header.l1dir_size),
                          4096,
                          le32_to_cpu(header.granularity),
                          &extent,
                          errp);
    if (ret < 0) {
        return ret;
    }
    ret = vmdk_init_tables(bs, extent, errp);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
528

    
529
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
530
                               Error **errp);
531

    
532
static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
533
                            Error **errp)
534
{
535
    int64_t size;
536
    char *buf;
537
    int ret;
538

    
539
    size = bdrv_getlength(file);
540
    if (size < 0) {
541
        error_setg_errno(errp, -size, "Could not access file");
542
        return NULL;
543
    }
544

    
545
    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
546
    buf = g_malloc0(size + 1);
547

    
548
    ret = bdrv_pread(file, desc_offset, buf, size);
549
    if (ret < 0) {
550
        error_setg_errno(errp, -ret, "Could not read from file");
551
        g_free(buf);
552
        return NULL;
553
    }
554

    
555
    return buf;
556
}
557

    
558
/*
 * Open a VMDK4 ("KDMV") sparse extent stored in @file and append it to the
 * extent array of @bs.
 *
 * If the header has zero capacity and a descriptor offset, the embedded
 * descriptor is parsed instead.  If gd_offset is VMDK4_GD_AT_END the header
 * copy in the footer replaces the one at the start of the file.  Versions
 * above 3 are rejected, and version 3 is accepted read-only.
 *
 * Returns 0 on success or a negative errno; every failure path sets @errp.
 */
static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
                           int flags, Error **errp)
{
    int ret;
    uint32_t magic;
    uint32_t l1_size, l1_entry_sectors;
    VMDK4Header header;
    VmdkExtent *extent;
    BDRVVmdkState *s = bs->opaque;
    int64_t l1_backup_offset = 0;

    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
                         file->filename);
        return -EINVAL;
    }
    if (header.capacity == 0) {
        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
            if (!buf) {
                return -EINVAL;
            }
            ret = vmdk_open_desc_file(bs, flags, buf, errp);
            g_free(buf);
            return ret;
        }
    }

    if (!s->create_type) {
        s->create_type = g_strdup("monolithicSparse");
    }

    if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
        /*
         * The footer takes precedence over the header, so read it in. The
         * footer starts at offset -1024 from the end: One sector for the
         * footer, and another one for the end-of-stream marker.
         */
        struct {
            struct {
                uint64_t val;
                uint32_t size;
                uint32_t type;
                uint8_t pad[512 - 16];
            } QEMU_PACKED footer_marker;

            uint32_t magic;
            VMDK4Header header;
            uint8_t pad[512 - 4 - sizeof(VMDK4Header)];

            struct {
                uint64_t val;
                uint32_t size;
                uint32_t type;
                uint8_t pad[512 - 16];
            } QEMU_PACKED eos_marker;
        } QEMU_PACKED footer;

        /* the read starts one sector earlier, at the footer marker */
        ret = bdrv_pread(file,
            bs->file->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read footer");
            return ret;
        }

        /* Some sanity checks for the footer */
        if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
            le32_to_cpu(footer.footer_marker.size) != 0  ||
            le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
            le64_to_cpu(footer.eos_marker.val) != 0  ||
            le32_to_cpu(footer.eos_marker.size) != 0  ||
            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
        {
            error_setg(errp, "Invalid footer");
            return -EINVAL;
        }

        header = footer.header;
    }

    if (le32_to_cpu(header.version) > 3) {
        char buf[64];
        snprintf(buf, sizeof(buf), "VMDK version %" PRIu32,
                 le32_to_cpu(header.version));
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
                  bs->device_name, "vmdk", buf);
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
        /* VMware KB 2064959 explains that version 3 added support for
         * persistent changed block tracking (CBT), and backup software can
         * read it as version=1 if it doesn't care about the changed area
         * information. So we are safe to enable read only. */
        error_setg(errp, "VMDK version 3 must be read only");
        return -EINVAL;
    }

    if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
        error_setg(errp, "L2 table size too big");
        return -EINVAL;
    }

    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
        /* would divide by zero below */
        error_setg(errp, "L1 entry size is invalid");
        return -EINVAL;
    }
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
    if (bdrv_getlength(file) <
            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
        error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
                   (int64_t)(le64_to_cpu(header.grain_offset)
                             * BDRV_SECTOR_SIZE));
        return -EINVAL;
    }

    ret = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          l1_backup_offset,
                          l1_size,
                          le32_to_cpu(header.num_gtes_per_gt),
                          le64_to_cpu(header.granularity),
                          &extent,
                          errp);
    if (ret < 0) {
        return ret;
    }
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
    if (extent->compressed) {
        g_free(s->create_type);
        s->create_type = g_strdup("streamOptimized");
    }
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
    extent->version = le32_to_cpu(header.version);
    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
    ret = vmdk_init_tables(bs, extent, errp);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
707

    
708
/* find an option value out of descriptor file */
709
static int vmdk_parse_description(const char *desc, const char *opt_name,
710
        char *buf, int buf_size)
711
{
712
    char *opt_pos, *opt_end;
713
    const char *end = desc + strlen(desc);
714

    
715
    opt_pos = strstr(desc, opt_name);
716
    if (!opt_pos) {
717
        return VMDK_ERROR;
718
    }
719
    /* Skip "=\"" following opt_name */
720
    opt_pos += strlen(opt_name) + 2;
721
    if (opt_pos >= end) {
722
        return VMDK_ERROR;
723
    }
724
    opt_end = opt_pos;
725
    while (opt_end < end && *opt_end != '"') {
726
        opt_end++;
727
    }
728
    if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
729
        return VMDK_ERROR;
730
    }
731
    pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
732
    return VMDK_OK;
733
}
734

    
735
/* Open an extent file and append to bs array */
736
static int vmdk_open_sparse(BlockDriverState *bs,
737
                            BlockDriverState *file, int flags,
738
                            char *buf, Error **errp)
739
{
740
    uint32_t magic;
741

    
742
    magic = ldl_be_p(buf);
743
    switch (magic) {
744
        case VMDK3_MAGIC:
745
            return vmdk_open_vmfs_sparse(bs, file, flags, errp);
746
            break;
747
        case VMDK4_MAGIC:
748
            return vmdk_open_vmdk4(bs, file, flags, errp);
749
            break;
750
        default:
751
            return -EMEDIUMTYPE;
752
            break;
753
    }
754
}
755

    
756
/*
 * Parse the extent lines of descriptor text @desc, open each extent file
 * (relative to @desc_file_path) and append it to the extent array of @bs.
 *
 * Lines that do not look like an "RW" extent line, or that have a
 * non-positive size or an unknown type, are skipped.  FLAT and VMFS extents
 * are added as flat extents; SPARSE and VMFSSPARSE extents are opened
 * through vmdk_open_sparse().
 *
 * Returns 0 on success or a negative errno.  On failure the extent file
 * opened for the failing line is released here; extents added for earlier
 * lines are left in place.
 */
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                              const char *desc_file_path, Error **errp)
{
    int ret;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;
    char extent_path[PATH_MAX];
    BlockDriverState *extent_file;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    while (*p) {
        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
                access, &sectors, type, fname, &flat_offset);
        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
            if (ret != 5 || flat_offset < 0) {
                error_setg(errp, "Invalid extent lines: \n%s", p);
                return -EINVAL;
            }
        } else if (!strcmp(type, "VMFS")) {
            if (ret == 4) {
                flat_offset = 0;
            } else {
                error_setg(errp, "Invalid extent lines:\n%s", p);
                return -EINVAL;
            }
        } else if (ret != 4) {
            error_setg(errp, "Invalid extent lines:\n%s", p);
            return -EINVAL;
        }

        if (sectors <= 0 ||
            (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
             strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
            (strcmp(access, "RW"))) {
            goto next_line;
        }

        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
        extent_file = NULL;
        ret = bdrv_open(&extent_file, extent_path, NULL, NULL,
                        bs->open_flags | BDRV_O_PROTOCOL, NULL, errp);
        if (ret) {
            return ret;
        }

        /* save to extents array */
        if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
            /* FLAT extent */

            ret = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, 0, &extent, errp);
            if (ret < 0) {
                /* the extent was not added, so nobody owns the file yet */
                bdrv_unref(extent_file);
                return ret;
            }
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
            char *buf = vmdk_read_desc(extent_file, 0, errp);
            if (!buf) {
                ret = -EINVAL;
            } else {
                ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
            }
            /* the header buffer is only needed for the call above; release
             * it on success as well as on failure */
            g_free(buf);
            if (ret) {
                bdrv_unref(extent_file);
                return ret;
            }
            extent = &s->extents[s->num_extents - 1];
        } else {
            error_setg(errp, "Unsupported extent type '%s'", type);
            bdrv_unref(extent_file);
            return -ENOTSUP;
        }
        extent->type = g_strdup(type);
next_line:
        /* move to next line */
        while (*p) {
            if (*p == '\n') {
                p++;
                break;
            }
            p++;
        }
    }
    return 0;
}
856

    
857
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
858
                               Error **errp)
859
{
860
    int ret;
861
    char ct[128];
862
    BDRVVmdkState *s = bs->opaque;
863

    
864
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
865
        ret = -EMEDIUMTYPE;
866
        goto exit;
867
    }
868
    if (strcmp(ct, "monolithicFlat") &&
869
        strcmp(ct, "vmfs") &&
870
        strcmp(ct, "vmfsSparse") &&
871
        strcmp(ct, "twoGbMaxExtentSparse") &&
872
        strcmp(ct, "twoGbMaxExtentFlat")) {
873
        error_setg(errp, "Unsupported image type '%s'", ct);
874
        ret = -ENOTSUP;
875
        goto exit;
876
    }
877
    s->create_type = g_strdup(ct);
878
    s->desc_offset = 0;
879
    ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
880
exit:
881
    return ret;
882
}
883

    
884
static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
885
                     Error **errp)
886
{
887
    char *buf = NULL;
888
    int ret;
889
    BDRVVmdkState *s = bs->opaque;
890
    uint32_t magic;
891

    
892
    buf = vmdk_read_desc(bs->file, 0, errp);
893
    if (!buf) {
894
        return -EINVAL;
895
    }
896

    
897
    magic = ldl_be_p(buf);
898
    switch (magic) {
899
        case VMDK3_MAGIC:
900
        case VMDK4_MAGIC:
901
            ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp);
902
            s->desc_offset = 0x200;
903
            break;
904
        default:
905
            ret = vmdk_open_desc_file(bs, flags, buf, errp);
906
            break;
907
    }
908
    if (ret) {
909
        goto fail;
910
    }
911

    
912
    /* try to open parent images, if exist */
913
    ret = vmdk_parent_open(bs);
914
    if (ret) {
915
        goto fail;
916
    }
917
    s->cid = vmdk_read_cid(bs, 0);
918
    s->parent_cid = vmdk_read_cid(bs, 1);
919
    qemu_co_mutex_init(&s->lock);
920

    
921
    /* Disable migration when VMDK images are used */
922
    error_set(&s->migration_blocker,
923
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
924
              "vmdk", bs->device_name, "live migration");
925
    migrate_add_blocker(s->migration_blocker);
926
    g_free(buf);
927
    return 0;
928

    
929
fail:
930
    g_free(buf);
931
    g_free(s->create_type);
932
    s->create_type = NULL;
933
    vmdk_free_extents(bs);
934
    return ret;
935
}
936

    
937

    
938
/*
 * Raise bs->bl.write_zeroes_alignment to the largest cluster size (in
 * sectors) found among the non-flat extents.  Always returns 0.
 */
static int vmdk_refresh_limits(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx = 0;

    while (idx < s->num_extents) {
        /* flat extents do not contribute to the alignment */
        if (s->extents[idx].flat) {
            idx++;
            continue;
        }
        bs->bl.write_zeroes_alignment =
            MAX(bs->bl.write_zeroes_alignment,
                s->extents[idx].cluster_sectors);
        idx++;
    }

    return 0;
}
953

    
954
static int get_whole_cluster(BlockDriverState *bs,
955
                VmdkExtent *extent,
956
                uint64_t cluster_offset,
957
                uint64_t offset,
958
                bool allocate)
959
{
960
    int ret = VMDK_OK;
961
    uint8_t *whole_grain = NULL;
962

    
963
    /* we will be here if it's first write on non-exist grain(cluster).
964
     * try to read from parent image, if exist */
965
    if (bs->backing_hd) {
966
        whole_grain =
967
            qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
968
        if (!vmdk_is_cid_valid(bs)) {
969
            ret = VMDK_ERROR;
970
            goto exit;
971
        }
972

    
973
        /* floor offset to cluster */
974
        offset -= offset % (extent->cluster_sectors * 512);
975
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
976
                extent->cluster_sectors);
977
        if (ret < 0) {
978
            ret = VMDK_ERROR;
979
            goto exit;
980
        }
981

    
982
        /* Write grain only into the active image */
983
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
984
                extent->cluster_sectors);
985
        if (ret < 0) {
986
            ret = VMDK_ERROR;
987
            goto exit;
988
        }
989
    }
990
exit:
991
    qemu_vfree(whole_grain);
992
    return ret;
993
}
994

    
995
/*
 * Store m_data->offset (converted to little-endian) into the L2 table
 * entry described by @m_data.
 *
 * The entry is written synchronously to the primary L2 table and, when
 * extent->l1_backup_table_offset is non-zero, to the backup L2 table as
 * well; in that case m_data->l2_offset is overwritten with the backup
 * table's location.  Finally the cached copy of the entry, if any, is
 * updated.
 *
 * Returns VMDK_OK, or VMDK_ERROR if a write fails.
 */
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    uint32_t offset;
    QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
    int wr;

    offset = cpu_to_le32(m_data->offset);

    /* update L2 table */
    wr = bdrv_pwrite_sync(extent->file,
                          ((int64_t)m_data->l2_offset * 512)
                              + (m_data->l2_index * sizeof(m_data->offset)),
                          &offset, sizeof(offset));
    if (wr < 0) {
        return VMDK_ERROR;
    }

    /* update backup L2 table */
    if (extent->l1_backup_table_offset != 0) {
        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
        wr = bdrv_pwrite_sync(extent->file,
                              ((int64_t)m_data->l2_offset * 512)
                                  + (m_data->l2_index
                                     * sizeof(m_data->offset)),
                              &offset, sizeof(offset));
        if (wr < 0) {
            return VMDK_ERROR;
        }
    }

    /* keep the in-memory copy of the entry in sync */
    if (m_data->l2_cache_entry != NULL) {
        *m_data->l2_cache_entry = offset;
    }

    return VMDK_OK;
}
1025

    
1026
static int get_cluster_offset(BlockDriverState *bs,
1027
                                    VmdkExtent *extent,
1028
                                    VmdkMetaData *m_data,
1029
                                    uint64_t offset,
1030
                                    int allocate,
1031
                                    uint64_t *cluster_offset)
1032
{
1033
    unsigned int l1_index, l2_offset, l2_index;
1034
    int min_index, i, j;
1035
    uint32_t min_count, *l2_table;
1036
    bool zeroed = false;
1037

    
1038
    if (m_data) {
1039
        m_data->valid = 0;
1040
    }
1041
    if (extent->flat) {
1042
        *cluster_offset = extent->flat_start_offset;
1043
        return VMDK_OK;
1044
    }
1045

    
1046
    offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
1047
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
1048
    if (l1_index >= extent->l1_size) {
1049
        return VMDK_ERROR;
1050
    }
1051
    l2_offset = extent->l1_table[l1_index];
1052
    if (!l2_offset) {
1053
        return VMDK_UNALLOC;
1054
    }
1055
    for (i = 0; i < L2_CACHE_SIZE; i++) {
1056
        if (l2_offset == extent->l2_cache_offsets[i]) {
1057
            /* increment the hit count */
1058
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
1059
                for (j = 0; j < L2_CACHE_SIZE; j++) {
1060
                    extent->l2_cache_counts[j] >>= 1;
1061
                }
1062
            }
1063
            l2_table = extent->l2_cache + (i * extent->l2_size);
1064
            goto found;
1065
        }
1066
    }
1067
    /* not found: load a new entry in the least used one */
1068
    min_index = 0;
1069
    min_count = 0xffffffff;
1070
    for (i = 0; i < L2_CACHE_SIZE; i++) {
1071
        if (extent->l2_cache_counts[i] < min_count) {
1072
            min_count = extent->l2_cache_counts[i];
1073
            min_index = i;
1074
        }
1075
    }
1076
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
1077
    if (bdrv_pread(
1078
                extent->file,
1079
                (int64_t)l2_offset * 512,
1080
                l2_table,
1081
                extent->l2_size * sizeof(uint32_t)
1082
            ) != extent->l2_size * sizeof(uint32_t)) {
1083
        return VMDK_ERROR;
1084
    }
1085

    
1086
    extent->l2_cache_offsets[min_index] = l2_offset;
1087
    extent->l2_cache_counts[min_index] = 1;
1088
 found:
1089
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
1090
    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
1091

    
1092
    if (m_data) {
1093
        m_data->valid = 1;
1094
        m_data->l1_index = l1_index;
1095
        m_data->l2_index = l2_index;
1096
        m_data->offset = *cluster_offset;
1097
        m_data->l2_offset = l2_offset;
1098
        m_data->l2_cache_entry = &l2_table[l2_index];
1099
    }
1100
    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
1101
        zeroed = true;
1102
    }
1103

    
1104
    if (!*cluster_offset || zeroed) {
1105
        if (!allocate) {
1106
            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
1107
        }
1108

    
1109
        /* Avoid the L2 tables update for the images that have snapshots. */
1110
        *cluster_offset = bdrv_getlength(extent->file);
1111
        if (!extent->compressed) {
1112
            bdrv_truncate(
1113
                extent->file,
1114
                *cluster_offset + (extent->cluster_sectors << 9)
1115
            );
1116
        }
1117

    
1118
        *cluster_offset >>= 9;
1119
        l2_table[l2_index] = cpu_to_le32(*cluster_offset);
1120

    
1121
        /* First of all we write grain itself, to avoid race condition
1122
         * that may to corrupt the image.
1123
         * This problem may occur because of insufficient space on host disk
1124
         * or inappropriate VM shutdown.
1125
         */
1126
        if (get_whole_cluster(
1127
                bs, extent, *cluster_offset, offset, allocate) == -1) {
1128
            return VMDK_ERROR;
1129
        }
1130

    
1131
        if (m_data) {
1132
            m_data->offset = *cluster_offset;
1133
        }
1134
    }
1135
    *cluster_offset <<= 9;
1136
    return VMDK_OK;
1137
}
1138

    
1139
/*
 * Return the first extent, starting at @start_hint (or at the first extent
 * when the hint is NULL), whose end_sector lies beyond @sector_num.
 * Returns NULL if no such extent exists.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *cur;

    for (cur = start_hint ? start_hint : &s->extents[0];
         cur < &s->extents[s->num_extents];
         cur++) {
        if (sector_num < cur->end_sector) {
            return cur;
        }
    }
    return NULL;
}
1155

    
1156
/*
 * Report the allocation status of the cluster containing @sector_num.
 *
 * *pnum is set to the number of sectors, starting at @sector_num and
 * capped at @nb_sectors, that lie in the same cluster.
 *
 * Returns 0 for an unallocated cluster, BDRV_BLOCK_ZERO for a zeroed one,
 * BDRV_BLOCK_DATA for an allocated one, or -EIO on a lookup error or when
 * no extent covers @sector_num (the same error vmdk_read()/vmdk_write()
 * use for that case).  For allocated, uncompressed clusters stored in
 * bs->file the result also carries BDRV_BLOCK_OFFSET_VALID and the file
 * offset that corresponds to @sector_num itself.
 */
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    int64_t index_in_cluster, n, ret;
    uint64_t offset;
    uint64_t extent_begin_sector, extent_relative_sector_num;
    VmdkExtent *extent;

    extent = find_extent(s, sector_num, NULL);
    if (!extent) {
        /* do not leave *pnum uninitialised for the caller */
        *pnum = 0;
        return -EIO;
    }
    qemu_co_mutex_lock(&s->lock);
    ret = get_cluster_offset(bs, extent, NULL,
                            sector_num * 512, 0, &offset);
    qemu_co_mutex_unlock(&s->lock);

    /*
     * Cluster boundaries are relative to the start of the extent, not to
     * the start of the virtual disk (same computation as in vmdk_read()).
     */
    extent_begin_sector = extent->end_sector - extent->sectors;
    extent_relative_sector_num = sector_num - extent_begin_sector;
    index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;

    switch (ret) {
    case VMDK_ERROR:
        ret = -EIO;
        break;
    case VMDK_UNALLOC:
        ret = 0;
        break;
    case VMDK_ZEROED:
        ret = BDRV_BLOCK_ZERO;
        break;
    case VMDK_OK:
        ret = BDRV_BLOCK_DATA;
        /*
         * A raw file offset is only meaningful when the data lives in
         * bs->file and is not stored compressed.  Report the position of
         * sector_num, not the start of its cluster.
         */
        if (extent->file == bs->file && !extent->compressed) {
            ret |= BDRV_BLOCK_OFFSET_VALID |
                   (offset + index_in_cluster * 512);
        }

        break;
    default:
        /* pass any other lookup result through unchanged */
        break;
    }

    n = extent->cluster_sectors - index_in_cluster;
    if (n > nb_sectors) {
        n = nb_sectors;
    }
    *pnum = n;
    return ret;
}
1200

    
1201
/*
 * Write @nb_sectors sectors from @buf into @extent at byte position
 * @cluster_offset + @offset_in_cluster.
 *
 * For compressed extents the data is deflated with zlib's compress() and
 * prefixed with a grain marker carrying @sector_num and the compressed
 * length; such extents must have markers enabled.  The marker fields are
 * stored little-endian, matching the le32_to_cpu() conversion applied to
 * marker->size by vmdk_read_extent().
 *
 * Returns 0 on success, -EINVAL if compression is impossible or fails, a
 * negative errno from bdrv_pwrite(), or -EIO on a short write.
 */
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, const uint8_t *buf,
                            int nb_sectors, int64_t sector_num)
{
    int ret;
    VmdkGrainMarker *data = NULL;
    uLongf buf_len;
    const uint8_t *write_buf = buf;
    int write_len = nb_sectors * 512;

    if (extent->compressed) {
        if (!extent->has_marker) {
            ret = -EINVAL;
            goto out;
        }
        /* twice the cluster size as room for the compressed stream */
        buf_len = (extent->cluster_sectors << 9) * 2;
        data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
                buf_len == 0) {
            ret = -EINVAL;
            goto out;
        }
        /* on-disk marker fields are little-endian */
        data->lba = cpu_to_le64(sector_num);
        data->size = cpu_to_le32(buf_len);
        write_buf = (uint8_t *)data;
        write_len = buf_len + sizeof(VmdkGrainMarker);
    }
    ret = bdrv_pwrite(extent->file,
                        cluster_offset + offset_in_cluster,
                        write_buf,
                        write_len);
    if (ret != write_len) {
        ret = ret < 0 ? ret : -EIO;
        goto out;
    }
    ret = 0;
 out:
    g_free(data);
    return ret;
}
1241

    
1242
/*
 * Read @nb_sectors sectors into @buf from the cluster of @extent that
 * starts at byte @cluster_offset, beginning @offset_in_cluster bytes into
 * the cluster.
 *
 * Uncompressed extents are read directly.  For compressed extents two
 * clusters' worth of bytes are read, the optional grain marker is parsed,
 * the payload is inflated with zlib's uncompress() and the requested range
 * is copied out of the inflated cluster.
 *
 * Returns 0 on success, a negative errno from bdrv_pread(), -EIO on a
 * short uncompressed read, or -EINVAL if the compressed data is malformed
 * or the requested range lies outside the inflated data.
 */
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, uint8_t *buf,
                            int nb_sectors)
{
    int ret;
    int cluster_bytes, buf_bytes;
    uint8_t *cluster_buf, *compressed_data;
    uint8_t *uncomp_buf;
    uint32_t data_len;
    int64_t avail;
    VmdkGrainMarker *marker;
    uLongf buf_len;


    if (!extent->compressed) {
        ret = bdrv_pread(extent->file,
                          cluster_offset + offset_in_cluster,
                          buf, nb_sectors * 512);
        if (ret == nb_sectors * 512) {
            return 0;
        }
        /* keep the specific errno, as vmdk_write_extent() does */
        return ret < 0 ? ret : -EIO;
    }
    cluster_bytes = extent->cluster_sectors * 512;
    /* Read two clusters in case GrainMarker + compressed data > one cluster */
    buf_bytes = cluster_bytes * 2;
    cluster_buf = g_malloc(buf_bytes);
    uncomp_buf = g_malloc(cluster_bytes);
    ret = bdrv_pread(extent->file,
                cluster_offset,
                cluster_buf, buf_bytes);
    if (ret < 0) {
        goto out;
    }
    compressed_data = cluster_buf;
    buf_len = cluster_bytes;
    data_len = cluster_bytes;
    if (extent->has_marker) {
        marker = (VmdkGrainMarker *)cluster_buf;
        compressed_data = marker->data;
        data_len = le32_to_cpu(marker->size);
    }
    /*
     * The payload must fit into what remains of cluster_buf after the
     * marker header; otherwise uncompress() would read past the buffer.
     */
    avail = (int64_t)buf_bytes - (compressed_data - cluster_buf);
    if (!data_len || avail <= 0 || data_len > avail) {
        ret = -EINVAL;
        goto out;
    }
    ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
    if (ret != Z_OK) {
        ret = -EINVAL;
        goto out;

    }
    /* the requested range must lie within the inflated data */
    if (offset_in_cluster < 0 ||
            offset_in_cluster + nb_sectors * 512 > buf_len) {
        ret = -EINVAL;
        goto out;
    }
    memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
    ret = 0;

 out:
    g_free(uncomp_buf);
    g_free(cluster_buf);
    return ret;
}
1307

    
1308
/*
 * Read @nb_sectors sectors starting at @sector_num into @buf, one cluster
 * (or part of one) per iteration.
 *
 * Allocated clusters are read through vmdk_read_extent().  Clusters that
 * are unallocated are read from the backing image when there is one (after
 * validating the CID); zeroed clusters, and unallocated clusters without a
 * backing image, read as zeroes.
 *
 * Returns 0 on success, -EIO if no extent covers a sector or the cluster
 * lookup fails, -EINVAL on a CID mismatch, or the error of the failing
 * read.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    int ret;
    uint64_t n, index_in_cluster;
    uint64_t extent_begin_sector, extent_relative_sector_num;
    VmdkExtent *extent = NULL;
    uint64_t cluster_offset;

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (!extent) {
            return -EIO;
        }
        ret = get_cluster_offset(
                            bs, extent, NULL,
                            sector_num << 9, 0, &cluster_offset);
        if (ret == VMDK_ERROR) {
            /*
             * A failed lookup is not the same as "unallocated": do not
             * hand out backing data or zeroes for it.
             */
            return -EIO;
        }
        extent_begin_sector = extent->end_sector - extent->sectors;
        extent_relative_sector_num = sector_num - extent_begin_sector;
        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
        /* never cross a cluster boundary in one step */
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }
        if (ret != VMDK_OK) {
            /* if not allocated, try to read from parent image, if exist */
            if (bs->backing_hd && ret != VMDK_ZEROED) {
                if (!vmdk_is_cid_valid(bs)) {
                    return -EINVAL;
                }
                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
                if (ret < 0) {
                    return ret;
                }
            } else {
                memset(buf, 0, 512 * n);
            }
        } else {
            ret = vmdk_read_extent(extent,
                            cluster_offset, index_in_cluster * 512,
                            buf, n);
            if (ret) {
                return ret;
            }
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}
1360

    
1361
/*
 * Coroutine entry point for reads: runs vmdk_read() while holding the
 * driver state lock and returns its result.
 */
static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
                                     uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int result;

    qemu_co_mutex_lock(&state->lock);
    result = vmdk_read(bs, sector_num, buf, nb_sectors);
    qemu_co_mutex_unlock(&state->lock);

    return result;
}
1371

    
1372
/**
1373
 * vmdk_write:
1374
 * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
1375
 *                if possible, otherwise return -ENOTSUP.
1376
 * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
1377
 *                with each cluster. By dry run we can find if the zero write
1378
 *                is possible without modifying image data.
1379
 *
1380
 * Returns: error code with 0 for success.
1381
 */
1382
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
1383
                      const uint8_t *buf, int nb_sectors,
1384
                      bool zeroed, bool zero_dry_run)
1385
{
1386
    BDRVVmdkState *s = bs->opaque;
1387
    VmdkExtent *extent = NULL;
1388
    int ret;
1389
    int64_t index_in_cluster, n;
1390
    uint64_t extent_begin_sector, extent_relative_sector_num;
1391
    uint64_t cluster_offset;
1392
    VmdkMetaData m_data;
1393

    
1394
    if (sector_num > bs->total_sectors) {
1395
        error_report("Wrong offset: sector_num=0x%" PRIx64
1396
                " total_sectors=0x%" PRIx64 "\n",
1397
                sector_num, bs->total_sectors);
1398
        return -EIO;
1399
    }
1400

    
1401
    while (nb_sectors > 0) {
1402
        extent = find_extent(s, sector_num, extent);
1403
        if (!extent) {
1404
            return -EIO;
1405
        }
1406
        ret = get_cluster_offset(
1407
                                bs,
1408
                                extent,
1409
                                &m_data,
1410
                                sector_num << 9, !extent->compressed,
1411
                                &cluster_offset);
1412
        if (extent->compressed) {
1413
            if (ret == VMDK_OK) {
1414
                /* Refuse write to allocated cluster for streamOptimized */
1415
                error_report("Could not write to allocated cluster"
1416
                              " for streamOptimized");
1417
                return -EIO;
1418
            } else {
1419
                /* allocate */
1420
                ret = get_cluster_offset(
1421
                                        bs,
1422
                                        extent,
1423
                                        &m_data,
1424
                                        sector_num << 9, 1,
1425
                                        &cluster_offset);
1426
            }
1427
        }
1428
        if (ret == VMDK_ERROR) {
1429
            return -EINVAL;
1430
        }
1431
        extent_begin_sector = extent->end_sector - extent->sectors;
1432
        extent_relative_sector_num = sector_num - extent_begin_sector;
1433
        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
1434
        n = extent->cluster_sectors - index_in_cluster;
1435
        if (n > nb_sectors) {
1436
            n = nb_sectors;
1437
        }
1438
        if (zeroed) {
1439
            /* Do zeroed write, buf is ignored */
1440
            if (extent->has_zero_grain &&
1441
                    index_in_cluster == 0 &&
1442
                    n >= extent->cluster_sectors) {
1443
                n = extent->cluster_sectors;
1444
                if (!zero_dry_run) {
1445
                    m_data.offset = VMDK_GTE_ZEROED;
1446
                    /* update L2 tables */
1447
                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
1448
                        return -EIO;
1449
                    }
1450
                }
1451
            } else {
1452
                return -ENOTSUP;
1453
            }
1454
        } else {
1455
            ret = vmdk_write_extent(extent,
1456
                            cluster_offset, index_in_cluster * 512,
1457
                            buf, n, sector_num);
1458
            if (ret) {
1459
                return ret;
1460
            }
1461
            if (m_data.valid) {
1462
                /* update L2 tables */
1463
                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
1464
                    return -EIO;
1465
                }
1466
            }
1467
        }
1468
        nb_sectors -= n;
1469
        sector_num += n;
1470
        buf += n * 512;
1471

    
1472
        /* update CID on the first write every time the virtual disk is
1473
         * opened */
1474
        if (!s->cid_updated) {
1475
            ret = vmdk_write_cid(bs, time(NULL));
1476
            if (ret < 0) {
1477
                return ret;
1478
            }
1479
            s->cid_updated = true;
1480
        }
1481
    }
1482
    return 0;
1483
}
1484

    
1485
/*
 * Coroutine entry point for data writes: runs vmdk_write() (not zeroed,
 * no dry run) while holding the driver state lock and returns its result.
 */
static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
                                      const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int result;

    qemu_co_mutex_lock(&state->lock);
    result = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
    qemu_co_mutex_unlock(&state->lock);

    return result;
}
1495

    
1496
/*
 * Coroutine entry point for zero writes.
 *
 * Under the driver state lock, vmdk_write() is first invoked as a dry run;
 * only if that succeeds is it invoked again to actually perform the zero
 * write.  Returns the result of the last vmdk_write() call made.
 */
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
                                             int64_t sector_num,
                                             int nb_sectors,
                                             BdrvRequestFlags flags)
{
    BDRVVmdkState *state = bs->opaque;
    int result;

    qemu_co_mutex_lock(&state->lock);

    /* write zeroes could fail if sectors not aligned to cluster, test it with
     * dry_run == true before really updating image */
    result = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
    if (result == 0) {
        result = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
    }

    qemu_co_mutex_unlock(&state->lock);
    return result;
}
1513

    
1514
static int vmdk_create_extent(const char *filename, int64_t filesize,
1515
                              bool flat, bool compress, bool zeroed_grain,
1516
                              Error **errp)
1517
{
1518
    int ret, i;
1519
    BlockDriverState *bs = NULL;
1520
    VMDK4Header header;
1521
    Error *local_err;
1522
    uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
1523
    uint32_t *gd_buf = NULL;
1524
    int gd_buf_size;
1525

    
1526
    ret = bdrv_create_file(filename, NULL, &local_err);
1527
    if (ret < 0) {
1528
        error_propagate(errp, local_err);
1529
        goto exit;
1530
    }
1531

    
1532
    assert(bs == NULL);
1533
    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
1534
                    NULL, &local_err);
1535
    if (ret < 0) {
1536
        error_propagate(errp, local_err);
1537
        goto exit;
1538
    }
1539

    
1540
    if (flat) {
1541
        ret = bdrv_truncate(bs, filesize);
1542
        if (ret < 0) {
1543
            error_setg_errno(errp, -ret, "Could not truncate file");
1544
        }
1545
        goto exit;
1546
    }
1547
    magic = cpu_to_be32(VMDK4_MAGIC);
1548
    memset(&header, 0, sizeof(header));
1549
    header.version = zeroed_grain ? 2 : 1;
1550
    header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
1551
                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
1552
                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
1553
    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
1554
    header.capacity = filesize / BDRV_SECTOR_SIZE;
1555
    header.granularity = 128;
1556
    header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
1557

    
1558
    grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
1559
    gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
1560
                           BDRV_SECTOR_SIZE);
1561
    gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
1562
    gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
1563

    
1564
    header.desc_offset = 1;
1565
    header.desc_size = 20;
1566
    header.rgd_offset = header.desc_offset + header.desc_size;
1567
    header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
1568
    header.grain_offset =
1569
        ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
1570
                 header.granularity);
1571
    /* swap endianness for all header fields */
1572
    header.version = cpu_to_le32(header.version);
1573
    header.flags = cpu_to_le32(header.flags);
1574
    header.capacity = cpu_to_le64(header.capacity);
1575
    header.granularity = cpu_to_le64(header.granularity);
1576
    header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
1577
    header.desc_offset = cpu_to_le64(header.desc_offset);
1578
    header.desc_size = cpu_to_le64(header.desc_size);
1579
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
1580
    header.gd_offset = cpu_to_le64(header.gd_offset);
1581
    header.grain_offset = cpu_to_le64(header.grain_offset);
1582
    header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
1583

    
1584
    header.check_bytes[0] = 0xa;
1585
    header.check_bytes[1] = 0x20;
1586
    header.check_bytes[2] = 0xd;
1587
    header.check_bytes[3] = 0xa;
1588

    
1589
    /* write all the data */
1590
    ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
1591
    if (ret < 0) {
1592
        error_set(errp, QERR_IO_ERROR);
1593
        goto exit;
1594
    }
1595
    ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
1596
    if (ret < 0) {
1597
        error_set(errp, QERR_IO_ERROR);
1598
        goto exit;
1599
    }
1600

    
1601
    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
1602
    if (ret < 0) {
1603
        error_setg_errno(errp, -ret, "Could not truncate file");
1604
        goto exit;
1605
    }
1606

    
1607
    /* write grain directory */
1608
    gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
1609
    gd_buf = g_malloc0(gd_buf_size);
1610
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
1611
         i < gt_count; i++, tmp += gt_size) {
1612
        gd_buf[i] = cpu_to_le32(tmp);
1613
    }
1614
    ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
1615
                      gd_buf, gd_buf_size);
1616
    if (ret < 0) {
1617
        error_set(errp, QERR_IO_ERROR);
1618
        goto exit;
1619
    }
1620

    
1621
    /* write backup grain directory */
1622
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
1623
         i < gt_count; i++, tmp += gt_size) {
1624
        gd_buf[i] = cpu_to_le32(tmp);
1625
    }
1626
    ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
1627
                      gd_buf, gd_buf_size);
1628
    if (ret < 0) {
1629
        error_set(errp, QERR_IO_ERROR);
1630
        goto exit;
1631
    }
1632

    
1633
    ret = 0;
1634
exit:
1635
    if (bs) {
1636
        bdrv_unref(bs);
1637
    }
1638
    g_free(gd_buf);
1639
    return ret;
1640
}
1641

    
1642
/*
 * Split @filename into three parts, each stored in a caller-provided
 * buffer of @buf_len bytes:
 *
 * @path:    everything up to and including the last '/', or failing that
 *           the last '\\', or failing that the last ':'; empty if none of
 *           them occurs.
 * @prefix:  the remainder up to (not including) its last '.'.
 * @postfix: the last '.' of the remainder and everything after it; empty
 *           if the remainder contains no '.'.
 *
 * Returns VMDK_OK on success.  Returns VMDK_ERROR, with @errp set, if
 * @filename is NULL or empty or if a part does not fit into @buf_len.
 */
static int filename_decompose(const char *filename, char *path, char *prefix,
                              char *postfix, size_t buf_len, Error **errp)
{
    const char *p, *q;

    if (filename == NULL || !strlen(filename)) {
        error_setg(errp, "No filename provided");
        return VMDK_ERROR;
    }
    p = strrchr(filename, '/');
    if (p == NULL) {
        p = strrchr(filename, '\\');
    }
    if (p == NULL) {
        p = strrchr(filename, ':');
    }
    if (p != NULL) {
        /* keep the separator as part of the path */
        p++;
        if (p - filename >= buf_len) {
            error_setg(errp, "Path is too long");
            return VMDK_ERROR;
        }
        pstrcpy(path, p - filename + 1, filename);
    } else {
        p = filename;
        path[0] = '\0';
    }
    q = strrchr(p, '.');
    if (q == NULL) {
        pstrcpy(prefix, buf_len, p);
        postfix[0] = '\0';
    } else {
        if (q - p >= buf_len) {
            error_setg(errp, "File name is too long");
            return VMDK_ERROR;
        }
        pstrcpy(prefix, q - p + 1, p);
        pstrcpy(postfix, buf_len, q);
    }
    return VMDK_OK;
}
1681

    
1682
static int vmdk_create(const char *filename, QEMUOptionParameter *options,
1683
                       Error **errp)
1684
{
1685
    int idx = 0;
1686
    BlockDriverState *new_bs = NULL;
1687
    Error *local_err;
1688
    char *desc = NULL;
1689
    int64_t total_size = 0, filesize;
1690
    const char *adapter_type = NULL;
1691
    const char *backing_file = NULL;
1692
    const char *fmt = NULL;
1693
    int flags = 0;
1694
    int ret = 0;
1695
    bool flat, split, compress;
1696
    GString *ext_desc_lines;
1697
    char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
1698
    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
1699
    const char *desc_extent_line;
1700
    char parent_desc_line[BUF_SIZE] = "";
1701
    uint32_t parent_cid = 0xffffffff;
1702
    uint32_t number_heads = 16;
1703
    bool zeroed_grain = false;
1704
    uint32_t desc_offset = 0, desc_len;
1705
    const char desc_template[] =
1706
        "# Disk DescriptorFile\n"
1707
        "version=1\n"
1708
        "CID=%x\n"
1709
        "parentCID=%x\n"
1710
        "createType=\"%s\"\n"
1711
        "%s"
1712
        "\n"
1713
        "# Extent description\n"
1714
        "%s"
1715
        "\n"
1716
        "# The Disk Data Base\n"
1717
        "#DDB\n"
1718
        "\n"
1719
        "ddb.virtualHWVersion = \"%d\"\n"
1720
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
1721
        "ddb.geometry.heads = \"%d\"\n"
1722
        "ddb.geometry.sectors = \"63\"\n"
1723
        "ddb.adapterType = \"%s\"\n";
1724

    
1725
    ext_desc_lines = g_string_new(NULL);
1726

    
1727
    if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
1728
        ret = -EINVAL;
1729
        goto exit;
1730
    }
1731
    /* Read out options */
1732
    while (options && options->name) {
1733
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1734
            total_size = options->value.n;
1735
        } else if (!strcmp(options->name, BLOCK_OPT_ADAPTER_TYPE)) {
1736
            adapter_type = options->value.s;
1737
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1738
            backing_file = options->value.s;
1739
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
1740
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
1741
        } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
1742
            fmt = options->value.s;
1743
        } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
1744
            zeroed_grain |= options->value.n;
1745
        }
1746
        options++;
1747
    }
1748
    if (!adapter_type) {
1749
        adapter_type = "ide";
1750
    } else if (strcmp(adapter_type, "ide") &&
1751
               strcmp(adapter_type, "buslogic") &&
1752
               strcmp(adapter_type, "lsilogic") &&
1753
               strcmp(adapter_type, "legacyESX")) {
1754
        error_setg(errp, "Unknown adapter type: '%s'", adapter_type);
1755
        ret = -EINVAL;
1756
        goto exit;
1757
    }
1758
    if (strcmp(adapter_type, "ide") != 0) {
1759
        /* that's the number of heads with which vmware operates when
1760
           creating, exporting, etc. vmdk files with a non-ide adapter type */
1761
        number_heads = 255;
1762
    }
1763
    if (!fmt) {
1764
        /* Default format to monolithicSparse */
1765
        fmt = "monolithicSparse";
1766
    } else if (strcmp(fmt, "monolithicFlat") &&
1767
               strcmp(fmt, "monolithicSparse") &&
1768
               strcmp(fmt, "twoGbMaxExtentSparse") &&
1769
               strcmp(fmt, "twoGbMaxExtentFlat") &&
1770
               strcmp(fmt, "streamOptimized")) {
1771
        error_setg(errp, "Unknown subformat: '%s'", fmt);
1772
        ret = -EINVAL;
1773
        goto exit;
1774
    }
1775
    split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
1776
              strcmp(fmt, "twoGbMaxExtentSparse"));
1777
    flat = !(strcmp(fmt, "monolithicFlat") &&
1778
             strcmp(fmt, "twoGbMaxExtentFlat"));
1779
    compress = !strcmp(fmt, "streamOptimized");
1780
    if (flat) {
1781
        desc_extent_line = "RW %lld FLAT \"%s\" 0\n";
1782
    } else {
1783
        desc_extent_line = "RW %lld SPARSE \"%s\"\n";
1784
    }
1785
    if (flat && backing_file) {
1786
        error_setg(errp, "Flat image can't have backing file");
1787
        ret = -ENOTSUP;
1788
        goto exit;
1789
    }
1790
    if (flat && zeroed_grain) {
1791
        error_setg(errp, "Flat image can't enable zeroed grain");
1792
        ret = -ENOTSUP;
1793
        goto exit;
1794
    }
1795
    if (backing_file) {
1796
        BlockDriverState *bs = NULL;
1797
        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
1798
                        errp);
1799
        if (ret != 0) {
1800
            goto exit;
1801
        }
1802
        if (strcmp(bs->drv->format_name, "vmdk")) {
1803
            bdrv_unref(bs);
1804
            ret = -EINVAL;
1805
            goto exit;
1806
        }
1807
        parent_cid = vmdk_read_cid(bs, 0);
1808
        bdrv_unref(bs);
1809
        snprintf(parent_desc_line, sizeof(parent_desc_line),
1810
                "parentFileNameHint=\"%s\"", backing_file);
1811
    }
1812

    
1813
    /* Create extents */
1814
    filesize = total_size;
1815
    while (filesize > 0) {
1816
        char desc_line[BUF_SIZE];
1817
        char ext_filename[PATH_MAX];
1818
        char desc_filename[PATH_MAX];
1819
        int64_t size = filesize;
1820

    
1821
        if (split && size > split_size) {
1822
            size = split_size;
1823
        }
1824
        if (split) {
1825
            snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
1826
                    prefix, flat ? 'f' : 's', ++idx, postfix);
1827
        } else if (flat) {
1828
            snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
1829
                    prefix, postfix);
1830
        } else {
1831
            snprintf(desc_filename, sizeof(desc_filename), "%s%s",
1832
                    prefix, postfix);
1833
        }
1834
        snprintf(ext_filename, sizeof(ext_filename), "%s%s",
1835
                path, desc_filename);
1836

    
1837
        if (vmdk_create_extent(ext_filename, size,
1838
                               flat, compress, zeroed_grain, errp)) {
1839
            ret = -EINVAL;
1840
            goto exit;
1841
        }
1842
        filesize -= size;
1843

    
1844
        /* Format description line */
1845
        snprintf(desc_line, sizeof(desc_line),
1846
                    desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
1847
        g_string_append(ext_desc_lines, desc_line);
1848
    }
1849
    /* generate descriptor file */
1850
    desc = g_strdup_printf(desc_template,
1851
                           (unsigned int)time(NULL),
1852
                           parent_cid,
1853
                           fmt,
1854
                           parent_desc_line,
1855
                           ext_desc_lines->str,
1856
                           (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
1857
                           total_size /
1858
                               (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
1859
                           number_heads,
1860
                           adapter_type);
1861
    desc_len = strlen(desc);
1862
    /* the descriptor offset = 0x200 */
1863
    if (!split && !flat) {
1864
        desc_offset = 0x200;
1865
    } else {
1866
        ret = bdrv_create_file(filename, options, &local_err);
1867
        if (ret < 0) {
1868
            error_setg_errno(errp, -ret, "Could not create image file");
1869
            goto exit;
1870
        }
1871
    }
1872
    assert(new_bs == NULL);
1873
    ret = bdrv_open(&new_bs, filename, NULL, NULL,
1874
                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
1875
    if (ret < 0) {
1876
        error_setg_errno(errp, -ret, "Could not write description");
1877
        goto exit;
1878
    }
1879
    ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
1880
    if (ret < 0) {
1881
        error_setg_errno(errp, -ret, "Could not write description");
1882
        goto exit;
1883
    }
1884
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
1885
     * for description file */
1886
    if (desc_offset == 0) {
1887
        ret = bdrv_truncate(new_bs, desc_len);
1888
        if (ret < 0) {
1889
            error_setg_errno(errp, -ret, "Could not truncate file");
1890
        }
1891
    }
1892
exit:
1893
    if (new_bs) {
1894
        bdrv_unref(new_bs);
1895
    }
1896
    g_free(desc);
1897
    g_string_free(ext_desc_lines, true);
1898
    return ret;
1899
}
1900

    
1901
/*
 * Tear down the VMDK-specific state attached to @bs.
 *
 * Frees the extents, the saved create-type string, and finally removes
 * and frees the migration blocker stored in the driver state.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;

    /* Extent bookkeeping and the create-type string go first. */
    vmdk_free_extents(bs);
    g_free(state->create_type);

    /* Unregister the migration blocker before releasing it. */
    migrate_del_blocker(state->migration_blocker);
    error_free(state->migration_blocker);
}
1911

    
1912
/*
 * Flush every extent file of the image.
 *
 * All extents are flushed even if one of them fails. Returns 0 when every
 * flush succeeded, otherwise the negative value reported by the last
 * extent whose flush failed.
 */
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int status = 0;
    int idx;

    for (idx = 0; idx < state->num_extents; idx++) {
        int rc = bdrv_co_flush(state->extents[idx].file);

        /* Remember the failure but keep flushing the remaining extents. */
        if (rc < 0) {
            status = rc;
        }
    }
    return status;
}
1926

    
1927
/*
 * Sum the allocated size of bs->file and of every extent file that is not
 * bs->file itself.
 *
 * Returns the total, or the first negative value returned by
 * bdrv_get_allocated_file_size().
 */
static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int64_t total;
    int idx;

    total = bdrv_get_allocated_file_size(bs->file);
    if (total < 0) {
        return total;
    }

    for (idx = 0; idx < state->num_extents; idx++) {
        int64_t extent_size;

        /* bs->file has already been accounted for above. */
        if (state->extents[idx].file != bs->file) {
            extent_size =
                bdrv_get_allocated_file_size(state->extents[idx].file);
            if (extent_size < 0) {
                return extent_size;
            }
            total += extent_size;
        }
    }
    return total;
}
1950

    
1951
/*
 * Report whether the image can be treated as zero-initialised.
 *
 * Returns 0 as soon as a flat extent is found whose file does not report
 * zero init via bdrv_has_zero_init(); returns 1 otherwise. Non-flat
 * extents are not queried.
 */
static int vmdk_has_zero_init(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int idx;

    for (idx = 0; idx < state->num_extents; idx++) {
        VmdkExtent *extent = &state->extents[idx];

        /* Only flat extents depend on the underlying file's contents. */
        if (extent->flat && !bdrv_has_zero_init(extent->file)) {
            return 0;
        }
    }
    return 1;
}
1967

    
1968
/*
 * Build a freshly allocated ImageInfo describing a single extent.
 *
 * The filename and format strings are duplicated. Fields not set here
 * stay zero because the structure is allocated with g_new0().
 */
static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
{
    ImageInfo *info = g_new0(ImageInfo, 1);

    info->filename = g_strdup(extent->file->filename);
    info->format = g_strdup(extent->type);
    info->virtual_size = extent->sectors * BDRV_SECTOR_SIZE;

    /* The "compressed" field is only flagged present when it is set. */
    info->compressed = extent->compressed;
    info->has_compressed = extent->compressed;

    /* The cluster size is flagged present for non-flat extents only. */
    info->cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
    info->has_cluster_size = !extent->flat;

    return info;
}
1984

    
1985
static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
1986
                      BdrvCheckMode fix)
1987
{
1988
    BDRVVmdkState *s = bs->opaque;
1989
    VmdkExtent *extent = NULL;
1990
    int64_t sector_num = 0;
1991
    int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
1992
    int ret;
1993
    uint64_t cluster_offset;
1994

    
1995
    if (fix) {
1996
        return -ENOTSUP;
1997
    }
1998

    
1999
    for (;;) {
2000
        if (sector_num >= total_sectors) {
2001
            return 0;
2002
        }
2003
        extent = find_extent(s, sector_num, extent);
2004
        if (!extent) {
2005
            fprintf(stderr,
2006
                    "ERROR: could not find extent for sector %" PRId64 "\n",
2007
                    sector_num);
2008
            break;
2009
        }
2010
        ret = get_cluster_offset(bs, extent, NULL,
2011
                                 sector_num << BDRV_SECTOR_BITS,
2012
                                 0, &cluster_offset);
2013
        if (ret == VMDK_ERROR) {
2014
            fprintf(stderr,
2015
                    "ERROR: could not get cluster_offset for sector %"
2016
                    PRId64 "\n", sector_num);
2017
            break;
2018
        }
2019
        if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
2020
            fprintf(stderr,
2021
                    "ERROR: cluster offset for sector %"
2022
                    PRId64 " points after EOF\n", sector_num);
2023
            break;
2024
        }
2025
        sector_num += extent->cluster_sectors;
2026
    }
2027

    
2028
    result->corruptions++;
2029
    return 0;
2030
}
2031

    
2032
/*
 * Build the VMDK-specific image information for @bs.
 *
 * Returns a newly allocated ImageInfoSpecific of kind VMDK holding the
 * create type, the CID, the parent CID and one ImageInfo list entry per
 * extent, in extent order.
 */
static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
    ImageInfoSpecificVmdk *vmdk_info = g_new0(ImageInfoSpecificVmdk, 1);
    ImageInfoList **tail;
    int idx;

    vmdk_info->create_type = g_strdup(state->create_type);
    vmdk_info->cid = state->cid;
    vmdk_info->parent_cid = state->parent_cid;

    spec_info->kind = IMAGE_INFO_SPECIFIC_KIND_VMDK;
    spec_info->vmdk = vmdk_info;

    /* Append one node per extent, always writing through the tail link. */
    tail = &vmdk_info->extents;
    for (idx = 0; idx < state->num_extents; idx++) {
        ImageInfoList *node = g_new0(ImageInfoList, 1);

        node->value = vmdk_get_extent_info(&state->extents[idx]);
        *tail = node;
        tail = &node->next;
    }

    return spec_info;
}
2062

    
2063
static QEMUOptionParameter vmdk_create_options[] = {
2064
    {
2065
        .name = BLOCK_OPT_SIZE,
2066
        .type = OPT_SIZE,
2067
        .help = "Virtual disk size"
2068
    },
2069
    {
2070
        .name = BLOCK_OPT_ADAPTER_TYPE,
2071
        .type = OPT_STRING,
2072
        .help = "Virtual adapter type, can be one of "
2073
                "ide (default), lsilogic, buslogic or legacyESX"
2074
    },
2075
    {
2076
        .name = BLOCK_OPT_BACKING_FILE,
2077
        .type = OPT_STRING,
2078
        .help = "File name of a base image"
2079
    },
2080
    {
2081
        .name = BLOCK_OPT_COMPAT6,
2082
        .type = OPT_FLAG,
2083
        .help = "VMDK version 6 image"
2084
    },
2085
    {
2086
        .name = BLOCK_OPT_SUBFMT,
2087
        .type = OPT_STRING,
2088
        .help =
2089
            "VMDK flat extent format, can be one of "
2090
            "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
2091
    },
2092
    {
2093
        .name = BLOCK_OPT_ZEROED_GRAIN,
2094
        .type = OPT_FLAG,
2095
        .help = "Enable efficient zero writes using the zeroed-grain GTE feature"
2096
    },
2097
    { NULL }
2098
};
2099

    
2100
static BlockDriver bdrv_vmdk = {
2101
    .format_name                  = "vmdk",
2102
    .instance_size                = sizeof(BDRVVmdkState),
2103
    .bdrv_probe                   = vmdk_probe,
2104
    .bdrv_open                    = vmdk_open,
2105
    .bdrv_check                   = vmdk_check,
2106
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
2107
    .bdrv_read                    = vmdk_co_read,
2108
    .bdrv_write                   = vmdk_co_write,
2109
    .bdrv_co_write_zeroes         = vmdk_co_write_zeroes,
2110
    .bdrv_close                   = vmdk_close,
2111
    .bdrv_create                  = vmdk_create,
2112
    .bdrv_co_flush_to_disk        = vmdk_co_flush,
2113
    .bdrv_co_get_block_status     = vmdk_co_get_block_status,
2114
    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
2115
    .bdrv_has_zero_init           = vmdk_has_zero_init,
2116
    .bdrv_get_specific_info       = vmdk_get_specific_info,
2117
    .bdrv_refresh_limits          = vmdk_refresh_limits,
2118

    
2119
    .create_options               = vmdk_create_options,
2120
};
2121

    
2122
static void bdrv_vmdk_init(void)
2123
{
2124
    bdrv_register(&bdrv_vmdk);
2125
}
2126

    
2127
block_init(bdrv_vmdk_init);