Statistics
| Branch: | Revision:

root / block / vmdk.c @ 3178e275

History | View | Annotate | Download (48.2 kB)

1
/*
2
 * Block driver for the VMDK format
3
 *
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "qemu-common.h"
27
#include "block_int.h"
28
#include "module.h"
29
#include "migration.h"
30
#include <zlib.h>
31

    
32
/* Magic numbers expected in the first four bytes of a sparse extent when
 * those bytes are read as a big-endian 32-bit value ("COWD" / "KDMV"). */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
/* VMDK4Header.compressAlgorithm value that marks an extent as compressed */
#define VMDK4_COMPRESSION_DEFLATE 1
/* Bits of VMDK4Header.flags */
/* rgd_offset is used as the location of the backup L1 table */
#define VMDK4_FLAG_RGD (1 << 1)
/* NOTE(review): presumably "grains are compressed"; not referenced in the
 * part of the file visible here */
#define VMDK4_FLAG_COMPRESS (1 << 16)
/* the extent carries markers (sets VmdkExtent.has_marker) */
#define VMDK4_FLAG_MARKER (1 << 17)
/* gd_offset value telling the reader to use the footer instead of the header */
#define VMDK4_GD_AT_END 0xffffffffffffffffULL
39

    
40
/* Header of a VMDK3 sparse extent as read from the file right after the
 * 4-byte magic.  The reader converts the fields it uses with le32_to_cpu(). */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* extent size in sectors */
    uint32_t granularity;       /* sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
52

    
53
/* Header of a VMDK4 sparse extent as read from the file right after the
 * 4-byte magic.  The struct is packed so it can be read directly from disk;
 * the reader converts multi-byte fields with the le*_to_cpu() helpers. */
typedef struct {
    uint32_t version;
    uint32_t flags;             /* VMDK4_FLAG_* bits */
    int64_t capacity;           /* extent size in sectors */
    int64_t granularity;        /* sectors per cluster */
    int64_t desc_offset;        /* embedded descriptor position, in sectors */
    int64_t desc_size;
    int32_t num_gtes_per_gte;   /* number of entries in one L2 table */
    int64_t rgd_offset;         /* backup L1 table position, in sectors */
    int64_t gd_offset;          /* L1 table position, in sectors */
    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
    uint16_t compressAlgorithm; /* VMDK4_COMPRESSION_DEFLATE when compressed */
} QEMU_PACKED VMDK4Header;
68

    
69
#define L2_CACHE_SIZE 16
70

    
71
typedef struct VmdkExtent {
72
    BlockDriverState *file;
73
    bool flat;
74
    bool compressed;
75
    bool has_marker;
76
    int64_t sectors;
77
    int64_t end_sector;
78
    int64_t flat_start_offset;
79
    int64_t l1_table_offset;
80
    int64_t l1_backup_table_offset;
81
    uint32_t *l1_table;
82
    uint32_t *l1_backup_table;
83
    unsigned int l1_size;
84
    uint32_t l1_entry_sectors;
85

    
86
    unsigned int l2_size;
87
    uint32_t *l2_cache;
88
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
89
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
90

    
91
    unsigned int cluster_sectors;
92
} VmdkExtent;
93

    
94
typedef struct BDRVVmdkState {
95
    CoMutex lock;
96
    int desc_offset;
97
    bool cid_updated;
98
    uint32_t parent_cid;
99
    int num_extents;
100
    /* Extent array with num_extents entries, ascend ordered by address */
101
    VmdkExtent *extents;
102
    Error *migration_blocker;
103
} BDRVVmdkState;
104

    
105
/* Description of a freshly allocated cluster whose L2 entry still has to be
 * written to disk; filled in by get_cluster_offset(). */
typedef struct VmdkMetaData {
    uint32_t offset;            /* L2 entry value, already little-endian */
    unsigned int l1_index;      /* index into the L1 table */
    unsigned int l2_index;      /* index into the L2 table */
    unsigned int l2_offset;     /* position of the L2 table, in sectors */
    int valid;                  /* non-zero once the fields above are set */
} VmdkMetaData;
112

    
113
/* Header placed in front of the compressed payload when a grain is written
 * to a compressed extent. */
typedef struct VmdkGrainMarker {
    uint64_t lba;       /* sector number of the grain */
    uint32_t size;      /* length in bytes of the data that follows */
    uint8_t  data[0];   /* compressed payload */
} VmdkGrainMarker;
118

    
119
/* Values of the "type" field of a marker (checked when the footer is read) */
enum {
    MARKER_END_OF_STREAM    = 0,
    MARKER_GRAIN_TABLE      = 1,
    MARKER_GRAIN_DIRECTORY  = 2,
    MARKER_FOOTER           = 3,
};
125

    
126
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
127
{
128
    uint32_t magic;
129

    
130
    if (buf_size < 4) {
131
        return 0;
132
    }
133
    magic = be32_to_cpu(*(uint32_t *)buf);
134
    if (magic == VMDK3_MAGIC ||
135
        magic == VMDK4_MAGIC) {
136
        return 100;
137
    } else {
138
        const char *p = (const char *)buf;
139
        const char *end = p + buf_size;
140
        while (p < end) {
141
            if (*p == '#') {
142
                /* skip comment line */
143
                while (p < end && *p != '\n') {
144
                    p++;
145
                }
146
                p++;
147
                continue;
148
            }
149
            if (*p == ' ') {
150
                while (p < end && *p == ' ') {
151
                    p++;
152
                }
153
                /* skip '\r' if windows line endings used. */
154
                if (p < end && *p == '\r') {
155
                    p++;
156
                }
157
                /* only accept blank lines before 'version=' line */
158
                if (p == end || *p != '\n') {
159
                    return 0;
160
                }
161
                p++;
162
                continue;
163
            }
164
            if (end - p >= strlen("version=X\n")) {
165
                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
166
                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
167
                    return 100;
168
                }
169
            }
170
            if (end - p >= strlen("version=X\r\n")) {
171
                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
172
                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
173
                    return 100;
174
                }
175
            }
176
            return 0;
177
        }
178
        return 0;
179
    }
180
}
181

    
182
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * CID read from the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512                 /* bytes per sector */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* descriptor: 20 sectors of 512 bytes */
#define BUF_SIZE 4096
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
188

    
189
/*
 * Release everything owned by the extents of bs: the L1 table, the L2 cache
 * and the backup L1 table of every extent, the extent file when it is not
 * bs->file itself, and finally the extent array.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = s->extents;
    int remaining;

    for (remaining = s->num_extents; remaining > 0; remaining--, extent++) {
        g_free(extent->l1_table);
        g_free(extent->l2_cache);
        g_free(extent->l1_backup_table);
        /* bs->file is not ours to delete */
        if (extent->file != bs->file) {
            bdrv_delete(extent->file);
        }
    }
    g_free(s->extents);
}
206

    
207
/*
 * Drop the last entry of the extent array by shrinking the array by one
 * element.  Nothing referenced by that entry is freed here.  Does nothing
 * when there are no extents.
 */
static void vmdk_free_last_extent(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    if (s->num_extents != 0) {
        s->num_extents--;
        s->extents = g_realloc(s->extents,
                               s->num_extents * sizeof(VmdkExtent));
    }
}
217

    
218
/*
 * Read a content ID from the text descriptor of bs.
 *
 * parent: non-zero to read the "parentCID" value, zero to read "CID".
 *
 * Returns the parsed hexadecimal value, 0xffffffff when the key is missing
 * or no number could be parsed after it, and 0 when the descriptor cannot be
 * read.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    char desc[DESC_SIZE];
    uint32_t cid = 0xffffffff;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;
    int ret;

    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return 0;
    }

    /* sizeof() counts the terminating NUL, which makes the pointer advance
     * below also step over the '=' that follows the key */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    desc[DESC_SIZE - 1] = '\0';
    p_name = strstr(desc, cid_str);
    if (p_name != NULL) {
        p_name += cid_str_size;
        /* cid is a uint32_t, so use the matching conversion specifier */
        sscanf(p_name, "%" SCNx32, &cid);
    }

    return cid;
}
249

    
250
/*
 * Store a new "CID" value in the text descriptor of bs.
 *
 * The descriptor is read, the text after the first "CID" key is replaced by
 * the new value in hex followed by a newline, the part of the descriptor
 * starting at "parentCID" is appended again, and the result is written back
 * synchronously.
 *
 * Returns 0 on success, -EINVAL when the descriptor has no "parentCID" key,
 * or the negative error of the failing read/write.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE];
    char parent_tail[DESC_SIZE];
    char *parent_pos, *cid_pos;
    int ret;

    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
    desc[DESC_SIZE - 1] = '\0';

    parent_pos = strstr(desc, "parentCID");
    if (!parent_pos) {
        return -EINVAL;
    }
    /* keep a copy of the tail: snprintf() below truncates desc */
    pstrcpy(parent_tail, sizeof(parent_tail), parent_pos);

    cid_pos = strstr(desc, "CID");
    if (cid_pos) {
        /* sizeof() includes the NUL, so this also skips the '=' */
        cid_pos += sizeof("CID");
        snprintf(cid_pos, sizeof(desc) - (cid_pos - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), parent_tail);
    }

    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
    return ret < 0 ? ret : 0;
}
283

    
284
/*
 * Check that the backing image still carries the CID this image recorded as
 * its parent CID.
 *
 * Returns 1 when there is no backing image, when the CIDs match, or when
 * CHECK_CID is not defined; returns 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *parent = bs->backing_hd;

    if (parent && s->parent_cid != vmdk_read_cid(parent, 0)) {
        /* CID not valid */
        return 0;
    }
#endif
    /* CID valid */
    return 1;
}
302

    
303
/* Queue extents, if any, for reopen() */
304
static int vmdk_reopen_prepare(BDRVReopenState *state,
305
                               BlockReopenQueue *queue, Error **errp)
306
{
307
    BDRVVmdkState *s;
308
    int ret = -1;
309
    int i;
310
    VmdkExtent *e;
311

    
312
    assert(state != NULL);
313
    assert(state->bs != NULL);
314

    
315
    if (queue == NULL) {
316
        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
317
                 "No reopen queue for VMDK extents");
318
        goto exit;
319
    }
320

    
321
    s = state->bs->opaque;
322

    
323
    assert(s != NULL);
324

    
325
    for (i = 0; i < s->num_extents; i++) {
326
        e = &s->extents[i];
327
        if (e->file != state->bs->file) {
328
            bdrv_reopen_queue(queue, e->file, state->flags);
329
        }
330
    }
331
    ret = 0;
332

    
333
exit:
334
    return ret;
335
}
336

    
337
/*
 * Look for a parentFileNameHint="..." entry in the descriptor of bs and, if
 * present, copy the quoted name into bs->backing_file.
 *
 * Returns 0 when there is no such entry or the name was copied, -EINVAL
 * when the closing quote is missing or the name does not fit in
 * bs->backing_file, or the negative error of the descriptor read.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE + 1];
    char *hint, *name, *closing_quote;
    int ret;

    /* one extra byte so the descriptor is always NUL terminated */
    desc[DESC_SIZE] = '\0';
    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }

    hint = strstr(desc, "parentFileNameHint");
    if (hint == NULL) {
        return 0;
    }

    /* sizeof() counts the NUL: together with the +1 this steps over the
     * key, the '=' and the opening quote */
    name = hint + sizeof("parentFileNameHint") + 1;
    closing_quote = strchr(name, '\"');
    if (closing_quote == NULL) {
        return -EINVAL;
    }
    if ((closing_quote - name) > sizeof(bs->backing_file) - 1) {
        return -EINVAL;
    }

    pstrcpy(bs->backing_file, closing_quote - name + 1, name);
    return 0;
}
368

    
369
/* Create and append extent to the extent array. Return the added VmdkExtent
370
 * address. return NULL if allocation failed. */
371
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
372
                           BlockDriverState *file, bool flat, int64_t sectors,
373
                           int64_t l1_offset, int64_t l1_backup_offset,
374
                           uint32_t l1_size,
375
                           int l2_size, unsigned int cluster_sectors)
376
{
377
    VmdkExtent *extent;
378
    BDRVVmdkState *s = bs->opaque;
379

    
380
    s->extents = g_realloc(s->extents,
381
                              (s->num_extents + 1) * sizeof(VmdkExtent));
382
    extent = &s->extents[s->num_extents];
383
    s->num_extents++;
384

    
385
    memset(extent, 0, sizeof(VmdkExtent));
386
    extent->file = file;
387
    extent->flat = flat;
388
    extent->sectors = sectors;
389
    extent->l1_table_offset = l1_offset;
390
    extent->l1_backup_table_offset = l1_backup_offset;
391
    extent->l1_size = l1_size;
392
    extent->l1_entry_sectors = l2_size * cluster_sectors;
393
    extent->l2_size = l2_size;
394
    extent->cluster_sectors = cluster_sectors;
395

    
396
    if (s->num_extents > 1) {
397
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
398
    } else {
399
        extent->end_sector = extent->sectors;
400
    }
401
    bs->total_sectors = extent->end_sector;
402
    return extent;
403
}
404

    
405
/*
 * Load the lookup tables of a sparse extent: the L1 table, the backup L1
 * table when the extent has one (both converted from little-endian), and an
 * uninitialised buffer for the L2 cache.
 *
 * Returns 0 on success.  On a read error the tables allocated so far are
 * freed and the negative error is returned.
 */
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
{
    int l1_bytes, idx;
    int ret;

    /* read the L1 table */
    l1_bytes = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = g_malloc(l1_bytes);
    ret = bdrv_pread(extent->file, extent->l1_table_offset,
                     extent->l1_table, l1_bytes);
    if (ret < 0) {
        g_free(extent->l1_table);
        return ret;
    }
    for (idx = 0; idx < extent->l1_size; idx++) {
        le32_to_cpus(&extent->l1_table[idx]);
    }

    /* read the backup L1 table, if any */
    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = g_malloc(l1_bytes);
        ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
                         extent->l1_backup_table, l1_bytes);
        if (ret < 0) {
            g_free(extent->l1_backup_table);
            g_free(extent->l1_table);
            return ret;
        }
        for (idx = 0; idx < extent->l1_size; idx++) {
            le32_to_cpus(&extent->l1_backup_table[idx]);
        }
    }

    /* room for L2_CACHE_SIZE complete L2 tables */
    extent->l2_cache =
        g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
}
447

    
448
/*
 * Open a VMDK3 sparse extent stored in file and append it to the extents
 * of bs.
 *
 * bs:    the VMDK image the extent belongs to.
 * file:  the file holding the extent; the header is read from it and the
 *        new extent refers to it.
 * flags: not used.
 *
 * Returns 0 on success or a negative error.  On failure the extent entry is
 * removed again; file itself is left for the caller to dispose of.
 */
static int vmdk_open_vmdk3(BlockDriverState *bs,
                           BlockDriverState *file,
                           int flags)
{
    int ret;
    uint32_t magic;
    VMDK3Header header;
    VmdkExtent *extent;

    /* the header follows the magic number */
    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        return ret;
    }
    /* The extent must refer to the file the header was read from; bs->file
     * is a different file when the extent is listed in a descriptor file.
     * The L1 offset is widened before the shift so that it cannot wrap
     * around in 32 bits. */
    extent = vmdk_add_extent(bs,
                             file, false,
                             le32_to_cpu(header.disk_sectors),
                             (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
                             0, 1 << 6, 1 << 9,
                             le32_to_cpu(header.granularity));
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
474

    
475
/* Declared ahead of its definition because vmdk_open_vmdk4() calls it */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
                               int64_t desc_offset);
477

    
478
/*
 * Open a VMDK4 sparse extent stored in file and append it to the extents
 * of bs.
 *
 * bs:    the VMDK image the extent belongs to.
 * file:  the file holding the extent.
 * flags: open flags, only passed on when the file turns out to contain
 *        nothing but an embedded descriptor.
 *
 * A header with zero capacity and a descriptor offset is handed over to
 * vmdk_open_desc_file().  When gd_offset is VMDK4_GD_AT_END the footer near
 * the end of file is validated and replaces the header.
 *
 * Returns 0 on success, -EINVAL for an invalid footer or a zero L1 entry
 * size, or the negative error of a failing read.
 */
static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
                           int flags)
{
    int ret;
    uint32_t magic;
    uint32_t l1_size, l1_entry_sectors;
    VMDK4Header header;
    VmdkExtent *extent;
    int64_t l1_backup_offset = 0;

    /* the header follows the magic number */
    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        return ret;
    }
    if (header.capacity == 0 && header.desc_offset) {
        /* Header fields are little-endian on disk: convert before turning
         * the sector count into a byte offset. */
        return vmdk_open_desc_file(bs, flags,
                                   le64_to_cpu(header.desc_offset) << 9);
    }

    if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
        /*
         * The footer takes precedence over the header, so read it in. The
         * footer starts at offset -1024 from the end: One sector for the
         * footer, and another one for the end-of-stream marker.
         * The read starts one sector earlier, at the footer marker.
         */
        struct {
            struct {
                uint64_t val;
                uint32_t size;
                uint32_t type;
                uint8_t pad[512 - 16];
            } QEMU_PACKED footer_marker;

            uint32_t magic;
            VMDK4Header header;
            uint8_t pad[512 - 4 - sizeof(VMDK4Header)];

            struct {
                uint64_t val;
                uint32_t size;
                uint32_t type;
                uint8_t pad[512 - 16];
            } QEMU_PACKED eos_marker;
        } QEMU_PACKED footer;

        /* The footer belongs to the extent file, so its position has to be
         * derived from the size of file, not of bs->file. */
        ret = bdrv_pread(file,
            file->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
            return ret;
        }

        /* Some sanity checks for the footer */
        if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
            le32_to_cpu(footer.footer_marker.size) != 0  ||
            le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
            le64_to_cpu(footer.eos_marker.val) != 0  ||
            le32_to_cpu(footer.eos_marker.size) != 0  ||
            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
        {
            return -EINVAL;
        }

        header = footer.header;
    }

    /* sectors covered by one L1 entry = L2 entries * sectors per cluster */
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
        return -EINVAL;
    }
    /* number of L1 entries needed to cover the capacity, rounded up */
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
    extent = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          l1_backup_offset,
                          l1_size,
                          le32_to_cpu(header.num_gtes_per_gte),
                          le64_to_cpu(header.granularity));
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
571

    
572
/*
 * Find an option value in a descriptor.
 *
 * desc:     NUL-terminated descriptor text.
 * opt_name: option to look for; its first occurrence in desc is used and is
 *           expected to be followed by =" and the value up to the next ".
 * buf:      receives the NUL-terminated value.
 * buf_size: size of buf in bytes.
 *
 * Returns 0 on success.  Returns -1 when the option is not present, when
 * nothing follows the two characters after the name, when the closing quote
 * is missing, or when the value plus its terminator does not fit in buf.
 */
static int vmdk_parse_description(const char *desc, const char *opt_name,
        char *buf, int buf_size)
{
    const char *opt_pos, *opt_end;
    const char *end = desc + strlen(desc);
    /* the name plus the "=\"" that follows it */
    size_t skip = strlen(opt_name) + 2;

    opt_pos = strstr(desc, opt_name);
    if (!opt_pos) {
        return -1;
    }
    /* Compare lengths before advancing, so that no pointer beyond the end
     * of desc is ever formed. */
    if ((size_t)(end - opt_pos) <= skip) {
        return -1;
    }
    opt_pos += skip;

    opt_end = opt_pos;
    while (opt_end < end && *opt_end != '"') {
        opt_end++;
    }
    if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
        return -1;
    }
    /* the range holds no NUL, so copy it verbatim and terminate it */
    memcpy(buf, opt_pos, opt_end - opt_pos);
    buf[opt_end - opt_pos] = '\0';
    return 0;
}
598

    
599
/* Open an extent file and append to bs array */
600
static int vmdk_open_sparse(BlockDriverState *bs,
601
                            BlockDriverState *file,
602
                            int flags)
603
{
604
    uint32_t magic;
605

    
606
    if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
607
        return -EIO;
608
    }
609

    
610
    magic = be32_to_cpu(magic);
611
    switch (magic) {
612
        case VMDK3_MAGIC:
613
            return vmdk_open_vmdk3(bs, file, flags);
614
            break;
615
        case VMDK4_MAGIC:
616
            return vmdk_open_vmdk4(bs, file, flags);
617
            break;
618
        default:
619
            return -EINVAL;
620
            break;
621
    }
622
}
623

    
624
/*
 * Walk the descriptor text line by line and open every extent it lists.
 *
 * desc:           NUL-terminated descriptor text.
 * bs:             the VMDK image the extents are added to.
 * desc_file_path: path of the descriptor file; extent file names are
 *                 combined with it.
 *
 * Lines that do not look like an "RW" extent line are skipped.  Returns 0
 * when the whole text has been processed, -EINVAL for a malformed extent
 * line, or the negative error of opening an extent.
 */
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
        const char *desc_file_path)
{
    int ret;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;
    char extent_path[PATH_MAX];
    BlockDriverState *extent_file;

    while (*p) {
        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
                access, &sectors, type, fname, &flat_offset);
        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
            if (ret != 5 || flat_offset < 0) {
                return -EINVAL;
            }
        } else if (ret != 4) {
            return -EINVAL;
        }

        /* trim the quotation marks around */
        if (fname[0] == '"') {
            /* strlen() bytes from fname + 1 include the terminating NUL */
            memmove(fname, fname + 1, strlen(fname));
            if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
                return -EINVAL;
            }
            fname[strlen(fname) - 1] = '\0';
        }
        if (sectors <= 0 ||
            (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
            (strcmp(access, "RW"))) {
            goto next_line;
        }

        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
        ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
        if (ret) {
            return ret;
        }

        /* save to extents array */
        if (!strcmp(type, "FLAT")) {
            /* FLAT extent */
            VmdkExtent *extent;

            extent = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, sectors);
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE")) {
            /* SPARSE extent */
            ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
            if (ret) {
                bdrv_delete(extent_file);
                return ret;
            }
        } else {
            fprintf(stderr,
                "VMDK: Not supported extent type \"%s\""".\n", type);
            /* the file was not attached to any extent: do not leak it */
            bdrv_delete(extent_file);
            return -ENOTSUP;
        }
next_line:
        /* Move to the next line.  Stop on the terminating NUL instead of
         * stepping over it, so that a last line without a newline does not
         * make the outer loop read beyond the end of desc. */
        while (*p) {
            if (*p == '\n') {
                p++;
                break;
            }
            p++;
        }
    }
    return 0;
}
706

    
707
/*
 * Open an image described by a text descriptor found in bs->file.
 *
 * desc_offset: byte offset of the descriptor inside bs->file.
 * flags:       not used.
 *
 * Only the create types "monolithicFlat", "twoGbMaxExtentSparse" and
 * "twoGbMaxExtentFlat" are accepted.  Returns the result of parsing the
 * extent lines, -EINVAL when createType is missing, -ENOTSUP for any other
 * create type, or the negative error of the descriptor read.
 */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
                               int64_t desc_offset)
{
    BDRVVmdkState *s = bs->opaque;
    char buf[2048];
    char ct[128];
    bool supported;
    int ret;

    ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }
    /* make sure the text is terminated before it is searched */
    buf[sizeof(buf) - 1] = '\0';

    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
        return -EINVAL;
    }

    supported = !strcmp(ct, "monolithicFlat") ||
                !strcmp(ct, "twoGbMaxExtentSparse") ||
                !strcmp(ct, "twoGbMaxExtentFlat");
    if (!supported) {
        fprintf(stderr,
                "VMDK: Not supported image type \"%s\""".\n", ct);
        return -ENOTSUP;
    }

    s->desc_offset = 0;
    return vmdk_parse_extents(buf, bs, bs->file->filename);
}
733

    
734
/*
 * Open a VMDK image.
 *
 * bs->file is first tried as a sparse extent; if that fails it is treated
 * as a descriptor file.  Afterwards the backing file name and the parent
 * CID are read from the descriptor, the lock is initialised and a migration
 * blocker is registered.
 *
 * Returns 0 on success.  On failure all extents are freed and the negative
 * error is returned.
 */
static int vmdk_open(BlockDriverState *bs, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    int ret;

    if (vmdk_open_sparse(bs, bs->file, flags) != 0) {
        ret = vmdk_open_desc_file(bs, flags, 0);
        if (ret) {
            goto fail;
        }
    } else {
        /* the descriptor of a sparse image sits in the second sector */
        s->desc_offset = 0x200;
    }

    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
        goto fail;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VMDK images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "vmdk", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;

fail:
    vmdk_free_extents(bs);
    return ret;
}
767

    
768
static int get_whole_cluster(BlockDriverState *bs,
769
                VmdkExtent *extent,
770
                uint64_t cluster_offset,
771
                uint64_t offset,
772
                bool allocate)
773
{
774
    /* 128 sectors * 512 bytes each = grain size 64KB */
775
    uint8_t  whole_grain[extent->cluster_sectors * 512];
776

    
777
    /* we will be here if it's first write on non-exist grain(cluster).
778
     * try to read from parent image, if exist */
779
    if (bs->backing_hd) {
780
        int ret;
781

    
782
        if (!vmdk_is_cid_valid(bs)) {
783
            return -1;
784
        }
785

    
786
        /* floor offset to cluster */
787
        offset -= offset % (extent->cluster_sectors * 512);
788
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
789
                extent->cluster_sectors);
790
        if (ret < 0) {
791
            return -1;
792
        }
793

    
794
        /* Write grain only into the active image */
795
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
796
                extent->cluster_sectors);
797
        if (ret < 0) {
798
            return -1;
799
        }
800
    }
801
    return 0;
802
}
803

    
804
/*
 * Write the L2 entry described by m_data to disk: once into the L2 table at
 * m_data->l2_offset and, when the extent has a backup L1 table, once more
 * into the L2 table that the backup L1 table points to.
 *
 * m_data->l2_offset is overwritten with the backup location in the second
 * case.  Returns 0 on success and -1 when a write fails.
 */
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    /* byte position of the entry inside its L2 table */
    size_t entry_pos = m_data->l2_index * sizeof(m_data->offset);
    int rc;

    /* update L2 table */
    rc = bdrv_pwrite_sync(extent->file,
                          ((int64_t)m_data->l2_offset * 512) + entry_pos,
                          &(m_data->offset), sizeof(m_data->offset));
    if (rc < 0) {
        return -1;
    }

    if (extent->l1_backup_table_offset == 0) {
        return 0;
    }

    /* update backup L2 table */
    m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
    rc = bdrv_pwrite_sync(extent->file,
                          ((int64_t)m_data->l2_offset * 512) + entry_pos,
                          &(m_data->offset), sizeof(m_data->offset));
    if (rc < 0) {
        return -1;
    }
    return 0;
}
831

    
832
static int get_cluster_offset(BlockDriverState *bs,
833
                                    VmdkExtent *extent,
834
                                    VmdkMetaData *m_data,
835
                                    uint64_t offset,
836
                                    int allocate,
837
                                    uint64_t *cluster_offset)
838
{
839
    unsigned int l1_index, l2_offset, l2_index;
840
    int min_index, i, j;
841
    uint32_t min_count, *l2_table, tmp = 0;
842

    
843
    if (m_data) {
844
        m_data->valid = 0;
845
    }
846
    if (extent->flat) {
847
        *cluster_offset = extent->flat_start_offset;
848
        return 0;
849
    }
850

    
851
    offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
852
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
853
    if (l1_index >= extent->l1_size) {
854
        return -1;
855
    }
856
    l2_offset = extent->l1_table[l1_index];
857
    if (!l2_offset) {
858
        return -1;
859
    }
860
    for (i = 0; i < L2_CACHE_SIZE; i++) {
861
        if (l2_offset == extent->l2_cache_offsets[i]) {
862
            /* increment the hit count */
863
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
864
                for (j = 0; j < L2_CACHE_SIZE; j++) {
865
                    extent->l2_cache_counts[j] >>= 1;
866
                }
867
            }
868
            l2_table = extent->l2_cache + (i * extent->l2_size);
869
            goto found;
870
        }
871
    }
872
    /* not found: load a new entry in the least used one */
873
    min_index = 0;
874
    min_count = 0xffffffff;
875
    for (i = 0; i < L2_CACHE_SIZE; i++) {
876
        if (extent->l2_cache_counts[i] < min_count) {
877
            min_count = extent->l2_cache_counts[i];
878
            min_index = i;
879
        }
880
    }
881
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
882
    if (bdrv_pread(
883
                extent->file,
884
                (int64_t)l2_offset * 512,
885
                l2_table,
886
                extent->l2_size * sizeof(uint32_t)
887
            ) != extent->l2_size * sizeof(uint32_t)) {
888
        return -1;
889
    }
890

    
891
    extent->l2_cache_offsets[min_index] = l2_offset;
892
    extent->l2_cache_counts[min_index] = 1;
893
 found:
894
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
895
    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
896

    
897
    if (!*cluster_offset) {
898
        if (!allocate) {
899
            return -1;
900
        }
901

    
902
        /* Avoid the L2 tables update for the images that have snapshots. */
903
        *cluster_offset = bdrv_getlength(extent->file);
904
        if (!extent->compressed) {
905
            bdrv_truncate(
906
                extent->file,
907
                *cluster_offset + (extent->cluster_sectors << 9)
908
            );
909
        }
910

    
911
        *cluster_offset >>= 9;
912
        tmp = cpu_to_le32(*cluster_offset);
913
        l2_table[l2_index] = tmp;
914

    
915
        /* First of all we write grain itself, to avoid race condition
916
         * that may to corrupt the image.
917
         * This problem may occur because of insufficient space on host disk
918
         * or inappropriate VM shutdown.
919
         */
920
        if (get_whole_cluster(
921
                bs, extent, *cluster_offset, offset, allocate) == -1) {
922
            return -1;
923
        }
924

    
925
        if (m_data) {
926
            m_data->offset = tmp;
927
            m_data->l1_index = l1_index;
928
            m_data->l2_index = l2_index;
929
            m_data->l2_offset = l2_offset;
930
            m_data->valid = 1;
931
        }
932
    }
933
    *cluster_offset <<= 9;
934
    return 0;
935
}
936

    
937
/*
 * Find the extent that contains a sector.
 *
 * s:          driver state holding the extent array.
 * sector_num: sector to look for.
 * start_hint: extent to start searching from, or NULL to start at the
 *             first extent.
 *
 * Returns the first extent, from the starting point on, whose end_sector
 * is greater than sector_num, or NULL when there is none.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *candidate;
    VmdkExtent *past_last = &s->extents[s->num_extents];

    candidate = start_hint ? start_hint : &s->extents[0];
    for (; candidate < past_last; candidate++) {
        if (sector_num < candidate->end_sector) {
            return candidate;
        }
    }
    return NULL;
}
953

    
954
/*
 * Report whether the cluster containing sector_num is allocated.
 *
 * *pnum receives the number of sectors, starting at sector_num, that lie in
 * the same cluster, capped at nb_sectors.  The cluster lookup runs under
 * s->lock and never allocates.
 *
 * Returns 1 when get_cluster_offset() succeeds, 0 otherwise.  When no extent
 * covers sector_num the function returns 0 and leaves *pnum untouched.
 */
static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = find_extent(s, sector_num, NULL);
    uint64_t cluster_offset;
    int64_t first, remaining;
    int lookup;

    if (extent == NULL) {
        return 0;
    }

    qemu_co_mutex_lock(&s->lock);
    lookup = get_cluster_offset(bs, extent, NULL,
                                sector_num * 512, 0, &cluster_offset);
    qemu_co_mutex_unlock(&s->lock);

    /* Sectors left in this cluster, counted from sector_num. */
    first = sector_num % extent->cluster_sectors;
    remaining = extent->cluster_sectors - first;
    *pnum = remaining > nb_sectors ? nb_sectors : remaining;

    /* get_cluster_offset returning 0 means success */
    return lookup == 0;
}
981

    
982
/*
 * Write nb_sectors sectors from buf into extent->file at byte position
 * cluster_offset + offset_in_cluster.
 *
 * For a compressed extent the data is deflated with zlib compress() into a
 * temporary buffer and prefixed with a grain marker holding sector_num and
 * the compressed length; the marker plus payload is what gets written.
 * A compressed extent without has_marker is rejected.
 *
 * Returns 0 on success, -EINVAL when the extent is compressed without a
 * marker or compression fails, the negative value reported by bdrv_pwrite(),
 * or -EIO on a short write.
 */
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, const uint8_t *buf,
                            int nb_sectors, int64_t sector_num)
{
    int ret;
    VmdkGrainMarker *data = NULL;
    uLongf buf_len;
    const uint8_t *write_buf = buf;
    int write_len = nb_sectors * 512;

    if (extent->compressed) {
        if (!extent->has_marker) {
            ret = -EINVAL;
            goto out;
        }
        /* Twice the cluster size leaves room for incompressible input. */
        buf_len = (extent->cluster_sectors << 9) * 2;
        data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
                buf_len == 0) {
            ret = -EINVAL;
            goto out;
        }
        /*
         * The marker is on-disk data: store it little-endian so that it
         * matches vmdk_read_extent(), which decodes size with le32_to_cpu().
         * NOTE(review): lba is assumed to be a 64-bit field - confirm against
         * the VmdkGrainMarker declaration.
         */
        data->lba = cpu_to_le64(sector_num);
        data->size = cpu_to_le32(buf_len);
        write_buf = (uint8_t *)data;
        write_len = buf_len + sizeof(VmdkGrainMarker);
    }
    ret = bdrv_pwrite(extent->file,
                        cluster_offset + offset_in_cluster,
                        write_buf,
                        write_len);
    if (ret != write_len) {
        ret = ret < 0 ? ret : -EIO;
        goto out;
    }
    ret = 0;
 out:
    g_free(data);
    return ret;
}
1022

    
1023
/*
 * Read nb_sectors sectors into buf from the cluster stored at byte offset
 * cluster_offset of extent->file, starting offset_in_cluster bytes into the
 * cluster.
 *
 * Uncompressed extents are read directly.  For compressed extents two
 * clusters' worth of bytes are read, the optional grain marker is parsed to
 * obtain the compressed length, the payload is inflated with zlib
 * uncompress(), and the requested range is copied out of the result.
 *
 * Returns 0 on success; a negative value from bdrv_pread(), -EIO on a short
 * uncompressed read, or -EINVAL when the compressed length is zero or does
 * not fit the read buffer, decompression fails, or the requested range lies
 * outside the decompressed data.
 */
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, uint8_t *buf,
                            int nb_sectors)
{
    int ret;
    int cluster_bytes, buf_bytes;
    uint8_t *cluster_buf, *compressed_data;
    uint8_t *uncomp_buf;
    uint32_t data_len;
    size_t avail;
    VmdkGrainMarker *marker;
    uLongf buf_len;

    if (!extent->compressed) {
        ret = bdrv_pread(extent->file,
                          cluster_offset + offset_in_cluster,
                          buf, nb_sectors * 512);
        if (ret == nb_sectors * 512) {
            return 0;
        }
        /* Keep the real error code, as vmdk_write_extent() does. */
        return ret < 0 ? ret : -EIO;
    }
    cluster_bytes = extent->cluster_sectors * 512;
    /* Read two clusters in case GrainMarker + compressed data > one cluster */
    buf_bytes = cluster_bytes * 2;
    cluster_buf = g_malloc(buf_bytes);
    uncomp_buf = g_malloc(cluster_bytes);
    ret = bdrv_pread(extent->file,
                cluster_offset,
                cluster_buf, buf_bytes);
    if (ret < 0) {
        goto out;
    }
    compressed_data = cluster_buf;
    buf_len = cluster_bytes;
    data_len = cluster_bytes;
    if (extent->has_marker) {
        marker = (VmdkGrainMarker *)cluster_buf;
        compressed_data = marker->data;
        data_len = le32_to_cpu(marker->size);
    }
    /*
     * The payload starts after the marker header (if any), so only the
     * bytes between compressed_data and the end of cluster_buf may be
     * handed to uncompress().
     */
    avail = buf_bytes - (compressed_data - cluster_buf);
    if (!data_len || data_len > avail) {
        ret = -EINVAL;
        goto out;
    }
    ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
    if (ret != Z_OK) {
        ret = -EINVAL;
        goto out;
    }
    /* buf_len now holds the decompressed size reported by uncompress(). */
    if (offset_in_cluster < 0 ||
            offset_in_cluster + nb_sectors * 512 > buf_len) {
        ret = -EINVAL;
        goto out;
    }
    memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
    ret = 0;

 out:
    g_free(uncomp_buf);
    g_free(cluster_buf);
    return ret;
}
1088

    
1089
/*
 * Read nb_sectors sectors starting at sector_num into buf.
 *
 * The request is processed cluster by cluster.  For each piece the owning
 * extent is located and the cluster looked up without allocating:
 *   - allocated clusters are read through vmdk_read_extent();
 *   - unallocated clusters are read from bs->backing_hd when one exists
 *     (after vmdk_is_cid_valid() succeeds), otherwise zero-filled.
 *
 * Returns 0 on success, -EIO when no extent covers a sector, -EINVAL when the
 * CID check fails, or the error reported by bdrv_read()/vmdk_read_extent().
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    uint64_t cluster_offset;
    uint64_t first, count;
    int lookup, ret;

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (extent == NULL) {
            return -EIO;
        }
        lookup = get_cluster_offset(bs, extent, NULL,
                                    sector_num << 9, 0, &cluster_offset);

        /* Limit this pass to the remainder of the current cluster. */
        first = sector_num % extent->cluster_sectors;
        count = extent->cluster_sectors - first;
        if (count > nb_sectors) {
            count = nb_sectors;
        }

        if (lookup == 0) {
            ret = vmdk_read_extent(extent, cluster_offset, first * 512,
                                   buf, count);
            if (ret) {
                return ret;
            }
        } else if (!bs->backing_hd) {
            /* Not allocated and no parent image: the data reads as zeroes. */
            memset(buf, 0, 512 * count);
        } else {
            /* if not allocated, try to read from parent image, if exist */
            if (!vmdk_is_cid_valid(bs)) {
                return -EINVAL;
            }
            ret = bdrv_read(bs->backing_hd, sector_num, buf, count);
            if (ret < 0) {
                return ret;
            }
        }

        nb_sectors -= count;
        sector_num += count;
        buf += count * 512;
    }
    return 0;
}
1138

    
1139
/*
 * Coroutine entry point for reads: runs vmdk_read() while holding s->lock
 * and returns its result unchanged.
 */
static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
                                     uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int status;

    qemu_co_mutex_lock(&state->lock);
    status = vmdk_read(bs, sector_num, buf, nb_sectors);
    qemu_co_mutex_unlock(&state->lock);

    return status;
}
1149

    
1150
/*
 * Write nb_sectors sectors from buf starting at sector_num.
 *
 * The request is processed cluster by cluster.  For each piece the owning
 * extent is located and the target cluster is looked up with allocation
 * enabled.  Compressed extents are probed first without allocation and the
 * write is refused if the cluster already exists.  After the data is written
 * the L2 tables are updated when m_data.valid is set, and the CID is
 * rewritten once per open (tracked by s->cid_updated).
 *
 * Returns 0 on success, -EIO for an out-of-range start sector, a missing
 * extent, a write to an allocated compressed cluster or an L2 update failure,
 * -EINVAL when the cluster lookup fails, or the error reported by
 * vmdk_write_extent()/vmdk_write_cid().
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    VmdkMetaData m_data;
    uint64_t cluster_offset;
    int64_t first;
    int count, ret;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -EIO;
    }

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (extent == NULL) {
            return -EIO;
        }

        if (extent->compressed) {
            /* Probe only: an existing cluster cannot be overwritten. */
            ret = get_cluster_offset(bs, extent, &m_data,
                                     sector_num << 9, 0, &cluster_offset);
            if (ret == 0) {
                /* Refuse write to allocated cluster for streamOptimized */
                fprintf(stderr,
                        "VMDK: can't write to allocated cluster"
                        " for streamOptimized\n");
                return -EIO;
            }
            /* allocate */
            ret = get_cluster_offset(bs, extent, &m_data,
                                     sector_num << 9, 1, &cluster_offset);
        } else {
            ret = get_cluster_offset(bs, extent, &m_data,
                                     sector_num << 9, 1, &cluster_offset);
        }
        if (ret) {
            return -EINVAL;
        }

        /* Limit this pass to the remainder of the current cluster. */
        first = sector_num % extent->cluster_sectors;
        count = extent->cluster_sectors - first;
        if (count > nb_sectors) {
            count = nb_sectors;
        }

        ret = vmdk_write_extent(extent, cluster_offset, first * 512,
                                buf, count, sector_num);
        if (ret) {
            return ret;
        }

        /*
         * update L2 tables
         * NOTE(review): m_data is not initialised here; this presumably
         * relies on get_cluster_offset() always writing m_data.valid -
         * confirm against its implementation.
         */
        if (m_data.valid && vmdk_L2update(extent, &m_data) == -1) {
            return -EIO;
        }

        nb_sectors -= count;
        sector_num += count;
        buf += count * 512;

        /* update CID on the first write every time the virtual disk is
         * opened */
        if (!s->cid_updated) {
            ret = vmdk_write_cid(bs, time(NULL));
            if (ret < 0) {
                return ret;
            }
            s->cid_updated = true;
        }
    }
    return 0;
}
1233

    
1234
/*
 * Coroutine entry point for writes: runs vmdk_write() while holding s->lock
 * and returns its result unchanged.
 */
static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
                                      const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int status;

    qemu_co_mutex_lock(&state->lock);
    status = vmdk_write(bs, sector_num, buf, nb_sectors);
    qemu_co_mutex_unlock(&state->lock);

    return status;
}
1244

    
1245

    
1246
static int vmdk_create_extent(const char *filename, int64_t filesize,
1247
                              bool flat, bool compress)
1248
{
1249
    int ret, i;
1250
    int fd = 0;
1251
    VMDK4Header header;
1252
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
1253

    
1254
    fd = qemu_open(filename,
1255
                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1256
                   0644);
1257
    if (fd < 0) {
1258
        return -errno;
1259
    }
1260
    if (flat) {
1261
        ret = ftruncate(fd, filesize);
1262
        if (ret < 0) {
1263
            ret = -errno;
1264
        }
1265
        goto exit;
1266
    }
1267
    magic = cpu_to_be32(VMDK4_MAGIC);
1268
    memset(&header, 0, sizeof(header));
1269
    header.version = 1;
1270
    header.flags =
1271
        3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
1272
    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
1273
    header.capacity = filesize / 512;
1274
    header.granularity = 128;
1275
    header.num_gtes_per_gte = 512;
1276

    
1277
    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
1278
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
1279
    gt_count =
1280
        (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
1281
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
1282

    
1283
    header.desc_offset = 1;
1284
    header.desc_size = 20;
1285
    header.rgd_offset = header.desc_offset + header.desc_size;
1286
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
1287
    header.grain_offset =
1288
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
1289
         header.granularity - 1) / header.granularity) *
1290
        header.granularity;
1291
    /* swap endianness for all header fields */
1292
    header.version = cpu_to_le32(header.version);
1293
    header.flags = cpu_to_le32(header.flags);
1294
    header.capacity = cpu_to_le64(header.capacity);
1295
    header.granularity = cpu_to_le64(header.granularity);
1296
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
1297
    header.desc_offset = cpu_to_le64(header.desc_offset);
1298
    header.desc_size = cpu_to_le64(header.desc_size);
1299
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
1300
    header.gd_offset = cpu_to_le64(header.gd_offset);
1301
    header.grain_offset = cpu_to_le64(header.grain_offset);
1302
    header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
1303

    
1304
    header.check_bytes[0] = 0xa;
1305
    header.check_bytes[1] = 0x20;
1306
    header.check_bytes[2] = 0xd;
1307
    header.check_bytes[3] = 0xa;
1308

    
1309
    /* write all the data */
1310
    ret = qemu_write_full(fd, &magic, sizeof(magic));
1311
    if (ret != sizeof(magic)) {
1312
        ret = -errno;
1313
        goto exit;
1314
    }
1315
    ret = qemu_write_full(fd, &header, sizeof(header));
1316
    if (ret != sizeof(header)) {
1317
        ret = -errno;
1318
        goto exit;
1319
    }
1320

    
1321
    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
1322
    if (ret < 0) {
1323
        ret = -errno;
1324
        goto exit;
1325
    }
1326

    
1327
    /* write grain directory */
1328
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
1329
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
1330
         i < gt_count; i++, tmp += gt_size) {
1331
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1332
        if (ret != sizeof(tmp)) {
1333
            ret = -errno;
1334
            goto exit;
1335
        }
1336
    }
1337

    
1338
    /* write backup grain directory */
1339
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
1340
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
1341
         i < gt_count; i++, tmp += gt_size) {
1342
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1343
        if (ret != sizeof(tmp)) {
1344
            ret = -errno;
1345
            goto exit;
1346
        }
1347
    }
1348

    
1349
    ret = 0;
1350
 exit:
1351
    qemu_close(fd);
1352
    return ret;
1353
}
1354

    
1355
/*
 * Split filename into directory part, base name and extension.
 *
 *   path    receives everything up to and including the last separator; the
 *           separators tried, in order, are '/', '\\' and ':' (the first kind
 *           found anywhere in filename wins).  Empty if none is present.
 *   prefix  receives the remaining name up to, but excluding, its last '.'.
 *   postfix receives the last '.' and what follows, or is empty.
 *
 * buf_len is the capacity assumed for each of the three output buffers.
 *
 * Returns 0 on success, -1 when filename is NULL or empty or when the path or
 * prefix part does not fit in buf_len bytes.
 */
static int filename_decompose(const char *filename, char *path, char *prefix,
        char *postfix, size_t buf_len)
{
    static const char separators[] = { '/', '\\', ':' };
    const char *name = NULL;
    const char *dot;
    size_t k;

    if (filename == NULL || filename[0] == '\0') {
        fprintf(stderr, "Vmdk: no filename provided.\n");
        return -1;
    }

    /* Use the last occurrence of the first separator kind that appears. */
    for (k = 0; k < sizeof(separators) && name == NULL; k++) {
        name = strrchr(filename, separators[k]);
    }

    if (name == NULL) {
        name = filename;
        path[0] = '\0';
    } else {
        name++;
        if (name - filename >= buf_len) {
            return -1;
        }
        /* Copy the directory part including the trailing separator. */
        pstrcpy(path, name - filename + 1, filename);
    }

    dot = strrchr(name, '.');
    if (dot == NULL) {
        pstrcpy(prefix, buf_len, name);
        postfix[0] = '\0';
        return 0;
    }
    if (dot - name >= buf_len) {
        return -1;
    }
    pstrcpy(prefix, dot - name + 1, name);
    pstrcpy(postfix, buf_len, dot);
    return 0;
}
1394

    
1395
/*
 * Build in dest the path of target relative to the directory of base.
 *
 * An absolute target is copied unchanged (truncated to dest_size).  Otherwise
 * the directory components shared by base and target are dropped, one "../"
 * is emitted for every separator left in base, and the rest of target is
 * appended.
 *
 * The shared prefix is only ever cut right after a separator, so sibling
 * names that merely start with the same characters ("a.vmdk" / "abc.vmdk")
 * are kept whole, and the scan stops at the end of base so identical strings
 * are not read past their terminator.
 *
 * Returns 0 on success, -1 when an argument is NULL or dest_size is not
 * positive.
 */
static int relative_path(char *dest, int dest_size,
        const char *base, const char *target)
{
    int i;
    int common = 0;     /* length of the shared prefix ending in a separator */
    int n = 0;
    const char *p, *q;
#ifdef _WIN32
    const char *sep = "\\";
#else
    const char *sep = "/";
#endif

    if (!(dest && base && target) || dest_size <= 0) {
        return -1;
    }
    if (path_is_absolute(target)) {
        pstrcpy(dest, dest_size, target);
        return 0;
    }
    for (i = 0; base[i] != '\0' && base[i] == target[i]; i++) {
        if (base[i] == *sep) {
            common = i + 1;
        }
    }
    p = &base[common];
    q = &target[common];
    /* Each directory level left in base needs one ".." to climb out of. */
    while (*p) {
        if (*p == *sep) {
            n++;
        }
        p++;
    }
    dest[0] = '\0';
    for (; n; n--) {
        pstrcat(dest, dest_size, "..");
        pstrcat(dest, dest_size, sep);
    }
    pstrcat(dest, dest_size, q);
    return 0;
}
1434

    
1435
/*
 * Create a VMDK image named filename according to options.
 *
 * Recognised options: image size, backing file, the compat6 flag and the
 * subformat (monolithicSparse by default; monolithicFlat,
 * twoGbMaxExtentSparse, twoGbMaxExtentFlat and streamOptimized are accepted).
 *
 * One extent file is created per split_size chunk for the twoGbMaxExtent*
 * subformats, otherwise a single extent.  A text descriptor is then
 * written: into its own file for split/flat images, or at offset 0x200 of the
 * already created extent for the remaining subformats.
 *
 * Returns 0 on success; -EINVAL for a bad filename, unknown subformat or a
 * non-VMDK backing file; -ENOTSUP for a flat image with a backing file; or
 * the negative error reported while opening the backing file, creating an
 * extent or writing the descriptor.
 */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, idx = 0;
    char desc[BUF_SIZE];
    int64_t total_size = 0, filesize;
    const char *backing_file = NULL;
    const char *fmt = NULL;
    int flags = 0;
    int ret = 0;
    bool flat, split, compress;
    char ext_desc_lines[BUF_SIZE] = "";
    char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
    const char *desc_extent_line;
    char parent_desc_line[BUF_SIZE] = "";
    uint32_t parent_cid = 0xffffffff;
    const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=%x\n"
        "createType=\"%s\"\n"
        "%s"
        "\n"
        "# Extent description\n"
        "%s"
        "\n"
        "# The Disk Data Base\n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";

    if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
        return -EINVAL;
    }
    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
        } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
            fmt = options->value.s;
        }
        options++;
    }
    if (!fmt) {
        /* Default format to monolithicSparse */
        fmt = "monolithicSparse";
    } else if (strcmp(fmt, "monolithicFlat") &&
               strcmp(fmt, "monolithicSparse") &&
               strcmp(fmt, "twoGbMaxExtentSparse") &&
               strcmp(fmt, "twoGbMaxExtentFlat") &&
               strcmp(fmt, "streamOptimized")) {
        fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
        return -EINVAL;
    }
    split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
              strcmp(fmt, "twoGbMaxExtentSparse"));
    flat = !(strcmp(fmt, "monolithicFlat") &&
             strcmp(fmt, "twoGbMaxExtentFlat"));
    compress = !strcmp(fmt, "streamOptimized");
    /* The sector count argument is an int64_t, hence PRId64. */
    if (flat) {
        desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n";
    } else {
        desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n";
    }
    if (flat && backing_file) {
        /* not supporting backing file for flat image */
        return -ENOTSUP;
    }
    if (backing_file) {
        char parent_filename[PATH_MAX];
        BlockDriverState *bs = bdrv_new("");
        ret = bdrv_open(bs, backing_file, 0, NULL);
        if (ret != 0) {
            bdrv_delete(bs);
            return ret;
        }
        if (strcmp(bs->drv->format_name, "vmdk")) {
            bdrv_delete(bs);
            return -EINVAL;
        }
        /* The new image records the parent's CID in its descriptor. */
        parent_cid = vmdk_read_cid(bs, 0);
        bdrv_delete(bs);
        relative_path(parent_filename, sizeof(parent_filename),
                      filename, backing_file);
        snprintf(parent_desc_line, sizeof(parent_desc_line),
                "parentFileNameHint=\"%s\"", parent_filename);
    }

    /* Create extents */
    filesize = total_size;
    while (filesize > 0) {
        char desc_line[BUF_SIZE];
        char ext_filename[PATH_MAX];
        char desc_filename[PATH_MAX];
        int64_t size = filesize;

        if (split && size > split_size) {
            size = split_size;
        }
        if (split) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
                    prefix, flat ? 'f' : 's', ++idx, postfix);
        } else if (flat) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
                    prefix, postfix);
        } else {
            snprintf(desc_filename, sizeof(desc_filename), "%s%s",
                    prefix, postfix);
        }
        /* The descriptor names the extent without its directory part. */
        snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                path, desc_filename);

        /* Hand the real failure reason back instead of a blanket -EINVAL. */
        ret = vmdk_create_extent(ext_filename, size, flat, compress);
        if (ret) {
            return ret;
        }
        filesize -= size;

        /* Format description line */
        snprintf(desc_line, sizeof(desc_line),
                    desc_extent_line, size / 512, desc_filename);
        pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
    }
    /* generate descriptor file */
    snprintf(desc, sizeof(desc), desc_template,
            (unsigned int)time(NULL),
            parent_cid,
            fmt,
            parent_desc_line,
            ext_desc_lines,
            (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
            total_size / (int64_t)(63 * 16 * 512));
    if (split || flat) {
        /* The descriptor lives in its own, freshly created file. */
        fd = qemu_open(filename,
                       O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
                       0644);
    } else {
        /* The descriptor is embedded in the extent created above. */
        fd = qemu_open(filename,
                       O_WRONLY | O_BINARY | O_LARGEFILE,
                       0644);
    }
    if (fd < 0) {
        return -errno;
    }
    /* the descriptor offset = 0x200 */
    if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }
    ret = 0;
exit:
    qemu_close(fd);
    return ret;
}
1602

    
1603
/*
 * Close callback: frees the extents via vmdk_free_extents(), then removes
 * and frees the migration blocker stored in the driver state.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;

    vmdk_free_extents(bs);

    /* Unregister the blocker before freeing the error object it refers to. */
    migrate_del_blocker(state->migration_blocker);
    error_free(state->migration_blocker);
}
1612

    
1613
/*
 * Flush every extent file with bdrv_co_flush().
 *
 * All extents are flushed even if one of them fails.  Returns 0 when every
 * flush succeeded, otherwise the error of the last extent that failed.
 */
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int result = 0;
    int idx;

    for (idx = 0; idx < s->num_extents; idx++) {
        int rc = bdrv_co_flush(s->extents[idx].file);

        if (rc < 0) {
            result = rc;
        }
    }
    return result;
}
1627

    
1628
/*
 * Sum the allocated size of bs->file and of every extent file.
 *
 * Extents that share bs->file are skipped so that file is counted only once.
 * Returns the total, or the first negative value reported by
 * bdrv_get_allocated_file_size().
 */
static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int64_t total = bdrv_get_allocated_file_size(bs->file);
    int64_t size;
    int idx;

    if (total < 0) {
        return total;
    }
    for (idx = 0; idx < s->num_extents; idx++) {
        if (s->extents[idx].file != bs->file) {
            size = bdrv_get_allocated_file_size(s->extents[idx].file);
            if (size < 0) {
                return size;
            }
            total += size;
        }
    }
    return total;
}
1651

    
1652
static QEMUOptionParameter vmdk_create_options[] = {
1653
    {
1654
        .name = BLOCK_OPT_SIZE,
1655
        .type = OPT_SIZE,
1656
        .help = "Virtual disk size"
1657
    },
1658
    {
1659
        .name = BLOCK_OPT_BACKING_FILE,
1660
        .type = OPT_STRING,
1661
        .help = "File name of a base image"
1662
    },
1663
    {
1664
        .name = BLOCK_OPT_COMPAT6,
1665
        .type = OPT_FLAG,
1666
        .help = "VMDK version 6 image"
1667
    },
1668
    {
1669
        .name = BLOCK_OPT_SUBFMT,
1670
        .type = OPT_STRING,
1671
        .help =
1672
            "VMDK flat extent format, can be one of "
1673
            "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
1674
    },
1675
    { NULL }
1676
};
1677

    
1678
static BlockDriver bdrv_vmdk = {
1679
    .format_name    = "vmdk",
1680
    .instance_size  = sizeof(BDRVVmdkState),
1681
    .bdrv_probe     = vmdk_probe,
1682
    .bdrv_open      = vmdk_open,
1683
    .bdrv_reopen_prepare = vmdk_reopen_prepare,
1684
    .bdrv_read      = vmdk_co_read,
1685
    .bdrv_write     = vmdk_co_write,
1686
    .bdrv_close     = vmdk_close,
1687
    .bdrv_create    = vmdk_create,
1688
    .bdrv_co_flush_to_disk  = vmdk_co_flush,
1689
    .bdrv_co_is_allocated   = vmdk_co_is_allocated,
1690
    .bdrv_get_allocated_file_size  = vmdk_get_allocated_file_size,
1691

    
1692
    .create_options = vmdk_create_options,
1693
};
1694

    
1695
static void bdrv_vmdk_init(void)
1696
{
1697
    bdrv_register(&bdrv_vmdk);
1698
}
1699

    
1700
block_init(bdrv_vmdk_init);