Statistics
| Branch: | Revision:

root / block / vmdk.c @ 2e1e79da

History | View | Annotate | Download (45.8 kB)

1
/*
2
 * Block driver for the VMDK format
3
 *
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "qemu-common.h"
27
#include "block_int.h"
28
#include "module.h"
29
#include "migration.h"
30
#include <zlib.h>
31

    
32
/* Magic numbers of the two sparse extent formats, each built from four
 * ASCII characters and compared against the first four bytes of the file
 * read as a big-endian 32-bit value. */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

/* VMDK4Header.compressAlgorithm value that marks an extent as compressed. */
#define VMDK4_COMPRESSION_DEFLATE 1

/* Bit masks for VMDK4Header.flags. */
#define VMDK4_FLAG_RGD      (1 << 1)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER   (1 << 17)
38

    
39
/*
 * Header of a VMDK3 sparse extent. vmdk_open_vmdk3() reads it from the
 * extent file starting right after the 4-byte magic and converts the
 * fields it uses with le32_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* used as the extent size in sectors */
    uint32_t granularity;       /* used as the number of sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in 512-byte sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
51

    
52
/*
 * Header of a VMDK4 sparse extent. vmdk_open_vmdk4() reads it from the
 * extent file starting right after the 4-byte magic; multi-byte fields are
 * converted with the le*_to_cpu() helpers before use. The struct is packed
 * so that the in-memory layout has no padding between fields.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;             /* tested against the VMDK4_FLAG_* masks */
    int64_t capacity;           /* used as the extent size in sectors */
    int64_t granularity;        /* used as the number of sectors per cluster */
    int64_t desc_offset;        /* descriptor position, in 512-byte sectors */
    int64_t desc_size;
    int32_t num_gtes_per_gte;   /* used as the entry count of one L2 table */
    int64_t gd_offset;          /* L1 table position, in 512-byte sectors */
    int64_t rgd_offset;         /* backup L1 table position, in sectors;
                                 * only used when VMDK4_FLAG_RGD is set */
    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
    uint16_t compressAlgorithm; /* VMDK4_COMPRESSION_DEFLATE if compressed */
} QEMU_PACKED VMDK4Header;
67

    
68
#define L2_CACHE_SIZE 16
69

    
70
typedef struct VmdkExtent {
71
    BlockDriverState *file;
72
    bool flat;
73
    bool compressed;
74
    bool has_marker;
75
    int64_t sectors;
76
    int64_t end_sector;
77
    int64_t flat_start_offset;
78
    int64_t l1_table_offset;
79
    int64_t l1_backup_table_offset;
80
    uint32_t *l1_table;
81
    uint32_t *l1_backup_table;
82
    unsigned int l1_size;
83
    uint32_t l1_entry_sectors;
84

    
85
    unsigned int l2_size;
86
    uint32_t *l2_cache;
87
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
88
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
89

    
90
    unsigned int cluster_sectors;
91
} VmdkExtent;
92

    
93
typedef struct BDRVVmdkState {
94
    CoMutex lock;
95
    int desc_offset;
96
    bool cid_updated;
97
    uint32_t parent_cid;
98
    int num_extents;
99
    /* Extent array with num_extents entries, ascend ordered by address */
100
    VmdkExtent *extents;
101
    Error *migration_blocker;
102
} BDRVVmdkState;
103

    
104
/*
 * Description of one freshly allocated cluster, filled in by
 * get_cluster_offset() and consumed by vmdk_L2update().
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* L2 entry value, already in on-disk
                                 * (little-endian) byte order */
    unsigned int l1_index;      /* index into the L1 table */
    unsigned int l2_index;      /* index into the L2 table */
    unsigned int l2_offset;     /* position of the L2 table, in sectors */
    int valid;                  /* non-zero once the other fields are set */
} VmdkMetaData;
111

    
112
typedef struct VmdkGrainMarker {
113
    uint64_t lba;
114
    uint32_t size;
115
    uint8_t  data[0];
116
} VmdkGrainMarker;
117

    
118
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
119
{
120
    uint32_t magic;
121

    
122
    if (buf_size < 4) {
123
        return 0;
124
    }
125
    magic = be32_to_cpu(*(uint32_t *)buf);
126
    if (magic == VMDK3_MAGIC ||
127
        magic == VMDK4_MAGIC) {
128
        return 100;
129
    } else {
130
        const char *p = (const char *)buf;
131
        const char *end = p + buf_size;
132
        while (p < end) {
133
            if (*p == '#') {
134
                /* skip comment line */
135
                while (p < end && *p != '\n') {
136
                    p++;
137
                }
138
                p++;
139
                continue;
140
            }
141
            if (*p == ' ') {
142
                while (p < end && *p == ' ') {
143
                    p++;
144
                }
145
                /* skip '\r' if windows line endings used. */
146
                if (p < end && *p == '\r') {
147
                    p++;
148
                }
149
                /* only accept blank lines before 'version=' line */
150
                if (p == end || *p != '\n') {
151
                    return 0;
152
                }
153
                p++;
154
                continue;
155
            }
156
            if (end - p >= strlen("version=X\n")) {
157
                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
158
                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
159
                    return 100;
160
                }
161
            }
162
            if (end - p >= strlen("version=X\r\n")) {
163
                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
164
                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
165
                    return 100;
166
                }
167
            }
168
            return 0;
169
        }
170
        return 0;
171
    }
172
}
173

    
174
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * CID read from the backing file. */
#define CHECK_CID 1

/* Sizes in bytes. */
#define SECTOR_SIZE 512
#define DESC_SIZE   (20 * SECTOR_SIZE)  /* 20 sectors of 512 bytes each */
#define BUF_SIZE    4096
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
180

    
181
/*
 * Release every extent of an image: the L1 table, the backup L1 table and
 * the L2 cache of each entry, the extent's file unless it is bs->file
 * itself, and finally the extent array.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx = 0;

    while (idx < s->num_extents) {
        VmdkExtent *cur = &s->extents[idx];

        g_free(cur->l1_table);
        g_free(cur->l2_cache);
        g_free(cur->l1_backup_table);
        /* bs->file is not owned by the extent; leave it alone */
        if (cur->file != bs->file) {
            bdrv_delete(cur->file);
        }
        idx++;
    }
    g_free(s->extents);
}
198

    
199
/*
 * Drop the most recently added entry from the extent array by shrinking
 * the array by one element. Does nothing when the array is empty. The
 * tables and file referenced by the dropped entry are not released here.
 */
static void vmdk_free_last_extent(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    if (s->num_extents != 0) {
        s->num_extents -= 1;
        s->extents = g_realloc(s->extents,
                               s->num_extents * sizeof(VmdkExtent));
    }
}
209

    
210
/*
 * Read a content ID from the text descriptor of an image.
 *
 * bs:     image whose descriptor (DESC_SIZE bytes at s->desc_offset of
 *         bs->file) is searched
 * parent: non-zero to look up "parentCID", zero to look up "CID"
 *
 * Returns the hexadecimal value following the key, 0xffffffff if the key is
 * missing or has no parsable value, and 0 if the descriptor cannot be read.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    char desc[DESC_SIZE];
    uint32_t cid = 0xffffffff;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;
    int ret;

    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return 0;
    }

    /* sizeof() counts the terminating NUL, so advancing by cid_str_size
     * also steps over the single separator character after the key. */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    desc[DESC_SIZE - 1] = '\0';
    p_name = strstr(desc, cid_str);
    /* Only advance when a separator character really follows the key;
     * otherwise the pointer would move past the end of the string. */
    if (p_name != NULL && p_name[cid_str_size - 1] != '\0') {
        unsigned int value;

        p_name += cid_str_size;
        /* %x stores through an unsigned int pointer */
        if (sscanf(p_name, "%x", &value) == 1) {
            cid = value;
        }
    }

    return cid;
}
241

    
242
/*
 * Store a new content ID in the text descriptor of an image.
 *
 * The descriptor is read, the text from "parentCID" onwards is saved, the
 * value after the first "CID" is overwritten with cid in hexadecimal
 * followed by a newline, the saved text is appended again, and the whole
 * DESC_SIZE descriptor is written back synchronously.
 *
 * Returns 0 on success, -EINVAL if the descriptor has no "parentCID" entry,
 * or the negative value returned by the failing read or write.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE];
    char tail[DESC_SIZE];
    char *parent_line;
    char *cid_value;
    int ret;

    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
    desc[DESC_SIZE - 1] = '\0';

    parent_line = strstr(desc, "parentCID");
    if (!parent_line) {
        return -EINVAL;
    }

    /* keep everything from "parentCID" on; snprintf() below cuts it off */
    pstrcpy(tail, sizeof(tail), parent_line);

    cid_value = strstr(desc, "CID");
    if (cid_value) {
        /* sizeof() includes the NUL, which accounts for the separator */
        cid_value += sizeof("CID");
        snprintf(cid_value, sizeof(desc) - (cid_value - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tail);
    }

    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
    return ret < 0 ? ret : 0;
}
275

    
276
/*
 * Check that the backing file still matches this image.
 *
 * With CHECK_CID defined and a backing file present, the CID currently
 * stored in the backing file is compared with the parent CID recorded in
 * this image's state.
 *
 * Returns 1 if the CID is valid (or no check applies), 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *parent = bs->backing_hd;

    if (parent != NULL && vmdk_read_cid(parent, 0) != s->parent_cid) {
        /* CID not valid */
        return 0;
    }
#endif
    /* CID valid */
    return 1;
}
294

    
295
/*
 * Extract the backing file name from the descriptor.
 *
 * Looks for "parentFileNameHint" in the DESC_SIZE bytes at s->desc_offset
 * of bs->file and, if present, copies the text between the two characters
 * following the key and the next double quote into bs->backing_file.
 *
 * Returns 0 on success (including when no hint is present), -EINVAL if the
 * entry is truncated, unterminated or too long for bs->backing_file, or the
 * negative value returned by the failing read.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    char *p_name;
    char desc[DESC_SIZE + 1];
    BDRVVmdkState *s = bs->opaque;
    int ret;

    desc[DESC_SIZE] = '\0';
    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }

    p_name = strstr(desc, "parentFileNameHint");
    if (p_name != NULL) {
        /* key (sizeof counts its NUL, standing in for one separator
         * character) plus one more character for the opening quote */
        const size_t skip = sizeof("parentFileNameHint") + 1;
        char *end_name;

        /* Make sure the skipped characters exist so that the pointer
         * cannot move past the end of the string. */
        if (strlen(p_name) < skip) {
            return -EINVAL;
        }
        p_name += skip;
        end_name = strchr(p_name, '\"');
        if (end_name == NULL) {
            return -EINVAL;
        }
        if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
            return -EINVAL;
        }

        /* the size argument includes room for the terminating NUL */
        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}
326

    
327
/* Create and append extent to the extent array. Return the added VmdkExtent
328
 * address. return NULL if allocation failed. */
329
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
330
                           BlockDriverState *file, bool flat, int64_t sectors,
331
                           int64_t l1_offset, int64_t l1_backup_offset,
332
                           uint32_t l1_size,
333
                           int l2_size, unsigned int cluster_sectors)
334
{
335
    VmdkExtent *extent;
336
    BDRVVmdkState *s = bs->opaque;
337

    
338
    s->extents = g_realloc(s->extents,
339
                              (s->num_extents + 1) * sizeof(VmdkExtent));
340
    extent = &s->extents[s->num_extents];
341
    s->num_extents++;
342

    
343
    memset(extent, 0, sizeof(VmdkExtent));
344
    extent->file = file;
345
    extent->flat = flat;
346
    extent->sectors = sectors;
347
    extent->l1_table_offset = l1_offset;
348
    extent->l1_backup_table_offset = l1_backup_offset;
349
    extent->l1_size = l1_size;
350
    extent->l1_entry_sectors = l2_size * cluster_sectors;
351
    extent->l2_size = l2_size;
352
    extent->cluster_sectors = cluster_sectors;
353

    
354
    if (s->num_extents > 1) {
355
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
356
    } else {
357
        extent->end_sector = extent->sectors;
358
    }
359
    bs->total_sectors = extent->end_sector;
360
    return extent;
361
}
362

    
363
/*
 * Load the lookup tables of a sparse extent.
 *
 * Reads the L1 table (and the backup L1 table when the extent has one) from
 * extent->file, converts the entries from little-endian to host byte order,
 * and allocates the L2 cache.
 *
 * Returns 0 on success, -EINVAL if the L1 table is too large to be read in
 * one request, or the negative value returned by the failing read. On
 * failure the table pointers are freed and reset to NULL.
 */
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
{
    int ret;
    int l1_size;
    unsigned int i;

    /* l1_size originates from the image header; refuse counts whose size
     * in bytes would overflow the int used for the read length. */
    if (extent->l1_size > 0x7fffffff / sizeof(uint32_t)) {
        return -EINVAL;
    }

    /* read the L1 table */
    l1_size = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = g_malloc(l1_size);
    ret = bdrv_pread(extent->file,
                    extent->l1_table_offset,
                    extent->l1_table,
                    l1_size);
    if (ret < 0) {
        goto fail_l1;
    }
    for (i = 0; i < extent->l1_size; i++) {
        le32_to_cpus(&extent->l1_table[i]);
    }

    /* read the backup L1 table, if the extent has one */
    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = g_malloc(l1_size);
        ret = bdrv_pread(extent->file,
                        extent->l1_backup_table_offset,
                        extent->l1_backup_table,
                        l1_size);
        if (ret < 0) {
            goto fail_l1b;
        }
        for (i = 0; i < extent->l1_size; i++) {
            le32_to_cpus(&extent->l1_backup_table[i]);
        }
    }

    extent->l2_cache =
        g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail_l1b:
    g_free(extent->l1_backup_table);
    /* do not leave dangling pointers in the extent */
    extent->l1_backup_table = NULL;
 fail_l1:
    g_free(extent->l1_table);
    extent->l1_table = NULL;
    return ret;
}
405

    
406
/*
 * Open a VMDK3 sparse extent and append it to the extent array of bs.
 *
 * bs:    image the extent belongs to
 * file:  file containing the extent; the header is read from it and the
 *        new extent refers to it
 * flags: unused
 *
 * The extent is created with 64 L1 entries and 512 entries per L2 table.
 *
 * Returns 0 on success or a negative value on failure; when loading the
 * tables fails, the extent entry added here is removed again.
 */
static int vmdk_open_vmdk3(BlockDriverState *bs,
                           BlockDriverState *file,
                           int flags)
{
    int ret;
    uint32_t magic;
    VMDK3Header header;
    VmdkExtent *extent;

    /* the header follows the 4-byte magic */
    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        return ret;
    }
    /* Register the extent with the file its header was read from, which is
     * not bs->file when the extent is named by a descriptor file. */
    extent = vmdk_add_extent(bs,
                             file, false,
                             le32_to_cpu(header.disk_sectors),
                             le32_to_cpu(header.l1dir_offset) << 9,
                             0, 1 << 6, 1 << 9,
                             le32_to_cpu(header.granularity));
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
432

    
433
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
434
                               int64_t desc_offset);
435

    
436
/*
 * Open a VMDK4 sparse extent and append it to the extent array of bs.
 *
 * bs:    image the extent belongs to
 * file:  file containing the extent
 * flags: open flags, passed on when the file turns out to be a descriptor
 *
 * A header with zero capacity and a non-zero descriptor offset is treated
 * as a descriptor-only file and handed to vmdk_open_desc_file().
 *
 * Returns 0 on success, -EINVAL if the header yields zero sectors per L1
 * entry, or another negative value on failure; when loading the tables
 * fails, the extent entry added here is removed again.
 */
static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
                           int flags)
{
    int ret;
    uint32_t magic;
    uint32_t l1_size, l1_entry_sectors;
    VMDK4Header header;
    VmdkExtent *extent;
    int64_t l1_backup_offset = 0;

    /* the header follows the 4-byte magic */
    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        return ret;
    }
    if (header.capacity == 0) {
        /* header fields are little-endian on disk; convert before use */
        int64_t desc_offset = le64_to_cpu(header.desc_offset);

        if (desc_offset) {
            return vmdk_open_desc_file(bs, flags, desc_offset << 9);
        }
    }
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
        return -EINVAL;
    }
    /* number of L1 entries needed to cover the capacity, rounded up */
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
    extent = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          l1_backup_offset,
                          l1_size,
                          le32_to_cpu(header.num_gtes_per_gte),
                          le64_to_cpu(header.granularity));
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
        vmdk_free_last_extent(bs);
    }
    return ret;
}
481

    
482
/*
 * Find an option value in a descriptor text.
 *
 * Locates the first occurrence of opt_name in desc, skips the two
 * characters that follow it (expected to be "=\"") and copies everything up
 * to the next double quote into buf as a NUL-terminated string.
 *
 * desc:     NUL-terminated descriptor text
 * opt_name: option name to look for
 * buf:      destination buffer
 * buf_size: size of buf in bytes, including room for the terminating NUL
 *
 * Returns 0 on success and -1 if the option is missing, has no value or no
 * closing quote, or if the value does not fit into buf.
 */
static int vmdk_parse_description(const char *desc, const char *opt_name,
        char *buf, int buf_size)
{
    const char *opt_pos, *opt_end;
    const char *end = desc + strlen(desc);
    /* Skip "=\"" following opt_name */
    const size_t skip = strlen(opt_name) + 2;
    size_t value_len;

    opt_pos = strstr(desc, opt_name);
    if (!opt_pos) {
        return -1;
    }
    /* Compare lengths before advancing so that the pointer never moves
     * beyond the end of the string. */
    if ((size_t)(end - opt_pos) <= skip) {
        return -1;
    }
    opt_pos += skip;

    opt_end = memchr(opt_pos, '"', (size_t)(end - opt_pos));
    if (!opt_end) {
        return -1;
    }
    value_len = (size_t)(opt_end - opt_pos);
    if (buf_size <= 0 || (size_t)buf_size < value_len + 1) {
        return -1;
    }
    memcpy(buf, opt_pos, value_len);
    buf[value_len] = '\0';
    return 0;
}
508

    
509
/* Open an extent file and append to bs array */
510
static int vmdk_open_sparse(BlockDriverState *bs,
511
                            BlockDriverState *file,
512
                            int flags)
513
{
514
    uint32_t magic;
515

    
516
    if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
517
        return -EIO;
518
    }
519

    
520
    magic = be32_to_cpu(magic);
521
    switch (magic) {
522
        case VMDK3_MAGIC:
523
            return vmdk_open_vmdk3(bs, file, flags);
524
            break;
525
        case VMDK4_MAGIC:
526
            return vmdk_open_vmdk4(bs, file, flags);
527
            break;
528
        default:
529
            return -EINVAL;
530
            break;
531
    }
532
}
533

    
534
/*
 * Parse the extent lines of a descriptor and open every extent.
 *
 * desc:           NUL-terminated descriptor text
 * bs:             image the extents are appended to
 * desc_file_path: path of the descriptor file; extent file names are
 *                 resolved relative to it with path_combine()
 *
 * Lines that do not look like an "RW" extent line of type FLAT or SPARSE
 * with a positive sector count are skipped.
 *
 * Returns 0 on success, -EINVAL for a malformed extent line, -ENOTSUP for
 * an unsupported extent type, or the negative value returned when opening
 * an extent fails.
 */
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
        const char *desc_file_path)
{
    int ret;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;
    char extent_path[PATH_MAX];
    BlockDriverState *extent_file;

    while (*p) {
        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
                access, &sectors, type, fname, &flat_offset);
        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
            /* FLAT lines must carry a non-negative offset */
            if (ret != 5 || flat_offset < 0) {
                return -EINVAL;
            }
        } else if (ret != 4) {
            return -EINVAL;
        }

        /* trim the quotation marks around */
        if (fname[0] == '"') {
            /* strlen(fname) bytes starting at fname + 1 include the NUL */
            memmove(fname, fname + 1, strlen(fname));
            if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
                return -EINVAL;
            }
            fname[strlen(fname) - 1] = '\0';
        }
        if (sectors <= 0 ||
            (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
            (strcmp(access, "RW"))) {
            goto next_line;
        }

        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
        ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
        if (ret) {
            return ret;
        }

        /* save to extents array */
        if (!strcmp(type, "FLAT")) {
            /* FLAT extent */
            VmdkExtent *extent;

            extent = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, sectors);
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE")) {
            /* SPARSE extent */
            ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
            if (ret) {
                bdrv_delete(extent_file);
                return ret;
            }
        } else {
            fprintf(stderr,
                "VMDK: Not supported extent type \"%s\""".\n", type);
            /* the file was opened above and is not referenced by any
             * extent, so release it before bailing out */
            bdrv_delete(extent_file);
            return -ENOTSUP;
        }
next_line:
        /* move to next line */
        while (*p && *p != '\n') {
            p++;
        }
        /* Step over the newline only; when the last line has no newline,
         * p must stay on the terminating NUL so that the loop ends. */
        if (*p == '\n') {
            p++;
        }
    }
    return 0;
}
616

    
617
/*
 * Open an image that is described by a text descriptor.
 *
 * bs:          image being opened; the descriptor is read from bs->file
 * flags:       unused
 * desc_offset: byte offset of the descriptor within bs->file
 *
 * Only the create types "monolithicFlat", "twoGbMaxExtentSparse" and
 * "twoGbMaxExtentFlat" are accepted. On success the extents listed in the
 * descriptor are opened and s->desc_offset is set to 0.
 *
 * Returns 0 on success, -EINVAL if no createType is found, -ENOTSUP for
 * any other create type, or the negative value of a failing read or extent
 * open.
 */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
                               int64_t desc_offset)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[2048];
    char create_type[128];
    int supported;
    int ret;

    ret = bdrv_pread(bs->file, desc_offset, desc, sizeof(desc));
    if (ret < 0) {
        return ret;
    }
    /* make sure the string functions below stop inside the buffer */
    desc[sizeof(desc) - 1] = '\0';

    if (vmdk_parse_description(desc, "createType",
                               create_type, sizeof(create_type)) != 0) {
        return -EINVAL;
    }

    supported = !strcmp(create_type, "monolithicFlat") ||
                !strcmp(create_type, "twoGbMaxExtentSparse") ||
                !strcmp(create_type, "twoGbMaxExtentFlat");
    if (!supported) {
        fprintf(stderr,
                "VMDK: Not supported image type \"%s\""".\n", create_type);
        return -ENOTSUP;
    }

    s->desc_offset = 0;
    return vmdk_parse_extents(desc, bs, bs->file->filename);
}
643

    
644
/*
 * Open a VMDK image.
 *
 * bs->file is first tried as a sparse extent; if that succeeds the
 * descriptor offset is set to 0x200. Otherwise the file is opened as a text
 * descriptor at offset 0. Afterwards the backing file name and the parent
 * CID are read from the descriptor, the state lock is initialised and a
 * migration blocker is registered.
 *
 * Returns 0 on success. On failure all extents are released and the
 * negative value of the failing step is returned.
 */
static int vmdk_open(BlockDriverState *bs, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    int ret;

    if (vmdk_open_sparse(bs, bs->file, flags) != 0) {
        /* not a sparse extent: treat the file as a descriptor */
        ret = vmdk_open_desc_file(bs, flags, 0);
        if (ret) {
            goto fail;
        }
    } else {
        s->desc_offset = 0x200;
    }

    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
        goto fail;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VMDK images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "vmdk", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;

fail:
    vmdk_free_extents(bs);
    return ret;
}
677

    
678
static int get_whole_cluster(BlockDriverState *bs,
679
                VmdkExtent *extent,
680
                uint64_t cluster_offset,
681
                uint64_t offset,
682
                bool allocate)
683
{
684
    /* 128 sectors * 512 bytes each = grain size 64KB */
685
    uint8_t  whole_grain[extent->cluster_sectors * 512];
686

    
687
    /* we will be here if it's first write on non-exist grain(cluster).
688
     * try to read from parent image, if exist */
689
    if (bs->backing_hd) {
690
        int ret;
691

    
692
        if (!vmdk_is_cid_valid(bs)) {
693
            return -1;
694
        }
695

    
696
        /* floor offset to cluster */
697
        offset -= offset % (extent->cluster_sectors * 512);
698
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
699
                extent->cluster_sectors);
700
        if (ret < 0) {
701
            return -1;
702
        }
703

    
704
        /* Write grain only into the active image */
705
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
706
                extent->cluster_sectors);
707
        if (ret < 0) {
708
            return -1;
709
        }
710
    }
711
    return 0;
712
}
713

    
714
/*
 * Write the L2 entry of a newly allocated cluster to disk.
 *
 * The entry m_data->offset is written synchronously at index
 * m_data->l2_index of the L2 table located at sector m_data->l2_offset.
 * If the extent has a backup L1 table, m_data->l2_offset is replaced by the
 * backup table's entry for m_data->l1_index and the same entry is written
 * to that L2 table as well.
 *
 * Returns 0 on success and -1 if a write fails.
 */
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    int64_t entry_pos;

    /* update L2 table */
    entry_pos = ((int64_t)m_data->l2_offset * 512)
                + (m_data->l2_index * sizeof(m_data->offset));
    if (bdrv_pwrite_sync(extent->file, entry_pos,
                         &(m_data->offset), sizeof(m_data->offset)) < 0) {
        return -1;
    }

    if (extent->l1_backup_table_offset == 0) {
        return 0;
    }

    /* update backup L2 table */
    m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
    entry_pos = ((int64_t)m_data->l2_offset * 512)
                + (m_data->l2_index * sizeof(m_data->offset));
    if (bdrv_pwrite_sync(extent->file, entry_pos,
                         &(m_data->offset), sizeof(m_data->offset)) < 0) {
        return -1;
    }

    return 0;
}
741

    
742
static int get_cluster_offset(BlockDriverState *bs,
743
                                    VmdkExtent *extent,
744
                                    VmdkMetaData *m_data,
745
                                    uint64_t offset,
746
                                    int allocate,
747
                                    uint64_t *cluster_offset)
748
{
749
    unsigned int l1_index, l2_offset, l2_index;
750
    int min_index, i, j;
751
    uint32_t min_count, *l2_table, tmp = 0;
752

    
753
    if (m_data) {
754
        m_data->valid = 0;
755
    }
756
    if (extent->flat) {
757
        *cluster_offset = extent->flat_start_offset;
758
        return 0;
759
    }
760

    
761
    offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
762
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
763
    if (l1_index >= extent->l1_size) {
764
        return -1;
765
    }
766
    l2_offset = extent->l1_table[l1_index];
767
    if (!l2_offset) {
768
        return -1;
769
    }
770
    for (i = 0; i < L2_CACHE_SIZE; i++) {
771
        if (l2_offset == extent->l2_cache_offsets[i]) {
772
            /* increment the hit count */
773
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
774
                for (j = 0; j < L2_CACHE_SIZE; j++) {
775
                    extent->l2_cache_counts[j] >>= 1;
776
                }
777
            }
778
            l2_table = extent->l2_cache + (i * extent->l2_size);
779
            goto found;
780
        }
781
    }
782
    /* not found: load a new entry in the least used one */
783
    min_index = 0;
784
    min_count = 0xffffffff;
785
    for (i = 0; i < L2_CACHE_SIZE; i++) {
786
        if (extent->l2_cache_counts[i] < min_count) {
787
            min_count = extent->l2_cache_counts[i];
788
            min_index = i;
789
        }
790
    }
791
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
792
    if (bdrv_pread(
793
                extent->file,
794
                (int64_t)l2_offset * 512,
795
                l2_table,
796
                extent->l2_size * sizeof(uint32_t)
797
            ) != extent->l2_size * sizeof(uint32_t)) {
798
        return -1;
799
    }
800

    
801
    extent->l2_cache_offsets[min_index] = l2_offset;
802
    extent->l2_cache_counts[min_index] = 1;
803
 found:
804
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
805
    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
806

    
807
    if (!*cluster_offset) {
808
        if (!allocate) {
809
            return -1;
810
        }
811

    
812
        /* Avoid the L2 tables update for the images that have snapshots. */
813
        *cluster_offset = bdrv_getlength(extent->file);
814
        if (!extent->compressed) {
815
            bdrv_truncate(
816
                extent->file,
817
                *cluster_offset + (extent->cluster_sectors << 9)
818
            );
819
        }
820

    
821
        *cluster_offset >>= 9;
822
        tmp = cpu_to_le32(*cluster_offset);
823
        l2_table[l2_index] = tmp;
824

    
825
        /* First of all we write grain itself, to avoid race condition
826
         * that may to corrupt the image.
827
         * This problem may occur because of insufficient space on host disk
828
         * or inappropriate VM shutdown.
829
         */
830
        if (get_whole_cluster(
831
                bs, extent, *cluster_offset, offset, allocate) == -1) {
832
            return -1;
833
        }
834

    
835
        if (m_data) {
836
            m_data->offset = tmp;
837
            m_data->l1_index = l1_index;
838
            m_data->l2_index = l2_index;
839
            m_data->l2_offset = l2_offset;
840
            m_data->valid = 1;
841
        }
842
    }
843
    *cluster_offset <<= 9;
844
    return 0;
845
}
846

    
847
/*
 * Find the extent that contains a sector.
 *
 * s:          driver state holding the extent array
 * sector_num: sector to look up
 * start_hint: extent to start searching from, or NULL to start at the
 *             first extent
 *
 * Returns the first extent at or after the starting point whose end_sector
 * is greater than sector_num, or NULL if there is none.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *cur = start_hint ? start_hint : &s->extents[0];
    VmdkExtent *const stop = &s->extents[s->num_extents];

    for (; cur < stop; cur++) {
        if (sector_num < cur->end_sector) {
            return cur;
        }
    }
    return NULL;
}
863

    
864
/*
 * Report whether the cluster containing a sector is allocated.
 *
 * bs:         image to query
 * sector_num: first sector of the range
 * nb_sectors: length of the range in sectors
 * pnum:       receives the number of sectors, starting at sector_num and
 *             capped at nb_sectors, that lie in the same cluster
 *
 * Returns 1 if the cluster lookup succeeds and 0 if it fails. If no extent
 * contains sector_num, 0 is returned and *pnum is left untouched.
 */
static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = find_extent(s, sector_num, NULL);
    uint64_t offset;
    int64_t in_cluster, remaining;
    int lookup;

    if (!extent) {
        return 0;
    }

    qemu_co_mutex_lock(&s->lock);
    lookup = get_cluster_offset(bs, extent, NULL,
                                sector_num * 512, 0, &offset);
    qemu_co_mutex_unlock(&s->lock);

    /* sectors left in this cluster, limited to the requested count */
    in_cluster = sector_num % extent->cluster_sectors;
    remaining = extent->cluster_sectors - in_cluster;
    *pnum = remaining > nb_sectors ? nb_sectors : remaining;

    /* get_cluster_offset returning 0 means success */
    return lookup == 0;
}
891

    
892
/*
 * Write data into one cluster of an extent.
 *
 * extent:            extent to write to
 * cluster_offset:    byte offset of the cluster in extent->file
 * offset_in_cluster: byte offset of the data within the cluster
 * buf:               data to write
 * nb_sectors:        amount of data in 512-byte sectors
 * sector_num:        sector number stored in the grain marker of a
 *                    compressed extent
 *
 * For a compressed extent the data is deflated with zlib and written
 * together with a leading grain marker; such an extent must have markers
 * enabled.
 *
 * Returns 0 on success, -EINVAL if a compressed extent has no markers or
 * compression fails, -EIO on a short write, or the negative value returned
 * by the failing write.
 */
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, const uint8_t *buf,
                            int nb_sectors, int64_t sector_num)
{
    int ret;
    VmdkGrainMarker *data = NULL;
    uLongf buf_len;
    const uint8_t *write_buf = buf;
    int write_len = nb_sectors * 512;

    if (extent->compressed) {
        if (!extent->has_marker) {
            ret = -EINVAL;
            goto out;
        }
        /* room for twice the cluster size of compressed output */
        buf_len = (extent->cluster_sectors << 9) * 2;
        data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
                buf_len == 0) {
            ret = -EINVAL;
            goto out;
        }
        /* Marker fields are little-endian on disk; the read path decodes
         * the size with le32_to_cpu(). */
        data->lba = cpu_to_le64(sector_num);
        data->size = cpu_to_le32(buf_len);
        write_buf = (uint8_t *)data;
        write_len = buf_len + sizeof(VmdkGrainMarker);
    }
    ret = bdrv_pwrite(extent->file,
                        cluster_offset + offset_in_cluster,
                        write_buf,
                        write_len);
    if (ret != write_len) {
        ret = ret < 0 ? ret : -EIO;
        goto out;
    }
    ret = 0;
 out:
    g_free(data);
    return ret;
}
932

    
933
/*
 * Read @nb_sectors sectors into @buf from the cluster of @extent located at
 * byte position @cluster_offset, starting @offset_in_cluster bytes into the
 * (uncompressed) cluster.
 *
 * Uncompressed extents are read directly.  For compressed extents the
 * cluster is loaded, inflated with zlib's uncompress() and the requested
 * range is copied out of the inflated data.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
                            int64_t offset_in_cluster, uint8_t *buf,
                            int nb_sectors)
{
    int ret;
    int cluster_bytes, buf_bytes;
    uint8_t *cluster_buf, *compressed_data;
    uint8_t *uncomp_buf;
    uint32_t data_len;
    size_t avail;
    VmdkGrainMarker *marker;
    uLongf buf_len;

    if (!extent->compressed) {
        ret = bdrv_pread(extent->file,
                          cluster_offset + offset_in_cluster,
                          buf, nb_sectors * 512);
        if (ret == nb_sectors * 512) {
            return 0;
        }
        /* Same policy as vmdk_write_extent: keep errno, short read -> EIO */
        return ret < 0 ? ret : -EIO;
    }
    cluster_bytes = extent->cluster_sectors * 512;
    /* Read two clusters in case GrainMarker + compressed data > one cluster */
    buf_bytes = cluster_bytes * 2;
    cluster_buf = g_malloc(buf_bytes);
    uncomp_buf = g_malloc(cluster_bytes);
    ret = bdrv_pread(extent->file,
                cluster_offset,
                cluster_buf, buf_bytes);
    if (ret < 0) {
        goto out;
    }
    compressed_data = cluster_buf;
    buf_len = cluster_bytes;
    data_len = cluster_bytes;
    if (extent->has_marker) {
        marker = (VmdkGrainMarker *)cluster_buf;
        compressed_data = marker->data;
        data_len = le32_to_cpu(marker->size);
    }
    /*
     * The payload may start behind a marker header, so the bytes really
     * available are counted from compressed_data, not from the start of
     * cluster_buf.  Bounding data_len by buf_bytes alone would let
     * uncompress() read past the end of the allocation.
     */
    avail = buf_bytes - (compressed_data - cluster_buf);
    if (!data_len || data_len > avail) {
        ret = -EINVAL;
        goto out;
    }
    ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
    if (ret != Z_OK) {
        ret = -EINVAL;
        goto out;

    }
    /* The requested range must lie inside what was actually inflated */
    if (offset_in_cluster < 0 ||
            offset_in_cluster + nb_sectors * 512 > buf_len) {
        ret = -EINVAL;
        goto out;
    }
    memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
    ret = 0;

 out:
    g_free(uncomp_buf);
    g_free(cluster_buf);
    return ret;
}
998

    
999
/*
 * Read @nb_sectors sectors starting at @sector_num into @buf.
 *
 * The request is processed one cluster-bounded chunk at a time.  When
 * get_cluster_offset() reports a non-zero status for a chunk, the data is
 * taken from the backing image if there is one (after checking the CID),
 * otherwise the chunk is zero-filled.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    VmdkExtent *ext = NULL;
    uint64_t cluster_off;
    uint64_t chunk, in_cluster;
    int rc;

    for (; nb_sectors > 0;
         nb_sectors -= chunk, sector_num += chunk, buf += chunk * 512) {
        ext = find_extent(state, sector_num, ext);
        if (ext == NULL) {
            return -EIO;
        }

        rc = get_cluster_offset(bs, ext, NULL,
                                sector_num << 9, 0, &cluster_off);

        /* Never cross a cluster boundary within a single chunk */
        in_cluster = sector_num % ext->cluster_sectors;
        chunk = ext->cluster_sectors - in_cluster;
        if (chunk > nb_sectors) {
            chunk = nb_sectors;
        }

        if (rc == 0) {
            rc = vmdk_read_extent(ext, cluster_off, in_cluster * 512,
                                  buf, chunk);
            if (rc) {
                return rc;
            }
        } else if (bs->backing_hd == NULL) {
            /* no cluster offset and no parent image: reads as zeroes */
            memset(buf, 0, 512 * chunk);
        } else {
            /* no cluster offset: fall back to the parent image */
            if (!vmdk_is_cid_valid(bs)) {
                return -EINVAL;
            }
            rc = bdrv_read(bs->backing_hd, sector_num, buf, chunk);
            if (rc < 0) {
                return rc;
            }
        }
    }
    return 0;
}
1048

    
1049
/*
 * Coroutine read entry point: runs vmdk_read() while holding the
 * per-image coroutine mutex and hands back its result unchanged.
 */
static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
                                     uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int status;

    qemu_co_mutex_lock(&state->lock);
    status = vmdk_read(bs, sector_num, buf, nb_sectors);
    qemu_co_mutex_unlock(&state->lock);

    return status;
}
1059

    
1060
/*
 * Write @nb_sectors sectors from @buf starting at @sector_num.
 *
 * The request is split into cluster-bounded chunks.  For compressed
 * extents a cluster is first looked up without allocation; a successful
 * lookup means the cluster already holds data and the write is refused,
 * otherwise the lookup is repeated with allocation enabled.  After each
 * chunk the L2 tables are updated when the metadata is flagged valid, and
 * the image CID is refreshed once per open, after the first chunk.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    VmdkExtent *ext = NULL;
    VmdkMetaData meta;
    uint64_t cluster_off;
    int64_t in_cluster;
    int chunk, rc;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -EIO;
    }

    while (nb_sectors > 0) {
        ext = find_extent(state, sector_num, ext);
        if (ext == NULL) {
            return -EIO;
        }

        /* Allocate on lookup only for non-compressed extents */
        rc = get_cluster_offset(bs, ext, &meta,
                                sector_num << 9, !ext->compressed,
                                &cluster_off);
        if (ext->compressed) {
            if (rc == 0) {
                /* Refuse write to allocated cluster for streamOptimized */
                fprintf(stderr,
                        "VMDK: can't write to allocated cluster"
                        " for streamOptimized\n");
                return -EIO;
            }
            /* allocate */
            rc = get_cluster_offset(bs, ext, &meta,
                                    sector_num << 9, 1,
                                    &cluster_off);
        }
        if (rc) {
            return -EINVAL;
        }

        /* Clamp the chunk to the end of the current cluster */
        in_cluster = sector_num % ext->cluster_sectors;
        chunk = ext->cluster_sectors - in_cluster;
        if (chunk > nb_sectors) {
            chunk = nb_sectors;
        }

        rc = vmdk_write_extent(ext, cluster_off, in_cluster * 512,
                               buf, chunk, sector_num);
        if (rc) {
            return rc;
        }

        /* update L2 tables */
        if (meta.valid && vmdk_L2update(ext, &meta) == -1) {
            return -EIO;
        }

        nb_sectors -= chunk;
        sector_num += chunk;
        buf += chunk * 512;

        /* update CID on the first write every time the virtual disk is
         * opened */
        if (!state->cid_updated) {
            rc = vmdk_write_cid(bs, time(NULL));
            if (rc < 0) {
                return rc;
            }
            state->cid_updated = true;
        }
    }
    return 0;
}
1143

    
1144
/*
 * Coroutine write entry point: runs vmdk_write() while holding the
 * per-image coroutine mutex and hands back its result unchanged.
 */
static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
                                      const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int status;

    qemu_co_mutex_lock(&state->lock);
    status = vmdk_write(bs, sector_num, buf, nb_sectors);
    qemu_co_mutex_unlock(&state->lock);

    return status;
}
1154

    
1155

    
1156
static int vmdk_create_extent(const char *filename, int64_t filesize,
1157
                              bool flat, bool compress)
1158
{
1159
    int ret, i;
1160
    int fd = 0;
1161
    VMDK4Header header;
1162
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
1163

    
1164
    fd = qemu_open(filename,
1165
                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1166
                   0644);
1167
    if (fd < 0) {
1168
        return -errno;
1169
    }
1170
    if (flat) {
1171
        ret = ftruncate(fd, filesize);
1172
        if (ret < 0) {
1173
            ret = -errno;
1174
        }
1175
        goto exit;
1176
    }
1177
    magic = cpu_to_be32(VMDK4_MAGIC);
1178
    memset(&header, 0, sizeof(header));
1179
    header.version = 1;
1180
    header.flags =
1181
        3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
1182
    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
1183
    header.capacity = filesize / 512;
1184
    header.granularity = 128;
1185
    header.num_gtes_per_gte = 512;
1186

    
1187
    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
1188
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
1189
    gt_count =
1190
        (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
1191
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
1192

    
1193
    header.desc_offset = 1;
1194
    header.desc_size = 20;
1195
    header.rgd_offset = header.desc_offset + header.desc_size;
1196
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
1197
    header.grain_offset =
1198
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
1199
         header.granularity - 1) / header.granularity) *
1200
        header.granularity;
1201
    /* swap endianness for all header fields */
1202
    header.version = cpu_to_le32(header.version);
1203
    header.flags = cpu_to_le32(header.flags);
1204
    header.capacity = cpu_to_le64(header.capacity);
1205
    header.granularity = cpu_to_le64(header.granularity);
1206
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
1207
    header.desc_offset = cpu_to_le64(header.desc_offset);
1208
    header.desc_size = cpu_to_le64(header.desc_size);
1209
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
1210
    header.gd_offset = cpu_to_le64(header.gd_offset);
1211
    header.grain_offset = cpu_to_le64(header.grain_offset);
1212
    header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
1213

    
1214
    header.check_bytes[0] = 0xa;
1215
    header.check_bytes[1] = 0x20;
1216
    header.check_bytes[2] = 0xd;
1217
    header.check_bytes[3] = 0xa;
1218

    
1219
    /* write all the data */
1220
    ret = qemu_write_full(fd, &magic, sizeof(magic));
1221
    if (ret != sizeof(magic)) {
1222
        ret = -errno;
1223
        goto exit;
1224
    }
1225
    ret = qemu_write_full(fd, &header, sizeof(header));
1226
    if (ret != sizeof(header)) {
1227
        ret = -errno;
1228
        goto exit;
1229
    }
1230

    
1231
    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
1232
    if (ret < 0) {
1233
        ret = -errno;
1234
        goto exit;
1235
    }
1236

    
1237
    /* write grain directory */
1238
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
1239
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
1240
         i < gt_count; i++, tmp += gt_size) {
1241
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1242
        if (ret != sizeof(tmp)) {
1243
            ret = -errno;
1244
            goto exit;
1245
        }
1246
    }
1247

    
1248
    /* write backup grain directory */
1249
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
1250
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
1251
         i < gt_count; i++, tmp += gt_size) {
1252
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1253
        if (ret != sizeof(tmp)) {
1254
            ret = -errno;
1255
            goto exit;
1256
        }
1257
    }
1258

    
1259
    ret = 0;
1260
 exit:
1261
    qemu_close(fd);
1262
    return ret;
1263
}
1264

    
1265
/*
 * Split @filename into directory part, base name and extension.
 *
 * @path receives everything up to and including the last separator (the
 * last '/', else the last '\\', else the last ':'), or "" if there is
 * none.  @prefix receives the remaining name up to its last '.', and
 * @postfix the rest starting at that '.' (or "" if the name has no dot).
 * Each output buffer is @buf_len bytes large.
 *
 * Returns 0 on success, -1 if @filename is missing/empty or a component
 * does not fit.
 */
static int filename_decompose(const char *filename, char *path, char *prefix,
        char *postfix, size_t buf_len)
{
    static const char separators[] = { '/', '\\', ':' };
    const char *name = NULL;
    const char *dot;
    size_t k;

    if (filename == NULL || filename[0] == '\0') {
        fprintf(stderr, "Vmdk: no filename provided.\n");
        return -1;
    }

    /* Take the first separator kind that occurs at all, in priority order */
    for (k = 0; k < sizeof(separators) && name == NULL; k++) {
        name = strrchr(filename, separators[k]);
    }

    if (name == NULL) {
        name = filename;
        path[0] = '\0';
    } else {
        name++;     /* keep the separator as part of the path */
        if (name - filename >= buf_len) {
            return -1;
        }
        pstrcpy(path, name - filename + 1, filename);
    }

    dot = strrchr(name, '.');
    if (dot != NULL) {
        if (dot - name >= buf_len) {
            return -1;
        }
        pstrcpy(prefix, dot - name + 1, name);
        pstrcpy(postfix, buf_len, dot);
    } else {
        pstrcpy(prefix, buf_len, name);
        postfix[0] = '\0';
    }
    return 0;
}
1304

    
1305
/*
 * Store in @dest (at most @dest_size bytes, always NUL-terminated) a path
 * that refers to @target as seen from @base.
 *
 * An absolute @target is copied unchanged.  Otherwise the common leading
 * characters of @base and @target are skipped, one "../" (".." plus the
 * platform separator) is emitted for every separator left in @base, and
 * the remainder of @target is appended.
 *
 * Returns 0 on success, -1 on a NULL argument or a non-positive
 * @dest_size.
 */
static int relative_path(char *dest, int dest_size,
        const char *base, const char *target)
{
    int i = 0;
    int n = 0;
    const char *p, *q;
#ifdef _WIN32
    const char *sep = "\\";
#else
    const char *sep = "/";
#endif

    if (!(dest && base && target) || dest_size <= 0) {
        return -1;
    }
    if (path_is_absolute(target)) {
        pstrcpy(dest, dest_size, target);
        return 0;
    }
    /* Stop at the terminator: identical strings must not be overrun */
    while (base[i] != '\0' && base[i] == target[i]) {
        i++;
    }
    p = &base[i];
    q = &target[i];
    /* Every separator left in base is one directory level to climb */
    while (*p) {
        if (*p == *sep) {
            n++;
        }
        p++;
    }
    dest[0] = '\0';
    for (; n; n--) {
        pstrcat(dest, dest_size, "..");
        pstrcat(dest, dest_size, sep);
    }
    pstrcat(dest, dest_size, q);
    return 0;
}
1344

    
1345
/*
 * Create a VMDK image named @filename according to @options (size, backing
 * file, compat6 flag, sub-format).
 *
 * One extent file is created per 2 GB slice for the "twoGbMaxExtent*"
 * sub-formats, otherwise a single extent.  The text descriptor is then
 * written either to @filename itself (split or flat images) or into the
 * already created sparse extent at offset 0x200.  Backing files are only
 * accepted for non-flat images and must themselves be VMDK.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, idx = 0;
    char desc[BUF_SIZE];
    int64_t total_size = 0, filesize;
    const char *backing_file = NULL;
    const char *fmt = NULL;
    int flags = 0;
    int ret = 0;
    bool flat, split, compress;
    char ext_desc_lines[BUF_SIZE] = "";
    char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
    const char *desc_extent_line;
    char parent_desc_line[BUF_SIZE] = "";
    uint32_t parent_cid = 0xffffffff;
    const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=%x\n"
        "createType=\"%s\"\n"
        "%s"
        "\n"
        "# Extent description\n"
        "%s"
        "\n"
        "# The Disk Data Base\n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";

    if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
        return -EINVAL;
    }
    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
        } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
            fmt = options->value.s;
        }
        options++;
    }
    if (!fmt) {
        /* Default format to monolithicSparse */
        fmt = "monolithicSparse";
    } else if (strcmp(fmt, "monolithicFlat") &&
               strcmp(fmt, "monolithicSparse") &&
               strcmp(fmt, "twoGbMaxExtentSparse") &&
               strcmp(fmt, "twoGbMaxExtentFlat") &&
               strcmp(fmt, "streamOptimized")) {
        fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
        return -EINVAL;
    }
    split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
              strcmp(fmt, "twoGbMaxExtentSparse"));
    flat = !(strcmp(fmt, "monolithicFlat") &&
             strcmp(fmt, "twoGbMaxExtentFlat"));
    compress = !strcmp(fmt, "streamOptimized");
    /* The sector count passed below is int64_t, hence PRId64 */
    if (flat) {
        desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n";
    } else {
        desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n";
    }
    if (flat && backing_file) {
        /* not supporting backing file for flat image */
        return -ENOTSUP;
    }
    if (backing_file) {
        char parent_filename[PATH_MAX];
        BlockDriverState *bs = bdrv_new("");
        ret = bdrv_open(bs, backing_file, 0, NULL);
        if (ret != 0) {
            bdrv_delete(bs);
            return ret;
        }
        if (strcmp(bs->drv->format_name, "vmdk")) {
            bdrv_delete(bs);
            return -EINVAL;
        }
        /* The new image records the parent's CID */
        parent_cid = vmdk_read_cid(bs, 0);
        bdrv_delete(bs);
        relative_path(parent_filename, sizeof(parent_filename),
                      filename, backing_file);
        snprintf(parent_desc_line, sizeof(parent_desc_line),
                "parentFileNameHint=\"%s\"", parent_filename);
    }

    /* Create extents */
    filesize = total_size;
    while (filesize > 0) {
        char desc_line[BUF_SIZE];
        char ext_filename[PATH_MAX];
        char desc_filename[PATH_MAX];
        int64_t size = filesize;

        if (split && size > split_size) {
            size = split_size;
        }
        if (split) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
                    prefix, flat ? 'f' : 's', ++idx, postfix);
        } else if (flat) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
                    prefix, postfix);
        } else {
            snprintf(desc_filename, sizeof(desc_filename), "%s%s",
                    prefix, postfix);
        }
        snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                path, desc_filename);

        /* Report the real cause instead of a blanket -EINVAL */
        ret = vmdk_create_extent(ext_filename, size, flat, compress);
        if (ret) {
            return ret;
        }
        filesize -= size;

        /* Format description line */
        snprintf(desc_line, sizeof(desc_line),
                    desc_extent_line, size / 512, desc_filename);
        pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
    }
    /* generate descriptor file */
    snprintf(desc, sizeof(desc), desc_template,
            (unsigned int)time(NULL),
            parent_cid,
            fmt,
            parent_desc_line,
            ext_desc_lines,
            (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
            total_size / (int64_t)(63 * 16 * 512));
    if (split || flat) {
        /* Descriptor lives in its own file, created here */
        fd = qemu_open(filename,
                       O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
                       0644);
    } else {
        /* Descriptor is embedded in the extent created above */
        fd = qemu_open(filename,
                       O_WRONLY | O_BINARY | O_LARGEFILE,
                       0644);
    }
    if (fd < 0) {
        return -errno;
    }
    /* the descriptor offset = 0x200 */
    if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }
    ret = 0;
exit:
    qemu_close(fd);
    return ret;
}
1512

    
1513
/*
 * Close callback: releases the extents through vmdk_free_extents(), then
 * unregisters and frees the migration blocker stored in the driver state.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;

    vmdk_free_extents(bs);

    /* Unregister the blocker before its Error object is freed */
    migrate_del_blocker(state->migration_blocker);
    error_free(state->migration_blocker);
}
1522

    
1523
/*
 * Flush every extent file of the image.
 *
 * All extents are flushed even if one of them fails.  Returns 0 if every
 * flush succeeded, otherwise the error code of the last failing flush.
 */
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int result = 0;
    int idx;

    for (idx = 0; idx < state->num_extents; idx++) {
        int rc = bdrv_co_flush(state->extents[idx].file);

        if (rc < 0) {
            result = rc;
        }
    }
    return result;
}
1537

    
1538
/*
 * Report the allocated size of the whole image: the size of bs->file plus
 * that of every extent stored in a different file.
 *
 * Returns the total, or the first negative value reported by
 * bdrv_get_allocated_file_size().
 */
static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int64_t total;
    int64_t extent_size;
    int idx;

    total = bdrv_get_allocated_file_size(bs->file);
    if (total < 0) {
        return total;
    }
    for (idx = 0; idx < state->num_extents; idx++) {
        /* An extent living in bs->file has been counted already */
        if (state->extents[idx].file != bs->file) {
            extent_size =
                bdrv_get_allocated_file_size(state->extents[idx].file);
            if (extent_size < 0) {
                return extent_size;
            }
            total += extent_size;
        }
    }
    return total;
}
1561

    
1562
static QEMUOptionParameter vmdk_create_options[] = {
1563
    {
1564
        .name = BLOCK_OPT_SIZE,
1565
        .type = OPT_SIZE,
1566
        .help = "Virtual disk size"
1567
    },
1568
    {
1569
        .name = BLOCK_OPT_BACKING_FILE,
1570
        .type = OPT_STRING,
1571
        .help = "File name of a base image"
1572
    },
1573
    {
1574
        .name = BLOCK_OPT_COMPAT6,
1575
        .type = OPT_FLAG,
1576
        .help = "VMDK version 6 image"
1577
    },
1578
    {
1579
        .name = BLOCK_OPT_SUBFMT,
1580
        .type = OPT_STRING,
1581
        .help =
1582
            "VMDK flat extent format, can be one of "
1583
            "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
1584
    },
1585
    { NULL }
1586
};
1587

    
1588
static BlockDriver bdrv_vmdk = {
1589
    .format_name    = "vmdk",
1590
    .instance_size  = sizeof(BDRVVmdkState),
1591
    .bdrv_probe     = vmdk_probe,
1592
    .bdrv_open      = vmdk_open,
1593
    .bdrv_read      = vmdk_co_read,
1594
    .bdrv_write     = vmdk_co_write,
1595
    .bdrv_close     = vmdk_close,
1596
    .bdrv_create    = vmdk_create,
1597
    .bdrv_co_flush_to_disk  = vmdk_co_flush,
1598
    .bdrv_co_is_allocated   = vmdk_co_is_allocated,
1599
    .bdrv_get_allocated_file_size  = vmdk_get_allocated_file_size,
1600

    
1601
    .create_options = vmdk_create_options,
1602
};
1603

    
1604
static void bdrv_vmdk_init(void)
1605
{
1606
    bdrv_register(&bdrv_vmdk);
1607
}
1608

    
1609
block_init(bdrv_vmdk_init);