Statistics
| Branch: | Revision:

root / block / vmdk.c @ 7fa60fa3

History | View | Annotate | Download (36.3 kB)

1
/*
2
 * Block driver for the VMDK format
3
 *
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "qemu-common.h"
27
#include "block_int.h"
28
#include "module.h"
29

    
30
/* Image signatures: the first four bytes of the file, taken as a big-endian
 * 32-bit value. */
#define VMDK3_MAGIC 0x434f5744 /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4b444d56 /* 'K' 'D' 'M' 'V' */
32

    
33
/*
 * Header of a VMDK3 image, laid out exactly as the ten 32-bit words that
 * follow the 4-byte magic in the file.
 */
typedef struct VMDK3Header {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;       /* size of the disk, in sectors */
    uint32_t granularity;        /* sectors per cluster */
    uint32_t l1dir_offset;       /* position of the L1 table, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45

    
46
/*
 * Header of a VMDK4 image as it follows the 4-byte magic in the file.
 * The structure is packed so that its in-memory layout has no padding
 * (the 64-bit members after num_gtes_per_gte are not naturally aligned).
 */
typedef struct VMDK4Header {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;            /* size of the disk, in sectors */
    int64_t granularity;         /* sectors per cluster (grain) */
    int64_t desc_offset;
    int64_t desc_size;
    int32_t num_gtes_per_gte;    /* entries in each second-level table */
    int64_t rgd_offset;          /* backup L1 table position, in sectors */
    int64_t gd_offset;           /* L1 table position, in sectors */
    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
} __attribute__((packed)) VMDK4Header;
60

    
61
#define L2_CACHE_SIZE 16
62

    
63
typedef struct VmdkExtent {
64
    BlockDriverState *file;
65
    bool flat;
66
    int64_t sectors;
67
    int64_t end_sector;
68
    int64_t flat_start_offset;
69
    int64_t l1_table_offset;
70
    int64_t l1_backup_table_offset;
71
    uint32_t *l1_table;
72
    uint32_t *l1_backup_table;
73
    unsigned int l1_size;
74
    uint32_t l1_entry_sectors;
75

    
76
    unsigned int l2_size;
77
    uint32_t *l2_cache;
78
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
79
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
80

    
81
    unsigned int cluster_sectors;
82
} VmdkExtent;
83

    
84
typedef struct BDRVVmdkState {
85
    int desc_offset;
86
    bool cid_updated;
87
    uint32_t parent_cid;
88
    int num_extents;
89
    /* Extent array with num_extents entries, ascend ordered by address */
90
    VmdkExtent *extents;
91
} BDRVVmdkState;
92

    
93
/*
 * Description of a freshly allocated cluster, filled in by the cluster lookup
 * so that the on-disk L2 table(s) can be updated afterwards.
 */
typedef struct VmdkMetaData {
    uint32_t offset;           /* value to store in the L2 entry */
    unsigned int l1_index;     /* index of the covering L1 entry */
    unsigned int l2_index;     /* index of the entry inside its L2 table */
    unsigned int l2_offset;    /* L1 entry value, i.e. sector of the L2 table */
    int valid;                 /* non-zero once the fields above are set */
} VmdkMetaData;
100

    
101
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
102
{
103
    uint32_t magic;
104

    
105
    if (buf_size < 4)
106
        return 0;
107
    magic = be32_to_cpu(*(uint32_t *)buf);
108
    if (magic == VMDK3_MAGIC ||
109
        magic == VMDK4_MAGIC) {
110
        return 100;
111
    } else {
112
        const char *p = (const char *)buf;
113
        const char *end = p + buf_size;
114
        while (p < end) {
115
            if (*p == '#') {
116
                /* skip comment line */
117
                while (p < end && *p != '\n') {
118
                    p++;
119
                }
120
                p++;
121
                continue;
122
            }
123
            if (*p == ' ') {
124
                while (p < end && *p == ' ') {
125
                    p++;
126
                }
127
                /* skip '\r' if windows line endings used. */
128
                if (p < end && *p == '\r') {
129
                    p++;
130
                }
131
                /* only accept blank lines before 'version=' line */
132
                if (p == end || *p != '\n') {
133
                    return 0;
134
                }
135
                p++;
136
                continue;
137
            }
138
            if (end - p >= strlen("version=X\n")) {
139
                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
140
                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
141
                    return 100;
142
                }
143
            }
144
            if (end - p >= strlen("version=X\r\n")) {
145
                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
146
                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
147
                    return 100;
148
                }
149
            }
150
            return 0;
151
        }
152
        return 0;
153
    }
154
}
155

    
156
/* When defined, vmdk_is_cid_valid() compares the stored parent CID against
 * the CID currently found in the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the expansion stays one operand inside any expression
 * (e.g. "x / DESC_SIZE" or "x % DESC_SIZE"). */
#define DESC_SIZE (20 * SECTOR_SIZE)    // 20 sectors of 512 bytes each
#define HEADER_SIZE 512                 // first sector of 512 bytes
161

    
162
/*
 * Free the L1 table, the L2 cache and the backup L1 table of every extent,
 * then the extent array itself. The extent count and the array pointer in
 * the driver state are left as they were.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx = 0;

    while (idx < s->num_extents) {
        VmdkExtent *e = &s->extents[idx++];

        qemu_free(e->l1_table);
        qemu_free(e->l2_cache);
        qemu_free(e->l1_backup_table);
    }
    qemu_free(s->extents);
}
174

    
175
/*
 * Read a content ID from the image descriptor.
 *
 * @bs:     image whose descriptor (DESC_SIZE bytes at s->desc_offset of
 *          bs->file) is parsed
 * @parent: non-zero to read the "parentCID" value, zero to read the value
 *          following the first "CID" occurrence
 *
 * Returns the hexadecimal value found after the key, or 0 when the
 * descriptor cannot be read, the key is missing or no hex number follows it.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    /* one extra byte so the text is always NUL-terminated for strstr() */
    char desc[DESC_SIZE + 1];
    uint32_t cid = 0;
    unsigned int parsed;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;

    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return 0;
    }
    desc[DESC_SIZE] = '\0';

    /* sizeof() counts the terminating NUL of the literal; that extra byte
     * stands for the '=' separator that follows the key in the descriptor. */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    p_name = strstr(desc, cid_str);
    if (p_name != NULL) {
        /* move to the separator; stop if the text ends right after the key */
        p_name += cid_str_size - 1;
        if (*p_name != '\0' && sscanf(p_name + 1, "%x", &parsed) == 1) {
            cid = parsed;
        }
    }

    return cid;
}
202

    
203
/*
 * Replace the content ID stored in the image descriptor.
 *
 * The descriptor is read, the text following the first "CID" key is
 * overwritten with @cid in hex, the saved tail starting at "parentCID" is
 * appended again, and DESC_SIZE bytes are written back synchronously.
 *
 * Returns 0 on success, -EIO when the descriptor cannot be read or written,
 * and -EINVAL when the descriptor has no "parentCID" entry.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    /* one extra byte keeps both buffers NUL-terminated for the str* calls */
    char desc[DESC_SIZE + 1], tmp_desc[DESC_SIZE + 1];
    char *p_name, *tmp_str;
    BDRVVmdkState *s = bs->opaque;

    memset(desc, 0, sizeof(desc));
    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return -EIO;
    }

    /* Save everything from "parentCID" on; it is re-appended below. Without
     * that entry there is nothing valid to copy from. */
    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL) {
        return -EINVAL;
    }
    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);

    p_name = strstr(desc, "CID");
    if (p_name != NULL) {
        /* sizeof("CID") also skips the '=' that follows the key */
        p_name += sizeof("CID");
        snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tmp_desc);
    }

    if (bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
        return -EIO;
    }
    return 0;
}
227

    
228
/*
 * Check that the backing image has not changed since this image recorded
 * its parent CID.
 *
 * Returns 1 when there is no backing image, when CHECK_CID is not defined,
 * or when the backing image's current CID equals the stored parent CID;
 * returns 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *parent = bs->backing_hd;

    if (parent != NULL && vmdk_read_cid(parent, 0) != s->parent_cid) {
        /* CID not valid */
        return 0;
    }
#endif
    /* CID valid */
    return 1;
}
245

    
246
static int vmdk_snapshot_create(const char *filename, const char *backing_file)
247
{
248
    int snp_fd, p_fd;
249
    int ret;
250
    uint32_t p_cid;
251
    char *p_name, *gd_buf, *rgd_buf;
252
    const char *real_filename, *temp_str;
253
    VMDK4Header header;
254
    uint32_t gde_entries, gd_size;
255
    int64_t gd_offset, rgd_offset, capacity, gt_size;
256
    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
257
    static const char desc_template[] =
258
    "# Disk DescriptorFile\n"
259
    "version=1\n"
260
    "CID=%x\n"
261
    "parentCID=%x\n"
262
    "createType=\"monolithicSparse\"\n"
263
    "parentFileNameHint=\"%s\"\n"
264
    "\n"
265
    "# Extent description\n"
266
    "RW %u SPARSE \"%s\"\n"
267
    "\n"
268
    "# The Disk Data Base \n"
269
    "#DDB\n"
270
    "\n";
271

    
272
    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
273
    if (snp_fd < 0)
274
        return -errno;
275
    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
276
    if (p_fd < 0) {
277
        close(snp_fd);
278
        return -errno;
279
    }
280

    
281
    /* read the header */
282
    if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
283
        ret = -errno;
284
        goto fail;
285
    }
286
    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
287
        ret = -errno;
288
        goto fail;
289
    }
290

    
291
    /* write the header */
292
    if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
293
        ret = -errno;
294
        goto fail;
295
    }
296
    if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
297
        ret = -errno;
298
        goto fail;
299
    }
300

    
301
    memset(&header, 0, sizeof(header));
302
    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
303

    
304
    if (ftruncate(snp_fd, header.grain_offset << 9)) {
305
        ret = -errno;
306
        goto fail;
307
    }
308
    /* the descriptor offset = 0x200 */
309
    if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
310
        ret = -errno;
311
        goto fail;
312
    }
313
    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
314
        ret = -errno;
315
        goto fail;
316
    }
317

    
318
    if ((p_name = strstr(p_desc,"CID")) != NULL) {
319
        p_name += sizeof("CID");
320
        sscanf(p_name,"%x",&p_cid);
321
    }
322

    
323
    real_filename = filename;
324
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
325
        real_filename = temp_str + 1;
326
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
327
        real_filename = temp_str + 1;
328
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
329
        real_filename = temp_str + 1;
330

    
331
    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
332
             (uint32_t)header.capacity, real_filename);
333

    
334
    /* write the descriptor */
335
    if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
336
        ret = -errno;
337
        goto fail;
338
    }
339
    if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
340
        ret = -errno;
341
        goto fail;
342
    }
343

    
344
    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
345
    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
346
    capacity = header.capacity * SECTOR_SIZE;       // Extent size
347
    /*
348
     * Each GDE span 32M disk, means:
349
     * 512 GTE per GT, each GTE points to grain
350
     */
351
    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
352
    if (!gt_size) {
353
        ret = -EINVAL;
354
        goto fail;
355
    }
356
    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
357
    gd_size = gde_entries * sizeof(uint32_t);
358

    
359
    /* write RGD */
360
    rgd_buf = qemu_malloc(gd_size);
361
    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
362
        ret = -errno;
363
        goto fail_rgd;
364
    }
365
    if (read(p_fd, rgd_buf, gd_size) != gd_size) {
366
        ret = -errno;
367
        goto fail_rgd;
368
    }
369
    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
370
        ret = -errno;
371
        goto fail_rgd;
372
    }
373
    if (write(snp_fd, rgd_buf, gd_size) == -1) {
374
        ret = -errno;
375
        goto fail_rgd;
376
    }
377

    
378
    /* write GD */
379
    gd_buf = qemu_malloc(gd_size);
380
    if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
381
        ret = -errno;
382
        goto fail_gd;
383
    }
384
    if (read(p_fd, gd_buf, gd_size) != gd_size) {
385
        ret = -errno;
386
        goto fail_gd;
387
    }
388
    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
389
        ret = -errno;
390
        goto fail_gd;
391
    }
392
    if (write(snp_fd, gd_buf, gd_size) == -1) {
393
        ret = -errno;
394
        goto fail_gd;
395
    }
396
    ret = 0;
397

    
398
fail_gd:
399
    qemu_free(gd_buf);
400
fail_rgd:
401
    qemu_free(rgd_buf);
402
fail:
403
    close(p_fd);
404
    close(snp_fd);
405
    return ret;
406
}
407

    
408
/*
 * Extract the backing file name from the image descriptor.
 *
 * Looks for "parentFileNameHint" in the DESC_SIZE bytes at s->desc_offset
 * and, if present, copies the text between the following pair of double
 * quotes into bs->backing_file.
 *
 * Returns 0 on success or when no hint is present; -1 when the descriptor
 * cannot be read, the value is cut off, the closing quote is missing, or the
 * name does not fit into bs->backing_file.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    char *p_name;
    char desc[DESC_SIZE + 1];
    BDRVVmdkState *s = bs->opaque;

    desc[DESC_SIZE] = '\0';
    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return -1;
    }

    p_name = strstr(desc, "parentFileNameHint");
    if (p_name != NULL) {
        /* key length plus the '=' and the opening '"' (sizeof() counts the
         * literal's NUL, which accounts for one of the two) */
        const size_t skip = sizeof("parentFileNameHint") + 1;
        char *end_name;

        /* do not step beyond the end of the descriptor text */
        if (strlen(p_name) < skip) {
            return -1;
        }
        p_name += skip;
        end_name = strchr(p_name, '\"');
        if (end_name == NULL) {
            return -1;
        }
        if ((size_t)(end_name - p_name) > sizeof(bs->backing_file) - 1) {
            return -1;
        }

        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}
433

    
434
/* Create and append extent to the extent array. Return the added VmdkExtent
435
 * address. return NULL if allocation failed. */
436
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
437
                           BlockDriverState *file, bool flat, int64_t sectors,
438
                           int64_t l1_offset, int64_t l1_backup_offset,
439
                           uint32_t l1_size,
440
                           int l2_size, unsigned int cluster_sectors)
441
{
442
    VmdkExtent *extent;
443
    BDRVVmdkState *s = bs->opaque;
444

    
445
    s->extents = qemu_realloc(s->extents,
446
                              (s->num_extents + 1) * sizeof(VmdkExtent));
447
    extent = &s->extents[s->num_extents];
448
    s->num_extents++;
449

    
450
    memset(extent, 0, sizeof(VmdkExtent));
451
    extent->file = file;
452
    extent->flat = flat;
453
    extent->sectors = sectors;
454
    extent->l1_table_offset = l1_offset;
455
    extent->l1_backup_table_offset = l1_backup_offset;
456
    extent->l1_size = l1_size;
457
    extent->l1_entry_sectors = l2_size * cluster_sectors;
458
    extent->l2_size = l2_size;
459
    extent->cluster_sectors = cluster_sectors;
460

    
461
    if (s->num_extents > 1) {
462
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
463
    } else {
464
        extent->end_sector = extent->sectors;
465
    }
466
    bs->total_sectors = extent->end_sector;
467
    return extent;
468
}
469

    
470
/*
 * Load the lookup tables of a sparse extent.
 *
 * Reads the L1 table and, when the extent has a backup table offset, the
 * backup L1 table, converting every entry from little-endian to host order,
 * and allocates the (still empty) L2 table cache.
 *
 * Returns 0 on success or the negative bdrv_pread() result on failure. On
 * failure the tables allocated here are freed and their pointers reset to
 * NULL, so a later vmdk_free_extents() does not free them a second time.
 */
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
{
    int ret;
    int l1_size;
    unsigned int i;

    /* read the L1 table */
    l1_size = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = qemu_malloc(l1_size);
    ret = bdrv_pread(extent->file,
                    extent->l1_table_offset,
                    extent->l1_table,
                    l1_size);
    if (ret < 0) {
        goto fail_l1;
    }
    for (i = 0; i < extent->l1_size; i++) {
        le32_to_cpus(&extent->l1_table[i]);
    }

    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = qemu_malloc(l1_size);
        ret = bdrv_pread(extent->file,
                        extent->l1_backup_table_offset,
                        extent->l1_backup_table,
                        l1_size);
        if (ret < 0) {
            goto fail_l1b;
        }
        for (i = 0; i < extent->l1_size; i++) {
            le32_to_cpus(&extent->l1_backup_table[i]);
        }
    }

    extent->l2_cache =
        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail_l1b:
    qemu_free(extent->l1_backup_table);
    extent->l1_backup_table = NULL;
 fail_l1:
    qemu_free(extent->l1_table);
    extent->l1_table = NULL;
    return ret;
}
512

    
513
/*
 * Open an image that starts with the VMDK3 magic.
 *
 * Reads the header that follows the magic, registers a single sparse extent
 * (64 L1 entries, 512 entries per L2 table, cluster size taken from the
 * header's granularity) and loads its tables.
 *
 * Returns 0 on success, a negative value on failure; -EINVAL is returned
 * when the header's granularity yields an empty cluster or L1 span.
 */
static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
{
    int ret;
    uint32_t magic;     /* only used for its size: the header follows it */
    VMDK3Header header;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    s->desc_offset = 0x200;
    ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    extent = vmdk_add_extent(bs,
                             bs->file, false,
                             le32_to_cpu(header.disk_sectors),
                             le32_to_cpu(header.l1dir_offset) << 9,
                             0, 1 << 6, 1 << 9,
                             le32_to_cpu(header.granularity));
    /* Both values are used as divisors by the cluster lookup, so reject a
     * header that makes either of them zero. */
    if (extent->cluster_sectors == 0 || extent->l1_entry_sectors == 0) {
        ret = -EINVAL;
        goto fail;
    }
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        goto fail;
    }
    return 0;
 fail:
    vmdk_free_extents(bs);
    return ret;
}
543

    
544
/*
 * Open an image that starts with the VMDK4 magic.
 *
 * Reads the header that follows the magic, registers a single sparse extent
 * described by it, records the backing file name and parent CID from the
 * descriptor at offset 0x200, and loads the extent's tables.
 *
 * Returns 0 on success, a negative value on failure; -EINVAL is returned
 * when the header describes an L1 entry that covers zero sectors.
 */
static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
{
    int ret;
    uint32_t magic;     /* only used for its size: the header follows it */
    uint32_t l1_size, l1_entry_sectors;
    VMDK4Header header;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    s->desc_offset = 0x200;
    ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                        * le64_to_cpu(header.granularity);
    /* Must be checked before it is used as a divisor below. The value
     * vmdk_add_extent() stores in the extent is the same 32-bit product, so
     * this also covers extent->l1_entry_sectors and cluster_sectors. */
    if (l1_entry_sectors == 0) {
        ret = -EINVAL;
        goto fail;
    }
    /* number of L1 entries needed to cover the capacity, rounded up */
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
    extent = vmdk_add_extent(bs, bs->file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          le64_to_cpu(header.rgd_offset) << 9,
                          l1_size,
                          le32_to_cpu(header.num_gtes_per_gte),
                          le64_to_cpu(header.granularity));
    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
        goto fail;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        goto fail;
    }
    return 0;
 fail:
    vmdk_free_extents(bs);
    return ret;
}
588

    
589
/*
 * Find an option value in descriptor text.
 *
 * @desc:     NUL-terminated descriptor text
 * @opt_name: option to look for; the first occurrence of this string is used
 * @buf:      receives the NUL-terminated value
 * @buf_size: size of @buf in bytes
 *
 * The two characters after the option name (expected to be =") are skipped
 * without being inspected; the value is everything up to the next double
 * quote.
 *
 * Returns 0 on success and -1 when the option is missing, nothing follows
 * the two skipped characters, the closing quote is missing, or the value
 * plus its terminator does not fit into @buf.
 */
static int vmdk_parse_description(const char *desc, const char *opt_name,
        char *buf, int buf_size)
{
    const char *opt_pos, *opt_end;
    size_t skip, value_len;

    opt_pos = strstr(desc, opt_name);
    if (!opt_pos) {
        return -1;
    }
    /* Skip "=\"" following opt_name; compare lengths first so the pointer
     * never moves beyond the end of the string. */
    skip = strlen(opt_name) + 2;
    if (strlen(opt_pos) <= skip) {
        return -1;
    }
    opt_pos += skip;

    opt_end = strchr(opt_pos, '"');
    if (opt_end == NULL) {
        return -1;
    }
    value_len = (size_t)(opt_end - opt_pos);
    if (buf_size < 0 || (size_t)buf_size < value_len + 1) {
        return -1;
    }
    memcpy(buf, opt_pos, value_len);
    buf[value_len] = '\0';
    return 0;
}
615

    
616
/*
 * Parse the extent lines of a descriptor and register the extents.
 *
 * @desc:           NUL-terminated descriptor text
 * @bs:             image the extents are added to
 * @desc_file_path: path of the descriptor file; extent file names are
 *                  combined with it
 *
 * Lines that do not look like an "RW" extent line are skipped. FLAT extents
 * are opened and appended to the extent array; any other accepted type is
 * reported as unsupported.
 *
 * Returns 0 on success, -EINVAL for a malformed extent line, -ENOTSUP for a
 * SPARSE extent, or the error from opening an extent file.
 */
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
        const char *desc_file_path)
{
    int ret;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;

    while (*p) {
        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
                access, &sectors, type, fname, &flat_offset);
        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
            if (ret != 5 || flat_offset < 0) {
                return -EINVAL;
            }
        } else if (ret != 4) {
            return -EINVAL;
        }

        /* trim the quotation marks around */
        if (fname[0] == '"') {
            /* strlen(fname) bytes from fname + 1 include the terminator */
            memmove(fname, fname + 1, strlen(fname));
            if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
                return -EINVAL;
            }
            fname[strlen(fname) - 1] = '\0';
        }
        if (sectors <= 0 ||
            (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
            (strcmp(access, "RW"))) {
            goto next_line;
        }

        /* save to extents array */
        if (!strcmp(type, "FLAT")) {
            /* FLAT extent */
            char extent_path[PATH_MAX];
            BlockDriverState *extent_file;
            VmdkExtent *extent;

            path_combine(extent_path, sizeof(extent_path),
                    desc_file_path, fname);
            ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
            if (ret) {
                return ret;
            }
            extent = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, sectors);
            extent->flat_start_offset = flat_offset;
        } else {
            /* SPARSE extent, not supported for now */
            fprintf(stderr,
                "VMDK: Not supported extent type \"%s\""".\n", type);
            return -ENOTSUP;
        }
next_line:
        /* move to next line; stay on the terminating NUL when the last line
         * has no newline so the outer loop ends there */
        while (*p && *p != '\n') {
            p++;
        }
        if (*p == '\n') {
            p++;
        }
    }
    return 0;
}
691

    
692
/*
 * Open an image whose main file is a text descriptor.
 *
 * Reads up to 2047 characters of descriptor text, requires the createType to
 * be "monolithicFlat", registers the extents listed in the descriptor and
 * records the backing file name and parent CID.
 *
 * Returns 0 on success; a negative read error, -EINVAL (no createType or a
 * bad parent hint), -ENOTSUP (other create types) or the extent parser's
 * error otherwise. The extent array is released on every failure that
 * happens after extents may have been added.
 */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags)
{
    int ret;
    char buf[2048];
    char ct[128];
    BDRVVmdkState *s = bs->opaque;

    /* Start from a zeroed buffer so the string functions below never see
     * stale stack bytes, whatever part of buf the read fills in. */
    memset(buf, 0, sizeof(buf));
    ret = bdrv_pread(bs->file, 0, buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }
    buf[2047] = '\0';
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
        return -EINVAL;
    }
    if (strcmp(ct, "monolithicFlat")) {
        fprintf(stderr,
                "VMDK: Not supported image type \"%s\""".\n", ct);
        return -ENOTSUP;
    }
    s->desc_offset = 0;
    ret = vmdk_parse_extents(buf, bs, bs->file->filename);
    if (ret) {
        /* extents added before the failing line must not be leaked */
        vmdk_free_extents(bs);
        return ret;
    }

    /* try to open parent images, if exist */
    if (vmdk_parent_open(bs)) {
        vmdk_free_extents(bs);
        return -EINVAL;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    return 0;
}
726

    
727
/*
 * Open a VMDK image: read the first four bytes of the file and hand over to
 * the VMDK3 or VMDK4 opener when they match the respective magic, or to the
 * descriptor-file opener otherwise.
 *
 * Returns -EIO when the four bytes cannot be read, else the chosen opener's
 * result.
 */
static int vmdk_open(BlockDriverState *bs, int flags)
{
    uint32_t signature;

    if (bdrv_pread(bs->file, 0, &signature, sizeof(signature))
            != sizeof(signature)) {
        return -EIO;
    }

    signature = be32_to_cpu(signature);
    switch (signature) {
    case VMDK3_MAGIC:
        return vmdk_open_vmdk3(bs, flags);
    case VMDK4_MAGIC:
        return vmdk_open_vmdk4(bs, flags);
    default:
        return vmdk_open_desc_file(bs, flags);
    }
}
744

    
745
static int get_whole_cluster(BlockDriverState *bs,
746
                VmdkExtent *extent,
747
                uint64_t cluster_offset,
748
                uint64_t offset,
749
                bool allocate)
750
{
751
    /* 128 sectors * 512 bytes each = grain size 64KB */
752
    uint8_t  whole_grain[extent->cluster_sectors * 512];
753

    
754
    /* we will be here if it's first write on non-exist grain(cluster).
755
     * try to read from parent image, if exist */
756
    if (bs->backing_hd) {
757
        int ret;
758

    
759
        if (!vmdk_is_cid_valid(bs))
760
            return -1;
761

    
762
        /* floor offset to cluster */
763
        offset -= offset % (extent->cluster_sectors * 512);
764
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
765
                extent->cluster_sectors);
766
        if (ret < 0) {
767
            return -1;
768
        }
769

    
770
        /* Write grain only into the active image */
771
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
772
                extent->cluster_sectors);
773
        if (ret < 0) {
774
            return -1;
775
        }
776
    }
777
    return 0;
778
}
779

    
780
/*
 * Store a new cluster reference in the on-disk L2 table and, when the extent
 * has a backup L1 table, in the backup L2 table as well.
 *
 * The entry is written at sector m_data->l2_offset, entry m_data->l2_index.
 * For the backup copy m_data->l2_offset is overwritten with the backup L1
 * entry for m_data->l1_index.
 *
 * Returns 0 on success, -1 if either synchronous write fails.
 */
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    int64_t entry_pos;

    /* update L2 table */
    entry_pos = ((int64_t)m_data->l2_offset * 512)
                + (m_data->l2_index * sizeof(m_data->offset));
    if (bdrv_pwrite_sync(extent->file, entry_pos,
                         &(m_data->offset), sizeof(m_data->offset)) < 0) {
        return -1;
    }

    if (extent->l1_backup_table_offset == 0) {
        return 0;
    }

    /* update backup L2 table */
    m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
    entry_pos = ((int64_t)m_data->l2_offset * 512)
                + (m_data->l2_index * sizeof(m_data->offset));
    if (bdrv_pwrite_sync(extent->file, entry_pos,
                         &(m_data->offset), sizeof(m_data->offset)) < 0) {
        return -1;
    }

    return 0;
}
807

    
808
static int get_cluster_offset(BlockDriverState *bs,
809
                                    VmdkExtent *extent,
810
                                    VmdkMetaData *m_data,
811
                                    uint64_t offset,
812
                                    int allocate,
813
                                    uint64_t *cluster_offset)
814
{
815
    unsigned int l1_index, l2_offset, l2_index;
816
    int min_index, i, j;
817
    uint32_t min_count, *l2_table, tmp = 0;
818

    
819
    if (m_data)
820
        m_data->valid = 0;
821
    if (extent->flat) {
822
        *cluster_offset = extent->flat_start_offset;
823
        return 0;
824
    }
825

    
826
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
827
    if (l1_index >= extent->l1_size) {
828
        return -1;
829
    }
830
    l2_offset = extent->l1_table[l1_index];
831
    if (!l2_offset) {
832
        return -1;
833
    }
834
    for (i = 0; i < L2_CACHE_SIZE; i++) {
835
        if (l2_offset == extent->l2_cache_offsets[i]) {
836
            /* increment the hit count */
837
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
838
                for (j = 0; j < L2_CACHE_SIZE; j++) {
839
                    extent->l2_cache_counts[j] >>= 1;
840
                }
841
            }
842
            l2_table = extent->l2_cache + (i * extent->l2_size);
843
            goto found;
844
        }
845
    }
846
    /* not found: load a new entry in the least used one */
847
    min_index = 0;
848
    min_count = 0xffffffff;
849
    for (i = 0; i < L2_CACHE_SIZE; i++) {
850
        if (extent->l2_cache_counts[i] < min_count) {
851
            min_count = extent->l2_cache_counts[i];
852
            min_index = i;
853
        }
854
    }
855
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
856
    if (bdrv_pread(
857
                extent->file,
858
                (int64_t)l2_offset * 512,
859
                l2_table,
860
                extent->l2_size * sizeof(uint32_t)
861
            ) != extent->l2_size * sizeof(uint32_t)) {
862
        return -1;
863
    }
864

    
865
    extent->l2_cache_offsets[min_index] = l2_offset;
866
    extent->l2_cache_counts[min_index] = 1;
867
 found:
868
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
869
    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
870

    
871
    if (!*cluster_offset) {
872
        if (!allocate) {
873
            return -1;
874
        }
875

    
876
        // Avoid the L2 tables update for the images that have snapshots.
877
        *cluster_offset = bdrv_getlength(extent->file);
878
        bdrv_truncate(
879
            extent->file,
880
            *cluster_offset + (extent->cluster_sectors << 9)
881
        );
882

    
883
        *cluster_offset >>= 9;
884
        tmp = cpu_to_le32(*cluster_offset);
885
        l2_table[l2_index] = tmp;
886

    
887
        /* First of all we write grain itself, to avoid race condition
888
         * that may to corrupt the image.
889
         * This problem may occur because of insufficient space on host disk
890
         * or inappropriate VM shutdown.
891
         */
892
        if (get_whole_cluster(
893
                bs, extent, *cluster_offset, offset, allocate) == -1)
894
            return -1;
895

    
896
        if (m_data) {
897
            m_data->offset = tmp;
898
            m_data->l1_index = l1_index;
899
            m_data->l2_index = l2_index;
900
            m_data->l2_offset = l2_offset;
901
            m_data->valid = 1;
902
        }
903
    }
904
    *cluster_offset <<= 9;
905
    return 0;
906
}
907

    
908
/*
 * Locate the extent that contains a sector.
 *
 * The scan starts at @start_hint, or at the first extent when the hint is
 * NULL, and returns the first extent whose end_sector lies beyond
 * @sector_num. Returns NULL when no such extent exists.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *cur = start_hint ? start_hint : &s->extents[0];

    for (; cur < &s->extents[s->num_extents]; cur++) {
        if (sector_num < cur->end_sector) {
            return cur;
        }
    }
    return NULL;
}
924

    
925
/*
 * Report whether the cluster holding @sector_num is mapped in this image.
 *
 * *pnum receives the number of sectors from @sector_num to the end of that
 * cluster, capped at @nb_sectors. Returns 1 when the cluster lookup
 * succeeds, 0 when it fails. When no extent covers @sector_num the function
 * returns 0 and leaves *pnum untouched.
 */
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = find_extent(s, sector_num, NULL);
    int64_t in_cluster, run, mapped;
    uint64_t offset;

    if (!extent) {
        return 0;
    }
    /* get_cluster_offset returning 0 means success */
    mapped = !get_cluster_offset(bs, extent, NULL,
                                 sector_num * 512, 0, &offset);

    in_cluster = sector_num % extent->cluster_sectors;
    run = extent->cluster_sectors - in_cluster;
    *pnum = (run > nb_sectors) ? nb_sectors : run;
    return mapped;
}
949

    
950
/*
 * Read nb_sectors sectors starting at sector_num into buf.
 *
 * The request is processed one cluster-bounded piece at a time.  Pieces whose
 * cluster lookup succeeds are read from the extent file.  Otherwise the data
 * comes from the backing image when one exists (after vmdk_is_cid_valid()
 * accepts it), or is zero-filled when there is no backing image.
 *
 * Returns 0 on success, -EIO when no extent covers a sector, -EINVAL when the
 * CID check fails, or the negative value returned by the underlying read.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    VmdkExtent *ext = NULL;
    uint64_t cluster_off;
    uint64_t in_cluster, chunk;
    int rc;

    while (nb_sectors > 0) {
        ext = find_extent(state, sector_num, ext);
        if (ext == NULL) {
            return -EIO;
        }

        rc = get_cluster_offset(bs, ext, NULL,
                                sector_num << 9, 0, &cluster_off);

        /* never cross a cluster boundary in a single step */
        in_cluster = sector_num % ext->cluster_sectors;
        chunk = ext->cluster_sectors - in_cluster;
        if (chunk > nb_sectors) {
            chunk = nb_sectors;
        }

        if (rc == 0) {
            /* allocated: fetch the data from the extent file */
            rc = bdrv_pread(ext->file,
                            cluster_off + in_cluster * 512,
                            buf, chunk * 512);
            if (rc < 0) {
                return rc;
            }
        } else if (bs->backing_hd) {
            /* not allocated: fall back to the parent image */
            if (!vmdk_is_cid_valid(bs)) {
                return -EINVAL;
            }
            rc = bdrv_read(bs->backing_hd, sector_num, buf, chunk);
            if (rc < 0) {
                return rc;
            }
        } else {
            /* not allocated and no parent: the range reads as zeroes */
            memset(buf, 0, 512 * chunk);
        }

        nb_sectors -= chunk;
        sector_num += chunk;
        buf += chunk * 512;
    }
    return 0;
}
996

    
997
/*
 * Write nb_sectors sectors from buf starting at sector_num.
 *
 * The request is split at cluster boundaries.  For every piece the cluster is
 * looked up with allocation enabled, the data is written to the extent file
 * and, when the lookup marked the metadata as valid, the L2 tables are
 * updated through vmdk_L2update().  After a piece has been written the CID is
 * refreshed once per open, guarded by s->cid_updated.
 *
 * Returns 0 on success, -EIO for an out-of-range start sector, a missing
 * extent or a failed L2 update, -EINVAL when the cluster lookup fails, or the
 * negative value returned by bdrv_pwrite().
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    VmdkExtent *ext = NULL;
    VmdkMetaData meta;
    uint64_t cluster_off;
    int64_t in_cluster;
    int chunk, rc;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -EIO;
    }

    while (nb_sectors > 0) {
        ext = find_extent(state, sector_num, ext);
        if (ext == NULL) {
            return -EIO;
        }

        if (get_cluster_offset(bs, ext, &meta,
                               sector_num << 9, 1, &cluster_off)) {
            return -EINVAL;
        }

        /* stay within the current cluster */
        in_cluster = sector_num % ext->cluster_sectors;
        chunk = ext->cluster_sectors - in_cluster;
        if (chunk > nb_sectors) {
            chunk = nb_sectors;
        }

        rc = bdrv_pwrite(ext->file,
                         cluster_off + in_cluster * 512,
                         buf,
                         chunk * 512);
        if (rc < 0) {
            return rc;
        }

        /* the L2 tables are only touched after the data write succeeded */
        if (meta.valid && vmdk_L2update(ext, &meta) == -1) {
            return -EIO;
        }

        nb_sectors -= chunk;
        sector_num += chunk;
        buf += chunk * 512;

        /* refresh the CID on the first write after the disk was opened */
        if (!state->cid_updated) {
            vmdk_write_cid(bs, time(NULL));
            state->cid_updated = true;
        }
    }
    return 0;
}
1060

    
1061
/*
 * Write one grain directory of a freshly created image.
 *
 * The directory starts at sector gd_sector and holds gt_count 32-bit entries.
 * Entry i is the sector number of grain table i; the tables are laid out back
 * to back (gt_size sectors each) right after the gd_size sectors reserved for
 * the directory.  Entries are written little-endian, like the header fields,
 * so the resulting image does not depend on the host byte order.
 *
 * Returns 0 on success or -errno when seeking or writing fails.
 */
static int vmdk_create_write_gd(int fd, uint64_t gd_sector, uint32_t gd_size,
                                uint32_t gt_size, uint32_t gt_count)
{
    uint32_t gt_sector = gd_sector + gd_size;
    uint32_t entry, i;

    if (lseek(fd, gd_sector << 9, SEEK_SET) < 0) {
        return -errno;
    }
    for (i = 0; i < gt_count; i++, gt_sector += gt_size) {
        entry = cpu_to_le32(gt_sector);
        if (qemu_write_full(fd, &entry, sizeof(entry)) != sizeof(entry)) {
            return -errno;
        }
    }
    return 0;
}

/*
 * Create a new monolithicSparse VMDK4 image at filename.
 *
 * Recognised options: BLOCK_OPT_SIZE (bytes, converted to 512-byte sectors),
 * BLOCK_OPT_BACKING_FILE and BLOCK_OPT_COMPAT6 (selects virtualHWVersion 6
 * instead of 4 in the descriptor).  When a backing file is given, creation is
 * delegated entirely to vmdk_snapshot_create().
 *
 * File layout, in sectors: magic + header at 0, descriptor at 1 (20 sectors),
 * redundant grain directory and its grain tables, grain directory and its
 * grain tables, then the grain area aligned to the granularity.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd;
    VMDK4Header header;
    uint32_t magic, grains, gd_size, gt_size, gt_count;
    static const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=ffffffff\n"
        "createType=\"monolithicSparse\"\n"
        "\n"
        "# Extent description\n"
        "RW %" PRId64 " SPARSE \"%s\"\n"
        "\n"
        "# The Disk Data Base \n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";
    char desc[1024];
    const char *real_filename, *temp_str;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;

    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
        }
        options++;
    }

    /* images with a backing file are created as snapshots of that file */
    if (backing_file) {
        return vmdk_snapshot_create(filename, backing_file);
    }

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0) {
        return -errno;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
    memset(&header, 0, sizeof(header));
    header.version = 1;
    header.flags = 3; /* ?? */
    header.capacity = total_size;
    header.granularity = 128;
    header.num_gtes_per_gte = 512;

    /* all sizes below are counted in 512-byte sectors */
    grains = (total_size + header.granularity - 1) / header.granularity;
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

    header.desc_offset = 1;
    header.desc_size = 20;
    header.rgd_offset = header.desc_offset + header.desc_size;
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
    header.grain_offset =
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
         header.granularity - 1) / header.granularity) *
        header.granularity;

    /* swap endianness for all header fields */
    header.version = cpu_to_le32(header.version);
    header.flags = cpu_to_le32(header.flags);
    header.capacity = cpu_to_le64(header.capacity);
    header.granularity = cpu_to_le64(header.granularity);
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
    header.desc_offset = cpu_to_le64(header.desc_offset);
    header.desc_size = cpu_to_le64(header.desc_size);
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
    header.gd_offset = cpu_to_le64(header.gd_offset);
    header.grain_offset = cpu_to_le64(header.grain_offset);

    header.check_bytes[0] = 0xa;
    header.check_bytes[1] = 0x20;
    header.check_bytes[2] = 0xd;
    header.check_bytes[3] = 0xa;

    /* write all the data */
    ret = qemu_write_full(fd, &magic, sizeof(magic));
    if (ret != sizeof(magic)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, &header, sizeof(header));
    if (ret != sizeof(header)) {
        ret = -errno;
        goto exit;
    }

    /* reserve room for all metadata up to the first grain */
    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
        ret = -errno;
        goto exit;
    }

    /* write grain directory */
    ret = vmdk_create_write_gd(fd, le64_to_cpu(header.rgd_offset),
                               gd_size, gt_size, gt_count);
    if (ret < 0) {
        goto exit;
    }

    /* write backup grain directory */
    ret = vmdk_create_write_gd(fd, le64_to_cpu(header.gd_offset),
                               gd_size, gt_size, gt_count);
    if (ret < 0) {
        goto exit;
    }

    /* compose the descriptor; only the file's base name is recorded */
    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL) {
        real_filename = temp_str + 1;
    }
    if ((temp_str = strrchr(real_filename, '/')) != NULL) {
        real_filename = temp_str + 1;
    }
    if ((temp_str = strrchr(real_filename, ':')) != NULL) {
        real_filename = temp_str + 1;
    }
    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
             total_size, real_filename,
             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
             total_size / (int64_t)(63 * 16));

    /* write the descriptor */
    if (lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET) < 0) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }

    ret = 0;
exit:
    close(fd);
    return ret;
}
1217

    
1218
/* Close callback of the driver: hands bs to vmdk_free_extents(). */
static void vmdk_close(BlockDriverState *bs)
{
    vmdk_free_extents(bs);
}
1222

    
1223
/*
 * Flush bs->file and then the file of every extent.
 *
 * All extents are flushed even if an earlier flush failed.  The result is the
 * return value of the bs->file flush, replaced by the error of the last
 * extent whose flush returned a negative value.
 */
static int vmdk_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int result = bdrv_flush(bs->file);
    int idx;

    for (idx = 0; idx < state->num_extents; idx++) {
        int rc = bdrv_flush(state->extents[idx].file);

        if (rc < 0) {
            result = rc;
        }
    }
    return result;
}
1237

    
1238

    
1239
static QEMUOptionParameter vmdk_create_options[] = {
1240
    {
1241
        .name = BLOCK_OPT_SIZE,
1242
        .type = OPT_SIZE,
1243
        .help = "Virtual disk size"
1244
    },
1245
    {
1246
        .name = BLOCK_OPT_BACKING_FILE,
1247
        .type = OPT_STRING,
1248
        .help = "File name of a base image"
1249
    },
1250
    {
1251
        .name = BLOCK_OPT_COMPAT6,
1252
        .type = OPT_FLAG,
1253
        .help = "VMDK version 6 image"
1254
    },
1255
    { NULL }
1256
};
1257

    
1258
static BlockDriver bdrv_vmdk = {
1259
    .format_name        = "vmdk",
1260
    .instance_size        = sizeof(BDRVVmdkState),
1261
    .bdrv_probe                = vmdk_probe,
1262
    .bdrv_open      = vmdk_open,
1263
    .bdrv_read                = vmdk_read,
1264
    .bdrv_write                = vmdk_write,
1265
    .bdrv_close                = vmdk_close,
1266
    .bdrv_create        = vmdk_create,
1267
    .bdrv_flush                = vmdk_flush,
1268
    .bdrv_is_allocated        = vmdk_is_allocated,
1269

    
1270
    .create_options = vmdk_create_options,
1271
};
1272

    
1273
static void bdrv_vmdk_init(void)
1274
{
1275
    bdrv_register(&bdrv_vmdk);
1276
}
1277

    
1278
/* Hand bdrv_vmdk_init to block_init() (presumably declared in module.h). */
block_init(bdrv_vmdk_init);