Statistics
| Branch: | Revision:

root / block / vmdk.c @ f66fd6c3

History | View | Annotate | Download (37.6 kB)

1
/*
2
 * Block driver for the VMDK format
3
 *
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "qemu-common.h"
27
#include "block_int.h"
28
#include "module.h"
29

    
30
/* Pack four characters into one 32-bit value, first character in the most
 * significant byte.  vmdk_probe() and vmdk_open() compare these against the
 * first four bytes of the image converted with be32_to_cpu(). */
#define VMDK_MAGIC4(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
#define VMDK3_MAGIC VMDK_MAGIC4('C', 'O', 'W', 'D')
#define VMDK4_MAGIC VMDK_MAGIC4('K', 'D', 'M', 'V')
32

    
33
/*
 * Header of a VMDK3 image as read by vmdk_open_vmdk3() from the bytes that
 * follow the 4-byte magic.  The fields that function consumes are converted
 * with le32_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* extent size, in sectors */
    uint32_t granularity;       /* sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45

    
46
/*
 * Header of a VMDK4 image as read by vmdk_open_vmdk4() from the bytes that
 * follow the 4-byte magic.  The struct is packed so that the in-memory layout
 * has no padding (num_gtes_per_gte is followed directly by a 64-bit field).
 * Values are little-endian on disk.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* extent size, in sectors */
    int64_t granularity;        /* sectors per cluster (grain) */
    int64_t desc_offset;
    int64_t desc_size;
    int32_t num_gtes_per_gte;   /* entries per L2 table */
    int64_t rgd_offset;         /* backup L1 table position, in sectors */
    int64_t gd_offset;          /* L1 table position, in sectors */
    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
} __attribute__((packed)) VMDK4Header;
60

    
61
/* Number of L2 tables held in each extent's l2_cache. */
#define L2_CACHE_SIZE 16
62

    
63
typedef struct VmdkExtent {
64
    BlockDriverState *file;
65
    bool flat;
66
    int64_t sectors;
67
    int64_t end_sector;
68
    int64_t flat_start_offset;
69
    int64_t l1_table_offset;
70
    int64_t l1_backup_table_offset;
71
    uint32_t *l1_table;
72
    uint32_t *l1_backup_table;
73
    unsigned int l1_size;
74
    uint32_t l1_entry_sectors;
75

    
76
    unsigned int l2_size;
77
    uint32_t *l2_cache;
78
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
79
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
80

    
81
    unsigned int cluster_sectors;
82
} VmdkExtent;
83

    
84
typedef struct BDRVVmdkState {
85
    int desc_offset;
86
    bool cid_updated;
87
    uint32_t parent_cid;
88
    int num_extents;
89
    /* Extent array with num_extents entries, ascend ordered by address */
90
    VmdkExtent *extents;
91
} BDRVVmdkState;
92

    
93
/*
 * Description of an L2 entry that get_cluster_offset() changed in memory and
 * that vmdk_L2update() still has to write to the image.
 */
typedef struct VmdkMetaData {
    uint32_t offset;        /* new entry value, already in on-disk byte order */
    unsigned int l1_index;  /* index of the L1 entry covering the cluster */
    unsigned int l2_index;  /* index of the entry inside its L2 table */
    unsigned int l2_offset; /* position of the L2 table, in sectors */
    int valid;              /* non-zero when the fields above are filled in */
} VmdkMetaData;
100

    
101
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
102
{
103
    uint32_t magic;
104

    
105
    if (buf_size < 4)
106
        return 0;
107
    magic = be32_to_cpu(*(uint32_t *)buf);
108
    if (magic == VMDK3_MAGIC ||
109
        magic == VMDK4_MAGIC) {
110
        return 100;
111
    } else {
112
        const char *p = (const char *)buf;
113
        const char *end = p + buf_size;
114
        while (p < end) {
115
            if (*p == '#') {
116
                /* skip comment line */
117
                while (p < end && *p != '\n') {
118
                    p++;
119
                }
120
                p++;
121
                continue;
122
            }
123
            if (*p == ' ') {
124
                while (p < end && *p == ' ') {
125
                    p++;
126
                }
127
                /* skip '\r' if windows line endings used. */
128
                if (p < end && *p == '\r') {
129
                    p++;
130
                }
131
                /* only accept blank lines before 'version=' line */
132
                if (p == end || *p != '\n') {
133
                    return 0;
134
                }
135
                p++;
136
                continue;
137
            }
138
            if (end - p >= strlen("version=X\n")) {
139
                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
140
                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
141
                    return 100;
142
                }
143
            }
144
            if (end - p >= strlen("version=X\r\n")) {
145
                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
146
                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
147
                    return 100;
148
                }
149
            }
150
            return 0;
151
        }
152
        return 0;
153
    }
154
}
155

    
156
/* While this is defined, vmdk_is_cid_valid() compares the stored parent CID
 * with the CID read from the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* 20 sectors of 512 bytes each */
#define DESC_SIZE (20 * SECTOR_SIZE)
#define BUF_SIZE 4096
/* first sector of 512 bytes */
#define HEADER_SIZE 512
162

    
163
/*
 * Free the lookup tables of every extent and then the extent array itself.
 * The pointers and num_extents in the driver state are left as they were.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx = 0;

    while (idx < s->num_extents) {
        VmdkExtent *e = &s->extents[idx++];

        qemu_free(e->l1_table);
        qemu_free(e->l2_cache);
        qemu_free(e->l1_backup_table);
    }
    qemu_free(s->extents);
}
175

    
176
/*
 * Read a content ID from the image's embedded descriptor.
 *
 * parent: non-zero to look up "parentCID", zero to look up "CID".
 *
 * Returns the hexadecimal value that follows the key, or 0 when the
 * descriptor cannot be read, the key is absent, or no value can be parsed.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    /* One extra byte so the text is always NUL-terminated for strstr() */
    char desc[DESC_SIZE + 1];
    uint32_t cid = 0;
    unsigned int parsed;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;

    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return 0;
    }
    desc[DESC_SIZE] = '\0';

    /* sizeof counts the literal's NUL, which accounts for the '=' that
     * follows the key in the descriptor. */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    p_name = strstr(desc, cid_str);
    /* Only step over the key when the text really extends that far */
    if (p_name != NULL && strlen(p_name) >= cid_str_size) {
        p_name += cid_str_size;
        if (sscanf(p_name, "%x", &parsed) == 1) {
            cid = parsed;
        }
    }

    return cid;
}
203

    
204
/*
 * Replace the value of the "CID" key in the embedded descriptor with cid
 * (written in hexadecimal) and write the descriptor back synchronously.
 * The text from "parentCID" onwards is saved first and appended again after
 * the new value.
 *
 * Returns 0 on success, -EIO when the descriptor cannot be read or written,
 * and -EINVAL when the descriptor has no "parentCID" key.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
    char *p_name, *tmp_str;
    BDRVVmdkState *s = bs->opaque;

    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return -EIO;
    }
    /* The read fills the whole buffer; terminate it before string searches */
    desc[DESC_SIZE - 1] = '\0';

    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL) {
        /* Nothing to preserve and no NULL may reach pstrcpy() */
        return -EINVAL;
    }
    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);

    p_name = strstr(desc, "CID");
    if (p_name != NULL) {
        /* sizeof counts the NUL, i.e. skips "CID" plus the following '=' */
        p_name += sizeof("CID");
        snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tmp_desc);
    }

    if (bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
        return -EIO;
    }
    return 0;
}
228

    
229
/*
 * Check that the backing image still carries the CID recorded as this
 * image's parent CID.
 *
 * Returns 1 when there is no backing image, when the CIDs match, or when
 * CHECK_CID is not defined; returns 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *parent = bs->backing_hd;

    if (parent != NULL && vmdk_read_cid(parent, 0) != s->parent_cid) {
        /* CID not valid */
        return 0;
    }
#endif
    /* CID valid */
    return 1;
}
246

    
247
/*
 * Look for parentFileNameHint="<name>" in the embedded descriptor and, when
 * present, copy <name> into bs->backing_file.
 *
 * Returns 0 when the key is absent or the name was copied; returns -1 when
 * the descriptor cannot be read, the value is cut off or unterminated, or
 * the name does not fit into bs->backing_file.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    char *p_name;
    char desc[DESC_SIZE + 1];
    BDRVVmdkState *s = bs->opaque;

    desc[DESC_SIZE] = '\0';
    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
        return -1;
    }

    p_name = strstr(desc, "parentFileNameHint");
    if (p_name != NULL) {
        char *end_name;
        /* key length + '=' + opening quote (sizeof counts the NUL) */
        const size_t skip = sizeof("parentFileNameHint") + 1;

        /* Do not step past the terminator when the key sits at the very
         * end of the descriptor. */
        if (strlen(p_name) < skip) {
            return -1;
        }
        p_name += skip;
        end_name = strchr(p_name, '\"');
        if (end_name == NULL) {
            return -1;
        }
        if ((size_t)(end_name - p_name) > sizeof(bs->backing_file) - 1) {
            return -1;
        }

        /* pstrcpy is given length + 1 as the buffer size, so the copy stops
         * right before the closing quote. */
        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}
272

    
273
/* Create and append extent to the extent array. Return the added VmdkExtent
274
 * address. return NULL if allocation failed. */
275
/*
 * Grow the extent array by one entry, fill the new entry from the arguments
 * and update bs->total_sectors to the new end of the disk.
 *
 * Returns the address of the new entry.  The result of qemu_realloc() is
 * used without a NULL check.  The returned pointer refers into s->extents
 * and is invalidated by the next call.
 */
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
                           BlockDriverState *file, bool flat, int64_t sectors,
                           int64_t l1_offset, int64_t l1_backup_offset,
                           uint32_t l1_size,
                           int l2_size, unsigned int cluster_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    s->extents = qemu_realloc(s->extents,
                              (s->num_extents + 1) * sizeof(VmdkExtent));
    extent = s->extents + s->num_extents;
    s->num_extents += 1;

    memset(extent, 0, sizeof(*extent));
    extent->file = file;
    extent->flat = flat;
    extent->sectors = sectors;
    extent->l1_table_offset = l1_offset;
    extent->l1_backup_table_offset = l1_backup_offset;
    extent->l1_size = l1_size;
    extent->l2_size = l2_size;
    extent->cluster_sectors = cluster_sectors;
    extent->l1_entry_sectors = l2_size * cluster_sectors;

    /* Extents are laid out back to back: continue after the previous one */
    if (s->num_extents > 1) {
        extent->end_sector = extent[-1].end_sector + extent->sectors;
    } else {
        extent->end_sector = extent->sectors;
    }
    bs->total_sectors = extent->end_sector;
    return extent;
}
308

    
309
/*
 * Load the L1 table (and the backup L1 table when the extent has one) from
 * extent->file, convert the entries from little-endian, and allocate the L2
 * cache.
 *
 * Returns 0 on success or the negative bdrv_pread() result on failure.  On
 * failure the tables allocated here are freed and their pointers reset to
 * NULL, so a later vmdk_free_extents() does not free them a second time.
 */
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
{
    int ret;
    int l1_size, i;

    /* read the L1 table */
    l1_size = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = qemu_malloc(l1_size);
    ret = bdrv_pread(extent->file,
                    extent->l1_table_offset,
                    extent->l1_table,
                    l1_size);
    if (ret < 0) {
        goto fail_l1;
    }
    for (i = 0; i < extent->l1_size; i++) {
        le32_to_cpus(&extent->l1_table[i]);
    }

    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = qemu_malloc(l1_size);
        ret = bdrv_pread(extent->file,
                        extent->l1_backup_table_offset,
                        extent->l1_backup_table,
                        l1_size);
        if (ret < 0) {
            goto fail_l1b;
        }
        for (i = 0; i < extent->l1_size; i++) {
            le32_to_cpus(&extent->l1_backup_table[i]);
        }
    }

    extent->l2_cache =
        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail_l1b:
    qemu_free(extent->l1_backup_table);
    extent->l1_backup_table = NULL;
 fail_l1:
    qemu_free(extent->l1_table);
    extent->l1_table = NULL;
    return ret;
}
351

    
352
/*
 * Open a VMDK3 image: read the header that follows the magic, register one
 * sparse extent (64 L1 entries, 512 entries per L2 table) and load its
 * tables.
 *
 * Returns 0 on success.  On failure every extent registered so far is
 * released through vmdk_free_extents() and the failing call's result is
 * returned.
 */
static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    VMDK3Header header;
    VmdkExtent *extent;
    uint32_t magic;     /* only its size is used: the header follows it */
    int ret;

    s->desc_offset = 0x200;
    ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
    if (ret >= 0) {
        extent = vmdk_add_extent(bs,
                                 bs->file, false,
                                 le32_to_cpu(header.disk_sectors),
                                 le32_to_cpu(header.l1dir_offset) << 9,
                                 0, 1 << 6, 1 << 9,
                                 le32_to_cpu(header.granularity));
        ret = vmdk_init_tables(bs, extent);
        if (!ret) {
            return 0;
        }
    }

    vmdk_free_extents(bs);
    return ret;
}
382

    
383
/*
 * Open a VMDK4 image: read the header that follows the magic, register one
 * sparse extent described by it, record the backing file name and parent
 * CID from the descriptor, and load the extent's tables.
 *
 * Returns 0 on success.  Returns -EINVAL when the header yields zero sectors
 * per L1 entry, otherwise the failing call's result; on failure the extents
 * registered so far are released.
 */
static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
{
    int ret;
    uint32_t magic;     /* only its size is used: the header follows it */
    uint32_t l1_size, l1_entry_sectors;
    VMDK4Header header;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    s->desc_offset = 0x200;
    ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                        * le64_to_cpu(header.granularity);
    /* Reject a corrupt header before it is used as a divisor below */
    if (l1_entry_sectors == 0) {
        ret = -EINVAL;
        goto fail;
    }
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
    extent = vmdk_add_extent(bs, bs->file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          le64_to_cpu(header.rgd_offset) << 9,
                          l1_size,
                          le32_to_cpu(header.num_gtes_per_gte),
                          le64_to_cpu(header.granularity));
    /* The extent recomputes this value from narrower types; recheck it */
    if (extent->l1_entry_sectors == 0) {
        ret = -EINVAL;
        goto fail;
    }
    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
        goto fail;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        goto fail;
    }
    return 0;
 fail:
    vmdk_free_extents(bs);
    return ret;
}
427

    
428
/* find an option value out of descriptor file */
429
/*
 * Find opt_name in the descriptor text and copy the quoted value that
 * follows it (the text between opt_name="  and the next '"') into buf.
 *
 * Returns 0 on success; -1 when opt_name is absent, the value is cut off,
 * the closing quote is missing, or buf_size is too small for the value plus
 * its terminating NUL.
 */
static int vmdk_parse_description(const char *desc, const char *opt_name,
        char *buf, int buf_size)
{
    const char *end = desc + strlen(desc);
    char *value, *quote;

    value = strstr(desc, opt_name);
    if (value == NULL) {
        return -1;
    }

    /* Skip "=\"" following opt_name */
    value += strlen(opt_name) + 2;
    if (value >= end) {
        return -1;
    }

    /* Scan forward to the closing quote */
    for (quote = value; quote < end && *quote != '"'; quote++) {
    }
    if (quote == end || buf_size < quote - value + 1) {
        return -1;
    }

    pstrcpy(buf, quote - value + 1, value);
    return 0;
}
454

    
455
/*
 * Walk the descriptor text line by line and register every extent line of
 * the form
 *     RW <sectors> FLAT "file-name.vmdk" <offset>
 * Lines that do not look like an RW extent line are skipped.
 *
 * desc:           NUL-terminated descriptor text.
 * desc_file_path: path of the descriptor file, used to resolve extent names.
 *
 * Returns 0 on success, -EINVAL for a malformed extent line, -ENOTSUP for a
 * SPARSE extent, or the bdrv_file_open() error.  Extents added before a
 * failure are not released here.
 */
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
        const char *desc_file_path)
{
    int ret;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;

    while (*p) {
        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
                access, &sectors, type, fname, &flat_offset);
        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
            if (ret != 5 || flat_offset < 0) {
                return -EINVAL;
            }
        } else if (ret != 4) {
            return -EINVAL;
        }

        /* trim the quotation marks around */
        if (fname[0] == '"') {
            /* strlen() bytes starting at fname + 1 include the NUL */
            memmove(fname, fname + 1, strlen(fname));
            if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
                return -EINVAL;
            }
            fname[strlen(fname) - 1] = '\0';
        }
        if (sectors <= 0 ||
            (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
            (strcmp(access, "RW"))) {
            goto next_line;
        }

        /* save to extents array */
        if (!strcmp(type, "FLAT")) {
            /* FLAT extent */
            char extent_path[PATH_MAX];
            BlockDriverState *extent_file;
            VmdkExtent *extent;

            path_combine(extent_path, sizeof(extent_path),
                    desc_file_path, fname);
            ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
            if (ret) {
                return ret;
            }
            extent = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, sectors);
            extent->flat_start_offset = flat_offset;
        } else {
            /* SPARSE extent, not supported for now */
            fprintf(stderr,
                "VMDK: Not supported extent type \"%s\""".\n", type);
            return -ENOTSUP;
        }
next_line:
        /* Move to the next line.  Step over the newline only when there is
         * one, so a last line without '\n' leaves p on the terminating NUL
         * instead of one byte past it. */
        while (*p) {
            if (*p == '\n') {
                p++;
                break;
            }
            p++;
        }
    }
    return 0;
}
530

    
531
/*
 * Open an image whose first bytes are a text descriptor rather than a sparse
 * extent header.  Only createType "monolithicFlat" is accepted.
 *
 * Returns 0 on success; the bdrv_pread() error, -EINVAL (no createType, or
 * a bad parent hint), -ENOTSUP (other createType), or the error from
 * vmdk_parse_extents() on failure.  Extents registered before a failure are
 * released.
 */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags)
{
    int ret;
    char buf[2048];
    char ct[128];
    BDRVVmdkState *s = bs->opaque;

    ret = bdrv_pread(bs->file, 0, buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }
    buf[2047] = '\0';
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
        return -EINVAL;
    }
    if (strcmp(ct, "monolithicFlat")) {
        fprintf(stderr,
                "VMDK: Not supported image type \"%s\""".\n", ct);
        return -ENOTSUP;
    }
    s->desc_offset = 0;
    ret = vmdk_parse_extents(buf, bs, bs->file->filename);
    if (ret) {
        /* Extents added before the failing line would otherwise leak */
        vmdk_free_extents(bs);
        return ret;
    }

    /* try to open parent images, if exist */
    if (vmdk_parent_open(bs)) {
        vmdk_free_extents(bs);
        return -EINVAL;
    }
    s->parent_cid = vmdk_read_cid(bs, 1);
    return 0;
}
565

    
566
/*
 * Driver open entry point: read the first four bytes of the image and hand
 * over to the VMDK3, VMDK4 or text-descriptor opener accordingly.
 *
 * Returns -EIO when the magic cannot be read, otherwise the selected
 * opener's result.
 */
static int vmdk_open(BlockDriverState *bs, int flags)
{
    uint32_t magic;

    if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
        return -EIO;
    }

    switch (be32_to_cpu(magic)) {
    case VMDK3_MAGIC:
        return vmdk_open_vmdk3(bs, flags);
    case VMDK4_MAGIC:
        return vmdk_open_vmdk4(bs, flags);
    default:
        /* No binary header: treat the file as a text descriptor */
        return vmdk_open_desc_file(bs, flags);
    }
}
583

    
584
static int get_whole_cluster(BlockDriverState *bs,
585
                VmdkExtent *extent,
586
                uint64_t cluster_offset,
587
                uint64_t offset,
588
                bool allocate)
589
{
590
    /* 128 sectors * 512 bytes each = grain size 64KB */
591
    uint8_t  whole_grain[extent->cluster_sectors * 512];
592

    
593
    /* we will be here if it's first write on non-exist grain(cluster).
594
     * try to read from parent image, if exist */
595
    if (bs->backing_hd) {
596
        int ret;
597

    
598
        if (!vmdk_is_cid_valid(bs))
599
            return -1;
600

    
601
        /* floor offset to cluster */
602
        offset -= offset % (extent->cluster_sectors * 512);
603
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
604
                extent->cluster_sectors);
605
        if (ret < 0) {
606
            return -1;
607
        }
608

    
609
        /* Write grain only into the active image */
610
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
611
                extent->cluster_sectors);
612
        if (ret < 0) {
613
            return -1;
614
        }
615
    }
616
    return 0;
617
}
618

    
619
/*
 * Write the L2 entry described by m_data to the image, and to the backup L2
 * table as well when the extent has a backup L1 table.
 *
 * Note that m_data->l2_offset is overwritten with the backup table's
 * position when the backup copy is written.
 *
 * Returns 0 on success, -1 when a synchronous write fails.
 */
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    int rc;

    /* update L2 table */
    rc = bdrv_pwrite_sync(extent->file,
                          ((int64_t)m_data->l2_offset * 512)
                              + (m_data->l2_index * sizeof(m_data->offset)),
                          &(m_data->offset),
                          sizeof(m_data->offset));
    if (rc < 0) {
        return -1;
    }

    if (extent->l1_backup_table_offset == 0) {
        return 0;
    }

    /* update backup L2 table */
    m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
    rc = bdrv_pwrite_sync(extent->file,
                          ((int64_t)m_data->l2_offset * 512)
                              + (m_data->l2_index * sizeof(m_data->offset)),
                          &(m_data->offset),
                          sizeof(m_data->offset));
    if (rc < 0) {
        return -1;
    }
    return 0;
}
646

    
647
static int get_cluster_offset(BlockDriverState *bs,
648
                                    VmdkExtent *extent,
649
                                    VmdkMetaData *m_data,
650
                                    uint64_t offset,
651
                                    int allocate,
652
                                    uint64_t *cluster_offset)
653
{
654
    unsigned int l1_index, l2_offset, l2_index;
655
    int min_index, i, j;
656
    uint32_t min_count, *l2_table, tmp = 0;
657

    
658
    if (m_data)
659
        m_data->valid = 0;
660
    if (extent->flat) {
661
        *cluster_offset = extent->flat_start_offset;
662
        return 0;
663
    }
664

    
665
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
666
    if (l1_index >= extent->l1_size) {
667
        return -1;
668
    }
669
    l2_offset = extent->l1_table[l1_index];
670
    if (!l2_offset) {
671
        return -1;
672
    }
673
    for (i = 0; i < L2_CACHE_SIZE; i++) {
674
        if (l2_offset == extent->l2_cache_offsets[i]) {
675
            /* increment the hit count */
676
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
677
                for (j = 0; j < L2_CACHE_SIZE; j++) {
678
                    extent->l2_cache_counts[j] >>= 1;
679
                }
680
            }
681
            l2_table = extent->l2_cache + (i * extent->l2_size);
682
            goto found;
683
        }
684
    }
685
    /* not found: load a new entry in the least used one */
686
    min_index = 0;
687
    min_count = 0xffffffff;
688
    for (i = 0; i < L2_CACHE_SIZE; i++) {
689
        if (extent->l2_cache_counts[i] < min_count) {
690
            min_count = extent->l2_cache_counts[i];
691
            min_index = i;
692
        }
693
    }
694
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
695
    if (bdrv_pread(
696
                extent->file,
697
                (int64_t)l2_offset * 512,
698
                l2_table,
699
                extent->l2_size * sizeof(uint32_t)
700
            ) != extent->l2_size * sizeof(uint32_t)) {
701
        return -1;
702
    }
703

    
704
    extent->l2_cache_offsets[min_index] = l2_offset;
705
    extent->l2_cache_counts[min_index] = 1;
706
 found:
707
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
708
    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
709

    
710
    if (!*cluster_offset) {
711
        if (!allocate) {
712
            return -1;
713
        }
714

    
715
        // Avoid the L2 tables update for the images that have snapshots.
716
        *cluster_offset = bdrv_getlength(extent->file);
717
        bdrv_truncate(
718
            extent->file,
719
            *cluster_offset + (extent->cluster_sectors << 9)
720
        );
721

    
722
        *cluster_offset >>= 9;
723
        tmp = cpu_to_le32(*cluster_offset);
724
        l2_table[l2_index] = tmp;
725

    
726
        /* First of all we write grain itself, to avoid race condition
727
         * that may to corrupt the image.
728
         * This problem may occur because of insufficient space on host disk
729
         * or inappropriate VM shutdown.
730
         */
731
        if (get_whole_cluster(
732
                bs, extent, *cluster_offset, offset, allocate) == -1)
733
            return -1;
734

    
735
        if (m_data) {
736
            m_data->offset = tmp;
737
            m_data->l1_index = l1_index;
738
            m_data->l2_index = l2_index;
739
            m_data->l2_offset = l2_offset;
740
            m_data->valid = 1;
741
        }
742
    }
743
    *cluster_offset <<= 9;
744
    return 0;
745
}
746

    
747
/*
 * Return the first extent, searching from start_hint (or from the first
 * extent when start_hint is NULL), whose end_sector lies beyond sector_num.
 * Returns NULL when no such extent exists.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *cur = start_hint ? start_hint : &s->extents[0];
    VmdkExtent *limit = &s->extents[s->num_extents];

    for (; cur < limit; cur++) {
        if (sector_num < cur->end_sector) {
            return cur;
        }
    }
    return NULL;
}
763

    
764
/*
 * Report whether the cluster containing sector_num is mapped in this image.
 *
 * *pnum receives the number of sectors, starting at sector_num and capped
 * at nb_sectors, that remain in that cluster.  When sector_num lies beyond
 * every extent, 0 is returned and *pnum is left untouched.
 *
 * Returns 1 when get_cluster_offset() succeeds, 0 otherwise.
 */
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = find_extent(s, sector_num, NULL);
    int64_t index_in_cluster, n;
    uint64_t offset;
    int allocated;

    if (extent == NULL) {
        return 0;
    }

    /* get_cluster_offset returning 0 means success */
    allocated = !get_cluster_offset(bs, extent, NULL,
                                    sector_num * 512, 0, &offset);

    index_in_cluster = sector_num % extent->cluster_sectors;
    n = extent->cluster_sectors - index_in_cluster;
    *pnum = (n > nb_sectors) ? nb_sectors : n;
    return allocated;
}
788

    
789
/*
 * Read nb_sectors sectors starting at sector_num into buf, one cluster-sized
 * piece at a time.  Pieces that are not mapped in this image come from the
 * backing image when there is one and are zero-filled otherwise.
 *
 * Returns 0 on success, -EIO when a sector lies beyond every extent,
 * -EINVAL when the parent CID check fails, or the negative result of the
 * failing read.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    uint64_t cluster_offset;
    uint64_t n, index_in_cluster;
    int ret;

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (!extent) {
            return -EIO;
        }
        ret = get_cluster_offset(
                            bs, extent, NULL,
                            sector_num << 9, 0, &cluster_offset);

        /* Never cross a cluster boundary in one step */
        index_in_cluster = sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (ret == 0) {
            ret = bdrv_pread(extent->file,
                            cluster_offset + index_in_cluster * 512,
                            buf, n * 512);
            if (ret < 0) {
                return ret;
            }
        } else if (!bs->backing_hd) {
            /* not allocated and no parent image */
            memset(buf, 0, 512 * n);
        } else {
            /* not allocated: read from the parent image */
            if (!vmdk_is_cid_valid(bs)) {
                return -EINVAL;
            }
            ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
            if (ret < 0) {
                return ret;
            }
        }

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}
835

    
836
/*
 * Write nb_sectors sectors from buf starting at sector_num, one
 * cluster-sized piece at a time, allocating clusters as needed and writing
 * the L2 entry of each newly allocated cluster after its data.  The image's
 * CID is rewritten once, after the first piece has been written.
 *
 * Returns 0 on success, -EIO when sector_num is beyond the disk, a sector
 * lies beyond every extent, or an L2 update fails; -EINVAL when the cluster
 * lookup fails; or the negative result of the failing data write.
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    VmdkMetaData m_data;
    uint64_t cluster_offset;
    int64_t index_in_cluster;
    int n, ret;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -EIO;
    }

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (extent == NULL) {
            return -EIO;
        }
        if (get_cluster_offset(bs, extent, &m_data,
                               sector_num << 9, 1, &cluster_offset)) {
            return -EINVAL;
        }

        /* Never cross a cluster boundary in one step */
        index_in_cluster = sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        ret = bdrv_pwrite(extent->file,
                        cluster_offset + index_in_cluster * 512,
                        buf,
                        n * 512);
        if (ret < 0) {
            return ret;
        }
        /* update L2 tables for a cluster allocated by the lookup above */
        if (m_data.valid && vmdk_L2update(extent, &m_data) == -1) {
            return -EIO;
        }

        /* update CID on the first write every time the virtual disk is
         * opened */
        if (!s->cid_updated) {
            vmdk_write_cid(bs, time(NULL));
            s->cid_updated = true;
        }

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}
899

    
900

    
901
static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat)
902
{
903
    int ret, i;
904
    int fd = 0;
905
    VMDK4Header header;
906
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
907

    
908
    fd = open(
909
        filename,
910
        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
911
        0644);
912
    if (fd < 0) {
913
        return -errno;
914
    }
915
    if (flat) {
916
        ret = ftruncate(fd, filesize);
917
        if (ret < 0) {
918
            ret = -errno;
919
        }
920
        goto exit;
921
    }
922
    magic = cpu_to_be32(VMDK4_MAGIC);
923
    memset(&header, 0, sizeof(header));
924
    header.version = 1;
925
    header.flags = 3; /* ?? */
926
    header.capacity = filesize / 512;
927
    header.granularity = 128;
928
    header.num_gtes_per_gte = 512;
929

    
930
    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
931
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
932
    gt_count =
933
        (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
934
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
935

    
936
    header.desc_offset = 1;
937
    header.desc_size = 20;
938
    header.rgd_offset = header.desc_offset + header.desc_size;
939
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
940
    header.grain_offset =
941
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
942
         header.granularity - 1) / header.granularity) *
943
        header.granularity;
944
    /* swap endianness for all header fields */
945
    header.version = cpu_to_le32(header.version);
946
    header.flags = cpu_to_le32(header.flags);
947
    header.capacity = cpu_to_le64(header.capacity);
948
    header.granularity = cpu_to_le64(header.granularity);
949
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
950
    header.desc_offset = cpu_to_le64(header.desc_offset);
951
    header.desc_size = cpu_to_le64(header.desc_size);
952
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
953
    header.gd_offset = cpu_to_le64(header.gd_offset);
954
    header.grain_offset = cpu_to_le64(header.grain_offset);
955

    
956
    header.check_bytes[0] = 0xa;
957
    header.check_bytes[1] = 0x20;
958
    header.check_bytes[2] = 0xd;
959
    header.check_bytes[3] = 0xa;
960

    
961
    /* write all the data */
962
    ret = qemu_write_full(fd, &magic, sizeof(magic));
963
    if (ret != sizeof(magic)) {
964
        ret = -errno;
965
        goto exit;
966
    }
967
    ret = qemu_write_full(fd, &header, sizeof(header));
968
    if (ret != sizeof(header)) {
969
        ret = -errno;
970
        goto exit;
971
    }
972

    
973
    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
974
    if (ret < 0) {
975
        ret = -errno;
976
        goto exit;
977
    }
978

    
979
    /* write grain directory */
980
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
981
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
982
         i < gt_count; i++, tmp += gt_size) {
983
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
984
        if (ret != sizeof(tmp)) {
985
            ret = -errno;
986
            goto exit;
987
        }
988
    }
989

    
990
    /* write backup grain directory */
991
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
992
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
993
         i < gt_count; i++, tmp += gt_size) {
994
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
995
        if (ret != sizeof(tmp)) {
996
            ret = -errno;
997
            goto exit;
998
        }
999
    }
1000

    
1001
    ret = 0;
1002
 exit:
1003
    close(fd);
1004
    return ret;
1005
}
1006

    
1007
/*
 * Split filename into three pieces:
 *   path    - everything up to and including the last '/', or failing that
 *             the last '\\', or failing that the last ':' (empty if none)
 *   prefix  - the remaining name up to, not including, its last '.'
 *   postfix - the last '.' and everything after it (empty if there is none)
 *
 * buf_len is the size of each of the three output buffers.
 *
 * Returns 0 on success, -1 if filename is NULL/empty or if the path or the
 * prefix does not fit into buf_len bytes.
 */
static int filename_decompose(const char *filename, char *path, char *prefix,
        char *postfix, size_t buf_len)
{
    static const char separators[] = { '/', '\\', ':' };
    const char *name = NULL;
    const char *dot;
    size_t k, len;

    if (!filename || filename[0] == '\0') {
        fprintf(stderr, "Vmdk: no filename provided.\n");
        return -1;
    }

    /* the first separator kind that occurs at all wins */
    for (k = 0; k < sizeof(separators) && name == NULL; k++) {
        name = strrchr(filename, separators[k]);
    }

    if (name == NULL) {
        name = filename;
        path[0] = '\0';
    } else {
        name++;                     /* keep the separator in the path */
        len = name - filename;
        if (len >= buf_len) {
            return -1;
        }
        pstrcpy(path, len + 1, filename);
    }

    dot = strrchr(name, '.');
    if (dot == NULL) {
        pstrcpy(prefix, buf_len, name);
        postfix[0] = '\0';
        return 0;
    }

    len = dot - name;
    if (len >= buf_len) {
        return -1;
    }
    pstrcpy(prefix, len + 1, name);
    pstrcpy(postfix, buf_len, dot);
    return 0;
}
1046

    
1047
/*
 * Express target relative to the directory that contains base.
 *
 * The two paths are compared textually: their common leading directories are
 * dropped, one ".." component is emitted for every directory separator left
 * in base, and the rest of target is appended.  An absolute target is copied
 * unchanged.  The result is truncated to fit dest_size bytes and is always
 * NUL-terminated.
 *
 * Returns 0 on success, -1 if an argument is NULL or dest_size is not
 * positive.
 */
static int relative_path(char *dest, int dest_size,
        const char *base, const char *target)
{
    int i = 0;
    int common = 0;     /* length of the common prefix, cut after a separator */
    int n = 0;
    const char *p, *q;
#ifdef _WIN32
    const char *sep = "\\";
#else
    const char *sep = "/";
#endif

    if (!(dest && base && target) || dest_size <= 0) {
        return -1;
    }
    if (path_is_absolute(target)) {
        pstrcpy(dest, dest_size, target);
        return 0;
    }
    /*
     * Stop at the end of base so identical strings do not run the scan past
     * the terminating NUL, and remember the last separator seen so that a
     * partially matching component ("a/foo" vs "a/far") is not split.
     */
    while (base[i] != '\0' && base[i] == target[i]) {
        if (base[i] == *sep) {
            common = i + 1;
        }
        i++;
    }
    p = &base[common];
    q = &target[common];
    /* every directory level left in base needs one ".." to climb out of */
    while (*p) {
        if (*p == *sep) {
            n++;
        }
        p++;
    }
    dest[0] = '\0';
    for (; n; n--) {
        pstrcat(dest, dest_size, "..");
        pstrcat(dest, dest_size, sep);
    }
    pstrcat(dest, dest_size, q);
    return 0;
}
1086

    
1087
/*
 * Create a VMDK image named filename.
 *
 * Options honoured: BLOCK_OPT_SIZE (virtual size in bytes),
 * BLOCK_OPT_BACKING_FILE (must be a vmdk image; not allowed for flat
 * subformats), BLOCK_OPT_COMPAT6 (hardware version 6 instead of 4) and
 * BLOCK_OPT_SUBFMT (monolithicSparse by default).
 *
 * One extent file is created per split_size chunk for the twoGbMaxExtent*
 * subformats, otherwise a single extent.  For monolithicSparse the extent is
 * filename itself and the text descriptor is written into it at offset
 * 0x200; for all other subformats the descriptor is a separate file at
 * filename.
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, idx = 0;
    char desc[BUF_SIZE];
    int64_t total_size = 0, filesize;
    const char *backing_file = NULL;
    const char *fmt = NULL;
    int flags = 0;
    int ret = 0;
    bool flat, split;
    char ext_desc_lines[BUF_SIZE] = "";
    char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
    const char *desc_extent_line;
    char parent_desc_line[BUF_SIZE] = "";
    uint32_t parent_cid = 0xffffffff;
    const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=%x\n"
        "createType=\"%s\"\n"
        "%s"
        "\n"
        "# Extent description\n"
        "%s"
        "\n"
        "# The Disk Data Base\n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";

    if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
        return -EINVAL;
    }
    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
        } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
            fmt = options->value.s;
        }
        options++;
    }
    if (!fmt) {
        /* Default format to monolithicSparse */
        fmt = "monolithicSparse";
    } else if (strcmp(fmt, "monolithicFlat") &&
               strcmp(fmt, "monolithicSparse") &&
               strcmp(fmt, "twoGbMaxExtentSparse") &&
               strcmp(fmt, "twoGbMaxExtentFlat")) {
        fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
        return -EINVAL;
    }
    split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
              strcmp(fmt, "twoGbMaxExtentSparse"));
    flat = !(strcmp(fmt, "monolithicFlat") &&
             strcmp(fmt, "twoGbMaxExtentFlat"));
    /* the sector count passed to these formats is an int64_t */
    if (flat) {
        desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n";
    } else {
        desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n";
    }
    if (flat && backing_file) {
        /* not supporting backing file for flat image */
        return -ENOTSUP;
    }
    if (backing_file) {
        char parent_filename[PATH_MAX];
        BlockDriverState *bs = bdrv_new("");
        ret = bdrv_open(bs, backing_file, 0, NULL);
        if (ret != 0) {
            bdrv_delete(bs);
            return ret;
        }
        if (strcmp(bs->drv->format_name, "vmdk")) {
            bdrv_delete(bs);
            return -EINVAL;
        }
        /*
         * NOTE(review): this value is overwritten with total_size before it
         * is ever read, so the backing file's length has no effect here.
         */
        filesize = bdrv_getlength(bs);
        parent_cid = vmdk_read_cid(bs, 0);
        bdrv_delete(bs);
        relative_path(parent_filename, sizeof(parent_filename),
                      filename, backing_file);
        snprintf(parent_desc_line, sizeof(parent_desc_line),
                "parentFileNameHint=\"%s\"", parent_filename);
    }

    /* Create extents */
    filesize = total_size;
    while (filesize > 0) {
        char desc_line[BUF_SIZE];
        char ext_filename[PATH_MAX];
        char desc_filename[PATH_MAX];
        int64_t size = filesize;

        if (split && size > split_size) {
            size = split_size;
        }
        /* desc_filename is the extent name as recorded in the descriptor */
        if (split) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
                    prefix, flat ? 'f' : 's', ++idx, postfix);
        } else if (flat) {
            snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
                    prefix, postfix);
        } else {
            snprintf(desc_filename, sizeof(desc_filename), "%s%s",
                    prefix, postfix);
        }
        snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                path, desc_filename);

        /* hand the real failure reason back to the caller */
        ret = vmdk_create_extent(ext_filename, size, flat);
        if (ret) {
            return ret;
        }
        filesize -= size;

        /* Format description line */
        snprintf(desc_line, sizeof(desc_line),
                    desc_extent_line, size / 512, desc_filename);
        pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
    }
    /* generate descriptor file */
    snprintf(desc, sizeof(desc), desc_template,
            (unsigned int)time(NULL),
            parent_cid,
            fmt,
            parent_desc_line,
            ext_desc_lines,
            (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
            total_size / (int64_t)(63 * 16 * 512));
    if (split || flat) {
        /* the descriptor is a file of its own */
        fd = open(
                filename,
                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
                0644);
    } else {
        /* the descriptor goes into the extent file created above */
        fd = open(
                filename,
                O_WRONLY | O_BINARY | O_LARGEFILE,
                0644);
    }
    if (fd < 0) {
        return -errno;
    }
    /* the descriptor offset = 0x200 */
    if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }
    ret = 0;
exit:
    close(fd);
    return ret;
}
1255

    
1256
/* Close callback of the driver; all teardown is done by vmdk_free_extents(). */
static void vmdk_close(BlockDriverState *bs)
{
    vmdk_free_extents(bs);
}
1260

    
1261
/*
 * Flush bs->file and then the file of every extent.
 *
 * All files are flushed even if one of them fails.  Returns the result of
 * flushing bs->file unless an extent flush failed, in which case the error
 * of the last failing extent is returned.
 */
static int vmdk_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    int result = bdrv_flush(bs->file);
    int idx;

    for (idx = 0; idx < state->num_extents; idx++) {
        int rc = bdrv_flush(state->extents[idx].file);

        if (rc < 0) {
            result = rc;
        }
    }
    return result;
}
1275

    
1276

    
1277
static QEMUOptionParameter vmdk_create_options[] = {
1278
    {
1279
        .name = BLOCK_OPT_SIZE,
1280
        .type = OPT_SIZE,
1281
        .help = "Virtual disk size"
1282
    },
1283
    {
1284
        .name = BLOCK_OPT_BACKING_FILE,
1285
        .type = OPT_STRING,
1286
        .help = "File name of a base image"
1287
    },
1288
    {
1289
        .name = BLOCK_OPT_COMPAT6,
1290
        .type = OPT_FLAG,
1291
        .help = "VMDK version 6 image"
1292
    },
1293
    {
1294
        .name = BLOCK_OPT_SUBFMT,
1295
        .type = OPT_STRING,
1296
        .help =
1297
            "VMDK flat extent format, can be one of "
1298
            "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat} "
1299
    },
1300
    { NULL }
1301
};
1302

    
1303
static BlockDriver bdrv_vmdk = {
1304
    .format_name        = "vmdk",
1305
    .instance_size        = sizeof(BDRVVmdkState),
1306
    .bdrv_probe                = vmdk_probe,
1307
    .bdrv_open      = vmdk_open,
1308
    .bdrv_read                = vmdk_read,
1309
    .bdrv_write                = vmdk_write,
1310
    .bdrv_close                = vmdk_close,
1311
    .bdrv_create        = vmdk_create,
1312
    .bdrv_flush                = vmdk_flush,
1313
    .bdrv_is_allocated        = vmdk_is_allocated,
1314

    
1315
    .create_options = vmdk_create_options,
1316
};
1317

    
1318
static void bdrv_vmdk_init(void)
1319
{
1320
    bdrv_register(&bdrv_vmdk);
1321
}
1322

    
1323
block_init(bdrv_vmdk_init);