Statistics
| Branch: | Revision:

root / block-vmdk.c @ e96efcfc

History | View | Annotate | Download (22.9 kB)

1
/*
2
 * Block driver for the VMDK format
3
 * 
4
 * Copyright (c) 2004 Fabrice Bellard
5
 * Copyright (c) 2005 Filip Navara
6
 * 
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "vl.h"
27
#include "block_int.h"
28

    
29
/*
 * Image magic numbers.  Each value is what the first four bytes of the file
 * yield when read as a big-endian 32-bit integer (see vmdk_probe()).
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') /* "COWD" */
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') /* "KDMV" */
31

    
32
/*
 * Header of a VMDK3 ("COWD") image.  vmdk_open() reads it from the bytes
 * that immediately follow the 4-byte magic and converts the fields it uses
 * from little-endian.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* becomes bs->total_sectors */
    uint32_t granularity;       /* becomes the cluster size, in sectors */
    uint32_t l1dir_offset;      /* L1 table position, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
44

    
45
/*
 * Header of a VMDK4 ("KDMV") image, stored right after the 4-byte magic.
 * The structure is packed because it mirrors the on-disk layout byte for
 * byte.  Offsets and sizes are expressed in 512-byte sectors (the code
 * shifts them left by 9 to obtain byte positions).
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* becomes bs->total_sectors */
    int64_t granularity;        /* becomes the cluster size, in sectors */
    int64_t desc_offset;        /* vmdk_create() stores 1 here */
    int64_t desc_size;          /* vmdk_create() stores 20 here */
    int32_t num_gtes_per_gte;   /* becomes the number of entries per L2 table */
    int64_t rgd_offset;         /* used as the L1 table position */
    int64_t gd_offset;          /* used as the backup L1 table position */
    int64_t grain_offset;       /* images are created this many sectors long */
    char filler[1];
    char check_bytes[4];        /* vmdk_create() writes 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
59

    
60
#define L2_CACHE_SIZE 16
61

    
62
typedef struct BDRVVmdkState {
63
    BlockDriverState *hd;
64
    int64_t l1_table_offset;
65
    int64_t l1_backup_table_offset;
66
    uint32_t *l1_table;
67
    uint32_t *l1_backup_table;
68
    unsigned int l1_size;
69
    uint32_t l1_entry_sectors;
70

    
71
    unsigned int l2_size;
72
    uint32_t *l2_cache;
73
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
74
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
75

    
76
    unsigned int cluster_sectors;
77
    uint32_t parent_cid;
78
} BDRVVmdkState;
79

    
80
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
81
{
82
    uint32_t magic;
83

    
84
    if (buf_size < 4)
85
        return 0;
86
    magic = be32_to_cpu(*(uint32_t *)buf);
87
    if (magic == VMDK3_MAGIC ||
88
        magic == VMDK4_MAGIC)
89
        return 100;
90
    else
91
        return 0;
92
}
93

    
94
/* When defined, vmdk_is_cid_valid() compares the stored parentCID with the parent's CID. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro stays one operand inside larger expressions. */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
99

    
100
/*
 * Read a content ID from the text descriptor stored at byte offset 0x200.
 *
 * parent: non-zero to read the "parentCID" value, zero to read "CID".
 *
 * Returns the parsed hexadecimal value, 0 when the descriptor cannot be
 * read, and 0xffffffff (the value vmdk_create() writes as parentCID for an
 * image without a parent) when the key is absent or has no parsable value.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE];
    unsigned int cid = 0xffffffff;  /* unsigned int is what "%x" expects */
    const char *cid_str;
    char *p_name;
    size_t cid_str_size;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return 0;
    /* the on-disk text is not guaranteed to be terminated */
    desc[sizeof(desc) - 1] = '\0';

    /* sizeof counts the terminating NUL, which accounts for the '=' sign */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    p_name = strstr(desc, cid_str);
    /* only step over the separator when it lies inside the buffer */
    if (p_name != NULL && p_name[cid_str_size - 1] != '\0') {
        p_name += cid_str_size;
        sscanf(p_name, "%x", &cid);
    }

    return cid;
}
127

    
128
/*
 * Replace the "CID" value in the text descriptor at byte offset 0x200.
 *
 * The descriptor is expected to contain a "CID=" line located before the
 * "parentCID" line.  The new value is formatted in place and everything
 * from "parentCID" onwards is re-appended after it, so a value with a
 * different number of hex digits does not clobber the following keys.
 *
 * Returns 0 on success, -1 on I/O failure, when the expected keys are
 * missing, or when the rewritten text would not fit in the descriptor.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
    char *p_name, *tmp_str;
    size_t room, head_len, tail_len;
    int n;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    /* the on-disk text is not guaranteed to be terminated */
    desc[sizeof(desc) - 1] = '\0';

    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL)
        return -1;
    /* cannot overflow: the source is a terminated suffix of desc */
    strcpy(tmp_desc, tmp_str);

    /*
     * "parentCID" itself contains "CID"; reject descriptors where the first
     * match is not a separate key located before it.
     */
    p_name = strstr(desc, "CID");
    if (p_name == NULL || p_name >= tmp_str)
        return -1;

    /* sizeof counts the terminating NUL, which accounts for the '=' sign */
    p_name += sizeof("CID");
    room = sizeof(desc) - (size_t)(p_name - desc);
    n = snprintf(p_name, room, "%x\n", (unsigned int)cid);
    if (n < 0 || (size_t)n >= room)
        return -1;

    head_len = (size_t)(p_name - desc) + (size_t)n;
    tail_len = strlen(tmp_desc);
    if (head_len + tail_len >= sizeof(desc))
        return -1;
    memcpy(desc + head_len, tmp_desc, tail_len + 1);

    if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    return 0;
}
150

    
151
/*
 * Check that the parent image still carries the CID this image recorded as
 * its parentCID when it was opened.
 *
 * Returns 1 when the CIDs match, when there is no parent image, or when
 * CHECK_CID is not defined; returns 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *state = bs->opaque;
    BlockDriverState *parent = state->hd->backing_hd;

    if (parent != NULL && state->parent_cid != vmdk_read_cid(parent, 0))
        return 0;   /* CID not valid */
#endif
    return 1;       /* CID valid */
}
168

    
169
static int vmdk_snapshot_create(const char *filename, const char *backing_file)
170
{
171
    int snp_fd, p_fd;
172
    uint32_t p_cid;
173
    char *p_name, *gd_buf, *rgd_buf; 
174
    const char *real_filename, *temp_str;
175
    VMDK4Header header;
176
    uint32_t gde_entries, gd_size;
177
    int64_t gd_offset, rgd_offset, capacity, gt_size;
178
    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
179
    char *desc_template =
180
    "# Disk DescriptorFile\n"
181
    "version=1\n"
182
    "CID=%x\n"
183
    "parentCID=%x\n"
184
    "createType=\"monolithicSparse\"\n"
185
    "parentFileNameHint=\"%s\"\n"
186
    "\n"
187
    "# Extent description\n"
188
    "RW %lu SPARSE \"%s\"\n"
189
    "\n"
190
    "# The Disk Data Base \n"
191
    "#DDB\n"
192
    "\n";
193

    
194
    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
195
    if (snp_fd < 0)
196
        return -1;
197
    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
198
    if (p_fd < 0) {
199
        close(snp_fd);
200
        return -1;
201
    }
202

    
203
    /* read the header */
204
    if (lseek(p_fd, 0x0, SEEK_SET) == -1)
205
        goto fail;
206
    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
207
        goto fail;
208

    
209
    /* write the header */
210
    if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
211
        goto fail;
212
    if (write(snp_fd, hdr, HEADER_SIZE) == -1)
213
        goto fail;
214

    
215
    memset(&header, 0, sizeof(header));
216
    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
217

    
218
    ftruncate(snp_fd, header.grain_offset << 9);
219
    /* the descriptor offset = 0x200 */
220
    if (lseek(p_fd, 0x200, SEEK_SET) == -1)
221
        goto fail;
222
    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
223
        goto fail;
224

    
225
    if ((p_name = strstr(p_desc,"CID")) != 0) {
226
        p_name += sizeof("CID");
227
        sscanf(p_name,"%x",&p_cid);
228
    }
229

    
230
    real_filename = filename;
231
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
232
        real_filename = temp_str + 1;
233
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
234
        real_filename = temp_str + 1;
235
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
236
        real_filename = temp_str + 1;
237

    
238
    sprintf(s_desc, desc_template, p_cid, p_cid, backing_file
239
            , (uint32_t)header.capacity, real_filename);
240

    
241
    /* write the descriptor */
242
    if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
243
        goto fail;
244
    if (write(snp_fd, s_desc, strlen(s_desc)) == -1)
245
        goto fail;
246

    
247
    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
248
    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
249
    capacity = header.capacity * SECTOR_SIZE;       // Extent size
250
    /*
251
     * Each GDE span 32M disk, means:
252
     * 512 GTE per GT, each GTE points to grain
253
     */
254
    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
255
    if (!gt_size)
256
        goto fail;
257
    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde 
258
    gd_size = gde_entries * sizeof(uint32_t);
259

    
260
    /* write RGD */
261
    rgd_buf = qemu_malloc(gd_size);
262
    if (!rgd_buf)
263
        goto fail;
264
    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
265
        goto fail_rgd;
266
    if (read(p_fd, rgd_buf, gd_size) != gd_size)
267
        goto fail_rgd;
268
    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
269
        goto fail_rgd;
270
    if (write(snp_fd, rgd_buf, gd_size) == -1)
271
        goto fail_rgd;
272
    qemu_free(rgd_buf);
273

    
274
    /* write GD */
275
    gd_buf = qemu_malloc(gd_size);
276
    if (!gd_buf)
277
        goto fail_rgd;
278
    if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
279
        goto fail_gd;
280
    if (read(p_fd, gd_buf, gd_size) != gd_size)
281
        goto fail_gd;
282
    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
283
        goto fail_gd;
284
    if (write(snp_fd, gd_buf, gd_size) == -1)
285
        goto fail_gd;
286
    qemu_free(gd_buf);
287

    
288
    close(p_fd);
289
    close(snp_fd);
290
    return 0;
291

    
292
    fail_gd:
293
    qemu_free(gd_buf);
294
    fail_rgd:   
295
    qemu_free(rgd_buf);
296
    fail:
297
    close(p_fd);
298
    close(snp_fd);
299
    return -1;
300
}
301

    
302
/* Close the backing (parent) image attached to bs; does nothing when bs has none. */
static void vmdk_parent_close(BlockDriverState *bs)
{
    BlockDriverState *parent = bs->backing_hd;

    if (!parent)
        return;
    bdrv_close(parent);
}
307

    
308

    
309
/*
 * Open the parent image named by the "parentFileNameHint" key of the
 * descriptor at byte offset 0x200, if that key is present.
 *
 * The name is stored in s->hd->backing_file.  When stat() cannot find it as
 * given, it is combined with `filename` through path_combine().  The opened
 * image is attached as s->hd->backing_hd.
 *
 * Returns 0 on success or when the descriptor names no parent, and -1 on
 * failure.  When creating or opening the parent fails, s->hd is closed
 * before returning.
 */
static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
{
    BDRVVmdkState *s = bs->opaque;
    char *p_name, *end_name;
    size_t name_len;
    struct stat file_buf;
    char desc[DESC_SIZE];
    char parent_img_name[1024];

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    /* the on-disk text is not guaranteed to be terminated */
    desc[sizeof(desc) - 1] = '\0';

    p_name = strstr(desc, "parentFileNameHint");
    if (p_name == NULL)
        return 0;

    /* step over the key, then over the two characters =" that follow it */
    p_name += strlen("parentFileNameHint");
    if (p_name[0] == '\0' || p_name[1] == '\0')
        return -1;
    p_name += 2;

    end_name = strchr(p_name, '\"');
    if (end_name == NULL)
        return -1;

    /*
     * Refuse names that do not fit, terminator included.
     * NOTE(review): sizeof assumes backing_file is a char array member of
     * BlockDriverState - confirm against block_int.h.
     */
    name_len = (size_t)(end_name - p_name);
    if (name_len >= sizeof(s->hd->backing_file) ||
        name_len >= sizeof(parent_img_name))
        return -1;
    memcpy(s->hd->backing_file, p_name, name_len);
    s->hd->backing_file[name_len] = '\0';

    if (stat(s->hd->backing_file, &file_buf) != 0) {
        path_combine(parent_img_name, sizeof(parent_img_name),
                     filename, s->hd->backing_file);
    } else {
        memcpy(parent_img_name, s->hd->backing_file, name_len + 1);
    }

    s->hd->backing_hd = bdrv_new("");
    if (!s->hd->backing_hd ||
        bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0) {
        bdrv_close(s->hd);
        return -1;
    }

    return 0;
}
348

    
349
/*
 * Open a VMDK3 or VMDK4 image.
 *
 * Opens the underlying file, decodes the header that follows the magic,
 * opens the parent image for VMDK4 files that name one, loads the L1 table
 * (plus the backup L1 table when the header provides one) and allocates the
 * L2 table cache.
 *
 * Returns 0 on success, the error from bdrv_file_open() when the file
 * cannot be opened, and -1 on any other failure.
 */
static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
    unsigned int i;
    int l1_size, ret;

    ret = bdrv_file_open(&s->hd, filename, flags);
    if (ret < 0)
        return ret;
    if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
        goto fail;

    magic = be32_to_cpu(magic);
    if (magic == VMDK3_MAGIC) {
        VMDK3Header header;

        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
            goto fail;
        s->cluster_sectors = le32_to_cpu(header.granularity);
        s->l2_size = 1 << 9;
        s->l1_size = 1 << 6;
        bs->total_sectors = le32_to_cpu(header.disk_sectors);
        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
        s->l1_backup_table_offset = 0;
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
        /* a zero granularity would make every later lookup divide by zero */
        if (s->l1_entry_sectors == 0)
            goto fail;
    } else if (magic == VMDK4_MAGIC) {
        VMDK4Header header;

        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
            goto fail;
        bs->total_sectors = le64_to_cpu(header.capacity);
        s->cluster_sectors = le64_to_cpu(header.granularity);
        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
        if (s->l1_entry_sectors == 0)
            goto fail;
        /* one L1 entry per l1_entry_sectors, rounded up */
        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
            / s->l1_entry_sectors;
        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;

        /* try to open parent images, if exist */
        if (vmdk_parent_open(bs, filename) != 0)
            goto fail;
        /* remember the parentCID so that it can be checked against the parent later */
        s->parent_cid = vmdk_read_cid(bs,1);
    } else {
        goto fail;
    }

    /* the table's byte size is kept in an int below */
    if (s->l1_size > 0x7fffffff / sizeof(uint32_t))
        goto fail;

    /* read the L1 table */
    l1_size = s->l1_size * sizeof(uint32_t);
    s->l1_table = qemu_malloc(l1_size);
    if (!s->l1_table)
        goto fail;
    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
        goto fail;
    for (i = 0; i < s->l1_size; i++) {
        le32_to_cpus(&s->l1_table[i]);
    }

    /* read the backup L1 table, when the image has one */
    if (s->l1_backup_table_offset) {
        s->l1_backup_table = qemu_malloc(l1_size);
        if (!s->l1_backup_table)
            goto fail;
        if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
            goto fail;
        for (i = 0; i < s->l1_size; i++) {
            le32_to_cpus(&s->l1_backup_table[i]);
        }
    }

    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    if (!s->l2_cache)
        goto fail;
    return 0;

 fail:
    /* clear the pointers so that nothing can free them a second time */
    qemu_free(s->l1_backup_table);
    s->l1_backup_table = NULL;
    qemu_free(s->l1_table);
    s->l1_table = NULL;
    qemu_free(s->l2_cache);
    s->l2_cache = NULL;
    bdrv_delete(s->hd);
    return -1;
}
432

    
433
static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);

/*
 * Fill a freshly allocated cluster (grain) with the parent image's data.
 *
 * Called on the first write to a cluster that did not exist yet.
 *
 * cluster_offset: position of the new cluster in this image, in sectors.
 * offset:         guest byte offset being written; used to look up the
 *                 matching cluster in the parent.
 * allocate:       forwarded to the parent's cluster lookup.
 *
 * Returns 0 on success, or when there is no parent or the parent holds no
 * data for that cluster; returns -1 on a CID mismatch, when parent and
 * child use different grain sizes, on allocation failure, or on I/O error.
 */
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
                             uint64_t offset, int allocate)
{
    BDRVVmdkState *s = bs->opaque;
    BDRVVmdkState *ps;
    uint64_t parent_cluster_offset;
    unsigned int grain_bytes;
    uint8_t *whole_grain;
    int ret = -1;

    /* nothing to copy when there is no parent image */
    if (!s->hd->backing_hd)
        return 0;

    if (!vmdk_is_cid_valid(bs))
        return -1;

    /* a whole grain is copied 1:1, which only works for equal grain sizes */
    ps = s->hd->backing_hd->opaque;
    if (ps->cluster_sectors != s->cluster_sectors)
        return -1;

    parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
    /* offset 0 means "no cluster"; reading there would copy the parent's header */
    if (!parent_cluster_offset)
        return 0;

    /* heap buffer: the grain size comes from the image header */
    grain_bytes = s->cluster_sectors * 512;
    whole_grain = qemu_malloc(grain_bytes);
    if (!whole_grain)
        return -1;

    if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, grain_bytes) !=
                                                                    grain_bytes)
        goto out;
    if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, grain_bytes) !=
                                                                    grain_bytes)
        goto out;
    ret = 0;

 out:
    qemu_free(whole_grain);
    return ret;
}
460

    
461
/*
 * Translate a guest byte offset into the byte offset, inside the image
 * file, of the cluster that contains it.
 *
 * offset:   guest byte offset.
 * allocate: when non-zero and the cluster does not exist yet, append a new
 *           cluster to the file, record it in the L2 table (and in the
 *           backup L2 table when there is one) and fill it from the parent
 *           image through get_whole_cluster().
 *
 * Returns the byte offset of the start of the cluster, or 0 when the
 * cluster is not allocated (and `allocate` is zero) or when an error
 * occurs.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs,
                                   uint64_t offset, int allocate)
{
    BDRVVmdkState *s = bs->opaque;
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table, tmp;
    uint64_t cluster_offset;

    l1_index = (offset >> 9) / s->l1_entry_sectors;
    if (l1_index >= s->l1_size)
        return 0;
    l2_offset = s->l1_table[l1_index];
    if (!l2_offset)
        return 0;

    /* look for the L2 table in the cache */
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                /* halve every counter so that they keep their relative order */
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i * s->l2_size);
            goto found;
        }
    }

    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index * s->l2_size);
    if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
                                                                        s->l2_size * sizeof(uint32_t))
        return 0;

    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;

 found:
    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
    cluster_offset = le32_to_cpu(l2_table[l2_index]);
    if (!cluster_offset) {
        struct stat file_buf;

        if (!allocate)
            return 0;

        /* the new cluster goes at the current end of the file */
        if (stat(s->hd->filename, &file_buf) != 0)
            return 0;
        /*
         * Round up to a sector boundary: the position is stored in sectors,
         * and rounding down would overlap the end of the existing data.
         */
        cluster_offset = ((uint64_t)file_buf.st_size + 511) & ~(uint64_t)511;
        bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));

        cluster_offset >>= 9;
        /* update L2 table */
        tmp = cpu_to_le32(cluster_offset);
        l2_table[l2_index] = tmp;
        if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
                        &tmp, sizeof(tmp)) != sizeof(tmp))
            return 0;
        /* update backup L2 table */
        if (s->l1_backup_table_offset != 0) {
            l2_offset = s->l1_backup_table[l1_index];
            if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
                            &tmp, sizeof(tmp)) != sizeof(tmp))
                return 0;
        }

        /* seed the new cluster with the parent's data, if there is a parent */
        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
            return 0;
    }
    cluster_offset <<= 9;
    return cluster_offset;
}
537

    
538
/*
 * Report whether the cluster holding sector_num is allocated in this image.
 *
 * *pnum receives the number of sectors, starting at sector_num and capped
 * at nb_sectors, that lie in that same cluster.
 *
 * Returns non-zero when the cluster is allocated, zero otherwise.
 */
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    uint64_t cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
    int first = sector_num % s->cluster_sectors;
    int remaining = s->cluster_sectors - first;

    *pnum = (remaining > nb_sectors) ? nb_sectors : remaining;
    return (cluster_offset != 0);
}
553

    
554
/*
 * Read nb_sectors sectors starting at sector_num into buf.
 *
 * The request is served one cluster at a time.  Data of an allocated
 * cluster comes from this image; otherwise it comes from the parent image
 * (after a CID check) when there is one, and is zero-filled when there is
 * none.
 *
 * Returns 0 on success and -1 on a CID mismatch or I/O error.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;

    while (nb_sectors > 0) {
        uint64_t cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
        int index_in_cluster = sector_num % s->cluster_sectors;
        int n = s->cluster_sectors - index_in_cluster;

        /* never go past the end of the request */
        if (n > nb_sectors)
            n = nb_sectors;

        if (cluster_offset) {
            /* allocated here */
            if (bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
                return -1;
        } else if (s->hd->backing_hd) {
            /* try to read from parent image, if exist */
            if (!vmdk_is_cid_valid(bs))
                return -1;
            if (bdrv_read(s->hd->backing_hd, sector_num, buf, n) < 0)
                return -1;
        } else {
            memset(buf, 0, 512 * n);
        }

        buf += n * 512;
        sector_num += n;
        nb_sectors -= n;
    }
    return 0;
}
588

    
589
/*
 * Write nb_sectors sectors from buf starting at sector_num.
 *
 * The request is served one cluster at a time; clusters that do not exist
 * yet are allocated by get_cluster_offset().
 *
 * Returns 0 on success and -1 when a cluster cannot be obtained or the
 * write fails.
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    int index_in_cluster, n;
    uint64_t cluster_offset;
    /*
     * NOTE(review): this flag has static storage, so it is shared by every
     * image the process writes to; only the first one gets a new CID.
     */
    static int cid_update = 0;

    while (nb_sectors > 0) {
        /*
         * Use the same modulo as vmdk_read() and get_cluster_offset(); a
         * bit mask is only equivalent for power-of-two cluster sizes.
         */
        index_in_cluster = sector_num % s->cluster_sectors;
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors)
            n = nb_sectors;
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
        if (!cluster_offset)
            return -1;
        if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
            return -1;
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;

        /* update the CID on the first write; its result is not checked */
        if (!cid_update) {
            vmdk_write_cid(bs, time(NULL));
            cid_update++;
        }
    }
    return 0;
}
619

    
620
static int vmdk_create(const char *filename, int64_t total_size,
621
                       const char *backing_file, int flags)
622
{
623
    int fd, i;
624
    VMDK4Header header;
625
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
626
    char *desc_template =
627
        "# Disk DescriptorFile\n"
628
        "version=1\n"
629
        "CID=%x\n"
630
        "parentCID=ffffffff\n"
631
        "createType=\"monolithicSparse\"\n"
632
        "\n"
633
        "# Extent description\n"
634
        "RW %lu SPARSE \"%s\"\n"
635
        "\n"
636
        "# The Disk Data Base \n"
637
        "#DDB\n"
638
        "\n"
639
        "ddb.virtualHWVersion = \"4\"\n"
640
        "ddb.geometry.cylinders = \"%lu\"\n"
641
        "ddb.geometry.heads = \"16\"\n"
642
        "ddb.geometry.sectors = \"63\"\n"
643
        "ddb.adapterType = \"ide\"\n";
644
    char desc[1024];
645
    const char *real_filename, *temp_str;
646

    
647
    /* XXX: add support for backing file */
648
    if (backing_file) {
649
        return vmdk_snapshot_create(filename, backing_file);
650
    }
651

    
652
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
653
              0644);
654
    if (fd < 0)
655
        return -1;
656
    magic = cpu_to_be32(VMDK4_MAGIC);
657
    memset(&header, 0, sizeof(header));
658
    header.version = cpu_to_le32(1);
659
    header.flags = cpu_to_le32(3); /* ?? */
660
    header.capacity = cpu_to_le64(total_size);
661
    header.granularity = cpu_to_le64(128);
662
    header.num_gtes_per_gte = cpu_to_le32(512);
663

    
664
    grains = (total_size + header.granularity - 1) / header.granularity;
665
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
666
    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
667
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
668

    
669
    header.desc_offset = 1;
670
    header.desc_size = 20;
671
    header.rgd_offset = header.desc_offset + header.desc_size;
672
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
673
    header.grain_offset =
674
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
675
         header.granularity - 1) / header.granularity) *
676
        header.granularity;
677

    
678
    header.desc_offset = cpu_to_le64(header.desc_offset);
679
    header.desc_size = cpu_to_le64(header.desc_size);
680
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
681
    header.gd_offset = cpu_to_le64(header.gd_offset);
682
    header.grain_offset = cpu_to_le64(header.grain_offset);
683

    
684
    header.check_bytes[0] = 0xa;
685
    header.check_bytes[1] = 0x20;
686
    header.check_bytes[2] = 0xd;
687
    header.check_bytes[3] = 0xa;
688
    
689
    /* write all the data */    
690
    write(fd, &magic, sizeof(magic));
691
    write(fd, &header, sizeof(header));
692

    
693
    ftruncate(fd, header.grain_offset << 9);
694

    
695
    /* write grain directory */
696
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
697
    for (i = 0, tmp = header.rgd_offset + gd_size;
698
         i < gt_count; i++, tmp += gt_size)
699
        write(fd, &tmp, sizeof(tmp));
700
   
701
    /* write backup grain directory */
702
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
703
    for (i = 0, tmp = header.gd_offset + gd_size;
704
         i < gt_count; i++, tmp += gt_size)
705
        write(fd, &tmp, sizeof(tmp));
706

    
707
    /* compose the descriptor */
708
    real_filename = filename;
709
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
710
        real_filename = temp_str + 1;
711
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
712
        real_filename = temp_str + 1;
713
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
714
        real_filename = temp_str + 1;
715
    sprintf(desc, desc_template, time(NULL), (unsigned long)total_size,
716
            real_filename, total_size / (63 * 16));
717

    
718
    /* write the descriptor */
719
    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
720
    write(fd, desc, strlen(desc));
721

    
722
    close(fd);
723
    return 0;
724
}
725

    
726
/*
 * Release everything vmdk_open() set up: the L1 table, its backup copy,
 * the L2 cache, the parent image and the underlying file.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    qemu_free(s->l1_table);
    qemu_free(s->l1_backup_table);
    qemu_free(s->l2_cache);
    /* close the parent image first: s->hd must still be valid for this call */
    vmdk_parent_close(s->hd);
    bdrv_delete(s->hd);
}
736

    
737
/* Flush callback: forwards the flush to the underlying file. */
static void vmdk_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    BlockDriverState *file = state->hd;

    bdrv_flush(file);
}
742

    
743
BlockDriver bdrv_vmdk = {
744
    "vmdk",
745
    sizeof(BDRVVmdkState),
746
    vmdk_probe,
747
    vmdk_open,
748
    vmdk_read,
749
    vmdk_write,
750
    vmdk_close,
751
    vmdk_create,
752
    vmdk_flush,
753
    vmdk_is_allocated,
754
};