Revision b3976d3c

b/block/vmdk.c
60 60

  
61 61
#define L2_CACHE_SIZE 16
62 62

  
63
typedef struct BDRVVmdkState {
63
typedef struct VmdkExtent {
64
    BlockDriverState *file;
65
    bool flat;
66
    int64_t sectors;
67
    int64_t end_sector;
64 68
    int64_t l1_table_offset;
65 69
    int64_t l1_backup_table_offset;
66 70
    uint32_t *l1_table;
......
74 78
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
75 79

  
76 80
    unsigned int cluster_sectors;
81
} VmdkExtent;
82

  
83
typedef struct BDRVVmdkState {
77 84
    uint32_t parent_cid;
85
    int num_extents;
86
    /* Extent array with num_extents entries, ascend ordered by address */
87
    VmdkExtent *extents;
78 88
} BDRVVmdkState;
79 89

  
80 90
typedef struct VmdkMetaData {
......
105 115
#define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
106 116
#define HEADER_SIZE 512   			// first sector of 512 bytes
107 117

  
118
/* Release every per-extent allocation (L1 table, backup L1 table, L2
 * cache) and finally the extent array itself. */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx;

    for (idx = 0; idx < s->num_extents; idx++) {
        VmdkExtent *e = &s->extents[idx];

        qemu_free(e->l1_table);
        qemu_free(e->l2_cache);
        qemu_free(e->l1_backup_table);
    }
    qemu_free(s->extents);
}
130

  
108 131
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
109 132
{
110 133
    char desc[DESC_SIZE];
......
358 381
    return 0;
359 382
}
360 383

  
384
/* Create and append extent to the extent array. Return the added VmdkExtent
385
 * address. return NULL if allocation failed. */
386
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
387
                           BlockDriverState *file, bool flat, int64_t sectors,
388
                           int64_t l1_offset, int64_t l1_backup_offset,
389
                           uint32_t l1_size,
390
                           int l2_size, unsigned int cluster_sectors)
391
{
392
    VmdkExtent *extent;
393
    BDRVVmdkState *s = bs->opaque;
394

  
395
    s->extents = qemu_realloc(s->extents,
396
                              (s->num_extents + 1) * sizeof(VmdkExtent));
397
    extent = &s->extents[s->num_extents];
398
    s->num_extents++;
399

  
400
    memset(extent, 0, sizeof(VmdkExtent));
401
    extent->file = file;
402
    extent->flat = flat;
403
    extent->sectors = sectors;
404
    extent->l1_table_offset = l1_offset;
405
    extent->l1_backup_table_offset = l1_backup_offset;
406
    extent->l1_size = l1_size;
407
    extent->l1_entry_sectors = l2_size * cluster_sectors;
408
    extent->l2_size = l2_size;
409
    extent->cluster_sectors = cluster_sectors;
410

  
411
    if (s->num_extents > 1) {
412
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
413
    } else {
414
        extent->end_sector = extent->sectors;
415
    }
416
    bs->total_sectors = extent->end_sector;
417
    return extent;
418
}
419

  
420

  
361 421
static int vmdk_open(BlockDriverState *bs, int flags)
362 422
{
363 423
    BDRVVmdkState *s = bs->opaque;
364 424
    uint32_t magic;
365
    int l1_size, i;
425
    int i;
426
    uint32_t l1_size, l1_entry_sectors;
427
    VmdkExtent *extent = NULL;
366 428

  
367 429
    if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
368 430
        goto fail;
......
370 432
    magic = be32_to_cpu(magic);
371 433
    if (magic == VMDK3_MAGIC) {
372 434
        VMDK3Header header;
373

  
374
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
435
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
436
                != sizeof(header)) {
375 437
            goto fail;
376
        s->cluster_sectors = le32_to_cpu(header.granularity);
377
        s->l2_size = 1 << 9;
378
        s->l1_size = 1 << 6;
379
        bs->total_sectors = le32_to_cpu(header.disk_sectors);
380
        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
381
        s->l1_backup_table_offset = 0;
382
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
438
        }
439
        extent = vmdk_add_extent(bs, bs->file, false,
440
                              le32_to_cpu(header.disk_sectors),
441
                              le32_to_cpu(header.l1dir_offset) << 9, 0,
442
                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
383 443
    } else if (magic == VMDK4_MAGIC) {
384 444
        VMDK4Header header;
385

  
386
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
445
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
446
                != sizeof(header)) {
387 447
            goto fail;
388
        bs->total_sectors = le64_to_cpu(header.capacity);
389
        s->cluster_sectors = le64_to_cpu(header.granularity);
390
        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
391
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
392
        if (s->l1_entry_sectors <= 0)
448
        }
449
        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
450
                            * le64_to_cpu(header.granularity);
451
        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
452
                    / l1_entry_sectors;
453
        extent = vmdk_add_extent(bs, bs->file, false,
454
                              le64_to_cpu(header.capacity),
455
                              le64_to_cpu(header.gd_offset) << 9,
456
                              le64_to_cpu(header.rgd_offset) << 9,
457
                              l1_size,
458
                              le32_to_cpu(header.num_gtes_per_gte),
459
                              le64_to_cpu(header.granularity));
460
        if (extent->l1_entry_sectors <= 0) {
393 461
            goto fail;
394
        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
395
            / s->l1_entry_sectors;
396
        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
397
        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
398

  
462
        }
399 463
        // try to open parent images, if exist
400 464
        if (vmdk_parent_open(bs) != 0)
401 465
            goto fail;
......
406 470
    }
407 471

  
408 472
    /* read the L1 table */
409
    l1_size = s->l1_size * sizeof(uint32_t);
410
    s->l1_table = qemu_malloc(l1_size);
411
    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
473
    l1_size = extent->l1_size * sizeof(uint32_t);
474
    extent->l1_table = qemu_malloc(l1_size);
475
    if (bdrv_pread(bs->file,
476
            extent->l1_table_offset,
477
            extent->l1_table,
478
            l1_size)
479
        != l1_size) {
412 480
        goto fail;
413
    for(i = 0; i < s->l1_size; i++) {
414
        le32_to_cpus(&s->l1_table[i]);
481
    }
482
    for (i = 0; i < extent->l1_size; i++) {
483
        le32_to_cpus(&extent->l1_table[i]);
415 484
    }
416 485

  
417
    if (s->l1_backup_table_offset) {
418
        s->l1_backup_table = qemu_malloc(l1_size);
419
        if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
486
    if (extent->l1_backup_table_offset) {
487
        extent->l1_backup_table = qemu_malloc(l1_size);
488
        if (bdrv_pread(bs->file,
489
                    extent->l1_backup_table_offset,
490
                    extent->l1_backup_table,
491
                    l1_size)
492
                != l1_size) {
420 493
            goto fail;
421
        for(i = 0; i < s->l1_size; i++) {
422
            le32_to_cpus(&s->l1_backup_table[i]);
494
        }
495
        for (i = 0; i < extent->l1_size; i++) {
496
            le32_to_cpus(&extent->l1_backup_table[i]);
423 497
        }
424 498
    }
425 499

  
426
    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
500
    extent->l2_cache =
501
        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
427 502
    return 0;
428 503
 fail:
429
    qemu_free(s->l1_backup_table);
430
    qemu_free(s->l1_table);
431
    qemu_free(s->l2_cache);
504
    vmdk_free_extents(bs);
432 505
    return -1;
433 506
}
434 507

  
435
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
436
                                   uint64_t offset, int allocate);
437

  
438
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
439
                             uint64_t offset, int allocate)
508
static int get_whole_cluster(BlockDriverState *bs,
509
                VmdkExtent *extent,
510
                uint64_t cluster_offset,
511
                uint64_t offset,
512
                bool allocate)
440 513
{
441
    BDRVVmdkState *s = bs->opaque;
442
    uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
514
    /* 128 sectors * 512 bytes each = grain size 64KB */
515
    uint8_t  whole_grain[extent->cluster_sectors * 512];
443 516

  
444 517
    // we will be here if it's first write on non-exist grain(cluster).
445 518
    // try to read from parent image, if exist
......
450 523
            return -1;
451 524

  
452 525
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
453
            s->cluster_sectors);
526
                extent->cluster_sectors);
454 527
        if (ret < 0) {
455 528
            return -1;
456 529
        }
457 530

  
458 531
        //Write grain only into the active image
459
        ret = bdrv_write(bs->file, cluster_offset, whole_grain,
460
            s->cluster_sectors);
532
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
533
                extent->cluster_sectors);
461 534
        if (ret < 0) {
462 535
            return -1;
463 536
        }
......
465 538
    return 0;
466 539
}
467 540

  
468
static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
541
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
469 542
{
470
    BDRVVmdkState *s = bs->opaque;
471

  
472 543
    /* update L2 table */
473
    if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
474
                    &(m_data->offset), sizeof(m_data->offset)) < 0)
544
    if (bdrv_pwrite_sync(
545
                extent->file,
546
                ((int64_t)m_data->l2_offset * 512)
547
                    + (m_data->l2_index * sizeof(m_data->offset)),
548
                &(m_data->offset),
549
                sizeof(m_data->offset)
550
            ) < 0) {
475 551
        return -1;
552
    }
476 553
    /* update backup L2 table */
477
    if (s->l1_backup_table_offset != 0) {
478
        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
479
        if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
480
                        &(m_data->offset), sizeof(m_data->offset)) < 0)
554
    if (extent->l1_backup_table_offset != 0) {
555
        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
556
        if (bdrv_pwrite_sync(
557
                    extent->file,
558
                    ((int64_t)m_data->l2_offset * 512)
559
                        + (m_data->l2_index * sizeof(m_data->offset)),
560
                    &(m_data->offset), sizeof(m_data->offset)
561
                ) < 0) {
481 562
            return -1;
563
        }
482 564
    }
483 565

  
484 566
    return 0;
485 567
}
486 568

  
487
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
488
                                   uint64_t offset, int allocate)
569
static uint64_t get_cluster_offset(BlockDriverState *bs,
570
                                    VmdkExtent *extent,
571
                                    VmdkMetaData *m_data,
572
                                    uint64_t offset, int allocate)
489 573
{
490
    BDRVVmdkState *s = bs->opaque;
491 574
    unsigned int l1_index, l2_offset, l2_index;
492 575
    int min_index, i, j;
493 576
    uint32_t min_count, *l2_table, tmp = 0;
......
496 579
    if (m_data)
497 580
        m_data->valid = 0;
498 581

  
499
    l1_index = (offset >> 9) / s->l1_entry_sectors;
500
    if (l1_index >= s->l1_size)
582
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
583
    if (l1_index >= extent->l1_size) {
501 584
        return 0;
502
    l2_offset = s->l1_table[l1_index];
503
    if (!l2_offset)
585
    }
586
    l2_offset = extent->l1_table[l1_index];
587
    if (!l2_offset) {
504 588
        return 0;
589
    }
505 590
    for(i = 0; i < L2_CACHE_SIZE; i++) {
506
        if (l2_offset == s->l2_cache_offsets[i]) {
591
        if (l2_offset == extent->l2_cache_offsets[i]) {
507 592
            /* increment the hit count */
508
            if (++s->l2_cache_counts[i] == 0xffffffff) {
593
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
509 594
                for(j = 0; j < L2_CACHE_SIZE; j++) {
510
                    s->l2_cache_counts[j] >>= 1;
595
                    extent->l2_cache_counts[j] >>= 1;
511 596
                }
512 597
            }
513
            l2_table = s->l2_cache + (i * s->l2_size);
598
            l2_table = extent->l2_cache + (i * extent->l2_size);
514 599
            goto found;
515 600
        }
516 601
    }
......
518 603
    min_index = 0;
519 604
    min_count = 0xffffffff;
520 605
    for(i = 0; i < L2_CACHE_SIZE; i++) {
521
        if (s->l2_cache_counts[i] < min_count) {
522
            min_count = s->l2_cache_counts[i];
606
        if (extent->l2_cache_counts[i] < min_count) {
607
            min_count = extent->l2_cache_counts[i];
523 608
            min_index = i;
524 609
        }
525 610
    }
526
    l2_table = s->l2_cache + (min_index * s->l2_size);
527
    if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
528
                                                                        s->l2_size * sizeof(uint32_t))
611
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
612
    if (bdrv_pread(
613
                extent->file,
614
                (int64_t)l2_offset * 512,
615
                l2_table,
616
                extent->l2_size * sizeof(uint32_t)
617
            ) != extent->l2_size * sizeof(uint32_t)) {
529 618
        return 0;
619
    }
530 620

  
531
    s->l2_cache_offsets[min_index] = l2_offset;
532
    s->l2_cache_counts[min_index] = 1;
621
    extent->l2_cache_offsets[min_index] = l2_offset;
622
    extent->l2_cache_counts[min_index] = 1;
533 623
 found:
534
    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
624
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
535 625
    cluster_offset = le32_to_cpu(l2_table[l2_index]);
536 626

  
537 627
    if (!cluster_offset) {
......
539 629
            return 0;
540 630

  
541 631
        // Avoid the L2 tables update for the images that have snapshots.
542
        cluster_offset = bdrv_getlength(bs->file);
543
        bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
632
        cluster_offset = bdrv_getlength(extent->file);
633
        bdrv_truncate(
634
            extent->file,
635
            cluster_offset + (extent->cluster_sectors << 9)
636
        );
544 637

  
545 638
        cluster_offset >>= 9;
546 639
        tmp = cpu_to_le32(cluster_offset);
......
551 644
         * This problem may occur because of insufficient space on host disk
552 645
         * or inappropriate VM shutdown.
553 646
         */
554
        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
647
        if (get_whole_cluster(
648
                bs, extent, cluster_offset, offset, allocate) == -1)
555 649
            return 0;
556 650

  
557 651
        if (m_data) {
......
566 660
    return cluster_offset;
567 661
}
568 662

  
663
/* Locate the extent that covers sector_num.  start_hint, when non-NULL,
 * names the extent to begin scanning from (the array is ascend ordered
 * by address, so a hint from the previous lookup is cheap to resume).
 * Returns NULL when sector_num lies beyond the last extent. */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *e;

    for (e = start_hint ? start_hint : &s->extents[0];
         e < &s->extents[s->num_extents];
         e++) {
        if (sector_num < e->end_sector) {
            return e;
        }
    }
    return NULL;
}
679

  
569 680
/* Report whether sector_num is backed by allocated data.  Stores in
 * *pnum the number of contiguous sectors (capped at nb_sectors) sharing
 * that allocation state.  Returns 1 if allocated, 0 otherwise (also 0
 * when sector_num is past the last extent). */
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = find_extent(s, sector_num, NULL);
    int64_t remaining, ret;

    if (extent == NULL) {
        return 0;
    }
    if (extent->flat) {
        /* Flat extents are fully allocated up to their end sector. */
        remaining = extent->end_sector - sector_num;
        ret = 1;
    } else {
        uint64_t offset =
            get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
        int64_t in_cluster = sector_num % extent->cluster_sectors;

        remaining = extent->cluster_sectors - in_cluster;
        ret = offset ? 1 : 0;
    }
    if (remaining > nb_sectors)
        remaining = nb_sectors;
    *pnum = remaining;
    return ret;
}
584 707

  
585 708
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
586 709
                    uint8_t *buf, int nb_sectors)
587 710
{
588 711
    BDRVVmdkState *s = bs->opaque;
589
    int index_in_cluster, n, ret;
712
    int ret;
713
    uint64_t n, index_in_cluster;
714
    VmdkExtent *extent = NULL;
590 715
    uint64_t cluster_offset;
591 716

  
592 717
    while (nb_sectors > 0) {
593
        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
594
        index_in_cluster = sector_num % s->cluster_sectors;
595
        n = s->cluster_sectors - index_in_cluster;
718
        extent = find_extent(s, sector_num, extent);
719
        if (!extent) {
720
            return -EIO;
721
        }
722
        cluster_offset = get_cluster_offset(
723
                            bs, extent, NULL, sector_num << 9, 0);
724
        index_in_cluster = sector_num % extent->cluster_sectors;
725
        n = extent->cluster_sectors - index_in_cluster;
596 726
        if (n > nb_sectors)
597 727
            n = nb_sectors;
598 728
        if (!cluster_offset) {
......
621 751
                     const uint8_t *buf, int nb_sectors)
622 752
{
623 753
    BDRVVmdkState *s = bs->opaque;
624
    VmdkMetaData m_data;
625
    int index_in_cluster, n;
754
    VmdkExtent *extent = NULL;
755
    int n;
756
    int64_t index_in_cluster;
626 757
    uint64_t cluster_offset;
627 758
    static int cid_update = 0;
759
    VmdkMetaData m_data;
628 760

  
629 761
    if (sector_num > bs->total_sectors) {
630 762
        fprintf(stderr,
......
635 767
    }
636 768

  
637 769
    while (nb_sectors > 0) {
638
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
639
        n = s->cluster_sectors - index_in_cluster;
640
        if (n > nb_sectors)
641
            n = nb_sectors;
642
        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
643
        if (!cluster_offset)
770
        extent = find_extent(s, sector_num, extent);
771
        if (!extent) {
772
            return -EIO;
773
        }
774
        cluster_offset = get_cluster_offset(
775
                                bs,
776
                                extent,
777
                                &m_data,
778
                                sector_num << 9, 1);
779
        if (!cluster_offset) {
644 780
            return -1;
781
        }
782
        index_in_cluster = sector_num % extent->cluster_sectors;
783
        n = extent->cluster_sectors - index_in_cluster;
784
        if (n > nb_sectors) {
785
            n = nb_sectors;
786
        }
645 787

  
646
        if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
788
        if (bdrv_pwrite(bs->file,
789
                        cluster_offset + index_in_cluster * 512,
790
                        buf, n * 512)
791
                != n * 512) {
647 792
            return -1;
793
        }
648 794
        if (m_data.valid) {
649 795
            /* update L2 tables */
650
            if (vmdk_L2update(bs, &m_data) == -1)
796
            if (vmdk_L2update(extent, &m_data) == -1) {
651 797
                return -1;
798
            }
652 799
        }
653 800
        nb_sectors -= n;
654 801
        sector_num += n;
......
822 969

  
823 970
/* Close hook: all per-extent teardown lives in vmdk_free_extents(). */
static void vmdk_close(BlockDriverState *bs)
{
    vmdk_free_extents(bs);
}
830 974

  
831 975
static int vmdk_flush(BlockDriverState *bs)

Also available in: Unified diff