Revision 7327145f block.c

--- a/block.c
+++ b/block.c
@@ -2231,6 +2231,8 @@
         .is_write       = is_write,
         .co             = qemu_coroutine_self(),
         .serialising    = false,
+        .overlap_offset = offset,
+        .overlap_bytes  = bytes,
     };
 
     qemu_co_queue_init(&req->wait_queue);
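
Note: the two new fields seed each tracked request's serialisation window with the request's own extent; mark_request_serialising() below only ever widens that window. A trimmed sketch of the struct for orientation (a hypothetical subset showing only the fields this revision touches; types follow the signatures visible in the diff):

    typedef struct BdrvTrackedRequest {
        int64_t      offset;          /* extent the request actually touches */
        unsigned int bytes;
        bool         serialising;
        int64_t      overlap_offset;  /* window it serialises against; starts */
        unsigned int overlap_bytes;   /* as the extent and can only grow      */
        /* ... remaining fields elided ... */
    } BdrvTrackedRequest;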
@@ -2238,12 +2240,19 @@
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
 }
 
-static void mark_request_serialising(BdrvTrackedRequest *req)
+static void mark_request_serialising(BdrvTrackedRequest *req, size_t align)
 {
+    int64_t overlap_offset = req->offset & ~(align - 1);
+    int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+                      - overlap_offset;
+
     if (!req->serialising) {
         req->bs->serialising_in_flight++;
         req->serialising = true;
     }
+
+    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
 }
 
 /**
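
The widening arithmetic can be checked in isolation. A minimal standalone sketch, with ROUND_UP redefined locally to match its osdep.h meaning and made-up example values (align must be a power of two for the mask trick to be valid):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ROUND_UP(n, d) ((((n) + (d) - 1) / (d)) * (d))

    int main(void)
    {
        int64_t offset = 4196;   /* request start (unaligned) */
        int64_t bytes  = 512;    /* request length            */
        int64_t align  = 4096;   /* power of two, as in QEMU  */

        int64_t overlap_offset = offset & ~(align - 1);
        int64_t overlap_bytes  = ROUND_UP(offset + bytes, align)
                               - overlap_offset;

        /* [4196, 4708) rounds out to the aligned window [4096, 8192) */
        assert(overlap_offset == 4096);
        assert(overlap_bytes == 4096);
        printf("overlap: [%" PRId64 ", %" PRId64 ")\n",
               overlap_offset, overlap_offset + overlap_bytes);
        return 0;
    }

Because the alignments in play are powers of two that divide one another, the MIN/MAX at the end means repeated calls can only widen the window, so a request never stops guarding a range it already claimed.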
@@ -2267,20 +2276,16 @@
     }
 }
 
-static void round_bytes_to_clusters(BlockDriverState *bs,
-                                    int64_t offset, unsigned int bytes,
-                                    int64_t *cluster_offset,
-                                    unsigned int *cluster_bytes)
+static int bdrv_get_cluster_size(BlockDriverState *bs)
 {
     BlockDriverInfo bdi;
+    int ret;
 
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_offset = offset;
-        *cluster_bytes = bytes;
+    ret = bdrv_get_info(bs, &bdi);
+    if (ret < 0 || bdi.cluster_size == 0) {
+        return bs->request_alignment;
     } else {
-        *cluster_offset = QEMU_ALIGN_DOWN(offset, bdi.cluster_size);
-        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes,
-                                       bdi.cluster_size);
+        return bdi.cluster_size;
     }
 }
 
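bdrv_get_cluster_size() replaces the old per-call rounding helper with a single answer for the serialisation granularity: the image's cluster size when the driver reports one, otherwise the device's request alignment, which is always defined. A sketch of just the fallback rule, with bdrv_get_info() replaced by plain parameters so it stands alone (names are illustrative, not QEMU API):

    #include <stdbool.h>

    /* have_info stands in for a successful bdrv_get_info() call */
    static int serialising_align(bool have_info, int cluster_size,
                                 int request_alignment)
    {
        if (!have_info || cluster_size == 0) {
            return request_alignment;  /* e.g. raw files have no clusters */
        }
        return cluster_size;           /* e.g. 65536 for default qcow2 */
    }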
@@ -2288,11 +2293,11 @@
                                      int64_t offset, unsigned int bytes)
 {
     /*        aaaa   bbbb */
-    if (offset >= req->offset + req->bytes) {
+    if (offset >= req->overlap_offset + req->overlap_bytes) {
         return false;
     }
     /* bbbb   aaaa        */
-    if (req->offset >= offset + bytes) {
+    if (req->overlap_offset >= offset + bytes) {
         return false;
     }
     return true;
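
tracked_request_overlaps() is a standard half-open interval intersection test; the change is only that it now compares against the request's possibly widened overlap window instead of its exact extent. A self-contained sketch of the predicate with two spot checks:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Do [a, a + alen) and [b, b + blen) intersect? Half-open ranges. */
    static bool ranges_overlap(int64_t a, int64_t alen,
                               int64_t b, int64_t blen)
    {
        if (b >= a + alen) {   /*        aaaa   bbbb */
            return false;
        }
        if (a >= b + blen) {   /* bbbb   aaaa        */
            return false;
        }
        return true;
    }

    int main(void)
    {
        assert(!ranges_overlap(0, 4096, 4096, 512)); /* adjacent: disjoint */
        assert(ranges_overlap(0, 4097, 4096, 512));  /* one shared byte */
        return 0;
    }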
@@ -2302,30 +2307,21 @@
 {
     BlockDriverState *bs = self->bs;
     BdrvTrackedRequest *req;
-    int64_t cluster_offset;
-    unsigned int cluster_bytes;
     bool retry;
 
     if (!bs->serialising_in_flight) {
         return;
     }
 
-    /* If we touch the same cluster it counts as an overlap.  This guarantees
-     * that allocating writes will be serialized and not race with each other
-     * for the same cluster.  For example, in copy-on-read it ensures that the
-     * CoR read and write operations are atomic and guest writes cannot
-     * interleave between them.
-     */
-    round_bytes_to_clusters(bs, self->offset, self->bytes,
-                            &cluster_offset, &cluster_bytes);
-
     do {
         retry = false;
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
             if (req == self || (!req->serialising && !self->serialising)) {
                 continue;
             }
-            if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
+            if (tracked_request_overlaps(req, self->overlap_offset,
+                                         self->overlap_bytes))
+            {
                 /* Hitting this means there was a reentrant request, for
                  * example, a block driver issuing nested requests.  This must
                  * never happen since it means deadlock.
@@ -2941,7 +2937,12 @@
 
     /* Handle Copy on Read and associated serialisation */
     if (flags & BDRV_REQ_COPY_ON_READ) {
-        mark_request_serialising(req);
+        /* If we touch the same cluster it counts as an overlap.  This
+         * guarantees that allocating writes will be serialized and not race
+         * with each other for the same cluster.  For example, in copy-on-read
+         * it ensures that the CoR read and write operations are atomic and
+         * guest writes cannot interleave between them. */
+        mark_request_serialising(req, bdrv_get_cluster_size(bs));
     }
 
     wait_serialising_requests(req);
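
Worked example of the copy-on-read path after this change: a 512-byte guest read at offset 4096 on an image with 64 KiB clusters claims the whole containing cluster, so concurrent guest writes anywhere in that cluster wait until the CoR read/write pair completes. The numbers, with the cluster size assumed rather than queried:

    #include <assert.h>
    #include <stdint.h>

    #define ROUND_UP(n, d) ((((n) + (d) - 1) / (d)) * (d))

    int main(void)
    {
        int64_t offset  = 4096;   /* guest read start  */
        int64_t bytes   = 512;    /* guest read length */
        int64_t cluster = 65536;  /* e.g. default qcow2 cluster size */

        int64_t overlap_offset = offset & ~(cluster - 1);
        int64_t overlap_bytes  = ROUND_UP(offset + bytes, cluster)
                               - overlap_offset;

        /* The CoR request serialises against the full cluster [0, 65536) */
        assert(overlap_offset == 0);
        assert(overlap_bytes == 65536);
        return 0;
    }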
