Statistics
| Branch: | Revision:

root / posix-aio-compat.c @ 47e6b251

History | View | Annotate | Download (8.5 kB)

1
/*
2
 * QEMU posix-aio emulation
3
 *
4
 * Copyright IBM, Corp. 2008
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 * Contributions after 2012-01-13 are licensed under the terms of the
13
 * GNU GPL, version 2 or (at your option) any later version.
14
 */
15

    
16
#include <sys/ioctl.h>
17
#include <sys/types.h>
18
#include <pthread.h>
19
#include <unistd.h>
20
#include <errno.h>
21
#include <time.h>
22
#include <string.h>
23
#include <stdlib.h>
24
#include <stdio.h>
25

    
26
#include "qemu-queue.h"
27
#include "osdep.h"
28
#include "sysemu.h"
29
#include "qemu-common.h"
30
#include "trace.h"
31
#include "thread-pool.h"
32
#include "block_int.h"
33
#include "iov.h"
34

    
35
#include "block/raw-posix-aio.h"
36

    
37
/*
 * State for one outstanding emulated-AIO request.  Allocated from
 * raw_aio_pool by paio_submit()/paio_ioctl() and released by aio_worker()
 * when the request completes.
 */
struct qemu_paiocb {
    BlockDriverAIOCB common;    /* generic AIOCB header; must stay first */
    int aio_fildes;             /* file descriptor to operate on */
    union {
        struct iovec *aio_iov;      /* scatter/gather list (read/write) */
        void *aio_ioctl_buf;        /* ioctl argument buffer (QEMU_AIO_IOCTL) */
    };
    int aio_niov;               /* number of entries in aio_iov */
    size_t aio_nbytes;          /* total byte count of the request */
#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
    off_t aio_offset;           /* starting byte offset in the file */
    int aio_type;               /* QEMU_AIO_* type and alignment flags */
};
50

    
51
/*
 * Non-zero while the host preadv()/pwritev() syscalls are believed to work.
 * Seeded from the configure-time probe; handle_aiocb_rw() clears it at
 * runtime if a vectored call fails with -ENOSYS, so we fall back to the
 * linear pread()/pwrite() path from then on.
 */
#ifdef CONFIG_PREADV
static int preadv_present = 1;
#else
static int preadv_present = 0;
#endif
56

    
57
static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
58
{
59
    int ret;
60

    
61
    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
62
    if (ret == -1)
63
        return -errno;
64

    
65
    /*
66
     * This looks weird, but the aio code only considers a request
67
     * successful if it has written the full number of bytes.
68
     *
69
     * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
70
     * so in fact we return the ioctl command here to make posix_aio_read()
71
     * happy..
72
     */
73
    return aiocb->aio_nbytes;
74
}
75

    
76
static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb)
77
{
78
    int ret;
79

    
80
    ret = qemu_fdatasync(aiocb->aio_fildes);
81
    if (ret == -1)
82
        return -errno;
83
    return 0;
84
}
85

    
86
#ifdef CONFIG_PREADV

/* Thin wrappers around the host preadv()/pwritev() syscalls. */

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return preadv(fd, iov, nr_iov, offset);
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return pwritev(fd, iov, nr_iov, offset);
}

#else

/*
 * Host lacks preadv()/pwritev(): always fail with -ENOSYS so
 * handle_aiocb_rw() falls back to the linearizing pread()/pwrite() path.
 */

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

#endif
115

    
116
static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
117
{
118
    ssize_t len;
119

    
120
    do {
121
        if (aiocb->aio_type & QEMU_AIO_WRITE)
122
            len = qemu_pwritev(aiocb->aio_fildes,
123
                               aiocb->aio_iov,
124
                               aiocb->aio_niov,
125
                               aiocb->aio_offset);
126
         else
127
            len = qemu_preadv(aiocb->aio_fildes,
128
                              aiocb->aio_iov,
129
                              aiocb->aio_niov,
130
                              aiocb->aio_offset);
131
    } while (len == -1 && errno == EINTR);
132

    
133
    if (len == -1)
134
        return -errno;
135
    return len;
136
}
137

    
138
/*
 * Read/writes the data to/from a given linear buffer.
 *
 * Returns the number of bytes handled or -errno in case of an error. Short
 * reads are only returned if the end of the file is reached.
 */
static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
{
    ssize_t offset = 0;     /* bytes done so far; becomes -errno on error */
    ssize_t len;

    /* pread/pwrite may transfer less than requested; loop until the full
     * request is done.  offset only goes negative immediately before the
     * break below, so the signed/unsigned comparison here is safe. */
    while (offset < aiocb->aio_nbytes) {
         if (aiocb->aio_type & QEMU_AIO_WRITE)
             len = pwrite(aiocb->aio_fildes,
                          (const char *)buf + offset,
                          aiocb->aio_nbytes - offset,
                          aiocb->aio_offset + offset);
         else
             len = pread(aiocb->aio_fildes,
                         buf + offset,
                         aiocb->aio_nbytes - offset,
                         aiocb->aio_offset + offset);

         /* Interrupted by a signal: retry the same chunk. */
         if (len == -1 && errno == EINTR)
             continue;
         else if (len == -1) {
             offset = -errno;
             break;
         } else if (len == 0)
             /* EOF on read (no forward progress): return the short count. */
             break;

         offset += len;
    }

    return offset;
}
174

    
175
/*
 * Execute a QEMU_AIO_READ/QEMU_AIO_WRITE request.
 *
 * Fast paths (only when every segment is properly aligned):
 *   - single iovec: plain pread/pwrite on it directly;
 *   - multiple iovecs: preadv/pwritev, if the host supports them.
 * Otherwise the segments are linearized through a bounce buffer.
 *
 * Returns bytes transferred or -errno (via handle_aiocb_rw_linear).
 */
static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
{
    ssize_t nbytes;
    char *buf;

    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1)
             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);

        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            /* Complete transfer: done. */
            if (nbytes == aiocb->aio_nbytes)
                return nbytes;
            /* Real error (anything except "syscall unavailable"): done. */
            if (nbytes < 0 && nbytes != -ENOSYS)
                return nbytes;
            /* -ENOSYS: remember the host lacks preadv/pwritev and fall
             * through to the bounce-buffer path below. */
            preadv_present = 0;
        }

        /*
         * XXX(hch): short read/write.  no easy way to handle the reminder
         * using these interfaces.  For now retry using plain
         * pread/pwrite?
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
    if (aiocb->aio_type & QEMU_AIO_WRITE) {
        /* Gather: copy every source segment into the bounce buffer. */
        char *p = buf;
        int i;

        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
        /* Scatter: copy the bounce buffer back out to the iovec segments,
         * clamping each copy to the segment length and to the bytes that
         * remain. */
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len)
                copy = aiocb->aio_iov[i].iov_len;
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p     += copy;
            count -= copy;
        }
    }
    qemu_vfree(buf);

    return nbytes;
}
244

    
245
/*
 * Thread-pool entry point: execute one qemu_paiocb and translate the
 * byte-count result into the pool's completion convention
 * (0 = success, negative errno = failure).  Releases the aiocb.
 */
static int aio_worker(void *arg)
{
    struct qemu_paiocb *aiocb = arg;
    ssize_t ret = 0;

    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
    case QEMU_AIO_READ:
        ret = handle_aiocb_rw(aiocb);
        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
            /* A short read means that we have reached EOF. Pad the buffer
             * with zeros for bytes after EOF. */
            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
                       0, aiocb->aio_nbytes - ret);

            ret = aiocb->aio_nbytes;
        }
        /* Full transfer is success; a short read on a non-growable device
         * is reported as -EINVAL. */
        if (ret == aiocb->aio_nbytes) {
            ret = 0;
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
            ret = -EINVAL;
        }
        break;
    case QEMU_AIO_WRITE:
        ret = handle_aiocb_rw(aiocb);
        /* Only a complete write counts as success; short writes -> -EINVAL. */
        if (ret == aiocb->aio_nbytes) {
            ret = 0;
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
            ret = -EINVAL;
        }
        break;
    case QEMU_AIO_FLUSH:
        ret = handle_aiocb_flush(aiocb);
        break;
    case QEMU_AIO_IOCTL:
        ret = handle_aiocb_ioctl(aiocb);
        break;
    default:
        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
        ret = -EINVAL;
        break;
    }

    qemu_aio_release(aiocb);
    return ret;
}
290

    
291
/* AIOCB pool so qemu_aio_get() hands out qemu_paiocb-sized control blocks. */
static AIOPool raw_aio_pool = {
    .aiocb_size         = sizeof(struct qemu_paiocb),
};
294

    
295
BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
296
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
297
        BlockDriverCompletionFunc *cb, void *opaque, int type)
298
{
299
    struct qemu_paiocb *acb;
300

    
301
    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
302
    acb->aio_type = type;
303
    acb->aio_fildes = fd;
304

    
305
    if (qiov) {
306
        acb->aio_iov = qiov->iov;
307
        acb->aio_niov = qiov->niov;
308
    }
309
    acb->aio_nbytes = nb_sectors * 512;
310
    acb->aio_offset = sector_num * 512;
311

    
312
    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
313
    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
314
}
315

    
316
/*
 * Queue an ioctl request for asynchronous execution in the thread pool.
 *
 * @req is the ioctl command; it is stashed in the aio_nbytes field via the
 * aio_ioctl_cmd alias, and @buf rides in the union as aio_ioctl_buf.
 * Completion semantics are those of handle_aiocb_ioctl().
 */
BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    struct qemu_paiocb *acb;

    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
    acb->aio_type = QEMU_AIO_IOCTL;
    acb->aio_fildes = fd;
    acb->aio_offset = 0;
    acb->aio_ioctl_buf = buf;
    acb->aio_ioctl_cmd = req;

    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}