Statistics
| Branch: | Revision:

root / block / gluster.c @ 737e150e

History | View | Annotate | Download (17 kB)

1 8d6d89cb Bharata B Rao
/*
2 8d6d89cb Bharata B Rao
 * GlusterFS backend for QEMU
3 8d6d89cb Bharata B Rao
 *
4 8d6d89cb Bharata B Rao
 * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
5 8d6d89cb Bharata B Rao
 *
6 8d6d89cb Bharata B Rao
 * Pipe handling mechanism in AIO implementation is derived from
7 8d6d89cb Bharata B Rao
 * block/rbd.c. Hence,
8 8d6d89cb Bharata B Rao
 *
9 8d6d89cb Bharata B Rao
 * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
10 8d6d89cb Bharata B Rao
 *                         Josh Durgin <josh.durgin@dreamhost.com>
11 8d6d89cb Bharata B Rao
 *
12 8d6d89cb Bharata B Rao
 * This work is licensed under the terms of the GNU GPL, version 2.  See
13 8d6d89cb Bharata B Rao
 * the COPYING file in the top-level directory.
14 8d6d89cb Bharata B Rao
 *
15 8d6d89cb Bharata B Rao
 * Contributions after 2012-01-13 are licensed under the terms of the
16 8d6d89cb Bharata B Rao
 * GNU GPL, version 2 or (at your option) any later version.
17 8d6d89cb Bharata B Rao
 */
18 8d6d89cb Bharata B Rao
#include <glusterfs/api/glfs.h>
19 737e150e Paolo Bonzini
#include "block/block_int.h"
20 8d6d89cb Bharata B Rao
#include "qemu_socket.h"
21 8d6d89cb Bharata B Rao
#include "uri.h"
22 8d6d89cb Bharata B Rao
23 8d6d89cb Bharata B Rao
typedef struct GlusterAIOCB {
24 8d6d89cb Bharata B Rao
    BlockDriverAIOCB common;
25 8d6d89cb Bharata B Rao
    int64_t size;
26 8d6d89cb Bharata B Rao
    int ret;
27 8d6d89cb Bharata B Rao
    bool *finished;
28 8d6d89cb Bharata B Rao
    QEMUBH *bh;
29 8d6d89cb Bharata B Rao
} GlusterAIOCB;
30 8d6d89cb Bharata B Rao
31 8d6d89cb Bharata B Rao
typedef struct BDRVGlusterState {
32 8d6d89cb Bharata B Rao
    struct glfs *glfs;
33 8d6d89cb Bharata B Rao
    int fds[2];
34 8d6d89cb Bharata B Rao
    struct glfs_fd *fd;
35 8d6d89cb Bharata B Rao
    int qemu_aio_count;
36 8d6d89cb Bharata B Rao
    int event_reader_pos;
37 8d6d89cb Bharata B Rao
    GlusterAIOCB *event_acb;
38 8d6d89cb Bharata B Rao
} BDRVGlusterState;
39 8d6d89cb Bharata B Rao
40 8d6d89cb Bharata B Rao
/*
 * Indices into BDRVGlusterState.fds[]: completions are read from the first
 * descriptor and written to the second.
 */
#define GLUSTER_FD_READ  0
#define GLUSTER_FD_WRITE 1
42 8d6d89cb Bharata B Rao
43 8d6d89cb Bharata B Rao
/*
 * Connection parameters extracted from a gluster URI.  All strings are
 * heap-allocated and released by qemu_gluster_gconf_free().
 */
typedef struct GlusterConf {
    /* Host from the URI, or the unix socket path for the unix transport. */
    char *server;
    /* Port from the URI; left at 0 when none was given. */
    int port;
    /* Name of the gluster volume (first path component). */
    char *volname;
    /* Path of the image inside the volume (rest of the URI path). */
    char *image;
    /* "tcp", "unix" or "rdma". */
    char *transport;
} GlusterConf;
50 8d6d89cb Bharata B Rao
51 8d6d89cb Bharata B Rao
/*
 * Release a GlusterConf together with every string it owns.
 *
 * @gconf: configuration to free; NULL is accepted and ignored, mirroring
 *         the behaviour of g_free().
 */
static void qemu_gluster_gconf_free(GlusterConf *gconf)
{
    if (!gconf) {
        return;
    }

    g_free(gconf->server);
    g_free(gconf->volname);
    g_free(gconf->image);
    g_free(gconf->transport);
    g_free(gconf);
}
59 8d6d89cb Bharata B Rao
60 8d6d89cb Bharata B Rao
/*
 * Split the path part of a gluster URI into volume name and image path.
 *
 * @gconf: receives newly allocated volname and image strings.
 * @path:  URI path of the form "/volname/image"; repeated '/' separators
 *         are skipped.
 *
 * Returns 0 on success, -EINVAL when @path is NULL or when either the volume
 * or the image component is missing.  On failure gconf->volname may already
 * have been set.
 */
static int parse_volume_options(GlusterConf *gconf, char *path)
{
    char *vol_begin, *vol_end, *image;

    if (path == NULL) {
        return -EINVAL;
    }

    /* volume: everything between the leading slashes and the next '/' */
    vol_begin = path + strspn(path, "/");
    vol_end = vol_begin + strcspn(vol_begin, "/");
    if (*vol_end == '\0') {
        return -EINVAL;
    }
    gconf->volname = g_strndup(vol_begin, vol_end - vol_begin);

    /* image: whatever follows the separator(s) after the volume name */
    image = vol_end + strspn(vol_end, "/");
    if (*image == '\0') {
        return -EINVAL;
    }
    gconf->image = g_strdup(image);

    return 0;
}
84 8d6d89cb Bharata B Rao
85 8d6d89cb Bharata B Rao
/*
86 8d6d89cb Bharata B Rao
 * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
87 8d6d89cb Bharata B Rao
 *
88 8d6d89cb Bharata B Rao
 * 'gluster' is the protocol.
89 8d6d89cb Bharata B Rao
 *
90 8d6d89cb Bharata B Rao
 * 'transport' specifies the transport type used to connect to gluster
91 8d6d89cb Bharata B Rao
 * management daemon (glusterd). Valid transport types are
92 8d6d89cb Bharata B Rao
 * tcp, unix and rdma. If a transport type isn't specified, then tcp
93 8d6d89cb Bharata B Rao
 * type is assumed.
94 8d6d89cb Bharata B Rao
 *
95 8d6d89cb Bharata B Rao
 * 'server' specifies the server where the volume file specification for
96 8d6d89cb Bharata B Rao
 * the given volume resides. This can be either hostname, ipv4 address
97 8d6d89cb Bharata B Rao
 * or ipv6 address. ipv6 address needs to be within square brackets [ ].
98 8d6d89cb Bharata B Rao
 * If transport type is 'unix', then 'server' field should not be specified.
99 8d6d89cb Bharata B Rao
 * The 'socket' field needs to be populated with the path to unix domain
100 8d6d89cb Bharata B Rao
 * socket.
101 8d6d89cb Bharata B Rao
 *
102 8d6d89cb Bharata B Rao
 * 'port' is the port number on which glusterd is listening. This is optional
103 8d6d89cb Bharata B Rao
 * and if not specified, QEMU will send 0 which will make gluster use the
104 8d6d89cb Bharata B Rao
 * default port. If the transport type is unix, then 'port' should not be
105 8d6d89cb Bharata B Rao
 * specified.
106 8d6d89cb Bharata B Rao
 *
107 8d6d89cb Bharata B Rao
 * 'volname' is the name of the gluster volume which contains the VM image.
108 8d6d89cb Bharata B Rao
 *
109 8d6d89cb Bharata B Rao
 * 'image' is the path to the actual VM image that resides on gluster volume.
110 8d6d89cb Bharata B Rao
 *
111 8d6d89cb Bharata B Rao
 * Examples:
112 8d6d89cb Bharata B Rao
 *
113 8d6d89cb Bharata B Rao
 * file=gluster://1.2.3.4/testvol/a.img
114 8d6d89cb Bharata B Rao
 * file=gluster+tcp://1.2.3.4/testvol/a.img
115 8d6d89cb Bharata B Rao
 * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
116 8d6d89cb Bharata B Rao
 * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
117 8d6d89cb Bharata B Rao
 * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
118 8d6d89cb Bharata B Rao
 * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
119 8d6d89cb Bharata B Rao
 * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
120 8d6d89cb Bharata B Rao
 * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
121 8d6d89cb Bharata B Rao
 */
122 8d6d89cb Bharata B Rao
/*
 * Parse a URI of the form
 *   gluster[+transport]://[server[:port]]/volname/image[?socket=...]
 * into @gconf.
 *
 * @gconf:    configuration to populate.  On failure it may be left partially
 *            filled in; the caller still owns it and must free it.
 * @filename: URI string.
 *
 * Returns 0 on success and -EINVAL when the URI cannot be parsed, has no or
 * an unknown scheme, lacks the volume or image component, or carries query
 * parameters that do not fit the transport (exactly one "socket" parameter
 * and no server/port for unix, no parameters otherwise).
 */
static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
{
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
    int ret = 0;

    uri = uri_parse(filename);
    if (!uri) {
        return -EINVAL;
    }

    /* transport */
    if (!uri->scheme) {
        /* No scheme present: reject it rather than hand NULL to strcmp() */
        ret = -EINVAL;
        goto out;
    } else if (!strcmp(uri->scheme, "gluster")) {
        gconf->transport = g_strdup("tcp");
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
        gconf->transport = g_strdup("tcp");
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
        gconf->transport = g_strdup("unix");
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
        gconf->transport = g_strdup("rdma");
    } else {
        ret = -EINVAL;
        goto out;
    }

    ret = parse_volume_options(gconf, uri->path);
    if (ret < 0) {
        goto out;
    }

    /* unix transport needs exactly one parameter, the others take none */
    qp = query_params_parse(uri->query);
    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
        ret = -EINVAL;
        goto out;
    }

    if (is_unix) {
        if (uri->server || uri->port) {
            ret = -EINVAL;
            goto out;
        }
        if (strcmp(qp->p[0].name, "socket")) {
            ret = -EINVAL;
            goto out;
        }
        /* the socket path takes the place of the server name */
        gconf->server = g_strdup(qp->p[0].value);
    } else {
        gconf->server = g_strdup(uri->server);
        gconf->port = uri->port;
    }

out:
    if (qp) {
        query_params_free(qp);
    }
    uri_free(uri);
    return ret;
}
182 8d6d89cb Bharata B Rao
183 8d6d89cb Bharata B Rao
/*
 * Parse @filename into @gconf and set up a gluster connection for the volume
 * it names.
 *
 * @gconf:    configuration to populate; owned and freed by the caller.
 * @filename: gluster URI.
 *
 * Returns the initialised glfs handle, or NULL on failure.  On failure errno
 * is always left non-zero so that callers computing -errno never mistake the
 * failure for success; EINVAL is used when no error code is available.
 */
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
{
    struct glfs *glfs = NULL;
    int ret;
    int old_errno;

    ret = qemu_gluster_parseuri(gconf, filename);
    if (ret < 0) {
        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
            "volname/image[?socket=...]");
        errno = -ret;
        goto out;
    }

    glfs = glfs_new(gconf->volname);
    if (!glfs) {
        goto out;
    }

    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
            gconf->port);
    if (ret < 0) {
        goto out;
    }

    /*
     * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
     * GlusterFS makes GF_LOG_* macros available to libgfapi users.
     */
    ret = glfs_set_logging(glfs, "-", 4);
    if (ret < 0) {
        goto out;
    }

    ret = glfs_init(glfs);
    if (ret) {
        /* Reporting the error must not disturb the code we hand back */
        old_errno = errno;
        /* server is NULL when the URI named no host; keep %s well-defined */
        error_report("Gluster connection failed for server=%s port=%d "
             "volume=%s image=%s transport=%s",
             gconf->server ? gconf->server : "(none)", gconf->port,
             gconf->volname, gconf->image, gconf->transport);
        errno = old_errno;
        goto out;
    }
    return glfs;

out:
    old_errno = errno;
    if (glfs) {
        glfs_fini(glfs);
    }
    /* Never report failure with errno == 0 */
    errno = old_errno ? old_errno : EINVAL;
    return NULL;
}
234 8d6d89cb Bharata B Rao
235 8d6d89cb Bharata B Rao
/*
 * Finish a request: translate the gluster result into a block-layer status,
 * release the AIOCB and invoke the caller's completion callback.
 *
 * A result of 0 or exactly acb->size is success, a negative result is passed
 * through, and any other (short) transfer is reported as -EIO.
 */
static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
{
    /* Copy out everything still needed; acb is released before the callback */
    BlockDriverCompletionFunc *complete = acb->common.cb;
    void *cb_opaque = acb->common.opaque;
    bool *done_flag = acb->finished;
    int status;

    if (acb->ret != 0 && acb->ret != acb->size) {
        /* Read/Write failed, or partial read/write which is failed too */
        status = (acb->ret < 0) ? acb->ret : -EIO;
    } else {
        status = 0; /* Success */
    }

    s->qemu_aio_count--;
    qemu_aio_release(acb);
    complete(cb_opaque, status);

    if (done_flag != NULL) {
        *done_flag = true;
    }
}
257 8d6d89cb Bharata B Rao
258 8d6d89cb Bharata B Rao
static void qemu_gluster_aio_event_reader(void *opaque)
259 8d6d89cb Bharata B Rao
{
260 8d6d89cb Bharata B Rao
    BDRVGlusterState *s = opaque;
261 8d6d89cb Bharata B Rao
    ssize_t ret;
262 8d6d89cb Bharata B Rao
263 8d6d89cb Bharata B Rao
    do {
264 8d6d89cb Bharata B Rao
        char *p = (char *)&s->event_acb;
265 8d6d89cb Bharata B Rao
266 8d6d89cb Bharata B Rao
        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
267 8d6d89cb Bharata B Rao
                   sizeof(s->event_acb) - s->event_reader_pos);
268 8d6d89cb Bharata B Rao
        if (ret > 0) {
269 8d6d89cb Bharata B Rao
            s->event_reader_pos += ret;
270 8d6d89cb Bharata B Rao
            if (s->event_reader_pos == sizeof(s->event_acb)) {
271 8d6d89cb Bharata B Rao
                s->event_reader_pos = 0;
272 8d6d89cb Bharata B Rao
                qemu_gluster_complete_aio(s->event_acb, s);
273 8d6d89cb Bharata B Rao
            }
274 8d6d89cb Bharata B Rao
        }
275 8d6d89cb Bharata B Rao
    } while (ret < 0 && errno == EINTR);
276 8d6d89cb Bharata B Rao
}
277 8d6d89cb Bharata B Rao
278 8d6d89cb Bharata B Rao
static int qemu_gluster_aio_flush_cb(void *opaque)
279 8d6d89cb Bharata B Rao
{
280 8d6d89cb Bharata B Rao
    BDRVGlusterState *s = opaque;
281 8d6d89cb Bharata B Rao
282 8d6d89cb Bharata B Rao
    return (s->qemu_aio_count > 0);
283 8d6d89cb Bharata B Rao
}
284 8d6d89cb Bharata B Rao
285 8d6d89cb Bharata B Rao
/*
 * Open the image named by the gluster URI @filename.
 *
 * Connects to the volume, opens the image with flags derived from
 * @bdrv_flags (read-write vs. read-only, O_DIRECT for BDRV_O_NOCACHE),
 * creates the completion pipe and registers its read end with the AIO layer.
 *
 * Returns 0 on success or a negative errno value.  A failure is never
 * reported as 0: when errno carries no code, -EINVAL is returned.  On failure
 * everything acquired so far is released and the handles in the driver state
 * are reset to NULL.
 */
static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
    int bdrv_flags)
{
    BDRVGlusterState *s = bs->opaque;
    int open_flags = O_BINARY;
    int ret = 0;
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));

    s->glfs = qemu_gluster_init(gconf, filename);
    if (!s->glfs) {
        ret = errno ? -errno : -EINVAL;
        goto out;
    }

    if (bdrv_flags & BDRV_O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
    }

    if ((bdrv_flags & BDRV_O_NOCACHE)) {
        open_flags |= O_DIRECT;
    }

    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
    if (!s->fd) {
        ret = errno ? -errno : -EINVAL;
        goto out;
    }

    ret = qemu_pipe(s->fds);
    if (ret < 0) {
        ret = errno ? -errno : -EINVAL;
        goto out;
    }
    /* The event reader relies on non-blocking reads from the pipe */
    if (fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK) < 0) {
        ret = errno ? -errno : -EINVAL;
        close(s->fds[GLUSTER_FD_READ]);
        close(s->fds[GLUSTER_FD_WRITE]);
        goto out;
    }
    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);

out:
    qemu_gluster_gconf_free(gconf);
    if (!ret) {
        return ret;
    }
    /* Failure: undo what was set up and leave no dangling handles behind */
    if (s->fd) {
        glfs_close(s->fd);
        s->fd = NULL;
    }
    if (s->glfs) {
        glfs_fini(s->glfs);
        s->glfs = NULL;
    }
    return ret;
}
337 8d6d89cb Bharata B Rao
338 8d6d89cb Bharata B Rao
static int qemu_gluster_create(const char *filename,
339 8d6d89cb Bharata B Rao
        QEMUOptionParameter *options)
340 8d6d89cb Bharata B Rao
{
341 8d6d89cb Bharata B Rao
    struct glfs *glfs;
342 8d6d89cb Bharata B Rao
    struct glfs_fd *fd;
343 8d6d89cb Bharata B Rao
    int ret = 0;
344 8d6d89cb Bharata B Rao
    int64_t total_size = 0;
345 8d6d89cb Bharata B Rao
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
346 8d6d89cb Bharata B Rao
347 8d6d89cb Bharata B Rao
    glfs = qemu_gluster_init(gconf, filename);
348 8d6d89cb Bharata B Rao
    if (!glfs) {
349 8d6d89cb Bharata B Rao
        ret = -errno;
350 8d6d89cb Bharata B Rao
        goto out;
351 8d6d89cb Bharata B Rao
    }
352 8d6d89cb Bharata B Rao
353 8d6d89cb Bharata B Rao
    while (options && options->name) {
354 8d6d89cb Bharata B Rao
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
355 8d6d89cb Bharata B Rao
            total_size = options->value.n / BDRV_SECTOR_SIZE;
356 8d6d89cb Bharata B Rao
        }
357 8d6d89cb Bharata B Rao
        options++;
358 8d6d89cb Bharata B Rao
    }
359 8d6d89cb Bharata B Rao
360 8d6d89cb Bharata B Rao
    fd = glfs_creat(glfs, gconf->image,
361 8d6d89cb Bharata B Rao
        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
362 8d6d89cb Bharata B Rao
    if (!fd) {
363 8d6d89cb Bharata B Rao
        ret = -errno;
364 8d6d89cb Bharata B Rao
    } else {
365 8d6d89cb Bharata B Rao
        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
366 8d6d89cb Bharata B Rao
            ret = -errno;
367 8d6d89cb Bharata B Rao
        }
368 8d6d89cb Bharata B Rao
        if (glfs_close(fd) != 0) {
369 8d6d89cb Bharata B Rao
            ret = -errno;
370 8d6d89cb Bharata B Rao
        }
371 8d6d89cb Bharata B Rao
    }
372 8d6d89cb Bharata B Rao
out:
373 8d6d89cb Bharata B Rao
    qemu_gluster_gconf_free(gconf);
374 8d6d89cb Bharata B Rao
    if (glfs) {
375 8d6d89cb Bharata B Rao
        glfs_fini(glfs);
376 8d6d89cb Bharata B Rao
    }
377 8d6d89cb Bharata B Rao
    return ret;
378 8d6d89cb Bharata B Rao
}
379 8d6d89cb Bharata B Rao
380 8d6d89cb Bharata B Rao
/*
 * Cancel callback for gluster AIOCBs.
 *
 * The request is not actually aborted: a completion flag is attached to the
 * AIOCB and qemu_aio_wait() is called until the completion path sets it.
 */
static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
{
    bool done = false;
    GlusterAIOCB *req = (GlusterAIOCB *)blockacb;

    /* qemu_gluster_complete_aio() flips this flag once the request is done */
    req->finished = &done;

    while (!done) {
        qemu_aio_wait();
    }
}
390 8d6d89cb Bharata B Rao
391 d7331bed Stefan Hajnoczi
static const AIOCBInfo gluster_aiocb_info = {
392 8d6d89cb Bharata B Rao
    .aiocb_size = sizeof(GlusterAIOCB),
393 8d6d89cb Bharata B Rao
    .cancel = qemu_gluster_aio_cancel,
394 8d6d89cb Bharata B Rao
};
395 8d6d89cb Bharata B Rao
396 8d6d89cb Bharata B Rao
static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
397 8d6d89cb Bharata B Rao
{
398 8d6d89cb Bharata B Rao
    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
399 8d6d89cb Bharata B Rao
    BlockDriverState *bs = acb->common.bs;
400 8d6d89cb Bharata B Rao
    BDRVGlusterState *s = bs->opaque;
401 8d6d89cb Bharata B Rao
    int retval;
402 8d6d89cb Bharata B Rao
403 8d6d89cb Bharata B Rao
    acb->ret = ret;
404 8d6d89cb Bharata B Rao
    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
405 8d6d89cb Bharata B Rao
    if (retval != sizeof(acb)) {
406 8d6d89cb Bharata B Rao
        /*
407 8d6d89cb Bharata B Rao
         * Gluster AIO callback thread failed to notify the waiting
408 8d6d89cb Bharata B Rao
         * QEMU thread about IO completion.
409 8d6d89cb Bharata B Rao
         *
410 8d6d89cb Bharata B Rao
         * Complete this IO request and make the disk inaccessible for
411 8d6d89cb Bharata B Rao
         * subsequent reads and writes.
412 8d6d89cb Bharata B Rao
         */
413 8d6d89cb Bharata B Rao
        error_report("Gluster failed to notify QEMU about IO completion");
414 8d6d89cb Bharata B Rao
415 8d6d89cb Bharata B Rao
        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
416 8d6d89cb Bharata B Rao
        acb->common.cb(acb->common.opaque, -EIO);
417 8d6d89cb Bharata B Rao
        qemu_aio_release(acb);
418 8d6d89cb Bharata B Rao
        s->qemu_aio_count--;
419 8d6d89cb Bharata B Rao
        close(s->fds[GLUSTER_FD_READ]);
420 8d6d89cb Bharata B Rao
        close(s->fds[GLUSTER_FD_WRITE]);
421 8d6d89cb Bharata B Rao
        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
422 8d6d89cb Bharata B Rao
            NULL);
423 8d6d89cb Bharata B Rao
        bs->drv = NULL; /* Make the disk inaccessible */
424 8d6d89cb Bharata B Rao
        qemu_mutex_unlock_iothread();
425 8d6d89cb Bharata B Rao
    }
426 8d6d89cb Bharata B Rao
}
427 8d6d89cb Bharata B Rao
428 8d6d89cb Bharata B Rao
static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
429 8d6d89cb Bharata B Rao
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
430 8d6d89cb Bharata B Rao
        BlockDriverCompletionFunc *cb, void *opaque, int write)
431 8d6d89cb Bharata B Rao
{
432 8d6d89cb Bharata B Rao
    int ret;
433 8d6d89cb Bharata B Rao
    GlusterAIOCB *acb;
434 8d6d89cb Bharata B Rao
    BDRVGlusterState *s = bs->opaque;
435 8d6d89cb Bharata B Rao
    size_t size;
436 8d6d89cb Bharata B Rao
    off_t offset;
437 8d6d89cb Bharata B Rao
438 8d6d89cb Bharata B Rao
    offset = sector_num * BDRV_SECTOR_SIZE;
439 8d6d89cb Bharata B Rao
    size = nb_sectors * BDRV_SECTOR_SIZE;
440 8d6d89cb Bharata B Rao
    s->qemu_aio_count++;
441 8d6d89cb Bharata B Rao
442 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
443 8d6d89cb Bharata B Rao
    acb->size = size;
444 8d6d89cb Bharata B Rao
    acb->ret = 0;
445 8d6d89cb Bharata B Rao
    acb->finished = NULL;
446 8d6d89cb Bharata B Rao
447 8d6d89cb Bharata B Rao
    if (write) {
448 8d6d89cb Bharata B Rao
        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
449 8d6d89cb Bharata B Rao
            &gluster_finish_aiocb, acb);
450 8d6d89cb Bharata B Rao
    } else {
451 8d6d89cb Bharata B Rao
        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
452 8d6d89cb Bharata B Rao
            &gluster_finish_aiocb, acb);
453 8d6d89cb Bharata B Rao
    }
454 8d6d89cb Bharata B Rao
455 8d6d89cb Bharata B Rao
    if (ret < 0) {
456 8d6d89cb Bharata B Rao
        goto out;
457 8d6d89cb Bharata B Rao
    }
458 8d6d89cb Bharata B Rao
    return &acb->common;
459 8d6d89cb Bharata B Rao
460 8d6d89cb Bharata B Rao
out:
461 8d6d89cb Bharata B Rao
    s->qemu_aio_count--;
462 8d6d89cb Bharata B Rao
    qemu_aio_release(acb);
463 8d6d89cb Bharata B Rao
    return NULL;
464 8d6d89cb Bharata B Rao
}
465 8d6d89cb Bharata B Rao
466 8d6d89cb Bharata B Rao
static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
467 8d6d89cb Bharata B Rao
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
468 8d6d89cb Bharata B Rao
        BlockDriverCompletionFunc *cb, void *opaque)
469 8d6d89cb Bharata B Rao
{
470 8d6d89cb Bharata B Rao
    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
471 8d6d89cb Bharata B Rao
}
472 8d6d89cb Bharata B Rao
473 8d6d89cb Bharata B Rao
static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
474 8d6d89cb Bharata B Rao
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
475 8d6d89cb Bharata B Rao
        BlockDriverCompletionFunc *cb, void *opaque)
476 8d6d89cb Bharata B Rao
{
477 8d6d89cb Bharata B Rao
    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
478 8d6d89cb Bharata B Rao
}
479 8d6d89cb Bharata B Rao
480 8d6d89cb Bharata B Rao
static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
481 8d6d89cb Bharata B Rao
        BlockDriverCompletionFunc *cb, void *opaque)
482 8d6d89cb Bharata B Rao
{
483 8d6d89cb Bharata B Rao
    int ret;
484 8d6d89cb Bharata B Rao
    GlusterAIOCB *acb;
485 8d6d89cb Bharata B Rao
    BDRVGlusterState *s = bs->opaque;
486 8d6d89cb Bharata B Rao
487 d7331bed Stefan Hajnoczi
    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
488 8d6d89cb Bharata B Rao
    acb->size = 0;
489 8d6d89cb Bharata B Rao
    acb->ret = 0;
490 8d6d89cb Bharata B Rao
    acb->finished = NULL;
491 8d6d89cb Bharata B Rao
    s->qemu_aio_count++;
492 8d6d89cb Bharata B Rao
493 8d6d89cb Bharata B Rao
    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
494 8d6d89cb Bharata B Rao
    if (ret < 0) {
495 8d6d89cb Bharata B Rao
        goto out;
496 8d6d89cb Bharata B Rao
    }
497 8d6d89cb Bharata B Rao
    return &acb->common;
498 8d6d89cb Bharata B Rao
499 8d6d89cb Bharata B Rao
out:
500 8d6d89cb Bharata B Rao
    s->qemu_aio_count--;
501 8d6d89cb Bharata B Rao
    qemu_aio_release(acb);
502 8d6d89cb Bharata B Rao
    return NULL;
503 8d6d89cb Bharata B Rao
}
504 8d6d89cb Bharata B Rao
505 8d6d89cb Bharata B Rao
/*
 * Return the image length in bytes, obtained by seeking to the end of the
 * file, or a negative errno value if the seek fails.
 */
static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{
    BDRVGlusterState *s = bs->opaque;
    int64_t end = glfs_lseek(s->fd, 0, SEEK_END);

    if (end >= 0) {
        return end;
    }
    return -errno;
}
517 8d6d89cb Bharata B Rao
518 8d6d89cb Bharata B Rao
/*
 * Return the number of bytes actually allocated for the image, computed as
 * st_blocks * 512 from glfs_fstat(), or a negative errno value if the stat
 * call fails.
 */
static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
{
    BDRVGlusterState *s = bs->opaque;
    struct stat st;
    int ret;

    ret = glfs_fstat(s->fd, &st);
    if (ret < 0) {
        return -errno;
    } else {
        /* Widen first: st_blocks may be a 32-bit type on some platforms */
        return (int64_t)st.st_blocks * 512;
    }
}
531 8d6d89cb Bharata B Rao
532 8d6d89cb Bharata B Rao
/*
 * Tear down a gluster-backed device: unregister the completion-pipe handler,
 * close the pipe, close the image and shut down the gluster connection.
 */
static void qemu_gluster_close(BlockDriverState *bs)
{
    BDRVGlusterState *s = bs->opaque;

    /*
     * Unregister before closing: once the descriptor is closed its number
     * may be reused, and the handler must not stay attached to it.
     */
    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
    close(s->fds[GLUSTER_FD_READ]);
    close(s->fds[GLUSTER_FD_WRITE]);

    if (s->fd) {
        glfs_close(s->fd);
        s->fd = NULL;
    }
    glfs_fini(s->glfs);
}
546 8d6d89cb Bharata B Rao
547 8d6d89cb Bharata B Rao
static QEMUOptionParameter qemu_gluster_create_options[] = {
548 8d6d89cb Bharata B Rao
    {
549 8d6d89cb Bharata B Rao
        .name = BLOCK_OPT_SIZE,
550 8d6d89cb Bharata B Rao
        .type = OPT_SIZE,
551 8d6d89cb Bharata B Rao
        .help = "Virtual disk size"
552 8d6d89cb Bharata B Rao
    },
553 8d6d89cb Bharata B Rao
    { NULL }
554 8d6d89cb Bharata B Rao
};
555 8d6d89cb Bharata B Rao
556 8d6d89cb Bharata B Rao
static BlockDriver bdrv_gluster = {
557 8d6d89cb Bharata B Rao
    .format_name                  = "gluster",
558 8d6d89cb Bharata B Rao
    .protocol_name                = "gluster",
559 8d6d89cb Bharata B Rao
    .instance_size                = sizeof(BDRVGlusterState),
560 8d6d89cb Bharata B Rao
    .bdrv_file_open               = qemu_gluster_open,
561 8d6d89cb Bharata B Rao
    .bdrv_close                   = qemu_gluster_close,
562 8d6d89cb Bharata B Rao
    .bdrv_create                  = qemu_gluster_create,
563 8d6d89cb Bharata B Rao
    .bdrv_getlength               = qemu_gluster_getlength,
564 8d6d89cb Bharata B Rao
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
565 8d6d89cb Bharata B Rao
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
566 8d6d89cb Bharata B Rao
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
567 8d6d89cb Bharata B Rao
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
568 8d6d89cb Bharata B Rao
    .create_options               = qemu_gluster_create_options,
569 8d6d89cb Bharata B Rao
};
570 8d6d89cb Bharata B Rao
571 8d6d89cb Bharata B Rao
static BlockDriver bdrv_gluster_tcp = {
572 8d6d89cb Bharata B Rao
    .format_name                  = "gluster",
573 8d6d89cb Bharata B Rao
    .protocol_name                = "gluster+tcp",
574 8d6d89cb Bharata B Rao
    .instance_size                = sizeof(BDRVGlusterState),
575 8d6d89cb Bharata B Rao
    .bdrv_file_open               = qemu_gluster_open,
576 8d6d89cb Bharata B Rao
    .bdrv_close                   = qemu_gluster_close,
577 8d6d89cb Bharata B Rao
    .bdrv_create                  = qemu_gluster_create,
578 8d6d89cb Bharata B Rao
    .bdrv_getlength               = qemu_gluster_getlength,
579 8d6d89cb Bharata B Rao
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
580 8d6d89cb Bharata B Rao
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
581 8d6d89cb Bharata B Rao
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
582 8d6d89cb Bharata B Rao
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
583 8d6d89cb Bharata B Rao
    .create_options               = qemu_gluster_create_options,
584 8d6d89cb Bharata B Rao
};
585 8d6d89cb Bharata B Rao
586 8d6d89cb Bharata B Rao
static BlockDriver bdrv_gluster_unix = {
587 8d6d89cb Bharata B Rao
    .format_name                  = "gluster",
588 8d6d89cb Bharata B Rao
    .protocol_name                = "gluster+unix",
589 8d6d89cb Bharata B Rao
    .instance_size                = sizeof(BDRVGlusterState),
590 8d6d89cb Bharata B Rao
    .bdrv_file_open               = qemu_gluster_open,
591 8d6d89cb Bharata B Rao
    .bdrv_close                   = qemu_gluster_close,
592 8d6d89cb Bharata B Rao
    .bdrv_create                  = qemu_gluster_create,
593 8d6d89cb Bharata B Rao
    .bdrv_getlength               = qemu_gluster_getlength,
594 8d6d89cb Bharata B Rao
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
595 8d6d89cb Bharata B Rao
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
596 8d6d89cb Bharata B Rao
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
597 8d6d89cb Bharata B Rao
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
598 8d6d89cb Bharata B Rao
    .create_options               = qemu_gluster_create_options,
599 8d6d89cb Bharata B Rao
};
600 8d6d89cb Bharata B Rao
601 8d6d89cb Bharata B Rao
static BlockDriver bdrv_gluster_rdma = {
602 8d6d89cb Bharata B Rao
    .format_name                  = "gluster",
603 8d6d89cb Bharata B Rao
    .protocol_name                = "gluster+rdma",
604 8d6d89cb Bharata B Rao
    .instance_size                = sizeof(BDRVGlusterState),
605 8d6d89cb Bharata B Rao
    .bdrv_file_open               = qemu_gluster_open,
606 8d6d89cb Bharata B Rao
    .bdrv_close                   = qemu_gluster_close,
607 8d6d89cb Bharata B Rao
    .bdrv_create                  = qemu_gluster_create,
608 8d6d89cb Bharata B Rao
    .bdrv_getlength               = qemu_gluster_getlength,
609 8d6d89cb Bharata B Rao
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
610 8d6d89cb Bharata B Rao
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
611 8d6d89cb Bharata B Rao
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
612 8d6d89cb Bharata B Rao
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
613 8d6d89cb Bharata B Rao
    .create_options               = qemu_gluster_create_options,
614 8d6d89cb Bharata B Rao
};
615 8d6d89cb Bharata B Rao
616 8d6d89cb Bharata B Rao
static void bdrv_gluster_init(void)
617 8d6d89cb Bharata B Rao
{
618 8d6d89cb Bharata B Rao
    bdrv_register(&bdrv_gluster_rdma);
619 8d6d89cb Bharata B Rao
    bdrv_register(&bdrv_gluster_unix);
620 8d6d89cb Bharata B Rao
    bdrv_register(&bdrv_gluster_tcp);
621 8d6d89cb Bharata B Rao
    bdrv_register(&bdrv_gluster);
622 8d6d89cb Bharata B Rao
}
623 8d6d89cb Bharata B Rao
624 8d6d89cb Bharata B Rao
/*
 * Hand bdrv_gluster_init to block_init(), which is defined outside this
 * file; presumably this arranges for the driver registration to run during
 * start-up -- confirm against the macro's definition.
 */
block_init(bdrv_gluster_init);