root / block / rbd.c @ 30cdc48c
History | View | Annotate | Download (20.4 kB)
1 | f27aaf4b | Christian Brunner | /*
|
---|---|---|---|
2 | f27aaf4b | Christian Brunner | * QEMU Block driver for RADOS (Ceph)
|
3 | f27aaf4b | Christian Brunner | *
|
4 | ad32e9c0 | Josh Durgin | * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
|
5 | ad32e9c0 | Josh Durgin | * Josh Durgin <josh.durgin@dreamhost.com>
|
6 | f27aaf4b | Christian Brunner | *
|
7 | f27aaf4b | Christian Brunner | * This work is licensed under the terms of the GNU GPL, version 2. See
|
8 | f27aaf4b | Christian Brunner | * the COPYING file in the top-level directory.
|
9 | f27aaf4b | Christian Brunner | *
|
10 | f27aaf4b | Christian Brunner | */
|
11 | f27aaf4b | Christian Brunner | |
12 | ad32e9c0 | Josh Durgin | #include <inttypes.h> |
13 | ad32e9c0 | Josh Durgin | |
14 | f27aaf4b | Christian Brunner | #include "qemu-common.h" |
15 | f27aaf4b | Christian Brunner | #include "qemu-error.h" |
16 | f27aaf4b | Christian Brunner | |
17 | f27aaf4b | Christian Brunner | #include "block_int.h" |
18 | f27aaf4b | Christian Brunner | |
19 | ad32e9c0 | Josh Durgin | #include <rbd/librbd.h> |
20 | f27aaf4b | Christian Brunner | |
21 | f27aaf4b | Christian Brunner | |
22 | f27aaf4b | Christian Brunner | |
23 | f27aaf4b | Christian Brunner | /*
|
24 | f27aaf4b | Christian Brunner | * When specifying the image filename use:
|
25 | f27aaf4b | Christian Brunner | *
|
26 | fab5cf59 | Josh Durgin | * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
|
27 | f27aaf4b | Christian Brunner | *
|
28 | f27aaf4b | Christian Brunner | * poolname must be the name of an existing rados pool
|
29 | f27aaf4b | Christian Brunner | *
|
30 | f27aaf4b | Christian Brunner | * devicename is the basename for all objects used to
|
31 | f27aaf4b | Christian Brunner | * emulate the raw device.
|
32 | f27aaf4b | Christian Brunner | *
|
33 | fab5cf59 | Josh Durgin | * Each option given is used to configure rados, and may be
|
34 | fab5cf59 | Josh Durgin | * any Ceph option, or "conf". The "conf" option specifies
|
35 | fab5cf59 | Josh Durgin | * a Ceph configuration file to read.
|
36 | fab5cf59 | Josh Durgin | *
|
37 | f27aaf4b | Christian Brunner | * Metadata information (image size, ...) is stored in an
|
38 | f27aaf4b | Christian Brunner | * object with the name "devicename.rbd".
|
39 | f27aaf4b | Christian Brunner | *
|
40 | f27aaf4b | Christian Brunner | * The raw device is split into 4MB sized objects by default.
|
41 | f27aaf4b | Christian Brunner | * The sequencenumber is encoded in a 12 byte long hex-string,
|
42 | f27aaf4b | Christian Brunner | * and is attached to the devicename, separated by a dot.
|
43 | f27aaf4b | Christian Brunner | * e.g. "devicename.1234567890ab"
|
44 | f27aaf4b | Christian Brunner | *
|
45 | f27aaf4b | Christian Brunner | */
|
46 | f27aaf4b | Christian Brunner | |
47 | f27aaf4b | Christian Brunner | #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) |
48 | f27aaf4b | Christian Brunner | |
49 | ad32e9c0 | Josh Durgin | #define RBD_MAX_CONF_NAME_SIZE 128 |
50 | ad32e9c0 | Josh Durgin | #define RBD_MAX_CONF_VAL_SIZE 512 |
51 | ad32e9c0 | Josh Durgin | #define RBD_MAX_CONF_SIZE 1024 |
52 | ad32e9c0 | Josh Durgin | #define RBD_MAX_POOL_NAME_SIZE 128 |
53 | ad32e9c0 | Josh Durgin | #define RBD_MAX_SNAP_NAME_SIZE 128 |
54 | ad32e9c0 | Josh Durgin | #define RBD_MAX_SNAPS 100 |
55 | ad32e9c0 | Josh Durgin | |
56 | f27aaf4b | Christian Brunner | typedef struct RBDAIOCB { |
57 | f27aaf4b | Christian Brunner | BlockDriverAIOCB common; |
58 | f27aaf4b | Christian Brunner | QEMUBH *bh; |
59 | f27aaf4b | Christian Brunner | int ret;
|
60 | f27aaf4b | Christian Brunner | QEMUIOVector *qiov; |
61 | f27aaf4b | Christian Brunner | char *bounce;
|
62 | f27aaf4b | Christian Brunner | int write;
|
63 | f27aaf4b | Christian Brunner | int64_t sector_num; |
64 | f27aaf4b | Christian Brunner | int error;
|
65 | f27aaf4b | Christian Brunner | struct BDRVRBDState *s;
|
66 | f27aaf4b | Christian Brunner | int cancelled;
|
67 | f27aaf4b | Christian Brunner | } RBDAIOCB; |
68 | f27aaf4b | Christian Brunner | |
69 | f27aaf4b | Christian Brunner | typedef struct RADOSCB { |
70 | f27aaf4b | Christian Brunner | int rcbid;
|
71 | f27aaf4b | Christian Brunner | RBDAIOCB *acb; |
72 | f27aaf4b | Christian Brunner | struct BDRVRBDState *s;
|
73 | f27aaf4b | Christian Brunner | int done;
|
74 | ad32e9c0 | Josh Durgin | int64_t size; |
75 | f27aaf4b | Christian Brunner | char *buf;
|
76 | f27aaf4b | Christian Brunner | int ret;
|
77 | f27aaf4b | Christian Brunner | } RADOSCB; |
78 | f27aaf4b | Christian Brunner | |
79 | f27aaf4b | Christian Brunner | #define RBD_FD_READ 0 |
80 | f27aaf4b | Christian Brunner | #define RBD_FD_WRITE 1 |
81 | f27aaf4b | Christian Brunner | |
82 | f27aaf4b | Christian Brunner | typedef struct BDRVRBDState { |
83 | f27aaf4b | Christian Brunner | int fds[2]; |
84 | ad32e9c0 | Josh Durgin | rados_t cluster; |
85 | ad32e9c0 | Josh Durgin | rados_ioctx_t io_ctx; |
86 | ad32e9c0 | Josh Durgin | rbd_image_t image; |
87 | ad32e9c0 | Josh Durgin | char name[RBD_MAX_IMAGE_NAME_SIZE];
|
88 | f27aaf4b | Christian Brunner | int qemu_aio_count;
|
89 | ad32e9c0 | Josh Durgin | char *snap;
|
90 | f27aaf4b | Christian Brunner | int event_reader_pos;
|
91 | f27aaf4b | Christian Brunner | RADOSCB *event_rcb; |
92 | f27aaf4b | Christian Brunner | } BDRVRBDState; |
93 | f27aaf4b | Christian Brunner | |
94 | f27aaf4b | Christian Brunner | static void rbd_aio_bh_cb(void *opaque); |
95 | f27aaf4b | Christian Brunner | |
/*
 * Copy the leading token of @src into @dst.
 *
 * If @delim is not '\0' and occurs in @src, the first occurrence is
 * overwritten with '\0' (so @src is modified) and *@p is set to the
 * character following it.  Otherwise the whole of @src is the token and
 * *@p is set to NULL.
 *
 * @name is only used to label error messages.
 *
 * Returns 0 on success, or -EINVAL if the token is empty or does not fit
 * into @dst_len bytes including the terminating NUL.
 */
static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
                             char **p)
{
    char *sep = NULL;
    int tok_len;

    if (delim != '\0') {
        sep = strchr(src, delim);
    }

    if (sep) {
        /* Cut the token off and hand back the remainder */
        *sep = '\0';
        *p = sep + 1;
    } else {
        *p = NULL;
    }

    tok_len = strlen(src);
    if (tok_len >= dst_len) {
        error_report("%s too long", name);
        return -EINVAL;
    }
    if (tok_len == 0) {
        error_report("%s too short", name);
        return -EINVAL;
    }

    pstrcpy(dst, dst_len, src);
    return 0;
}
126 | f27aaf4b | Christian Brunner | |
/*
 * Split a filename of the form
 *
 *   rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=...]]
 *
 * into its components.
 *
 * @pool, @name: receive the pool and image name (both mandatory).
 * @snap:        receives the snapshot name, or "" if none was given.
 * @conf:        receives the raw option string after the image/snapshot,
 *               or "" if none was given.
 * The *_len arguments give the sizes of the respective buffers.
 *
 * Returns 0 on success and a negative errno value (-EINVAL) if the
 * filename does not start with "rbd:", a mandatory component is missing
 * or empty, or a component does not fit into its buffer.
 */
static int qemu_rbd_parsename(const char *filename,
                              char *pool, int pool_len,
                              char *snap, int snap_len,
                              char *name, int name_len,
                              char *conf, int conf_len)
{
    const char *start;
    char *p, *buf;
    char *at, *colon;
    int ret;

    if (!strstart(filename, "rbd:", &start)) {
        return -EINVAL;
    }

    /* qemu_rbd_next_tok() modifies its input, so work on a private copy */
    buf = qemu_strdup(start);
    p = buf;
    *snap = '\0';
    *conf = '\0';

    ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
    if (ret < 0 || !p) {
        ret = -EINVAL;
        goto done;
    }

    /*
     * An '@' only introduces a snapshot name when it appears before the
     * first ':'.  An '@' after that belongs to an option value and must
     * not be mistaken for the snapshot delimiter.
     */
    at = strchr(p, '@');
    colon = strchr(p, ':');
    if (at && (!colon || at < colon)) {
        ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
        if (ret < 0) {
            goto done;
        }
        ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
    } else {
        ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
    }
    if (ret < 0 || !p) {
        /* Either an error, or no option string follows */
        goto done;
    }

    ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', "configuration", &p);

done:
    qemu_free(buf);
    return ret;
}
171 | f27aaf4b | Christian Brunner | |
172 | fab5cf59 | Josh Durgin | static int qemu_rbd_set_conf(rados_t cluster, const char *conf) |
173 | fab5cf59 | Josh Durgin | { |
174 | fab5cf59 | Josh Durgin | char *p, *buf;
|
175 | fab5cf59 | Josh Durgin | char name[RBD_MAX_CONF_NAME_SIZE];
|
176 | fab5cf59 | Josh Durgin | char value[RBD_MAX_CONF_VAL_SIZE];
|
177 | fab5cf59 | Josh Durgin | int ret = 0; |
178 | fab5cf59 | Josh Durgin | |
179 | fab5cf59 | Josh Durgin | buf = qemu_strdup(conf); |
180 | fab5cf59 | Josh Durgin | p = buf; |
181 | fab5cf59 | Josh Durgin | |
182 | fab5cf59 | Josh Durgin | while (p) {
|
183 | fab5cf59 | Josh Durgin | ret = qemu_rbd_next_tok(name, sizeof(name), p,
|
184 | fab5cf59 | Josh Durgin | '=', "conf option name", &p); |
185 | fab5cf59 | Josh Durgin | if (ret < 0) { |
186 | fab5cf59 | Josh Durgin | break;
|
187 | fab5cf59 | Josh Durgin | } |
188 | fab5cf59 | Josh Durgin | |
189 | fab5cf59 | Josh Durgin | if (!p) {
|
190 | fab5cf59 | Josh Durgin | error_report("conf option %s has no value", name);
|
191 | fab5cf59 | Josh Durgin | ret = -EINVAL; |
192 | fab5cf59 | Josh Durgin | break;
|
193 | fab5cf59 | Josh Durgin | } |
194 | fab5cf59 | Josh Durgin | |
195 | fab5cf59 | Josh Durgin | ret = qemu_rbd_next_tok(value, sizeof(value), p,
|
196 | fab5cf59 | Josh Durgin | ':', "conf option value", &p); |
197 | fab5cf59 | Josh Durgin | if (ret < 0) { |
198 | fab5cf59 | Josh Durgin | break;
|
199 | fab5cf59 | Josh Durgin | } |
200 | fab5cf59 | Josh Durgin | |
201 | fab5cf59 | Josh Durgin | if (strcmp(name, "conf")) { |
202 | fab5cf59 | Josh Durgin | ret = rados_conf_set(cluster, name, value); |
203 | fab5cf59 | Josh Durgin | if (ret < 0) { |
204 | fab5cf59 | Josh Durgin | error_report("invalid conf option %s", name);
|
205 | fab5cf59 | Josh Durgin | ret = -EINVAL; |
206 | fab5cf59 | Josh Durgin | break;
|
207 | fab5cf59 | Josh Durgin | } |
208 | fab5cf59 | Josh Durgin | } else {
|
209 | fab5cf59 | Josh Durgin | ret = rados_conf_read_file(cluster, value); |
210 | fab5cf59 | Josh Durgin | if (ret < 0) { |
211 | fab5cf59 | Josh Durgin | error_report("error reading conf file %s", value);
|
212 | fab5cf59 | Josh Durgin | break;
|
213 | fab5cf59 | Josh Durgin | } |
214 | fab5cf59 | Josh Durgin | } |
215 | fab5cf59 | Josh Durgin | } |
216 | fab5cf59 | Josh Durgin | |
217 | fab5cf59 | Josh Durgin | qemu_free(buf); |
218 | fab5cf59 | Josh Durgin | return ret;
|
219 | fab5cf59 | Josh Durgin | } |
220 | fab5cf59 | Josh Durgin | |
221 | ad32e9c0 | Josh Durgin | static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) |
222 | f27aaf4b | Christian Brunner | { |
223 | f27aaf4b | Christian Brunner | int64_t bytes = 0;
|
224 | f27aaf4b | Christian Brunner | int64_t objsize; |
225 | ad32e9c0 | Josh Durgin | int obj_order = 0; |
226 | ad32e9c0 | Josh Durgin | char pool[RBD_MAX_POOL_NAME_SIZE];
|
227 | ad32e9c0 | Josh Durgin | char name[RBD_MAX_IMAGE_NAME_SIZE];
|
228 | ad32e9c0 | Josh Durgin | char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
|
229 | fab5cf59 | Josh Durgin | char conf[RBD_MAX_CONF_SIZE];
|
230 | f27aaf4b | Christian Brunner | char *snap = NULL; |
231 | ad32e9c0 | Josh Durgin | rados_t cluster; |
232 | ad32e9c0 | Josh Durgin | rados_ioctx_t io_ctx; |
233 | f27aaf4b | Christian Brunner | int ret;
|
234 | f27aaf4b | Christian Brunner | |
235 | ad32e9c0 | Josh Durgin | if (qemu_rbd_parsename(filename, pool, sizeof(pool), |
236 | ad32e9c0 | Josh Durgin | snap_buf, sizeof(snap_buf),
|
237 | fab5cf59 | Josh Durgin | name, sizeof(name),
|
238 | fab5cf59 | Josh Durgin | conf, sizeof(conf)) < 0) { |
239 | f27aaf4b | Christian Brunner | return -EINVAL;
|
240 | f27aaf4b | Christian Brunner | } |
241 | f27aaf4b | Christian Brunner | if (snap_buf[0] != '\0') { |
242 | f27aaf4b | Christian Brunner | snap = snap_buf; |
243 | f27aaf4b | Christian Brunner | } |
244 | f27aaf4b | Christian Brunner | |
245 | f27aaf4b | Christian Brunner | /* Read out options */
|
246 | f27aaf4b | Christian Brunner | while (options && options->name) {
|
247 | f27aaf4b | Christian Brunner | if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
|
248 | f27aaf4b | Christian Brunner | bytes = options->value.n; |
249 | f27aaf4b | Christian Brunner | } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { |
250 | f27aaf4b | Christian Brunner | if (options->value.n) {
|
251 | f27aaf4b | Christian Brunner | objsize = options->value.n; |
252 | f27aaf4b | Christian Brunner | if ((objsize - 1) & objsize) { /* not a power of 2? */ |
253 | f27aaf4b | Christian Brunner | error_report("obj size needs to be power of 2");
|
254 | f27aaf4b | Christian Brunner | return -EINVAL;
|
255 | f27aaf4b | Christian Brunner | } |
256 | f27aaf4b | Christian Brunner | if (objsize < 4096) { |
257 | f27aaf4b | Christian Brunner | error_report("obj size too small");
|
258 | f27aaf4b | Christian Brunner | return -EINVAL;
|
259 | f27aaf4b | Christian Brunner | } |
260 | ad32e9c0 | Josh Durgin | obj_order = ffs(objsize) - 1;
|
261 | f27aaf4b | Christian Brunner | } |
262 | f27aaf4b | Christian Brunner | } |
263 | f27aaf4b | Christian Brunner | options++; |
264 | f27aaf4b | Christian Brunner | } |
265 | f27aaf4b | Christian Brunner | |
266 | ad32e9c0 | Josh Durgin | if (rados_create(&cluster, NULL) < 0) { |
267 | f27aaf4b | Christian Brunner | error_report("error initializing");
|
268 | f27aaf4b | Christian Brunner | return -EIO;
|
269 | f27aaf4b | Christian Brunner | } |
270 | f27aaf4b | Christian Brunner | |
271 | fab5cf59 | Josh Durgin | if (strstr(conf, "conf=") == NULL) { |
272 | fab5cf59 | Josh Durgin | if (rados_conf_read_file(cluster, NULL) < 0) { |
273 | fab5cf59 | Josh Durgin | error_report("error reading config file");
|
274 | fab5cf59 | Josh Durgin | rados_shutdown(cluster); |
275 | fab5cf59 | Josh Durgin | return -EIO;
|
276 | fab5cf59 | Josh Durgin | } |
277 | fab5cf59 | Josh Durgin | } |
278 | fab5cf59 | Josh Durgin | |
279 | fab5cf59 | Josh Durgin | if (conf[0] != '\0' && |
280 | fab5cf59 | Josh Durgin | qemu_rbd_set_conf(cluster, conf) < 0) {
|
281 | fab5cf59 | Josh Durgin | error_report("error setting config options");
|
282 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
283 | f27aaf4b | Christian Brunner | return -EIO;
|
284 | f27aaf4b | Christian Brunner | } |
285 | f27aaf4b | Christian Brunner | |
286 | ad32e9c0 | Josh Durgin | if (rados_connect(cluster) < 0) { |
287 | ad32e9c0 | Josh Durgin | error_report("error connecting");
|
288 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
289 | f27aaf4b | Christian Brunner | return -EIO;
|
290 | f27aaf4b | Christian Brunner | } |
291 | f27aaf4b | Christian Brunner | |
292 | ad32e9c0 | Josh Durgin | if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) { |
293 | ad32e9c0 | Josh Durgin | error_report("error opening pool %s", pool);
|
294 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
295 | ad32e9c0 | Josh Durgin | return -EIO;
|
296 | f27aaf4b | Christian Brunner | } |
297 | f27aaf4b | Christian Brunner | |
298 | ad32e9c0 | Josh Durgin | ret = rbd_create(io_ctx, name, bytes, &obj_order); |
299 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(io_ctx); |
300 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
301 | f27aaf4b | Christian Brunner | |
302 | f27aaf4b | Christian Brunner | return ret;
|
303 | f27aaf4b | Christian Brunner | } |
304 | f27aaf4b | Christian Brunner | |
305 | f27aaf4b | Christian Brunner | /*
|
306 | ad32e9c0 | Josh Durgin | * This aio completion is being called from qemu_rbd_aio_event_reader()
|
307 | ad32e9c0 | Josh Durgin | * and runs in qemu context. It schedules a bh, but just in case the aio
|
308 | f27aaf4b | Christian Brunner | * was not cancelled before.
|
309 | f27aaf4b | Christian Brunner | */
|
310 | ad32e9c0 | Josh Durgin | static void qemu_rbd_complete_aio(RADOSCB *rcb) |
311 | f27aaf4b | Christian Brunner | { |
312 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = rcb->acb; |
313 | f27aaf4b | Christian Brunner | int64_t r; |
314 | f27aaf4b | Christian Brunner | |
315 | f27aaf4b | Christian Brunner | if (acb->cancelled) {
|
316 | ad32e9c0 | Josh Durgin | qemu_vfree(acb->bounce); |
317 | ad32e9c0 | Josh Durgin | qemu_aio_release(acb); |
318 | f27aaf4b | Christian Brunner | goto done;
|
319 | f27aaf4b | Christian Brunner | } |
320 | f27aaf4b | Christian Brunner | |
321 | f27aaf4b | Christian Brunner | r = rcb->ret; |
322 | f27aaf4b | Christian Brunner | |
323 | f27aaf4b | Christian Brunner | if (acb->write) {
|
324 | f27aaf4b | Christian Brunner | if (r < 0) { |
325 | f27aaf4b | Christian Brunner | acb->ret = r; |
326 | f27aaf4b | Christian Brunner | acb->error = 1;
|
327 | f27aaf4b | Christian Brunner | } else if (!acb->error) { |
328 | ad32e9c0 | Josh Durgin | acb->ret = rcb->size; |
329 | f27aaf4b | Christian Brunner | } |
330 | f27aaf4b | Christian Brunner | } else {
|
331 | ad32e9c0 | Josh Durgin | if (r < 0) { |
332 | ad32e9c0 | Josh Durgin | memset(rcb->buf, 0, rcb->size);
|
333 | f27aaf4b | Christian Brunner | acb->ret = r; |
334 | f27aaf4b | Christian Brunner | acb->error = 1;
|
335 | ad32e9c0 | Josh Durgin | } else if (r < rcb->size) { |
336 | ad32e9c0 | Josh Durgin | memset(rcb->buf + r, 0, rcb->size - r);
|
337 | f27aaf4b | Christian Brunner | if (!acb->error) {
|
338 | ad32e9c0 | Josh Durgin | acb->ret = rcb->size; |
339 | f27aaf4b | Christian Brunner | } |
340 | f27aaf4b | Christian Brunner | } else if (!acb->error) { |
341 | ad32e9c0 | Josh Durgin | acb->ret = r; |
342 | f27aaf4b | Christian Brunner | } |
343 | f27aaf4b | Christian Brunner | } |
344 | f27aaf4b | Christian Brunner | /* Note that acb->bh can be NULL in case where the aio was cancelled */
|
345 | ad32e9c0 | Josh Durgin | acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); |
346 | ad32e9c0 | Josh Durgin | qemu_bh_schedule(acb->bh); |
347 | f27aaf4b | Christian Brunner | done:
|
348 | f27aaf4b | Christian Brunner | qemu_free(rcb); |
349 | f27aaf4b | Christian Brunner | } |
350 | f27aaf4b | Christian Brunner | |
351 | f27aaf4b | Christian Brunner | /*
|
352 | f27aaf4b | Christian Brunner | * aio fd read handler. It runs in the qemu context and calls the
|
353 | f27aaf4b | Christian Brunner | * completion handling of completed rados aio operations.
|
354 | f27aaf4b | Christian Brunner | */
|
355 | ad32e9c0 | Josh Durgin | static void qemu_rbd_aio_event_reader(void *opaque) |
356 | f27aaf4b | Christian Brunner | { |
357 | f27aaf4b | Christian Brunner | BDRVRBDState *s = opaque; |
358 | f27aaf4b | Christian Brunner | |
359 | f27aaf4b | Christian Brunner | ssize_t ret; |
360 | f27aaf4b | Christian Brunner | |
361 | f27aaf4b | Christian Brunner | do {
|
362 | f27aaf4b | Christian Brunner | char *p = (char *)&s->event_rcb; |
363 | f27aaf4b | Christian Brunner | |
364 | f27aaf4b | Christian Brunner | /* now read the rcb pointer that was sent from a non qemu thread */
|
365 | f27aaf4b | Christian Brunner | if ((ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
|
366 | f27aaf4b | Christian Brunner | sizeof(s->event_rcb) - s->event_reader_pos)) > 0) { |
367 | f27aaf4b | Christian Brunner | if (ret > 0) { |
368 | f27aaf4b | Christian Brunner | s->event_reader_pos += ret; |
369 | f27aaf4b | Christian Brunner | if (s->event_reader_pos == sizeof(s->event_rcb)) { |
370 | f27aaf4b | Christian Brunner | s->event_reader_pos = 0;
|
371 | ad32e9c0 | Josh Durgin | qemu_rbd_complete_aio(s->event_rcb); |
372 | ad32e9c0 | Josh Durgin | s->qemu_aio_count--; |
373 | f27aaf4b | Christian Brunner | } |
374 | f27aaf4b | Christian Brunner | } |
375 | f27aaf4b | Christian Brunner | } |
376 | f27aaf4b | Christian Brunner | } while (ret < 0 && errno == EINTR); |
377 | f27aaf4b | Christian Brunner | } |
378 | f27aaf4b | Christian Brunner | |
379 | ad32e9c0 | Josh Durgin | static int qemu_rbd_aio_flush_cb(void *opaque) |
380 | f27aaf4b | Christian Brunner | { |
381 | f27aaf4b | Christian Brunner | BDRVRBDState *s = opaque; |
382 | f27aaf4b | Christian Brunner | |
383 | f27aaf4b | Christian Brunner | return (s->qemu_aio_count > 0); |
384 | f27aaf4b | Christian Brunner | } |
385 | f27aaf4b | Christian Brunner | |
386 | ad32e9c0 | Josh Durgin | static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) |
387 | f27aaf4b | Christian Brunner | { |
388 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
389 | ad32e9c0 | Josh Durgin | char pool[RBD_MAX_POOL_NAME_SIZE];
|
390 | ad32e9c0 | Josh Durgin | char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
|
391 | fab5cf59 | Josh Durgin | char conf[RBD_MAX_CONF_SIZE];
|
392 | f27aaf4b | Christian Brunner | int r;
|
393 | f27aaf4b | Christian Brunner | |
394 | ad32e9c0 | Josh Durgin | if (qemu_rbd_parsename(filename, pool, sizeof(pool), |
395 | ad32e9c0 | Josh Durgin | snap_buf, sizeof(snap_buf),
|
396 | fab5cf59 | Josh Durgin | s->name, sizeof(s->name),
|
397 | fab5cf59 | Josh Durgin | conf, sizeof(conf)) < 0) { |
398 | f27aaf4b | Christian Brunner | return -EINVAL;
|
399 | f27aaf4b | Christian Brunner | } |
400 | ad32e9c0 | Josh Durgin | s->snap = NULL;
|
401 | f27aaf4b | Christian Brunner | if (snap_buf[0] != '\0') { |
402 | ad32e9c0 | Josh Durgin | s->snap = qemu_strdup(snap_buf); |
403 | f27aaf4b | Christian Brunner | } |
404 | f27aaf4b | Christian Brunner | |
405 | ad32e9c0 | Josh Durgin | r = rados_create(&s->cluster, NULL);
|
406 | ad32e9c0 | Josh Durgin | if (r < 0) { |
407 | f27aaf4b | Christian Brunner | error_report("error initializing");
|
408 | f27aaf4b | Christian Brunner | return r;
|
409 | f27aaf4b | Christian Brunner | } |
410 | f27aaf4b | Christian Brunner | |
411 | fab5cf59 | Josh Durgin | if (strstr(conf, "conf=") == NULL) { |
412 | fab5cf59 | Josh Durgin | r = rados_conf_read_file(s->cluster, NULL);
|
413 | fab5cf59 | Josh Durgin | if (r < 0) { |
414 | fab5cf59 | Josh Durgin | error_report("error reading config file");
|
415 | fab5cf59 | Josh Durgin | rados_shutdown(s->cluster); |
416 | fab5cf59 | Josh Durgin | return r;
|
417 | fab5cf59 | Josh Durgin | } |
418 | fab5cf59 | Josh Durgin | } |
419 | fab5cf59 | Josh Durgin | |
420 | fab5cf59 | Josh Durgin | if (conf[0] != '\0') { |
421 | fab5cf59 | Josh Durgin | r = qemu_rbd_set_conf(s->cluster, conf); |
422 | fab5cf59 | Josh Durgin | if (r < 0) { |
423 | fab5cf59 | Josh Durgin | error_report("error setting config options");
|
424 | fab5cf59 | Josh Durgin | rados_shutdown(s->cluster); |
425 | fab5cf59 | Josh Durgin | return r;
|
426 | fab5cf59 | Josh Durgin | } |
427 | f27aaf4b | Christian Brunner | } |
428 | f27aaf4b | Christian Brunner | |
429 | ad32e9c0 | Josh Durgin | r = rados_connect(s->cluster); |
430 | ad32e9c0 | Josh Durgin | if (r < 0) { |
431 | ad32e9c0 | Josh Durgin | error_report("error connecting");
|
432 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
433 | f27aaf4b | Christian Brunner | return r;
|
434 | f27aaf4b | Christian Brunner | } |
435 | f27aaf4b | Christian Brunner | |
436 | ad32e9c0 | Josh Durgin | r = rados_ioctx_create(s->cluster, pool, &s->io_ctx); |
437 | ad32e9c0 | Josh Durgin | if (r < 0) { |
438 | ad32e9c0 | Josh Durgin | error_report("error opening pool %s", pool);
|
439 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
440 | ad32e9c0 | Josh Durgin | return r;
|
441 | f27aaf4b | Christian Brunner | } |
442 | f27aaf4b | Christian Brunner | |
443 | ad32e9c0 | Josh Durgin | r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); |
444 | f27aaf4b | Christian Brunner | if (r < 0) { |
445 | ad32e9c0 | Josh Durgin | error_report("error reading header from %s", s->name);
|
446 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(s->io_ctx); |
447 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
448 | ad32e9c0 | Josh Durgin | return r;
|
449 | f27aaf4b | Christian Brunner | } |
450 | f27aaf4b | Christian Brunner | |
451 | ad32e9c0 | Josh Durgin | bs->read_only = (s->snap != NULL);
|
452 | f27aaf4b | Christian Brunner | |
453 | f27aaf4b | Christian Brunner | s->event_reader_pos = 0;
|
454 | f27aaf4b | Christian Brunner | r = qemu_pipe(s->fds); |
455 | f27aaf4b | Christian Brunner | if (r < 0) { |
456 | f27aaf4b | Christian Brunner | error_report("error opening eventfd");
|
457 | f27aaf4b | Christian Brunner | goto failed;
|
458 | f27aaf4b | Christian Brunner | } |
459 | f27aaf4b | Christian Brunner | fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
|
460 | f27aaf4b | Christian Brunner | fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
|
461 | ad32e9c0 | Josh Durgin | qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, |
462 | ad32e9c0 | Josh Durgin | NULL, qemu_rbd_aio_flush_cb, NULL, s); |
463 | f27aaf4b | Christian Brunner | |
464 | f27aaf4b | Christian Brunner | |
465 | f27aaf4b | Christian Brunner | return 0; |
466 | f27aaf4b | Christian Brunner | |
467 | f27aaf4b | Christian Brunner | failed:
|
468 | ad32e9c0 | Josh Durgin | rbd_close(s->image); |
469 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(s->io_ctx); |
470 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
471 | f27aaf4b | Christian Brunner | return r;
|
472 | f27aaf4b | Christian Brunner | } |
473 | f27aaf4b | Christian Brunner | |
474 | ad32e9c0 | Josh Durgin | static void qemu_rbd_close(BlockDriverState *bs) |
475 | f27aaf4b | Christian Brunner | { |
476 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
477 | f27aaf4b | Christian Brunner | |
478 | f27aaf4b | Christian Brunner | close(s->fds[0]);
|
479 | f27aaf4b | Christian Brunner | close(s->fds[1]);
|
480 | f27aaf4b | Christian Brunner | qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL , NULL, NULL, NULL, |
481 | f27aaf4b | Christian Brunner | NULL);
|
482 | f27aaf4b | Christian Brunner | |
483 | ad32e9c0 | Josh Durgin | rbd_close(s->image); |
484 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(s->io_ctx); |
485 | ad32e9c0 | Josh Durgin | qemu_free(s->snap); |
486 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
487 | f27aaf4b | Christian Brunner | } |
488 | f27aaf4b | Christian Brunner | |
489 | f27aaf4b | Christian Brunner | /*
|
490 | f27aaf4b | Christian Brunner | * Cancel aio. Since we don't reference acb in a non qemu threads,
|
491 | f27aaf4b | Christian Brunner | * it is safe to access it here.
|
492 | f27aaf4b | Christian Brunner | */
|
493 | ad32e9c0 | Josh Durgin | static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) |
494 | f27aaf4b | Christian Brunner | { |
495 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = (RBDAIOCB *) blockacb; |
496 | f27aaf4b | Christian Brunner | acb->cancelled = 1;
|
497 | f27aaf4b | Christian Brunner | } |
498 | f27aaf4b | Christian Brunner | |
499 | f27aaf4b | Christian Brunner | static AIOPool rbd_aio_pool = {
|
500 | f27aaf4b | Christian Brunner | .aiocb_size = sizeof(RBDAIOCB),
|
501 | ad32e9c0 | Josh Durgin | .cancel = qemu_rbd_aio_cancel, |
502 | f27aaf4b | Christian Brunner | }; |
503 | f27aaf4b | Christian Brunner | |
504 | ad32e9c0 | Josh Durgin | static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) |
505 | f27aaf4b | Christian Brunner | { |
506 | ad32e9c0 | Josh Durgin | int ret = 0; |
507 | f27aaf4b | Christian Brunner | while (1) { |
508 | f27aaf4b | Christian Brunner | fd_set wfd; |
509 | ad32e9c0 | Josh Durgin | int fd = s->fds[RBD_FD_WRITE];
|
510 | f27aaf4b | Christian Brunner | |
511 | ad32e9c0 | Josh Durgin | /* send the op pointer to the qemu thread that is responsible
|
512 | ad32e9c0 | Josh Durgin | for the aio/op completion. Must do it in a qemu thread context */
|
513 | f27aaf4b | Christian Brunner | ret = write(fd, (void *)&rcb, sizeof(rcb)); |
514 | f27aaf4b | Christian Brunner | if (ret >= 0) { |
515 | f27aaf4b | Christian Brunner | break;
|
516 | f27aaf4b | Christian Brunner | } |
517 | f27aaf4b | Christian Brunner | if (errno == EINTR) {
|
518 | f27aaf4b | Christian Brunner | continue;
|
519 | ad32e9c0 | Josh Durgin | } |
520 | f27aaf4b | Christian Brunner | if (errno != EAGAIN) {
|
521 | f27aaf4b | Christian Brunner | break;
|
522 | ad32e9c0 | Josh Durgin | } |
523 | f27aaf4b | Christian Brunner | |
524 | f27aaf4b | Christian Brunner | FD_ZERO(&wfd); |
525 | f27aaf4b | Christian Brunner | FD_SET(fd, &wfd); |
526 | f27aaf4b | Christian Brunner | do {
|
527 | f27aaf4b | Christian Brunner | ret = select(fd + 1, NULL, &wfd, NULL, NULL); |
528 | f27aaf4b | Christian Brunner | } while (ret < 0 && errno == EINTR); |
529 | f27aaf4b | Christian Brunner | } |
530 | f27aaf4b | Christian Brunner | |
531 | ad32e9c0 | Josh Durgin | return ret;
|
532 | ad32e9c0 | Josh Durgin | } |
533 | ad32e9c0 | Josh Durgin | |
534 | ad32e9c0 | Josh Durgin | /*
|
535 | ad32e9c0 | Josh Durgin | * This is the callback function for rbd_aio_read and _write
|
536 | ad32e9c0 | Josh Durgin | *
|
537 | ad32e9c0 | Josh Durgin | * Note: this function is being called from a non qemu thread so
|
538 | ad32e9c0 | Josh Durgin | * we need to be careful about what we do here. Generally we only
|
539 | ad32e9c0 | Josh Durgin | * write to the block notification pipe, and do the rest of the
|
540 | ad32e9c0 | Josh Durgin | * io completion handling from qemu_rbd_aio_event_reader() which
|
541 | ad32e9c0 | Josh Durgin | * runs in a qemu context.
|
542 | ad32e9c0 | Josh Durgin | */
|
543 | ad32e9c0 | Josh Durgin | static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) |
544 | ad32e9c0 | Josh Durgin | { |
545 | ad32e9c0 | Josh Durgin | int ret;
|
546 | ad32e9c0 | Josh Durgin | rcb->ret = rbd_aio_get_return_value(c); |
547 | ad32e9c0 | Josh Durgin | rbd_aio_release(c); |
548 | ad32e9c0 | Josh Durgin | ret = qemu_rbd_send_pipe(rcb->s, rcb); |
549 | f27aaf4b | Christian Brunner | if (ret < 0) { |
550 | ad32e9c0 | Josh Durgin | error_report("failed writing to acb->s->fds");
|
551 | f27aaf4b | Christian Brunner | qemu_free(rcb); |
552 | f27aaf4b | Christian Brunner | } |
553 | f27aaf4b | Christian Brunner | } |
554 | f27aaf4b | Christian Brunner | |
555 | ad32e9c0 | Josh Durgin | /* Callback when all queued rbd_aio requests are complete */
|
556 | f27aaf4b | Christian Brunner | |
557 | f27aaf4b | Christian Brunner | static void rbd_aio_bh_cb(void *opaque) |
558 | f27aaf4b | Christian Brunner | { |
559 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = opaque; |
560 | f27aaf4b | Christian Brunner | |
561 | f27aaf4b | Christian Brunner | if (!acb->write) {
|
562 | f27aaf4b | Christian Brunner | qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); |
563 | f27aaf4b | Christian Brunner | } |
564 | f27aaf4b | Christian Brunner | qemu_vfree(acb->bounce); |
565 | f27aaf4b | Christian Brunner | acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); |
566 | f27aaf4b | Christian Brunner | qemu_bh_delete(acb->bh); |
567 | f27aaf4b | Christian Brunner | acb->bh = NULL;
|
568 | f27aaf4b | Christian Brunner | |
569 | f27aaf4b | Christian Brunner | qemu_aio_release(acb); |
570 | f27aaf4b | Christian Brunner | } |
571 | f27aaf4b | Christian Brunner | |
572 | f27aaf4b | Christian Brunner | static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
|
573 | f27aaf4b | Christian Brunner | int64_t sector_num, |
574 | f27aaf4b | Christian Brunner | QEMUIOVector *qiov, |
575 | f27aaf4b | Christian Brunner | int nb_sectors,
|
576 | f27aaf4b | Christian Brunner | BlockDriverCompletionFunc *cb, |
577 | f27aaf4b | Christian Brunner | void *opaque, int write) |
578 | f27aaf4b | Christian Brunner | { |
579 | f27aaf4b | Christian Brunner | RBDAIOCB *acb; |
580 | f27aaf4b | Christian Brunner | RADOSCB *rcb; |
581 | ad32e9c0 | Josh Durgin | rbd_completion_t c; |
582 | f27aaf4b | Christian Brunner | int64_t off, size; |
583 | f27aaf4b | Christian Brunner | char *buf;
|
584 | 51a13528 | Josh Durgin | int r;
|
585 | f27aaf4b | Christian Brunner | |
586 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
587 | f27aaf4b | Christian Brunner | |
588 | f27aaf4b | Christian Brunner | acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque); |
589 | 51a13528 | Josh Durgin | if (!acb) {
|
590 | 51a13528 | Josh Durgin | return NULL; |
591 | 51a13528 | Josh Durgin | } |
592 | f27aaf4b | Christian Brunner | acb->write = write; |
593 | f27aaf4b | Christian Brunner | acb->qiov = qiov; |
594 | f27aaf4b | Christian Brunner | acb->bounce = qemu_blockalign(bs, qiov->size); |
595 | f27aaf4b | Christian Brunner | acb->ret = 0;
|
596 | f27aaf4b | Christian Brunner | acb->error = 0;
|
597 | f27aaf4b | Christian Brunner | acb->s = s; |
598 | f27aaf4b | Christian Brunner | acb->cancelled = 0;
|
599 | f27aaf4b | Christian Brunner | acb->bh = NULL;
|
600 | f27aaf4b | Christian Brunner | |
601 | f27aaf4b | Christian Brunner | if (write) {
|
602 | f27aaf4b | Christian Brunner | qemu_iovec_to_buffer(acb->qiov, acb->bounce); |
603 | f27aaf4b | Christian Brunner | } |
604 | f27aaf4b | Christian Brunner | |
605 | f27aaf4b | Christian Brunner | buf = acb->bounce; |
606 | f27aaf4b | Christian Brunner | |
607 | f27aaf4b | Christian Brunner | off = sector_num * BDRV_SECTOR_SIZE; |
608 | f27aaf4b | Christian Brunner | size = nb_sectors * BDRV_SECTOR_SIZE; |
609 | f27aaf4b | Christian Brunner | |
610 | ad32e9c0 | Josh Durgin | s->qemu_aio_count++; /* All the RADOSCB */
|
611 | f27aaf4b | Christian Brunner | |
612 | ad32e9c0 | Josh Durgin | rcb = qemu_malloc(sizeof(RADOSCB));
|
613 | ad32e9c0 | Josh Durgin | rcb->done = 0;
|
614 | ad32e9c0 | Josh Durgin | rcb->acb = acb; |
615 | ad32e9c0 | Josh Durgin | rcb->buf = buf; |
616 | ad32e9c0 | Josh Durgin | rcb->s = acb->s; |
617 | ad32e9c0 | Josh Durgin | rcb->size = size; |
618 | 51a13528 | Josh Durgin | r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); |
619 | 51a13528 | Josh Durgin | if (r < 0) { |
620 | 51a13528 | Josh Durgin | goto failed;
|
621 | 51a13528 | Josh Durgin | } |
622 | f27aaf4b | Christian Brunner | |
623 | ad32e9c0 | Josh Durgin | if (write) {
|
624 | 51a13528 | Josh Durgin | r = rbd_aio_write(s->image, off, size, buf, c); |
625 | ad32e9c0 | Josh Durgin | } else {
|
626 | 51a13528 | Josh Durgin | r = rbd_aio_read(s->image, off, size, buf, c); |
627 | 51a13528 | Josh Durgin | } |
628 | 51a13528 | Josh Durgin | |
629 | 51a13528 | Josh Durgin | if (r < 0) { |
630 | 51a13528 | Josh Durgin | goto failed;
|
631 | f27aaf4b | Christian Brunner | } |
632 | f27aaf4b | Christian Brunner | |
633 | f27aaf4b | Christian Brunner | return &acb->common;
|
634 | 51a13528 | Josh Durgin | |
635 | 51a13528 | Josh Durgin | failed:
|
636 | 51a13528 | Josh Durgin | qemu_free(rcb); |
637 | 51a13528 | Josh Durgin | s->qemu_aio_count--; |
638 | 51a13528 | Josh Durgin | qemu_aio_release(acb); |
639 | 51a13528 | Josh Durgin | return NULL; |
640 | f27aaf4b | Christian Brunner | } |
641 | f27aaf4b | Christian Brunner | |
642 | ad32e9c0 | Josh Durgin | static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
|
643 | ad32e9c0 | Josh Durgin | int64_t sector_num, |
644 | ad32e9c0 | Josh Durgin | QEMUIOVector *qiov, |
645 | ad32e9c0 | Josh Durgin | int nb_sectors,
|
646 | ad32e9c0 | Josh Durgin | BlockDriverCompletionFunc *cb, |
647 | ad32e9c0 | Josh Durgin | void *opaque)
|
648 | f27aaf4b | Christian Brunner | { |
649 | f27aaf4b | Christian Brunner | return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); |
650 | f27aaf4b | Christian Brunner | } |
651 | f27aaf4b | Christian Brunner | |
652 | ad32e9c0 | Josh Durgin | static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
|
653 | ad32e9c0 | Josh Durgin | int64_t sector_num, |
654 | ad32e9c0 | Josh Durgin | QEMUIOVector *qiov, |
655 | ad32e9c0 | Josh Durgin | int nb_sectors,
|
656 | ad32e9c0 | Josh Durgin | BlockDriverCompletionFunc *cb, |
657 | ad32e9c0 | Josh Durgin | void *opaque)
|
658 | f27aaf4b | Christian Brunner | { |
659 | f27aaf4b | Christian Brunner | return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); |
660 | f27aaf4b | Christian Brunner | } |
661 | f27aaf4b | Christian Brunner | |
662 | ad32e9c0 | Josh Durgin | static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) |
663 | f27aaf4b | Christian Brunner | { |
664 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
665 | ad32e9c0 | Josh Durgin | rbd_image_info_t info; |
666 | ad32e9c0 | Josh Durgin | int r;
|
667 | ad32e9c0 | Josh Durgin | |
668 | ad32e9c0 | Josh Durgin | r = rbd_stat(s->image, &info, sizeof(info));
|
669 | ad32e9c0 | Josh Durgin | if (r < 0) { |
670 | ad32e9c0 | Josh Durgin | return r;
|
671 | ad32e9c0 | Josh Durgin | } |
672 | ad32e9c0 | Josh Durgin | |
673 | ad32e9c0 | Josh Durgin | bdi->cluster_size = info.obj_size; |
674 | f27aaf4b | Christian Brunner | return 0; |
675 | f27aaf4b | Christian Brunner | } |
676 | f27aaf4b | Christian Brunner | |
677 | ad32e9c0 | Josh Durgin | static int64_t qemu_rbd_getlength(BlockDriverState *bs)
|
678 | f27aaf4b | Christian Brunner | { |
679 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
680 | ad32e9c0 | Josh Durgin | rbd_image_info_t info; |
681 | ad32e9c0 | Josh Durgin | int r;
|
682 | f27aaf4b | Christian Brunner | |
683 | ad32e9c0 | Josh Durgin | r = rbd_stat(s->image, &info, sizeof(info));
|
684 | ad32e9c0 | Josh Durgin | if (r < 0) { |
685 | ad32e9c0 | Josh Durgin | return r;
|
686 | ad32e9c0 | Josh Durgin | } |
687 | ad32e9c0 | Josh Durgin | |
688 | ad32e9c0 | Josh Durgin | return info.size;
|
689 | f27aaf4b | Christian Brunner | } |
690 | f27aaf4b | Christian Brunner | |
691 | 30cdc48c | Josh Durgin | static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset) |
692 | 30cdc48c | Josh Durgin | { |
693 | 30cdc48c | Josh Durgin | BDRVRBDState *s = bs->opaque; |
694 | 30cdc48c | Josh Durgin | int r;
|
695 | 30cdc48c | Josh Durgin | |
696 | 30cdc48c | Josh Durgin | r = rbd_resize(s->image, offset); |
697 | 30cdc48c | Josh Durgin | if (r < 0) { |
698 | 30cdc48c | Josh Durgin | return r;
|
699 | 30cdc48c | Josh Durgin | } |
700 | 30cdc48c | Josh Durgin | |
701 | 30cdc48c | Josh Durgin | return 0; |
702 | 30cdc48c | Josh Durgin | } |
703 | 30cdc48c | Josh Durgin | |
704 | ad32e9c0 | Josh Durgin | static int qemu_rbd_snap_create(BlockDriverState *bs, |
705 | ad32e9c0 | Josh Durgin | QEMUSnapshotInfo *sn_info) |
706 | f27aaf4b | Christian Brunner | { |
707 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
708 | f27aaf4b | Christian Brunner | int r;
|
709 | f27aaf4b | Christian Brunner | |
710 | f27aaf4b | Christian Brunner | if (sn_info->name[0] == '\0') { |
711 | f27aaf4b | Christian Brunner | return -EINVAL; /* we need a name for rbd snapshots */ |
712 | f27aaf4b | Christian Brunner | } |
713 | f27aaf4b | Christian Brunner | |
714 | f27aaf4b | Christian Brunner | /*
|
715 | f27aaf4b | Christian Brunner | * rbd snapshots are using the name as the user controlled unique identifier
|
716 | f27aaf4b | Christian Brunner | * we can't use the rbd snapid for that purpose, as it can't be set
|
717 | f27aaf4b | Christian Brunner | */
|
718 | f27aaf4b | Christian Brunner | if (sn_info->id_str[0] != '\0' && |
719 | f27aaf4b | Christian Brunner | strcmp(sn_info->id_str, sn_info->name) != 0) {
|
720 | f27aaf4b | Christian Brunner | return -EINVAL;
|
721 | f27aaf4b | Christian Brunner | } |
722 | f27aaf4b | Christian Brunner | |
723 | f27aaf4b | Christian Brunner | if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) { |
724 | f27aaf4b | Christian Brunner | return -ERANGE;
|
725 | f27aaf4b | Christian Brunner | } |
726 | f27aaf4b | Christian Brunner | |
727 | ad32e9c0 | Josh Durgin | r = rbd_snap_create(s->image, sn_info->name); |
728 | f27aaf4b | Christian Brunner | if (r < 0) { |
729 | ad32e9c0 | Josh Durgin | error_report("failed to create snap: %s", strerror(-r));
|
730 | f27aaf4b | Christian Brunner | return r;
|
731 | f27aaf4b | Christian Brunner | } |
732 | f27aaf4b | Christian Brunner | |
733 | f27aaf4b | Christian Brunner | return 0; |
734 | f27aaf4b | Christian Brunner | } |
735 | f27aaf4b | Christian Brunner | |
736 | ad32e9c0 | Josh Durgin | static int qemu_rbd_snap_list(BlockDriverState *bs, |
737 | ad32e9c0 | Josh Durgin | QEMUSnapshotInfo **psn_tab) |
738 | f27aaf4b | Christian Brunner | { |
739 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
740 | f27aaf4b | Christian Brunner | QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
|
741 | ad32e9c0 | Josh Durgin | int i, snap_count;
|
742 | ad32e9c0 | Josh Durgin | rbd_snap_info_t *snaps; |
743 | ad32e9c0 | Josh Durgin | int max_snaps = RBD_MAX_SNAPS;
|
744 | f27aaf4b | Christian Brunner | |
745 | ad32e9c0 | Josh Durgin | do {
|
746 | ad32e9c0 | Josh Durgin | snaps = qemu_malloc(sizeof(*snaps) * max_snaps);
|
747 | ad32e9c0 | Josh Durgin | snap_count = rbd_snap_list(s->image, snaps, &max_snaps); |
748 | ad32e9c0 | Josh Durgin | if (snap_count < 0) { |
749 | ad32e9c0 | Josh Durgin | qemu_free(snaps); |
750 | f27aaf4b | Christian Brunner | } |
751 | ad32e9c0 | Josh Durgin | } while (snap_count == -ERANGE);
|
752 | f27aaf4b | Christian Brunner | |
753 | ad32e9c0 | Josh Durgin | if (snap_count <= 0) { |
754 | ad32e9c0 | Josh Durgin | return snap_count;
|
755 | f27aaf4b | Christian Brunner | } |
756 | f27aaf4b | Christian Brunner | |
757 | f27aaf4b | Christian Brunner | sn_tab = qemu_mallocz(snap_count * sizeof(QEMUSnapshotInfo));
|
758 | f27aaf4b | Christian Brunner | |
759 | ad32e9c0 | Josh Durgin | for (i = 0; i < snap_count; i++) { |
760 | ad32e9c0 | Josh Durgin | const char *snap_name = snaps[i].name; |
761 | f27aaf4b | Christian Brunner | |
762 | f27aaf4b | Christian Brunner | sn_info = sn_tab + i; |
763 | f27aaf4b | Christian Brunner | pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
|
764 | f27aaf4b | Christian Brunner | pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);
|
765 | f27aaf4b | Christian Brunner | |
766 | ad32e9c0 | Josh Durgin | sn_info->vm_state_size = snaps[i].size; |
767 | f27aaf4b | Christian Brunner | sn_info->date_sec = 0;
|
768 | f27aaf4b | Christian Brunner | sn_info->date_nsec = 0;
|
769 | f27aaf4b | Christian Brunner | sn_info->vm_clock_nsec = 0;
|
770 | f27aaf4b | Christian Brunner | } |
771 | ad32e9c0 | Josh Durgin | rbd_snap_list_end(snaps); |
772 | ad32e9c0 | Josh Durgin | |
773 | f27aaf4b | Christian Brunner | *psn_tab = sn_tab; |
774 | f27aaf4b | Christian Brunner | return snap_count;
|
775 | f27aaf4b | Christian Brunner | } |
776 | f27aaf4b | Christian Brunner | |
777 | ad32e9c0 | Josh Durgin | static QEMUOptionParameter qemu_rbd_create_options[] = {
|
778 | f27aaf4b | Christian Brunner | { |
779 | f27aaf4b | Christian Brunner | .name = BLOCK_OPT_SIZE, |
780 | f27aaf4b | Christian Brunner | .type = OPT_SIZE, |
781 | f27aaf4b | Christian Brunner | .help = "Virtual disk size"
|
782 | f27aaf4b | Christian Brunner | }, |
783 | f27aaf4b | Christian Brunner | { |
784 | f27aaf4b | Christian Brunner | .name = BLOCK_OPT_CLUSTER_SIZE, |
785 | f27aaf4b | Christian Brunner | .type = OPT_SIZE, |
786 | f27aaf4b | Christian Brunner | .help = "RBD object size"
|
787 | f27aaf4b | Christian Brunner | }, |
788 | f27aaf4b | Christian Brunner | {NULL}
|
789 | f27aaf4b | Christian Brunner | }; |
790 | f27aaf4b | Christian Brunner | |
791 | f27aaf4b | Christian Brunner | static BlockDriver bdrv_rbd = {
|
792 | f27aaf4b | Christian Brunner | .format_name = "rbd",
|
793 | f27aaf4b | Christian Brunner | .instance_size = sizeof(BDRVRBDState),
|
794 | ad32e9c0 | Josh Durgin | .bdrv_file_open = qemu_rbd_open, |
795 | ad32e9c0 | Josh Durgin | .bdrv_close = qemu_rbd_close, |
796 | ad32e9c0 | Josh Durgin | .bdrv_create = qemu_rbd_create, |
797 | ad32e9c0 | Josh Durgin | .bdrv_get_info = qemu_rbd_getinfo, |
798 | ad32e9c0 | Josh Durgin | .create_options = qemu_rbd_create_options, |
799 | ad32e9c0 | Josh Durgin | .bdrv_getlength = qemu_rbd_getlength, |
800 | 30cdc48c | Josh Durgin | .bdrv_truncate = qemu_rbd_truncate, |
801 | f27aaf4b | Christian Brunner | .protocol_name = "rbd",
|
802 | f27aaf4b | Christian Brunner | |
803 | ad32e9c0 | Josh Durgin | .bdrv_aio_readv = qemu_rbd_aio_readv, |
804 | ad32e9c0 | Josh Durgin | .bdrv_aio_writev = qemu_rbd_aio_writev, |
805 | f27aaf4b | Christian Brunner | |
806 | ad32e9c0 | Josh Durgin | .bdrv_snapshot_create = qemu_rbd_snap_create, |
807 | ad32e9c0 | Josh Durgin | .bdrv_snapshot_list = qemu_rbd_snap_list, |
808 | f27aaf4b | Christian Brunner | }; |
809 | f27aaf4b | Christian Brunner | |
810 | f27aaf4b | Christian Brunner | static void bdrv_rbd_init(void) |
811 | f27aaf4b | Christian Brunner | { |
812 | f27aaf4b | Christian Brunner | bdrv_register(&bdrv_rbd); |
813 | f27aaf4b | Christian Brunner | } |
814 | f27aaf4b | Christian Brunner | |
815 | f27aaf4b | Christian Brunner | block_init(bdrv_rbd_init); |