root / block / rbd.c @ 3d1807ac
History | View | Annotate | Download (24.5 kB)
1 | f27aaf4b | Christian Brunner | /*
|
---|---|---|---|
2 | f27aaf4b | Christian Brunner | * QEMU Block driver for RADOS (Ceph)
|
3 | f27aaf4b | Christian Brunner | *
|
4 | ad32e9c0 | Josh Durgin | * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
|
5 | ad32e9c0 | Josh Durgin | * Josh Durgin <josh.durgin@dreamhost.com>
|
6 | f27aaf4b | Christian Brunner | *
|
7 | f27aaf4b | Christian Brunner | * This work is licensed under the terms of the GNU GPL, version 2. See
|
8 | f27aaf4b | Christian Brunner | * the COPYING file in the top-level directory.
|
9 | f27aaf4b | Christian Brunner | *
|
10 | 6b620ca3 | Paolo Bonzini | * Contributions after 2012-01-13 are licensed under the terms of the
|
11 | 6b620ca3 | Paolo Bonzini | * GNU GPL, version 2 or (at your option) any later version.
|
12 | f27aaf4b | Christian Brunner | */
|
13 | f27aaf4b | Christian Brunner | |
14 | ad32e9c0 | Josh Durgin | #include <inttypes.h> |
15 | ad32e9c0 | Josh Durgin | |
16 | f27aaf4b | Christian Brunner | #include "qemu-common.h" |
17 | f27aaf4b | Christian Brunner | #include "qemu-error.h" |
18 | f27aaf4b | Christian Brunner | #include "block_int.h" |
19 | f27aaf4b | Christian Brunner | |
20 | ad32e9c0 | Josh Durgin | #include <rbd/librbd.h> |
21 | f27aaf4b | Christian Brunner | |
22 | f27aaf4b | Christian Brunner | /*
|
23 | f27aaf4b | Christian Brunner | * When specifying the image filename use:
|
24 | f27aaf4b | Christian Brunner | *
|
25 | fab5cf59 | Josh Durgin | * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
|
26 | f27aaf4b | Christian Brunner | *
|
27 | 9e1fbcde | Sage Weil | * poolname must be the name of an existing rados pool.
|
28 | f27aaf4b | Christian Brunner | *
|
29 | 9e1fbcde | Sage Weil | * devicename is the name of the rbd image.
|
30 | f27aaf4b | Christian Brunner | *
|
31 | 9e1fbcde | Sage Weil | * Each option given is used to configure rados, and may be any valid
|
32 | 9e1fbcde | Sage Weil | * Ceph option, "id", or "conf".
|
33 | fab5cf59 | Josh Durgin | *
|
34 | 9e1fbcde | Sage Weil | * The "id" option indicates what user we should authenticate as to
|
35 | 9e1fbcde | Sage Weil | * the Ceph cluster. If it is excluded we will use the Ceph default
|
36 | 9e1fbcde | Sage Weil | * (normally 'admin').
|
37 | f27aaf4b | Christian Brunner | *
|
38 | 9e1fbcde | Sage Weil | * The "conf" option specifies a Ceph configuration file to read. If
|
39 | 9e1fbcde | Sage Weil | * it is not specified, we will read from the default Ceph locations
|
40 | 9e1fbcde | Sage Weil | * (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration
|
41 | 9e1fbcde | Sage Weil | * file, specify conf=/dev/null.
|
42 | f27aaf4b | Christian Brunner | *
|
43 | 9e1fbcde | Sage Weil | * Configuration values containing :, @, or = can be escaped with a
|
44 | 9e1fbcde | Sage Weil | * leading "\".
|
45 | f27aaf4b | Christian Brunner | */
|
46 | f27aaf4b | Christian Brunner | |
47 | 787f3133 | Josh Durgin | /* rbd_aio_discard added in 0.1.2 */
|
48 | 787f3133 | Josh Durgin | #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) |
49 | 787f3133 | Josh Durgin | #define LIBRBD_SUPPORTS_DISCARD
|
50 | 787f3133 | Josh Durgin | #else
|
51 | 787f3133 | Josh Durgin | #undef LIBRBD_SUPPORTS_DISCARD
|
52 | 787f3133 | Josh Durgin | #endif
|
53 | 787f3133 | Josh Durgin | |
/* Largest object size, derived from the default object order. */
#define OBJ_MAX_SIZE            (1UL << OBJ_DEFAULT_OBJ_ORDER)

/* Buffer sizes (including the terminating NUL) for parsed filename parts. */
#define RBD_MAX_CONF_NAME_SIZE  128     /* one option name            */
#define RBD_MAX_CONF_VAL_SIZE   512     /* one option value           */
#define RBD_MAX_CONF_SIZE       1024    /* the whole option string    */
#define RBD_MAX_POOL_NAME_SIZE  128     /* pool name                  */
#define RBD_MAX_SNAP_NAME_SIZE  128     /* snapshot name              */
#define RBD_MAX_SNAPS           100
62 | ad32e9c0 | Josh Durgin | |
/* Kind of asynchronous request carried by an RBDAIOCB. */
typedef enum {
    RBD_AIO_READ,       /* data is read back into the request buffer */
    RBD_AIO_WRITE,
    RBD_AIO_DISCARD
} RBDAIOCmd;
68 | 787f3133 | Josh Durgin | |
69 | f27aaf4b | Christian Brunner | typedef struct RBDAIOCB { |
70 | f27aaf4b | Christian Brunner | BlockDriverAIOCB common; |
71 | f27aaf4b | Christian Brunner | QEMUBH *bh; |
72 | f27aaf4b | Christian Brunner | int ret;
|
73 | f27aaf4b | Christian Brunner | QEMUIOVector *qiov; |
74 | f27aaf4b | Christian Brunner | char *bounce;
|
75 | 787f3133 | Josh Durgin | RBDAIOCmd cmd; |
76 | f27aaf4b | Christian Brunner | int64_t sector_num; |
77 | f27aaf4b | Christian Brunner | int error;
|
78 | f27aaf4b | Christian Brunner | struct BDRVRBDState *s;
|
79 | f27aaf4b | Christian Brunner | int cancelled;
|
80 | f27aaf4b | Christian Brunner | } RBDAIOCB; |
81 | f27aaf4b | Christian Brunner | |
82 | f27aaf4b | Christian Brunner | typedef struct RADOSCB { |
83 | f27aaf4b | Christian Brunner | int rcbid;
|
84 | f27aaf4b | Christian Brunner | RBDAIOCB *acb; |
85 | f27aaf4b | Christian Brunner | struct BDRVRBDState *s;
|
86 | f27aaf4b | Christian Brunner | int done;
|
87 | ad32e9c0 | Josh Durgin | int64_t size; |
88 | f27aaf4b | Christian Brunner | char *buf;
|
89 | f27aaf4b | Christian Brunner | int ret;
|
90 | f27aaf4b | Christian Brunner | } RADOSCB; |
91 | f27aaf4b | Christian Brunner | |
92 | f27aaf4b | Christian Brunner | #define RBD_FD_READ 0 |
93 | f27aaf4b | Christian Brunner | #define RBD_FD_WRITE 1 |
94 | f27aaf4b | Christian Brunner | |
95 | f27aaf4b | Christian Brunner | typedef struct BDRVRBDState { |
96 | f27aaf4b | Christian Brunner | int fds[2]; |
97 | ad32e9c0 | Josh Durgin | rados_t cluster; |
98 | ad32e9c0 | Josh Durgin | rados_ioctx_t io_ctx; |
99 | ad32e9c0 | Josh Durgin | rbd_image_t image; |
100 | ad32e9c0 | Josh Durgin | char name[RBD_MAX_IMAGE_NAME_SIZE];
|
101 | f27aaf4b | Christian Brunner | int qemu_aio_count;
|
102 | ad32e9c0 | Josh Durgin | char *snap;
|
103 | f27aaf4b | Christian Brunner | int event_reader_pos;
|
104 | f27aaf4b | Christian Brunner | RADOSCB *event_rcb; |
105 | f27aaf4b | Christian Brunner | } BDRVRBDState; |
106 | f27aaf4b | Christian Brunner | |
107 | f27aaf4b | Christian Brunner | static void rbd_aio_bh_cb(void *opaque); |
108 | f27aaf4b | Christian Brunner | |
/*
 * Cut the next token out of @src and copy it to @dst.
 *
 * When @delim is not '\0', the token ends at the first occurrence of @delim
 * that is not preceded by a backslash; that delimiter is overwritten with
 * '\0' and *@p is set to the character following it.  When no delimiter is
 * found (or @delim is '\0') the token is all of @src and *@p is NULL.
 * Escape sequences are copied verbatim; the caller unescapes them.
 *
 * @name is only used in error messages.
 *
 * Returns 0 on success, -EINVAL if the token is empty or does not fit
 * into @dst_len bytes (including the terminating NUL).
 */
static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
                             char **p)
{
    char *cur;
    int len;

    *p = NULL;

    if (delim != '\0') {
        cur = src;
        while (*cur != '\0' && *cur != delim) {
            /* a backslash protects the character after it */
            if (*cur == '\\' && cur[1] != '\0') {
                cur++;
            }
            cur++;
        }
        if (*cur == delim) {
            *cur = '\0';
            *p = cur + 1;
        }
    }

    len = strlen(src);
    if (len >= dst_len) {
        error_report("%s too long", name);
        return -EINVAL;
    }
    if (len == 0) {
        error_report("%s too short", name);
        return -EINVAL;
    }

    pstrcpy(dst, dst_len, src);

    return 0;
}
146 | f27aaf4b | Christian Brunner | |
/*
 * Remove backslash escapes from @src in place: every backslash that is
 * followed by another character is dropped and that character is kept
 * literally.  A backslash that is the last character is kept.
 */
static void qemu_rbd_unescape(char *src)
{
    const char *in = src;
    char *out = src;

    while (*in != '\0') {
        if (in[0] == '\\' && in[1] != '\0') {
            in++;
        }
        *out++ = *in++;
    }
    *out = '\0';
}
159 | 16a06b24 | Sage Weil | |
/*
 * Tell whether the image spec at @spec carries a snapshot name, i.e.
 * whether an unescaped '@' appears before the first unescaped ':' (which
 * starts the option list).  A backslash protects the character after it.
 */
static int qemu_rbd_spec_has_snap(const char *spec)
{
    for (; *spec != '\0'; spec++) {
        if (*spec == '@') {
            return 1;
        }
        if (*spec == ':') {
            return 0;
        }
        if (*spec == '\\' && spec[1] != '\0') {
            spec++;
        }
    }
    return 0;
}

/*
 * Split "rbd:pool/image[@snap][:options]" into its parts.
 *
 * @pool, @name, @snap and @conf receive the unescaped pool name, the
 * unescaped image name, the unescaped snapshot name ("" if absent) and the
 * raw, still escaped option string ("" if absent).  Each *_len is the size
 * of the corresponding buffer.
 *
 * Returns a negative errno value (-EINVAL) if @filename does not start with
 * "rbd:", has no '/' after the pool name, or if any part is empty or too
 * long for its buffer.  The output buffers must not be relied upon after
 * a failure.
 */
static int qemu_rbd_parsename(const char *filename,
                              char *pool, int pool_len,
                              char *snap, int snap_len,
                              char *name, int name_len,
                              char *conf, int conf_len)
{
    const char *start;
    char *p, *buf;
    int ret;

    if (!strstart(filename, "rbd:", &start)) {
        return -EINVAL;
    }

    /* the tokenizer writes NULs into its input, so work on a copy */
    buf = g_strdup(start);
    p = buf;
    *snap = '\0';
    *conf = '\0';

    ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
    if (ret < 0 || !p) {
        ret = -EINVAL;
        goto done;
    }
    qemu_rbd_unescape(pool);

    /*
     * Only an unescaped '@' in front of the option list separates a
     * snapshot name.  A plain strchr() would also match an escaped '@' or
     * one inside an option value, leaving p NULL for the snapshot token or
     * swallowing the options into the image name.
     */
    if (qemu_rbd_spec_has_snap(p)) {
        ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
        if (ret < 0) {
            goto done;
        }
        ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
        if (ret < 0) {
            goto done;
        }
        qemu_rbd_unescape(snap);
    } else {
        ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
        if (ret < 0) {
            /* name was not written; do not touch it */
            goto done;
        }
    }
    qemu_rbd_unescape(name);
    if (!p) {
        /* no option list */
        goto done;
    }

    ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', "configuration", &p);

done:
    g_free(buf);
    return ret;
}
207 | f27aaf4b | Christian Brunner | |
/*
 * Look for an "id=<value>" option in the ':'-separated option string @conf
 * and copy its unescaped value into @clientname.
 *
 * Options are split at ':' characters that are not preceded by a backslash,
 * and backslash escapes in the value are removed, matching the way
 * qemu_rbd_set_conf() tokenizes the same string.
 *
 * @clientname must be able to hold strlen(@conf) bytes; the value written
 * is never longer than the option it came from.
 *
 * Returns @clientname if an "id" option was found (its value may be empty),
 * NULL otherwise.
 */
static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
{
    const char *p = conf;

    while (*p) {
        const char *end = p;

        /* find the unescaped ':' (or the end of string) ending this option */
        while (*end != '\0' && *end != ':') {
            if (*end == '\\' && end[1] != '\0') {
                end++;
            }
            end++;
        }

        if (strncmp(p, "id=", 3) == 0) {
            const char *in;
            char *out = clientname;

            for (in = p + 3; in < end; in++) {
                /* drop the backslash, keep the character it protects */
                if (*in == '\\' && in + 1 < end) {
                    in++;
                }
                *out++ = *in;
            }
            *out = '\0';
            return clientname;
        }
        if (*end == '\0') {
            break;
        }
        p = end + 1;
    }
    return NULL;
}
235 | 7c7e9df0 | Sage Weil | |
236 | fab5cf59 | Josh Durgin | static int qemu_rbd_set_conf(rados_t cluster, const char *conf) |
237 | fab5cf59 | Josh Durgin | { |
238 | fab5cf59 | Josh Durgin | char *p, *buf;
|
239 | fab5cf59 | Josh Durgin | char name[RBD_MAX_CONF_NAME_SIZE];
|
240 | fab5cf59 | Josh Durgin | char value[RBD_MAX_CONF_VAL_SIZE];
|
241 | fab5cf59 | Josh Durgin | int ret = 0; |
242 | fab5cf59 | Josh Durgin | |
243 | 7267c094 | Anthony Liguori | buf = g_strdup(conf); |
244 | fab5cf59 | Josh Durgin | p = buf; |
245 | fab5cf59 | Josh Durgin | |
246 | fab5cf59 | Josh Durgin | while (p) {
|
247 | fab5cf59 | Josh Durgin | ret = qemu_rbd_next_tok(name, sizeof(name), p,
|
248 | fab5cf59 | Josh Durgin | '=', "conf option name", &p); |
249 | fab5cf59 | Josh Durgin | if (ret < 0) { |
250 | fab5cf59 | Josh Durgin | break;
|
251 | fab5cf59 | Josh Durgin | } |
252 | 16a06b24 | Sage Weil | qemu_rbd_unescape(name); |
253 | fab5cf59 | Josh Durgin | |
254 | fab5cf59 | Josh Durgin | if (!p) {
|
255 | fab5cf59 | Josh Durgin | error_report("conf option %s has no value", name);
|
256 | fab5cf59 | Josh Durgin | ret = -EINVAL; |
257 | fab5cf59 | Josh Durgin | break;
|
258 | fab5cf59 | Josh Durgin | } |
259 | fab5cf59 | Josh Durgin | |
260 | fab5cf59 | Josh Durgin | ret = qemu_rbd_next_tok(value, sizeof(value), p,
|
261 | fab5cf59 | Josh Durgin | ':', "conf option value", &p); |
262 | fab5cf59 | Josh Durgin | if (ret < 0) { |
263 | fab5cf59 | Josh Durgin | break;
|
264 | fab5cf59 | Josh Durgin | } |
265 | 16a06b24 | Sage Weil | qemu_rbd_unescape(value); |
266 | fab5cf59 | Josh Durgin | |
267 | 7c7e9df0 | Sage Weil | if (strcmp(name, "conf") == 0) { |
268 | 7c7e9df0 | Sage Weil | ret = rados_conf_read_file(cluster, value); |
269 | fab5cf59 | Josh Durgin | if (ret < 0) { |
270 | 7c7e9df0 | Sage Weil | error_report("error reading conf file %s", value);
|
271 | fab5cf59 | Josh Durgin | break;
|
272 | fab5cf59 | Josh Durgin | } |
273 | 7c7e9df0 | Sage Weil | } else if (strcmp(name, "id") == 0) { |
274 | 7c7e9df0 | Sage Weil | /* ignore, this is parsed by qemu_rbd_parse_clientname() */
|
275 | fab5cf59 | Josh Durgin | } else {
|
276 | 7c7e9df0 | Sage Weil | ret = rados_conf_set(cluster, name, value); |
277 | fab5cf59 | Josh Durgin | if (ret < 0) { |
278 | 7c7e9df0 | Sage Weil | error_report("invalid conf option %s", name);
|
279 | 7c7e9df0 | Sage Weil | ret = -EINVAL; |
280 | fab5cf59 | Josh Durgin | break;
|
281 | fab5cf59 | Josh Durgin | } |
282 | fab5cf59 | Josh Durgin | } |
283 | fab5cf59 | Josh Durgin | } |
284 | fab5cf59 | Josh Durgin | |
285 | 7267c094 | Anthony Liguori | g_free(buf); |
286 | fab5cf59 | Josh Durgin | return ret;
|
287 | fab5cf59 | Josh Durgin | } |
288 | fab5cf59 | Josh Durgin | |
289 | ad32e9c0 | Josh Durgin | static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) |
290 | f27aaf4b | Christian Brunner | { |
291 | f27aaf4b | Christian Brunner | int64_t bytes = 0;
|
292 | f27aaf4b | Christian Brunner | int64_t objsize; |
293 | ad32e9c0 | Josh Durgin | int obj_order = 0; |
294 | ad32e9c0 | Josh Durgin | char pool[RBD_MAX_POOL_NAME_SIZE];
|
295 | ad32e9c0 | Josh Durgin | char name[RBD_MAX_IMAGE_NAME_SIZE];
|
296 | ad32e9c0 | Josh Durgin | char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
|
297 | fab5cf59 | Josh Durgin | char conf[RBD_MAX_CONF_SIZE];
|
298 | 7c7e9df0 | Sage Weil | char clientname_buf[RBD_MAX_CONF_SIZE];
|
299 | 7c7e9df0 | Sage Weil | char *clientname;
|
300 | ad32e9c0 | Josh Durgin | rados_t cluster; |
301 | ad32e9c0 | Josh Durgin | rados_ioctx_t io_ctx; |
302 | f27aaf4b | Christian Brunner | int ret;
|
303 | f27aaf4b | Christian Brunner | |
304 | ad32e9c0 | Josh Durgin | if (qemu_rbd_parsename(filename, pool, sizeof(pool), |
305 | ad32e9c0 | Josh Durgin | snap_buf, sizeof(snap_buf),
|
306 | fab5cf59 | Josh Durgin | name, sizeof(name),
|
307 | fab5cf59 | Josh Durgin | conf, sizeof(conf)) < 0) { |
308 | f27aaf4b | Christian Brunner | return -EINVAL;
|
309 | f27aaf4b | Christian Brunner | } |
310 | f27aaf4b | Christian Brunner | |
311 | f27aaf4b | Christian Brunner | /* Read out options */
|
312 | f27aaf4b | Christian Brunner | while (options && options->name) {
|
313 | f27aaf4b | Christian Brunner | if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
|
314 | f27aaf4b | Christian Brunner | bytes = options->value.n; |
315 | f27aaf4b | Christian Brunner | } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { |
316 | f27aaf4b | Christian Brunner | if (options->value.n) {
|
317 | f27aaf4b | Christian Brunner | objsize = options->value.n; |
318 | f27aaf4b | Christian Brunner | if ((objsize - 1) & objsize) { /* not a power of 2? */ |
319 | f27aaf4b | Christian Brunner | error_report("obj size needs to be power of 2");
|
320 | f27aaf4b | Christian Brunner | return -EINVAL;
|
321 | f27aaf4b | Christian Brunner | } |
322 | f27aaf4b | Christian Brunner | if (objsize < 4096) { |
323 | f27aaf4b | Christian Brunner | error_report("obj size too small");
|
324 | f27aaf4b | Christian Brunner | return -EINVAL;
|
325 | f27aaf4b | Christian Brunner | } |
326 | ad32e9c0 | Josh Durgin | obj_order = ffs(objsize) - 1;
|
327 | f27aaf4b | Christian Brunner | } |
328 | f27aaf4b | Christian Brunner | } |
329 | f27aaf4b | Christian Brunner | options++; |
330 | f27aaf4b | Christian Brunner | } |
331 | f27aaf4b | Christian Brunner | |
332 | 7c7e9df0 | Sage Weil | clientname = qemu_rbd_parse_clientname(conf, clientname_buf); |
333 | 7c7e9df0 | Sage Weil | if (rados_create(&cluster, clientname) < 0) { |
334 | f27aaf4b | Christian Brunner | error_report("error initializing");
|
335 | f27aaf4b | Christian Brunner | return -EIO;
|
336 | f27aaf4b | Christian Brunner | } |
337 | f27aaf4b | Christian Brunner | |
338 | fab5cf59 | Josh Durgin | if (strstr(conf, "conf=") == NULL) { |
339 | f9fe18ec | Sage Weil | /* try default location, but ignore failure */
|
340 | f9fe18ec | Sage Weil | rados_conf_read_file(cluster, NULL);
|
341 | fab5cf59 | Josh Durgin | } |
342 | fab5cf59 | Josh Durgin | |
343 | fab5cf59 | Josh Durgin | if (conf[0] != '\0' && |
344 | fab5cf59 | Josh Durgin | qemu_rbd_set_conf(cluster, conf) < 0) {
|
345 | fab5cf59 | Josh Durgin | error_report("error setting config options");
|
346 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
347 | f27aaf4b | Christian Brunner | return -EIO;
|
348 | f27aaf4b | Christian Brunner | } |
349 | f27aaf4b | Christian Brunner | |
350 | ad32e9c0 | Josh Durgin | if (rados_connect(cluster) < 0) { |
351 | ad32e9c0 | Josh Durgin | error_report("error connecting");
|
352 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
353 | f27aaf4b | Christian Brunner | return -EIO;
|
354 | f27aaf4b | Christian Brunner | } |
355 | f27aaf4b | Christian Brunner | |
356 | ad32e9c0 | Josh Durgin | if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) { |
357 | ad32e9c0 | Josh Durgin | error_report("error opening pool %s", pool);
|
358 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
359 | ad32e9c0 | Josh Durgin | return -EIO;
|
360 | f27aaf4b | Christian Brunner | } |
361 | f27aaf4b | Christian Brunner | |
362 | ad32e9c0 | Josh Durgin | ret = rbd_create(io_ctx, name, bytes, &obj_order); |
363 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(io_ctx); |
364 | ad32e9c0 | Josh Durgin | rados_shutdown(cluster); |
365 | f27aaf4b | Christian Brunner | |
366 | f27aaf4b | Christian Brunner | return ret;
|
367 | f27aaf4b | Christian Brunner | } |
368 | f27aaf4b | Christian Brunner | |
369 | f27aaf4b | Christian Brunner | /*
|
370 | ad32e9c0 | Josh Durgin | * This aio completion is being called from qemu_rbd_aio_event_reader()
|
371 | ad32e9c0 | Josh Durgin | * and runs in qemu context. It schedules a bh, but just in case the aio
|
372 | f27aaf4b | Christian Brunner | * was not cancelled before.
|
373 | f27aaf4b | Christian Brunner | */
|
374 | ad32e9c0 | Josh Durgin | static void qemu_rbd_complete_aio(RADOSCB *rcb) |
375 | f27aaf4b | Christian Brunner | { |
376 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = rcb->acb; |
377 | f27aaf4b | Christian Brunner | int64_t r; |
378 | f27aaf4b | Christian Brunner | |
379 | f27aaf4b | Christian Brunner | if (acb->cancelled) {
|
380 | ad32e9c0 | Josh Durgin | qemu_vfree(acb->bounce); |
381 | ad32e9c0 | Josh Durgin | qemu_aio_release(acb); |
382 | f27aaf4b | Christian Brunner | goto done;
|
383 | f27aaf4b | Christian Brunner | } |
384 | f27aaf4b | Christian Brunner | |
385 | f27aaf4b | Christian Brunner | r = rcb->ret; |
386 | f27aaf4b | Christian Brunner | |
387 | 787f3133 | Josh Durgin | if (acb->cmd == RBD_AIO_WRITE ||
|
388 | 787f3133 | Josh Durgin | acb->cmd == RBD_AIO_DISCARD) { |
389 | f27aaf4b | Christian Brunner | if (r < 0) { |
390 | f27aaf4b | Christian Brunner | acb->ret = r; |
391 | f27aaf4b | Christian Brunner | acb->error = 1;
|
392 | f27aaf4b | Christian Brunner | } else if (!acb->error) { |
393 | ad32e9c0 | Josh Durgin | acb->ret = rcb->size; |
394 | f27aaf4b | Christian Brunner | } |
395 | f27aaf4b | Christian Brunner | } else {
|
396 | ad32e9c0 | Josh Durgin | if (r < 0) { |
397 | ad32e9c0 | Josh Durgin | memset(rcb->buf, 0, rcb->size);
|
398 | f27aaf4b | Christian Brunner | acb->ret = r; |
399 | f27aaf4b | Christian Brunner | acb->error = 1;
|
400 | ad32e9c0 | Josh Durgin | } else if (r < rcb->size) { |
401 | ad32e9c0 | Josh Durgin | memset(rcb->buf + r, 0, rcb->size - r);
|
402 | f27aaf4b | Christian Brunner | if (!acb->error) {
|
403 | ad32e9c0 | Josh Durgin | acb->ret = rcb->size; |
404 | f27aaf4b | Christian Brunner | } |
405 | f27aaf4b | Christian Brunner | } else if (!acb->error) { |
406 | ad32e9c0 | Josh Durgin | acb->ret = r; |
407 | f27aaf4b | Christian Brunner | } |
408 | f27aaf4b | Christian Brunner | } |
409 | f27aaf4b | Christian Brunner | /* Note that acb->bh can be NULL in case where the aio was cancelled */
|
410 | ad32e9c0 | Josh Durgin | acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); |
411 | ad32e9c0 | Josh Durgin | qemu_bh_schedule(acb->bh); |
412 | f27aaf4b | Christian Brunner | done:
|
413 | 7267c094 | Anthony Liguori | g_free(rcb); |
414 | f27aaf4b | Christian Brunner | } |
415 | f27aaf4b | Christian Brunner | |
416 | f27aaf4b | Christian Brunner | /*
|
417 | f27aaf4b | Christian Brunner | * aio fd read handler. It runs in the qemu context and calls the
|
418 | f27aaf4b | Christian Brunner | * completion handling of completed rados aio operations.
|
419 | f27aaf4b | Christian Brunner | */
|
420 | ad32e9c0 | Josh Durgin | static void qemu_rbd_aio_event_reader(void *opaque) |
421 | f27aaf4b | Christian Brunner | { |
422 | f27aaf4b | Christian Brunner | BDRVRBDState *s = opaque; |
423 | f27aaf4b | Christian Brunner | |
424 | f27aaf4b | Christian Brunner | ssize_t ret; |
425 | f27aaf4b | Christian Brunner | |
426 | f27aaf4b | Christian Brunner | do {
|
427 | f27aaf4b | Christian Brunner | char *p = (char *)&s->event_rcb; |
428 | f27aaf4b | Christian Brunner | |
429 | f27aaf4b | Christian Brunner | /* now read the rcb pointer that was sent from a non qemu thread */
|
430 | dfe80b07 | Sage Weil | ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos, |
431 | dfe80b07 | Sage Weil | sizeof(s->event_rcb) - s->event_reader_pos);
|
432 | dfe80b07 | Sage Weil | if (ret > 0) { |
433 | dfe80b07 | Sage Weil | s->event_reader_pos += ret; |
434 | dfe80b07 | Sage Weil | if (s->event_reader_pos == sizeof(s->event_rcb)) { |
435 | dfe80b07 | Sage Weil | s->event_reader_pos = 0;
|
436 | dfe80b07 | Sage Weil | qemu_rbd_complete_aio(s->event_rcb); |
437 | dfe80b07 | Sage Weil | s->qemu_aio_count--; |
438 | f27aaf4b | Christian Brunner | } |
439 | f27aaf4b | Christian Brunner | } |
440 | f27aaf4b | Christian Brunner | } while (ret < 0 && errno == EINTR); |
441 | f27aaf4b | Christian Brunner | } |
442 | f27aaf4b | Christian Brunner | |
443 | ad32e9c0 | Josh Durgin | static int qemu_rbd_aio_flush_cb(void *opaque) |
444 | f27aaf4b | Christian Brunner | { |
445 | f27aaf4b | Christian Brunner | BDRVRBDState *s = opaque; |
446 | f27aaf4b | Christian Brunner | |
447 | f27aaf4b | Christian Brunner | return (s->qemu_aio_count > 0); |
448 | f27aaf4b | Christian Brunner | } |
449 | f27aaf4b | Christian Brunner | |
450 | ad32e9c0 | Josh Durgin | static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) |
451 | f27aaf4b | Christian Brunner | { |
452 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
453 | ad32e9c0 | Josh Durgin | char pool[RBD_MAX_POOL_NAME_SIZE];
|
454 | ad32e9c0 | Josh Durgin | char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
|
455 | fab5cf59 | Josh Durgin | char conf[RBD_MAX_CONF_SIZE];
|
456 | 7c7e9df0 | Sage Weil | char clientname_buf[RBD_MAX_CONF_SIZE];
|
457 | 7c7e9df0 | Sage Weil | char *clientname;
|
458 | f27aaf4b | Christian Brunner | int r;
|
459 | f27aaf4b | Christian Brunner | |
460 | ad32e9c0 | Josh Durgin | if (qemu_rbd_parsename(filename, pool, sizeof(pool), |
461 | ad32e9c0 | Josh Durgin | snap_buf, sizeof(snap_buf),
|
462 | fab5cf59 | Josh Durgin | s->name, sizeof(s->name),
|
463 | fab5cf59 | Josh Durgin | conf, sizeof(conf)) < 0) { |
464 | f27aaf4b | Christian Brunner | return -EINVAL;
|
465 | f27aaf4b | Christian Brunner | } |
466 | f27aaf4b | Christian Brunner | |
467 | 7c7e9df0 | Sage Weil | clientname = qemu_rbd_parse_clientname(conf, clientname_buf); |
468 | 7c7e9df0 | Sage Weil | r = rados_create(&s->cluster, clientname); |
469 | ad32e9c0 | Josh Durgin | if (r < 0) { |
470 | f27aaf4b | Christian Brunner | error_report("error initializing");
|
471 | f27aaf4b | Christian Brunner | return r;
|
472 | f27aaf4b | Christian Brunner | } |
473 | f27aaf4b | Christian Brunner | |
474 | eb93d5d9 | Sage Weil | s->snap = NULL;
|
475 | eb93d5d9 | Sage Weil | if (snap_buf[0] != '\0') { |
476 | eb93d5d9 | Sage Weil | s->snap = g_strdup(snap_buf); |
477 | eb93d5d9 | Sage Weil | } |
478 | eb93d5d9 | Sage Weil | |
479 | b11f38fc | Josh Durgin | /*
|
480 | b11f38fc | Josh Durgin | * Fallback to more conservative semantics if setting cache
|
481 | b11f38fc | Josh Durgin | * options fails. Ignore errors from setting rbd_cache because the
|
482 | b11f38fc | Josh Durgin | * only possible error is that the option does not exist, and
|
483 | b11f38fc | Josh Durgin | * librbd defaults to no caching. If write through caching cannot
|
484 | b11f38fc | Josh Durgin | * be set up, fall back to no caching.
|
485 | b11f38fc | Josh Durgin | */
|
486 | b11f38fc | Josh Durgin | if (flags & BDRV_O_NOCACHE) {
|
487 | b11f38fc | Josh Durgin | rados_conf_set(s->cluster, "rbd_cache", "false"); |
488 | b11f38fc | Josh Durgin | } else {
|
489 | b11f38fc | Josh Durgin | rados_conf_set(s->cluster, "rbd_cache", "true"); |
490 | b11f38fc | Josh Durgin | } |
491 | b11f38fc | Josh Durgin | |
492 | fab5cf59 | Josh Durgin | if (strstr(conf, "conf=") == NULL) { |
493 | f9fe18ec | Sage Weil | /* try default location, but ignore failure */
|
494 | f9fe18ec | Sage Weil | rados_conf_read_file(s->cluster, NULL);
|
495 | fab5cf59 | Josh Durgin | } |
496 | fab5cf59 | Josh Durgin | |
497 | fab5cf59 | Josh Durgin | if (conf[0] != '\0') { |
498 | fab5cf59 | Josh Durgin | r = qemu_rbd_set_conf(s->cluster, conf); |
499 | fab5cf59 | Josh Durgin | if (r < 0) { |
500 | fab5cf59 | Josh Durgin | error_report("error setting config options");
|
501 | eb93d5d9 | Sage Weil | goto failed_shutdown;
|
502 | fab5cf59 | Josh Durgin | } |
503 | f27aaf4b | Christian Brunner | } |
504 | f27aaf4b | Christian Brunner | |
505 | ad32e9c0 | Josh Durgin | r = rados_connect(s->cluster); |
506 | ad32e9c0 | Josh Durgin | if (r < 0) { |
507 | ad32e9c0 | Josh Durgin | error_report("error connecting");
|
508 | eb93d5d9 | Sage Weil | goto failed_shutdown;
|
509 | f27aaf4b | Christian Brunner | } |
510 | f27aaf4b | Christian Brunner | |
511 | ad32e9c0 | Josh Durgin | r = rados_ioctx_create(s->cluster, pool, &s->io_ctx); |
512 | ad32e9c0 | Josh Durgin | if (r < 0) { |
513 | ad32e9c0 | Josh Durgin | error_report("error opening pool %s", pool);
|
514 | eb93d5d9 | Sage Weil | goto failed_shutdown;
|
515 | f27aaf4b | Christian Brunner | } |
516 | f27aaf4b | Christian Brunner | |
517 | ad32e9c0 | Josh Durgin | r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); |
518 | f27aaf4b | Christian Brunner | if (r < 0) { |
519 | ad32e9c0 | Josh Durgin | error_report("error reading header from %s", s->name);
|
520 | eb93d5d9 | Sage Weil | goto failed_open;
|
521 | f27aaf4b | Christian Brunner | } |
522 | f27aaf4b | Christian Brunner | |
523 | ad32e9c0 | Josh Durgin | bs->read_only = (s->snap != NULL);
|
524 | f27aaf4b | Christian Brunner | |
525 | f27aaf4b | Christian Brunner | s->event_reader_pos = 0;
|
526 | f27aaf4b | Christian Brunner | r = qemu_pipe(s->fds); |
527 | f27aaf4b | Christian Brunner | if (r < 0) { |
528 | f27aaf4b | Christian Brunner | error_report("error opening eventfd");
|
529 | f27aaf4b | Christian Brunner | goto failed;
|
530 | f27aaf4b | Christian Brunner | } |
531 | f27aaf4b | Christian Brunner | fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
|
532 | f27aaf4b | Christian Brunner | fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
|
533 | ad32e9c0 | Josh Durgin | qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, |
534 | bafbd6a1 | Paolo Bonzini | NULL, qemu_rbd_aio_flush_cb, s);
|
535 | f27aaf4b | Christian Brunner | |
536 | f27aaf4b | Christian Brunner | |
537 | f27aaf4b | Christian Brunner | return 0; |
538 | f27aaf4b | Christian Brunner | |
539 | f27aaf4b | Christian Brunner | failed:
|
540 | ad32e9c0 | Josh Durgin | rbd_close(s->image); |
541 | eb93d5d9 | Sage Weil | failed_open:
|
542 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(s->io_ctx); |
543 | eb93d5d9 | Sage Weil | failed_shutdown:
|
544 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
545 | eb93d5d9 | Sage Weil | g_free(s->snap); |
546 | f27aaf4b | Christian Brunner | return r;
|
547 | f27aaf4b | Christian Brunner | } |
548 | f27aaf4b | Christian Brunner | |
549 | ad32e9c0 | Josh Durgin | static void qemu_rbd_close(BlockDriverState *bs) |
550 | f27aaf4b | Christian Brunner | { |
551 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
552 | f27aaf4b | Christian Brunner | |
553 | f27aaf4b | Christian Brunner | close(s->fds[0]);
|
554 | f27aaf4b | Christian Brunner | close(s->fds[1]);
|
555 | bafbd6a1 | Paolo Bonzini | qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL); |
556 | f27aaf4b | Christian Brunner | |
557 | ad32e9c0 | Josh Durgin | rbd_close(s->image); |
558 | ad32e9c0 | Josh Durgin | rados_ioctx_destroy(s->io_ctx); |
559 | 7267c094 | Anthony Liguori | g_free(s->snap); |
560 | ad32e9c0 | Josh Durgin | rados_shutdown(s->cluster); |
561 | f27aaf4b | Christian Brunner | } |
562 | f27aaf4b | Christian Brunner | |
563 | f27aaf4b | Christian Brunner | /*
|
564 | f27aaf4b | Christian Brunner | * Cancel aio. Since we don't reference acb in a non qemu threads,
|
565 | f27aaf4b | Christian Brunner | * it is safe to access it here.
|
566 | f27aaf4b | Christian Brunner | */
|
567 | ad32e9c0 | Josh Durgin | static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) |
568 | f27aaf4b | Christian Brunner | { |
569 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = (RBDAIOCB *) blockacb; |
570 | f27aaf4b | Christian Brunner | acb->cancelled = 1;
|
571 | f27aaf4b | Christian Brunner | } |
572 | f27aaf4b | Christian Brunner | |
573 | f27aaf4b | Christian Brunner | static AIOPool rbd_aio_pool = {
|
574 | f27aaf4b | Christian Brunner | .aiocb_size = sizeof(RBDAIOCB),
|
575 | ad32e9c0 | Josh Durgin | .cancel = qemu_rbd_aio_cancel, |
576 | f27aaf4b | Christian Brunner | }; |
577 | f27aaf4b | Christian Brunner | |
578 | ad32e9c0 | Josh Durgin | static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) |
579 | f27aaf4b | Christian Brunner | { |
580 | ad32e9c0 | Josh Durgin | int ret = 0; |
581 | f27aaf4b | Christian Brunner | while (1) { |
582 | f27aaf4b | Christian Brunner | fd_set wfd; |
583 | ad32e9c0 | Josh Durgin | int fd = s->fds[RBD_FD_WRITE];
|
584 | f27aaf4b | Christian Brunner | |
585 | ad32e9c0 | Josh Durgin | /* send the op pointer to the qemu thread that is responsible
|
586 | ad32e9c0 | Josh Durgin | for the aio/op completion. Must do it in a qemu thread context */
|
587 | f27aaf4b | Christian Brunner | ret = write(fd, (void *)&rcb, sizeof(rcb)); |
588 | f27aaf4b | Christian Brunner | if (ret >= 0) { |
589 | f27aaf4b | Christian Brunner | break;
|
590 | f27aaf4b | Christian Brunner | } |
591 | f27aaf4b | Christian Brunner | if (errno == EINTR) {
|
592 | f27aaf4b | Christian Brunner | continue;
|
593 | ad32e9c0 | Josh Durgin | } |
594 | f27aaf4b | Christian Brunner | if (errno != EAGAIN) {
|
595 | f27aaf4b | Christian Brunner | break;
|
596 | ad32e9c0 | Josh Durgin | } |
597 | f27aaf4b | Christian Brunner | |
598 | f27aaf4b | Christian Brunner | FD_ZERO(&wfd); |
599 | f27aaf4b | Christian Brunner | FD_SET(fd, &wfd); |
600 | f27aaf4b | Christian Brunner | do {
|
601 | f27aaf4b | Christian Brunner | ret = select(fd + 1, NULL, &wfd, NULL, NULL); |
602 | f27aaf4b | Christian Brunner | } while (ret < 0 && errno == EINTR); |
603 | f27aaf4b | Christian Brunner | } |
604 | f27aaf4b | Christian Brunner | |
605 | ad32e9c0 | Josh Durgin | return ret;
|
606 | ad32e9c0 | Josh Durgin | } |
607 | ad32e9c0 | Josh Durgin | |
608 | ad32e9c0 | Josh Durgin | /*
|
609 | ad32e9c0 | Josh Durgin | * This is the callback function for rbd_aio_read and _write
|
610 | ad32e9c0 | Josh Durgin | *
|
611 | ad32e9c0 | Josh Durgin | * Note: this function is being called from a non qemu thread so
|
612 | ad32e9c0 | Josh Durgin | * we need to be careful about what we do here. Generally we only
|
613 | ad32e9c0 | Josh Durgin | * write to the block notification pipe, and do the rest of the
|
614 | ad32e9c0 | Josh Durgin | * io completion handling from qemu_rbd_aio_event_reader() which
|
615 | ad32e9c0 | Josh Durgin | * runs in a qemu context.
|
616 | ad32e9c0 | Josh Durgin | */
|
617 | ad32e9c0 | Josh Durgin | static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) |
618 | ad32e9c0 | Josh Durgin | { |
619 | ad32e9c0 | Josh Durgin | int ret;
|
620 | ad32e9c0 | Josh Durgin | rcb->ret = rbd_aio_get_return_value(c); |
621 | ad32e9c0 | Josh Durgin | rbd_aio_release(c); |
622 | ad32e9c0 | Josh Durgin | ret = qemu_rbd_send_pipe(rcb->s, rcb); |
623 | f27aaf4b | Christian Brunner | if (ret < 0) { |
624 | ad32e9c0 | Josh Durgin | error_report("failed writing to acb->s->fds");
|
625 | 7267c094 | Anthony Liguori | g_free(rcb); |
626 | f27aaf4b | Christian Brunner | } |
627 | f27aaf4b | Christian Brunner | } |
628 | f27aaf4b | Christian Brunner | |
629 | ad32e9c0 | Josh Durgin | /* Callback when all queued rbd_aio requests are complete */
|
630 | f27aaf4b | Christian Brunner | |
631 | f27aaf4b | Christian Brunner | static void rbd_aio_bh_cb(void *opaque) |
632 | f27aaf4b | Christian Brunner | { |
633 | f27aaf4b | Christian Brunner | RBDAIOCB *acb = opaque; |
634 | f27aaf4b | Christian Brunner | |
635 | 787f3133 | Josh Durgin | if (acb->cmd == RBD_AIO_READ) {
|
636 | 03396148 | Michael Tokarev | qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
|
637 | f27aaf4b | Christian Brunner | } |
638 | f27aaf4b | Christian Brunner | qemu_vfree(acb->bounce); |
639 | f27aaf4b | Christian Brunner | acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); |
640 | f27aaf4b | Christian Brunner | qemu_bh_delete(acb->bh); |
641 | f27aaf4b | Christian Brunner | acb->bh = NULL;
|
642 | f27aaf4b | Christian Brunner | |
643 | f27aaf4b | Christian Brunner | qemu_aio_release(acb); |
644 | f27aaf4b | Christian Brunner | } |
645 | f27aaf4b | Christian Brunner | |
646 | 787f3133 | Josh Durgin | static int rbd_aio_discard_wrapper(rbd_image_t image, |
647 | 787f3133 | Josh Durgin | uint64_t off, |
648 | 787f3133 | Josh Durgin | uint64_t len, |
649 | 787f3133 | Josh Durgin | rbd_completion_t comp) |
650 | 787f3133 | Josh Durgin | { |
651 | 787f3133 | Josh Durgin | #ifdef LIBRBD_SUPPORTS_DISCARD
|
652 | 787f3133 | Josh Durgin | return rbd_aio_discard(image, off, len, comp);
|
653 | 787f3133 | Josh Durgin | #else
|
654 | 787f3133 | Josh Durgin | return -ENOTSUP;
|
655 | 787f3133 | Josh Durgin | #endif
|
656 | 787f3133 | Josh Durgin | } |
657 | 787f3133 | Josh Durgin | |
658 | 787f3133 | Josh Durgin | static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
|
659 | 787f3133 | Josh Durgin | int64_t sector_num, |
660 | 787f3133 | Josh Durgin | QEMUIOVector *qiov, |
661 | 787f3133 | Josh Durgin | int nb_sectors,
|
662 | 787f3133 | Josh Durgin | BlockDriverCompletionFunc *cb, |
663 | 787f3133 | Josh Durgin | void *opaque,
|
664 | 787f3133 | Josh Durgin | RBDAIOCmd cmd) |
665 | f27aaf4b | Christian Brunner | { |
666 | f27aaf4b | Christian Brunner | RBDAIOCB *acb; |
667 | f27aaf4b | Christian Brunner | RADOSCB *rcb; |
668 | ad32e9c0 | Josh Durgin | rbd_completion_t c; |
669 | f27aaf4b | Christian Brunner | int64_t off, size; |
670 | f27aaf4b | Christian Brunner | char *buf;
|
671 | 51a13528 | Josh Durgin | int r;
|
672 | f27aaf4b | Christian Brunner | |
673 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
674 | f27aaf4b | Christian Brunner | |
675 | f27aaf4b | Christian Brunner | acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque); |
676 | 787f3133 | Josh Durgin | acb->cmd = cmd; |
677 | f27aaf4b | Christian Brunner | acb->qiov = qiov; |
678 | 787f3133 | Josh Durgin | if (cmd == RBD_AIO_DISCARD) {
|
679 | 787f3133 | Josh Durgin | acb->bounce = NULL;
|
680 | 787f3133 | Josh Durgin | } else {
|
681 | 787f3133 | Josh Durgin | acb->bounce = qemu_blockalign(bs, qiov->size); |
682 | 787f3133 | Josh Durgin | } |
683 | f27aaf4b | Christian Brunner | acb->ret = 0;
|
684 | f27aaf4b | Christian Brunner | acb->error = 0;
|
685 | f27aaf4b | Christian Brunner | acb->s = s; |
686 | f27aaf4b | Christian Brunner | acb->cancelled = 0;
|
687 | f27aaf4b | Christian Brunner | acb->bh = NULL;
|
688 | f27aaf4b | Christian Brunner | |
689 | 787f3133 | Josh Durgin | if (cmd == RBD_AIO_WRITE) {
|
690 | d5e6b161 | Michael Tokarev | qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
|
691 | f27aaf4b | Christian Brunner | } |
692 | f27aaf4b | Christian Brunner | |
693 | f27aaf4b | Christian Brunner | buf = acb->bounce; |
694 | f27aaf4b | Christian Brunner | |
695 | f27aaf4b | Christian Brunner | off = sector_num * BDRV_SECTOR_SIZE; |
696 | f27aaf4b | Christian Brunner | size = nb_sectors * BDRV_SECTOR_SIZE; |
697 | f27aaf4b | Christian Brunner | |
698 | ad32e9c0 | Josh Durgin | s->qemu_aio_count++; /* All the RADOSCB */
|
699 | f27aaf4b | Christian Brunner | |
700 | 7267c094 | Anthony Liguori | rcb = g_malloc(sizeof(RADOSCB));
|
701 | ad32e9c0 | Josh Durgin | rcb->done = 0;
|
702 | ad32e9c0 | Josh Durgin | rcb->acb = acb; |
703 | ad32e9c0 | Josh Durgin | rcb->buf = buf; |
704 | ad32e9c0 | Josh Durgin | rcb->s = acb->s; |
705 | ad32e9c0 | Josh Durgin | rcb->size = size; |
706 | 51a13528 | Josh Durgin | r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); |
707 | 51a13528 | Josh Durgin | if (r < 0) { |
708 | 51a13528 | Josh Durgin | goto failed;
|
709 | 51a13528 | Josh Durgin | } |
710 | f27aaf4b | Christian Brunner | |
711 | 787f3133 | Josh Durgin | switch (cmd) {
|
712 | 787f3133 | Josh Durgin | case RBD_AIO_WRITE:
|
713 | 51a13528 | Josh Durgin | r = rbd_aio_write(s->image, off, size, buf, c); |
714 | 787f3133 | Josh Durgin | break;
|
715 | 787f3133 | Josh Durgin | case RBD_AIO_READ:
|
716 | 51a13528 | Josh Durgin | r = rbd_aio_read(s->image, off, size, buf, c); |
717 | 787f3133 | Josh Durgin | break;
|
718 | 787f3133 | Josh Durgin | case RBD_AIO_DISCARD:
|
719 | 787f3133 | Josh Durgin | r = rbd_aio_discard_wrapper(s->image, off, size, c); |
720 | 787f3133 | Josh Durgin | break;
|
721 | 787f3133 | Josh Durgin | default:
|
722 | 787f3133 | Josh Durgin | r = -EINVAL; |
723 | 51a13528 | Josh Durgin | } |
724 | 51a13528 | Josh Durgin | |
725 | 51a13528 | Josh Durgin | if (r < 0) { |
726 | 51a13528 | Josh Durgin | goto failed;
|
727 | f27aaf4b | Christian Brunner | } |
728 | f27aaf4b | Christian Brunner | |
729 | f27aaf4b | Christian Brunner | return &acb->common;
|
730 | 51a13528 | Josh Durgin | |
731 | 51a13528 | Josh Durgin | failed:
|
732 | 7267c094 | Anthony Liguori | g_free(rcb); |
733 | 51a13528 | Josh Durgin | s->qemu_aio_count--; |
734 | 51a13528 | Josh Durgin | qemu_aio_release(acb); |
735 | 51a13528 | Josh Durgin | return NULL; |
736 | f27aaf4b | Christian Brunner | } |
737 | f27aaf4b | Christian Brunner | |
738 | ad32e9c0 | Josh Durgin | static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
|
739 | ad32e9c0 | Josh Durgin | int64_t sector_num, |
740 | ad32e9c0 | Josh Durgin | QEMUIOVector *qiov, |
741 | ad32e9c0 | Josh Durgin | int nb_sectors,
|
742 | ad32e9c0 | Josh Durgin | BlockDriverCompletionFunc *cb, |
743 | ad32e9c0 | Josh Durgin | void *opaque)
|
744 | f27aaf4b | Christian Brunner | { |
745 | 787f3133 | Josh Durgin | return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
|
746 | 787f3133 | Josh Durgin | RBD_AIO_READ); |
747 | f27aaf4b | Christian Brunner | } |
748 | f27aaf4b | Christian Brunner | |
749 | ad32e9c0 | Josh Durgin | static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
|
750 | ad32e9c0 | Josh Durgin | int64_t sector_num, |
751 | ad32e9c0 | Josh Durgin | QEMUIOVector *qiov, |
752 | ad32e9c0 | Josh Durgin | int nb_sectors,
|
753 | ad32e9c0 | Josh Durgin | BlockDriverCompletionFunc *cb, |
754 | ad32e9c0 | Josh Durgin | void *opaque)
|
755 | f27aaf4b | Christian Brunner | { |
756 | 787f3133 | Josh Durgin | return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
|
757 | 787f3133 | Josh Durgin | RBD_AIO_WRITE); |
758 | f27aaf4b | Christian Brunner | } |
759 | f27aaf4b | Christian Brunner | |
760 | 8b94ff85 | Paolo Bonzini | static int qemu_rbd_co_flush(BlockDriverState *bs) |
761 | 7a3f5fe9 | Sage Weil | { |
762 | 7a3f5fe9 | Sage Weil | #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) |
763 | 7a3f5fe9 | Sage Weil | /* rbd_flush added in 0.1.1 */
|
764 | 7a3f5fe9 | Sage Weil | BDRVRBDState *s = bs->opaque; |
765 | 7a3f5fe9 | Sage Weil | return rbd_flush(s->image);
|
766 | 7a3f5fe9 | Sage Weil | #else
|
767 | 7a3f5fe9 | Sage Weil | return 0; |
768 | 7a3f5fe9 | Sage Weil | #endif
|
769 | 7a3f5fe9 | Sage Weil | } |
770 | 7a3f5fe9 | Sage Weil | |
771 | ad32e9c0 | Josh Durgin | static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) |
772 | f27aaf4b | Christian Brunner | { |
773 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
774 | ad32e9c0 | Josh Durgin | rbd_image_info_t info; |
775 | ad32e9c0 | Josh Durgin | int r;
|
776 | ad32e9c0 | Josh Durgin | |
777 | ad32e9c0 | Josh Durgin | r = rbd_stat(s->image, &info, sizeof(info));
|
778 | ad32e9c0 | Josh Durgin | if (r < 0) { |
779 | ad32e9c0 | Josh Durgin | return r;
|
780 | ad32e9c0 | Josh Durgin | } |
781 | ad32e9c0 | Josh Durgin | |
782 | ad32e9c0 | Josh Durgin | bdi->cluster_size = info.obj_size; |
783 | f27aaf4b | Christian Brunner | return 0; |
784 | f27aaf4b | Christian Brunner | } |
785 | f27aaf4b | Christian Brunner | |
786 | ad32e9c0 | Josh Durgin | static int64_t qemu_rbd_getlength(BlockDriverState *bs)
|
787 | f27aaf4b | Christian Brunner | { |
788 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
789 | ad32e9c0 | Josh Durgin | rbd_image_info_t info; |
790 | ad32e9c0 | Josh Durgin | int r;
|
791 | f27aaf4b | Christian Brunner | |
792 | ad32e9c0 | Josh Durgin | r = rbd_stat(s->image, &info, sizeof(info));
|
793 | ad32e9c0 | Josh Durgin | if (r < 0) { |
794 | ad32e9c0 | Josh Durgin | return r;
|
795 | ad32e9c0 | Josh Durgin | } |
796 | ad32e9c0 | Josh Durgin | |
797 | ad32e9c0 | Josh Durgin | return info.size;
|
798 | f27aaf4b | Christian Brunner | } |
799 | f27aaf4b | Christian Brunner | |
800 | 30cdc48c | Josh Durgin | static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset) |
801 | 30cdc48c | Josh Durgin | { |
802 | 30cdc48c | Josh Durgin | BDRVRBDState *s = bs->opaque; |
803 | 30cdc48c | Josh Durgin | int r;
|
804 | 30cdc48c | Josh Durgin | |
805 | 30cdc48c | Josh Durgin | r = rbd_resize(s->image, offset); |
806 | 30cdc48c | Josh Durgin | if (r < 0) { |
807 | 30cdc48c | Josh Durgin | return r;
|
808 | 30cdc48c | Josh Durgin | } |
809 | 30cdc48c | Josh Durgin | |
810 | 30cdc48c | Josh Durgin | return 0; |
811 | 30cdc48c | Josh Durgin | } |
812 | 30cdc48c | Josh Durgin | |
813 | ad32e9c0 | Josh Durgin | static int qemu_rbd_snap_create(BlockDriverState *bs, |
814 | ad32e9c0 | Josh Durgin | QEMUSnapshotInfo *sn_info) |
815 | f27aaf4b | Christian Brunner | { |
816 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
817 | f27aaf4b | Christian Brunner | int r;
|
818 | f27aaf4b | Christian Brunner | |
819 | f27aaf4b | Christian Brunner | if (sn_info->name[0] == '\0') { |
820 | f27aaf4b | Christian Brunner | return -EINVAL; /* we need a name for rbd snapshots */ |
821 | f27aaf4b | Christian Brunner | } |
822 | f27aaf4b | Christian Brunner | |
823 | f27aaf4b | Christian Brunner | /*
|
824 | f27aaf4b | Christian Brunner | * rbd snapshots are using the name as the user controlled unique identifier
|
825 | f27aaf4b | Christian Brunner | * we can't use the rbd snapid for that purpose, as it can't be set
|
826 | f27aaf4b | Christian Brunner | */
|
827 | f27aaf4b | Christian Brunner | if (sn_info->id_str[0] != '\0' && |
828 | f27aaf4b | Christian Brunner | strcmp(sn_info->id_str, sn_info->name) != 0) {
|
829 | f27aaf4b | Christian Brunner | return -EINVAL;
|
830 | f27aaf4b | Christian Brunner | } |
831 | f27aaf4b | Christian Brunner | |
832 | f27aaf4b | Christian Brunner | if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) { |
833 | f27aaf4b | Christian Brunner | return -ERANGE;
|
834 | f27aaf4b | Christian Brunner | } |
835 | f27aaf4b | Christian Brunner | |
836 | ad32e9c0 | Josh Durgin | r = rbd_snap_create(s->image, sn_info->name); |
837 | f27aaf4b | Christian Brunner | if (r < 0) { |
838 | ad32e9c0 | Josh Durgin | error_report("failed to create snap: %s", strerror(-r));
|
839 | f27aaf4b | Christian Brunner | return r;
|
840 | f27aaf4b | Christian Brunner | } |
841 | f27aaf4b | Christian Brunner | |
842 | f27aaf4b | Christian Brunner | return 0; |
843 | f27aaf4b | Christian Brunner | } |
844 | f27aaf4b | Christian Brunner | |
845 | bd603247 | Gregory Farnum | static int qemu_rbd_snap_remove(BlockDriverState *bs, |
846 | bd603247 | Gregory Farnum | const char *snapshot_name) |
847 | bd603247 | Gregory Farnum | { |
848 | bd603247 | Gregory Farnum | BDRVRBDState *s = bs->opaque; |
849 | bd603247 | Gregory Farnum | int r;
|
850 | bd603247 | Gregory Farnum | |
851 | bd603247 | Gregory Farnum | r = rbd_snap_remove(s->image, snapshot_name); |
852 | bd603247 | Gregory Farnum | return r;
|
853 | bd603247 | Gregory Farnum | } |
854 | bd603247 | Gregory Farnum | |
855 | bd603247 | Gregory Farnum | static int qemu_rbd_snap_rollback(BlockDriverState *bs, |
856 | bd603247 | Gregory Farnum | const char *snapshot_name) |
857 | bd603247 | Gregory Farnum | { |
858 | bd603247 | Gregory Farnum | BDRVRBDState *s = bs->opaque; |
859 | bd603247 | Gregory Farnum | int r;
|
860 | bd603247 | Gregory Farnum | |
861 | bd603247 | Gregory Farnum | r = rbd_snap_rollback(s->image, snapshot_name); |
862 | bd603247 | Gregory Farnum | return r;
|
863 | bd603247 | Gregory Farnum | } |
864 | bd603247 | Gregory Farnum | |
865 | ad32e9c0 | Josh Durgin | static int qemu_rbd_snap_list(BlockDriverState *bs, |
866 | ad32e9c0 | Josh Durgin | QEMUSnapshotInfo **psn_tab) |
867 | f27aaf4b | Christian Brunner | { |
868 | f27aaf4b | Christian Brunner | BDRVRBDState *s = bs->opaque; |
869 | f27aaf4b | Christian Brunner | QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
|
870 | ad32e9c0 | Josh Durgin | int i, snap_count;
|
871 | ad32e9c0 | Josh Durgin | rbd_snap_info_t *snaps; |
872 | ad32e9c0 | Josh Durgin | int max_snaps = RBD_MAX_SNAPS;
|
873 | f27aaf4b | Christian Brunner | |
874 | ad32e9c0 | Josh Durgin | do {
|
875 | 7267c094 | Anthony Liguori | snaps = g_malloc(sizeof(*snaps) * max_snaps);
|
876 | ad32e9c0 | Josh Durgin | snap_count = rbd_snap_list(s->image, snaps, &max_snaps); |
877 | ad32e9c0 | Josh Durgin | if (snap_count < 0) { |
878 | 7267c094 | Anthony Liguori | g_free(snaps); |
879 | f27aaf4b | Christian Brunner | } |
880 | ad32e9c0 | Josh Durgin | } while (snap_count == -ERANGE);
|
881 | f27aaf4b | Christian Brunner | |
882 | ad32e9c0 | Josh Durgin | if (snap_count <= 0) { |
883 | b9c53290 | Josh Durgin | goto done;
|
884 | f27aaf4b | Christian Brunner | } |
885 | f27aaf4b | Christian Brunner | |
886 | 7267c094 | Anthony Liguori | sn_tab = g_malloc0(snap_count * sizeof(QEMUSnapshotInfo));
|
887 | f27aaf4b | Christian Brunner | |
888 | ad32e9c0 | Josh Durgin | for (i = 0; i < snap_count; i++) { |
889 | ad32e9c0 | Josh Durgin | const char *snap_name = snaps[i].name; |
890 | f27aaf4b | Christian Brunner | |
891 | f27aaf4b | Christian Brunner | sn_info = sn_tab + i; |
892 | f27aaf4b | Christian Brunner | pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
|
893 | f27aaf4b | Christian Brunner | pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);
|
894 | f27aaf4b | Christian Brunner | |
895 | ad32e9c0 | Josh Durgin | sn_info->vm_state_size = snaps[i].size; |
896 | f27aaf4b | Christian Brunner | sn_info->date_sec = 0;
|
897 | f27aaf4b | Christian Brunner | sn_info->date_nsec = 0;
|
898 | f27aaf4b | Christian Brunner | sn_info->vm_clock_nsec = 0;
|
899 | f27aaf4b | Christian Brunner | } |
900 | ad32e9c0 | Josh Durgin | rbd_snap_list_end(snaps); |
901 | ad32e9c0 | Josh Durgin | |
902 | b9c53290 | Josh Durgin | done:
|
903 | f27aaf4b | Christian Brunner | *psn_tab = sn_tab; |
904 | f27aaf4b | Christian Brunner | return snap_count;
|
905 | f27aaf4b | Christian Brunner | } |
906 | f27aaf4b | Christian Brunner | |
#ifdef LIBRBD_SUPPORTS_DISCARD
/*
 * Asynchronous discard: forwards to rbd_start_aio() as RBD_AIO_DISCARD.
 * No io vector is passed, since a discard carries no data.
 */
static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
                                              int64_t sector_num,
                                              int nb_sectors,
                                              BlockDriverCompletionFunc *cb,
                                              void *opaque)
{
    const RBDAIOCmd op = RBD_AIO_DISCARD;

    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque, op);
}
#endif
|
918 | 787f3133 | Josh Durgin | |
919 | ad32e9c0 | Josh Durgin | static QEMUOptionParameter qemu_rbd_create_options[] = {
|
920 | f27aaf4b | Christian Brunner | { |
921 | f27aaf4b | Christian Brunner | .name = BLOCK_OPT_SIZE, |
922 | f27aaf4b | Christian Brunner | .type = OPT_SIZE, |
923 | f27aaf4b | Christian Brunner | .help = "Virtual disk size"
|
924 | f27aaf4b | Christian Brunner | }, |
925 | f27aaf4b | Christian Brunner | { |
926 | f27aaf4b | Christian Brunner | .name = BLOCK_OPT_CLUSTER_SIZE, |
927 | f27aaf4b | Christian Brunner | .type = OPT_SIZE, |
928 | f27aaf4b | Christian Brunner | .help = "RBD object size"
|
929 | f27aaf4b | Christian Brunner | }, |
930 | f27aaf4b | Christian Brunner | {NULL}
|
931 | f27aaf4b | Christian Brunner | }; |
932 | f27aaf4b | Christian Brunner | |
933 | f27aaf4b | Christian Brunner | static BlockDriver bdrv_rbd = {
|
934 | f27aaf4b | Christian Brunner | .format_name = "rbd",
|
935 | f27aaf4b | Christian Brunner | .instance_size = sizeof(BDRVRBDState),
|
936 | ad32e9c0 | Josh Durgin | .bdrv_file_open = qemu_rbd_open, |
937 | ad32e9c0 | Josh Durgin | .bdrv_close = qemu_rbd_close, |
938 | ad32e9c0 | Josh Durgin | .bdrv_create = qemu_rbd_create, |
939 | ad32e9c0 | Josh Durgin | .bdrv_get_info = qemu_rbd_getinfo, |
940 | ad32e9c0 | Josh Durgin | .create_options = qemu_rbd_create_options, |
941 | ad32e9c0 | Josh Durgin | .bdrv_getlength = qemu_rbd_getlength, |
942 | 30cdc48c | Josh Durgin | .bdrv_truncate = qemu_rbd_truncate, |
943 | f27aaf4b | Christian Brunner | .protocol_name = "rbd",
|
944 | f27aaf4b | Christian Brunner | |
945 | c68b89ac | Kevin Wolf | .bdrv_aio_readv = qemu_rbd_aio_readv, |
946 | c68b89ac | Kevin Wolf | .bdrv_aio_writev = qemu_rbd_aio_writev, |
947 | c68b89ac | Kevin Wolf | .bdrv_co_flush_to_disk = qemu_rbd_co_flush, |
948 | f27aaf4b | Christian Brunner | |
949 | 787f3133 | Josh Durgin | #ifdef LIBRBD_SUPPORTS_DISCARD
|
950 | 787f3133 | Josh Durgin | .bdrv_aio_discard = qemu_rbd_aio_discard, |
951 | 787f3133 | Josh Durgin | #endif
|
952 | 787f3133 | Josh Durgin | |
953 | c68b89ac | Kevin Wolf | .bdrv_snapshot_create = qemu_rbd_snap_create, |
954 | bd603247 | Gregory Farnum | .bdrv_snapshot_delete = qemu_rbd_snap_remove, |
955 | c68b89ac | Kevin Wolf | .bdrv_snapshot_list = qemu_rbd_snap_list, |
956 | bd603247 | Gregory Farnum | .bdrv_snapshot_goto = qemu_rbd_snap_rollback, |
957 | f27aaf4b | Christian Brunner | }; |
958 | f27aaf4b | Christian Brunner | |
959 | f27aaf4b | Christian Brunner | static void bdrv_rbd_init(void) |
960 | f27aaf4b | Christian Brunner | { |
961 | f27aaf4b | Christian Brunner | bdrv_register(&bdrv_rbd); |
962 | f27aaf4b | Christian Brunner | } |
963 | f27aaf4b | Christian Brunner | |
964 | f27aaf4b | Christian Brunner | block_init(bdrv_rbd_init); |