Revision ad32e9c0 block/rbd.c
b/block/rbd.c | ||
---|---|---|
1 | 1 |
/* |
2 | 2 |
* QEMU Block driver for RADOS (Ceph) |
3 | 3 |
* |
4 |
* Copyright (C) 2010 Christian Brunner <chb@muc.de> |
|
4 |
* Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>, |
|
5 |
* Josh Durgin <josh.durgin@dreamhost.com> |
|
5 | 6 |
* |
6 | 7 |
* This work is licensed under the terms of the GNU GPL, version 2. See |
7 | 8 |
* the COPYING file in the top-level directory. |
8 | 9 |
* |
9 | 10 |
*/ |
10 | 11 |
|
12 |
#include <inttypes.h> |
|
13 |
|
|
11 | 14 |
#include "qemu-common.h" |
12 | 15 |
#include "qemu-error.h" |
13 | 16 |
|
14 |
#include "rbd_types.h" |
|
15 | 17 |
#include "block_int.h" |
16 | 18 |
|
17 |
#include <rados/librados.h>
|
|
19 |
#include <rbd/librbd.h>
|
|
18 | 20 |
|
19 | 21 |
|
20 | 22 |
|
... | ... | |
40 | 42 |
|
41 | 43 |
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) |
42 | 44 |
|
45 |
#define RBD_MAX_CONF_NAME_SIZE 128 |
|
46 |
#define RBD_MAX_CONF_VAL_SIZE 512 |
|
47 |
#define RBD_MAX_CONF_SIZE 1024 |
|
48 |
#define RBD_MAX_POOL_NAME_SIZE 128 |
|
49 |
#define RBD_MAX_SNAP_NAME_SIZE 128 |
|
50 |
#define RBD_MAX_SNAPS 100 |
|
51 |
|
|
43 | 52 |
typedef struct RBDAIOCB { |
44 | 53 |
BlockDriverAIOCB common; |
45 | 54 |
QEMUBH *bh; |
... | ... | |
48 | 57 |
char *bounce; |
49 | 58 |
int write; |
50 | 59 |
int64_t sector_num; |
51 |
int aiocnt; |
|
52 | 60 |
int error; |
53 | 61 |
struct BDRVRBDState *s; |
54 | 62 |
int cancelled; |
... | ... | |
59 | 67 |
RBDAIOCB *acb; |
60 | 68 |
struct BDRVRBDState *s; |
61 | 69 |
int done; |
62 |
int64_t segsize;
|
|
70 |
int64_t size; |
|
63 | 71 |
char *buf; |
64 | 72 |
int ret; |
65 | 73 |
} RADOSCB; |
... | ... | |
69 | 77 |
|
70 | 78 |
typedef struct BDRVRBDState { |
71 | 79 |
int fds[2]; |
72 |
rados_pool_t pool; |
|
73 |
rados_pool_t header_pool; |
|
74 |
char name[RBD_MAX_OBJ_NAME_SIZE]; |
|
75 |
char block_name[RBD_MAX_BLOCK_NAME_SIZE]; |
|
76 |
uint64_t size; |
|
77 |
uint64_t objsize; |
|
80 |
rados_t cluster; |
|
81 |
rados_ioctx_t io_ctx; |
|
82 |
rbd_image_t image; |
|
83 |
char name[RBD_MAX_IMAGE_NAME_SIZE]; |
|
78 | 84 |
int qemu_aio_count; |
85 |
char *snap; |
|
79 | 86 |
int event_reader_pos; |
80 | 87 |
RADOSCB *event_rcb; |
81 | 88 |
} BDRVRBDState; |
82 | 89 |
|
83 |
typedef struct rbd_obj_header_ondisk RbdHeader1; |
|
84 |
|
|
85 | 90 |
static void rbd_aio_bh_cb(void *opaque); |
86 | 91 |
|
87 |
static int rbd_next_tok(char *dst, int dst_len, |
|
88 |
char *src, char delim, |
|
89 |
const char *name, |
|
90 |
char **p) |
|
92 |
static int qemu_rbd_next_tok(char *dst, int dst_len,
|
|
93 |
char *src, char delim,
|
|
94 |
const char *name,
|
|
95 |
char **p)
|
|
91 | 96 |
{ |
92 | 97 |
int l; |
93 | 98 |
char *end; |
... | ... | |
115 | 120 |
return 0; |
116 | 121 |
} |
117 | 122 |
|
118 |
static int rbd_parsename(const char *filename, |
|
119 |
char *pool, int pool_len, |
|
120 |
char *snap, int snap_len, |
|
121 |
char *name, int name_len) |
|
123 |
static int qemu_rbd_parsename(const char *filename,
|
|
124 |
char *pool, int pool_len,
|
|
125 |
char *snap, int snap_len,
|
|
126 |
char *name, int name_len)
|
|
122 | 127 |
{ |
123 | 128 |
const char *start; |
124 | 129 |
char *p, *buf; |
... | ... | |
131 | 136 |
buf = qemu_strdup(start); |
132 | 137 |
p = buf; |
133 | 138 |
|
134 |
ret = rbd_next_tok(pool, pool_len, p, '/', "pool name", &p); |
|
139 |
ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
|
|
135 | 140 |
if (ret < 0 || !p) { |
136 | 141 |
ret = -EINVAL; |
137 | 142 |
goto done; |
138 | 143 |
} |
139 |
ret = rbd_next_tok(name, name_len, p, '@', "object name", &p); |
|
144 |
ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
|
|
140 | 145 |
if (ret < 0) { |
141 | 146 |
goto done; |
142 | 147 |
} |
... | ... | |
145 | 150 |
goto done; |
146 | 151 |
} |
147 | 152 |
|
148 |
ret = rbd_next_tok(snap, snap_len, p, '\0', "snap name", &p); |
|
153 |
ret = qemu_rbd_next_tok(snap, snap_len, p, '\0', "snap name", &p);
|
|
149 | 154 |
|
150 | 155 |
done: |
151 | 156 |
qemu_free(buf); |
152 | 157 |
return ret; |
153 | 158 |
} |
154 | 159 |
|
155 |
static int create_tmap_op(uint8_t op, const char *name, char **tmap_desc) |
|
156 |
{ |
|
157 |
uint32_t len = strlen(name); |
|
158 |
uint32_t len_le = cpu_to_le32(len); |
|
159 |
/* total_len = encoding op + name + empty buffer */ |
|
160 |
uint32_t total_len = 1 + (sizeof(uint32_t) + len) + sizeof(uint32_t); |
|
161 |
uint8_t *desc = NULL; |
|
162 |
|
|
163 |
desc = qemu_malloc(total_len); |
|
164 |
|
|
165 |
*tmap_desc = (char *)desc; |
|
166 |
|
|
167 |
*desc = op; |
|
168 |
desc++; |
|
169 |
memcpy(desc, &len_le, sizeof(len_le)); |
|
170 |
desc += sizeof(len_le); |
|
171 |
memcpy(desc, name, len); |
|
172 |
desc += len; |
|
173 |
len = 0; /* no need for endian conversion for 0 */ |
|
174 |
memcpy(desc, &len, sizeof(len)); |
|
175 |
desc += sizeof(len); |
|
176 |
|
|
177 |
return (char *)desc - *tmap_desc; |
|
178 |
} |
|
179 |
|
|
180 |
static void free_tmap_op(char *tmap_desc) |
|
181 |
{ |
|
182 |
qemu_free(tmap_desc); |
|
183 |
} |
|
184 |
|
|
185 |
static int rbd_register_image(rados_pool_t pool, const char *name) |
|
186 |
{ |
|
187 |
char *tmap_desc; |
|
188 |
const char *dir = RBD_DIRECTORY; |
|
189 |
int ret; |
|
190 |
|
|
191 |
ret = create_tmap_op(CEPH_OSD_TMAP_SET, name, &tmap_desc); |
|
192 |
if (ret < 0) { |
|
193 |
return ret; |
|
194 |
} |
|
195 |
|
|
196 |
ret = rados_tmap_update(pool, dir, tmap_desc, ret); |
|
197 |
free_tmap_op(tmap_desc); |
|
198 |
|
|
199 |
return ret; |
|
200 |
} |
|
201 |
|
|
202 |
static int touch_rbd_info(rados_pool_t pool, const char *info_oid) |
|
203 |
{ |
|
204 |
int r = rados_write(pool, info_oid, 0, NULL, 0); |
|
205 |
if (r < 0) { |
|
206 |
return r; |
|
207 |
} |
|
208 |
return 0; |
|
209 |
} |
|
210 |
|
|
211 |
static int rbd_assign_bid(rados_pool_t pool, uint64_t *id) |
|
212 |
{ |
|
213 |
uint64_t out[1]; |
|
214 |
const char *info_oid = RBD_INFO; |
|
215 |
|
|
216 |
*id = 0; |
|
217 |
|
|
218 |
int r = touch_rbd_info(pool, info_oid); |
|
219 |
if (r < 0) { |
|
220 |
return r; |
|
221 |
} |
|
222 |
|
|
223 |
r = rados_exec(pool, info_oid, "rbd", "assign_bid", NULL, |
|
224 |
0, (char *)out, sizeof(out)); |
|
225 |
if (r < 0) { |
|
226 |
return r; |
|
227 |
} |
|
228 |
|
|
229 |
le64_to_cpus(out); |
|
230 |
*id = out[0]; |
|
231 |
|
|
232 |
return 0; |
|
233 |
} |
|
234 |
|
|
235 |
static int rbd_create(const char *filename, QEMUOptionParameter *options) |
|
160 |
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) |
|
236 | 161 |
{ |
237 | 162 |
int64_t bytes = 0; |
238 | 163 |
int64_t objsize; |
239 |
uint64_t size; |
|
240 |
time_t mtime; |
|
241 |
uint8_t obj_order = RBD_DEFAULT_OBJ_ORDER; |
|
242 |
char pool[RBD_MAX_SEG_NAME_SIZE]; |
|
243 |
char n[RBD_MAX_SEG_NAME_SIZE]; |
|
244 |
char name[RBD_MAX_OBJ_NAME_SIZE]; |
|
245 |
char snap_buf[RBD_MAX_SEG_NAME_SIZE]; |
|
164 |
int obj_order = 0; |
|
165 |
char pool[RBD_MAX_POOL_NAME_SIZE]; |
|
166 |
char name[RBD_MAX_IMAGE_NAME_SIZE]; |
|
167 |
char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; |
|
246 | 168 |
char *snap = NULL; |
247 |
RbdHeader1 header; |
|
248 |
rados_pool_t p; |
|
249 |
uint64_t bid; |
|
250 |
uint32_t hi, lo; |
|
169 |
rados_t cluster; |
|
170 |
rados_ioctx_t io_ctx; |
|
251 | 171 |
int ret; |
252 | 172 |
|
253 |
if (rbd_parsename(filename, |
|
254 |
pool, sizeof(pool), |
|
255 |
snap_buf, sizeof(snap_buf), |
|
256 |
name, sizeof(name)) < 0) { |
|
173 |
if (qemu_rbd_parsename(filename, pool, sizeof(pool), |
|
174 |
snap_buf, sizeof(snap_buf), |
|
175 |
name, sizeof(name)) < 0) { |
|
257 | 176 |
return -EINVAL; |
258 | 177 |
} |
259 | 178 |
if (snap_buf[0] != '\0') { |
260 | 179 |
snap = snap_buf; |
261 | 180 |
} |
262 | 181 |
|
263 |
snprintf(n, sizeof(n), "%s%s", name, RBD_SUFFIX); |
|
264 |
|
|
265 | 182 |
/* Read out options */ |
266 | 183 |
while (options && options->name) { |
267 | 184 |
if (!strcmp(options->name, BLOCK_OPT_SIZE)) { |
... | ... | |
277 | 194 |
error_report("obj size too small"); |
278 | 195 |
return -EINVAL; |
279 | 196 |
} |
280 |
obj_order = ffs(objsize) - 1;
|
|
197 |
obj_order = ffs(objsize) - 1;
|
|
281 | 198 |
} |
282 | 199 |
} |
283 | 200 |
options++; |
284 | 201 |
} |
285 | 202 |
|
286 |
memset(&header, 0, sizeof(header)); |
|
287 |
pstrcpy(header.text, sizeof(header.text), RBD_HEADER_TEXT); |
|
288 |
pstrcpy(header.signature, sizeof(header.signature), RBD_HEADER_SIGNATURE); |
|
289 |
pstrcpy(header.version, sizeof(header.version), RBD_HEADER_VERSION); |
|
290 |
header.image_size = cpu_to_le64(bytes); |
|
291 |
header.options.order = obj_order; |
|
292 |
header.options.crypt_type = RBD_CRYPT_NONE; |
|
293 |
header.options.comp_type = RBD_COMP_NONE; |
|
294 |
header.snap_seq = 0; |
|
295 |
header.snap_count = 0; |
|
296 |
|
|
297 |
if (rados_initialize(0, NULL) < 0) { |
|
203 |
if (rados_create(&cluster, NULL) < 0) { |
|
298 | 204 |
error_report("error initializing"); |
299 | 205 |
return -EIO; |
300 | 206 |
} |
301 | 207 |
|
302 |
if (rados_open_pool(pool, &p)) {
|
|
303 |
error_report("error opening pool %s", pool);
|
|
304 |
rados_deinitialize();
|
|
208 |
if (rados_conf_read_file(cluster, NULL) < 0) {
|
|
209 |
error_report("error reading config file");
|
|
210 |
rados_shutdown(cluster);
|
|
305 | 211 |
return -EIO; |
306 | 212 |
} |
307 | 213 |
|
308 |
/* check for existing rbd header file */ |
|
309 |
ret = rados_stat(p, n, &size, &mtime); |
|
310 |
if (ret == 0) { |
|
311 |
ret=-EEXIST; |
|
312 |
goto done; |
|
313 |
} |
|
314 |
|
|
315 |
ret = rbd_assign_bid(p, &bid); |
|
316 |
if (ret < 0) { |
|
317 |
error_report("failed assigning block id"); |
|
318 |
rados_deinitialize(); |
|
214 |
if (rados_connect(cluster) < 0) { |
|
215 |
error_report("error connecting"); |
|
216 |
rados_shutdown(cluster); |
|
319 | 217 |
return -EIO; |
320 | 218 |
} |
321 |
hi = bid >> 32; |
|
322 |
lo = bid & 0xFFFFFFFF; |
|
323 |
snprintf(header.block_name, sizeof(header.block_name), "rb.%x.%x", hi, lo); |
|
324 | 219 |
|
325 |
/* create header file */
|
|
326 |
ret = rados_write(p, n, 0, (const char *)&header, sizeof(header));
|
|
327 |
if (ret < 0) {
|
|
328 |
goto done;
|
|
220 |
if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
|
|
221 |
error_report("error opening pool %s", pool);
|
|
222 |
rados_shutdown(cluster);
|
|
223 |
return -EIO;
|
|
329 | 224 |
} |
330 | 225 |
|
331 |
ret = rbd_register_image(p, name); |
|
332 |
done: |
|
333 |
rados_close_pool(p); |
|
334 |
rados_deinitialize(); |
|
226 |
ret = rbd_create(io_ctx, name, bytes, &obj_order); |
|
227 |
rados_ioctx_destroy(io_ctx); |
|
228 |
rados_shutdown(cluster); |
|
335 | 229 |
|
336 | 230 |
return ret; |
337 | 231 |
} |
338 | 232 |
|
339 | 233 |
/* |
340 |
* This aio completion is being called from rbd_aio_event_reader() and
|
|
341 |
* runs in qemu context. It schedules a bh, but only if the aio |
|
234 |
* This aio completion is being called from qemu_rbd_aio_event_reader()
|
|
235 |
* and runs in qemu context. It schedules a bh, but only if the aio
|
|
342 | 236 |
* was not cancelled before. |
343 | 237 |
*/ |
344 |
static void rbd_complete_aio(RADOSCB *rcb) |
|
238 |
static void qemu_rbd_complete_aio(RADOSCB *rcb)
|
|
345 | 239 |
{ |
346 | 240 |
RBDAIOCB *acb = rcb->acb; |
347 | 241 |
int64_t r; |
348 | 242 |
|
349 |
acb->aiocnt--; |
|
350 |
|
|
351 | 243 |
if (acb->cancelled) { |
352 |
if (!acb->aiocnt) { |
|
353 |
qemu_vfree(acb->bounce); |
|
354 |
qemu_aio_release(acb); |
|
355 |
} |
|
244 |
qemu_vfree(acb->bounce); |
|
245 |
qemu_aio_release(acb); |
|
356 | 246 |
goto done; |
357 | 247 |
} |
358 | 248 |
|
... | ... | |
363 | 253 |
acb->ret = r; |
364 | 254 |
acb->error = 1; |
365 | 255 |
} else if (!acb->error) { |
366 |
acb->ret += rcb->segsize;
|
|
256 |
acb->ret = rcb->size;
|
|
367 | 257 |
} |
368 | 258 |
} else { |
369 |
if (r == -ENOENT) { |
|
370 |
memset(rcb->buf, 0, rcb->segsize); |
|
371 |
if (!acb->error) { |
|
372 |
acb->ret += rcb->segsize; |
|
373 |
} |
|
374 |
} else if (r < 0) { |
|
375 |
memset(rcb->buf, 0, rcb->segsize); |
|
259 |
if (r < 0) { |
|
260 |
memset(rcb->buf, 0, rcb->size); |
|
376 | 261 |
acb->ret = r; |
377 | 262 |
acb->error = 1; |
378 |
} else if (r < rcb->segsize) {
|
|
379 |
memset(rcb->buf + r, 0, rcb->segsize - r);
|
|
263 |
} else if (r < rcb->size) { |
|
264 |
memset(rcb->buf + r, 0, rcb->size - r); |
|
380 | 265 |
if (!acb->error) { |
381 |
acb->ret += rcb->segsize;
|
|
266 |
acb->ret = rcb->size;
|
|
382 | 267 |
} |
383 | 268 |
} else if (!acb->error) { |
384 |
acb->ret += r;
|
|
269 |
acb->ret = r; |
|
385 | 270 |
} |
386 | 271 |
} |
387 | 272 |
/* Note that acb->bh can be NULL in the case where the aio was cancelled */ |
388 |
if (!acb->aiocnt) { |
|
389 |
acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); |
|
390 |
qemu_bh_schedule(acb->bh); |
|
391 |
} |
|
273 |
acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); |
|
274 |
qemu_bh_schedule(acb->bh); |
|
392 | 275 |
done: |
393 | 276 |
qemu_free(rcb); |
394 | 277 |
} |
... | ... | |
397 | 280 |
* aio fd read handler. It runs in the qemu context and calls the |
398 | 281 |
* completion handling of completed rados aio operations. |
399 | 282 |
*/ |
400 |
static void rbd_aio_event_reader(void *opaque) |
|
283 |
static void qemu_rbd_aio_event_reader(void *opaque)
|
|
401 | 284 |
{ |
402 | 285 |
BDRVRBDState *s = opaque; |
403 | 286 |
|
... | ... | |
413 | 296 |
s->event_reader_pos += ret; |
414 | 297 |
if (s->event_reader_pos == sizeof(s->event_rcb)) { |
415 | 298 |
s->event_reader_pos = 0; |
416 |
rbd_complete_aio(s->event_rcb); |
|
417 |
s->qemu_aio_count --;
|
|
299 |
qemu_rbd_complete_aio(s->event_rcb);
|
|
300 |
s->qemu_aio_count--; |
|
418 | 301 |
} |
419 | 302 |
} |
420 | 303 |
} |
421 | 304 |
} while (ret < 0 && errno == EINTR); |
422 | 305 |
} |
423 | 306 |
|
424 |
static int rbd_aio_flush_cb(void *opaque) |
|
307 |
static int qemu_rbd_aio_flush_cb(void *opaque)
|
|
425 | 308 |
{ |
426 | 309 |
BDRVRBDState *s = opaque; |
427 | 310 |
|
428 | 311 |
return (s->qemu_aio_count > 0); |
429 | 312 |
} |
430 | 313 |
|
431 |
|
|
432 |
static int rbd_set_snapc(rados_pool_t pool, const char *snap, RbdHeader1 *header) |
|
433 |
{ |
|
434 |
uint32_t snap_count = le32_to_cpu(header->snap_count); |
|
435 |
rados_snap_t *snaps = NULL; |
|
436 |
rados_snap_t seq; |
|
437 |
uint32_t i; |
|
438 |
uint64_t snap_names_len = le64_to_cpu(header->snap_names_len); |
|
439 |
int r; |
|
440 |
rados_snap_t snapid = 0; |
|
441 |
|
|
442 |
if (snap_count) { |
|
443 |
const char *header_snap = (const char *)&header->snaps[snap_count]; |
|
444 |
const char *end = header_snap + snap_names_len; |
|
445 |
snaps = qemu_malloc(sizeof(rados_snap_t) * header->snap_count); |
|
446 |
|
|
447 |
for (i=0; i < snap_count; i++) { |
|
448 |
snaps[i] = le64_to_cpu(header->snaps[i].id); |
|
449 |
|
|
450 |
if (snap && strcmp(snap, header_snap) == 0) { |
|
451 |
snapid = snaps[i]; |
|
452 |
} |
|
453 |
|
|
454 |
header_snap += strlen(header_snap) + 1; |
|
455 |
if (header_snap > end) { |
|
456 |
error_report("bad header, snapshot list broken"); |
|
457 |
} |
|
458 |
} |
|
459 |
} |
|
460 |
|
|
461 |
if (snap && !snapid) { |
|
462 |
error_report("snapshot not found"); |
|
463 |
qemu_free(snaps); |
|
464 |
return -ENOENT; |
|
465 |
} |
|
466 |
seq = le32_to_cpu(header->snap_seq); |
|
467 |
|
|
468 |
r = rados_set_snap_context(pool, seq, snaps, snap_count); |
|
469 |
|
|
470 |
rados_set_snap(pool, snapid); |
|
471 |
|
|
472 |
qemu_free(snaps); |
|
473 |
|
|
474 |
return r; |
|
475 |
} |
|
476 |
|
|
477 |
#define BUF_READ_START_LEN 4096 |
|
478 |
|
|
479 |
static int rbd_read_header(BDRVRBDState *s, char **hbuf) |
|
480 |
{ |
|
481 |
char *buf = NULL; |
|
482 |
char n[RBD_MAX_SEG_NAME_SIZE]; |
|
483 |
uint64_t len = BUF_READ_START_LEN; |
|
484 |
int r; |
|
485 |
|
|
486 |
snprintf(n, sizeof(n), "%s%s", s->name, RBD_SUFFIX); |
|
487 |
|
|
488 |
buf = qemu_malloc(len); |
|
489 |
|
|
490 |
r = rados_read(s->header_pool, n, 0, buf, len); |
|
491 |
if (r < 0) { |
|
492 |
goto failed; |
|
493 |
} |
|
494 |
|
|
495 |
if (r < len) { |
|
496 |
goto done; |
|
497 |
} |
|
498 |
|
|
499 |
qemu_free(buf); |
|
500 |
buf = qemu_malloc(len); |
|
501 |
|
|
502 |
r = rados_stat(s->header_pool, n, &len, NULL); |
|
503 |
if (r < 0) { |
|
504 |
goto failed; |
|
505 |
} |
|
506 |
|
|
507 |
r = rados_read(s->header_pool, n, 0, buf, len); |
|
508 |
if (r < 0) { |
|
509 |
goto failed; |
|
510 |
} |
|
511 |
|
|
512 |
done: |
|
513 |
*hbuf = buf; |
|
514 |
return 0; |
|
515 |
|
|
516 |
failed: |
|
517 |
qemu_free(buf); |
|
518 |
return r; |
|
519 |
} |
|
520 |
|
|
521 |
static int rbd_open(BlockDriverState *bs, const char *filename, int flags) |
|
314 |
static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) |
|
522 | 315 |
{ |
523 | 316 |
BDRVRBDState *s = bs->opaque; |
524 |
RbdHeader1 *header; |
|
525 |
char pool[RBD_MAX_SEG_NAME_SIZE]; |
|
526 |
char snap_buf[RBD_MAX_SEG_NAME_SIZE]; |
|
527 |
char *snap = NULL; |
|
528 |
char *hbuf = NULL; |
|
317 |
char pool[RBD_MAX_POOL_NAME_SIZE]; |
|
318 |
char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; |
|
529 | 319 |
int r; |
530 | 320 |
|
531 |
if (rbd_parsename(filename, pool, sizeof(pool), |
|
532 |
snap_buf, sizeof(snap_buf), |
|
533 |
s->name, sizeof(s->name)) < 0) { |
|
321 |
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
|
|
322 |
snap_buf, sizeof(snap_buf),
|
|
323 |
s->name, sizeof(s->name)) < 0) {
|
|
534 | 324 |
return -EINVAL; |
535 | 325 |
} |
326 |
s->snap = NULL; |
|
536 | 327 |
if (snap_buf[0] != '\0') { |
537 |
snap = snap_buf;
|
|
328 |
s->snap = qemu_strdup(snap_buf);
|
|
538 | 329 |
} |
539 | 330 |
|
540 |
if ((r = rados_initialize(0, NULL)) < 0) { |
|
331 |
r = rados_create(&s->cluster, NULL); |
|
332 |
if (r < 0) { |
|
541 | 333 |
error_report("error initializing"); |
542 | 334 |
return r; |
543 | 335 |
} |
544 | 336 |
|
545 |
if ((r = rados_open_pool(pool, &s->pool))) { |
|
546 |
error_report("error opening pool %s", pool); |
|
547 |
rados_deinitialize(); |
|
337 |
r = rados_conf_read_file(s->cluster, NULL); |
|
338 |
if (r < 0) { |
|
339 |
error_report("error reading config file"); |
|
340 |
rados_shutdown(s->cluster); |
|
548 | 341 |
return r; |
549 | 342 |
} |
550 | 343 |
|
551 |
if ((r = rados_open_pool(pool, &s->header_pool))) { |
|
552 |
error_report("error opening pool %s", pool); |
|
553 |
rados_deinitialize(); |
|
344 |
r = rados_connect(s->cluster); |
|
345 |
if (r < 0) { |
|
346 |
error_report("error connecting"); |
|
347 |
rados_shutdown(s->cluster); |
|
554 | 348 |
return r; |
555 | 349 |
} |
556 | 350 |
|
557 |
if ((r = rbd_read_header(s, &hbuf)) < 0) { |
|
558 |
error_report("error reading header from %s", s->name); |
|
559 |
goto failed; |
|
560 |
} |
|
561 |
|
|
562 |
if (memcmp(hbuf + 64, RBD_HEADER_SIGNATURE, 4)) { |
|
563 |
error_report("Invalid header signature"); |
|
564 |
r = -EMEDIUMTYPE; |
|
565 |
goto failed; |
|
566 |
} |
|
567 |
|
|
568 |
if (memcmp(hbuf + 68, RBD_HEADER_VERSION, 8)) { |
|
569 |
error_report("Unknown image version"); |
|
570 |
r = -EMEDIUMTYPE; |
|
571 |
goto failed; |
|
351 |
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx); |
|
352 |
if (r < 0) { |
|
353 |
error_report("error opening pool %s", pool); |
|
354 |
rados_shutdown(s->cluster); |
|
355 |
return r; |
|
572 | 356 |
} |
573 | 357 |
|
574 |
header = (RbdHeader1 *) hbuf; |
|
575 |
s->size = le64_to_cpu(header->image_size); |
|
576 |
s->objsize = 1ULL << header->options.order; |
|
577 |
memcpy(s->block_name, header->block_name, sizeof(header->block_name)); |
|
578 |
|
|
579 |
r = rbd_set_snapc(s->pool, snap, header); |
|
358 |
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); |
|
580 | 359 |
if (r < 0) { |
581 |
error_report("failed setting snap context: %s", strerror(-r)); |
|
582 |
goto failed; |
|
360 |
error_report("error reading header from %s", s->name); |
|
361 |
rados_ioctx_destroy(s->io_ctx); |
|
362 |
rados_shutdown(s->cluster); |
|
363 |
return r; |
|
583 | 364 |
} |
584 | 365 |
|
585 |
bs->read_only = (snap != NULL); |
|
366 |
bs->read_only = (s->snap != NULL);
|
|
586 | 367 |
|
587 | 368 |
s->event_reader_pos = 0; |
588 | 369 |
r = qemu_pipe(s->fds); |
... | ... | |
592 | 373 |
} |
593 | 374 |
fcntl(s->fds[0], F_SETFL, O_NONBLOCK); |
594 | 375 |
fcntl(s->fds[1], F_SETFL, O_NONBLOCK); |
595 |
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], rbd_aio_event_reader, NULL,
|
|
596 |
rbd_aio_flush_cb, NULL, s); |
|
376 |
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
|
|
377 |
NULL, qemu_rbd_aio_flush_cb, NULL, s);
|
|
597 | 378 |
|
598 |
qemu_free(hbuf); |
|
599 | 379 |
|
600 | 380 |
return 0; |
601 | 381 |
|
602 | 382 |
failed: |
603 |
qemu_free(hbuf); |
|
604 |
|
|
605 |
rados_close_pool(s->header_pool); |
|
606 |
rados_close_pool(s->pool); |
|
607 |
rados_deinitialize(); |
|
383 |
rbd_close(s->image); |
|
384 |
rados_ioctx_destroy(s->io_ctx); |
|
385 |
rados_shutdown(s->cluster); |
|
608 | 386 |
return r; |
609 | 387 |
} |
610 | 388 |
|
611 |
static void rbd_close(BlockDriverState *bs) |
|
389 |
static void qemu_rbd_close(BlockDriverState *bs)
|
|
612 | 390 |
{ |
613 | 391 |
BDRVRBDState *s = bs->opaque; |
614 | 392 |
|
... | ... | |
617 | 395 |
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL , NULL, NULL, NULL, |
618 | 396 |
NULL); |
619 | 397 |
|
620 |
rados_close_pool(s->header_pool); |
|
621 |
rados_close_pool(s->pool); |
|
622 |
rados_deinitialize(); |
|
398 |
rbd_close(s->image); |
|
399 |
rados_ioctx_destroy(s->io_ctx); |
|
400 |
qemu_free(s->snap); |
|
401 |
rados_shutdown(s->cluster); |
|
623 | 402 |
} |
624 | 403 |
|
625 | 404 |
/* |
626 | 405 |
* Cancel aio. Since we don't reference acb in non-qemu threads, |
627 | 406 |
* it is safe to access it here. |
628 | 407 |
*/ |
629 |
static void rbd_aio_cancel(BlockDriverAIOCB *blockacb) |
|
408 |
static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
|
|
630 | 409 |
{ |
631 | 410 |
RBDAIOCB *acb = (RBDAIOCB *) blockacb; |
632 | 411 |
acb->cancelled = 1; |
... | ... | |
634 | 413 |
|
635 | 414 |
static AIOPool rbd_aio_pool = { |
636 | 415 |
.aiocb_size = sizeof(RBDAIOCB), |
637 |
.cancel = rbd_aio_cancel, |
|
416 |
.cancel = qemu_rbd_aio_cancel,
|
|
638 | 417 |
}; |
639 | 418 |
|
640 |
/* |
|
641 |
* This is the callback function for rados_aio_read and _write |
|
642 |
* |
|
643 |
* Note: this function is being called from a non qemu thread so |
|
644 |
* we need to be careful about what we do here. Generally we only |
|
645 |
* write to the block notification pipe, and do the rest of the |
|
646 |
* io completion handling from rbd_aio_event_reader() which |
|
647 |
* runs in a qemu context. |
|
648 |
*/ |
|
649 |
static void rbd_finish_aiocb(rados_completion_t c, RADOSCB *rcb) |
|
419 |
static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) |
|
650 | 420 |
{ |
651 |
int ret; |
|
652 |
rcb->ret = rados_aio_get_return_value(c); |
|
653 |
rados_aio_release(c); |
|
421 |
int ret = 0; |
|
654 | 422 |
while (1) { |
655 | 423 |
fd_set wfd; |
656 |
int fd = rcb->s->fds[RBD_FD_WRITE];
|
|
424 |
int fd = s->fds[RBD_FD_WRITE]; |
|
657 | 425 |
|
658 |
/* send the rcb pointer to the qemu thread that is responsible
|
|
659 |
for the aio completion. Must do it in a qemu thread context */ |
|
426 |
/* send the op pointer to the qemu thread that is responsible
|
|
427 |
for the aio/op completion. Must do it in a qemu thread context */
|
|
660 | 428 |
ret = write(fd, (void *)&rcb, sizeof(rcb)); |
661 | 429 |
if (ret >= 0) { |
662 | 430 |
break; |
663 | 431 |
} |
664 | 432 |
if (errno == EINTR) { |
665 | 433 |
continue; |
666 |
}
|
|
434 |
}
|
|
667 | 435 |
if (errno != EAGAIN) { |
668 | 436 |
break; |
669 |
}
|
|
437 |
}
|
|
670 | 438 |
|
671 | 439 |
FD_ZERO(&wfd); |
672 | 440 |
FD_SET(fd, &wfd); |
... | ... | |
675 | 443 |
} while (ret < 0 && errno == EINTR); |
676 | 444 |
} |
677 | 445 |
|
446 |
return ret; |
|
447 |
} |
|
448 |
|
|
449 |
/* |
|
450 |
* This is the callback function for rbd_aio_read and _write |
|
451 |
* |
|
452 |
* Note: this function is being called from a non qemu thread so |
|
453 |
* we need to be careful about what we do here. Generally we only |
|
454 |
* write to the block notification pipe, and do the rest of the |
|
455 |
* io completion handling from qemu_rbd_aio_event_reader() which |
|
456 |
* runs in a qemu context. |
|
457 |
*/ |
|
458 |
static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) |
|
459 |
{ |
|
460 |
int ret; |
|
461 |
rcb->ret = rbd_aio_get_return_value(c); |
|
462 |
rbd_aio_release(c); |
|
463 |
ret = qemu_rbd_send_pipe(rcb->s, rcb); |
|
678 | 464 |
if (ret < 0) { |
679 |
error_report("failed writing to acb->s->fds\n");
|
|
465 |
error_report("failed writing to acb->s->fds"); |
|
680 | 466 |
qemu_free(rcb); |
681 | 467 |
} |
682 | 468 |
} |
683 | 469 |
|
684 |
/* Callback when all queued rados_aio requests are complete */
|
|
470 |
/* Callback when all queued rbd_aio requests are complete */
|
|
685 | 471 |
|
686 | 472 |
static void rbd_aio_bh_cb(void *opaque) |
687 | 473 |
{ |
... | ... | |
707 | 493 |
{ |
708 | 494 |
RBDAIOCB *acb; |
709 | 495 |
RADOSCB *rcb; |
710 |
rados_completion_t c; |
|
711 |
char n[RBD_MAX_SEG_NAME_SIZE]; |
|
712 |
int64_t segnr, segoffs, segsize, last_segnr; |
|
496 |
rbd_completion_t c; |
|
713 | 497 |
int64_t off, size; |
714 | 498 |
char *buf; |
715 | 499 |
|
... | ... | |
719 | 503 |
acb->write = write; |
720 | 504 |
acb->qiov = qiov; |
721 | 505 |
acb->bounce = qemu_blockalign(bs, qiov->size); |
722 |
acb->aiocnt = 0; |
|
723 | 506 |
acb->ret = 0; |
724 | 507 |
acb->error = 0; |
725 | 508 |
acb->s = s; |
... | ... | |
734 | 517 |
|
735 | 518 |
off = sector_num * BDRV_SECTOR_SIZE; |
736 | 519 |
size = nb_sectors * BDRV_SECTOR_SIZE; |
737 |
segnr = off / s->objsize; |
|
738 |
segoffs = off % s->objsize; |
|
739 |
segsize = s->objsize - segoffs; |
|
740 |
|
|
741 |
last_segnr = ((off + size - 1) / s->objsize); |
|
742 |
acb->aiocnt = (last_segnr - segnr) + 1; |
|
743 | 520 |
|
744 |
s->qemu_aio_count += acb->aiocnt; /* All the RADOSCB */
|
|
521 |
s->qemu_aio_count++; /* All the RADOSCB */
|
|
745 | 522 |
|
746 |
while (size > 0) { |
|
747 |
if (size < segsize) { |
|
748 |
segsize = size; |
|
749 |
} |
|
750 |
|
|
751 |
snprintf(n, sizeof(n), "%s.%012" PRIx64, s->block_name, |
|
752 |
segnr); |
|
753 |
|
|
754 |
rcb = qemu_malloc(sizeof(RADOSCB)); |
|
755 |
rcb->done = 0; |
|
756 |
rcb->acb = acb; |
|
757 |
rcb->segsize = segsize; |
|
758 |
rcb->buf = buf; |
|
759 |
rcb->s = acb->s; |
|
760 |
|
|
761 |
if (write) { |
|
762 |
rados_aio_create_completion(rcb, NULL, |
|
763 |
(rados_callback_t) rbd_finish_aiocb, |
|
764 |
&c); |
|
765 |
rados_aio_write(s->pool, n, segoffs, buf, segsize, c); |
|
766 |
} else { |
|
767 |
rados_aio_create_completion(rcb, |
|
768 |
(rados_callback_t) rbd_finish_aiocb, |
|
769 |
NULL, &c); |
|
770 |
rados_aio_read(s->pool, n, segoffs, buf, segsize, c); |
|
771 |
} |
|
523 |
rcb = qemu_malloc(sizeof(RADOSCB)); |
|
524 |
rcb->done = 0; |
|
525 |
rcb->acb = acb; |
|
526 |
rcb->buf = buf; |
|
527 |
rcb->s = acb->s; |
|
528 |
rcb->size = size; |
|
772 | 529 |
|
773 |
buf += segsize; |
|
774 |
size -= segsize; |
|
775 |
segoffs = 0; |
|
776 |
segsize = s->objsize; |
|
777 |
segnr++; |
|
530 |
if (write) { |
|
531 |
rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); |
|
532 |
rbd_aio_write(s->image, off, size, buf, c); |
|
533 |
} else { |
|
534 |
rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); |
|
535 |
rbd_aio_read(s->image, off, size, buf, c); |
|
778 | 536 |
} |
779 | 537 |
|
780 | 538 |
return &acb->common; |
781 | 539 |
} |
782 | 540 |
|
783 |
static BlockDriverAIOCB *rbd_aio_readv(BlockDriverState * bs, |
|
784 |
int64_t sector_num, QEMUIOVector * qiov, |
|
785 |
int nb_sectors, |
|
786 |
BlockDriverCompletionFunc * cb, |
|
787 |
void *opaque) |
|
541 |
static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs, |
|
542 |
int64_t sector_num, |
|
543 |
QEMUIOVector *qiov, |
|
544 |
int nb_sectors, |
|
545 |
BlockDriverCompletionFunc *cb, |
|
546 |
void *opaque) |
|
788 | 547 |
{ |
789 | 548 |
return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); |
790 | 549 |
} |
791 | 550 |
|
792 |
static BlockDriverAIOCB *rbd_aio_writev(BlockDriverState * bs, |
|
793 |
int64_t sector_num, QEMUIOVector * qiov, |
|
794 |
int nb_sectors, |
|
795 |
BlockDriverCompletionFunc * cb, |
|
796 |
void *opaque) |
|
551 |
static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, |
|
552 |
int64_t sector_num, |
|
553 |
QEMUIOVector *qiov, |
|
554 |
int nb_sectors, |
|
555 |
BlockDriverCompletionFunc *cb, |
|
556 |
void *opaque) |
|
797 | 557 |
{ |
798 | 558 |
return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); |
799 | 559 |
} |
800 | 560 |
|
801 |
static int rbd_getinfo(BlockDriverState * bs, BlockDriverInfo * bdi)
|
|
561 |
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
|
|
802 | 562 |
{ |
803 | 563 |
BDRVRBDState *s = bs->opaque; |
804 |
bdi->cluster_size = s->objsize; |
|
564 |
rbd_image_info_t info; |
|
565 |
int r; |
|
566 |
|
|
567 |
r = rbd_stat(s->image, &info, sizeof(info)); |
|
568 |
if (r < 0) { |
|
569 |
return r; |
|
570 |
} |
|
571 |
|
|
572 |
bdi->cluster_size = info.obj_size; |
|
805 | 573 |
return 0; |
806 | 574 |
} |
807 | 575 |
|
808 |
static int64_t rbd_getlength(BlockDriverState * bs)
|
|
576 |
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
|
|
809 | 577 |
{ |
810 | 578 |
BDRVRBDState *s = bs->opaque; |
579 |
rbd_image_info_t info; |
|
580 |
int r; |
|
811 | 581 |
|
812 |
return s->size; |
|
582 |
r = rbd_stat(s->image, &info, sizeof(info)); |
|
583 |
if (r < 0) { |
|
584 |
return r; |
|
585 |
} |
|
586 |
|
|
587 |
return info.size; |
|
813 | 588 |
} |
814 | 589 |
|
815 |
static int rbd_snap_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) |
|
590 |
static int qemu_rbd_snap_create(BlockDriverState *bs, |
|
591 |
QEMUSnapshotInfo *sn_info) |
|
816 | 592 |
{ |
817 | 593 |
BDRVRBDState *s = bs->opaque; |
818 |
char inbuf[512], outbuf[128]; |
|
819 |
uint64_t snap_id; |
|
820 | 594 |
int r; |
821 |
char *p = inbuf; |
|
822 |
char *end = inbuf + sizeof(inbuf); |
|
823 |
char n[RBD_MAX_SEG_NAME_SIZE]; |
|
824 |
char *hbuf = NULL; |
|
825 |
RbdHeader1 *header; |
|
826 | 595 |
|
827 | 596 |
if (sn_info->name[0] == '\0') { |
828 | 597 |
return -EINVAL; /* we need a name for rbd snapshots */ |
... | ... | |
841 | 610 |
return -ERANGE; |
842 | 611 |
} |
843 | 612 |
|
844 |
r = rados_selfmanaged_snap_create(s->header_pool, &snap_id);
|
|
613 |
r = rbd_snap_create(s->image, sn_info->name);
|
|
845 | 614 |
if (r < 0) { |
846 |
error_report("failed to create snap id: %s", strerror(-r));
|
|
615 |
error_report("failed to create snap: %s", strerror(-r)); |
|
847 | 616 |
return r; |
848 | 617 |
} |
849 | 618 |
|
850 |
*(uint32_t *)p = strlen(sn_info->name); |
|
851 |
cpu_to_le32s((uint32_t *)p); |
|
852 |
p += sizeof(uint32_t); |
|
853 |
strncpy(p, sn_info->name, end - p); |
|
854 |
p += strlen(p); |
|
855 |
if (p + sizeof(snap_id) > end) { |
|
856 |
error_report("invalid input parameter"); |
|
857 |
return -EINVAL; |
|
858 |
} |
|
859 |
|
|
860 |
*(uint64_t *)p = snap_id; |
|
861 |
cpu_to_le64s((uint64_t *)p); |
|
862 |
|
|
863 |
snprintf(n, sizeof(n), "%s%s", s->name, RBD_SUFFIX); |
|
864 |
|
|
865 |
r = rados_exec(s->header_pool, n, "rbd", "snap_add", inbuf, |
|
866 |
sizeof(inbuf), outbuf, sizeof(outbuf)); |
|
867 |
if (r < 0) { |
|
868 |
error_report("rbd.snap_add execution failed failed: %s", strerror(-r)); |
|
869 |
return r; |
|
870 |
} |
|
871 |
|
|
872 |
sprintf(sn_info->id_str, "%s", sn_info->name); |
|
873 |
|
|
874 |
r = rbd_read_header(s, &hbuf); |
|
875 |
if (r < 0) { |
|
876 |
error_report("failed reading header: %s", strerror(-r)); |
|
877 |
return r; |
|
878 |
} |
|
879 |
|
|
880 |
header = (RbdHeader1 *) hbuf; |
|
881 |
r = rbd_set_snapc(s->pool, sn_info->name, header); |
|
882 |
if (r < 0) { |
|
883 |
error_report("failed setting snap context: %s", strerror(-r)); |
|
884 |
goto failed; |
|
885 |
} |
|
886 |
|
|
887 |
return 0; |
|
888 |
|
|
889 |
failed: |
|
890 |
qemu_free(header); |
|
891 |
return r; |
|
892 |
} |
|
893 |
|
|
894 |
static int decode32(char **p, const char *end, uint32_t *v) |
|
895 |
{ |
|
896 |
if (*p + 4 > end) { |
|
897 |
return -ERANGE; |
|
898 |
} |
|
899 |
|
|
900 |
*v = *(uint32_t *)(*p); |
|
901 |
le32_to_cpus(v); |
|
902 |
*p += 4; |
|
903 | 619 |
return 0; |
904 | 620 |
} |
905 | 621 |
|
906 |
static int decode64(char **p, const char *end, uint64_t *v) |
|
907 |
{ |
|
908 |
if (*p + 8 > end) { |
|
909 |
return -ERANGE; |
|
910 |
} |
|
911 |
|
|
912 |
*v = *(uint64_t *)(*p); |
|
913 |
le64_to_cpus(v); |
|
914 |
*p += 8; |
|
915 |
return 0; |
|
916 |
} |
|
917 |
|
|
918 |
static int decode_str(char **p, const char *end, char **s) |
|
919 |
{ |
|
920 |
uint32_t len; |
|
921 |
int r; |
|
922 |
|
|
923 |
if ((r = decode32(p, end, &len)) < 0) { |
|
924 |
return r; |
|
925 |
} |
|
926 |
|
|
927 |
*s = qemu_malloc(len + 1); |
|
928 |
memcpy(*s, *p, len); |
|
929 |
*p += len; |
|
930 |
(*s)[len] = '\0'; |
|
931 |
|
|
932 |
return len; |
|
933 |
} |
|
934 |
|
|
935 |
static int rbd_snap_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) |
|
622 |
static int qemu_rbd_snap_list(BlockDriverState *bs, |
|
623 |
QEMUSnapshotInfo **psn_tab) |
|
936 | 624 |
{ |
937 | 625 |
BDRVRBDState *s = bs->opaque; |
938 |
char n[RBD_MAX_SEG_NAME_SIZE]; |
|
939 | 626 |
QEMUSnapshotInfo *sn_info, *sn_tab = NULL; |
940 |
RbdHeader1 *header; |
|
941 |
char *hbuf = NULL; |
|
942 |
char *outbuf = NULL, *end, *buf; |
|
943 |
uint64_t len; |
|
944 |
uint64_t snap_seq; |
|
945 |
uint32_t snap_count; |
|
946 |
int r, i; |
|
947 |
|
|
948 |
/* read header to estimate how much space we need to read the snap |
|
949 |
* list */ |
|
950 |
if ((r = rbd_read_header(s, &hbuf)) < 0) { |
|
951 |
goto done_err; |
|
952 |
} |
|
953 |
header = (RbdHeader1 *)hbuf; |
|
954 |
len = le64_to_cpu(header->snap_names_len); |
|
955 |
len += 1024; /* should have already been enough, but new snapshots might |
|
956 |
already been created since we read the header. just allocate |
|
957 |
a bit more, so that in most cases it'll suffice anyway */ |
|
958 |
qemu_free(hbuf); |
|
959 |
|
|
960 |
snprintf(n, sizeof(n), "%s%s", s->name, RBD_SUFFIX); |
|
961 |
while (1) { |
|
962 |
qemu_free(outbuf); |
|
963 |
outbuf = qemu_malloc(len); |
|
627 |
int i, snap_count; |
|
628 |
rbd_snap_info_t *snaps; |
|
629 |
int max_snaps = RBD_MAX_SNAPS; |
|
964 | 630 |
|
965 |
r = rados_exec(s->header_pool, n, "rbd", "snap_list", NULL, 0,
|
|
966 |
outbuf, len);
|
|
967 |
if (r < 0) {
|
|
968 |
error_report("rbd.snap_list execution failed failed: %s", strerror(-r));
|
|
969 |
goto done_err;
|
|
631 |
do {
|
|
632 |
snaps = qemu_malloc(sizeof(*snaps) * max_snaps);
|
|
633 |
snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
|
|
634 |
if (snap_count < 0) {
|
|
635 |
qemu_free(snaps);
|
|
970 | 636 |
} |
971 |
if (r != len) { |
|
972 |
break; |
|
973 |
} |
|
637 |
} while (snap_count == -ERANGE); |
|
974 | 638 |
|
975 |
/* if we're here, we probably raced with some snaps creation */ |
|
976 |
len *= 2; |
|
977 |
} |
|
978 |
buf = outbuf; |
|
979 |
end = buf + len; |
|
980 |
|
|
981 |
if ((r = decode64(&buf, end, &snap_seq)) < 0) { |
|
982 |
goto done_err; |
|
983 |
} |
|
984 |
if ((r = decode32(&buf, end, &snap_count)) < 0) { |
|
985 |
goto done_err; |
|
639 |
if (snap_count <= 0) { |
|
640 |
return snap_count; |
|
986 | 641 |
} |
987 | 642 |
|
988 | 643 |
sn_tab = qemu_mallocz(snap_count * sizeof(QEMUSnapshotInfo)); |
989 |
for (i = 0; i < snap_count; i++) { |
|
990 |
uint64_t id, image_size; |
|
991 |
char *snap_name; |
|
992 | 644 |
|
993 |
if ((r = decode64(&buf, end, &id)) < 0) { |
|
994 |
goto done_err; |
|
995 |
} |
|
996 |
if ((r = decode64(&buf, end, &image_size)) < 0) { |
|
997 |
goto done_err; |
|
998 |
} |
|
999 |
if ((r = decode_str(&buf, end, &snap_name)) < 0) { |
|
1000 |
goto done_err; |
|
1001 |
} |
|
645 |
for (i = 0; i < snap_count; i++) { |
|
646 |
const char *snap_name = snaps[i].name; |
|
1002 | 647 |
|
1003 | 648 |
sn_info = sn_tab + i; |
1004 | 649 |
pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name); |
1005 | 650 |
pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name); |
1006 |
qemu_free(snap_name); |
|
1007 | 651 |
|
1008 |
sn_info->vm_state_size = image_size;
|
|
652 |
sn_info->vm_state_size = snaps[i].size;
|
|
1009 | 653 |
sn_info->date_sec = 0; |
1010 | 654 |
sn_info->date_nsec = 0; |
1011 | 655 |
sn_info->vm_clock_nsec = 0; |
1012 | 656 |
} |
657 |
rbd_snap_list_end(snaps); |
|
658 |
|
|
1013 | 659 |
*psn_tab = sn_tab; |
1014 |
qemu_free(outbuf); |
|
1015 | 660 |
return snap_count; |
1016 |
done_err: |
|
1017 |
qemu_free(sn_tab); |
|
1018 |
qemu_free(outbuf); |
|
1019 |
return r; |
|
1020 | 661 |
} |
1021 | 662 |
|
1022 |
static QEMUOptionParameter rbd_create_options[] = { |
|
663 |
static QEMUOptionParameter qemu_rbd_create_options[] = {
|
|
1023 | 664 |
{ |
1024 | 665 |
.name = BLOCK_OPT_SIZE, |
1025 | 666 |
.type = OPT_SIZE, |
... | ... | |
1036 | 677 |
static BlockDriver bdrv_rbd = { |
1037 | 678 |
.format_name = "rbd", |
1038 | 679 |
.instance_size = sizeof(BDRVRBDState), |
1039 |
.bdrv_file_open = rbd_open, |
|
1040 |
.bdrv_close = rbd_close, |
|
1041 |
.bdrv_create = rbd_create, |
|
1042 |
.bdrv_get_info = rbd_getinfo, |
|
1043 |
.create_options = rbd_create_options, |
|
1044 |
.bdrv_getlength = rbd_getlength, |
|
680 |
.bdrv_file_open = qemu_rbd_open,
|
|
681 |
.bdrv_close = qemu_rbd_close,
|
|
682 |
.bdrv_create = qemu_rbd_create,
|
|
683 |
.bdrv_get_info = qemu_rbd_getinfo,
|
|
684 |
.create_options = qemu_rbd_create_options,
|
|
685 |
.bdrv_getlength = qemu_rbd_getlength,
|
|
1045 | 686 |
.protocol_name = "rbd", |
1046 | 687 |
|
1047 |
.bdrv_aio_readv = rbd_aio_readv, |
|
1048 |
.bdrv_aio_writev = rbd_aio_writev, |
|
688 |
.bdrv_aio_readv = qemu_rbd_aio_readv,
|
|
689 |
.bdrv_aio_writev = qemu_rbd_aio_writev,
|
|
1049 | 690 |
|
1050 |
.bdrv_snapshot_create = rbd_snap_create, |
|
1051 |
.bdrv_snapshot_list = rbd_snap_list, |
|
691 |
.bdrv_snapshot_create = qemu_rbd_snap_create,
|
|
692 |
.bdrv_snapshot_list = qemu_rbd_snap_list,
|
|
1052 | 693 |
}; |
1053 | 694 |
|
1054 | 695 |
static void bdrv_rbd_init(void) |
Also available in: Unified diff