root / block / nbd.c @ d9b09f13
History | View | Annotate | Download (10.7 kB)
1 |
/*
|
---|---|
2 |
* QEMU Block driver for NBD
|
3 |
*
|
4 |
* Copyright (C) 2008 Bull S.A.S.
|
5 |
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
|
6 |
*
|
7 |
* Some parts:
|
8 |
* Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
|
9 |
*
|
10 |
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
11 |
* of this software and associated documentation files (the "Software"), to deal
|
12 |
* in the Software without restriction, including without limitation the rights
|
13 |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
14 |
* copies of the Software, and to permit persons to whom the Software is
|
15 |
* furnished to do so, subject to the following conditions:
|
16 |
*
|
17 |
* The above copyright notice and this permission notice shall be included in
|
18 |
* all copies or substantial portions of the Software.
|
19 |
*
|
20 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22 |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
23 |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24 |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25 |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
26 |
* THE SOFTWARE.
|
27 |
*/
|
28 |
|
29 |
#include "qemu-common.h" |
30 |
#include "nbd.h" |
31 |
#include "block_int.h" |
32 |
#include "module.h" |
33 |
#include "qemu_socket.h" |
34 |
|
35 |
#include <sys/types.h> |
36 |
#include <unistd.h> |
37 |
|
38 |
/* Marker that separates the host specification from the export name
 * inside a filename handed to this driver. */
#define EN_OPTSTR ":exportname="

/* Uncomment the following line to trace the driver on stderr. */
/* #define DEBUG_NBD */

#ifdef DEBUG_NBD
/* Debug build: prefix each message with "nbd" and the calling function. */
#define logout(fmt, ...) \
    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
/* Regular build: tracing expands to a no-op expression. */
#define logout(fmt, ...) ((void)0)
#endif
|
48 |
|
49 |
typedef struct BDRVNBDState { |
50 |
int sock;
|
51 |
uint32_t nbdflags; |
52 |
off_t size; |
53 |
size_t blocksize; |
54 |
char *export_name; /* An NBD server may export several devices */ |
55 |
|
56 |
CoMutex mutex; |
57 |
Coroutine *coroutine; |
58 |
|
59 |
struct nbd_reply reply;
|
60 |
|
61 |
/* If it begins with '/', this is a UNIX domain socket. Otherwise,
|
62 |
* it's a string of the form <hostname|ip4|\[ip6\]>:port
|
63 |
*/
|
64 |
char *host_spec;
|
65 |
} BDRVNBDState; |
66 |
|
67 |
/*
 * Parse an "nbd:" filename into the connection parameters stored in @s.
 *
 * Accepted forms, as implemented below:
 *   nbd:<host_spec>[:exportname=<name>]
 *   nbd:unix:<absolute path>[:exportname=<name>]
 *
 * On success s->host_spec (and s->export_name, when an export name was
 * given) point to newly allocated strings owned by @s.
 *
 * @flags is accepted for interface compatibility and is not used here.
 *
 * Returns 0 on success, -EINVAL if the filename is malformed.  On failure
 * both strings are released and the pointers are reset to NULL so that no
 * dangling pointer is left behind in the state object.
 */
static int nbd_config(BDRVNBDState *s, const char *filename, int flags)
{
    char *file;
    char *export_name;
    const char *host_spec;
    const char *unixpath;
    int err = -EINVAL;

    /* Work on a private copy: it is truncated in place below. */
    file = g_strdup(filename);

    export_name = strstr(file, EN_OPTSTR);
    if (export_name) {
        /* ":exportname=" with nothing after it is rejected. */
        if (export_name[strlen(EN_OPTSTR)] == 0) {
            goto out;
        }
        export_name[0] = 0; /* truncate 'file' */
        export_name += strlen(EN_OPTSTR);
        s->export_name = g_strdup(export_name);
    }

    /* extract the host_spec - fail if it's not nbd:... */
    if (!strstart(file, "nbd:", &host_spec)) {
        goto out;
    }

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
        if (unixpath[0] != '/') { /* We demand an absolute path */
            goto out;
        }
        s->host_spec = g_strdup(unixpath);
    } else {
        s->host_spec = g_strdup(host_spec);
    }

    err = 0;

out:
    g_free(file);
    if (err != 0) {
        /* Reset the pointers after freeing: a later g_free() on the same
         * state must not turn into a double free. */
        g_free(s->export_name);
        s->export_name = NULL;
        g_free(s->host_spec);
        s->host_spec = NULL;
    }
    return err;
}
112 |
|
113 |
/*
 * Begin a request: take the per-device mutex, record the calling coroutine
 * as the one to be woken by the fd handlers, and stamp @request with a
 * handle derived from the address of @s.
 */
static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
{
    Coroutine *self;

    qemu_co_mutex_lock(&s->mutex);

    self = qemu_coroutine_self();
    s->coroutine = self;

    request->handle = (uint64_t)(intptr_t)s;
}
119 |
|
120 |
static int nbd_have_request(void *opaque) |
121 |
{ |
122 |
BDRVNBDState *s = opaque; |
123 |
|
124 |
return !!s->coroutine;
|
125 |
} |
126 |
|
127 |
static void nbd_reply_ready(void *opaque) |
128 |
{ |
129 |
BDRVNBDState *s = opaque; |
130 |
|
131 |
if (s->reply.handle == 0) { |
132 |
/* No reply already in flight. Fetch a header. */
|
133 |
if (nbd_receive_reply(s->sock, &s->reply) < 0) { |
134 |
s->reply.handle = 0;
|
135 |
} |
136 |
} |
137 |
|
138 |
/* There's no need for a mutex on the receive side, because the
|
139 |
* handler acts as a synchronization point and ensures that only
|
140 |
* one coroutine is called until the reply finishes. */
|
141 |
if (s->coroutine) {
|
142 |
qemu_coroutine_enter(s->coroutine, NULL);
|
143 |
} |
144 |
} |
145 |
|
146 |
static void nbd_restart_write(void *opaque) |
147 |
{ |
148 |
BDRVNBDState *s = opaque; |
149 |
qemu_coroutine_enter(s->coroutine, NULL);
|
150 |
} |
151 |
|
152 |
/*
 * Send @request to the server, followed by request->len bytes of payload
 * taken from @iov starting at byte @offset when @iov is not NULL.
 *
 * While sending, the write handler (nbd_restart_write) is installed on the
 * socket; it is removed again before returning.
 *
 * Returns the result of nbd_send_request() on success, or -1 on failure.
 * On failure errno holds a positive error code: whatever
 * nbd_send_request() left there, or EIO for a short payload write.
 */
static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
                               struct iovec *iov, int offset)
{
    int rc, ret;
    int saved_errno;

    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
                            nbd_have_request, NULL, s);
    rc = nbd_send_request(s->sock, request);
    if (rc != -1 && iov) {
        ret = qemu_co_sendv(s->sock, iov, request->len, offset);
        if (ret != request->len) {
            /* errno values are positive; the callers negate errno
             * themselves when they build their return value. */
            errno = EIO;
            rc = -1;
        }
    }

    /* Re-registering the handlers must not disturb the error code the
     * caller is about to read. */
    saved_errno = errno;
    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
                            nbd_have_request, NULL, s);
    if (rc == -1) {
        errno = saved_errno;
    }
    return rc;
}
171 |
|
172 |
/*
 * Wait for the reply to @request and copy its header into @reply.
 *
 * If the reply's handle does not match the request, reply->error is set to
 * EIO and nothing else is done.  Otherwise, when @iov is not NULL and the
 * reply reports no error, request->len bytes of payload are received into
 * @iov starting at byte @offset; a short read sets reply->error to EIO.
 */
static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
                                 struct nbd_reply *reply,
                                 struct iovec *iov, int offset)
{
    int received;

    /* Wait until we're woken up by the read handler. */
    qemu_coroutine_yield();
    *reply = s->reply;

    if (reply->handle != request->handle) {
        reply->error = EIO;
        return;
    }

    if (iov && reply->error == 0) {
        received = qemu_co_recvv(s->sock, iov, request->len, offset);
        if (received != request->len) {
            reply->error = EIO;
        }
    }

    /* Tell the read handler to read another header. */
    s->reply.handle = 0;
}
195 |
|
196 |
/*
 * Finish a request: unregister the coroutine from the state object and
 * release the per-device mutex.  @request is not used.
 */
static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
{
    (void)request;

    s->coroutine = NULL;
    qemu_co_mutex_unlock(&s->mutex);
}
201 |
|
202 |
/*
 * Connect to the server named by s->host_spec, perform the NBD handshake
 * and, on success, record the socket, size and block size in the driver
 * state.
 *
 * A host_spec starting with '/' is opened as a UNIX domain socket, anything
 * else is passed to tcp_socket_outgoing_spec().
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int nbd_establish_connection(BlockDriverState *bs)
{
    BDRVNBDState *s = bs->opaque;
    int sock;
    int ret;
    int saved_errno;
    off_t size;
    size_t blocksize;

    if (s->host_spec[0] == '/') {
        sock = unix_socket_outgoing(s->host_spec);
    } else {
        sock = tcp_socket_outgoing_spec(s->host_spec);
    }

    /* Failed to establish connection */
    if (sock == -1) {
        /* Capture errno first: logout() may print and overwrite it. */
        saved_errno = errno;
        logout("Failed to establish connection to NBD server\n");
        return -saved_errno;
    }

    /* NBD handshake */
    ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
                                &blocksize);
    if (ret == -1) {
        /* Capture errno before logout()/closesocket() can overwrite it. */
        saved_errno = errno;
        logout("Failed to negotiate with the NBD server\n");
        closesocket(sock);
        return -saved_errno;
    }

    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  The handler is registered on the new
     * descriptor itself: s->sock is only assigned below and still holds
     * its previous value at this point. */
    socket_set_nonblock(sock);
    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
                            nbd_have_request, NULL, s);

    s->sock = sock;
    s->size = size;
    s->blocksize = blocksize;

    logout("Established connection with NBD server\n");
    return 0;
}
244 |
|
245 |
/*
 * Tell the server we are disconnecting, unregister the fd handlers and
 * close the socket.  The result of sending the disconnect request is
 * ignored (best effort).
 */
static void nbd_teardown_connection(BlockDriverState *bs)
{
    BDRVNBDState *s = bs->opaque;
    /* Initialise the whole request so that members not named here
     * (notably the handle) are zero instead of stack garbage.
     * NOTE(review): nbd_send_request() presumably transmits the handle
     * along with the other fields - confirm against nbd.c. */
    struct nbd_request request = {
        .type = NBD_CMD_DISC,
        .from = 0,
        .len = 0,
    };

    nbd_send_request(s->sock, &request);

    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, NULL);
    closesocket(s->sock);
}
258 |
|
259 |
/*
 * Open callback of the driver: initialise the mutex, parse @filename into
 * the state object and connect to the server.
 *
 * Returns 0 on success, otherwise the error returned by nbd_config() or
 * nbd_establish_connection().
 */
static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
{
    BDRVNBDState *s = bs->opaque;
    int rc;

    qemu_co_mutex_init(&s->mutex);

    /* Pop the config into our state object; only connect if it is valid. */
    rc = nbd_config(s, filename, flags);
    if (rc == 0) {
        /* establish TCP connection, return error if it fails
         * TODO: Configurable retry-until-timeout behaviour.
         */
        rc = nbd_establish_connection(bs);
    }

    return rc;
}
279 |
|
280 |
/*
 * Issue a single NBD read of @nb_sectors 512-byte sectors starting at
 * @sector_num and receive the data into @qiov at byte @offset.
 *
 * Returns the negated error field of the reply: 0 when no error was
 * recorded.
 */
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov,
                          int offset)
{
    BDRVNBDState *s = bs->opaque;
    struct nbd_request request;
    struct nbd_reply reply;
    int sent;

    request.type = NBD_CMD_READ;
    request.from = sector_num * 512;
    request.len = nb_sectors * 512;

    nbd_coroutine_start(s, &request);

    /* A read request carries no payload; the data comes with the reply. */
    sent = nbd_co_send_request(s, &request, NULL, 0);
    if (sent != -1) {
        nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset);
    } else {
        reply.error = errno;
    }

    nbd_coroutine_end(s, &request);
    return -reply.error;
}
302 |
|
303 |
/*
 * Issue a single NBD write of @nb_sectors 512-byte sectors starting at
 * @sector_num, taking the data from @qiov at byte @offset.
 *
 * Returns the negated error field of the reply: 0 when no error was
 * recorded.
 */
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
                           int nb_sectors, QEMUIOVector *qiov,
                           int offset)
{
    BDRVNBDState *s = bs->opaque;
    struct nbd_request request;
    struct nbd_reply reply;
    int sent;

    request.type = NBD_CMD_WRITE;
    request.from = sector_num * 512;
    request.len = nb_sectors * 512;

    nbd_coroutine_start(s, &request);

    /* The payload travels with the request; the reply is header-only. */
    sent = nbd_co_send_request(s, &request, qiov->iov, offset);
    if (sent != -1) {
        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
    } else {
        reply.error = errno;
    }

    nbd_coroutine_end(s, &request);
    return -reply.error;
}
324 |
|
325 |
/* Largest number of 512-byte sectors sent in one request.  qemu-nbd has a
 * limit of slightly less than 1M per request; 2040 sectors stays below it
 * and keeps chunk boundaries aligned to 4K. */
#define NBD_MAX_SECTORS 2040
328 |
|
329 |
/*
 * Read @nb_sectors sectors starting at @sector_num into @qiov, splitting
 * the transfer into requests of at most NBD_MAX_SECTORS sectors.
 *
 * Returns the first negative result from a chunk, otherwise the result of
 * the final chunk.
 */
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
{
    int done = 0;   /* bytes of qiov already covered by earlier chunks */

    for (; nb_sectors > NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS) {
        int rc = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, done);

        if (rc < 0) {
            return rc;
        }
        done += NBD_MAX_SECTORS * 512;
        sector_num += NBD_MAX_SECTORS;
    }

    /* Whatever is left fits into a single request. */
    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, done);
}
345 |
|
346 |
/*
 * Write @nb_sectors sectors starting at @sector_num from @qiov, splitting
 * the transfer into requests of at most NBD_MAX_SECTORS sectors.
 *
 * Returns the first negative result from a chunk, otherwise the result of
 * the final chunk.
 */
static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
{
    int done = 0;   /* bytes of qiov already covered by earlier chunks */

    for (; nb_sectors > NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS) {
        int rc = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, done);

        if (rc < 0) {
            return rc;
        }
        done += NBD_MAX_SECTORS * 512;
        sector_num += NBD_MAX_SECTORS;
    }

    /* Whatever is left fits into a single request. */
    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, done);
}
362 |
|
363 |
/*
 * Close callback of the driver: release the strings allocated by
 * nbd_config() and shut the connection down.
 */
static void nbd_close(BlockDriverState *bs)
{
    BDRVNBDState *state = bs->opaque;

    /* Configuration strings first ... */
    g_free(state->export_name);
    g_free(state->host_spec);

    /* ... then the connection itself. */
    nbd_teardown_connection(bs);
}
371 |
|
372 |
/*
 * Length callback of the driver: returns the size recorded in the driver
 * state by nbd_establish_connection().
 */
static int64_t nbd_getlength(BlockDriverState *bs)
{
    const BDRVNBDState *state = bs->opaque;
    int64_t length = state->size;

    return length;
}
378 |
|
379 |
static BlockDriver bdrv_nbd = {
|
380 |
.format_name = "nbd",
|
381 |
.instance_size = sizeof(BDRVNBDState),
|
382 |
.bdrv_file_open = nbd_open, |
383 |
.bdrv_co_readv = nbd_co_readv, |
384 |
.bdrv_co_writev = nbd_co_writev, |
385 |
.bdrv_close = nbd_close, |
386 |
.bdrv_getlength = nbd_getlength, |
387 |
.protocol_name = "nbd",
|
388 |
}; |
389 |
|
390 |
static void bdrv_nbd_init(void) |
391 |
{ |
392 |
bdrv_register(&bdrv_nbd); |
393 |
} |
394 |
|
395 |
block_init(bdrv_nbd_init); |