Statistics
| Branch: | Revision:

root / nbd.c @ 60b46aa2

History | View | Annotate | Download (24.4 kB)

1 75818250 ths
/*
2 7a5ca864 bellard
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3 7a5ca864 bellard
 *
4 7a5ca864 bellard
 *  Network Block Device
5 7a5ca864 bellard
 *
6 7a5ca864 bellard
 *  This program is free software; you can redistribute it and/or modify
7 7a5ca864 bellard
 *  it under the terms of the GNU General Public License as published by
8 7a5ca864 bellard
 *  the Free Software Foundation; under version 2 of the License.
9 7a5ca864 bellard
 *
10 7a5ca864 bellard
 *  This program is distributed in the hope that it will be useful,
11 7a5ca864 bellard
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 7a5ca864 bellard
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 7a5ca864 bellard
 *  GNU General Public License for more details.
14 7a5ca864 bellard
 *
15 7a5ca864 bellard
 *  You should have received a copy of the GNU General Public License
16 8167ee88 Blue Swirl
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17 75818250 ths
 */
18 7a5ca864 bellard
19 7a5ca864 bellard
#include "nbd.h"
20 ab359cd1 Markus Armbruster
#include "block.h"
21 7a5ca864 bellard
22 262db388 Paolo Bonzini
#include "qemu-coroutine.h"
23 262db388 Paolo Bonzini
24 7a5ca864 bellard
#include <errno.h>
25 7a5ca864 bellard
#include <string.h>
26 03ff3ca3 aliguori
#ifndef _WIN32
27 7a5ca864 bellard
#include <sys/ioctl.h>
28 03ff3ca3 aliguori
#endif
29 5dc2eec9 Andreas Färber
#if defined(__sun__) || defined(__HAIKU__)
30 7e00eb9b aliguori
#include <sys/ioccom.h>
31 7e00eb9b aliguori
#endif
32 7a5ca864 bellard
#include <ctype.h>
33 7a5ca864 bellard
#include <inttypes.h>
34 75818250 ths
35 b90fb4b8 Paolo Bonzini
#ifdef __linux__
36 b90fb4b8 Paolo Bonzini
#include <linux/fs.h>
37 b90fb4b8 Paolo Bonzini
#endif
38 b90fb4b8 Paolo Bonzini
39 03ff3ca3 aliguori
#include "qemu_socket.h"
40 d9a73806 Paolo Bonzini
#include "qemu-queue.h"
41 03ff3ca3 aliguori
42 03ff3ca3 aliguori
//#define DEBUG_NBD
43 03ff3ca3 aliguori
44 03ff3ca3 aliguori
/*
 * LOG() always prints to stderr, prefixed with file, function and line.
 * __func__ is the standard C99 spelling of the GCC-specific __FUNCTION__.
 */
#define LOG(msg, ...) do { \
    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
            __FILE__, __func__, __LINE__, ## __VA_ARGS__); \
} while (0)

/*
 * TRACE() forwards to LOG() only when DEBUG_NBD is defined; otherwise it
 * expands to an empty statement and its arguments are not evaluated.
 */
#ifdef DEBUG_NBD
#define TRACE(msg, ...) do { \
    LOG(msg, ## __VA_ARGS__); \
} while (0)
#else
#define TRACE(msg, ...) \
    do { } while (0)
#endif
57 7a5ca864 bellard
58 7a5ca864 bellard
/* This is all part of the "official" NBD API */
59 7a5ca864 bellard
60 b2e3d87f Nick Thomas
#define NBD_REPLY_SIZE          (4 + 4 + 8)
61 7a5ca864 bellard
#define NBD_REQUEST_MAGIC       0x25609513
62 7a5ca864 bellard
#define NBD_REPLY_MAGIC         0x67446698
63 7a5ca864 bellard
64 7a5ca864 bellard
#define NBD_SET_SOCK            _IO(0xab, 0)
65 7a5ca864 bellard
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
66 7a5ca864 bellard
#define NBD_SET_SIZE            _IO(0xab, 2)
67 7a5ca864 bellard
#define NBD_DO_IT               _IO(0xab, 3)
68 7a5ca864 bellard
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
69 7a5ca864 bellard
#define NBD_CLEAR_QUE           _IO(0xab, 5)
70 b2e3d87f Nick Thomas
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
71 b2e3d87f Nick Thomas
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
72 7a5ca864 bellard
#define NBD_DISCONNECT          _IO(0xab, 8)
73 bbb74edd Paolo Bonzini
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
74 bbb74edd Paolo Bonzini
#define NBD_SET_FLAGS           _IO(0xab, 10)
75 7a5ca864 bellard
76 b2e3d87f Nick Thomas
#define NBD_OPT_EXPORT_NAME     (1 << 0)
77 1d45f8b5 Laurent Vivier
78 7a5ca864 bellard
/* That's all folks */
79 7a5ca864 bellard
80 185b4338 Paolo Bonzini
/*
 * Move @size bytes between @buffer and the socket @fd.
 *
 * @do_read selects the direction: true receives into @buffer, false sends
 * from it.  Inside a coroutine the whole transfer is delegated to
 * qemu_co_recv()/qemu_co_send() and their result is returned as is.
 *
 * Outside a coroutine the function loops until @size bytes have moved, a
 * transfer of zero bytes is reported (treated as end of file), or an error
 * that is not retried occurs.  EINTR is always retried; EAGAIN is retried
 * only once some data has already been transferred.
 *
 * Returns the number of bytes transferred (less than @size only after end
 * of file), or a negative errno value.
 */
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    char *bytes = buffer;
    size_t done = 0;

    if (qemu_in_coroutine()) {
        if (!do_read) {
            return qemu_co_send(fd, buffer, size);
        }
        return qemu_co_recv(fd, buffer, size);
    }

    while (done < size) {
        ssize_t chunk;
        int err;

        if (do_read) {
            chunk = qemu_recv(fd, bytes + done, size - done, 0);
        } else {
            chunk = send(fd, bytes + done, size - done, 0);
        }

        if (chunk > 0) {
            done += chunk;
            continue;
        }

        if (chunk == 0) {
            /* eof */
            break;
        }

        err = socket_error();
        if (err == EINTR || (done > 0 && err == EAGAIN)) {
            /* recoverable error: try the same transfer again */
            continue;
        }

        /* unrecoverable error */
        return -err;
    }

    return done;
}
124 7a5ca864 bellard
125 7fe7b68b Paolo Bonzini
/*
 * Receive @size bytes from @fd into @buffer via nbd_wr_sync().
 *
 * Sockets are kept in blocking mode in the negotiation phase.  After
 * that, a non-readable socket simply means that another thread stole
 * our request/reply.  Synchronization is done with recv_coroutine, so
 * that this is coroutine-safe.
 *
 * Returns whatever nbd_wr_sync() returns.
 */
static ssize_t read_sync(int fd, void *buffer, size_t size)
{
    const bool do_read = true;

    return nbd_wr_sync(fd, buffer, size, do_read);
}
134 7fe7b68b Paolo Bonzini
135 7fe7b68b Paolo Bonzini
/*
 * Send @size bytes from @buffer to @fd via nbd_wr_sync().
 *
 * For writes, we do expect the socket to be writable, so a transfer that
 * fails with -EAGAIN is simply started again.
 *
 * Returns the result of the last nbd_wr_sync() call: the number of bytes
 * written, or a negative errno value other than -EAGAIN.
 */
static ssize_t write_sync(int fd, void *buffer, size_t size)
{
    /* ssize_t, not int: keep the full width of nbd_wr_sync()'s result. */
    ssize_t ret;

    do {
        ret = nbd_wr_sync(fd, buffer, size, false);
    } while (ret == -EAGAIN);

    return ret;
}
144 7fe7b68b Paolo Bonzini
145 c12504ce Nick Thomas
/*
 * Format @address and @port as "host:port" into @buf (at most @len bytes,
 * always NUL-terminated when @len > 0; longer results are truncated).
 *
 * An address containing a colon is taken to be an IPv6 literal and is
 * wrapped in [] so the port separator stays unambiguous.  An address that
 * already starts with '[' is left as it is instead of being wrapped twice.
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    bool needs_brackets = strchr(address, ':') != NULL && address[0] != '[';

    /* The cast makes the argument match %u after default promotion. */
    if (needs_brackets) {
        snprintf(buf, len, "[%s]:%u", address, (unsigned int)port);
    } else {
        snprintf(buf, len, "%s:%u", address, (unsigned int)port);
    }
}
155 7a5ca864 bellard
156 c12504ce Nick Thomas
/*
 * Build a "host:port" specification from @address and @port and pass it to
 * tcp_socket_outgoing_spec(), returning that function's result.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_outgoing_spec(spec);
}
162 7a5ca864 bellard
163 c12504ce Nick Thomas
/*
 * Hand the combined @address_and_port string to inet_connect() and return
 * its result unchanged.
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int ret;

    ret = inet_connect(address_and_port, true, NULL);

    return ret;
}
167 cd831bd7 ths
168 c12504ce Nick Thomas
/*
 * Build a "host:port" specification from @address and @port and pass it to
 * tcp_socket_incoming_spec(), returning that function's result.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_incoming_spec(spec);
}
174 cd831bd7 ths
175 c12504ce Nick Thomas
/*
 * Call inet_listen() for @address_and_port with SOCK_STREAM, passing no
 * output string buffer (NULL, length 0), and return its result unchanged.
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0, NULL);
}
181 c12504ce Nick Thomas
182 03ff3ca3 aliguori
/*
 * Call unix_listen() for @path, passing no output string buffer
 * (NULL, length 0), and return its result unchanged.
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
189 cd831bd7 ths
190 03ff3ca3 aliguori
/* Pass @path to unix_connect() and return its result unchanged. */
int unix_socket_outgoing(const char *path)
{
    int ret = unix_connect(path);

    return ret;
}
194 cd831bd7 ths
195 7a5ca864 bellard
/* Basic flow
196 7a5ca864 bellard

197 7a5ca864 bellard
   Server         Client
198 7a5ca864 bellard

199 7a5ca864 bellard
   Negotiate
200 7a5ca864 bellard
                  Request
201 7a5ca864 bellard
   Response
202 7a5ca864 bellard
                  Request
203 7a5ca864 bellard
   Response
204 7a5ca864 bellard
                  ...
205 7a5ca864 bellard
   ...
206 7a5ca864 bellard
                  Request (type == 2)
207 7a5ca864 bellard
*/
208 7a5ca864 bellard
209 af49bbbe Paolo Bonzini
static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
210 7a5ca864 bellard
{
211 b2e3d87f Nick Thomas
    char buf[8 + 8 + 8 + 128];
212 185b4338 Paolo Bonzini
    int rc;
213 b2e3d87f Nick Thomas
214 b2e3d87f Nick Thomas
    /* Negotiate
215 b2e3d87f Nick Thomas
        [ 0 ..   7]   passwd   ("NBDMAGIC")
216 b2e3d87f Nick Thomas
        [ 8 ..  15]   magic    (0x00420281861253)
217 b2e3d87f Nick Thomas
        [16 ..  23]   size
218 b90fb4b8 Paolo Bonzini
        [24 ..  27]   flags
219 b90fb4b8 Paolo Bonzini
        [28 .. 151]   reserved (0)
220 b2e3d87f Nick Thomas
     */
221 b2e3d87f Nick Thomas
222 7fe7b68b Paolo Bonzini
    socket_set_block(csock);
223 185b4338 Paolo Bonzini
    rc = -EINVAL;
224 185b4338 Paolo Bonzini
225 b2e3d87f Nick Thomas
    TRACE("Beginning negotiation.");
226 b2e3d87f Nick Thomas
    memcpy(buf, "NBDMAGIC", 8);
227 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
228 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 16), size);
229 2c7989a9 Paolo Bonzini
    cpu_to_be32w((uint32_t*)(buf + 24),
230 7a706633 Paolo Bonzini
                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
231 7a706633 Paolo Bonzini
                 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
232 b90fb4b8 Paolo Bonzini
    memset(buf + 28, 0, 124);
233 b2e3d87f Nick Thomas
234 b2e3d87f Nick Thomas
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
235 b2e3d87f Nick Thomas
        LOG("write failed");
236 185b4338 Paolo Bonzini
        goto fail;
237 b2e3d87f Nick Thomas
    }
238 b2e3d87f Nick Thomas
239 07f35073 Dong Xu Wang
    TRACE("Negotiation succeeded.");
240 185b4338 Paolo Bonzini
    rc = 0;
241 185b4338 Paolo Bonzini
fail:
242 7fe7b68b Paolo Bonzini
    socket_set_nonblock(csock);
243 185b4338 Paolo Bonzini
    return rc;
244 7a5ca864 bellard
}
245 7a5ca864 bellard
246 1d45f8b5 Laurent Vivier
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
247 1d45f8b5 Laurent Vivier
                          off_t *size, size_t *blocksize)
248 7a5ca864 bellard
{
249 b2e3d87f Nick Thomas
    char buf[256];
250 b2e3d87f Nick Thomas
    uint64_t magic, s;
251 b2e3d87f Nick Thomas
    uint16_t tmp;
252 185b4338 Paolo Bonzini
    int rc;
253 b2e3d87f Nick Thomas
254 07f35073 Dong Xu Wang
    TRACE("Receiving negotiation.");
255 b2e3d87f Nick Thomas
256 7fe7b68b Paolo Bonzini
    socket_set_block(csock);
257 185b4338 Paolo Bonzini
    rc = -EINVAL;
258 185b4338 Paolo Bonzini
259 b2e3d87f Nick Thomas
    if (read_sync(csock, buf, 8) != 8) {
260 b2e3d87f Nick Thomas
        LOG("read failed");
261 185b4338 Paolo Bonzini
        goto fail;
262 b2e3d87f Nick Thomas
    }
263 b2e3d87f Nick Thomas
264 b2e3d87f Nick Thomas
    buf[8] = '\0';
265 b2e3d87f Nick Thomas
    if (strlen(buf) == 0) {
266 b2e3d87f Nick Thomas
        LOG("server connection closed");
267 185b4338 Paolo Bonzini
        goto fail;
268 b2e3d87f Nick Thomas
    }
269 b2e3d87f Nick Thomas
270 b2e3d87f Nick Thomas
    TRACE("Magic is %c%c%c%c%c%c%c%c",
271 b2e3d87f Nick Thomas
          qemu_isprint(buf[0]) ? buf[0] : '.',
272 b2e3d87f Nick Thomas
          qemu_isprint(buf[1]) ? buf[1] : '.',
273 b2e3d87f Nick Thomas
          qemu_isprint(buf[2]) ? buf[2] : '.',
274 b2e3d87f Nick Thomas
          qemu_isprint(buf[3]) ? buf[3] : '.',
275 b2e3d87f Nick Thomas
          qemu_isprint(buf[4]) ? buf[4] : '.',
276 b2e3d87f Nick Thomas
          qemu_isprint(buf[5]) ? buf[5] : '.',
277 b2e3d87f Nick Thomas
          qemu_isprint(buf[6]) ? buf[6] : '.',
278 b2e3d87f Nick Thomas
          qemu_isprint(buf[7]) ? buf[7] : '.');
279 b2e3d87f Nick Thomas
280 b2e3d87f Nick Thomas
    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
281 b2e3d87f Nick Thomas
        LOG("Invalid magic received");
282 185b4338 Paolo Bonzini
        goto fail;
283 b2e3d87f Nick Thomas
    }
284 b2e3d87f Nick Thomas
285 b2e3d87f Nick Thomas
    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
286 b2e3d87f Nick Thomas
        LOG("read failed");
287 185b4338 Paolo Bonzini
        goto fail;
288 b2e3d87f Nick Thomas
    }
289 b2e3d87f Nick Thomas
    magic = be64_to_cpu(magic);
290 b2e3d87f Nick Thomas
    TRACE("Magic is 0x%" PRIx64, magic);
291 b2e3d87f Nick Thomas
292 b2e3d87f Nick Thomas
    if (name) {
293 b2e3d87f Nick Thomas
        uint32_t reserved = 0;
294 b2e3d87f Nick Thomas
        uint32_t opt;
295 b2e3d87f Nick Thomas
        uint32_t namesize;
296 b2e3d87f Nick Thomas
297 b2e3d87f Nick Thomas
        TRACE("Checking magic (opts_magic)");
298 b2e3d87f Nick Thomas
        if (magic != 0x49484156454F5054LL) {
299 b2e3d87f Nick Thomas
            LOG("Bad magic received");
300 185b4338 Paolo Bonzini
            goto fail;
301 b2e3d87f Nick Thomas
        }
302 b2e3d87f Nick Thomas
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
303 b2e3d87f Nick Thomas
            LOG("flags read failed");
304 185b4338 Paolo Bonzini
            goto fail;
305 b2e3d87f Nick Thomas
        }
306 b2e3d87f Nick Thomas
        *flags = be16_to_cpu(tmp) << 16;
307 b2e3d87f Nick Thomas
        /* reserved for future use */
308 b2e3d87f Nick Thomas
        if (write_sync(csock, &reserved, sizeof(reserved)) !=
309 b2e3d87f Nick Thomas
            sizeof(reserved)) {
310 b2e3d87f Nick Thomas
            LOG("write failed (reserved)");
311 185b4338 Paolo Bonzini
            goto fail;
312 b2e3d87f Nick Thomas
        }
313 b2e3d87f Nick Thomas
        /* write the export name */
314 b2e3d87f Nick Thomas
        magic = cpu_to_be64(magic);
315 b2e3d87f Nick Thomas
        if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
316 b2e3d87f Nick Thomas
            LOG("write failed (magic)");
317 185b4338 Paolo Bonzini
            goto fail;
318 b2e3d87f Nick Thomas
        }
319 b2e3d87f Nick Thomas
        opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
320 b2e3d87f Nick Thomas
        if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
321 b2e3d87f Nick Thomas
            LOG("write failed (opt)");
322 185b4338 Paolo Bonzini
            goto fail;
323 b2e3d87f Nick Thomas
        }
324 b2e3d87f Nick Thomas
        namesize = cpu_to_be32(strlen(name));
325 b2e3d87f Nick Thomas
        if (write_sync(csock, &namesize, sizeof(namesize)) !=
326 b2e3d87f Nick Thomas
            sizeof(namesize)) {
327 b2e3d87f Nick Thomas
            LOG("write failed (namesize)");
328 185b4338 Paolo Bonzini
            goto fail;
329 b2e3d87f Nick Thomas
        }
330 b2e3d87f Nick Thomas
        if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
331 b2e3d87f Nick Thomas
            LOG("write failed (name)");
332 185b4338 Paolo Bonzini
            goto fail;
333 b2e3d87f Nick Thomas
        }
334 b2e3d87f Nick Thomas
    } else {
335 b2e3d87f Nick Thomas
        TRACE("Checking magic (cli_magic)");
336 b2e3d87f Nick Thomas
337 b2e3d87f Nick Thomas
        if (magic != 0x00420281861253LL) {
338 b2e3d87f Nick Thomas
            LOG("Bad magic received");
339 185b4338 Paolo Bonzini
            goto fail;
340 b2e3d87f Nick Thomas
        }
341 b2e3d87f Nick Thomas
    }
342 b2e3d87f Nick Thomas
343 b2e3d87f Nick Thomas
    if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
344 b2e3d87f Nick Thomas
        LOG("read failed");
345 185b4338 Paolo Bonzini
        goto fail;
346 b2e3d87f Nick Thomas
    }
347 b2e3d87f Nick Thomas
    *size = be64_to_cpu(s);
348 b2e3d87f Nick Thomas
    *blocksize = 1024;
349 b2e3d87f Nick Thomas
    TRACE("Size is %" PRIu64, *size);
350 b2e3d87f Nick Thomas
351 b2e3d87f Nick Thomas
    if (!name) {
352 b2e3d87f Nick Thomas
        if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
353 b2e3d87f Nick Thomas
            LOG("read failed (flags)");
354 185b4338 Paolo Bonzini
            goto fail;
355 b2e3d87f Nick Thomas
        }
356 b2e3d87f Nick Thomas
        *flags = be32_to_cpup(flags);
357 b2e3d87f Nick Thomas
    } else {
358 b2e3d87f Nick Thomas
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
359 b2e3d87f Nick Thomas
            LOG("read failed (tmp)");
360 185b4338 Paolo Bonzini
            goto fail;
361 b2e3d87f Nick Thomas
        }
362 b2e3d87f Nick Thomas
        *flags |= be32_to_cpu(tmp);
363 b2e3d87f Nick Thomas
    }
364 b2e3d87f Nick Thomas
    if (read_sync(csock, &buf, 124) != 124) {
365 b2e3d87f Nick Thomas
        LOG("read failed (buf)");
366 185b4338 Paolo Bonzini
        goto fail;
367 b2e3d87f Nick Thomas
    }
368 185b4338 Paolo Bonzini
    rc = 0;
369 185b4338 Paolo Bonzini
370 185b4338 Paolo Bonzini
fail:
371 7fe7b68b Paolo Bonzini
    socket_set_nonblock(csock);
372 185b4338 Paolo Bonzini
    return rc;
373 cd831bd7 ths
}
374 7a5ca864 bellard
375 b90fb4b8 Paolo Bonzini
#ifdef __linux__
376 b90fb4b8 Paolo Bonzini
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
377 cd831bd7 ths
{
378 3e05c785 Chunyan Liu
    TRACE("Setting NBD socket");
379 3e05c785 Chunyan Liu
380 fc19f8a0 Paolo Bonzini
    if (ioctl(fd, NBD_SET_SOCK, csock) < 0) {
381 3e05c785 Chunyan Liu
        int serrno = errno;
382 3e05c785 Chunyan Liu
        LOG("Failed to set NBD socket");
383 185b4338 Paolo Bonzini
        return -serrno;
384 3e05c785 Chunyan Liu
    }
385 3e05c785 Chunyan Liu
386 b2e3d87f Nick Thomas
    TRACE("Setting block size to %lu", (unsigned long)blocksize);
387 7a5ca864 bellard
388 fc19f8a0 Paolo Bonzini
    if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) {
389 b2e3d87f Nick Thomas
        int serrno = errno;
390 b2e3d87f Nick Thomas
        LOG("Failed setting NBD block size");
391 185b4338 Paolo Bonzini
        return -serrno;
392 b2e3d87f Nick Thomas
    }
393 7a5ca864 bellard
394 0bfcd599 Blue Swirl
        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
395 7a5ca864 bellard
396 fc19f8a0 Paolo Bonzini
    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) {
397 b2e3d87f Nick Thomas
        int serrno = errno;
398 b2e3d87f Nick Thomas
        LOG("Failed setting size (in blocks)");
399 185b4338 Paolo Bonzini
        return -serrno;
400 b2e3d87f Nick Thomas
    }
401 7a5ca864 bellard
402 b90fb4b8 Paolo Bonzini
    if (flags & NBD_FLAG_READ_ONLY) {
403 b90fb4b8 Paolo Bonzini
        int read_only = 1;
404 b90fb4b8 Paolo Bonzini
        TRACE("Setting readonly attribute");
405 b90fb4b8 Paolo Bonzini
406 b90fb4b8 Paolo Bonzini
        if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
407 b90fb4b8 Paolo Bonzini
            int serrno = errno;
408 b90fb4b8 Paolo Bonzini
            LOG("Failed setting read-only attribute");
409 185b4338 Paolo Bonzini
            return -serrno;
410 b90fb4b8 Paolo Bonzini
        }
411 b90fb4b8 Paolo Bonzini
    }
412 b90fb4b8 Paolo Bonzini
413 973b3d0a Paolo Bonzini
    if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
414 973b3d0a Paolo Bonzini
        && errno != ENOTTY) {
415 973b3d0a Paolo Bonzini
        int serrno = errno;
416 973b3d0a Paolo Bonzini
        LOG("Failed setting flags");
417 185b4338 Paolo Bonzini
        return -serrno;
418 973b3d0a Paolo Bonzini
    }
419 973b3d0a Paolo Bonzini
420 b2e3d87f Nick Thomas
    TRACE("Negotiation ended");
421 7a5ca864 bellard
422 b2e3d87f Nick Thomas
    return 0;
423 7a5ca864 bellard
}
424 7a5ca864 bellard
425 7a5ca864 bellard
int nbd_disconnect(int fd)
426 7a5ca864 bellard
{
427 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_QUE);
428 b2e3d87f Nick Thomas
    ioctl(fd, NBD_DISCONNECT);
429 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_SOCK);
430 b2e3d87f Nick Thomas
    return 0;
431 7a5ca864 bellard
}
432 7a5ca864 bellard
433 0a4eb864 Jes Sorensen
int nbd_client(int fd)
434 7a5ca864 bellard
{
435 b2e3d87f Nick Thomas
    int ret;
436 b2e3d87f Nick Thomas
    int serrno;
437 7a5ca864 bellard
438 b2e3d87f Nick Thomas
    TRACE("Doing NBD loop");
439 7a5ca864 bellard
440 b2e3d87f Nick Thomas
    ret = ioctl(fd, NBD_DO_IT);
441 fc19f8a0 Paolo Bonzini
    if (ret < 0 && errno == EPIPE) {
442 74624688 Paolo Bonzini
        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
443 74624688 Paolo Bonzini
         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
444 74624688 Paolo Bonzini
         * that case.
445 74624688 Paolo Bonzini
         */
446 74624688 Paolo Bonzini
        ret = 0;
447 74624688 Paolo Bonzini
    }
448 b2e3d87f Nick Thomas
    serrno = errno;
449 7a5ca864 bellard
450 b2e3d87f Nick Thomas
    TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
451 7a5ca864 bellard
452 b2e3d87f Nick Thomas
    TRACE("Clearing NBD queue");
453 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_QUE);
454 7a5ca864 bellard
455 b2e3d87f Nick Thomas
    TRACE("Clearing NBD socket");
456 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_SOCK);
457 7a5ca864 bellard
458 b2e3d87f Nick Thomas
    errno = serrno;
459 b2e3d87f Nick Thomas
    return ret;
460 7a5ca864 bellard
}
461 03ff3ca3 aliguori
#else
462 8e72506e Paolo Bonzini
/*
 * Variant built when __linux__ is not defined: nothing is configured and
 * every call fails with -ENOTSUP.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    const int unsupported = -ENOTSUP;

    return unsupported;
}
466 03ff3ca3 aliguori
467 03ff3ca3 aliguori
/*
 * Variant built when __linux__ is not defined: always fails with -ENOTSUP.
 */
int nbd_disconnect(int fd)
{
    const int unsupported = -ENOTSUP;

    return unsupported;
}
471 03ff3ca3 aliguori
472 0a4eb864 Jes Sorensen
/*
 * Variant built when __linux__ is not defined: always fails with -ENOTSUP.
 */
int nbd_client(int fd)
{
    const int unsupported = -ENOTSUP;

    return unsupported;
}
476 03ff3ca3 aliguori
#endif
477 7a5ca864 bellard
478 94e7340b Paolo Bonzini
ssize_t nbd_send_request(int csock, struct nbd_request *request)
479 7a5ca864 bellard
{
480 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8 + 8 + 4];
481 185b4338 Paolo Bonzini
    ssize_t ret;
482 b2e3d87f Nick Thomas
483 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
484 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 4), request->type);
485 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
486 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 16), request->from);
487 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 24), request->len);
488 75818250 ths
489 b2e3d87f Nick Thomas
    TRACE("Sending request to client: "
490 b2e3d87f Nick Thomas
          "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
491 b2e3d87f Nick Thomas
          request->from, request->len, request->handle, request->type);
492 b2e3d87f Nick Thomas
493 185b4338 Paolo Bonzini
    ret = write_sync(csock, buf, sizeof(buf));
494 185b4338 Paolo Bonzini
    if (ret < 0) {
495 185b4338 Paolo Bonzini
        return ret;
496 185b4338 Paolo Bonzini
    }
497 185b4338 Paolo Bonzini
498 185b4338 Paolo Bonzini
    if (ret != sizeof(buf)) {
499 b2e3d87f Nick Thomas
        LOG("writing to socket failed");
500 185b4338 Paolo Bonzini
        return -EINVAL;
501 b2e3d87f Nick Thomas
    }
502 b2e3d87f Nick Thomas
    return 0;
503 b2e3d87f Nick Thomas
}
504 75818250 ths
505 94e7340b Paolo Bonzini
static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
506 75818250 ths
{
507 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8 + 8 + 4];
508 b2e3d87f Nick Thomas
    uint32_t magic;
509 185b4338 Paolo Bonzini
    ssize_t ret;
510 b2e3d87f Nick Thomas
511 185b4338 Paolo Bonzini
    ret = read_sync(csock, buf, sizeof(buf));
512 185b4338 Paolo Bonzini
    if (ret < 0) {
513 185b4338 Paolo Bonzini
        return ret;
514 185b4338 Paolo Bonzini
    }
515 185b4338 Paolo Bonzini
516 185b4338 Paolo Bonzini
    if (ret != sizeof(buf)) {
517 b2e3d87f Nick Thomas
        LOG("read failed");
518 185b4338 Paolo Bonzini
        return -EINVAL;
519 b2e3d87f Nick Thomas
    }
520 b2e3d87f Nick Thomas
521 b2e3d87f Nick Thomas
    /* Request
522 b2e3d87f Nick Thomas
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
523 b2e3d87f Nick Thomas
       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
524 b2e3d87f Nick Thomas
       [ 8 .. 15]   handle
525 b2e3d87f Nick Thomas
       [16 .. 23]   from
526 b2e3d87f Nick Thomas
       [24 .. 27]   len
527 b2e3d87f Nick Thomas
     */
528 b2e3d87f Nick Thomas
529 b2e3d87f Nick Thomas
    magic = be32_to_cpup((uint32_t*)buf);
530 b2e3d87f Nick Thomas
    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
531 b2e3d87f Nick Thomas
    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
532 b2e3d87f Nick Thomas
    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
533 b2e3d87f Nick Thomas
    request->len   = be32_to_cpup((uint32_t*)(buf + 24));
534 b2e3d87f Nick Thomas
535 b2e3d87f Nick Thomas
    TRACE("Got request: "
536 b2e3d87f Nick Thomas
          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
537 b2e3d87f Nick Thomas
          magic, request->type, request->from, request->len);
538 b2e3d87f Nick Thomas
539 b2e3d87f Nick Thomas
    if (magic != NBD_REQUEST_MAGIC) {
540 b2e3d87f Nick Thomas
        LOG("invalid magic (got 0x%x)", magic);
541 185b4338 Paolo Bonzini
        return -EINVAL;
542 b2e3d87f Nick Thomas
    }
543 b2e3d87f Nick Thomas
    return 0;
544 75818250 ths
}
545 75818250 ths
546 94e7340b Paolo Bonzini
ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply)
547 75818250 ths
{
548 b2e3d87f Nick Thomas
    uint8_t buf[NBD_REPLY_SIZE];
549 b2e3d87f Nick Thomas
    uint32_t magic;
550 185b4338 Paolo Bonzini
    ssize_t ret;
551 b2e3d87f Nick Thomas
552 185b4338 Paolo Bonzini
    ret = read_sync(csock, buf, sizeof(buf));
553 185b4338 Paolo Bonzini
    if (ret < 0) {
554 185b4338 Paolo Bonzini
        return ret;
555 185b4338 Paolo Bonzini
    }
556 185b4338 Paolo Bonzini
557 185b4338 Paolo Bonzini
    if (ret != sizeof(buf)) {
558 b2e3d87f Nick Thomas
        LOG("read failed");
559 185b4338 Paolo Bonzini
        return -EINVAL;
560 b2e3d87f Nick Thomas
    }
561 b2e3d87f Nick Thomas
562 b2e3d87f Nick Thomas
    /* Reply
563 b2e3d87f Nick Thomas
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
564 b2e3d87f Nick Thomas
       [ 4 ..  7]    error   (0 == no error)
565 b2e3d87f Nick Thomas
       [ 7 .. 15]    handle
566 b2e3d87f Nick Thomas
     */
567 b2e3d87f Nick Thomas
568 b2e3d87f Nick Thomas
    magic = be32_to_cpup((uint32_t*)buf);
569 b2e3d87f Nick Thomas
    reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
570 b2e3d87f Nick Thomas
    reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
571 b2e3d87f Nick Thomas
572 b2e3d87f Nick Thomas
    TRACE("Got reply: "
573 b2e3d87f Nick Thomas
          "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
574 b2e3d87f Nick Thomas
          magic, reply->error, reply->handle);
575 b2e3d87f Nick Thomas
576 b2e3d87f Nick Thomas
    if (magic != NBD_REPLY_MAGIC) {
577 b2e3d87f Nick Thomas
        LOG("invalid magic (got 0x%x)", magic);
578 185b4338 Paolo Bonzini
        return -EINVAL;
579 b2e3d87f Nick Thomas
    }
580 b2e3d87f Nick Thomas
    return 0;
581 75818250 ths
}
582 75818250 ths
583 94e7340b Paolo Bonzini
static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
584 75818250 ths
{
585 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8];
586 185b4338 Paolo Bonzini
    ssize_t ret;
587 b2e3d87f Nick Thomas
588 b2e3d87f Nick Thomas
    /* Reply
589 b2e3d87f Nick Thomas
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
590 b2e3d87f Nick Thomas
       [ 4 ..  7]    error   (0 == no error)
591 b2e3d87f Nick Thomas
       [ 7 .. 15]    handle
592 b2e3d87f Nick Thomas
     */
593 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
594 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
595 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
596 b2e3d87f Nick Thomas
597 b2e3d87f Nick Thomas
    TRACE("Sending response to client");
598 b2e3d87f Nick Thomas
599 185b4338 Paolo Bonzini
    ret = write_sync(csock, buf, sizeof(buf));
600 185b4338 Paolo Bonzini
    if (ret < 0) {
601 185b4338 Paolo Bonzini
        return ret;
602 185b4338 Paolo Bonzini
    }
603 185b4338 Paolo Bonzini
604 185b4338 Paolo Bonzini
    if (ret != sizeof(buf)) {
605 b2e3d87f Nick Thomas
        LOG("writing to socket failed");
606 185b4338 Paolo Bonzini
        return -EINVAL;
607 b2e3d87f Nick Thomas
    }
608 b2e3d87f Nick Thomas
    return 0;
609 75818250 ths
}
610 7a5ca864 bellard
611 41996e38 Paolo Bonzini
#define MAX_NBD_REQUESTS 16
612 41996e38 Paolo Bonzini
613 d9a73806 Paolo Bonzini
typedef struct NBDRequest NBDRequest;
614 d9a73806 Paolo Bonzini
615 d9a73806 Paolo Bonzini
struct NBDRequest {
616 d9a73806 Paolo Bonzini
    QSIMPLEQ_ENTRY(NBDRequest) entry;
617 72deddc5 Paolo Bonzini
    NBDClient *client;
618 d9a73806 Paolo Bonzini
    uint8_t *data;
619 d9a73806 Paolo Bonzini
};
620 d9a73806 Paolo Bonzini
621 af49bbbe Paolo Bonzini
struct NBDExport {
622 af49bbbe Paolo Bonzini
    BlockDriverState *bs;
623 af49bbbe Paolo Bonzini
    off_t dev_offset;
624 af49bbbe Paolo Bonzini
    off_t size;
625 af49bbbe Paolo Bonzini
    uint32_t nbdflags;
626 d9a73806 Paolo Bonzini
    QSIMPLEQ_HEAD(, NBDRequest) requests;
627 af49bbbe Paolo Bonzini
};
628 af49bbbe Paolo Bonzini
629 1743b515 Paolo Bonzini
struct NBDClient {
630 1743b515 Paolo Bonzini
    int refcount;
631 1743b515 Paolo Bonzini
    void (*close)(NBDClient *client);
632 1743b515 Paolo Bonzini
633 1743b515 Paolo Bonzini
    NBDExport *exp;
634 1743b515 Paolo Bonzini
    int sock;
635 262db388 Paolo Bonzini
636 262db388 Paolo Bonzini
    Coroutine *recv_coroutine;
637 262db388 Paolo Bonzini
638 262db388 Paolo Bonzini
    CoMutex send_lock;
639 262db388 Paolo Bonzini
    Coroutine *send_coroutine;
640 41996e38 Paolo Bonzini
641 41996e38 Paolo Bonzini
    int nb_requests;
642 1743b515 Paolo Bonzini
};
643 1743b515 Paolo Bonzini
644 1743b515 Paolo Bonzini
/* Take an additional reference on @client. */
static void nbd_client_get(NBDClient *client)
{
    client->refcount += 1;
}
648 1743b515 Paolo Bonzini
649 1743b515 Paolo Bonzini
/* Drop one reference on @client; the last reference frees the structure. */
static void nbd_client_put(NBDClient *client)
{
    client->refcount -= 1;
    if (client->refcount != 0) {
        return;
    }
    g_free(client);
}
655 1743b515 Paolo Bonzini
656 1743b515 Paolo Bonzini
/*
 * Shut down the connection of @client: unregister its fd handlers, close
 * the socket, run the optional close callback and drop one reference.
 */
static void nbd_client_close(NBDClient *client)
{
    int fd = client->sock;

    qemu_set_fd_handler2(fd, NULL, NULL, NULL, NULL);
    close(fd);
    client->sock = -1;

    if (client->close != NULL) {
        client->close(client);
    }

    nbd_client_put(client);
}
666 1743b515 Paolo Bonzini
667 72deddc5 Paolo Bonzini
/*
 * Obtain a request slot for @client.
 *
 * An idle slot is taken from the export's queue when one is available;
 * otherwise a new slot with a block-aligned NBD_BUFFER_SIZE buffer is
 * allocated.  The client's outstanding-request counter is incremented and a
 * client reference is taken on behalf of the returned slot.
 */
static NBDRequest *nbd_request_get(NBDClient *client)
{
    NBDExport *owner = client->exp;
    NBDRequest *slot;

    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
    client->nb_requests++;

    slot = QSIMPLEQ_FIRST(&owner->requests);
    if (slot != NULL) {
        /* Reuse the most recently released slot. */
        QSIMPLEQ_REMOVE_HEAD(&owner->requests, entry);
    } else {
        slot = g_malloc0(sizeof(NBDRequest));
        slot->data = qemu_blockalign(owner->bs, NBD_BUFFER_SIZE);
    }

    nbd_client_get(client);
    slot->client = client;
    return slot;
}
686 d9a73806 Paolo Bonzini
687 72deddc5 Paolo Bonzini
/*
 * Return @req to its export's idle queue and release the client reference
 * it held.  If the client was at MAX_NBD_REQUESTS before this release, the
 * main loop is notified via qemu_notify_event().
 */
static void nbd_request_put(NBDRequest *req)
{
    NBDClient *owner = req->client;
    int was_at_limit;

    QSIMPLEQ_INSERT_HEAD(&owner->exp->requests, req, entry);

    was_at_limit = (owner->nb_requests == MAX_NBD_REQUESTS);
    owner->nb_requests--;
    if (was_at_limit) {
        qemu_notify_event();
    }

    nbd_client_put(owner);
}
696 d9a73806 Paolo Bonzini
697 af49bbbe Paolo Bonzini
/*
 * Allocate and initialise an export for @bs.
 *
 * @dev_offset: byte offset stored in the export.
 * @size:       export size; -1 means "use bdrv_getlength(bs)".
 * @nbdflags:   flags stored in the export.
 *
 * Returns the newly allocated export.
 */
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
                          off_t size, uint32_t nbdflags)
{
    NBDExport *created = g_malloc0(sizeof(NBDExport));

    QSIMPLEQ_INIT(&created->requests);
    created->bs = bs;
    created->dev_offset = dev_offset;
    created->nbdflags = nbdflags;

    if (size == -1) {
        /* NOTE(review): the bdrv_getlength() result is stored unchecked. */
        created->size = bdrv_getlength(bs);
    } else {
        created->size = size;
    }

    return created;
}
708 af49bbbe Paolo Bonzini
709 af49bbbe Paolo Bonzini
/*
 * Tear down @exp: free every idle request slot and its buffer, close the
 * block device and free the export itself.
 */
void nbd_export_close(NBDExport *exp)
{
    NBDRequest *idle;

    while ((idle = QSIMPLEQ_FIRST(&exp->requests)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
        qemu_vfree(idle->data);
        g_free(idle);
    }

    bdrv_close(exp->bs);
    g_free(exp);
}
721 af49bbbe Paolo Bonzini
722 41996e38 Paolo Bonzini
/*
 * Forward declarations of the fd handlers: nbd_co_send_reply() below
 * registers them with qemu_set_fd_handler2() before they are defined.
 */
static int nbd_can_read(void *opaque);
723 262db388 Paolo Bonzini
static void nbd_read(void *opaque);
724 262db388 Paolo Bonzini
static void nbd_restart_write(void *opaque);
725 262db388 Paolo Bonzini
726 94e7340b Paolo Bonzini
/*
 * Send @reply to the client owning @req, followed by @len bytes of
 * req->data when @len is non-zero.
 *
 * The client's send_lock is held for the whole operation.  While sending,
 * nbd_restart_write() is installed as the socket's write handler and
 * client->send_coroutine points at the running coroutine; both are reset
 * before the lock is released.  When a payload follows, the socket is
 * corked around header and payload.
 *
 * Returns the result of nbd_send_reply(), or -EIO if the header was sent
 * but the payload was not written completely.
 */
static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
                                 int len)
{
    NBDClient *client = req->client;
    int fd = client->sock;
    ssize_t status;

    qemu_co_mutex_lock(&client->send_lock);
    qemu_set_fd_handler2(fd, nbd_can_read, nbd_read,
                         nbd_restart_write, client);
    client->send_coroutine = qemu_coroutine_self();

    if (len) {
        socket_set_cork(fd, 1);
        status = nbd_send_reply(fd, reply);
        if (status >= 0) {
            ssize_t sent = qemu_co_send(fd, req->data, len);

            if (sent != len) {
                status = -EIO;
            }
        }
        socket_set_cork(fd, 0);
    } else {
        status = nbd_send_reply(fd, reply);
    }

    client->send_coroutine = NULL;
    qemu_set_fd_handler2(fd, nbd_can_read, nbd_read, NULL, client);
    qemu_co_mutex_unlock(&client->send_lock);
    return status;
}
757 22045592 Paolo Bonzini
758 94e7340b Paolo Bonzini
/*
 * Receive one request header into @request and, for NBD_CMD_WRITE, its
 * payload into req->data.
 *
 * client->recv_coroutine points at the running coroutine for the duration
 * of the call and is cleared on every exit path.
 *
 * Returns 0 on success, -EAGAIN if nbd_receive_request() reported it,
 * -EIO for any other receive failure or a short payload read, and -EINVAL
 * when the length exceeds NBD_BUFFER_SIZE or from + len wraps around.
 */
static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
{
    NBDClient *client = req->client;
    int fd = client->sock;
    ssize_t result;

    client->recv_coroutine = qemu_coroutine_self();

    result = nbd_receive_request(fd, request);
    if (result < 0) {
        /* Every failure other than -EAGAIN is reported as -EIO. */
        if (result != -EAGAIN) {
            result = -EIO;
        }
    } else if (request->len > NBD_BUFFER_SIZE) {
        LOG("len (%u) is larger than max len (%u)",
            request->len, NBD_BUFFER_SIZE);
        result = -EINVAL;
    } else if ((request->from + request->len) < request->from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        result = -EINVAL;
    } else {
        TRACE("Decoding type");

        result = 0;
        if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
            TRACE("Reading %u byte(s)", request->len);

            if (qemu_co_recv(fd, req->data, request->len) != request->len) {
                LOG("reading from socket failed");
                result = -EIO;
            }
        }
    }

    client->recv_coroutine = NULL;
    return result;
}
804 a030b347 Paolo Bonzini
805 262db388 Paolo Bonzini
static void nbd_trip(void *opaque)
806 75818250 ths
{
807 262db388 Paolo Bonzini
    NBDClient *client = opaque;
808 72deddc5 Paolo Bonzini
    NBDRequest *req = nbd_request_get(client);
809 1743b515 Paolo Bonzini
    NBDExport *exp = client->exp;
810 b2e3d87f Nick Thomas
    struct nbd_request request;
811 b2e3d87f Nick Thomas
    struct nbd_reply reply;
812 94e7340b Paolo Bonzini
    ssize_t ret;
813 b2e3d87f Nick Thomas
814 b2e3d87f Nick Thomas
    TRACE("Reading request.");
815 b2e3d87f Nick Thomas
816 262db388 Paolo Bonzini
    ret = nbd_co_receive_request(req, &request);
817 7fe7b68b Paolo Bonzini
    if (ret == -EAGAIN) {
818 7fe7b68b Paolo Bonzini
        goto done;
819 7fe7b68b Paolo Bonzini
    }
820 a030b347 Paolo Bonzini
    if (ret == -EIO) {
821 d9a73806 Paolo Bonzini
        goto out;
822 a030b347 Paolo Bonzini
    }
823 b2e3d87f Nick Thomas
824 fae69416 Paolo Bonzini
    reply.handle = request.handle;
825 fae69416 Paolo Bonzini
    reply.error = 0;
826 fae69416 Paolo Bonzini
827 a030b347 Paolo Bonzini
    if (ret < 0) {
828 a030b347 Paolo Bonzini
        reply.error = -ret;
829 a030b347 Paolo Bonzini
        goto error_reply;
830 b2e3d87f Nick Thomas
    }
831 b2e3d87f Nick Thomas
832 af49bbbe Paolo Bonzini
    if ((request.from + request.len) > exp->size) {
833 b2e3d87f Nick Thomas
            LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
834 b2e3d87f Nick Thomas
            ", Offset: %" PRIu64 "\n",
835 af49bbbe Paolo Bonzini
                    request.from, request.len,
836 0fee8f34 Stefan Weil
                    (uint64_t)exp->size, (uint64_t)exp->dev_offset);
837 b2e3d87f Nick Thomas
        LOG("requested operation past EOF--bad client?");
838 fae69416 Paolo Bonzini
        goto invalid_request;
839 b2e3d87f Nick Thomas
    }
840 b2e3d87f Nick Thomas
841 2c7989a9 Paolo Bonzini
    switch (request.type & NBD_CMD_MASK_COMMAND) {
842 b2e3d87f Nick Thomas
    case NBD_CMD_READ:
843 b2e3d87f Nick Thomas
        TRACE("Request type is READ");
844 b2e3d87f Nick Thomas
845 e25ceb76 Paolo Bonzini
        if (request.type & NBD_CMD_FLAG_FUA) {
846 e25ceb76 Paolo Bonzini
            ret = bdrv_co_flush(exp->bs);
847 e25ceb76 Paolo Bonzini
            if (ret < 0) {
848 e25ceb76 Paolo Bonzini
                LOG("flush failed");
849 e25ceb76 Paolo Bonzini
                reply.error = -ret;
850 e25ceb76 Paolo Bonzini
                goto error_reply;
851 e25ceb76 Paolo Bonzini
            }
852 e25ceb76 Paolo Bonzini
        }
853 e25ceb76 Paolo Bonzini
854 af49bbbe Paolo Bonzini
        ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512,
855 d9a73806 Paolo Bonzini
                        req->data, request.len / 512);
856 adcf6302 Paolo Bonzini
        if (ret < 0) {
857 b2e3d87f Nick Thomas
            LOG("reading from file failed");
858 adcf6302 Paolo Bonzini
            reply.error = -ret;
859 fae69416 Paolo Bonzini
            goto error_reply;
860 b2e3d87f Nick Thomas
        }
861 b2e3d87f Nick Thomas
862 b2e3d87f Nick Thomas
        TRACE("Read %u byte(s)", request.len);
863 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, request.len) < 0)
864 d9a73806 Paolo Bonzini
            goto out;
865 b2e3d87f Nick Thomas
        break;
866 b2e3d87f Nick Thomas
    case NBD_CMD_WRITE:
867 b2e3d87f Nick Thomas
        TRACE("Request type is WRITE");
868 b2e3d87f Nick Thomas
869 af49bbbe Paolo Bonzini
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
870 b2e3d87f Nick Thomas
            TRACE("Server is read-only, return error");
871 fae69416 Paolo Bonzini
            reply.error = EROFS;
872 fae69416 Paolo Bonzini
            goto error_reply;
873 fae69416 Paolo Bonzini
        }
874 fae69416 Paolo Bonzini
875 fae69416 Paolo Bonzini
        TRACE("Writing to device");
876 fae69416 Paolo Bonzini
877 af49bbbe Paolo Bonzini
        ret = bdrv_write(exp->bs, (request.from + exp->dev_offset) / 512,
878 d9a73806 Paolo Bonzini
                         req->data, request.len / 512);
879 fae69416 Paolo Bonzini
        if (ret < 0) {
880 fae69416 Paolo Bonzini
            LOG("writing to file failed");
881 fae69416 Paolo Bonzini
            reply.error = -ret;
882 fae69416 Paolo Bonzini
            goto error_reply;
883 fae69416 Paolo Bonzini
        }
884 b2e3d87f Nick Thomas
885 fae69416 Paolo Bonzini
        if (request.type & NBD_CMD_FLAG_FUA) {
886 262db388 Paolo Bonzini
            ret = bdrv_co_flush(exp->bs);
887 adcf6302 Paolo Bonzini
            if (ret < 0) {
888 fae69416 Paolo Bonzini
                LOG("flush failed");
889 adcf6302 Paolo Bonzini
                reply.error = -ret;
890 fae69416 Paolo Bonzini
                goto error_reply;
891 2c7989a9 Paolo Bonzini
            }
892 b2e3d87f Nick Thomas
        }
893 b2e3d87f Nick Thomas
894 fc19f8a0 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
895 d9a73806 Paolo Bonzini
            goto out;
896 fc19f8a0 Paolo Bonzini
        }
897 b2e3d87f Nick Thomas
        break;
898 b2e3d87f Nick Thomas
    case NBD_CMD_DISC:
899 b2e3d87f Nick Thomas
        TRACE("Request type is DISCONNECT");
900 b2e3d87f Nick Thomas
        errno = 0;
901 262db388 Paolo Bonzini
        goto out;
902 1486d04a Paolo Bonzini
    case NBD_CMD_FLUSH:
903 1486d04a Paolo Bonzini
        TRACE("Request type is FLUSH");
904 1486d04a Paolo Bonzini
905 262db388 Paolo Bonzini
        ret = bdrv_co_flush(exp->bs);
906 1486d04a Paolo Bonzini
        if (ret < 0) {
907 1486d04a Paolo Bonzini
            LOG("flush failed");
908 1486d04a Paolo Bonzini
            reply.error = -ret;
909 1486d04a Paolo Bonzini
        }
910 fc19f8a0 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
911 d9a73806 Paolo Bonzini
            goto out;
912 fc19f8a0 Paolo Bonzini
        }
913 1486d04a Paolo Bonzini
        break;
914 7a706633 Paolo Bonzini
    case NBD_CMD_TRIM:
915 7a706633 Paolo Bonzini
        TRACE("Request type is TRIM");
916 262db388 Paolo Bonzini
        ret = bdrv_co_discard(exp->bs, (request.from + exp->dev_offset) / 512,
917 262db388 Paolo Bonzini
                              request.len / 512);
918 7a706633 Paolo Bonzini
        if (ret < 0) {
919 7a706633 Paolo Bonzini
            LOG("discard failed");
920 7a706633 Paolo Bonzini
            reply.error = -ret;
921 7a706633 Paolo Bonzini
        }
922 fc19f8a0 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
923 d9a73806 Paolo Bonzini
            goto out;
924 fc19f8a0 Paolo Bonzini
        }
925 7a706633 Paolo Bonzini
        break;
926 b2e3d87f Nick Thomas
    default:
927 b2e3d87f Nick Thomas
        LOG("invalid request type (%u) received", request.type);
928 fae69416 Paolo Bonzini
    invalid_request:
929 fae69416 Paolo Bonzini
        reply.error = -EINVAL;
930 fae69416 Paolo Bonzini
    error_reply:
931 fc19f8a0 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
932 d9a73806 Paolo Bonzini
            goto out;
933 fc19f8a0 Paolo Bonzini
        }
934 fae69416 Paolo Bonzini
        break;
935 b2e3d87f Nick Thomas
    }
936 b2e3d87f Nick Thomas
937 b2e3d87f Nick Thomas
    TRACE("Request/Reply complete");
938 b2e3d87f Nick Thomas
939 7fe7b68b Paolo Bonzini
done:
940 262db388 Paolo Bonzini
    nbd_request_put(req);
941 262db388 Paolo Bonzini
    return;
942 262db388 Paolo Bonzini
943 d9a73806 Paolo Bonzini
out:
944 72deddc5 Paolo Bonzini
    nbd_request_put(req);
945 262db388 Paolo Bonzini
    nbd_client_close(client);
946 7a5ca864 bellard
}
947 af49bbbe Paolo Bonzini
948 41996e38 Paolo Bonzini
static int nbd_can_read(void *opaque)
949 41996e38 Paolo Bonzini
{
950 41996e38 Paolo Bonzini
    NBDClient *client = opaque;
951 41996e38 Paolo Bonzini
952 41996e38 Paolo Bonzini
    return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
953 41996e38 Paolo Bonzini
}
954 41996e38 Paolo Bonzini
955 1743b515 Paolo Bonzini
static void nbd_read(void *opaque)
956 1743b515 Paolo Bonzini
{
957 1743b515 Paolo Bonzini
    NBDClient *client = opaque;
958 1743b515 Paolo Bonzini
959 262db388 Paolo Bonzini
    if (client->recv_coroutine) {
960 262db388 Paolo Bonzini
        qemu_coroutine_enter(client->recv_coroutine, NULL);
961 262db388 Paolo Bonzini
    } else {
962 262db388 Paolo Bonzini
        qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
963 1743b515 Paolo Bonzini
    }
964 1743b515 Paolo Bonzini
}
965 1743b515 Paolo Bonzini
966 262db388 Paolo Bonzini
static void nbd_restart_write(void *opaque)
967 262db388 Paolo Bonzini
{
968 262db388 Paolo Bonzini
    NBDClient *client = opaque;
969 262db388 Paolo Bonzini
970 262db388 Paolo Bonzini
    qemu_coroutine_enter(client->send_coroutine, NULL);
971 262db388 Paolo Bonzini
}
972 262db388 Paolo Bonzini
973 1743b515 Paolo Bonzini
/*
 * Negotiate with the peer on @csock and create a client for @exp.
 *
 * @close is stored as the client's close callback (may be NULL; it is
 * checked before use in nbd_client_close()).
 *
 * Returns the new client with one reference and its read handlers
 * installed, or NULL if nbd_send_negotiate() fails.
 */
NBDClient *nbd_client_new(NBDExport *exp, int csock,
                          void (*close)(NBDClient *))
{
    NBDClient *created;

    if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
        return NULL;
    }

    created = g_malloc0(sizeof(NBDClient));
    created->refcount = 1;
    created->exp = exp;
    created->sock = csock;
    created->close = close;
    qemu_co_mutex_init(&created->send_lock);

    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, created);
    return created;
}