Statistics
| Branch: | Revision:

root / nbd.c @ 60e1b2a6

History | View | Annotate | Download (23.8 kB)

1 75818250 ths
/*
2 7a5ca864 bellard
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3 7a5ca864 bellard
 *
4 7a5ca864 bellard
 *  Network Block Device
5 7a5ca864 bellard
 *
6 7a5ca864 bellard
 *  This program is free software; you can redistribute it and/or modify
7 7a5ca864 bellard
 *  it under the terms of the GNU General Public License as published by
8 7a5ca864 bellard
 *  the Free Software Foundation; under version 2 of the License.
9 7a5ca864 bellard
 *
10 7a5ca864 bellard
 *  This program is distributed in the hope that it will be useful,
11 7a5ca864 bellard
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 7a5ca864 bellard
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 7a5ca864 bellard
 *  GNU General Public License for more details.
14 7a5ca864 bellard
 *
15 7a5ca864 bellard
 *  You should have received a copy of the GNU General Public License
16 8167ee88 Blue Swirl
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17 75818250 ths
 */
18 7a5ca864 bellard
19 7a5ca864 bellard
#include "nbd.h"
20 ab359cd1 Markus Armbruster
#include "block.h"
21 af49bbbe Paolo Bonzini
#include "block_int.h"
22 7a5ca864 bellard
23 262db388 Paolo Bonzini
#include "qemu-coroutine.h"
24 262db388 Paolo Bonzini
25 7a5ca864 bellard
#include <errno.h>
26 7a5ca864 bellard
#include <string.h>
27 03ff3ca3 aliguori
#ifndef _WIN32
28 7a5ca864 bellard
#include <sys/ioctl.h>
29 03ff3ca3 aliguori
#endif
30 5dc2eec9 Andreas Färber
#if defined(__sun__) || defined(__HAIKU__)
31 7e00eb9b aliguori
#include <sys/ioccom.h>
32 7e00eb9b aliguori
#endif
33 7a5ca864 bellard
#include <ctype.h>
34 7a5ca864 bellard
#include <inttypes.h>
35 75818250 ths
36 b90fb4b8 Paolo Bonzini
#ifdef __linux__
37 b90fb4b8 Paolo Bonzini
#include <linux/fs.h>
38 b90fb4b8 Paolo Bonzini
#endif
39 b90fb4b8 Paolo Bonzini
40 03ff3ca3 aliguori
#include "qemu_socket.h"
41 d9a73806 Paolo Bonzini
#include "qemu-queue.h"
42 03ff3ca3 aliguori
43 03ff3ca3 aliguori
//#define DEBUG_NBD
44 03ff3ca3 aliguori
45 03ff3ca3 aliguori
/*
 * LOG(fmt, ...): unconditionally print a message to stderr, prefixed with
 * the source file, enclosing function and line number of the call site.
 * A trailing newline is appended automatically.
 */
#define LOG(msg, ...) \
    do { \
        fprintf(stderr, "%s:%s():L%d: " msg "\n", \
                __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
    } while(0)

/*
 * TRACE(fmt, ...): same as LOG() when DEBUG_NBD is defined; otherwise it
 * expands to an empty statement and its arguments are not evaluated.
 */
#ifdef DEBUG_NBD
#define TRACE(msg, ...) \
    do { \
        LOG(msg, ## __VA_ARGS__); \
    } while(0)
#else
#define TRACE(msg, ...) \
    do { } while (0)
#endif
58 7a5ca864 bellard
59 7a5ca864 bellard
/* This is all part of the "official" NBD API */
60 7a5ca864 bellard
61 b2e3d87f Nick Thomas
/* Size in bytes of a reply header: magic (4) + error (4) + handle (8). */
#define NBD_REPLY_SIZE          (4 + 4 + 8)

/* Magic numbers that open every request and every reply header. */
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* ioctl request codes issued on the NBD device file descriptor. */
#define NBD_SET_SOCK            _IO(0xab, 0)
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
#define NBD_SET_SIZE            _IO(0xab, 2)
#define NBD_DO_IT               _IO(0xab, 3)
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
#define NBD_CLEAR_QUE           _IO(0xab, 5)
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
#define NBD_DISCONNECT          _IO(0xab, 8)
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
#define NBD_SET_FLAGS           _IO(0xab, 10)

/* Option code sent during negotiation to select an export by name. */
#define NBD_OPT_EXPORT_NAME     (1 << 0)
78 1d45f8b5 Laurent Vivier
79 7a5ca864 bellard
/* That's all folks */
80 7a5ca864 bellard
81 75818250 ths
/*
 * Convenience wrappers around nbd_wr_sync(): the last argument selects the
 * direction (true = receive into @buffer, false = send from @buffer).
 */
#define read_sync(fd, buffer, size) \
    nbd_wr_sync((fd), (buffer), (size), true)
#define write_sync(fd, buffer, size) \
    nbd_wr_sync((fd), (buffer), (size), false)
83 7a5ca864 bellard
84 75818250 ths
/*
 * Transfer exactly @size bytes between socket @fd and @buffer.
 *
 * @do_read selects the direction: true receives into @buffer, false sends
 * from it.
 *
 * When called from a coroutine the whole transfer is delegated to
 * qemu_co_recv()/qemu_co_send() and their result is returned as-is.
 * Otherwise the call loops until everything has been transferred, retrying
 * on EAGAIN and EINTR.
 *
 * Returns the number of bytes transferred.  That is less than @size if the
 * peer closed the connection (a transfer returned 0), and 0 if an
 * unrecoverable socket error occurred; in the latter case errno holds the
 * value reported by socket_error().
 */
size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    /*
     * Byte-typed view of the caller's buffer: arithmetic on "void *" is a
     * compiler extension, not ISO C.
     */
    char *bytes = buffer;
    size_t offset = 0;

    if (qemu_in_coroutine()) {
        if (do_read) {
            return qemu_co_recv(fd, buffer, size);
        } else {
            return qemu_co_send(fd, buffer, size);
        }
    }

    while (offset < size) {
        char *cursor = bytes + offset;
        size_t remaining = size - offset;
        ssize_t len;

        if (do_read) {
            len = qemu_recv(fd, cursor, remaining, 0);
        } else {
            len = send(fd, cursor, remaining, 0);
        }

        if (len == -1) {
            errno = socket_error();

            /* recoverable error: try the same transfer again */
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            }

            /* unrecoverable error */
            return 0;
        }

        /* eof */
        if (len == 0) {
            break;
        }

        offset += len;
    }

    return offset;
}
128 7a5ca864 bellard
129 c12504ce Nick Thomas
/*
 * Format @address and @port as a single "host:port" string into @buf
 * (at most @len bytes including the terminating NUL; longer results are
 * truncated by snprintf()).
 *
 * An address that contains a colon is taken to be an IPv6 literal and is
 * wrapped in square brackets: "[addr]:port".
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    const char *fmt = strchr(address, ':') != NULL ? "[%s]:%u" : "%s:%u";

    snprintf(buf, len, fmt, address, (unsigned int)port);
}
139 7a5ca864 bellard
140 c12504ce Nick Thomas
/*
 * Connect to @address:@port.  The pair is first rendered into a
 * "host:port" string (128-byte buffer) and then handed to
 * tcp_socket_outgoing_spec(), whose result is returned.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_outgoing_spec(spec);
}
146 7a5ca864 bellard
147 c12504ce Nick Thomas
/*
 * Open an outgoing stream connection to the "host:port" string
 * @address_and_port.  Returns whatever inet_connect() returns.
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int fd = inet_connect(address_and_port, SOCK_STREAM);

    return fd;
}
151 cd831bd7 ths
152 c12504ce Nick Thomas
/*
 * Listen on @address:@port.  The pair is first rendered into a
 * "host:port" string (128-byte buffer) and then handed to
 * tcp_socket_incoming_spec(), whose result is returned.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_incoming_spec(spec);
}
158 cd831bd7 ths
159 c12504ce Nick Thomas
/*
 * Create a listening stream socket for the "host:port" string
 * @address_and_port.  No output buffer is passed to inet_listen()
 * (NULL, length 0) and the final argument is 0.  Returns whatever
 * inet_listen() returns.
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0);
}
165 c12504ce Nick Thomas
166 03ff3ca3 aliguori
/*
 * Create a listening UNIX-domain socket at @path.  No output buffer is
 * passed to unix_listen() (NULL, length 0).  Returns whatever
 * unix_listen() returns.
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
173 cd831bd7 ths
174 03ff3ca3 aliguori
/*
 * Connect to the UNIX-domain socket at @path.  Returns whatever
 * unix_connect() returns.
 */
int unix_socket_outgoing(const char *path)
{
    int fd = unix_connect(path);

    return fd;
}
178 cd831bd7 ths
179 7a5ca864 bellard
/* Basic flow
180 7a5ca864 bellard

181 7a5ca864 bellard
   Server         Client
182 7a5ca864 bellard

183 7a5ca864 bellard
   Negotiate
184 7a5ca864 bellard
                  Request
185 7a5ca864 bellard
   Response
186 7a5ca864 bellard
                  Request
187 7a5ca864 bellard
   Response
188 7a5ca864 bellard
                  ...
189 7a5ca864 bellard
   ...
190 7a5ca864 bellard
                  Request (type == 2)
191 7a5ca864 bellard
*/
192 7a5ca864 bellard
193 af49bbbe Paolo Bonzini
static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
194 7a5ca864 bellard
{
195 b2e3d87f Nick Thomas
    char buf[8 + 8 + 8 + 128];
196 b2e3d87f Nick Thomas
197 b2e3d87f Nick Thomas
    /* Negotiate
198 b2e3d87f Nick Thomas
        [ 0 ..   7]   passwd   ("NBDMAGIC")
199 b2e3d87f Nick Thomas
        [ 8 ..  15]   magic    (0x00420281861253)
200 b2e3d87f Nick Thomas
        [16 ..  23]   size
201 b90fb4b8 Paolo Bonzini
        [24 ..  27]   flags
202 b90fb4b8 Paolo Bonzini
        [28 .. 151]   reserved (0)
203 b2e3d87f Nick Thomas
     */
204 b2e3d87f Nick Thomas
205 b2e3d87f Nick Thomas
    TRACE("Beginning negotiation.");
206 b2e3d87f Nick Thomas
    memcpy(buf, "NBDMAGIC", 8);
207 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
208 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 16), size);
209 2c7989a9 Paolo Bonzini
    cpu_to_be32w((uint32_t*)(buf + 24),
210 7a706633 Paolo Bonzini
                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
211 7a706633 Paolo Bonzini
                 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
212 b90fb4b8 Paolo Bonzini
    memset(buf + 28, 0, 124);
213 b2e3d87f Nick Thomas
214 b2e3d87f Nick Thomas
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
215 b2e3d87f Nick Thomas
        LOG("write failed");
216 b2e3d87f Nick Thomas
        errno = EINVAL;
217 b2e3d87f Nick Thomas
        return -1;
218 b2e3d87f Nick Thomas
    }
219 b2e3d87f Nick Thomas
220 07f35073 Dong Xu Wang
    TRACE("Negotiation succeeded.");
221 b2e3d87f Nick Thomas
222 b2e3d87f Nick Thomas
    return 0;
223 7a5ca864 bellard
}
224 7a5ca864 bellard
225 1d45f8b5 Laurent Vivier
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
226 1d45f8b5 Laurent Vivier
                          off_t *size, size_t *blocksize)
227 7a5ca864 bellard
{
228 b2e3d87f Nick Thomas
    char buf[256];
229 b2e3d87f Nick Thomas
    uint64_t magic, s;
230 b2e3d87f Nick Thomas
    uint16_t tmp;
231 b2e3d87f Nick Thomas
232 07f35073 Dong Xu Wang
    TRACE("Receiving negotiation.");
233 b2e3d87f Nick Thomas
234 b2e3d87f Nick Thomas
    if (read_sync(csock, buf, 8) != 8) {
235 b2e3d87f Nick Thomas
        LOG("read failed");
236 b2e3d87f Nick Thomas
        errno = EINVAL;
237 b2e3d87f Nick Thomas
        return -1;
238 b2e3d87f Nick Thomas
    }
239 b2e3d87f Nick Thomas
240 b2e3d87f Nick Thomas
    buf[8] = '\0';
241 b2e3d87f Nick Thomas
    if (strlen(buf) == 0) {
242 b2e3d87f Nick Thomas
        LOG("server connection closed");
243 b2e3d87f Nick Thomas
        errno = EINVAL;
244 b2e3d87f Nick Thomas
        return -1;
245 b2e3d87f Nick Thomas
    }
246 b2e3d87f Nick Thomas
247 b2e3d87f Nick Thomas
    TRACE("Magic is %c%c%c%c%c%c%c%c",
248 b2e3d87f Nick Thomas
          qemu_isprint(buf[0]) ? buf[0] : '.',
249 b2e3d87f Nick Thomas
          qemu_isprint(buf[1]) ? buf[1] : '.',
250 b2e3d87f Nick Thomas
          qemu_isprint(buf[2]) ? buf[2] : '.',
251 b2e3d87f Nick Thomas
          qemu_isprint(buf[3]) ? buf[3] : '.',
252 b2e3d87f Nick Thomas
          qemu_isprint(buf[4]) ? buf[4] : '.',
253 b2e3d87f Nick Thomas
          qemu_isprint(buf[5]) ? buf[5] : '.',
254 b2e3d87f Nick Thomas
          qemu_isprint(buf[6]) ? buf[6] : '.',
255 b2e3d87f Nick Thomas
          qemu_isprint(buf[7]) ? buf[7] : '.');
256 b2e3d87f Nick Thomas
257 b2e3d87f Nick Thomas
    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
258 b2e3d87f Nick Thomas
        LOG("Invalid magic received");
259 b2e3d87f Nick Thomas
        errno = EINVAL;
260 b2e3d87f Nick Thomas
        return -1;
261 b2e3d87f Nick Thomas
    }
262 b2e3d87f Nick Thomas
263 b2e3d87f Nick Thomas
    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
264 b2e3d87f Nick Thomas
        LOG("read failed");
265 b2e3d87f Nick Thomas
        errno = EINVAL;
266 b2e3d87f Nick Thomas
        return -1;
267 b2e3d87f Nick Thomas
    }
268 b2e3d87f Nick Thomas
    magic = be64_to_cpu(magic);
269 b2e3d87f Nick Thomas
    TRACE("Magic is 0x%" PRIx64, magic);
270 b2e3d87f Nick Thomas
271 b2e3d87f Nick Thomas
    if (name) {
272 b2e3d87f Nick Thomas
        uint32_t reserved = 0;
273 b2e3d87f Nick Thomas
        uint32_t opt;
274 b2e3d87f Nick Thomas
        uint32_t namesize;
275 b2e3d87f Nick Thomas
276 b2e3d87f Nick Thomas
        TRACE("Checking magic (opts_magic)");
277 b2e3d87f Nick Thomas
        if (magic != 0x49484156454F5054LL) {
278 b2e3d87f Nick Thomas
            LOG("Bad magic received");
279 b2e3d87f Nick Thomas
            errno = EINVAL;
280 b2e3d87f Nick Thomas
            return -1;
281 b2e3d87f Nick Thomas
        }
282 b2e3d87f Nick Thomas
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
283 b2e3d87f Nick Thomas
            LOG("flags read failed");
284 b2e3d87f Nick Thomas
            errno = EINVAL;
285 b2e3d87f Nick Thomas
            return -1;
286 b2e3d87f Nick Thomas
        }
287 b2e3d87f Nick Thomas
        *flags = be16_to_cpu(tmp) << 16;
288 b2e3d87f Nick Thomas
        /* reserved for future use */
289 b2e3d87f Nick Thomas
        if (write_sync(csock, &reserved, sizeof(reserved)) !=
290 b2e3d87f Nick Thomas
            sizeof(reserved)) {
291 b2e3d87f Nick Thomas
            LOG("write failed (reserved)");
292 b2e3d87f Nick Thomas
            errno = EINVAL;
293 b2e3d87f Nick Thomas
            return -1;
294 b2e3d87f Nick Thomas
        }
295 b2e3d87f Nick Thomas
        /* write the export name */
296 b2e3d87f Nick Thomas
        magic = cpu_to_be64(magic);
297 b2e3d87f Nick Thomas
        if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
298 b2e3d87f Nick Thomas
            LOG("write failed (magic)");
299 b2e3d87f Nick Thomas
            errno = EINVAL;
300 b2e3d87f Nick Thomas
            return -1;
301 b2e3d87f Nick Thomas
        }
302 b2e3d87f Nick Thomas
        opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
303 b2e3d87f Nick Thomas
        if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
304 b2e3d87f Nick Thomas
            LOG("write failed (opt)");
305 b2e3d87f Nick Thomas
            errno = EINVAL;
306 b2e3d87f Nick Thomas
            return -1;
307 b2e3d87f Nick Thomas
        }
308 b2e3d87f Nick Thomas
        namesize = cpu_to_be32(strlen(name));
309 b2e3d87f Nick Thomas
        if (write_sync(csock, &namesize, sizeof(namesize)) !=
310 b2e3d87f Nick Thomas
            sizeof(namesize)) {
311 b2e3d87f Nick Thomas
            LOG("write failed (namesize)");
312 b2e3d87f Nick Thomas
            errno = EINVAL;
313 b2e3d87f Nick Thomas
            return -1;
314 b2e3d87f Nick Thomas
        }
315 b2e3d87f Nick Thomas
        if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
316 b2e3d87f Nick Thomas
            LOG("write failed (name)");
317 b2e3d87f Nick Thomas
            errno = EINVAL;
318 b2e3d87f Nick Thomas
            return -1;
319 b2e3d87f Nick Thomas
        }
320 b2e3d87f Nick Thomas
    } else {
321 b2e3d87f Nick Thomas
        TRACE("Checking magic (cli_magic)");
322 b2e3d87f Nick Thomas
323 b2e3d87f Nick Thomas
        if (magic != 0x00420281861253LL) {
324 b2e3d87f Nick Thomas
            LOG("Bad magic received");
325 b2e3d87f Nick Thomas
            errno = EINVAL;
326 b2e3d87f Nick Thomas
            return -1;
327 b2e3d87f Nick Thomas
        }
328 b2e3d87f Nick Thomas
    }
329 b2e3d87f Nick Thomas
330 b2e3d87f Nick Thomas
    if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
331 b2e3d87f Nick Thomas
        LOG("read failed");
332 b2e3d87f Nick Thomas
        errno = EINVAL;
333 b2e3d87f Nick Thomas
        return -1;
334 b2e3d87f Nick Thomas
    }
335 b2e3d87f Nick Thomas
    *size = be64_to_cpu(s);
336 b2e3d87f Nick Thomas
    *blocksize = 1024;
337 b2e3d87f Nick Thomas
    TRACE("Size is %" PRIu64, *size);
338 b2e3d87f Nick Thomas
339 b2e3d87f Nick Thomas
    if (!name) {
340 b2e3d87f Nick Thomas
        if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
341 b2e3d87f Nick Thomas
            LOG("read failed (flags)");
342 b2e3d87f Nick Thomas
            errno = EINVAL;
343 b2e3d87f Nick Thomas
            return -1;
344 b2e3d87f Nick Thomas
        }
345 b2e3d87f Nick Thomas
        *flags = be32_to_cpup(flags);
346 b2e3d87f Nick Thomas
    } else {
347 b2e3d87f Nick Thomas
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
348 b2e3d87f Nick Thomas
            LOG("read failed (tmp)");
349 b2e3d87f Nick Thomas
            errno = EINVAL;
350 b2e3d87f Nick Thomas
            return -1;
351 b2e3d87f Nick Thomas
        }
352 b2e3d87f Nick Thomas
        *flags |= be32_to_cpu(tmp);
353 b2e3d87f Nick Thomas
    }
354 b2e3d87f Nick Thomas
    if (read_sync(csock, &buf, 124) != 124) {
355 b2e3d87f Nick Thomas
        LOG("read failed (buf)");
356 b2e3d87f Nick Thomas
        errno = EINVAL;
357 b2e3d87f Nick Thomas
        return -1;
358 b2e3d87f Nick Thomas
    }
359 cd831bd7 ths
        return 0;
360 cd831bd7 ths
}
361 7a5ca864 bellard
362 b90fb4b8 Paolo Bonzini
#ifdef __linux__
363 b90fb4b8 Paolo Bonzini
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
364 cd831bd7 ths
{
365 3e05c785 Chunyan Liu
    TRACE("Setting NBD socket");
366 3e05c785 Chunyan Liu
367 3e05c785 Chunyan Liu
    if (ioctl(fd, NBD_SET_SOCK, csock) == -1) {
368 3e05c785 Chunyan Liu
        int serrno = errno;
369 3e05c785 Chunyan Liu
        LOG("Failed to set NBD socket");
370 3e05c785 Chunyan Liu
        errno = serrno;
371 3e05c785 Chunyan Liu
        return -1;
372 3e05c785 Chunyan Liu
    }
373 3e05c785 Chunyan Liu
374 b2e3d87f Nick Thomas
    TRACE("Setting block size to %lu", (unsigned long)blocksize);
375 7a5ca864 bellard
376 b2e3d87f Nick Thomas
    if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) {
377 b2e3d87f Nick Thomas
        int serrno = errno;
378 b2e3d87f Nick Thomas
        LOG("Failed setting NBD block size");
379 b2e3d87f Nick Thomas
        errno = serrno;
380 b2e3d87f Nick Thomas
        return -1;
381 b2e3d87f Nick Thomas
    }
382 7a5ca864 bellard
383 0bfcd599 Blue Swirl
        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
384 7a5ca864 bellard
385 b2e3d87f Nick Thomas
    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
386 b2e3d87f Nick Thomas
        int serrno = errno;
387 b2e3d87f Nick Thomas
        LOG("Failed setting size (in blocks)");
388 b2e3d87f Nick Thomas
        errno = serrno;
389 b2e3d87f Nick Thomas
        return -1;
390 b2e3d87f Nick Thomas
    }
391 7a5ca864 bellard
392 b90fb4b8 Paolo Bonzini
    if (flags & NBD_FLAG_READ_ONLY) {
393 b90fb4b8 Paolo Bonzini
        int read_only = 1;
394 b90fb4b8 Paolo Bonzini
        TRACE("Setting readonly attribute");
395 b90fb4b8 Paolo Bonzini
396 b90fb4b8 Paolo Bonzini
        if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
397 b90fb4b8 Paolo Bonzini
            int serrno = errno;
398 b90fb4b8 Paolo Bonzini
            LOG("Failed setting read-only attribute");
399 b90fb4b8 Paolo Bonzini
            errno = serrno;
400 b90fb4b8 Paolo Bonzini
            return -1;
401 b90fb4b8 Paolo Bonzini
        }
402 b90fb4b8 Paolo Bonzini
    }
403 b90fb4b8 Paolo Bonzini
404 973b3d0a Paolo Bonzini
    if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
405 973b3d0a Paolo Bonzini
        && errno != ENOTTY) {
406 973b3d0a Paolo Bonzini
        int serrno = errno;
407 973b3d0a Paolo Bonzini
        LOG("Failed setting flags");
408 973b3d0a Paolo Bonzini
        errno = serrno;
409 973b3d0a Paolo Bonzini
        return -1;
410 973b3d0a Paolo Bonzini
    }
411 973b3d0a Paolo Bonzini
412 b2e3d87f Nick Thomas
    TRACE("Negotiation ended");
413 7a5ca864 bellard
414 b2e3d87f Nick Thomas
    return 0;
415 7a5ca864 bellard
}
416 7a5ca864 bellard
417 7a5ca864 bellard
int nbd_disconnect(int fd)
418 7a5ca864 bellard
{
419 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_QUE);
420 b2e3d87f Nick Thomas
    ioctl(fd, NBD_DISCONNECT);
421 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_SOCK);
422 b2e3d87f Nick Thomas
    return 0;
423 7a5ca864 bellard
}
424 7a5ca864 bellard
425 0a4eb864 Jes Sorensen
int nbd_client(int fd)
426 7a5ca864 bellard
{
427 b2e3d87f Nick Thomas
    int ret;
428 b2e3d87f Nick Thomas
    int serrno;
429 7a5ca864 bellard
430 b2e3d87f Nick Thomas
    TRACE("Doing NBD loop");
431 7a5ca864 bellard
432 b2e3d87f Nick Thomas
    ret = ioctl(fd, NBD_DO_IT);
433 74624688 Paolo Bonzini
    if (ret == -1 && errno == EPIPE) {
434 74624688 Paolo Bonzini
        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
435 74624688 Paolo Bonzini
         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
436 74624688 Paolo Bonzini
         * that case.
437 74624688 Paolo Bonzini
         */
438 74624688 Paolo Bonzini
        ret = 0;
439 74624688 Paolo Bonzini
    }
440 b2e3d87f Nick Thomas
    serrno = errno;
441 7a5ca864 bellard
442 b2e3d87f Nick Thomas
    TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
443 7a5ca864 bellard
444 b2e3d87f Nick Thomas
    TRACE("Clearing NBD queue");
445 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_QUE);
446 7a5ca864 bellard
447 b2e3d87f Nick Thomas
    TRACE("Clearing NBD socket");
448 b2e3d87f Nick Thomas
    ioctl(fd, NBD_CLEAR_SOCK);
449 7a5ca864 bellard
450 b2e3d87f Nick Thomas
    errno = serrno;
451 b2e3d87f Nick Thomas
    return ret;
452 7a5ca864 bellard
}
453 03ff3ca3 aliguori
#else
454 8e72506e Paolo Bonzini
/*
 * Fallback used when not building for Linux: NBD device setup is
 * unavailable.  Always returns -1 with errno set to ENOTSUP; all
 * arguments are ignored.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    (void)fd;
    (void)csock;
    (void)flags;
    (void)size;
    (void)blocksize;

    errno = ENOTSUP;
    return -1;
}
459 03ff3ca3 aliguori
460 03ff3ca3 aliguori
/*
 * Fallback used when not building for Linux: always returns -1 with
 * errno set to ENOTSUP; @fd is ignored.
 */
int nbd_disconnect(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
465 03ff3ca3 aliguori
466 0a4eb864 Jes Sorensen
/*
 * Fallback used when not building for Linux: always returns -1 with
 * errno set to ENOTSUP; @fd is ignored.
 */
int nbd_client(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
471 03ff3ca3 aliguori
#endif
472 7a5ca864 bellard
473 75818250 ths
int nbd_send_request(int csock, struct nbd_request *request)
474 7a5ca864 bellard
{
475 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8 + 8 + 4];
476 b2e3d87f Nick Thomas
477 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
478 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 4), request->type);
479 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
480 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 16), request->from);
481 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 24), request->len);
482 75818250 ths
483 b2e3d87f Nick Thomas
    TRACE("Sending request to client: "
484 b2e3d87f Nick Thomas
          "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
485 b2e3d87f Nick Thomas
          request->from, request->len, request->handle, request->type);
486 b2e3d87f Nick Thomas
487 b2e3d87f Nick Thomas
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
488 b2e3d87f Nick Thomas
        LOG("writing to socket failed");
489 b2e3d87f Nick Thomas
        errno = EINVAL;
490 b2e3d87f Nick Thomas
        return -1;
491 b2e3d87f Nick Thomas
    }
492 b2e3d87f Nick Thomas
    return 0;
493 b2e3d87f Nick Thomas
}
494 75818250 ths
495 75818250 ths
static int nbd_receive_request(int csock, struct nbd_request *request)
496 75818250 ths
{
497 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8 + 8 + 4];
498 b2e3d87f Nick Thomas
    uint32_t magic;
499 b2e3d87f Nick Thomas
500 b2e3d87f Nick Thomas
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
501 b2e3d87f Nick Thomas
        LOG("read failed");
502 b2e3d87f Nick Thomas
        errno = EINVAL;
503 b2e3d87f Nick Thomas
        return -1;
504 b2e3d87f Nick Thomas
    }
505 b2e3d87f Nick Thomas
506 b2e3d87f Nick Thomas
    /* Request
507 b2e3d87f Nick Thomas
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
508 b2e3d87f Nick Thomas
       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
509 b2e3d87f Nick Thomas
       [ 8 .. 15]   handle
510 b2e3d87f Nick Thomas
       [16 .. 23]   from
511 b2e3d87f Nick Thomas
       [24 .. 27]   len
512 b2e3d87f Nick Thomas
     */
513 b2e3d87f Nick Thomas
514 b2e3d87f Nick Thomas
    magic = be32_to_cpup((uint32_t*)buf);
515 b2e3d87f Nick Thomas
    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
516 b2e3d87f Nick Thomas
    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
517 b2e3d87f Nick Thomas
    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
518 b2e3d87f Nick Thomas
    request->len   = be32_to_cpup((uint32_t*)(buf + 24));
519 b2e3d87f Nick Thomas
520 b2e3d87f Nick Thomas
    TRACE("Got request: "
521 b2e3d87f Nick Thomas
          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
522 b2e3d87f Nick Thomas
          magic, request->type, request->from, request->len);
523 b2e3d87f Nick Thomas
524 b2e3d87f Nick Thomas
    if (magic != NBD_REQUEST_MAGIC) {
525 b2e3d87f Nick Thomas
        LOG("invalid magic (got 0x%x)", magic);
526 b2e3d87f Nick Thomas
        errno = EINVAL;
527 b2e3d87f Nick Thomas
        return -1;
528 b2e3d87f Nick Thomas
    }
529 b2e3d87f Nick Thomas
    return 0;
530 75818250 ths
}
531 75818250 ths
532 75818250 ths
int nbd_receive_reply(int csock, struct nbd_reply *reply)
533 75818250 ths
{
534 b2e3d87f Nick Thomas
    uint8_t buf[NBD_REPLY_SIZE];
535 b2e3d87f Nick Thomas
    uint32_t magic;
536 b2e3d87f Nick Thomas
537 b2e3d87f Nick Thomas
    memset(buf, 0xAA, sizeof(buf));
538 b2e3d87f Nick Thomas
539 b2e3d87f Nick Thomas
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
540 b2e3d87f Nick Thomas
        LOG("read failed");
541 b2e3d87f Nick Thomas
        errno = EINVAL;
542 b2e3d87f Nick Thomas
        return -1;
543 b2e3d87f Nick Thomas
    }
544 b2e3d87f Nick Thomas
545 b2e3d87f Nick Thomas
    /* Reply
546 b2e3d87f Nick Thomas
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
547 b2e3d87f Nick Thomas
       [ 4 ..  7]    error   (0 == no error)
548 b2e3d87f Nick Thomas
       [ 7 .. 15]    handle
549 b2e3d87f Nick Thomas
     */
550 b2e3d87f Nick Thomas
551 b2e3d87f Nick Thomas
    magic = be32_to_cpup((uint32_t*)buf);
552 b2e3d87f Nick Thomas
    reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
553 b2e3d87f Nick Thomas
    reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
554 b2e3d87f Nick Thomas
555 b2e3d87f Nick Thomas
    TRACE("Got reply: "
556 b2e3d87f Nick Thomas
          "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
557 b2e3d87f Nick Thomas
          magic, reply->error, reply->handle);
558 b2e3d87f Nick Thomas
559 b2e3d87f Nick Thomas
    if (magic != NBD_REPLY_MAGIC) {
560 b2e3d87f Nick Thomas
        LOG("invalid magic (got 0x%x)", magic);
561 b2e3d87f Nick Thomas
        errno = EINVAL;
562 b2e3d87f Nick Thomas
        return -1;
563 b2e3d87f Nick Thomas
    }
564 b2e3d87f Nick Thomas
    return 0;
565 75818250 ths
}
566 75818250 ths
567 75818250 ths
static int nbd_send_reply(int csock, struct nbd_reply *reply)
568 75818250 ths
{
569 b2e3d87f Nick Thomas
    uint8_t buf[4 + 4 + 8];
570 b2e3d87f Nick Thomas
571 b2e3d87f Nick Thomas
    /* Reply
572 b2e3d87f Nick Thomas
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
573 b2e3d87f Nick Thomas
       [ 4 ..  7]    error   (0 == no error)
574 b2e3d87f Nick Thomas
       [ 7 .. 15]    handle
575 b2e3d87f Nick Thomas
     */
576 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
577 b2e3d87f Nick Thomas
    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
578 b2e3d87f Nick Thomas
    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
579 b2e3d87f Nick Thomas
580 b2e3d87f Nick Thomas
    TRACE("Sending response to client");
581 b2e3d87f Nick Thomas
582 b2e3d87f Nick Thomas
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
583 b2e3d87f Nick Thomas
        LOG("writing to socket failed");
584 b2e3d87f Nick Thomas
        errno = EINVAL;
585 b2e3d87f Nick Thomas
        return -1;
586 b2e3d87f Nick Thomas
    }
587 b2e3d87f Nick Thomas
    return 0;
588 75818250 ths
}
589 7a5ca864 bellard
590 41996e38 Paolo Bonzini
#define MAX_NBD_REQUESTS 16
591 41996e38 Paolo Bonzini
592 d9a73806 Paolo Bonzini
typedef struct NBDRequest NBDRequest;
593 d9a73806 Paolo Bonzini
594 d9a73806 Paolo Bonzini
/*
 * Per-request state handed out by nbd_request_get() and given back with
 * nbd_request_put().
 */
struct NBDRequest {
595 d9a73806 Paolo Bonzini
    /* Linkage for the export's list of idle requests (NBDExport.requests). */
    QSIMPLEQ_ENTRY(NBDRequest) entry;
596 72deddc5 Paolo Bonzini
    /* Client this request currently belongs to; set by nbd_request_get(),
     * which also takes a reference on the client. */
    NBDClient *client;
597 d9a73806 Paolo Bonzini
    /* Data buffer of NBD_BUFFER_SIZE bytes obtained from qemu_blockalign()
     * when the request is first created. */
    uint8_t *data;
598 d9a73806 Paolo Bonzini
};
599 d9a73806 Paolo Bonzini
600 af49bbbe Paolo Bonzini
/* State of one exported block device; created by nbd_export_new(). */
struct NBDExport {
601 af49bbbe Paolo Bonzini
    /* Block device backing the export; also passed to qemu_blockalign()
     * when request buffers are allocated. */
    BlockDriverState *bs;
602 af49bbbe Paolo Bonzini
    /* NOTE(review): presumably the byte offset into bs at which the
     * export starts - confirm against nbd_export_new() and its callers. */
    off_t dev_offset;
603 af49bbbe Paolo Bonzini
    /* NOTE(review): presumably the exported size in bytes - confirm. */
    off_t size;
604 af49bbbe Paolo Bonzini
    /* NOTE(review): presumably NBD_FLAG_* bits for this export - confirm. */
    uint32_t nbdflags;
605 d9a73806 Paolo Bonzini
    /* Idle NBDRequest objects: nbd_request_put() pushes onto the head,
     * nbd_request_get() pops from the head before allocating a new one. */
    QSIMPLEQ_HEAD(, NBDRequest) requests;
606 af49bbbe Paolo Bonzini
};
607 af49bbbe Paolo Bonzini
608 1743b515 Paolo Bonzini
/* State of one client connection. */
struct NBDClient {
609 1743b515 Paolo Bonzini
    /* Reference count; nbd_client_put() frees the client when it drops
     * to zero. */
    int refcount;
610 1743b515 Paolo Bonzini
    /* Optional callback invoked by nbd_client_close() after the socket
     * has been closed; may be NULL. */
    void (*close)(NBDClient *client);
611 1743b515 Paolo Bonzini
612 1743b515 Paolo Bonzini
    /* Export this client is attached to. */
    NBDExport *exp;
613 1743b515 Paolo Bonzini
    /* Connection socket; set to -1 by nbd_client_close(). */
    int sock;
614 262db388 Paolo Bonzini
615 262db388 Paolo Bonzini
    /* NOTE(review): presumably the coroutine currently receiving a
     * request on sock - usage is outside this view, confirm. */
    Coroutine *recv_coroutine;
616 262db388 Paolo Bonzini
617 262db388 Paolo Bonzini
    /* NOTE(review): presumably serializes replies written to sock -
     * usage is outside this view, confirm. */
    CoMutex send_lock;
618 262db388 Paolo Bonzini
    /* NOTE(review): presumably the coroutine currently sending a reply -
     * usage is outside this view, confirm. */
    Coroutine *send_coroutine;
619 41996e38 Paolo Bonzini
620 41996e38 Paolo Bonzini
    /* Number of requests currently handed out to this client; asserted
     * to stay below MAX_NBD_REQUESTS before each new one is taken. */
    int nb_requests;
621 1743b515 Paolo Bonzini
};
622 1743b515 Paolo Bonzini
623 1743b515 Paolo Bonzini
/* Take an additional reference on @client. */
static void nbd_client_get(NBDClient *client)
{
    client->refcount += 1;
}
627 1743b515 Paolo Bonzini
628 1743b515 Paolo Bonzini
/*
 * Drop one reference on @client and free it with g_free() once the count
 * reaches zero.
 */
static void nbd_client_put(NBDClient *client)
{
    client->refcount -= 1;
    if (client->refcount != 0) {
        return;
    }

    g_free(client);
}
634 1743b515 Paolo Bonzini
635 1743b515 Paolo Bonzini
/*
 * Shut down @client's connection: unregister its fd handlers, close the
 * socket and mark it as -1, invoke the optional close callback, and
 * finally drop one reference on the client (which may free it).
 */
static void nbd_client_close(NBDClient *client)
{
    int sock = client->sock;

    qemu_set_fd_handler2(sock, NULL, NULL, NULL, NULL);
    close(sock);
    client->sock = -1;

    if (client->close != NULL) {
        client->close(client);
    }

    /* Must come last: this may free the client. */
    nbd_client_put(client);
}
645 1743b515 Paolo Bonzini
646 72deddc5 Paolo Bonzini
/*
 * Obtain a request object for @client.
 *
 * A request is recycled from the export's free list when one is
 * available; otherwise a new one is allocated together with its
 * NBD_BUFFER_SIZE data buffer.  The returned request holds a reference on
 * the client and counts against the client's MAX_NBD_REQUESTS limit.
 */
static NBDRequest *nbd_request_get(NBDClient *client)
{
    NBDExport *exp = client->exp;
    NBDRequest *request;

    /* The caller must not exceed the per-client request limit. */
    assert(client->nb_requests < MAX_NBD_REQUESTS);
    client->nb_requests += 1;

    if (!QSIMPLEQ_EMPTY(&exp->requests)) {
        request = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
    } else {
        request = g_malloc0(sizeof(*request));
        request->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
    }

    request->client = client;
    nbd_client_get(client);
    return request;
}
665 d9a73806 Paolo Bonzini
666 72deddc5 Paolo Bonzini
/*
 * Return @req to the free list of its client's export and release the
 * client reference it held.
 */
static void nbd_request_put(NBDRequest *req)
{
    NBDClient *owner = req->client;
    int was_at_limit;

    QSIMPLEQ_INSERT_HEAD(&owner->exp->requests, req, entry);

    was_at_limit = (owner->nb_requests == MAX_NBD_REQUESTS);
    owner->nb_requests -= 1;
    if (was_at_limit) {
        /* nbd_can_read() refused reads while the limit was reached;
         * notify the main loop now that a slot is free. */
        qemu_notify_event();
    }

    nbd_client_put(owner);
}
675 d9a73806 Paolo Bonzini
676 af49bbbe Paolo Bonzini
/*
 * Create an export for block device @bs.
 *
 * @dev_offset: offset added to every request offset before it is applied
 *              to @bs.
 * @size:       exported size in bytes, or -1 to use the device's
 *              total_sectors * 512.
 * @nbdflags:   NBD_FLAG_* bits stored in the export.
 *
 * Returns the newly allocated export.
 */
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
                          off_t size, uint32_t nbdflags)
{
    NBDExport *created = g_malloc0(sizeof(*created));

    QSIMPLEQ_INIT(&created->requests);
    created->bs = bs;
    created->dev_offset = dev_offset;
    created->nbdflags = nbdflags;

    if (size == -1) {
        created->size = created->bs->total_sectors * 512;
    } else {
        created->size = size;
    }

    return created;
}
687 af49bbbe Paolo Bonzini
688 af49bbbe Paolo Bonzini
/*
 * Destroy @exp: free every request cached on its free list (including the
 * data buffers), close the underlying block device and free the export.
 */
void nbd_export_close(NBDExport *exp)
{
    NBDRequest *cached;

    while ((cached = QSIMPLEQ_FIRST(&exp->requests)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
        qemu_vfree(cached->data);
        g_free(cached);
    }

    bdrv_close(exp->bs);
    g_free(exp);
}
700 af49bbbe Paolo Bonzini
701 41996e38 Paolo Bonzini
/*
 * Socket handlers, declared ahead of their definitions because
 * nbd_co_send_reply() registers them with qemu_set_fd_handler2().
 */
static int nbd_can_read(void *opaque);
static void nbd_read(void *opaque);
static void nbd_restart_write(void *opaque);
704 262db388 Paolo Bonzini
705 262db388 Paolo Bonzini
/*
 * Send @reply for @req to the client, followed by @len bytes of req->data
 * when @len is non-zero.
 *
 * The client's send_lock is held for the whole operation.  While sending,
 * nbd_restart_write() is installed as the socket's write handler and
 * send_coroutine identifies this coroutine to it.
 *
 * Returns the result of nbd_send_reply() on success, or a negative errno
 * value on failure (-EIO if the payload was not sent in full).
 */
static int nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
                             int len)
{
    NBDClient *client = req->client;
    int fd = client->sock;
    int has_payload = (len != 0);
    int status, sent;

    qemu_co_mutex_lock(&client->send_lock);
    qemu_set_fd_handler2(fd, nbd_can_read, nbd_read,
                         nbd_restart_write, client);
    client->send_coroutine = qemu_coroutine_self();

    /* Cork the socket so header and payload go out together. */
    if (has_payload) {
        socket_set_cork(fd, 1);
    }

    status = nbd_send_reply(fd, reply);
    if (has_payload && status != -1) {
        sent = qemu_co_send(fd, req->data, len);
        if (sent != len) {
            errno = EIO;
            status = -1;
        }
    }

    /* Capture errno before any further call can overwrite it. */
    if (status == -1) {
        status = -errno;
    }

    if (has_payload) {
        socket_set_cork(fd, 0);
    }

    client->send_coroutine = NULL;
    qemu_set_fd_handler2(fd, nbd_can_read, nbd_read, NULL, client);
    qemu_co_mutex_unlock(&client->send_lock);
    return status;
}
743 22045592 Paolo Bonzini
744 262db388 Paolo Bonzini
/*
 * Receive one request header from the client of @req into @request and,
 * for write commands, the payload into req->data.
 *
 * recv_coroutine is set to the calling coroutine for the duration of the
 * call so that nbd_read() re-enters it instead of starting a new request.
 *
 * Returns 0 on success, -EIO if reading the header or the payload failed,
 * and -EINVAL if the length exceeds NBD_BUFFER_SIZE or offset + length
 * overflows.
 */
static int nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
{
    NBDClient *client = req->client;
    int fd = client->sock;
    int status = 0;

    client->recv_coroutine = qemu_coroutine_self();

    if (nbd_receive_request(fd, request) == -1) {
        status = -EIO;
    } else if (request->len > NBD_BUFFER_SIZE) {
        LOG("len (%u) is larger than max len (%u)",
            request->len, NBD_BUFFER_SIZE);
        status = -EINVAL;
    } else if ((request->from + request->len) < request->from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        status = -EINVAL;
    } else {
        TRACE("Decoding type");

        if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
            TRACE("Reading %u byte(s)", request->len);

            if (qemu_co_recv(fd, req->data, request->len) != request->len) {
                LOG("reading from socket failed");
                status = -EIO;
            }
        }
    }

    client->recv_coroutine = NULL;
    return status;
}
787 a030b347 Paolo Bonzini
788 262db388 Paolo Bonzini
static void nbd_trip(void *opaque)
789 75818250 ths
{
790 262db388 Paolo Bonzini
    NBDClient *client = opaque;
791 72deddc5 Paolo Bonzini
    NBDRequest *req = nbd_request_get(client);
792 1743b515 Paolo Bonzini
    NBDExport *exp = client->exp;
793 b2e3d87f Nick Thomas
    struct nbd_request request;
794 b2e3d87f Nick Thomas
    struct nbd_reply reply;
795 adcf6302 Paolo Bonzini
    int ret;
796 b2e3d87f Nick Thomas
797 b2e3d87f Nick Thomas
    TRACE("Reading request.");
798 b2e3d87f Nick Thomas
799 262db388 Paolo Bonzini
    ret = nbd_co_receive_request(req, &request);
800 a030b347 Paolo Bonzini
    if (ret == -EIO) {
801 d9a73806 Paolo Bonzini
        goto out;
802 a030b347 Paolo Bonzini
    }
803 b2e3d87f Nick Thomas
804 fae69416 Paolo Bonzini
    reply.handle = request.handle;
805 fae69416 Paolo Bonzini
    reply.error = 0;
806 fae69416 Paolo Bonzini
807 a030b347 Paolo Bonzini
    if (ret < 0) {
808 a030b347 Paolo Bonzini
        reply.error = -ret;
809 a030b347 Paolo Bonzini
        goto error_reply;
810 b2e3d87f Nick Thomas
    }
811 b2e3d87f Nick Thomas
812 af49bbbe Paolo Bonzini
    if ((request.from + request.len) > exp->size) {
813 b2e3d87f Nick Thomas
            LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
814 b2e3d87f Nick Thomas
            ", Offset: %" PRIu64 "\n",
815 af49bbbe Paolo Bonzini
                    request.from, request.len,
816 af49bbbe Paolo Bonzini
                    (uint64_t)exp->size, exp->dev_offset);
817 b2e3d87f Nick Thomas
        LOG("requested operation past EOF--bad client?");
818 fae69416 Paolo Bonzini
        goto invalid_request;
819 b2e3d87f Nick Thomas
    }
820 b2e3d87f Nick Thomas
821 2c7989a9 Paolo Bonzini
    switch (request.type & NBD_CMD_MASK_COMMAND) {
822 b2e3d87f Nick Thomas
    case NBD_CMD_READ:
823 b2e3d87f Nick Thomas
        TRACE("Request type is READ");
824 b2e3d87f Nick Thomas
825 af49bbbe Paolo Bonzini
        ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512,
826 d9a73806 Paolo Bonzini
                        req->data, request.len / 512);
827 adcf6302 Paolo Bonzini
        if (ret < 0) {
828 b2e3d87f Nick Thomas
            LOG("reading from file failed");
829 adcf6302 Paolo Bonzini
            reply.error = -ret;
830 fae69416 Paolo Bonzini
            goto error_reply;
831 b2e3d87f Nick Thomas
        }
832 b2e3d87f Nick Thomas
833 b2e3d87f Nick Thomas
        TRACE("Read %u byte(s)", request.len);
834 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, request.len) < 0)
835 d9a73806 Paolo Bonzini
            goto out;
836 b2e3d87f Nick Thomas
        break;
837 b2e3d87f Nick Thomas
    case NBD_CMD_WRITE:
838 b2e3d87f Nick Thomas
        TRACE("Request type is WRITE");
839 b2e3d87f Nick Thomas
840 af49bbbe Paolo Bonzini
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
841 b2e3d87f Nick Thomas
            TRACE("Server is read-only, return error");
842 fae69416 Paolo Bonzini
            reply.error = EROFS;
843 fae69416 Paolo Bonzini
            goto error_reply;
844 fae69416 Paolo Bonzini
        }
845 fae69416 Paolo Bonzini
846 fae69416 Paolo Bonzini
        TRACE("Writing to device");
847 fae69416 Paolo Bonzini
848 af49bbbe Paolo Bonzini
        ret = bdrv_write(exp->bs, (request.from + exp->dev_offset) / 512,
849 d9a73806 Paolo Bonzini
                         req->data, request.len / 512);
850 fae69416 Paolo Bonzini
        if (ret < 0) {
851 fae69416 Paolo Bonzini
            LOG("writing to file failed");
852 fae69416 Paolo Bonzini
            reply.error = -ret;
853 fae69416 Paolo Bonzini
            goto error_reply;
854 fae69416 Paolo Bonzini
        }
855 b2e3d87f Nick Thomas
856 fae69416 Paolo Bonzini
        if (request.type & NBD_CMD_FLAG_FUA) {
857 262db388 Paolo Bonzini
            ret = bdrv_co_flush(exp->bs);
858 adcf6302 Paolo Bonzini
            if (ret < 0) {
859 fae69416 Paolo Bonzini
                LOG("flush failed");
860 adcf6302 Paolo Bonzini
                reply.error = -ret;
861 fae69416 Paolo Bonzini
                goto error_reply;
862 2c7989a9 Paolo Bonzini
            }
863 b2e3d87f Nick Thomas
        }
864 b2e3d87f Nick Thomas
865 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0)
866 d9a73806 Paolo Bonzini
            goto out;
867 b2e3d87f Nick Thomas
        break;
868 b2e3d87f Nick Thomas
    case NBD_CMD_DISC:
869 b2e3d87f Nick Thomas
        TRACE("Request type is DISCONNECT");
870 b2e3d87f Nick Thomas
        errno = 0;
871 262db388 Paolo Bonzini
        goto out;
872 1486d04a Paolo Bonzini
    case NBD_CMD_FLUSH:
873 1486d04a Paolo Bonzini
        TRACE("Request type is FLUSH");
874 1486d04a Paolo Bonzini
875 262db388 Paolo Bonzini
        ret = bdrv_co_flush(exp->bs);
876 1486d04a Paolo Bonzini
        if (ret < 0) {
877 1486d04a Paolo Bonzini
            LOG("flush failed");
878 1486d04a Paolo Bonzini
            reply.error = -ret;
879 1486d04a Paolo Bonzini
        }
880 1486d04a Paolo Bonzini
881 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0)
882 d9a73806 Paolo Bonzini
            goto out;
883 1486d04a Paolo Bonzini
        break;
884 7a706633 Paolo Bonzini
    case NBD_CMD_TRIM:
885 7a706633 Paolo Bonzini
        TRACE("Request type is TRIM");
886 262db388 Paolo Bonzini
        ret = bdrv_co_discard(exp->bs, (request.from + exp->dev_offset) / 512,
887 262db388 Paolo Bonzini
                              request.len / 512);
888 7a706633 Paolo Bonzini
        if (ret < 0) {
889 7a706633 Paolo Bonzini
            LOG("discard failed");
890 7a706633 Paolo Bonzini
            reply.error = -ret;
891 7a706633 Paolo Bonzini
        }
892 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) < 0)
893 d9a73806 Paolo Bonzini
            goto out;
894 7a706633 Paolo Bonzini
        break;
895 b2e3d87f Nick Thomas
    default:
896 b2e3d87f Nick Thomas
        LOG("invalid request type (%u) received", request.type);
897 fae69416 Paolo Bonzini
    invalid_request:
898 fae69416 Paolo Bonzini
        reply.error = -EINVAL;
899 fae69416 Paolo Bonzini
    error_reply:
900 262db388 Paolo Bonzini
        if (nbd_co_send_reply(req, &reply, 0) == -1)
901 d9a73806 Paolo Bonzini
            goto out;
902 fae69416 Paolo Bonzini
        break;
903 b2e3d87f Nick Thomas
    }
904 b2e3d87f Nick Thomas
905 b2e3d87f Nick Thomas
    TRACE("Request/Reply complete");
906 b2e3d87f Nick Thomas
907 262db388 Paolo Bonzini
    nbd_request_put(req);
908 262db388 Paolo Bonzini
    return;
909 262db388 Paolo Bonzini
910 d9a73806 Paolo Bonzini
out:
911 72deddc5 Paolo Bonzini
    nbd_request_put(req);
912 262db388 Paolo Bonzini
    nbd_client_close(client);
913 7a5ca864 bellard
}
914 af49bbbe Paolo Bonzini
915 41996e38 Paolo Bonzini
static int nbd_can_read(void *opaque)
916 41996e38 Paolo Bonzini
{
917 41996e38 Paolo Bonzini
    NBDClient *client = opaque;
918 41996e38 Paolo Bonzini
919 41996e38 Paolo Bonzini
    return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
920 41996e38 Paolo Bonzini
}
921 41996e38 Paolo Bonzini
922 1743b515 Paolo Bonzini
static void nbd_read(void *opaque)
923 1743b515 Paolo Bonzini
{
924 1743b515 Paolo Bonzini
    NBDClient *client = opaque;
925 1743b515 Paolo Bonzini
926 262db388 Paolo Bonzini
    if (client->recv_coroutine) {
927 262db388 Paolo Bonzini
        qemu_coroutine_enter(client->recv_coroutine, NULL);
928 262db388 Paolo Bonzini
    } else {
929 262db388 Paolo Bonzini
        qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
930 1743b515 Paolo Bonzini
    }
931 1743b515 Paolo Bonzini
}
932 1743b515 Paolo Bonzini
933 262db388 Paolo Bonzini
static void nbd_restart_write(void *opaque)
934 262db388 Paolo Bonzini
{
935 262db388 Paolo Bonzini
    NBDClient *client = opaque;
936 262db388 Paolo Bonzini
937 262db388 Paolo Bonzini
    qemu_coroutine_enter(client->send_coroutine, NULL);
938 262db388 Paolo Bonzini
}
939 262db388 Paolo Bonzini
940 1743b515 Paolo Bonzini
/*
 * Negotiate with a newly connected peer on @csock and create the client
 * object serving @exp.
 *
 * @close is stored as the client's close callback, which
 * nbd_client_close() invokes when it is non-NULL.
 *
 * Returns the new client (holding one reference, with its read handlers
 * registered on @csock), or NULL if the negotiation failed.
 */
NBDClient *nbd_client_new(NBDExport *exp, int csock,
                          void (*close)(NBDClient *))
{
    NBDClient *created = NULL;

    if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) != -1) {
        created = g_malloc0(sizeof(*created));
        created->refcount = 1;
        created->exp = exp;
        created->sock = csock;
        created->close = close;
        qemu_co_mutex_init(&created->send_lock);
        qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, created);
    }

    return created;
}