Statistics
| Branch: | Revision:

root / nbd.c @ 74624688

History | View | Annotate | Download (17.9 kB)

1
/*
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
18

    
19
#include "nbd.h"
20
#include "block.h"
21

    
22
#include <errno.h>
23
#include <string.h>
24
#ifndef _WIN32
25
#include <sys/ioctl.h>
26
#endif
27
#if defined(__sun__) || defined(__HAIKU__)
28
#include <sys/ioccom.h>
29
#endif
30
#include <ctype.h>
31
#include <inttypes.h>
32

    
33
#ifdef __linux__
34
#include <linux/fs.h>
35
#endif
36

    
37
#include "qemu_socket.h"
38

    
39
//#define DEBUG_NBD
40

    
41
/*
 * TRACE(): debug-only logging.
 * Without DEBUG_NBD it expands to an empty statement, so its arguments are
 * never evaluated; with DEBUG_NBD it forwards everything to LOG().
 */
#ifndef DEBUG_NBD
#define TRACE(msg, ...) \
    do { } while (0)
#else
#define TRACE(msg, ...) \
    do { \
        LOG(msg, ## __VA_ARGS__); \
    } while (0)
#endif
49

    
50
/*
 * LOG(): print a printf-style message on stderr, prefixed with the source
 * file, function and line of the call site and terminated by a newline.
 * msg must be a string literal because it is pasted into the format string.
 */
#define LOG(msg, ...) \
    do { \
        fprintf(stderr, "%s:%s():L%d: " msg "\n", \
                __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
    } while (0)
54

    
55
/* This is all part of the "official" NBD API */
56

    
57
/* Size in bytes of a reply header: 4 (magic) + 4 (error) + 8 (handle). */
#define NBD_REPLY_SIZE          (4 + 4 + 8)
/* Magic number found in the first four bytes of every request header. */
#define NBD_REQUEST_MAGIC       0x25609513
/* Magic number found in the first four bytes of every reply header. */
#define NBD_REPLY_MAGIC         0x67446698
60

    
61
/* ioctl request codes for the NBD device; all live in ioctl group 0xab. */
#define NBD_SET_SOCK            _IO(0xab, 0)
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
#define NBD_SET_SIZE            _IO(0xab, 2)
#define NBD_DO_IT               _IO(0xab, 3)
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
#define NBD_CLEAR_QUE           _IO(0xab, 5)
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
#define NBD_DISCONNECT          _IO(0xab, 8)
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
#define NBD_SET_FLAGS           _IO(0xab, 10)
72

    
73
/* Option code sent by the client when it selects an export by name. */
#define NBD_OPT_EXPORT_NAME     (1 << 0)
74

    
75
/* That's all folks */
76

    
77
/*
 * Directional shorthands for nbd_wr_sync(): the last argument selects
 * receive (true) or send (false).
 */
#define read_sync(fd, buffer, size) \
    nbd_wr_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) \
    nbd_wr_sync(fd, buffer, size, false)
79

    
80
/*
 * Transfer up to @size bytes between @buffer and the socket @fd, looping
 * until everything has been moved.
 *
 * @fd:      socket descriptor
 * @buffer:  destination (when reading) or source (when writing)
 * @size:    number of bytes requested
 * @do_read: true to receive into @buffer, false to send from it
 *
 * Returns the number of bytes transferred.  This is @size on success, a
 * smaller count if the peer closed the connection early, and 0 when an
 * unrecoverable socket error occurred (errno then holds socket_error()),
 * even if part of the data had already been transferred.
 * EAGAIN and EINTR are retried transparently.
 */
size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    /* Byte-typed alias: pointer arithmetic on void * is a GNU extension. */
    char *bytes = buffer;
    size_t done = 0;

    while (done < size) {
        ssize_t len;

        if (do_read) {
            len = qemu_recv(fd, bytes + done, size - done, 0);
        } else {
            len = send(fd, bytes + done, size - done, 0);
        }

        if (len == -1) {
            errno = socket_error();

            /* recoverable error: retry the same transfer */
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            }

            /* unrecoverable error */
            return 0;
        }

        /* eof */
        if (len == 0) {
            break;
        }

        done += len;
    }

    return done;
}
116

    
117
/*
 * Write "<address>:<port>" into @buf (at most @len bytes, NUL included).
 * An address that contains a colon is treated as an IPv6 literal and is
 * wrapped in square brackets: "[<address>]:<port>".
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    /* No colon in the address part: plain host name or IPv4 address. */
    if (strchr(address, ':') == NULL) {
        snprintf(buf, len, "%s:%u", address, port);
        return;
    }

    /* A colon in the address part means an IPv6 IP, which needs [] */
    snprintf(buf, len, "[%s]:%u", address, port);
}
127

    
128
/*
 * Build an "address:port" specification from @address and @port and hand it
 * to tcp_socket_outgoing_spec(), whose result is returned unchanged.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_outgoing_spec(spec);
}
134

    
135
/*
 * Connect a stream socket to @address_and_port via inet_connect() and
 * return whatever that call returns.
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int sock = inet_connect(address_and_port, SOCK_STREAM);

    return sock;
}
139

    
140
/*
 * Build an "address:port" specification from @address and @port and hand it
 * to tcp_socket_incoming_spec(), whose result is returned unchanged.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_incoming_spec(spec);
}
146

    
147
/*
 * Create a listening stream socket for @address_and_port via inet_listen().
 * No output string buffer is supplied (NULL, length 0) and the final
 * argument is 0.  Returns whatever inet_listen() returns.
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0);
}
153

    
154
/*
 * Create a listening UNIX socket at @path via unix_listen().
 * No output string buffer is supplied (NULL, length 0).
 * Returns whatever unix_listen() returns.
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
161

    
162
/*
 * Connect to the UNIX socket at @path via unix_connect() and return
 * whatever that call returns.
 */
int unix_socket_outgoing(const char *path)
{
    int sock = unix_connect(path);

    return sock;
}
166

    
167
/* Basic flow

   Server         Client

   Negotiate
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)
*/
180

    
181
int nbd_negotiate(int csock, off_t size, uint32_t flags)
182
{
183
    char buf[8 + 8 + 8 + 128];
184

    
185
    /* Negotiate
186
        [ 0 ..   7]   passwd   ("NBDMAGIC")
187
        [ 8 ..  15]   magic    (0x00420281861253)
188
        [16 ..  23]   size
189
        [24 ..  27]   flags
190
        [28 .. 151]   reserved (0)
191
     */
192

    
193
    TRACE("Beginning negotiation.");
194
    memcpy(buf, "NBDMAGIC", 8);
195
    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
196
    cpu_to_be64w((uint64_t*)(buf + 16), size);
197
    cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS);
198
    memset(buf + 28, 0, 124);
199

    
200
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
201
        LOG("write failed");
202
        errno = EINVAL;
203
        return -1;
204
    }
205

    
206
    TRACE("Negotation succeeded.");
207

    
208
    return 0;
209
}
210

    
211
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
212
                          off_t *size, size_t *blocksize)
213
{
214
    char buf[256];
215
    uint64_t magic, s;
216
    uint16_t tmp;
217

    
218
    TRACE("Receiving negotation.");
219

    
220
    if (read_sync(csock, buf, 8) != 8) {
221
        LOG("read failed");
222
        errno = EINVAL;
223
        return -1;
224
    }
225

    
226
    buf[8] = '\0';
227
    if (strlen(buf) == 0) {
228
        LOG("server connection closed");
229
        errno = EINVAL;
230
        return -1;
231
    }
232

    
233
    TRACE("Magic is %c%c%c%c%c%c%c%c",
234
          qemu_isprint(buf[0]) ? buf[0] : '.',
235
          qemu_isprint(buf[1]) ? buf[1] : '.',
236
          qemu_isprint(buf[2]) ? buf[2] : '.',
237
          qemu_isprint(buf[3]) ? buf[3] : '.',
238
          qemu_isprint(buf[4]) ? buf[4] : '.',
239
          qemu_isprint(buf[5]) ? buf[5] : '.',
240
          qemu_isprint(buf[6]) ? buf[6] : '.',
241
          qemu_isprint(buf[7]) ? buf[7] : '.');
242

    
243
    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
244
        LOG("Invalid magic received");
245
        errno = EINVAL;
246
        return -1;
247
    }
248

    
249
    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
250
        LOG("read failed");
251
        errno = EINVAL;
252
        return -1;
253
    }
254
    magic = be64_to_cpu(magic);
255
    TRACE("Magic is 0x%" PRIx64, magic);
256

    
257
    if (name) {
258
        uint32_t reserved = 0;
259
        uint32_t opt;
260
        uint32_t namesize;
261

    
262
        TRACE("Checking magic (opts_magic)");
263
        if (magic != 0x49484156454F5054LL) {
264
            LOG("Bad magic received");
265
            errno = EINVAL;
266
            return -1;
267
        }
268
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
269
            LOG("flags read failed");
270
            errno = EINVAL;
271
            return -1;
272
        }
273
        *flags = be16_to_cpu(tmp) << 16;
274
        /* reserved for future use */
275
        if (write_sync(csock, &reserved, sizeof(reserved)) !=
276
            sizeof(reserved)) {
277
            LOG("write failed (reserved)");
278
            errno = EINVAL;
279
            return -1;
280
        }
281
        /* write the export name */
282
        magic = cpu_to_be64(magic);
283
        if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
284
            LOG("write failed (magic)");
285
            errno = EINVAL;
286
            return -1;
287
        }
288
        opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
289
        if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
290
            LOG("write failed (opt)");
291
            errno = EINVAL;
292
            return -1;
293
        }
294
        namesize = cpu_to_be32(strlen(name));
295
        if (write_sync(csock, &namesize, sizeof(namesize)) !=
296
            sizeof(namesize)) {
297
            LOG("write failed (namesize)");
298
            errno = EINVAL;
299
            return -1;
300
        }
301
        if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
302
            LOG("write failed (name)");
303
            errno = EINVAL;
304
            return -1;
305
        }
306
    } else {
307
        TRACE("Checking magic (cli_magic)");
308

    
309
        if (magic != 0x00420281861253LL) {
310
            LOG("Bad magic received");
311
            errno = EINVAL;
312
            return -1;
313
        }
314
    }
315

    
316
    if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
317
        LOG("read failed");
318
        errno = EINVAL;
319
        return -1;
320
    }
321
    *size = be64_to_cpu(s);
322
    *blocksize = 1024;
323
    TRACE("Size is %" PRIu64, *size);
324

    
325
    if (!name) {
326
        if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
327
            LOG("read failed (flags)");
328
            errno = EINVAL;
329
            return -1;
330
        }
331
        *flags = be32_to_cpup(flags);
332
    } else {
333
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
334
            LOG("read failed (tmp)");
335
            errno = EINVAL;
336
            return -1;
337
        }
338
        *flags |= be32_to_cpu(tmp);
339
    }
340
    if (read_sync(csock, &buf, 124) != 124) {
341
        LOG("read failed (buf)");
342
        errno = EINVAL;
343
        return -1;
344
    }
345
        return 0;
346
}
347

    
348
#ifdef __linux__
349
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
350
{
351
    TRACE("Setting block size to %lu", (unsigned long)blocksize);
352

    
353
    if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) {
354
        int serrno = errno;
355
        LOG("Failed setting NBD block size");
356
        errno = serrno;
357
        return -1;
358
    }
359

    
360
        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
361

    
362
    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
363
        int serrno = errno;
364
        LOG("Failed setting size (in blocks)");
365
        errno = serrno;
366
        return -1;
367
    }
368

    
369
    if (flags & NBD_FLAG_READ_ONLY) {
370
        int read_only = 1;
371
        TRACE("Setting readonly attribute");
372

    
373
        if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
374
            int serrno = errno;
375
            LOG("Failed setting read-only attribute");
376
            errno = serrno;
377
            return -1;
378
        }
379
    }
380

    
381
    if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
382
        && errno != ENOTTY) {
383
        int serrno = errno;
384
        LOG("Failed setting flags");
385
        errno = serrno;
386
        return -1;
387
    }
388

    
389
    TRACE("Clearing NBD socket");
390

    
391
    if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
392
        int serrno = errno;
393
        LOG("Failed clearing NBD socket");
394
        errno = serrno;
395
        return -1;
396
    }
397

    
398
    TRACE("Setting NBD socket");
399

    
400
    if (ioctl(fd, NBD_SET_SOCK, csock) == -1) {
401
        int serrno = errno;
402
        LOG("Failed to set NBD socket");
403
        errno = serrno;
404
        return -1;
405
    }
406

    
407
    TRACE("Negotiation ended");
408

    
409
    return 0;
410
}
411

    
412
int nbd_disconnect(int fd)
413
{
414
    ioctl(fd, NBD_CLEAR_QUE);
415
    ioctl(fd, NBD_DISCONNECT);
416
    ioctl(fd, NBD_CLEAR_SOCK);
417
    return 0;
418
}
419

    
420
int nbd_client(int fd)
421
{
422
    int ret;
423
    int serrno;
424

    
425
    TRACE("Doing NBD loop");
426

    
427
    ret = ioctl(fd, NBD_DO_IT);
428
    if (ret == -1 && errno == EPIPE) {
429
        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
430
         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
431
         * that case.
432
         */
433
        ret = 0;
434
    }
435
    serrno = errno;
436

    
437
    TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
438

    
439
    TRACE("Clearing NBD queue");
440
    ioctl(fd, NBD_CLEAR_QUE);
441

    
442
    TRACE("Clearing NBD socket");
443
    ioctl(fd, NBD_CLEAR_SOCK);
444

    
445
    errno = serrno;
446
    return ret;
447
}
448
#else
449
/*
 * Fallback built when __linux__ is not defined: every argument is ignored
 * and the call fails with errno set to ENOTSUP.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    (void)fd;
    (void)csock;
    (void)flags;
    (void)size;
    (void)blocksize;

    errno = ENOTSUP;

    return -1;
}
454

    
455
/*
 * Fallback built when __linux__ is not defined: @fd is ignored and the call
 * fails with errno set to ENOTSUP.
 */
int nbd_disconnect(int fd)
{
    (void)fd;

    errno = ENOTSUP;

    return -1;
}
460

    
461
/*
 * Fallback built when __linux__ is not defined: @fd is ignored and the call
 * fails with errno set to ENOTSUP.
 */
int nbd_client(int fd)
{
    (void)fd;

    errno = ENOTSUP;

    return -1;
}
466
#endif
467

    
468
int nbd_send_request(int csock, struct nbd_request *request)
469
{
470
    uint8_t buf[4 + 4 + 8 + 8 + 4];
471

    
472
    cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
473
    cpu_to_be32w((uint32_t*)(buf + 4), request->type);
474
    cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
475
    cpu_to_be64w((uint64_t*)(buf + 16), request->from);
476
    cpu_to_be32w((uint32_t*)(buf + 24), request->len);
477

    
478
    TRACE("Sending request to client: "
479
          "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
480
          request->from, request->len, request->handle, request->type);
481

    
482
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
483
        LOG("writing to socket failed");
484
        errno = EINVAL;
485
        return -1;
486
    }
487
    return 0;
488
}
489

    
490
static int nbd_receive_request(int csock, struct nbd_request *request)
491
{
492
    uint8_t buf[4 + 4 + 8 + 8 + 4];
493
    uint32_t magic;
494

    
495
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
496
        LOG("read failed");
497
        errno = EINVAL;
498
        return -1;
499
    }
500

    
501
    /* Request
502
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
503
       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
504
       [ 8 .. 15]   handle
505
       [16 .. 23]   from
506
       [24 .. 27]   len
507
     */
508

    
509
    magic = be32_to_cpup((uint32_t*)buf);
510
    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
511
    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
512
    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
513
    request->len   = be32_to_cpup((uint32_t*)(buf + 24));
514

    
515
    TRACE("Got request: "
516
          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
517
          magic, request->type, request->from, request->len);
518

    
519
    if (magic != NBD_REQUEST_MAGIC) {
520
        LOG("invalid magic (got 0x%x)", magic);
521
        errno = EINVAL;
522
        return -1;
523
    }
524
    return 0;
525
}
526

    
527
int nbd_receive_reply(int csock, struct nbd_reply *reply)
528
{
529
    uint8_t buf[NBD_REPLY_SIZE];
530
    uint32_t magic;
531

    
532
    memset(buf, 0xAA, sizeof(buf));
533

    
534
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
535
        LOG("read failed");
536
        errno = EINVAL;
537
        return -1;
538
    }
539

    
540
    /* Reply
541
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
542
       [ 4 ..  7]    error   (0 == no error)
543
       [ 7 .. 15]    handle
544
     */
545

    
546
    magic = be32_to_cpup((uint32_t*)buf);
547
    reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
548
    reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
549

    
550
    TRACE("Got reply: "
551
          "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
552
          magic, reply->error, reply->handle);
553

    
554
    if (magic != NBD_REPLY_MAGIC) {
555
        LOG("invalid magic (got 0x%x)", magic);
556
        errno = EINVAL;
557
        return -1;
558
    }
559
    return 0;
560
}
561

    
562
static int nbd_send_reply(int csock, struct nbd_reply *reply)
563
{
564
    uint8_t buf[4 + 4 + 8];
565

    
566
    /* Reply
567
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
568
       [ 4 ..  7]    error   (0 == no error)
569
       [ 7 .. 15]    handle
570
     */
571
    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
572
    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
573
    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
574

    
575
    TRACE("Sending response to client");
576

    
577
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
578
        LOG("writing to socket failed");
579
        errno = EINVAL;
580
        return -1;
581
    }
582
    return 0;
583
}
584

    
585
/*
 * Serve exactly one client request on @csock.
 *
 * @bs:         block device backing the export
 * @csock:      client socket
 * @size:       size of the export in bytes; requests must end within it
 * @dev_offset: byte offset added to request offsets before accessing @bs
 * @offset:     in/out byte counter, advanced by the length of every
 *              successful read or write
 * @nbdflags:   export flags; NBD_FLAG_READ_ONLY makes writes fail with a
 *              reply error of 1 instead of touching the device
 * @data:       scratch buffer of @data_size bytes, used for the reply
 *              header plus payload on reads and for the payload on writes
 * @data_size:  size of @data in bytes
 *
 * Returns 0 after a completed read or write, 1 (with errno 0) when the
 * client asked to disconnect, and -1 on any error.  Errors detected here
 * set errno to EINVAL.
 */
int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
             off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size)
{
    struct nbd_request request;
    struct nbd_reply reply;
    uint32_t be32;
    uint64_t be64;

    TRACE("Reading request.");

    if (nbd_receive_request(csock, &request) == -1) {
        return -1;
    }

    /*
     * Bound the length by subtracting from the buffer size rather than
     * adding to request.len.  The length comes from a 32-bit wire field, so
     * "len + NBD_REPLY_SIZE" can wrap around for values close to the
     * maximum and slip past the check, letting the WRITE path read far
     * more than data_size bytes into data.
     */
    if (data_size < NBD_REPLY_SIZE ||
        request.len > (uint64_t)data_size - NBD_REPLY_SIZE) {
        LOG("len (%u) is larger than max len (%u)",
            request.len + NBD_REPLY_SIZE, data_size);
        errno = EINVAL;
        return -1;
    }

    if ((request.from + request.len) < request.from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        errno = EINVAL;
        return -1;
    }

    if ((request.from + request.len) > size) {
        LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
            ", Offset: %" PRIu64 "\n",
            request.from, request.len, (uint64_t)size, dev_offset);
        LOG("requested operation past EOF--bad client?");
        errno = EINVAL;
        return -1;
    }

    TRACE("Decoding type");

    reply.handle = request.handle;
    reply.error = 0;

    switch (request.type) {
    case NBD_CMD_READ:
        TRACE("Request type is READ");

        /*
         * Any negative result is treated as failure, not only -1.
         * NOTE(review): assumes bdrv_read() reports errors as negative
         * values (negative errno in QEMU's block layer) - confirm.
         */
        if (bdrv_read(bs, (request.from + dev_offset) / 512,
                      data + NBD_REPLY_SIZE,
                      request.len / 512) < 0) {
            LOG("reading from file failed");
            errno = EINVAL;
            return -1;
        }
        *offset += request.len;

        TRACE("Read %u byte(s)", request.len);

        /* Reply
           [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
           [ 4 ..  7]    error   (0 == no error)
           [ 8 .. 15]    handle
         */

        /*
         * The header goes in front of the payload that was read into
         * data + NBD_REPLY_SIZE.  The fields are copied in because data is
         * a byte buffer of unknown alignment.
         */
        be32 = cpu_to_be32(NBD_REPLY_MAGIC);
        memcpy(data, &be32, sizeof(be32));
        be32 = cpu_to_be32(reply.error);
        memcpy(data + 4, &be32, sizeof(be32));
        be64 = cpu_to_be64(reply.handle);
        memcpy(data + 8, &be64, sizeof(be64));

        TRACE("Sending data to client");

        if (write_sync(csock, data,
                       request.len + NBD_REPLY_SIZE) !=
                       request.len + NBD_REPLY_SIZE) {
            LOG("writing to socket failed");
            errno = EINVAL;
            return -1;
        }
        break;
    case NBD_CMD_WRITE:
        TRACE("Request type is WRITE");

        TRACE("Reading %u byte(s)", request.len);

        /* The payload is always consumed, even for a read-only export. */
        if (read_sync(csock, data, request.len) != request.len) {
            LOG("reading from socket failed");
            errno = EINVAL;
            return -1;
        }

        if (nbdflags & NBD_FLAG_READ_ONLY) {
            TRACE("Server is read-only, return error");
            reply.error = 1;
        } else {
            TRACE("Writing to device");

            /* Same failure convention as the read path above. */
            if (bdrv_write(bs, (request.from + dev_offset) / 512,
                           data, request.len / 512) < 0) {
                LOG("writing to file failed");
                errno = EINVAL;
                return -1;
            }

            *offset += request.len;
        }

        if (nbd_send_reply(csock, &reply) == -1) {
            return -1;
        }
        break;
    case NBD_CMD_DISC:
        TRACE("Request type is DISCONNECT");
        errno = 0;
        return 1;
    default:
        LOG("invalid request type (%u) received", request.type);
        errno = EINVAL;
        return -1;
    }

    TRACE("Request/Reply complete");

    return 0;
}