Statistics
| Branch: | Revision:

root / nbd.c @ b90fb4b8

History | View | Annotate | Download (17.4 kB)

1
/*
2
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3
 *
4
 *  Network Block Device
5
 *
6
 *  This program is free software; you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation; under version 2 of the License.
9
 *
10
 *  This program is distributed in the hope that it will be useful,
11
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 *  GNU General Public License for more details.
14
 *
15
 *  You should have received a copy of the GNU General Public License
16
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17
 */
18

    
19
#include "nbd.h"
20
#include "block.h"
21

    
22
#include <errno.h>
23
#include <string.h>
24
#ifndef _WIN32
25
#include <sys/ioctl.h>
26
#endif
27
#if defined(__sun__) || defined(__HAIKU__)
28
#include <sys/ioccom.h>
29
#endif
30
#include <ctype.h>
31
#include <inttypes.h>
32

    
33
#ifdef __linux__
34
#include <linux/fs.h>
35
#endif
36

    
37
#include "qemu_socket.h"
38

    
39
//#define DEBUG_NBD
40

    
41
#ifdef DEBUG_NBD
42
#define TRACE(msg, ...) do { \
43
    LOG(msg, ## __VA_ARGS__); \
44
} while(0)
45
#else
46
#define TRACE(msg, ...) \
47
    do { } while (0)
48
#endif
49

    
50
#define LOG(msg, ...) do { \
51
    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
52
            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
53
} while(0)
54

    
55
/* This is all part of the "official" NBD API */
56

    
57
#define NBD_REPLY_SIZE          (4 + 4 + 8)
58
#define NBD_REQUEST_MAGIC       0x25609513
59
#define NBD_REPLY_MAGIC         0x67446698
60

    
61
#define NBD_SET_SOCK            _IO(0xab, 0)
62
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
63
#define NBD_SET_SIZE            _IO(0xab, 2)
64
#define NBD_DO_IT               _IO(0xab, 3)
65
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
66
#define NBD_CLEAR_QUE           _IO(0xab, 5)
67
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
68
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
69
#define NBD_DISCONNECT          _IO(0xab, 8)
70

    
71
#define NBD_OPT_EXPORT_NAME     (1 << 0)
72

    
73
/* That's all folks */
74

    
75
#define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true)
76
#define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false)
77

    
78
/*
 * Transfer exactly `size` bytes between `buffer` and socket `fd`, looping
 * over short reads/writes.
 *
 * do_read selects the direction: true receives into buffer via qemu_recv(),
 * false sends from buffer via send().
 *
 * Returns the number of bytes transferred.  This is less than `size` when a
 * transfer call reports 0 bytes, and it is 0 when an error other than
 * EAGAIN/EINTR occurs (any partial progress is not reported in that case).
 */
size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    char *cursor = buffer;
    size_t done = 0;

    while (done < size) {
        ssize_t n;

        if (do_read) {
            n = qemu_recv(fd, cursor + done, size - done, 0);
        } else {
            n = send(fd, cursor + done, size - done, 0);
        }

        if (n == -1) {
            errno = socket_error();

            /* transient condition: simply try the same transfer again */
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            }

            /* anything else is fatal for this transfer */
            return 0;
        }

        /* nothing transferred: stop and report what we have so far */
        if (n == 0) {
            break;
        }

        done += n;
    }

    return done;
}
114

    
115
/*
 * Format "address:port" into buf (at most len bytes, via snprintf).
 * An address that itself contains a colon is wrapped in square brackets,
 * producing "[address]:port".
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    int has_colon = strchr(address, ':') != NULL;

    if (!has_colon) {
        snprintf(buf, len, "%s:%u", address, port);
        return;
    }

    /* A colon inside the address part means IPv6, which needs [] */
    snprintf(buf, len, "[%s]:%u", address, port);
}
125

    
126
/*
 * Build an "address:port" specification from the two parts and hand it to
 * tcp_socket_outgoing_spec(); that function's result is returned unchanged.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_outgoing_spec(spec);
}
132

    
133
/*
 * Connect a stream socket to the given "address:port" specification.
 * Returns the value produced by inet_connect().
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int sock = inet_connect(address_and_port, SOCK_STREAM);

    return sock;
}
137

    
138
/*
 * Build an "address:port" specification from the two parts and hand it to
 * tcp_socket_incoming_spec(); that function's result is returned unchanged.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);

    return tcp_socket_incoming_spec(spec);
}
144

    
145
/*
 * Open a listening stream socket for the given "address:port" specification.
 * No output string buffer is supplied to inet_listen() (NULL / length 0) and
 * the final argument is 0.  Returns the value produced by inet_listen().
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0);
}
151

    
152
/*
 * Open a listening UNIX-domain socket at `path`.  No output string buffer is
 * supplied to unix_listen() (NULL / length 0).  Returns the value produced by
 * unix_listen().
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
159

    
160
/*
 * Connect to the UNIX-domain socket at `path`.
 * Returns the value produced by unix_connect().
 */
int unix_socket_outgoing(const char *path)
{
    int sock = unix_connect(path);

    return sock;
}
164

    
165
/* Basic flow
166

167
   Server         Client
168

169
   Negotiate
170
                  Request
171
   Response
172
                  Request
173
   Response
174
                  ...
175
   ...
176
                  Request (type == 2)
177
*/
178

    
179
int nbd_negotiate(int csock, off_t size, uint32_t flags)
180
{
181
    char buf[8 + 8 + 8 + 128];
182

    
183
    /* Negotiate
184
        [ 0 ..   7]   passwd   ("NBDMAGIC")
185
        [ 8 ..  15]   magic    (0x00420281861253)
186
        [16 ..  23]   size
187
        [24 ..  27]   flags
188
        [28 .. 151]   reserved (0)
189
     */
190

    
191
    TRACE("Beginning negotiation.");
192
    memcpy(buf, "NBDMAGIC", 8);
193
    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
194
    cpu_to_be64w((uint64_t*)(buf + 16), size);
195
    cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS);
196
    memset(buf + 28, 0, 124);
197

    
198
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
199
        LOG("write failed");
200
        errno = EINVAL;
201
        return -1;
202
    }
203

    
204
    TRACE("Negotation succeeded.");
205

    
206
    return 0;
207
}
208

    
209
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
210
                          off_t *size, size_t *blocksize)
211
{
212
    char buf[256];
213
    uint64_t magic, s;
214
    uint16_t tmp;
215

    
216
    TRACE("Receiving negotation.");
217

    
218
    if (read_sync(csock, buf, 8) != 8) {
219
        LOG("read failed");
220
        errno = EINVAL;
221
        return -1;
222
    }
223

    
224
    buf[8] = '\0';
225
    if (strlen(buf) == 0) {
226
        LOG("server connection closed");
227
        errno = EINVAL;
228
        return -1;
229
    }
230

    
231
    TRACE("Magic is %c%c%c%c%c%c%c%c",
232
          qemu_isprint(buf[0]) ? buf[0] : '.',
233
          qemu_isprint(buf[1]) ? buf[1] : '.',
234
          qemu_isprint(buf[2]) ? buf[2] : '.',
235
          qemu_isprint(buf[3]) ? buf[3] : '.',
236
          qemu_isprint(buf[4]) ? buf[4] : '.',
237
          qemu_isprint(buf[5]) ? buf[5] : '.',
238
          qemu_isprint(buf[6]) ? buf[6] : '.',
239
          qemu_isprint(buf[7]) ? buf[7] : '.');
240

    
241
    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
242
        LOG("Invalid magic received");
243
        errno = EINVAL;
244
        return -1;
245
    }
246

    
247
    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
248
        LOG("read failed");
249
        errno = EINVAL;
250
        return -1;
251
    }
252
    magic = be64_to_cpu(magic);
253
    TRACE("Magic is 0x%" PRIx64, magic);
254

    
255
    if (name) {
256
        uint32_t reserved = 0;
257
        uint32_t opt;
258
        uint32_t namesize;
259

    
260
        TRACE("Checking magic (opts_magic)");
261
        if (magic != 0x49484156454F5054LL) {
262
            LOG("Bad magic received");
263
            errno = EINVAL;
264
            return -1;
265
        }
266
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
267
            LOG("flags read failed");
268
            errno = EINVAL;
269
            return -1;
270
        }
271
        *flags = be16_to_cpu(tmp) << 16;
272
        /* reserved for future use */
273
        if (write_sync(csock, &reserved, sizeof(reserved)) !=
274
            sizeof(reserved)) {
275
            LOG("write failed (reserved)");
276
            errno = EINVAL;
277
            return -1;
278
        }
279
        /* write the export name */
280
        magic = cpu_to_be64(magic);
281
        if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
282
            LOG("write failed (magic)");
283
            errno = EINVAL;
284
            return -1;
285
        }
286
        opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
287
        if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
288
            LOG("write failed (opt)");
289
            errno = EINVAL;
290
            return -1;
291
        }
292
        namesize = cpu_to_be32(strlen(name));
293
        if (write_sync(csock, &namesize, sizeof(namesize)) !=
294
            sizeof(namesize)) {
295
            LOG("write failed (namesize)");
296
            errno = EINVAL;
297
            return -1;
298
        }
299
        if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
300
            LOG("write failed (name)");
301
            errno = EINVAL;
302
            return -1;
303
        }
304
    } else {
305
        TRACE("Checking magic (cli_magic)");
306

    
307
        if (magic != 0x00420281861253LL) {
308
            LOG("Bad magic received");
309
            errno = EINVAL;
310
            return -1;
311
        }
312
    }
313

    
314
    if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
315
        LOG("read failed");
316
        errno = EINVAL;
317
        return -1;
318
    }
319
    *size = be64_to_cpu(s);
320
    *blocksize = 1024;
321
    TRACE("Size is %" PRIu64, *size);
322

    
323
    if (!name) {
324
        if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
325
            LOG("read failed (flags)");
326
            errno = EINVAL;
327
            return -1;
328
        }
329
        *flags = be32_to_cpup(flags);
330
    } else {
331
        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
332
            LOG("read failed (tmp)");
333
            errno = EINVAL;
334
            return -1;
335
        }
336
        *flags |= be32_to_cpu(tmp);
337
    }
338
    if (read_sync(csock, &buf, 124) != 124) {
339
        LOG("read failed (buf)");
340
        errno = EINVAL;
341
        return -1;
342
    }
343
        return 0;
344
}
345

    
346
#ifdef __linux__
347
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
348
{
349
    TRACE("Setting block size to %lu", (unsigned long)blocksize);
350

    
351
    if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) {
352
        int serrno = errno;
353
        LOG("Failed setting NBD block size");
354
        errno = serrno;
355
        return -1;
356
    }
357

    
358
        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
359

    
360
    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
361
        int serrno = errno;
362
        LOG("Failed setting size (in blocks)");
363
        errno = serrno;
364
        return -1;
365
    }
366

    
367
    if (flags & NBD_FLAG_READ_ONLY) {
368
        int read_only = 1;
369
        TRACE("Setting readonly attribute");
370

    
371
        if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
372
            int serrno = errno;
373
            LOG("Failed setting read-only attribute");
374
            errno = serrno;
375
            return -1;
376
        }
377
    }
378

    
379
    TRACE("Clearing NBD socket");
380

    
381
    if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
382
        int serrno = errno;
383
        LOG("Failed clearing NBD socket");
384
        errno = serrno;
385
        return -1;
386
    }
387

    
388
    TRACE("Setting NBD socket");
389

    
390
    if (ioctl(fd, NBD_SET_SOCK, csock) == -1) {
391
        int serrno = errno;
392
        LOG("Failed to set NBD socket");
393
        errno = serrno;
394
        return -1;
395
    }
396

    
397
    TRACE("Negotiation ended");
398

    
399
    return 0;
400
}
401

    
402
int nbd_disconnect(int fd)
403
{
404
    ioctl(fd, NBD_CLEAR_QUE);
405
    ioctl(fd, NBD_DISCONNECT);
406
    ioctl(fd, NBD_CLEAR_SOCK);
407
    return 0;
408
}
409

    
410
int nbd_client(int fd)
411
{
412
    int ret;
413
    int serrno;
414

    
415
    TRACE("Doing NBD loop");
416

    
417
    ret = ioctl(fd, NBD_DO_IT);
418
    serrno = errno;
419

    
420
    TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
421

    
422
    TRACE("Clearing NBD queue");
423
    ioctl(fd, NBD_CLEAR_QUE);
424

    
425
    TRACE("Clearing NBD socket");
426
    ioctl(fd, NBD_CLEAR_SOCK);
427

    
428
    errno = serrno;
429
    return ret;
430
}
431
#else
432
/*
 * Stub used when not building for Linux: the operation is unsupported.
 * The parameter list matches the Linux implementation (including `flags`)
 * so both variants satisfy the same prototype.
 *
 * Always sets errno to ENOTSUP and returns -1.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    errno = ENOTSUP;
    return -1;
}
437

    
438
/*
 * Stub used when not building for Linux: the operation is unsupported.
 * Always sets errno to ENOTSUP and returns -1.
 */
int nbd_disconnect(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
443

    
444
/*
 * Stub used when not building for Linux: the operation is unsupported.
 * Always sets errno to ENOTSUP and returns -1.
 */
int nbd_client(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
449
#endif
450

    
451
int nbd_send_request(int csock, struct nbd_request *request)
452
{
453
    uint8_t buf[4 + 4 + 8 + 8 + 4];
454

    
455
    cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
456
    cpu_to_be32w((uint32_t*)(buf + 4), request->type);
457
    cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
458
    cpu_to_be64w((uint64_t*)(buf + 16), request->from);
459
    cpu_to_be32w((uint32_t*)(buf + 24), request->len);
460

    
461
    TRACE("Sending request to client: "
462
          "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
463
          request->from, request->len, request->handle, request->type);
464

    
465
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
466
        LOG("writing to socket failed");
467
        errno = EINVAL;
468
        return -1;
469
    }
470
    return 0;
471
}
472

    
473
static int nbd_receive_request(int csock, struct nbd_request *request)
474
{
475
    uint8_t buf[4 + 4 + 8 + 8 + 4];
476
    uint32_t magic;
477

    
478
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
479
        LOG("read failed");
480
        errno = EINVAL;
481
        return -1;
482
    }
483

    
484
    /* Request
485
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
486
       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
487
       [ 8 .. 15]   handle
488
       [16 .. 23]   from
489
       [24 .. 27]   len
490
     */
491

    
492
    magic = be32_to_cpup((uint32_t*)buf);
493
    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
494
    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
495
    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
496
    request->len   = be32_to_cpup((uint32_t*)(buf + 24));
497

    
498
    TRACE("Got request: "
499
          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
500
          magic, request->type, request->from, request->len);
501

    
502
    if (magic != NBD_REQUEST_MAGIC) {
503
        LOG("invalid magic (got 0x%x)", magic);
504
        errno = EINVAL;
505
        return -1;
506
    }
507
    return 0;
508
}
509

    
510
int nbd_receive_reply(int csock, struct nbd_reply *reply)
511
{
512
    uint8_t buf[NBD_REPLY_SIZE];
513
    uint32_t magic;
514

    
515
    memset(buf, 0xAA, sizeof(buf));
516

    
517
    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
518
        LOG("read failed");
519
        errno = EINVAL;
520
        return -1;
521
    }
522

    
523
    /* Reply
524
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
525
       [ 4 ..  7]    error   (0 == no error)
526
       [ 7 .. 15]    handle
527
     */
528

    
529
    magic = be32_to_cpup((uint32_t*)buf);
530
    reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
531
    reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
532

    
533
    TRACE("Got reply: "
534
          "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
535
          magic, reply->error, reply->handle);
536

    
537
    if (magic != NBD_REPLY_MAGIC) {
538
        LOG("invalid magic (got 0x%x)", magic);
539
        errno = EINVAL;
540
        return -1;
541
    }
542
    return 0;
543
}
544

    
545
static int nbd_send_reply(int csock, struct nbd_reply *reply)
546
{
547
    uint8_t buf[4 + 4 + 8];
548

    
549
    /* Reply
550
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
551
       [ 4 ..  7]    error   (0 == no error)
552
       [ 7 .. 15]    handle
553
     */
554
    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
555
    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
556
    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
557

    
558
    TRACE("Sending response to client");
559

    
560
    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
561
        LOG("writing to socket failed");
562
        errno = EINVAL;
563
        return -1;
564
    }
565
    return 0;
566
}
567

    
568
/*
 * Serve a single client request: read it from csock, perform it against
 * `bs`, and send the reply.
 *
 * bs         - block device the request is executed on
 * csock      - client socket
 * size       - export size; requests reaching past it are rejected
 * dev_offset - byte offset added to request.from before accessing `bs`
 * offset     - in/out: advanced by request.len after a completed read/write
 * nbdflags   - if NBD_FLAG_READ_ONLY is set, writes are answered with
 *              error 1 instead of being executed
 * data       - scratch buffer of data_size bytes; for reads it holds the
 *              NBD_REPLY_SIZE-byte reply header followed by the payload
 * data_size  - capacity of `data` in bytes
 *
 * Returns 0 after a completed read or write, 1 on a disconnect request
 * (errno set to 0), and -1 on any failure (errno set to EINVAL, except
 * where a called helper already returned -1).
 */
int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
             off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size)
{
    struct nbd_request request;
    struct nbd_reply reply;

    TRACE("Reading request.");

    if (nbd_receive_request(csock, &request) == -1) {
        return -1;
    }

    /*
     * request.len comes from the client.  Compare it against the remaining
     * room by subtraction: adding NBD_REPLY_SIZE to it is 32-bit unsigned
     * arithmetic and would wrap for values near UINT32_MAX, letting an
     * oversized request through.  Also reject a buffer that cannot even
     * hold the reply header.
     */
    if (data_size < NBD_REPLY_SIZE ||
        request.len > (uint32_t)data_size - NBD_REPLY_SIZE) {
        LOG("len (%u) plus %d-byte reply header exceeds buffer size (%d)",
            request.len, NBD_REPLY_SIZE, data_size);
        errno = EINVAL;
        return -1;
    }

    if ((request.from + request.len) < request.from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        errno = EINVAL;
        return -1;
    }

    if ((request.from + request.len) > size) {
        LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
            ", Offset: %" PRIu64 "\n",
            request.from, request.len, (uint64_t)size, dev_offset);
        LOG("requested operation past EOF--bad client?");
        errno = EINVAL;
        return -1;
    }

    TRACE("Decoding type");

    reply.handle = request.handle;
    reply.error = 0;

    switch (request.type) {
    case NBD_CMD_READ:
        TRACE("Request type is READ");

        /*
         * NOTE(review): any negative result is treated as failure, on the
         * assumption that bdrv_read() may report errors with values other
         * than exactly -1 -- confirm against the block layer.
         */
        if (bdrv_read(bs, (request.from + dev_offset) / 512,
                      data + NBD_REPLY_SIZE,
                      request.len / 512) < 0) {
            LOG("reading from file failed");
            errno = EINVAL;
            return -1;
        }
        *offset += request.len;

        TRACE("Read %u byte(s)", request.len);

        /* Reply
           [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
           [ 4 ..  7]    error   (0 == no error)
           [ 8 .. 15]    handle
         */

        /* The header is built in front of the payload and sent in one go. */
        cpu_to_be32w((uint32_t*)data, NBD_REPLY_MAGIC);
        cpu_to_be32w((uint32_t*)(data + 4), reply.error);
        cpu_to_be64w((uint64_t*)(data + 8), reply.handle);

        TRACE("Sending data to client");

        if (write_sync(csock, data,
                       request.len + NBD_REPLY_SIZE) !=
                       request.len + NBD_REPLY_SIZE) {
            LOG("writing to socket failed");
            errno = EINVAL;
            return -1;
        }
        break;
    case NBD_CMD_WRITE:
        TRACE("Request type is WRITE");

        TRACE("Reading %u byte(s)", request.len);

        /* The payload is always consumed, even for a read-only export. */
        if (read_sync(csock, data, request.len) != request.len) {
            LOG("reading from socket failed");
            errno = EINVAL;
            return -1;
        }

        if (nbdflags & NBD_FLAG_READ_ONLY) {
            TRACE("Server is read-only, return error");
            reply.error = 1;
        } else {
            TRACE("Writing to device");

            /* NOTE(review): same negative-return assumption as for reads. */
            if (bdrv_write(bs, (request.from + dev_offset) / 512,
                           data, request.len / 512) < 0) {
                LOG("writing to file failed");
                errno = EINVAL;
                return -1;
            }

            *offset += request.len;
        }

        if (nbd_send_reply(csock, &reply) == -1) {
            return -1;
        }
        break;
    case NBD_CMD_DISC:
        TRACE("Request type is DISCONNECT");
        errno = 0;
        return 1;
    default:
        LOG("invalid request type (%u) received", request.type);
        errno = EINVAL;
        return -1;
    }

    TRACE("Request/Reply complete");

    return 0;
}