Statistics
| Branch: | Revision:

root / nbd.c @ 975b092b

History | View | Annotate | Download (10.6 kB)

1
/*\
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
\*/
19

    
20
#include "nbd.h"
21

    
22
#include <errno.h>
23
#include <string.h>
24
#include <sys/ioctl.h>
25
#include <ctype.h>
26
#include <inttypes.h>
27
#include <sys/socket.h>
28
#include <netinet/in.h>
29
#include <netinet/tcp.h>
30
#include <arpa/inet.h>
31
#include <netdb.h>
32

    
33
/* Defined elsewhere; a non-zero value switches TRACE() output on. */
extern int verbose;

/*
 * Print a printf-style message on stderr, prefixed with the source file,
 * the enclosing function and the line number.  A newline is appended, so
 * callers pass messages without a trailing "\n".
 */
#define LOG(msg, ...)                                                   \
    do {                                                                \
        fprintf(stderr, "%s:%s():L%d: " msg "\n",                       \
                __FILE__, __func__, __LINE__, ## __VA_ARGS__);          \
    } while (0)

/* Same as LOG(), but silent unless `verbose` is non-zero. */
#define TRACE(msg, ...)                                                 \
    do {                                                                \
        if (verbose) {                                                  \
            LOG(msg, ## __VA_ARGS__);                                   \
        }                                                               \
    } while (0)
43

    
44
/* Constants of the "official" NBD API: wire magics and ioctl requests. */

/* First 32-bit word of every request header / reply header. */
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* Every NBD ioctl uses type 0xab; the second number selects the operation. */
#define NBD_IOCTL(nr)           _IO(0xab, nr)

#define NBD_SET_SOCK            NBD_IOCTL(0)
#define NBD_SET_BLKSIZE         NBD_IOCTL(1)
#define NBD_SET_SIZE            NBD_IOCTL(2)
#define NBD_DO_IT               NBD_IOCTL(3)
#define NBD_CLEAR_SOCK          NBD_IOCTL(4)
#define NBD_CLEAR_QUE           NBD_IOCTL(5)
#define NBD_PRINT_DEBUG         NBD_IOCTL(6)
#define NBD_SET_SIZE_BLOCKS     NBD_IOCTL(7)
#define NBD_DISCONNECT          NBD_IOCTL(8)

/* End of the NBD API constants. */
60

    
61
/* Convenience wrappers selecting the transfer direction of wr_sync(). */
#define read_sync(fd, buffer, size) wr_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) wr_sync(fd, buffer, size, false)

/*
 * Read or write exactly `size` bytes on `fd`, looping over short transfers.
 *
 *  fd       descriptor to read from / write to
 *  buffer   destination (read) or source (write) of the bytes
 *  size     number of bytes to transfer
 *  do_read  true to read(), false to write()
 *
 * Returns the number of bytes transferred.  That is `size` on success, a
 * smaller count if end-of-file (a zero-length transfer) stops the loop
 * early, and 0 if an unrecoverable error occurs (errno is left as set by
 * the failing call).  Callers detect failure by comparing against `size`.
 */
static size_t wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    /* byte pointer: arithmetic on void * is a compiler extension */
    unsigned char *bytes = buffer;
    size_t offset = 0;

    while (offset < size) {
        ssize_t len;

        if (do_read) {
            len = read(fd, bytes + offset, size - offset);
        } else {
            len = write(fd, bytes + offset, size - offset);
        }

        if (len == -1) {
            /*
             * recoverable error: the descriptor was not ready (EAGAIN) or
             * the call was interrupted by a signal before moving any data
             * (EINTR) -- simply try again
             */
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            }

            /* unrecoverable error */
            return 0;
        }

        /* eof */
        if (len == 0) {
            break;
        }

        offset += len;
    }

    return offset;
}
97

    
98
/*
 * Open a TCP/IPv4 connection to `address`:`port`.
 *
 *  address  dotted-quad string accepted by inet_aton(), otherwise a host
 *           name that is resolved with gethostbyname()
 *  port     TCP port in host byte order
 *
 * Returns the connected socket descriptor, or -1 with errno set.  A host
 * name that cannot be resolved to an IPv4 address is reported as EINVAL.
 */
static int tcp_socket_outgoing(const char *address, uint16_t port)
{
    int s;
    struct in_addr in;
    struct sockaddr_in addr;
    int serrno;

    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1) {
        return -1;
    }

    if (inet_aton(address, &in) == 0) {
        struct hostent *ent;

        ent = gethostbyname(address);
        if (ent == NULL || ent->h_addrtype != AF_INET ||
            ent->h_length != (int)sizeof(in) ||
            ent->h_addr_list[0] == NULL) {
            /*
             * The resolver reports failures through h_errno and leaves
             * errno unspecified; give the caller a defined value.
             */
            errno = EINVAL;
            goto error;
        }

        memcpy(&in, ent->h_addr_list[0], sizeof(in));
    }

    /* clear the whole structure, padding (sin_zero) included */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    memcpy(&addr.sin_addr.s_addr, &in, sizeof(in));

    if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        goto error;
    }

    return s;

error:
    /* close() may overwrite errno; hand the original cause to the caller */
    serrno = errno;
    close(s);
    errno = serrno;
    return -1;
}
136

    
137
/*
 * Create a listening TCP/IPv4 socket bound to `address`:`port`.
 *
 *  address  dotted-quad string accepted by inet_aton(), otherwise a host
 *           name that is resolved with gethostbyname()
 *  port     TCP port in host byte order
 *
 * SO_REUSEADDR is enabled before bind(), and the listen backlog is 128.
 *
 * Returns the listening socket descriptor, or -1 with errno set.  A host
 * name that cannot be resolved to an IPv4 address is reported as EINVAL.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    int s;
    struct in_addr in;
    struct sockaddr_in addr;
    int serrno;
    int opt;

    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1) {
        return -1;
    }

    if (inet_aton(address, &in) == 0) {
        struct hostent *ent;

        ent = gethostbyname(address);
        if (ent == NULL || ent->h_addrtype != AF_INET ||
            ent->h_length != (int)sizeof(in) ||
            ent->h_addr_list[0] == NULL) {
            /*
             * The resolver reports failures through h_errno and leaves
             * errno unspecified; give the caller a defined value.
             */
            errno = EINVAL;
            goto error;
        }

        memcpy(&in, ent->h_addr_list[0], sizeof(in));
    }

    /* clear the whole structure, padding (sin_zero) included */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    memcpy(&addr.sin_addr.s_addr, &in, sizeof(in));

    opt = 1;
    if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1) {
        goto error;
    }

    if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        goto error;
    }

    if (listen(s, 128) == -1) {
        goto error;
    }

    return s;

error:
    /* close() may overwrite errno; hand the original cause to the caller */
    serrno = errno;
    close(s);
    errno = serrno;
    return -1;
}
185

    
186
/* Basic flow

   Server         Client

   Negotiate
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2, i.e. disconnect)
*/
199

    
200
/*
 * Send the server's opening negotiation block to the client on `csock`.
 *
 *  bs     not used by this function
 *  csock  connected client socket
 *  size   value placed in the "size" field of the block
 *
 * Returns 0 on success.  If the block cannot be written completely,
 * returns -1 with errno set to EINVAL.
 */
int nbd_negotiate(BlockDriverState *bs, int csock, off_t size)
{
    char buf[8 + 8 + 8 + 128];
    uint64_t field;

    /* Negotiate
       [ 0 ..   7]   passwd   ("NBDMAGIC")
       [ 8 ..  15]   magic    (0x00420281861253)
       [16 ..  23]   size
       [24 .. 151]   reserved (0)
     */

    TRACE("Beginning negotiation.");
    memcpy(buf, "NBDMAGIC", 8);

    /*
     * buf is a plain char array with no 8-byte alignment guarantee, so the
     * 64-bit fields are encoded into an aligned temporary and then copied
     * in byte-wise instead of being stored through a casted pointer.
     */
    cpu_to_be64w(&field, 0x00420281861253LL);
    memcpy(buf + 8, &field, sizeof(field));
    cpu_to_be64w(&field, (uint64_t)size);
    memcpy(buf + 16, &field, sizeof(field));

    memset(buf + 24, 0, 128);

    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
        LOG("write failed");
        errno = EINVAL;
        return -1;
    }

    TRACE("Negotation succeeded.");

    return 0;
}
227

    
228
int nbd_receive_negotiate(int fd, int csock)
229
{
230
        char buf[8 + 8 + 8 + 128];
231
        uint64_t magic;
232
        off_t size;
233
        size_t blocksize;
234

    
235
        TRACE("Receiving negotation.");
236

    
237
        if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
238
                LOG("read failed");
239
                errno = EINVAL;
240
                return -1;
241
        }
242

    
243
        magic = be64_to_cpup((uint64_t*)(buf + 8));
244
        size = be64_to_cpup((uint64_t*)(buf + 16));
245
        blocksize = 1024;
246

    
247
        TRACE("Magic is %c%c%c%c%c%c%c%c",
248
              isprint(buf[0]) ? buf[0] : '.',
249
              isprint(buf[1]) ? buf[1] : '.',
250
              isprint(buf[2]) ? buf[2] : '.',
251
              isprint(buf[3]) ? buf[3] : '.',
252
              isprint(buf[4]) ? buf[4] : '.',
253
              isprint(buf[5]) ? buf[5] : '.',
254
              isprint(buf[6]) ? buf[6] : '.',
255
              isprint(buf[7]) ? buf[7] : '.');
256
        TRACE("Magic is 0x%" PRIx64, magic);
257
        TRACE("Size is %" PRIu64, size);
258

    
259
        if (memcmp(buf, "NBDMAGIC", 8) != 0) {
260
                LOG("Invalid magic received");
261
                errno = EINVAL;
262
                return -1;
263
        }
264

    
265
        TRACE("Checking magic");
266

    
267
        if (magic != 0x00420281861253LL) {
268
                LOG("Bad magic received");
269
                errno = EINVAL;
270
                return -1;
271
        }
272

    
273
        TRACE("Setting block size to %lu", (unsigned long)blocksize);
274

    
275
        if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) {
276
                int serrno = errno;
277
                LOG("Failed setting NBD block size");
278
                errno = serrno;
279
                return -1;
280
        }
281

    
282
        TRACE("Setting size to %llu block(s)",
283
              (unsigned long long)(size / blocksize));
284

    
285
        if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
286
                int serrno = errno;
287
                LOG("Failed setting size (in blocks)");
288
                errno = serrno;
289
                return -1;
290
        }
291

    
292
        TRACE("Clearing NBD socket");
293

    
294
        if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
295
                int serrno = errno;
296
                LOG("Failed clearing NBD socket");
297
                errno = serrno;
298
                return -1;
299
        }
300

    
301
        TRACE("Setting NBD socket");
302

    
303
        if (ioctl(fd, NBD_SET_SOCK, csock) == -1) {
304
                int serrno = errno;
305
                LOG("Failed to set NBD socket");
306
                errno = serrno;
307
                return -1;
308
        }
309

    
310
        TRACE("Negotiation ended");
311

    
312
        return 0;
313
}
314

    
315
int nbd_disconnect(int fd)
316
{
317
        ioctl(fd, NBD_CLEAR_QUE);
318
        ioctl(fd, NBD_DISCONNECT);
319
        ioctl(fd, NBD_CLEAR_SOCK);
320
        return 0;
321
}
322

    
323
int nbd_client(int fd, int csock)
324
{
325
        int ret;
326
        int serrno;
327

    
328
        TRACE("Doing NBD loop");
329

    
330
        ret = ioctl(fd, NBD_DO_IT);
331
        serrno = errno;
332

    
333
        TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
334

    
335
        TRACE("Clearing NBD queue");
336
        ioctl(fd, NBD_CLEAR_QUE);
337

    
338
        TRACE("Clearing NBD socket");
339
        ioctl(fd, NBD_CLEAR_SOCK);
340

    
341
        errno = serrno;
342
        return ret;
343
}
344

    
345
/*
 * Serve one NBD request from the client on `csock`.
 *
 * Reads a request header, validates it, carries out the READ or WRITE via
 * bdrv_read()/bdrv_write() on `bs`, and sends the reply (followed by the
 * data for a READ).
 *
 *  bs          block device the request is executed on
 *  csock       connected client socket
 *  size        upper bound for from + len; requests past it are rejected
 *  dev_offset  added to the request's "from" before it is converted to a
 *              sector number
 *  offset      incremented by len after each successful READ or WRITE
 *  readonly    when true, a WRITE's payload is consumed but the reply
 *              carries error code 1 and nothing is written
 *
 * Returns 0 after a completed READ/WRITE exchange, 1 when the client sent
 * a DISCONNECT request (errno is set to 0), and -1 on any failure (errno
 * is set to EINVAL).
 */
int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, off_t *offset, bool readonly)
{
#ifndef _REENTRANT
    static uint8_t data[1024 * 1024]; // keep this off of the stack
#else
    uint8_t data[1024 * 1024];
#endif
    const uint64_t sector_bytes = 512;
    uint8_t buf[4 + 4 + 8 + 8 + 4];
    uint32_t word32;
    uint64_t word64;
    uint32_t magic;
    uint32_t type;
    uint64_t from;
    uint32_t len;
    bool aligned;

    TRACE("Reading request.");

    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
        LOG("read failed");
        errno = EINVAL;
        return -1;
    }

    /* Request
      [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
      [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
      [ 8 .. 15]   handle
      [16 .. 23]   from
      [24 .. 27]   len
     */

    /*
     * buf is a byte array without alignment guarantees: every field is
     * copied into an aligned temporary before it is decoded.
     */
    memcpy(&word32, buf, sizeof(word32));
    magic = be32_to_cpup(&word32);
    memcpy(&word32, buf + 4, sizeof(word32));
    type = be32_to_cpup(&word32);
    memcpy(&word64, buf + 16, sizeof(word64));
    from = be64_to_cpup(&word64);
    memcpy(&word32, buf + 24, sizeof(word32));
    len = be32_to_cpup(&word32);

    TRACE("Got request: "
          "{ magic = 0x%x, .type = %u, from = %" PRIu64" , len = %u }",
          magic, type, from, len);

    if (magic != NBD_REQUEST_MAGIC) {
        LOG("invalid magic (got 0x%x)", magic);
        errno = EINVAL;
        return -1;
    }

    if (len > sizeof(data)) {
        LOG("len (%u) is larger than max len (%lu)",
            len, (unsigned long)sizeof(data));
        errno = EINVAL;
        return -1;
    }

    if ((from + len) < from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        errno = EINVAL;
        return -1;
    }

    if ((from + len) > (uint64_t)size) {
        /* LOG() appends the newline itself */
        LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
            ", Offset: %" PRIu64,
            from, len, (uint64_t)size, dev_offset);
        LOG("requested operation past EOF--bad client?");
        errno = EINVAL;
        return -1;
    }

    /*
     * The block layer is addressed in whole 512-byte sectors.  A position
     * or length that is not a multiple of 512 would be rounded down below,
     * so a READ would return bytes that were never read from the device
     * (stale buffer contents) and a WRITE would silently drop its tail.
     */
    aligned = ((from + dev_offset) % sector_bytes == 0) &&
              (len % sector_bytes == 0);

    /* Reply
     [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
     [ 4 ..  7]    error   (0 == no error)
     [ 8 .. 15]    handle
     */
    /* bytes 8..15 still hold the request's handle and are sent back as-is */
    cpu_to_be32w(&word32, NBD_REPLY_MAGIC);
    memcpy(buf, &word32, sizeof(word32));
    cpu_to_be32w(&word32, 0);
    memcpy(buf + 4, &word32, sizeof(word32));

    TRACE("Decoding type");

    switch (type) {
    case 0:
        TRACE("Request type is READ");

        if (!aligned) {
            LOG("unaligned request (from + offset = %" PRIu64 ", len = %u)",
                from + dev_offset, len);
            errno = EINVAL;
            return -1;
        }

        if (bdrv_read(bs, (from + dev_offset) / 512, data, len / 512) == -1) {
            LOG("reading from file failed");
            errno = EINVAL;
            return -1;
        }
        *offset += len;

        TRACE("Read %u byte(s)", len);

        TRACE("Sending OK response");

        if (write_sync(csock, buf, 16) != 16) {
            LOG("writing to socket failed");
            errno = EINVAL;
            return -1;
        }

        TRACE("Sending data to client");

        if (write_sync(csock, data, len) != len) {
            LOG("writing to socket failed");
            errno = EINVAL;
            return -1;
        }
        break;
    case 1:
        TRACE("Request type is WRITE");

        TRACE("Reading %u byte(s)", len);

        /* the payload is always consumed, even if it will be refused */
        if (read_sync(csock, data, len) != len) {
            LOG("reading from socket failed");
            errno = EINVAL;
            return -1;
        }

        if (readonly) {
            TRACE("Server is read-only, return error");

            cpu_to_be32w(&word32, 1);
            memcpy(buf + 4, &word32, sizeof(word32));
        } else {
            TRACE("Writing to device");

            if (!aligned) {
                LOG("unaligned request (from + offset = %" PRIu64
                    ", len = %u)", from + dev_offset, len);
                errno = EINVAL;
                return -1;
            }

            if (bdrv_write(bs, (from + dev_offset) / 512, data, len / 512) == -1) {
                LOG("writing to file failed");
                errno = EINVAL;
                return -1;
            }

            *offset += len;
        }

        TRACE("Sending response to client");

        if (write_sync(csock, buf, 16) != 16) {
            LOG("writing to socket failed");
            errno = EINVAL;
            return -1;
        }
        break;
    case 2:
        TRACE("Request type is DISCONNECT");
        errno = 0;
        return 1;
    default:
        LOG("invalid request type (%u) received", type);
        errno = EINVAL;
        return -1;
    }

    TRACE("Request/Reply complete");

    return 0;
}