/* 90d07efe764e2a4ede07cd86bc4a7ee141f7f07b — [archipelago] xseg/sys/xsegbd.c */
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include "xsegdev.h"
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26
27 MODULE_DESCRIPTION("xsegbd");
28 MODULE_AUTHOR("XSEG");
29 MODULE_LICENSE("GPL");
30
/* Module parameters (load-time settable, mode 0644 in sysfs). */
static long sector_size = 0;	/* non-zero overrides the size reported by the target */
static long blksize = 512;	/* physical block size advertised to the block layer */
static int major = 0;		/* block major number; 0 requests dynamic allocation */
static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";	/* xseg segment name */
static char spec[256] = "xsegdev:xsegbd:4:512:64:1024:12";	/* segment spec; first field is the type (see xsegbd_xseg_init) */

module_param(sector_size, long, 0644);
module_param(blksize, long, 0644);
module_param(major, int, 0644);
module_param_string(name, name, sizeof(name), 0644);
module_param_string(spec, spec, sizeof(spec), 0644);

/* Single module-wide xseg state shared by every xsegbd device. */
static struct xsegbd xsegbd;
static DEFINE_MUTEX(xsegbd_mutex);	/* serializes device list changes and registration */
static LIST_HEAD(xsegbd_dev_list);	/* all active xsegbd devices */
46
47 /* ********************* */
48 /* ** XSEG Operations ** */
49 /* ********************* */
50
51 static void *xsegdev_malloc(uint64_t size)
52 {
53         return kmalloc((size_t)size, GFP_KERNEL);
54 }
55
56 static void *xsegdev_realloc(void *mem, uint64_t size)
57 {
58         return krealloc(mem, (size_t)size, GFP_KERNEL);
59 }
60
/* xseg memory hook: free a kernel-heap allocation. */
static void xsegdev_mfree(void *ptr)
{
	/* fixed: do not 'return' a void expression */
	kfree(ptr);
}
65
66 static long xsegdev_allocate(const char *name, uint64_t size)
67 {
68         int r;
69         struct xsegdev *xsegdev = xsegdev_get(0);
70
71         r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
72         if (r) {
73                 XSEGLOG("cannot acquire xsegdev");
74                 goto err;
75         }
76
77         if (xsegdev->segment) {
78                 XSEGLOG("destroying existing xsegdev segment");
79                 r = xsegdev_destroy_segment(xsegdev);
80                 if (r)
81                         goto err;
82         }
83
84         XSEGLOG("creating xsegdev segment size %llu", size);
85         r = xsegdev_create_segment(xsegdev, size, 1);
86         if (r)
87                 goto err;
88
89         xsegdev->segsize = size;
90         xsegdev_put(xsegdev);
91         return 0;
92
93 err:
94         return r;
95 }
96
/*
 * Destroy the xsegdev segment.  'name' is unused: there is a single
 * xsegdev instance (minor 0).  Returns 0 on success or a negative errno.
 */
static long xsegdev_deallocate(const char *name)
{
	struct xsegdev *xsegdev = xsegdev_get(0);
	int r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
	if (r)
		return r;

	/* allow teardown even if the segment was marked reserved */
	clear_bit(XSEGDEV_RESERVED, &xsegdev->flags);
	XSEGLOG("destroying segment");
	r = xsegdev_destroy_segment(xsegdev);
	if (r)
		XSEGLOG("   ...failed");
	xsegdev_put(xsegdev);
	return r;
}
112
113 static long xseg_callback(void *arg);
114
/*
 * Map the xsegdev segment and claim it by installing our callback.
 * Returns the segment base address, or NULL when the device cannot be
 * acquired, no segment exists, 'size' exceeds the segment, or the
 * segment is already claimed (callback set).  'name' is unused.
 * NOTE(review): the callback check-then-set is not atomic; confirm
 * callers serialize concurrent map attempts.
 */
static void *xsegdev_map(const char *name, uint64_t size)
{
	struct xseg *xseg = NULL;
	struct xsegdev *dev = xsegdev_get(0);
	int r;
	r = IS_ERR(dev) ? PTR_ERR(dev) : 0;
	if (r)
		goto out;

	if (!dev->segment)
		goto put_out;

	if (size > dev->segsize)
		goto put_out;

	if (dev->callback) /* in use */
		goto put_out;

	dev->callback = xseg_callback;
	xseg = (void *)dev->segment;

put_out:
	xsegdev_put(dev);
out:
	return xseg;
}
141
142 static void xsegdev_unmap(void *ptr, uint64_t size)
143 {
144         struct xsegdev *xsegdev = xsegdev_get(0);
145         int r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
146         if (r)
147                 return;
148
149         //xsegdev->callarg = NULL;
150         xsegdev->callback = NULL;
151         xsegdev_put(xsegdev);
152 }
153
/* xseg segment backend implemented on top of the xsegdev char device. */
static struct xseg_type xseg_xsegdev = {
	/* xseg operations */
	{
		.malloc = xsegdev_malloc,
		.realloc = xsegdev_realloc,
		.mfree = xsegdev_mfree,
		.allocate = xsegdev_allocate,
		.deallocate = xsegdev_deallocate,
		.map = xsegdev_map,
		.unmap = xsegdev_unmap
	},
	/* name */
	"xsegdev"
};
168
/*
 * Stub signal ops for the "posix" peer type.  The kernel never waits
 * on a port as a posix peer, so init/quit/wait are success no-ops;
 * only posix_signal() below does real work.
 */
static int posix_signal_init(void)
{
	return 0;
}

static void posix_signal_quit(void) { }

static int posix_prepare_wait(struct xseg_port *port)
{
	return 0;
}

static int posix_cancel_wait(struct xseg_port *port)
{
	return 0;
}

static int posix_wait_signal(struct xseg_port *port, uint32_t timeout)
{
	return 0;
}
190
/*
 * Wake a userspace posix peer: the port's waitcue carries the waiter's
 * pid, to which SIGIO is delivered.  Returns send_sig()'s result, or
 * -ENOENT if no such pid/task exists.  The pid/task lookup is done
 * under rcu_read_lock().
 */
static int posix_signal(struct xseg_port *port)
{
	struct pid *pid;
	struct task_struct *task;
	int ret = -ENOENT;

	rcu_read_lock();
	pid = find_vpid((pid_t)port->waitcue);
	if (!pid)
		goto out;
	task = pid_task(pid, PIDTYPE_PID);
	if (!task)
		goto out;

	ret = send_sig(SIGIO, task, 1);
out:
	rcu_read_unlock();
	return ret;
}
210
/* The posix peer never allocates from kernel space: allocation stubs. */
static void *posix_malloc(uint64_t size)
{
	return NULL;
}

static void *posix_realloc(void *mem, uint64_t size)
{
	return NULL;
}

static void posix_mfree(void *mem) { }
222
/* Peer type for userspace processes, signalled via SIGIO (see posix_signal). */
static struct xseg_peer xseg_peer_posix = {
	/* xseg signal operations */
	{
		.signal_init = posix_signal_init,
		.signal_quit = posix_signal_quit,
		.cancel_wait = posix_cancel_wait,
		.prepare_wait = posix_prepare_wait,
		.wait_signal = posix_wait_signal,
		.signal = posix_signal,
		.malloc = posix_malloc,
		.realloc = posix_realloc,
		.mfree = posix_mfree
	},
	/* name */
	"posix"
};
239
/*
 * Stub signal ops for the "xsegdev" peer type.  The kernel side is
 * driven by the xsegdev callback instead of port waiting, so the
 * wait/signal ops report failure (-1).
 */
static int xsegdev_signal_init(void)
{
	return 0;
}

static void xsegdev_signal_quit(void) { }

static int xsegdev_prepare_wait(struct xseg_port *port)
{
	return -1;
}

static int xsegdev_cancel_wait(struct xseg_port *port)
{
	return -1;
}

static int xsegdev_wait_signal(struct xseg_port *port, uint32_t timeout)
{
	return -1;
}

static int xsegdev_signal(struct xseg_port *port)
{
	return -1;
}
266
/* Peer type for the kernel side itself; signalling is handled by the
 * xsegdev callback, so the signal ops above are unsupported stubs. */
static struct xseg_peer xseg_peer_xsegdev = {
	/* xseg signal operations */
	{
		.signal_init = xsegdev_signal_init,
		.signal_quit = xsegdev_signal_quit,
		.cancel_wait = xsegdev_cancel_wait,
		.prepare_wait = xsegdev_prepare_wait,
		.wait_signal = xsegdev_wait_signal,
		.signal = xsegdev_signal,
		.malloc = xsegdev_malloc,
		.realloc = xsegdev_realloc,
		.mfree = xsegdev_mfree
	},
	/* name */
	"xsegdev"
};
283
284
285 /* ************************* */
286 /* ***** sysfs helpers ***** */
287 /* ************************* */
288
/* Map an embedded struct device back to its xsegbd_device container. */
static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
{
	return container_of(dev, struct xsegbd_device, dev);
}

/* Take a reference on the device's embedded struct device. */
static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
{
	/* FIXME */
	return get_device(&xsegbd_dev->dev);
}

/* Drop a reference taken with xsegbd_get_dev(). */
static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
{
	put_device(&xsegbd_dev->dev);
}
304
305 /* ************************* */
306 /* ** XSEG Initialization ** */
307 /* ************************* */
308
309 int xsegbd_xseg_init(void)
310 {
311         struct xsegdev *xsegdev;
312         int r;
313
314         if (!xsegbd.name[0])
315                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
316
317         XSEGLOG("registering xseg types");
318         xsegbd.namesize = strlen(xsegbd.name);
319
320         r = xseg_register_type(&xseg_xsegdev);
321         if (r)
322                 goto err0;
323
324         r = xseg_register_peer(&xseg_peer_posix);
325         if (r)
326                 goto err1;
327
328         r = xseg_register_peer(&xseg_peer_xsegdev);
329         if (r)
330                 goto err2;
331
332         r = xseg_initialize("xsegdev");
333         if (r) {
334                 XSEGLOG("cannot initialize 'xsegdev' peer");
335                 goto err3;
336         }
337
338         r = xseg_parse_spec(spec, &xsegbd.config);
339         if (r)
340                 goto err3;
341
342         if (strncmp(xsegbd.config.type, "xsegdev", 16))
343                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'xsegdev'",
344                          xsegbd.config.type);
345
346         xsegdev = xsegdev_get(0);
347         if (!xsegdev->segment) {
348                 XSEGLOG("creating segment");
349                 r = xseg_create(&xsegbd.config);
350                 if (r) {
351                         XSEGLOG("cannot create segment");
352                         goto err3;
353                 }
354         }
355         xsegdev_put(xsegdev);
356
357         XSEGLOG("joining segment");
358         xsegbd.xseg = xseg_join("xsegdev", "xsegdev");
359         if (!xsegbd.xseg) {
360                 XSEGLOG("cannot join segment");
361                 r = -EFAULT;
362                 goto err3;
363         }
364
365         return 0;
366 err3:
367         xseg_unregister_peer(xseg_peer_xsegdev.name);
368 err2:
369         xseg_unregister_peer(xseg_peer_posix.name);
370 err1:
371         xseg_unregister_type(xseg_xsegdev.name);
372 err0:
373         return r;
374
375 }
376
/*
 * Module-wide xseg teardown: unmap the segment, then unregister the
 * peers and type registered by xsegbd_xseg_init().  Always returns 0.
 */
int xsegbd_xseg_quit(void)
{
	/* make sure to unmap the segment first */
	xsegbd.xseg->type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);

	xseg_unregister_peer(xseg_peer_xsegdev.name);
	xseg_unregister_peer(xseg_peer_posix.name);
	xseg_unregister_type(xseg_xsegdev.name);

	return 0;
}
388
389
390 /* ***************************** */
391 /* ** Block Device Operations ** */
392 /* ***************************** */
393
394 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
395 {
396         struct gendisk *disk = bdev->bd_disk;
397         struct xsegbd_device *xsegbd_dev = disk->private_data;
398
399         xsegbd_get_dev(xsegbd_dev);
400
401         return 0;
402 }
403
404 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
405 {
406         struct xsegbd_device *xsegbd_dev = gd->private_data;
407
408         xsegbd_put_dev(xsegbd_dev);
409
410         return 0;
411 }
412
/* Block-device ->ioctl: no ioctls are supported. */
static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}
418
/* Block device entry points. */
static const struct block_device_operations xsegbd_ops = {
	.owner		= THIS_MODULE,
	.open		= xsegbd_open,
	.release	= xsegbd_release,
	.ioctl		= xsegbd_ioctl
};
425
426
427 /* *************************** */
428 /* ** Device Initialization ** */
429 /* *************************** */
430
431 static void xseg_request_fn(struct request_queue *rq);
432 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
433
434 static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
435 {
436         int ret = -ENOMEM;
437         struct gendisk *disk;
438         unsigned int max_request_size_bytes;
439
440         spin_lock_init(&xsegbd_dev->lock);
441
442         xsegbd_dev->xsegbd = &xsegbd;
443
444         xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
445         if (!xsegbd_dev->blk_queue)
446                 goto out;
447
448         blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->lock);
449         xsegbd_dev->blk_queue->queuedata = xsegbd_dev;
450
451         blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
452         blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
453         blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
454         blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
455         
456         //blk_queue_max_segments(dev->blk_queue, 512);
457         /* calculate maximum block request size
458          * request size in pages * page_size
459          * leave one page in buffer for name
460          */
461         max_request_size_bytes =
462                  (unsigned int) (xsegbd.config.request_size - 1) *
463                                 ( 1 << xsegbd.config.page_shift) ;
464         blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
465         blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
466         blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
467         blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
468
469         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
470
471         /* vkoukis says we don't need partitions */
472         xsegbd_dev->gd = disk = alloc_disk(1);
473         if (!disk)
474                 goto out_disk;
475
476         disk->major = xsegbd_dev->major;
477         disk->first_minor = 0; // id * XSEGBD_MINORS;
478         disk->fops = &xsegbd_ops;
479         disk->queue = xsegbd_dev->blk_queue;
480         disk->private_data = xsegbd_dev;
481         disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
482         snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);
483
484         if (!xq_alloc_seq(&xsegbd_dev->blk_queue_pending, xsegbd_dev->nr_requests, xsegbd_dev->nr_requests))
485                 goto out_disk;
486
487         xsegbd_dev->blk_req_pending = kzalloc(sizeof(struct request *) * xsegbd_dev->nr_requests, GFP_KERNEL);
488         if (!xsegbd_dev->blk_req_pending)
489                 goto out_disk;
490
491         /* allow a non-zero sector_size parameter to override the disk size */
492         if (sector_size)
493                 xsegbd_dev->sectors = sector_size;
494         else {
495                 ret = xsegbd_get_size(xsegbd_dev);
496                 if (ret)
497                         goto out_disk;
498         }
499
500         set_capacity(disk, xsegbd_dev->sectors);
501         XSEGLOG("xsegbd active...");
502         add_disk(disk); /* immediately activates the device */
503
504         return 0;
505
506 out_disk:
507         put_disk(disk);
508 out:
509         return ret;
510 }
511
/*
 * device ->release: final teardown, run when the last reference to the
 * embedded struct device is dropped (via device_unregister).  Order
 * matters: stop the gendisk and queue first, quiesce the port, free the
 * xseg requests and per-device bookkeeping, unregister the blkdev, and
 * finally drop the module reference taken in xsegbd_add().
 */
static void xsegbd_dev_release(struct device *dev)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
	struct xseg_port *port;

	/* cleanup gendisk and blk_queue the right way */
	if (xsegbd_dev->gd) {
		if (xsegbd_dev->gd->flags & GENHD_FL_UP)
			del_gendisk(xsegbd_dev->gd);

		blk_cleanup_queue(xsegbd_dev->blk_queue);
		put_disk(xsegbd_dev->gd);
	}

	/* reset the port's waitcue (aka cancel_wait) */
	port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
	port->waitcue = (long) NULL;

	xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests);

	kfree(xsegbd_dev->blk_req_pending);
	xq_free(&xsegbd_dev->blk_queue_pending);

	unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);

	kfree(xsegbd_dev);

	module_put(THIS_MODULE);
}
541
542 /* ******************* */
543 /* ** Critical Path ** */
544 /* ******************* */
545
546 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
547                         struct request *blkreq)
548 {
549         struct bio_vec *bvec;
550         struct req_iterator iter;
551         uint64_t off = 0;
552         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
553         rq_for_each_segment(bvec, blkreq, iter) {
554                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
555                 memcpy(data + off, bdata, bvec->bv_len);
556                 off += bvec->bv_len;
557                 kunmap_atomic(bdata);
558         }
559 }
560
561 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
562                         struct request *blkreq)
563 {
564         struct bio_vec *bvec;
565         struct req_iterator iter;
566         uint64_t off = 0;
567         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
568         rq_for_each_segment(bvec, blkreq, iter) {
569                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
570                 memcpy(bdata, data + off, bvec->bv_len);
571                 off += bvec->bv_len;
572                 kunmap_atomic(bdata);
573         }
574 }
575
576 static void xseg_request_fn(struct request_queue *rq)
577 {
578         struct xseg_request *xreq;
579         struct xsegbd_device *xsegbd_dev = rq->queuedata;
580         struct xseg_port *port;
581         struct request *blkreq;
582         xqindex blkreq_idx;
583         char *name;
584         uint64_t datasize;
585
586         for (;;) {
587                 xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
588                 if (!xreq)
589                         break;
590
591                 blkreq = blk_fetch_request(rq);
592                 if (!blkreq)
593                         break;
594
595                 if (blkreq->cmd_type != REQ_TYPE_FS) {
596                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
597                         __blk_end_request_all(blkreq, 0);
598                 }
599
600
601                 datasize = blk_rq_bytes(blkreq);
602                 BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
603                 BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));
604
605                 name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
606                 strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
607                 blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending);
608                 BUG_ON(blkreq_idx == None);
609                 /* WARN_ON(xsebd_dev->blk_req_pending[blkreq_idx] */
610                 xsegbd_dev->blk_req_pending[blkreq_idx] = blkreq;
611                 xreq->priv = (uint64_t)blkreq_idx;
612                 xreq->size = datasize;
613                 xreq->offset = blk_rq_pos(blkreq) << 9;
614                 /*
615                 if (xreq->offset >= (sector_size << 9))
616                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
617                                  blk_rq_pos(blkreq), sector_size,
618                                  blkreq->cmd_flags & REQ_FLUSH,
619                                  blkreq->cmd_flags & REQ_FUA);
620                 */
621
622                 if (blkreq->cmd_flags & REQ_FLUSH)
623                         xreq->flags |= XF_FLUSH;
624
625                 if (blkreq->cmd_flags & REQ_FUA)
626                         xreq->flags |= XF_FUA;
627
628                 if (rq_data_dir(blkreq)) {
629                         /* unlock for data transfers? */
630                         blk_to_xseg(xsegbd.xseg, xreq, blkreq);
631                         xreq->op = X_WRITE;
632                 } else {
633                         xreq->op = X_READ;
634                 }
635
636                 /* TODO:
637                  * Temp/ugly hack, add support for it in prepare_wait instead
638                  */
639                 port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
640                 port->waitcue = (long) xsegbd_dev;
641
642                 BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
643         }
644
645         /* TODO:
646          * This is going to happen at least once.
647          * Add a WARN_ON when debugging find out why it happens more than once.
648          */
649         xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno);
650         if (xreq)
651                 xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq);
652 }
653
654 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
655                                         struct xseg_request *xreq       )
656 {
657         void *data;
658
659         if (xreq->state & XS_ERROR)
660                 return -ENOENT;
661
662         if (!(xreq->state & XS_SERVED))
663                 return -EIO;
664
665         data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
666         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
667         return 0;
668 }
669
670 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
671 {
672         struct xseg_request *xreq;
673         struct xseg_port *port;
674         char *name;
675         uint64_t datasize;
676         struct completion comp;
677         int ret = -EBUSY;
678
679         xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
680         if (!xreq)
681                 goto out;
682
683         datasize = sizeof(uint64_t);
684         BUG_ON((uint64_t)&comp < xsegbd_dev->nr_requests);
685         BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
686         BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));
687
688         init_completion(&comp);
689         xreq->priv = (uint64_t)(long)&comp;
690
691         name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
692         strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
693         xreq->size = datasize;
694         xreq->offset = 0;
695
696         xreq->op = X_INFO;
697
698         port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
699         port->waitcue = (uint64_t)(long)xsegbd_dev;
700
701         BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
702         xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);
703
704         wait_for_completion_interruptible(&comp);
705         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
706 out:
707         xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
708         return ret;
709 }
710
/*
 * Callback run by xsegdev when our port is signalled.
 * Drains received requests: a ->priv value below nr_requests is an
 * index into blk_req_pending (a block I/O request to complete here);
 * any larger value is a completion pointer parked by xsegbd_get_size(),
 * whose waiter retains ownership of the xseg request.
 * Afterwards, re-run the request function to submit queued I/O.
 */
static long xseg_callback(void *arg)
{
	struct xsegbd_device *xsegbd_dev = NULL;
	struct xseg_request *xreq;
	struct xseg_port *port;
	struct request *blkreq;
	unsigned long flags;
	uint64_t blkreq_idx;
	int err;

	port = XSEG_TAKE_PTR(arg, xsegbd.xseg->segment);
	xsegbd_dev = (struct xsegbd_device *) port->waitcue;

	if (!xsegbd_dev)
		return -ENODEV;

	for (;;) {
		xreq = xseg_receive(xsegbd.xseg, xsegbd_dev->src_portno);
		if (!xreq)
			break;

		/* we rely upon our peers to not have touched ->priv */
		blkreq_idx = (uint64_t)xreq->priv;
		if (blkreq_idx >= xsegbd_dev->nr_requests) {
			/* someone is blocking on this request
			   and will handle it when we wake them up. */
			complete((void *)(long)xreq->priv);
			/* the request is blocker's responsibility so
			   we will not put_request(); */
			continue;
		}

		/* this is now treated as a block I/O request to end */
		blkreq = xsegbd_dev->blk_req_pending[blkreq_idx];
		/* WARN_ON(!blkreq); */
		err = -EIO;

		if (!(xreq->state & XS_SERVED))
			goto blk_end;

		/* short transfers are reported as I/O errors */
		if (xreq->serviced != blk_rq_bytes(blkreq))
			goto blk_end;

		/* unlock for data transfer? */
		if (!rq_data_dir(blkreq))
			xseg_to_blk(xsegbd.xseg, xreq, blkreq);

		err = 0;
blk_end:
		blk_end_request_all(blkreq, err);
		/* return the pending slot and the xseg request to their pools */
		xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx);
		xseg_put_request(xsegbd.xseg, xreq->portno, xreq);
	}

	/* push any block requests that queued up while we were busy */
	spin_lock_irqsave(&xsegbd_dev->lock, flags);
	xseg_request_fn(xsegbd_dev->blk_queue);
	spin_unlock_irqrestore(&xsegbd_dev->lock, flags);
	return 0;
}
770
771
772 /* sysfs interface */
773
/* Bus under which xsegbd devices appear in sysfs. */
static struct bus_type xsegbd_bus_type = {
	.name	= "xsegbd",
};
777
778 static ssize_t xsegbd_size_show(struct device *dev,
779                                         struct device_attribute *attr, char *buf)
780 {
781         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
782
783         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
784 }
785
786 static ssize_t xsegbd_major_show(struct device *dev,
787                                         struct device_attribute *attr, char *buf)
788 {
789         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
790
791         return sprintf(buf, "%d\n", xsegbd_dev->major);
792 }
793
794 static ssize_t xsegbd_srcport_show(struct device *dev,
795                                         struct device_attribute *attr, char *buf)
796 {
797         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
798
799         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
800 }
801
802 static ssize_t xsegbd_dstport_show(struct device *dev,
803                                         struct device_attribute *attr, char *buf)
804 {
805         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
806
807         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
808 }
809
810 static ssize_t xsegbd_id_show(struct device *dev,
811                                         struct device_attribute *attr, char *buf)
812 {
813         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
814
815         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
816 }
817
818 static ssize_t xsegbd_reqs_show(struct device *dev,
819                                         struct device_attribute *attr, char *buf)
820 {
821         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
822
823         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
824 }
825
826 static ssize_t xsegbd_name_show(struct device *dev,
827                                         struct device_attribute *attr, char *buf)
828 {
829         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
830
831         return sprintf(buf, "%s\n", xsegbd_dev->name);
832 }
833
/* Read-only sysfs attributes exposed by every xsegbd device. */
static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
static DEVICE_ATTR(id , S_IRUGO, xsegbd_id_show, NULL);
static DEVICE_ATTR(reqs , S_IRUGO, xsegbd_reqs_show, NULL);
static DEVICE_ATTR(name , S_IRUGO, xsegbd_name_show, NULL);

static struct attribute *xsegbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_major.attr,
	&dev_attr_srcport.attr,
	&dev_attr_dstport.attr,
	&dev_attr_id.attr,
	&dev_attr_reqs.attr,
	&dev_attr_name.attr,
	NULL
};

static struct attribute_group xsegbd_attr_group = {
	.attrs = xsegbd_attrs,
};

static const struct attribute_group *xsegbd_attr_groups[] = {
	&xsegbd_attr_group,
	NULL
};
861
/* Type-level ->release stub; per-device cleanup is done by the
 * xsegbd_dev_release set directly on each device in xsegbd_bus_add_dev. */
static void xsegbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type xsegbd_device_type = {
	.name		= "xsegbd",
	.groups		= xsegbd_attr_groups,
	.release	= xsegbd_sysfs_dev_release,
};
871
/* Parent device of all xsegbd devices; statically allocated, so its
 * release callback has nothing to free. */
static void xsegbd_root_dev_release(struct device *dev)
{
}

static struct device xsegbd_root_dev = {
	.init_name	= "xsegbd",
	.release	= xsegbd_root_dev_release,
};
880
881 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
882 {
883         int ret = -ENOMEM;
884         struct device *dev;
885
886         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
887         dev = &xsegbd_dev->dev;
888
889         dev->bus = &xsegbd_bus_type;
890         dev->type = &xsegbd_device_type;
891         dev->parent = &xsegbd_root_dev;
892         dev->release = xsegbd_dev_release;
893         dev_set_name(dev, "%d", xsegbd_dev->id);
894
895         ret = device_register(dev);
896
897         mutex_unlock(&xsegbd_mutex);
898         return ret;
899 }
900
/* Remove a device from sysfs; final cleanup runs in its ->release. */
static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
{
	device_unregister(&xsegbd_dev->dev);
}
905
906 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
907 {
908         struct xsegbd_device *xsegbd_dev;
909         struct xseg_port *xport;
910         ssize_t ret = -ENOMEM;
911         int new_id = 0;
912         struct list_head *tmp;
913
914         if (!try_module_get(THIS_MODULE))
915                 return -ENODEV;
916
917         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
918         if (!xsegbd_dev)
919                 goto out;
920
921         spin_lock_init(&xsegbd_dev->lock);
922         INIT_LIST_HEAD(&xsegbd_dev->node);
923
924         /* parse cmd */
925         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
926                         "%d:%d:%d", xsegbd_dev->name, &xsegbd_dev->src_portno,
927                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
928                 ret = -EINVAL;
929                 goto out_dev;
930         }
931         xsegbd_dev->namesize = strlen(xsegbd_dev->name);
932
933         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
934
935         list_for_each(tmp, &xsegbd_dev_list) {
936                 struct xsegbd_device *entry;
937
938                 entry = list_entry(tmp, struct xsegbd_device, node);
939
940                 if (entry->src_portno == xsegbd_dev->src_portno) {
941                         ret = -EINVAL;
942                         goto out_unlock;
943                 }
944
945                 if (entry->id >= new_id)
946                         new_id = entry->id + 1;
947         }
948
949         xsegbd_dev->id = new_id;
950
951         list_add_tail(&xsegbd_dev->node, &xsegbd_dev_list);
952
953         mutex_unlock(&xsegbd_mutex);
954
955         XSEGLOG("registering block device major %d", major);
956         ret = register_blkdev(major, XSEGBD_NAME);
957         if (ret < 0) {
958                 XSEGLOG("cannot register block device!");
959                 ret = -EBUSY;
960                 goto out_delentry;
961         }
962         xsegbd_dev->major = ret;
963         XSEGLOG("registered block device major %d", xsegbd_dev->major);
964
965         ret = xsegbd_bus_add_dev(xsegbd_dev);
966         if (ret)
967                 goto out_blkdev;
968
969         XSEGLOG("binding to source port %u (destination %u)",
970                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
971         xport = xseg_bind_port(xsegbd.xseg, xsegbd_dev->src_portno);
972         if (!xport) {
973                 XSEGLOG("cannot bind to port");
974                 ret = -EFAULT;
975
976                 goto out_bus;
977         }
978         /* make sure we don't get any requests until we're ready to handle them */
979         xport->waitcue = (long) NULL;
980
981         XSEGLOG("allocating %u requests", xsegbd_dev->nr_requests);
982         if (xseg_alloc_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests)) {
983                 XSEGLOG("cannot allocate requests");
984                 ret = -EFAULT;
985
986                 goto out_bus;
987         }
988
989         ret = xsegbd_dev_init(xsegbd_dev);
990         if (ret)
991                 goto out_bus;
992
993         return count;
994
995 out_bus:
996         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
997
998         list_del_init(&xsegbd_dev->node);
999         xsegbd_bus_del_dev(xsegbd_dev);
1000
1001         mutex_unlock(&xsegbd_mutex);
1002
1003         return ret;
1004
1005 out_blkdev:
1006         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
1007
1008 out_delentry:
1009         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
1010         list_del_init(&xsegbd_dev->node);
1011
1012 out_unlock:
1013         mutex_unlock(&xsegbd_mutex);
1014
1015 out_dev:
1016         kfree(xsegbd_dev);
1017
1018 out:
1019         return ret;
1020 }
1021
1022 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
1023 {
1024         struct list_head *tmp;
1025         struct xsegbd_device *xsegbd_dev;
1026
1027         list_for_each(tmp, &xsegbd_dev_list) {
1028                 xsegbd_dev = list_entry(tmp, struct xsegbd_device, node);
1029                 if (xsegbd_dev->id == id)
1030                         return xsegbd_dev;
1031
1032         }
1033
1034         return NULL;
1035 }
1036
1037 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
1038 {
1039         struct xsegbd_device *xsegbd_dev = NULL;
1040         int id, ret;
1041         unsigned long ul_id;
1042
1043         ret = kstrtoul(buf, 10, &ul_id);
1044         if (ret)
1045                 return ret;
1046
1047         id = (int) ul_id;
1048         if (id != ul_id)
1049                 return -EINVAL;
1050
1051         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
1052
1053         ret = count;
1054         xsegbd_dev = __xsegbd_get_dev(id);
1055         if (!xsegbd_dev) {
1056                 ret = -ENOENT;
1057                 goto out_unlock;
1058         }
1059
1060         list_del_init(&xsegbd_dev->node);
1061
1062         xsegbd_bus_del_dev(xsegbd_dev);
1063
1064 out_unlock:
1065         mutex_unlock(&xsegbd_mutex);
1066         return ret;
1067 }
1068
/* Control files exposed under /sys/bus/xsegbd/: write-only "add" and
 * "remove" entries, dispatched to xsegbd_add() / xsegbd_remove(). */
static struct bus_attribute xsegbd_bus_attrs[] = {
        __ATTR(add, S_IWUSR, NULL, xsegbd_add),
        __ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
        __ATTR_NULL
};
1074
1075 static int xsegbd_sysfs_init(void)
1076 {
1077         int ret;
1078
1079         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
1080
1081         ret = bus_register(&xsegbd_bus_type);
1082         if (ret < 0)
1083                 return ret;
1084
1085         ret = device_register(&xsegbd_root_dev);
1086
1087         return ret;
1088 }
1089
/* Tear down the sysfs state created by xsegbd_sysfs_init(): unregister
 * the root device first, then the bus type (reverse registration order). */
static void xsegbd_sysfs_cleanup(void)
{
        device_unregister(&xsegbd_root_dev);
        bus_unregister(&xsegbd_bus_type);
}
1095
1096 /* *************************** */
1097 /* ** Module Initialization ** */
1098 /* *************************** */
1099
1100 static int __init xsegbd_init(void)
1101 {
1102         int ret;
1103
1104         ret = xsegbd_xseg_init();
1105         if (ret)
1106                 goto out;
1107
1108         ret = xsegbd_sysfs_init();
1109         if (ret)
1110                 goto out_xseg_destroy;
1111
1112         XSEGLOG("initialization complete");
1113
1114 out:
1115         return ret;
1116
1117 out_xseg_destroy:
1118         xsegbd_xseg_quit();
1119         return -ENOSYS;
1120 }
1121
/* Module unload: remove the sysfs bus/device, then detach from the xseg
 * segment (reverse of xsegbd_init()). */
static void __exit xsegbd_exit(void)
{
        xsegbd_sysfs_cleanup();
        xsegbd_xseg_quit();
}
1127
/* Hook the init/exit routines into the module loader. */
module_init(xsegbd_init);
module_exit(xsegbd_exit);