5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
/* One minor number per gendisk: the device is exposed without partitions. */
23 #define XSEGBD_MINORS 1
25 MODULE_DESCRIPTION("xsegbd");
26 MODULE_AUTHOR("XSEG");
27 MODULE_LICENSE("GPL");
/* Default capacity (in sectors) and physical block size; both are
 * runtime-tunable through the module parameters declared below. */
29 static long sector_size = 200000;
30 static long blksize = 512;
/* Volume name and segment spec string (consumed by xseg_parse_spec()). */
32 static char name[XSEGBD_VOLUME_NAMELEN] = "xsegbd";
33 static char spec[256] = "xsegdev:xsegbd:4:512:64:1024:12";
34 static int src_portno = 0, dst_portno = 1, nr_requests = 128;
36 module_param(sector_size, long, 0644);
37 module_param(blksize, long, 0644);
/* NOTE(review): 'major' is not declared anywhere in this excerpt --
 * presumably defined on an elided line; confirm before building. */
38 module_param(major, int, 0644);
39 module_param(src_portno, int, 0644);
40 module_param(dst_portno, int, 0644);
41 module_param(nr_requests, int, 0644);
42 module_param_string(name, name, sizeof(name), 0644);
43 module_param_string(spec, spec, sizeof(spec), 0644);
/* Module-wide state: a single device instance, serialized by xsegbd_lock. */
45 static volatile int count;
46 struct semaphore xsegbd_lock;
47 static struct xsegbd xsegbd;
50 /* ********************* */
51 /* ** XSEG Operations ** */
52 /* ********************* */
/* xseg type hook: allocate @size bytes from the kernel heap.
 * (Braces elided by the excerpt; only signature and return are visible.) */
54 static void *xsegdev_malloc(uint64_t size)
56 return kmalloc((size_t)size, GFP_KERNEL);
/* xseg type hook: resize a kmalloc'ed buffer, preserving contents. */
59 static void *xsegdev_realloc(void *mem, uint64_t size)
61 return krealloc(mem, (size_t)size, GFP_KERNEL);
64 static void xsegdev_mfree(void *ptr)
/* xseg type hook: create the backing segment of @size bytes on xsegdev
 * device 0, destroying any pre-existing segment first. Appears to return
 * 0 on success / negative errno on failure (return paths are elided in
 * this excerpt -- confirm). The @name argument is unused here. */
69 static long xsegdev_allocate(const char *name, uint64_t size)
72 struct xsegdev *xsegdev = xsegdev_get(0);
/* xsegdev_get() returns an ERR_PTR on failure, never NULL. */
74 r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
76 XSEGLOG("cannot acquire xsegdev");
/* A leftover segment from a previous incarnation is torn down first. */
80 if (xsegdev->segment) {
81 XSEGLOG("destroying existing xsegdev segment");
82 r = xsegdev_destroy_segment(xsegdev);
87 XSEGLOG("creating xsegdev segment size %llu", size);
88 r = xsegdev_create_segment(xsegdev, size, 1);
92 xsegdev->segsize = size;
/* xseg type hook: destroy the backing segment and drop the device
 * reference. @name is unused; device 0 is hardwired, mirroring
 * xsegdev_allocate(). */
100 static long xsegdev_deallocate(const char *name)
102 struct xsegdev *xsegdev = xsegdev_get(0);
103 int r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
/* Clear the reservation so the segment may actually be destroyed. */
107 clear_bit(XSEGDEV_RESERVED, &xsegdev->flags);
108 XSEGLOG("destroying segment");
109 r = xsegdev_destroy_segment(xsegdev);
110 if (r)
111 XSEGLOG(" ...failed");
112 xsegdev_put(xsegdev);
116 static long xseg_callback(void *arg);
/* xseg type hook: "map" the segment into our address space. In-kernel
 * this is just a pointer to the already-created segment; as a side effect
 * it claims the device's completion callback slot for this module.
 * Returns the segment pointer, or NULL on failure (error paths elided). */
118 static void *xsegdev_map(const char *name, uint64_t size)
120 struct xseg *xseg = NULL;
121 struct xsegdev *dev = xsegdev_get(0);
123 r = IS_ERR(dev) ? PTR_ERR(dev) : 0;
/* Refuse to map more than the segment actually holds. */
130 if (size > dev->segsize)
/* Only one consumer may own the callback at a time. */
133 if (dev->callback) /* in use */
136 dev->callback = xseg_callback;
137 dev->callarg = &xsegbd;
138 xseg = (void *)dev->segment;
/* xseg type hook: undo xsegdev_map() -- release the callback slot and
 * drop the device reference. @ptr and @size are unused in the visible
 * lines. NOTE(review): this path takes an extra xsegdev_get() reference
 * and puts only one; check the elided lines for the balancing put. */
146 static void xsegdev_unmap(void *ptr, uint64_t size)
148 struct xsegdev *xsegdev = xsegdev_get(0);
149 int r = IS_ERR(xsegdev) ? PTR_ERR(xsegdev) : 0;
153 xsegdev->callarg = NULL;
154 xsegdev->callback = NULL;
155 xsegdev_put(xsegdev);
/* Segment-type descriptor registered with the xseg core: wires the
 * xsegdev_* memory and lifecycle hooks defined above. (The .name and
 * .map members appear to be on elided lines -- confirm.) */
158 static struct xseg_type xseg_xsegdev = {
159 /* xseg operations */
161 .malloc = xsegdev_malloc,
162 .realloc = xsegdev_realloc,
163 .mfree = xsegdev_mfree,
164 .allocate = xsegdev_allocate,
165 .deallocate = xsegdev_deallocate,
167 .unmap = xsegdev_unmap
173 static int posix_signal_init(void)
178 static void posix_signal_quit(void) { }
/* POSIX peer wait hooks: userspace processes sleep on these; bodies are
 * elided in this excerpt so their exact semantics cannot be confirmed. */
180 static int posix_prepare_wait(struct xseg_port *port)
185 static int posix_cancel_wait(struct xseg_port *port)
190 static int posix_wait_signal(struct xseg_port *port, uint32_t timeout)
/* Wake a userspace peer: the waiter's pid is stashed in port->waitcue;
 * look it up and deliver SIGIO. (Error checks between the visible lines
 * are elided in this excerpt.) */
195 static int posix_signal(struct xseg_port *port)
198 struct task_struct *task;
202 pid = find_vpid((pid_t)port->waitcue);
205 task = pid_task(pid, PIDTYPE_PID);
209 ret = send_sig(SIGIO, task, 1);
/* POSIX peer allocator hooks. (Bodies elided; in-kernel they likely
 * return NULL since userspace owns this peer's memory -- confirm.) */
215 static void *posix_malloc(uint64_t size)
220 static void *posix_realloc(void *mem, uint64_t size)
225 static void posix_mfree(void *mem) { }
/* Peer descriptor for userspace ("posix") consumers of the segment.
 * (The .name and .mfree members appear to be on elided lines.) */
227 static struct xseg_peer xseg_peer_posix = {
228 /* xseg signal operations */
230 .signal_init = posix_signal_init,
231 .signal_quit = posix_signal_quit,
232 .cancel_wait = posix_cancel_wait,
233 .prepare_wait = posix_prepare_wait,
234 .wait_signal = posix_wait_signal,
235 .signal = posix_signal,
236 .malloc = posix_malloc,
237 .realloc = posix_realloc,
244 static int xsegdev_signal_init(void)
249 static void xsegdev_signal_quit(void) { }
/* xsegdev (in-kernel) peer wait/signal hooks. Bodies are elided in this
 * excerpt; this driver is signalled via the xseg_callback installed in
 * xsegdev_map(), so these are likely stubs -- confirm. */
251 static int xsegdev_prepare_wait(struct xseg_port *port)
256 static int xsegdev_cancel_wait(struct xseg_port *port)
261 static int xsegdev_wait_signal(struct xseg_port *port, uint32_t timeout)
266 static int xsegdev_signal(struct xseg_port *port)
/* Peer descriptor for this in-kernel driver; reuses the xsegdev_* kernel
 * heap hooks. (The .name member appears to be on an elided line.) */
271 static struct xseg_peer xseg_peer_xsegdev = {
272 /* xseg signal operations */
274 .signal_init = xsegdev_signal_init,
275 .signal_quit = xsegdev_signal_quit,
276 .cancel_wait = xsegdev_cancel_wait,
277 .prepare_wait = xsegdev_prepare_wait,
278 .wait_signal = xsegdev_wait_signal,
279 .signal = xsegdev_signal,
280 .malloc = xsegdev_malloc,
281 .realloc = xsegdev_realloc,
282 .mfree = xsegdev_mfree
288 /* ************************* */
289 /* ** XSEG Initialization ** */
290 /* ************************* */
/* Bring up the xseg layer for @dev: register the segment type and both
 * peers, create and join the shared segment, bind the source port, and
 * preallocate request slots. Error-unwind labels at the bottom undo the
 * registrations in reverse order (several lines elided in this excerpt). */
292 int xsegbd_xseg_init(struct xsegbd *dev)
294 struct xseg_port *xport;
/* NOTE(review): strncpy does not NUL-terminate when 'name' fills the
 * buffer exactly; dev->name may be unterminated. strlen() below would
 * then overrun -- confirm XSEGBD_VOLUME_NAMELEN accounts for this. */
298 strncpy(dev->name, name, XSEGBD_VOLUME_NAMELEN);
300 XSEGLOG("registering xseg types");
301 dev->namesize = strlen(dev->name);
302 r = xseg_register_type(&xseg_xsegdev);
306 r = xseg_register_peer(&xseg_peer_posix);
310 r = xseg_register_peer(&xseg_peer_xsegdev);
314 r = xseg_initialize("xsegdev");
316 XSEGLOG("cannot initialize 'xsegdev' peer");
/* Parse the module-parameter spec string into dev->config. */
320 r = xseg_parse_spec(spec, &dev->config);
/* Segment type mismatch is tolerated but logged. */
324 if (strncmp(dev->config.type, "xsegdev", 16))
325 XSEGLOG("WARNING: unexpected segment type '%s' vs 'xsegdev'",
328 XSEGLOG("creating segment");
329 r = xseg_create(&dev->config);
331 XSEGLOG("cannot create segment");
335 XSEGLOG("joining segment");
336 dev->xseg = xseg_join("xsegdev", "xsegbd");
338 XSEGLOG("cannot join segment");
343 XSEGLOG("binding to source port %u (destination %u)",
344 src_portno, dst_portno);
345 xport = xseg_bind_port(dev->xseg, src_portno);
347 XSEGLOG("cannot bind to port");
/* Record the port we actually got; it may differ from the request. */
352 dev->src_portno = xseg_portno(dev->xseg, xport);
353 dev->dst_portno = dst_portno;
/* Clamp nr_requests to what the segment was configured to hold. */
355 if (nr_requests > dev->xseg->config.nr_requests)
356 nr_requests = dev->xseg->config.nr_requests;
358 if (xseg_alloc_requests(dev->xseg, src_portno, nr_requests)) {
359 XSEGLOG("cannot allocate requests");
/* Error-unwind: unregister in reverse order of registration. */
367 xseg_unregister_peer(xseg_peer_xsegdev.name);
369 xseg_unregister_peer(xseg_peer_posix.name);
371 xseg_unregister_type(xseg_xsegdev.name);
/* Tear down the xseg layer for @dev by destroying the joined segment.
 * (Return statement and any further cleanup are elided in this excerpt.) */
376 int xsegbd_xseg_quit(struct xsegbd *dev)
378 xseg_destroy(dev->xseg);
384 /* ***************************** */
385 /* ** Block Device Operations ** */
386 /* ***************************** */
/* Block-device open: serialize on xsegbd_lock; down_interruptible lets a
 * signal abort the wait. (Reference counting and return elided.) */
388 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
390 int ret = down_interruptible(&xsegbd_lock);
/* Block-device release: mirror of open, same lock discipline. */
398 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
400 int ret = down_interruptible(&xsegbd_lock);
/* ioctl: no commands supported in the visible lines. */
408 static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
409 unsigned int cmd, unsigned long arg)
/* Device-operations table. NOTE(review): the .open member appears to be
 * on an elided line -- confirm it is wired to xsegbd_open. */
414 static const struct block_device_operations xsegbd_ops = {
415 .owner = THIS_MODULE,
417 .release = xsegbd_release,
418 .ioctl = xsegbd_ioctl
422 /* *************************** */
423 /* ** Device Initialization ** */
424 /* *************************** */
/* Forward declaration: the queue's request function, defined below. */
426 static void xseg_request_fn(struct request_queue *rq);
/* Create one xsegbd device: init xseg, set up the request queue and its
 * limits, allocate the gendisk and the pending-request tracking, then
 * add_disk(). Unwind labels at the bottom release resources in reverse
 * order (several lines elided in this excerpt). */
428 static int xsegbd_dev_init(struct xsegbd *dev, int id, sector_t size)
431 struct gendisk *disk;
432 unsigned int max_request_size_bytes;
434 spin_lock_init(&dev->lock);
437 ret = xsegbd_xseg_init(dev);
441 dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
445 blk_init_allocated_queue(dev->blk_queue, xseg_request_fn, &dev->lock);
446 dev->blk_queue->queuedata = dev;
/* Advertise FLUSH/FUA support; they are forwarded as XF_FLUSH/XF_FUA. */
448 blk_queue_flush(dev->blk_queue, REQ_FLUSH | REQ_FUA);
449 blk_queue_logical_block_size(dev->blk_queue, 512);
450 blk_queue_physical_block_size(dev->blk_queue, blksize);
451 blk_queue_bounce_limit(dev->blk_queue, BLK_BOUNCE_ANY);
453 //blk_queue_max_segments(dev->blk_queue, 512);
454 /* calculate maximum block request size
455 * request size in pages * page_size
456 * leave one page in buffer for name
458 max_request_size_bytes = (unsigned int) (dev->config.request_size -1) * ( 1 << dev->config.page_shift) ;
459 blk_queue_max_hw_sectors(dev->blk_queue, max_request_size_bytes >> 9);
460 blk_queue_max_segment_size(dev->blk_queue, max_request_size_bytes);
461 blk_queue_io_min(dev->blk_queue, max_request_size_bytes);
462 blk_queue_io_opt(dev->blk_queue, max_request_size_bytes);
/* Not a rotational device: disable seek-oriented scheduling. */
464 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->blk_queue);
466 /* vkoukis says we don't need partitions */
467 dev->gd = disk = alloc_disk(1);
472 disk->first_minor = id * XSEGBD_MINORS;
473 disk->fops = &xsegbd_ops;
474 disk->queue = dev->blk_queue;
475 disk->private_data = dev;
476 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
/* NOTE(review): single-letter suffix limits this scheme to 26 devices. */
477 snprintf(disk->disk_name, 32, "xsegbd%c", 'a' + id);
/* Free-slot queue plus parallel array mapping slot index -> request. */
480 if (!xq_alloc_seq(&dev->blk_queue_pending, nr_requests, nr_requests))
483 dev->blk_req_pending = kmalloc(sizeof(struct request *) * nr_requests, GFP_KERNEL);
484 if (!dev->blk_req_pending)
485 goto out_free_pending;
488 set_capacity(disk, dev->sectors);
490 add_disk(disk); /* immediately activates the device */
/* Error unwind (labels elided): release in reverse acquisition order. */
496 xq_free(&dev->blk_queue_pending);
502 blk_cleanup_queue(dev->blk_queue);
505 xsegbd_xseg_quit(dev);
/* Tear down a device created by xsegbd_dev_init(), in reverse order:
 * pending-request tracking, gendisk, queue, then the xseg layer. */
509 static int xsegbd_dev_destroy(struct xsegbd *dev)
511 xq_free(&dev->blk_queue_pending);
512 kfree(dev->blk_req_pending);
513 del_gendisk(dev->gd);
515 blk_cleanup_queue(dev->blk_queue);
516 xsegbd_xseg_quit(dev);
521 /* *************************** */
522 /* ** Module Initialization ** */
523 /* *************************** */
/* Module entry: register the block major, then create the single static
 * device instance. Failure paths unregister the major (some lines elided). */
525 static int __init xsegbd_init(void)
529 sema_init(&xsegbd_lock, 1);
531 XSEGLOG("registering block device major %d", major);
532 ret = register_blkdev(major, XSEGBD_NAME);
534 XSEGLOG("cannot register block device!");
539 XSEGLOG("registered block device major %d", major);
541 XSEGLOG("initializing device");
542 ret = xsegbd_dev_init(&xsegbd, 0, sector_size);
544 XSEGLOG("cannot initialize device!");
548 XSEGLOG("initialization complete");
/* Error unwind. */
553 unregister_blkdev(major, XSEGBD_NAME);
/* Module exit: unregister the major, quiesce both drivers so no new
 * signals arrive, then destroy the device and the segment type. */
557 static void __exit xsegbd_exit(void)
559 unregister_blkdev(major, XSEGBD_NAME);
561 xseg_disable_driver(xsegbd.xseg, "posix");
562 xseg_unregister_peer("posix");
563 xseg_disable_driver(xsegbd.xseg, "xsegdev");
564 xseg_unregister_peer("xsegdev");
566 xsegbd_dev_destroy(&xsegbd);
567 xseg_unregister_type("xsegdev");
570 module_init(xsegbd_init);
571 module_exit(xsegbd_exit);
574 /* ******************* */
575 /* ** Critical Path ** */
576 /* ******************* */
/* Copy the data of a write request out of its bios into the xseg request
 * buffer. 'off' is declared/advanced on elided lines -- presumably the
 * running offset into the destination buffer; confirm it is incremented
 * by bv_len each iteration. */
578 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
579 struct request *blkreq)
581 struct bio_vec *bvec;
582 struct req_iterator iter;
/* Translate the segment-relative data offset to a kernel pointer. */
584 char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
585 rq_for_each_segment(bvec, blkreq, iter) {
586 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
587 memcpy(data + off, bdata, bvec->bv_len);
589 kunmap_atomic(bdata);
/* Mirror of blk_to_xseg(): copy served read data from the xseg request
 * buffer back into the request's bios. As above, 'off' comes from elided
 * lines -- presumably the running source offset. */
593 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
594 struct request *blkreq)
596 struct bio_vec *bvec;
597 struct req_iterator iter;
599 char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
600 rq_for_each_segment(bvec, blkreq, iter) {
601 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
602 memcpy(bdata, data + off, bvec->bv_len);
604 kunmap_atomic(bdata);
/* Request function: drain the block queue, translating each fs request
 * into an xseg request submitted to dst_portno. Runs with dev->lock held
 * by the block layer. Loop structure and several branches are elided in
 * this excerpt. */
608 static void xseg_request_fn(struct request_queue *rq)
610 struct xseg_request *xreq;
611 struct xsegbd *dev = rq->queuedata;
612 struct request *blkreq;
618 xreq = xseg_get_request(dev->xseg, dev->src_portno);
622 blkreq = blk_fetch_request(rq);
/* Non-filesystem requests are completed immediately without error. */
626 if (blkreq->cmd_type != REQ_TYPE_FS) {
627 XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
628 __blk_end_request_all(blkreq, 0);
631 datasize = blk_rq_bytes(blkreq);
/* The xseg buffer must hold the volume name plus the payload. */
632 BUG_ON(xreq->buffersize - dev->namesize < datasize);
633 BUG_ON(xseg_prep_request(xreq, dev->namesize, datasize));
635 name = XSEG_TAKE_PTR(xreq->name, dev->xseg->segment);
/* Copies exactly namesize bytes -- target name is not NUL-terminated. */
636 strncpy(name, dev->name, dev->namesize);
/* Claim a free slot; its index rides in xreq->priv so the completion
 * callback can find the originating block request. */
637 blkreq_idx = xq_pop_head(&dev->blk_queue_pending);
638 BUG_ON(blkreq_idx == None);
639 /* WARN_ON(dev->blk_req_pending[blkreq_idx] */
640 dev->blk_req_pending[blkreq_idx] = blkreq;
641 xreq->priv = (void *)(unsigned long)blkreq_idx;
642 xreq->size = datasize;
643 xreq->offset = blk_rq_pos(blkreq) << 9;
/* Diagnostic only: log requests landing past the advertised capacity. */
645 if (xreq->offset >= (sector_size << 9))
646 XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
647 blk_rq_pos(blkreq), sector_size,
648 blkreq->cmd_flags & REQ_FLUSH,
649 blkreq->cmd_flags & REQ_FUA);
/* Propagate flush/FUA semantics to the peer. */
652 if (blkreq->cmd_flags & REQ_FLUSH)
653 xreq->flags |= XF_FLUSH;
655 if (blkreq->cmd_flags & REQ_FUA)
656 xreq->flags |= XF_FUA;
/* Writes carry their payload into the segment up front. */
658 if (rq_data_dir(blkreq)) {
659 /* unlock for data transfers? */
660 blk_to_xseg(dev->xseg, xreq, blkreq);
666 BUG_ON(xseg_submit(dev->xseg, dev->dst_portno, xreq) == NoSerial);
668 //This is going to happen at least once.
669 //TODO find out why it happens more than once.
670 WARN_ON(xseg_signal(dev->xseg, dev->dst_portno));
/* Reached when no block request was available for an acquired xreq. */
672 xseg_put_request(dev->xseg, dev->src_portno, xreq);
/* Completion callback (installed via xsegdev_map): drain served xseg
 * requests, complete the matching block requests, recycle the slot, and
 * re-kick the request function. Loop structure and the 'err' assignments
 * are elided in this excerpt. */
675 static long xseg_callback(void *arg)
677 struct xsegbd *dev = arg;
678 struct xseg_request *xreq;
679 struct request *blkreq;
685 xreq = xseg_receive(dev->xseg, dev->src_portno);
689 /* we rely upon our peers to not have touched ->priv */
690 blkreq_idx = (xqindex)(unsigned long)xreq->priv;
/* NOTE(review): if xqindex is unsigned, the '< 0' arm is dead code. */
691 if (blkreq_idx < 0 || blkreq_idx >= nr_requests) {
692 XSEGLOG("invalid request index: %u! Ignoring.", blkreq_idx);
696 blkreq = dev->blk_req_pending[blkreq_idx];
697 /* WARN_ON(!blkreq); */
/* Failure checks: not served, or short service. */
700 if (!(xreq->state & XS_SERVED))
703 if (xreq->serviced != blk_rq_bytes(blkreq))
706 /* unlock for data transfer? */
707 if (!rq_data_dir(blkreq))
708 xseg_to_blk(dev->xseg, xreq, blkreq);
712 blk_end_request_all(blkreq, err);
/* Return the slot to the free pool and release the xseg request. */
713 xq_append_head(&dev->blk_queue_pending, blkreq_idx);
715 xseg_put_request(dev->xseg, xreq->portno, xreq);
/* Freed slots may unblock the queue: run the request fn under the lock. */
718 spin_lock_irqsave(&dev->lock, flags);
719 xseg_request_fn(dev->blk_queue);
720 spin_unlock_irqrestore(&dev->lock, flags);