xsegbd: stricter error checks
[archipelago] / xseg / peers / kernel / xsegbd.c
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include <sys/kernel/segdev.h>
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26
27 MODULE_DESCRIPTION("xsegbd");
28 MODULE_AUTHOR("XSEG");
29 MODULE_LICENSE("GPL");
30
31 static long sector_size = 0;
32 static long blksize = 512;
33 static int major = 0;
34 static int max_nr_pending = 1024;
35 static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
36 static char spec[256] = "segdev:xsegbd:4:512:64:1024:12";
37
38 module_param(sector_size, long, 0644);
39 module_param(blksize, long, 0644);
40 module_param(max_nr_pending, int, 0644);
41 module_param(major, int, 0644);
42 module_param_string(name, name, sizeof(name), 0644);
43 module_param_string(spec, spec, sizeof(spec), 0644);
44
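/*
 * Outstanding requests are tracked in a global table of struct pending
 * entries (blk_req_pending). Free slot indices are handed out by the
 * blk_queue_pending xq; the index travels with each xseg request in
 * xreq->priv, so the callback can find the originating block request
 * (or the completion a waiter sleeps on) and the owning device.
 */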
45 struct pending {
46         struct request *request;
47         struct completion *comp;
48         struct xsegbd_device *dev;
49 };
50
51 static struct xq blk_queue_pending;
52 static struct pending *blk_req_pending;
53 static unsigned int nr_pending;
54 static DEFINE_SPINLOCK(__lock);
55 static struct xsegbd xsegbd;
56 static DEFINE_MUTEX(xsegbd_mutex);
57 static LIST_HEAD(xsegbd_dev_list);
58 static DEFINE_SPINLOCK(xsegbd_dev_list_lock);
59
60 /* ************************* */
61 /* ***** sysfs helpers ***** */
62 /* ************************* */
63
64 static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
65 {
66         return container_of(dev, struct xsegbd_device, dev);
67 }
68
69 static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
70 {
71         /* FIXME */
72         return get_device(&xsegbd_dev->dev);
73 }
74
75 static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
76 {
77         put_device(&xsegbd_dev->dev);
78 }
79
80 /* ************************* */
81 /* ** XSEG Initialization ** */
82 /* ************************* */
83
84 static void xseg_callback(struct xseg *xseg, uint32_t portno);
85
86 int xsegbd_xseg_init(void)
87 {
88         int r;
89
90         if (!xsegbd.name[0])
91                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
92
93         r = xseg_initialize();
94         if (r) {
95                 XSEGLOG("cannot initialize 'segdev' peer");
96                 goto err;
97         }
98
99         r = xseg_parse_spec(spec, &xsegbd.config);
100         if (r)
101                 goto err;
102
103         if (strncmp(xsegbd.config.type, "segdev", 16))
104                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
105                          xsegbd.config.type);
106
107         XSEGLOG("joining segment");
108         xsegbd.xseg = xseg_join(        xsegbd.config.type,
109                                         xsegbd.config.name,
110                                         "segdev",
111                                         xseg_callback           );
112         if (!xsegbd.xseg) {
113                 XSEGLOG("cannot find segment");
114                 r = -ENODEV;
115                 goto err;
116         }
117
118         return 0;
119 err:
120         return r;
121
122 }
123
124 int xsegbd_xseg_quit(void)
125 {
126         struct segdev *segdev;
127
128         /* make sure to unmap the segment first */
129         segdev = segdev_get(0);
130         clear_bit(SEGDEV_RESERVED, &segdev->flags);
131         xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
132         segdev_put(segdev);
133
134         return 0;
135 }
136
137
138 /* ***************************** */
139 /* ** Block Device Operations ** */
140 /* ***************************** */
141
142 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
143 {
144         struct gendisk *disk = bdev->bd_disk;
145         struct xsegbd_device *xsegbd_dev = disk->private_data;
146
147         xsegbd_get_dev(xsegbd_dev);
148
149         return 0;
150 }
151
152 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
153 {
154         struct xsegbd_device *xsegbd_dev = gd->private_data;
155
156         xsegbd_put_dev(xsegbd_dev);
157
158         return 0;
159 }
160
161 static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
162                         unsigned int cmd, unsigned long arg)
163 {
164         return -ENOTTY;
165 }
166
167 static const struct block_device_operations xsegbd_ops = {
168         .owner          = THIS_MODULE,
169         .open           = xsegbd_open,
170         .release        = xsegbd_release,
171         .ioctl          = xsegbd_ioctl 
172 };
173
174
175 /* *************************** */
176 /* ** Device Initialization ** */
177 /* *************************** */
178
179 static void xseg_request_fn(struct request_queue *rq);
180 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
181
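/*
 * Set up the request queue and gendisk for a new xsegbd device, reserve
 * its share of the global pending-request table, determine the disk size
 * (from the sector_size parameter or via an X_INFO request) and activate
 * the disk.
 */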
182 static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
183 {
184         int ret = -ENOMEM;
185         struct gendisk *disk;
186         unsigned int max_request_size_bytes;
187
188         spin_lock_init(&xsegbd_dev->lock);
189
190         xsegbd_dev->xsegbd = &xsegbd;
191
192         xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
193         if (!xsegbd_dev->blk_queue)
194                 goto out;
195
196         blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->lock);
197         xsegbd_dev->blk_queue->queuedata = xsegbd_dev;
198
199         blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
200         blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
201         blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
202         blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
203         
204         //blk_queue_max_segments(dev->blk_queue, 512);
205         /* calculate maximum block request size
206          * request size in pages * page_size
207          * leave one page in buffer for name
208          */
209         max_request_size_bytes =
210                  (unsigned int) (xsegbd.config.request_size - 1) *
211                                 ( 1 << xsegbd.config.page_shift) ;
212         blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
213         blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
214         blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
215         blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
216
217         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
218
219         /* vkoukis says we don't need partitions */
220         xsegbd_dev->gd = disk = alloc_disk(1);
221         if (!disk)
222                 /* FIXME: We call xsegbd_dev_release if something goes wrong, to cleanup
223                  * disks/queues/etc.
224                  * Would it be better to do the cleanup here, and conditionally cleanup
225                  * in dev_release?
226                  */
227                 goto out;
228
229         disk->major = xsegbd_dev->major;
230         disk->first_minor = 0; // id * XSEGBD_MINORS;
231         disk->fops = &xsegbd_ops;
232         disk->queue = xsegbd_dev->blk_queue;
233         disk->private_data = xsegbd_dev;
234         disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
235         snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);
236
237         ret = 0;
238         spin_lock_irq(&__lock);
239         if (nr_pending + xsegbd_dev->nr_requests > max_nr_pending)
240                 ret = -ENOBUFS;
241         else
242                 nr_pending += xsegbd_dev->nr_requests;
243         spin_unlock_irq(&__lock);
244
245         if (ret)
246                 goto out;
247
248         /* allow a non-zero sector_size parameter to override the disk size */
249         if (sector_size)
250                 xsegbd_dev->sectors = sector_size;
251         else {
252                 ret = xsegbd_get_size(xsegbd_dev);
253                 if (ret)
254                         goto out;
255         }
256
257         set_capacity(disk, xsegbd_dev->sectors);
258         XSEGLOG("xsegbd active...");
259         add_disk(disk); /* immediately activates the device */
260
261         return 0;
262
263 out:
264         return ret;
265 }
266
267 static void xsegbd_dev_release(struct device *dev)
268 {
269         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
270         struct xseg_port *port;
271
272         /* cleanup gendisk and blk_queue the right way */
273         if (xsegbd_dev->gd) {
274                 if (xsegbd_dev->gd->flags & GENHD_FL_UP)
275                         del_gendisk(xsegbd_dev->gd);
276
277                 blk_cleanup_queue(xsegbd_dev->blk_queue);
278                 put_disk(xsegbd_dev->gd);
279         }
280
281         /* reset the port's waitcue (aka cancel_wait) */
282         port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
283         port->waitcue = (long) NULL;
284
285         if (xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests) != 0)
286                 XSEGLOG("Error trying to free requests!\n");
291         
292         WARN_ON(nr_pending < xsegbd_dev->nr_requests);
293         spin_lock_irq(&__lock);
294         nr_pending -= xsegbd_dev->nr_requests;
295         spin_unlock_irq(&__lock);
296
297         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
298
299         spin_lock(&xsegbd_dev_list_lock);
300         list_del_init(&xsegbd_dev->node);
301         spin_unlock(&xsegbd_dev_list_lock);
302         kfree(xsegbd_dev);
303
304         module_put(THIS_MODULE);
305 }
306
307 /* ******************* */
308 /* ** Critical Path ** */
309 /* ******************* */
310
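/* Copy the data pages of a block request into the xseg request's data
 * buffer (write path). */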
311 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
312                         struct request *blkreq)
313 {
314         struct bio_vec *bvec;
315         struct req_iterator iter;
316         uint64_t off = 0;
317         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
318         rq_for_each_segment(bvec, blkreq, iter) {
319                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
320                 memcpy(data + off, bdata, bvec->bv_len);
321                 off += bvec->bv_len;
322                 kunmap_atomic(bdata);
323         }
324 }
325
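/* Copy data from the xseg request's buffer back into the pages of a block
 * request (read path). */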
326 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
327                         struct request *blkreq)
328 {
329         struct bio_vec *bvec;
330         struct req_iterator iter;
331         uint64_t off = 0;
332         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
333         rq_for_each_segment(bvec, blkreq, iter) {
334                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
335                 memcpy(bdata, data + off, bvec->bv_len);
336                 off += bvec->bv_len;
337                 kunmap_atomic(bdata);
338         }
339 }
340
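/*
 * Block layer request function; called with the queue lock held. For every
 * block request we can fetch we grab a free xseg request, record a pending
 * slot in xreq->priv, copy in the target name (and the data, for writes)
 * and submit it to the destination port. We stop when either resource runs
 * out and return any xseg request we could not pair with a block request.
 */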
341 static void xseg_request_fn(struct request_queue *rq)
342 {
343         struct xseg_request *xreq;
344         struct xsegbd_device *xsegbd_dev = rq->queuedata;
345         struct request *blkreq;
346         struct pending *pending;
347         xqindex blkreq_idx;
348         char *target;
349         uint64_t datalen;
350
351         for (;;) {
352                 xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
353                 if (!xreq)
354                         break;
355
356                 blkreq = blk_fetch_request(rq);
357                 if (!blkreq)
358                         break;
359
360                 if (blkreq->cmd_type != REQ_TYPE_FS) {
361                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
362                         __blk_end_request_all(blkreq, 0);
363                 }
364
365
366                 datalen = blk_rq_bytes(blkreq);
367                 BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);
368                 BUG_ON(xseg_prep_request(xreq, xsegbd_dev->targetlen, datalen));
369
370                 target = XSEG_TAKE_PTR(xreq->target, xsegbd.xseg->segment);
371                 strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
372                 blkreq_idx = xq_pop_head(&blk_queue_pending);
373                 BUG_ON(blkreq_idx == None);
374                 pending = &blk_req_pending[blkreq_idx];
375                 pending->dev = xsegbd_dev;
376                 pending->request = blkreq;
377                 pending->comp = NULL;
378                 xreq->priv = (uint64_t)blkreq_idx;
379                 xreq->size = datalen;
380                 xreq->offset = blk_rq_pos(blkreq) << 9;
381                 /*
382                 if (xreq->offset >= (sector_size << 9))
383                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
384                                  blk_rq_pos(blkreq), sector_size,
385                                  blkreq->cmd_flags & REQ_FLUSH,
386                                  blkreq->cmd_flags & REQ_FUA);
387                 */
388
389                 if (blkreq->cmd_flags & REQ_FLUSH)
390                         xreq->flags |= XF_FLUSH;
391
392                 if (blkreq->cmd_flags & REQ_FUA)
393                         xreq->flags |= XF_FUA;
394
395                 if (rq_data_dir(blkreq)) {
396                         /* unlock for data transfers? */
397                         blk_to_xseg(xsegbd.xseg, xreq, blkreq);
398                         xreq->op = X_WRITE;
399                 } else {
400                         xreq->op = X_READ;
401                 }
402
403                 BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
404         }
405
406         WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno) < 0);
407         if (xreq)
408                 BUG_ON(xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq) == NoSerial);
409 }
410
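/*
 * Parse the reply to an X_INFO request: the target size is returned as a
 * uint64_t at the start of the data buffer and is converted to 512-byte
 * sectors.
 */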
411 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
412                                         struct xseg_request *xreq       )
413 {
414         void *data;
415
416         if (xreq->state & XS_FAILED)
417                 return -ENOENT;
418
419         if (!(xreq->state & XS_SERVED))
420                 return -EIO;
421
422         data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
423         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
424         return 0;
425 }
426
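/*
 * Issue a synchronous X_INFO request for the device's target. The pending
 * slot carries a completion instead of a block request; xseg_callback
 * completes it and leaves the xseg request for us to inspect and put.
 */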
427 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
428 {
429         struct xseg_request *xreq;
430         struct xseg_port *port;
431         char *target;
432         uint64_t datalen;
433         xqindex blkreq_idx;
434         struct pending *pending;
435         struct completion comp;
436         int ret = -EBUSY;
437
438         xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
439         if (!xreq)
440                 return ret;
441
442         datalen = sizeof(uint64_t);
443         BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);
444         BUG_ON(xseg_prep_request(xreq, xsegbd_dev->targetlen, datalen));
445
446         init_completion(&comp);
447         blkreq_idx = xq_pop_head(&blk_queue_pending);
448         BUG_ON(blkreq_idx == None);
449         pending = &blk_req_pending[blkreq_idx];
450         pending->dev = xsegbd_dev;
451         pending->request = NULL;
452         pending->comp = &comp;
453         xreq->priv = (uint64_t)blkreq_idx;
454
455         target = XSEG_TAKE_PTR(xreq->target, xsegbd.xseg->segment);
456         strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
457         xreq->size = datalen;
458         xreq->offset = 0;
459
460         xreq->op = X_INFO;
461
462         port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
463         port->waitcue = (uint64_t)(long)xsegbd_dev;
464
465         BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
466         WARN_ON(xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno) < 0);
467
468         wait_for_completion_interruptible(&comp);
469         XSEGLOG("Woken up after wait_for_completion_interruptible()\n");
        pending->comp = NULL;
        xq_append_head(&blk_queue_pending, blkreq_idx);
470         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
471         XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
472 out:
473         BUG_ON(xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq) == NoSerial);
474         return ret;
475 }
476
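/*
 * Receive-side callback registered with xseg_join. Drains replies from the
 * port: requests with a waiter (X_INFO) just get their completion signalled,
 * while the rest end the corresponding block request, give their pending
 * slot back and are returned to the segment. The request function of every
 * affected queue is kicked afterwards to keep submissions flowing.
 */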
477 static void xseg_callback(struct xseg *xseg, uint32_t portno)
478 {
479         struct xsegbd_device *xsegbd_dev = NULL, *old_dev = NULL;
480         struct xseg_request *xreq;
481         struct request *blkreq;
482         struct pending *pending;
483         unsigned long flags;
484         uint32_t blkreq_idx;
485         int err;
486
487         for (;;) {
488                 xreq = xseg_receive(xseg, portno);
489                 if (!xreq)
490                         break;
491
492                 /* we rely upon our peers to not have touched ->priv */
493                 blkreq_idx = (uint64_t)xreq->priv;
494                 if (blkreq_idx >= max_nr_pending) {
495                         WARN_ON(1);
496                         continue;
497                 }
498
499                 pending = &blk_req_pending[blkreq_idx];
500                 if (pending->comp) {
501                         /* someone is blocking on this request
502                            and will handle it when we wake them up. */
503                         complete(pending->comp);
504                         /* the request is blocker's responsibility so
505                            we will not put_request(); */
506                         continue;
507                 }
508
509                 /* this is now treated as a block I/O request to end */
510                 blkreq = pending->request;
511                 pending->request = NULL;
512                 xsegbd_dev = pending->dev;
513                 pending->dev = NULL;
514                 WARN_ON(!blkreq);
515
516                 if ((xsegbd_dev != old_dev) && old_dev) {
517                         spin_lock_irqsave(&old_dev->lock, flags);
518                         xseg_request_fn(old_dev->blk_queue);
519                         spin_unlock_irqrestore(&old_dev->lock, flags);
520                 }
521
522                 old_dev = xsegbd_dev;
523                 err = -EIO;
524                 if (!(xreq->state & XS_SERVED))
525                         goto blk_end;
526
527                 if (xreq->serviced != blk_rq_bytes(blkreq))
528                         goto blk_end;
529
530                 /* unlock for data transfer? */
531                 if (!rq_data_dir(blkreq))
532                         xseg_to_blk(xseg, xreq, blkreq);
533
534                 err = 0;
535 blk_end:
536                 blk_end_request_all(blkreq, err);
537                 xq_append_head(&blk_queue_pending, blkreq_idx);
538                 BUG_ON(xseg_put_request(xseg, xreq->portno, xreq) == NoSerial);
539         }
540
541         if (xsegbd_dev) {
542                 spin_lock_irqsave(&xsegbd_dev->lock, flags);
543                 xseg_request_fn(xsegbd_dev->blk_queue);
544                 spin_unlock_irqrestore(&xsegbd_dev->lock, flags);
545         }
546 }
547
548
549 /* sysfs interface */
550
551 static struct bus_type xsegbd_bus_type = {
552         .name   = "xsegbd",
553 };
554
555 static ssize_t xsegbd_size_show(struct device *dev,
556                                         struct device_attribute *attr, char *buf)
557 {
558         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
559
560         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
561 }
562
563 static ssize_t xsegbd_major_show(struct device *dev,
564                                         struct device_attribute *attr, char *buf)
565 {
566         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
567
568         return sprintf(buf, "%d\n", xsegbd_dev->major);
569 }
570
571 static ssize_t xsegbd_srcport_show(struct device *dev,
572                                         struct device_attribute *attr, char *buf)
573 {
574         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
575
576         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
577 }
578
579 static ssize_t xsegbd_dstport_show(struct device *dev,
580                                         struct device_attribute *attr, char *buf)
581 {
582         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
583
584         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
585 }
586
587 static ssize_t xsegbd_id_show(struct device *dev,
588                                         struct device_attribute *attr, char *buf)
589 {
590         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
591
592         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
593 }
594
595 static ssize_t xsegbd_reqs_show(struct device *dev,
596                                         struct device_attribute *attr, char *buf)
597 {
598         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
599
600         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
601 }
602
603 static ssize_t xsegbd_target_show(struct device *dev,
604                                         struct device_attribute *attr, char *buf)
605 {
606         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
607
608         return sprintf(buf, "%s\n", xsegbd_dev->target);
609 }
610
611 static ssize_t xsegbd_image_refresh(struct device *dev,
612                                         struct device_attribute *attr,
613                                         const char *buf,
614                                         size_t size)
615 {
616         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
617         int rc, ret = size;
618
619         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
620
621         rc = xsegbd_get_size(xsegbd_dev);
622         if (rc < 0) {
623                 ret = rc;
624                 goto out;
625         }
626
627         set_capacity(xsegbd_dev->gd, xsegbd_dev->sectors);
628
629 out:
630         mutex_unlock(&xsegbd_mutex);
631         return ret;
632 }
633
634 static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
635 static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
636 static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
637 static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
638 static DEVICE_ATTR(id , S_IRUGO, xsegbd_id_show, NULL);
639 static DEVICE_ATTR(reqs , S_IRUGO, xsegbd_reqs_show, NULL);
640 static DEVICE_ATTR(target, S_IRUGO, xsegbd_target_show, NULL);
641 static DEVICE_ATTR(refresh , S_IWUSR, NULL, xsegbd_image_refresh);
642
643 static struct attribute *xsegbd_attrs[] = {
644         &dev_attr_size.attr,
645         &dev_attr_major.attr,
646         &dev_attr_srcport.attr,
647         &dev_attr_dstport.attr,
648         &dev_attr_id.attr,
649         &dev_attr_reqs.attr,
650         &dev_attr_target.attr,
651         &dev_attr_refresh.attr,
652         NULL
653 };
654
655 static struct attribute_group xsegbd_attr_group = {
656         .attrs = xsegbd_attrs,
657 };
658
659 static const struct attribute_group *xsegbd_attr_groups[] = {
660         &xsegbd_attr_group,
661         NULL
662 };
663
664 static void xsegbd_sysfs_dev_release(struct device *dev)
665 {
666 }
667
668 static struct device_type xsegbd_device_type = {
669         .name           = "xsegbd",
670         .groups         = xsegbd_attr_groups,
671         .release        = xsegbd_sysfs_dev_release,
672 };
673
674 static void xsegbd_root_dev_release(struct device *dev)
675 {
676 }
677
678 static struct device xsegbd_root_dev = {
679         .init_name      = "xsegbd",
680         .release        = xsegbd_root_dev_release,
681 };
682
683 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
684 {
685         int ret = -ENOMEM;
686         struct device *dev;
687
688         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
689         dev = &xsegbd_dev->dev;
690
691         dev->bus = &xsegbd_bus_type;
692         dev->type = &xsegbd_device_type;
693         dev->parent = &xsegbd_root_dev;
694         dev->release = xsegbd_dev_release;
695         dev_set_name(dev, "%d", xsegbd_dev->id);
696
697         ret = device_register(dev);
698
699         mutex_unlock(&xsegbd_mutex);
700         return ret;
701 }
702
703 static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
704 {
705         device_unregister(&xsegbd_dev->dev);
706 }
707
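/*
 * sysfs 'add' handler. Parses "<target> <src-port>:<dst-port>:<nr-requests>",
 * allocates and registers the device, binds its source port and
 * pre-allocates its xseg requests before handing off to xsegbd_dev_init().
 */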
708 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
709 {
710         struct xsegbd_device *xsegbd_dev;
711         struct xseg_port *xport;
712         ssize_t ret = -ENOMEM;
713         int new_id = 0;
714         struct list_head *tmp;
715
716         if (!try_module_get(THIS_MODULE))
717                 return -ENODEV;
718
719         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
720         if (!xsegbd_dev)
721                 goto out;
722
723         spin_lock_init(&xsegbd_dev->lock);
724         INIT_LIST_HEAD(&xsegbd_dev->node);
725
726         /* parse cmd */
727         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
728                         "%d:%d:%d", xsegbd_dev->target, &xsegbd_dev->src_portno,
729                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
730                 ret = -EINVAL;
731                 goto out_dev;
732         }
733         xsegbd_dev->targetlen = strlen(xsegbd_dev->target);
734
735         spin_lock(&xsegbd_dev_list_lock);
736
737         list_for_each(tmp, &xsegbd_dev_list) {
738                 struct xsegbd_device *entry;
739
740                 entry = list_entry(tmp, struct xsegbd_device, node);
741
742                 if (entry->src_portno == xsegbd_dev->src_portno) {
743                         ret = -EINVAL;
744                         goto out_unlock;
745                 }
746
747                 if (entry->id >= new_id)
748                         new_id = entry->id + 1;
749         }
750
751         xsegbd_dev->id = new_id;
752
753         list_add_tail(&xsegbd_dev->node, &xsegbd_dev_list);
754
755         spin_unlock(&xsegbd_dev_list_lock);
756
757         XSEGLOG("registering block device major %d", major);
758         ret = register_blkdev(major, XSEGBD_NAME);
759         if (ret < 0) {
760                 XSEGLOG("cannot register block device!");
761                 ret = -EBUSY;
762                 goto out_delentry;
763         }
764         xsegbd_dev->major = ret;
765         XSEGLOG("registered block device major %d", xsegbd_dev->major);
766
767         ret = xsegbd_bus_add_dev(xsegbd_dev);
768         if (ret)
769                 goto out_blkdev;
770
771         XSEGLOG("binding to source port %u (destination %u)",
772                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
773         xport = xseg_bind_port(xsegbd.xseg, xsegbd_dev->src_portno);
774         if (!xport) {
775                 XSEGLOG("cannot bind to port");
776                 ret = -EFAULT;
777
778                 goto out_bus;
779         }
780         /* make sure we don't get any requests until we're ready to handle them */
781         xport->waitcue = (long) NULL;
782
783         XSEGLOG("allocating %u requests", xsegbd_dev->nr_requests);
784         if (xseg_alloc_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests)) {
785                 XSEGLOG("cannot allocate requests");
786                 ret = -EFAULT;
787
788                 goto out_bus;
789         }
790
791         ret = xsegbd_dev_init(xsegbd_dev);
792         if (ret)
793                 goto out_bus;
794
795         return count;
796
797 out_bus:
798         xsegbd_bus_del_dev(xsegbd_dev);
799
800         return ret;
801
802 out_blkdev:
803         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
804
805 out_delentry:
806         spin_lock(&xsegbd_dev_list_lock);
807         list_del_init(&xsegbd_dev->node);
808
809 out_unlock:
810         spin_unlock(&xsegbd_dev_list_lock);
811
812 out_dev:
813         kfree(xsegbd_dev);
814
815 out:
        module_put(THIS_MODULE);
816         return ret;
817 }
818
819 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
820 {
821         struct list_head *tmp;
822         struct xsegbd_device *xsegbd_dev;
823
824
825         spin_lock(&xsegbd_dev_list_lock);
826         list_for_each(tmp, &xsegbd_dev_list) {
827                 xsegbd_dev = list_entry(tmp, struct xsegbd_device, node);
828                 if (xsegbd_dev->id == id) {
829                         spin_unlock(&xsegbd_dev_list_lock);
830                         return xsegbd_dev;
831                 }
832         }
833         spin_unlock(&xsegbd_dev_list_lock);
834         return NULL;
835 }
836
837 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
838 {
839         struct xsegbd_device *xsegbd_dev = NULL;
840         int id, ret;
841         unsigned long ul_id;
842
843         ret = strict_strtoul(buf, 10, &ul_id);
844         if (ret)
845                 return ret;
846
847         id = (int) ul_id;
848         if (id != ul_id)
849                 return -EINVAL;
850
851         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
852
853         ret = count;
854         xsegbd_dev = __xsegbd_get_dev(id);
855         if (!xsegbd_dev) {
856                 ret = -ENOENT;
857                 goto out_unlock;
858         }
859
860         xsegbd_bus_del_dev(xsegbd_dev);
861
862 out_unlock:
863         mutex_unlock(&xsegbd_mutex);
864         return ret;
865 }
866
867 static struct bus_attribute xsegbd_bus_attrs[] = {
868         __ATTR(add, S_IWUSR, NULL, xsegbd_add),
869         __ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
870         __ATTR_NULL
871 };
872
873 static int xsegbd_sysfs_init(void)
874 {
875         int ret;
876
877         ret = device_register(&xsegbd_root_dev);
878         if (ret < 0)
879                 return ret;
880
881         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
882         ret = bus_register(&xsegbd_bus_type);
883         if (ret < 0)
884                 device_unregister(&xsegbd_root_dev);
885
886         return ret;
887 }
888
889 static void xsegbd_sysfs_cleanup(void)
890 {
891         bus_unregister(&xsegbd_bus_type);
892         device_unregister(&xsegbd_root_dev);
893 }
894
895 /* *************************** */
896 /* ** Module Initialization ** */
897 /* *************************** */
898
899 static int __init xsegbd_init(void)
900 {
901         int ret = -ENOMEM;
902
903         if (!xq_alloc_seq(&blk_queue_pending, max_nr_pending, max_nr_pending))
904                 goto out;
905
906         blk_req_pending = kzalloc(sizeof(struct pending) * max_nr_pending, GFP_KERNEL);
907         if (!blk_req_pending)
908                 goto out_queue;
909
910         ret = -ENOSYS;
911         ret = xsegbd_xseg_init();
912         if (ret)
913                 goto out_pending;
914
915         ret = xsegbd_sysfs_init();
916         if (ret)
917                 goto out_xseg;
918
919         XSEGLOG("initialization complete");
920
921 out:
922         return ret;
923
924 out_xseg:
925         xsegbd_xseg_quit();
926 out_pending:
927         kfree(blk_req_pending);
928 out_queue:
929         xq_free(&blk_queue_pending);
930         goto out;
931 }
932
933 static void __exit xsegbd_exit(void)
934 {
935         xsegbd_sysfs_cleanup();
936         xsegbd_xseg_quit();
937 }
938
939 module_init(xsegbd_init);
940 module_exit(xsegbd_exit);
941