Various fixes to make xseg functional again
[archipelago] / xseg / peers / kernel / xsegbd.c
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include <sys/kernel/segdev.h>
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26
27 MODULE_DESCRIPTION("xsegbd");
28 MODULE_AUTHOR("XSEG");
29 MODULE_LICENSE("GPL");
30
31 static long sector_size = 0;
32 static long blksize = 512;
33 static int major = 0;
34 static int max_nr_pending = 1024;
35 static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
36 static char spec[256] = "segdev:xsegbd:4:512:64:1024:12";
37
38 module_param(sector_size, long, 0644);
39 module_param(blksize, long, 0644);
40 module_param(max_nr_pending, int, 0644);
41 module_param(major, int, 0644);
42 module_param_string(name, name, sizeof(name), 0644);
43 module_param_string(spec, spec, sizeof(spec), 0644);
44
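/* bookkeeping for one in-flight xseg request: the block request it was
 * built from (or a completion to wake for internal, synchronous requests)
 * and the xsegbd device it belongs to */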
45 struct pending {
46         struct request *request;
47         struct completion *comp;
48         struct xsegbd_device *dev;
49 };
50
51 static struct xq blk_queue_pending;
52 static struct pending *blk_req_pending;
53 static unsigned int nr_pending;
54 static DEFINE_SPINLOCK(__lock);
55 static struct xsegbd xsegbd;
56 static DEFINE_MUTEX(xsegbd_mutex);
57 static LIST_HEAD(xsegbd_dev_list);
58
59 /* ************************* */
60 /* ***** sysfs helpers ***** */
61 /* ************************* */
62
63 static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
64 {
65         return container_of(dev, struct xsegbd_device, dev);
66 }
67
68 static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
69 {
70         /* FIXME */
71         return get_device(&xsegbd_dev->dev);
72 }
73
74 static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
75 {
76         put_device(&xsegbd_dev->dev);
77 }
78
79 /* ************************* */
80 /* ** XSEG Initialization ** */
81 /* ************************* */
82
83 static void xseg_callback(struct xseg *xseg, uint32_t portno);
84
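/* initialize the xseg library, parse the segment spec and join the
 * shared memory segment as a 'segdev' peer */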
85 int xsegbd_xseg_init(void)
86 {
87         int r;
88
89         if (!xsegbd.name[0])
90                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
91
92         r = xseg_initialize();
93         if (r) {
94                 XSEGLOG("cannot initialize 'segdev' peer");
95                 goto err;
96         }
97
98         r = xseg_parse_spec(spec, &xsegbd.config);
99         if (r)
100                 goto err;
101
102         if (strncmp(xsegbd.config.type, "segdev", 16))
103                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
104                          xsegbd.config.type);
105
106         XSEGLOG("joining segment");
107         xsegbd.xseg = xseg_join(        xsegbd.config.type,
108                                         xsegbd.config.name,
109                                         "segdev",
110                                         xseg_callback           );
111         if (!xsegbd.xseg) {
112                 XSEGLOG("cannot find segment");
113                 r = -ENODEV;
114                 goto err;
115         }
116
117         return 0;
118 err:
119         return r;
120
121 }
122
123 int xsegbd_xseg_quit(void)
124 {
125         struct segdev *segdev;
126
127         /* make sure to unmap the segment first */
128         segdev = segdev_get(0);
129         clear_bit(SEGDEV_RESERVED, &segdev->flags);
130         xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
131         segdev_put(segdev);
132
133         return 0;
134 }
135
136
137 /* ***************************** */
138 /* ** Block Device Operations ** */
139 /* ***************************** */
140
141 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
142 {
143         struct gendisk *disk = bdev->bd_disk;
144         struct xsegbd_device *xsegbd_dev = disk->private_data;
145
146         xsegbd_get_dev(xsegbd_dev);
147
148         return 0;
149 }
150
151 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
152 {
153         struct xsegbd_device *xsegbd_dev = gd->private_data;
154
155         xsegbd_put_dev(xsegbd_dev);
156
157         return 0;
158 }
159
160 static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
161                         unsigned int cmd, unsigned long arg)
162 {
163         return -ENOTTY;
164 }
165
166 static const struct block_device_operations xsegbd_ops = {
167         .owner          = THIS_MODULE,
168         .open           = xsegbd_open,
169         .release        = xsegbd_release,
170         .ioctl          = xsegbd_ioctl 
171 };
172
173
174 /* *************************** */
175 /* ** Device Initialization ** */
176 /* *************************** */
177
178 static void xseg_request_fn(struct request_queue *rq);
179 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
180
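/* set up the block queue and gendisk for a new xsegbd device, reserve its
 * share of the global pending slots and determine its size before adding
 * the disk */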
181 static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
182 {
183         int ret = -ENOMEM;
184         struct gendisk *disk;
185         unsigned int max_request_size_bytes;
186
187         spin_lock_init(&xsegbd_dev->lock);
188
189         xsegbd_dev->xsegbd = &xsegbd;
190
191         xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
192         if (!xsegbd_dev->blk_queue)
193                 goto out;
194
195         blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->lock);
196         xsegbd_dev->blk_queue->queuedata = xsegbd_dev;
197
198         blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
199         blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
200         blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
201         blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
202         
203         //blk_queue_max_segments(dev->blk_queue, 512);
204         /* calculate maximum block request size
205          * request size in pages * page_size
206          * leave one page in buffer for name
207          */
208         max_request_size_bytes =
209                  (unsigned int) (xsegbd.config.request_size - 1) *
210                                 ( 1 << xsegbd.config.page_shift) ;
211         blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
212         blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
213         blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
214         blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
215
216         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
217
218         /* vkoukis says we don't need partitions */
219         xsegbd_dev->gd = disk = alloc_disk(1);
220         if (!disk)
221                 goto out_disk;
222
223         disk->major = xsegbd_dev->major;
224         disk->first_minor = 0; // id * XSEGBD_MINORS;
225         disk->fops = &xsegbd_ops;
226         disk->queue = xsegbd_dev->blk_queue;
227         disk->private_data = xsegbd_dev;
228         disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
229         snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);
230
231         ret = 0;
232         spin_lock_irq(&__lock);
233         if (nr_pending + xsegbd_dev->nr_requests > max_nr_pending)
234                 ret = -ENOBUFS;
235         else
236                 nr_pending += xsegbd_dev->nr_requests;
237         spin_unlock_irq(&__lock);
238
239         if (ret)
240                 goto out_disk;
241
242         /* allow a non-zero sector_size parameter to override the disk size */
243         if (sector_size)
244                 xsegbd_dev->sectors = sector_size;
245         else {
246                 ret = xsegbd_get_size(xsegbd_dev);
247                 if (ret)
248                         goto out_disk;
249         }
250
251         
252         set_capacity(disk, xsegbd_dev->sectors);
253         XSEGLOG("xsegbd active...");
254         add_disk(disk); /* immediately activates the device */
255
256         return 0;
257
258 out_disk:
259         put_disk(disk);
260 out:
261         return ret;
262 }
263
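/* device release callback: tear down the gendisk and queue, give the xseg
 * requests and pending slots back and unregister the block major */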
264 static void xsegbd_dev_release(struct device *dev)
265 {
266         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
267         struct xseg_port *port;
268
269         /* cleanup gendisk and blk_queue the right way */
270         if (xsegbd_dev->gd) {
271                 if (xsegbd_dev->gd->flags & GENHD_FL_UP)
272                         del_gendisk(xsegbd_dev->gd);
273
274                 blk_cleanup_queue(xsegbd_dev->blk_queue);
275                 put_disk(xsegbd_dev->gd);
276         }
277
278         /* reset the port's waitcue (aka cancel_wait) */
279         port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
280         port->waitcue = (long) NULL;
281
282         xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests);
283
284         WARN_ON(nr_pending < xsegbd_dev->nr_requests);
285         spin_lock_irq(&__lock);
286         nr_pending -= xsegbd_dev->nr_requests;
287         spin_unlock_irq(&__lock);
288
289         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
290
291         kfree(xsegbd_dev);
292
293         module_put(THIS_MODULE);
294 }
295
296 /* ******************* */
297 /* ** Critical Path ** */
298 /* ******************* */
299
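/* copy the data of a block request into the xseg request buffer inside the
 * segment (used for writes); xseg_to_blk below does the reverse for reads */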
300 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
301                         struct request *blkreq)
302 {
303         struct bio_vec *bvec;
304         struct req_iterator iter;
305         uint64_t off = 0;
306         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
307         rq_for_each_segment(bvec, blkreq, iter) {
308                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
309                 memcpy(data + off, bdata, bvec->bv_len);
310                 off += bvec->bv_len;
311                 kunmap_atomic(bdata);
312         }
313 }
314
315 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
316                         struct request *blkreq)
317 {
318         struct bio_vec *bvec;
319         struct req_iterator iter;
320         uint64_t off = 0;
321         char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
322         rq_for_each_segment(bvec, blkreq, iter) {
323                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
324                 memcpy(bdata, data + off, bvec->bv_len);
325                 off += bvec->bv_len;
326                 kunmap_atomic(bdata);
327         }
328 }
329
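/* block queue request function: for every fetched block request grab a free
 * xseg request, fill in the target name, offset and data (for writes),
 * remember it in a pending slot and submit it to the destination port */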
330 static void xseg_request_fn(struct request_queue *rq)
331 {
332         struct xseg_request *xreq;
333         struct xsegbd_device *xsegbd_dev = rq->queuedata;
334         struct request *blkreq;
335         struct pending *pending;
336         xqindex blkreq_idx;
337         char *name;
338         uint64_t datasize;
339
340         for (;;) {
341                 xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
342                 if (!xreq)
343                         break;
344
345                 blkreq = blk_fetch_request(rq);
346                 if (!blkreq)
347                         break;
348
349                 if (blkreq->cmd_type != REQ_TYPE_FS) {
350                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
351                         __blk_end_request_all(blkreq, 0);
                            /* this request is already completed; give the xseg
                             * request back and move on instead of submitting it */
                            xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
                            continue;
352                 }
353
354
355                 datasize = blk_rq_bytes(blkreq);
356                 BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
357                 BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));
358
359                 name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
360                 strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
361                 blkreq_idx = xq_pop_head(&blk_queue_pending);
362                 BUG_ON(blkreq_idx == None);
363                 pending = &blk_req_pending[blkreq_idx];
364                 pending->dev = xsegbd_dev;
365                 pending->request = blkreq;
366                 pending->comp = NULL;
367                 xreq->priv = (uint64_t)blkreq_idx;
368                 xreq->size = datasize;
369                 xreq->offset = blk_rq_pos(blkreq) << 9;
370                 /*
371                 if (xreq->offset >= (sector_size << 9))
372                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
373                                  blk_rq_pos(blkreq), sector_size,
374                                  blkreq->cmd_flags & REQ_FLUSH,
375                                  blkreq->cmd_flags & REQ_FUA);
376                 */
377
378                 if (blkreq->cmd_flags & REQ_FLUSH)
379                         xreq->flags |= XF_FLUSH;
380
381                 if (blkreq->cmd_flags & REQ_FUA)
382                         xreq->flags |= XF_FUA;
383
384                 if (rq_data_dir(blkreq)) {
385                         /* unlock for data transfers? */
386                         blk_to_xseg(xsegbd.xseg, xreq, blkreq);
387                         xreq->op = X_WRITE;
388                 } else {
389                         xreq->op = X_READ;
390                 }
391
392                 BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
393         }
394
395         /* TODO:
396          * This is going to happen at least once.
397          * Add a WARN_ON when debugging to find out why it happens more than once.
398          */
399         xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno);
400         if (xreq)
401                 xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq);
402 }
403
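/* translate an X_INFO reply into the device capacity in 512-byte sectors */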
404 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
405                                         struct xseg_request *xreq       )
406 {
407         void *data;
408
409         if (xreq->state & XS_FAILED)
410                 return -ENOENT;
411
412         if (!(xreq->state & XS_SERVED))
413                 return -EIO;
414
415         data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
416         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
417         return 0;
418 }
419
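/* issue a synchronous X_INFO request for the target and sleep on a
 * completion until the callback signals that the reply has arrived */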
420 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
421 {
422         struct xseg_request *xreq;
423         struct xseg_port *port;
424         char *name;
425         uint64_t datasize;
426         xqindex blkreq_idx;
427         struct pending *pending;
428         struct completion comp;
429         int ret = -EBUSY;
430
431         xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
432         if (!xreq)
433                 return ret;
434
435         datasize = sizeof(uint64_t);
436         BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
437         BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));
438
439         init_completion(&comp);
440         blkreq_idx = xq_pop_head(&blk_queue_pending);
441         BUG_ON(blkreq_idx == None);
442         pending = &blk_req_pending[blkreq_idx];
443         pending->dev = xsegbd_dev;
444         pending->request = NULL;
445         pending->comp = &comp;
446         xreq->priv = (uint64_t)blkreq_idx;
447
448         name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
449         strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
450         xreq->size = datasize;
451         xreq->offset = 0;
452
453         xreq->op = X_INFO;
454
455         port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
456         port->waitcue = (uint64_t)(long)xsegbd_dev;
457
458         BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
459         xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);
460
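        /* note: the return value of the interruptible wait is not checked, so an
         * interrupted waiter falls through and puts back a request that may
         * still be in flight */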
461         wait_for_completion_interruptible(&comp);
462         XSEGLOG("Woken up after wait_for_completion_interruptible()\n");
463         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
464         XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
465 out:
466         xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
467         return ret;
468 }
469
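/* port callback: receive completed xseg requests, wake up synchronous
 * waiters or end the corresponding block requests, then restart the
 * request function of the affected queues */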
470 static void xseg_callback(struct xseg *xseg, uint32_t portno)
471 {
472         struct xsegbd_device *xsegbd_dev = NULL, *old_dev = NULL;
473         struct xseg_request *xreq;
474         struct request *blkreq;
475         struct pending *pending;
476         unsigned long flags;
477         uint32_t blkreq_idx;
478         int err;
479
480         for (;;) {
481                 xreq = xseg_receive(xseg, portno);
482                 if (!xreq)
483                         break;
484
485                 /* we rely upon our peers to not have touched ->priv */
486                 blkreq_idx = (uint64_t)xreq->priv;
487                 if (blkreq_idx >= max_nr_pending) {
488                         WARN_ON(1);
489                         continue;
490                 }
491
492                 pending = &blk_req_pending[blkreq_idx];
493                 if (pending->comp) {
494                         /* someone is blocking on this request
495                            and will handle it when we wake them up. */
496                         complete(pending->comp);
497                         /* the request is blocker's responsibility so
498                            we will not put_request(); */
499                         continue;
500                 }
501
502                 /* this is now treated as a block I/O request to end */
503                 blkreq = pending->request;
504                 pending->request = NULL;
505                 xsegbd_dev = pending->dev;
506                 pending->dev = NULL;
507                 WARN_ON(!blkreq);
508
509                 if ((xsegbd_dev != old_dev) && old_dev) {
510                         spin_lock_irqsave(&old_dev->lock, flags);
511                         xseg_request_fn(old_dev->blk_queue);
512                         spin_unlock_irqrestore(&old_dev->lock, flags);
513                 }
514
515                 old_dev = xsegbd_dev;
516
                /* assume failure; cleared below once the request is fully served */
                err = -EIO;
517                 if (!(xreq->state & XS_SERVED))
518                         goto blk_end;
519
520                 if (xreq->serviced != blk_rq_bytes(blkreq))
521                         goto blk_end;
522
523                 /* unlock for data transfer? */
524                 if (!rq_data_dir(blkreq))
525                         xseg_to_blk(xseg, xreq, blkreq);
526
527                 err = 0;
528 blk_end:
529                 blk_end_request_all(blkreq, err);
530                 xq_append_head(&blk_queue_pending, blkreq_idx);
531                 xseg_put_request(xseg, xreq->portno, xreq);
532         }
533
534         if (xsegbd_dev) {
535                 spin_lock_irqsave(&xsegbd_dev->lock, flags);
536                 xseg_request_fn(xsegbd_dev->blk_queue);
537                 spin_unlock_irqrestore(&xsegbd_dev->lock, flags);
538         }
539 }
540
541
542 /* sysfs interface */
543
544 static struct bus_type xsegbd_bus_type = {
545         .name   = "xsegbd",
546 };
547
548 static ssize_t xsegbd_size_show(struct device *dev,
549                                         struct device_attribute *attr, char *buf)
550 {
551         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
552
553         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
554 }
555
556 static ssize_t xsegbd_major_show(struct device *dev,
557                                         struct device_attribute *attr, char *buf)
558 {
559         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
560
561         return sprintf(buf, "%d\n", xsegbd_dev->major);
562 }
563
564 static ssize_t xsegbd_srcport_show(struct device *dev,
565                                         struct device_attribute *attr, char *buf)
566 {
567         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
568
569         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
570 }
571
572 static ssize_t xsegbd_dstport_show(struct device *dev,
573                                         struct device_attribute *attr, char *buf)
574 {
575         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
576
577         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
578 }
579
580 static ssize_t xsegbd_id_show(struct device *dev,
581                                         struct device_attribute *attr, char *buf)
582 {
583         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
584
585         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
586 }
587
588 static ssize_t xsegbd_reqs_show(struct device *dev,
589                                         struct device_attribute *attr, char *buf)
590 {
591         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
592
593         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
594 }
595
596 static ssize_t xsegbd_name_show(struct device *dev,
597                                         struct device_attribute *attr, char *buf)
598 {
599         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
600
601         return sprintf(buf, "%s\n", xsegbd_dev->name);
602 }
603
604 static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
605 static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
606 static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
607 static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
608 static DEVICE_ATTR(id , S_IRUGO, xsegbd_id_show, NULL);
609 static DEVICE_ATTR(reqs , S_IRUGO, xsegbd_reqs_show, NULL);
610 static DEVICE_ATTR(name , S_IRUGO, xsegbd_name_show, NULL);
611
612 static struct attribute *xsegbd_attrs[] = {
613         &dev_attr_size.attr,
614         &dev_attr_major.attr,
615         &dev_attr_srcport.attr,
616         &dev_attr_dstport.attr,
617         &dev_attr_id.attr,
618         &dev_attr_reqs.attr,
619         &dev_attr_name.attr,
620         NULL
621 };
622
623 static struct attribute_group xsegbd_attr_group = {
624         .attrs = xsegbd_attrs,
625 };
626
627 static const struct attribute_group *xsegbd_attr_groups[] = {
628         &xsegbd_attr_group,
629         NULL
630 };
631
632 static void xsegbd_sysfs_dev_release(struct device *dev)
633 {
634 }
635
636 static struct device_type xsegbd_device_type = {
637         .name           = "xsegbd",
638         .groups         = xsegbd_attr_groups,
639         .release        = xsegbd_sysfs_dev_release,
640 };
641
642 static void xsegbd_root_dev_release(struct device *dev)
643 {
644 }
645
646 static struct device xsegbd_root_dev = {
647         .init_name      = "xsegbd",
648         .release        = xsegbd_root_dev_release,
649 };
650
651 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
652 {
653         int ret = -ENOMEM;
654         struct device *dev;
655
656         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
657         dev = &xsegbd_dev->dev;
658
659         dev->bus = &xsegbd_bus_type;
660         dev->type = &xsegbd_device_type;
661         dev->parent = &xsegbd_root_dev;
662         dev->release = xsegbd_dev_release;
663         dev_set_name(dev, "%d", xsegbd_dev->id);
664
665         ret = device_register(dev);
666
667         mutex_unlock(&xsegbd_mutex);
668         return ret;
669 }
670
671 static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
672 {
673         device_unregister(&xsegbd_dev->dev);
674 }
675
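/* sysfs 'add' handler: parses "<target> <src_port>:<dst_port>:<nr_requests>",
 * registers a block major, binds the source port and brings up the disk.
 * Example (illustrative values): echo "myvolume 1:0:128" > /sys/bus/xsegbd/add */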
676 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
677 {
678         struct xsegbd_device *xsegbd_dev;
679         struct xseg_port *xport;
680         ssize_t ret = -ENOMEM;
681         int new_id = 0;
682         struct list_head *tmp;
683
684         if (!try_module_get(THIS_MODULE))
685                 return -ENODEV;
686
687         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
688         if (!xsegbd_dev)
689                 goto out;
690
691         spin_lock_init(&xsegbd_dev->lock);
692         INIT_LIST_HEAD(&xsegbd_dev->node);
693
694         /* parse cmd */
695         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
696                         "%d:%d:%d", xsegbd_dev->name, &xsegbd_dev->src_portno,
697                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
698                 ret = -EINVAL;
699                 goto out_dev;
700         }
701         xsegbd_dev->namesize = strlen(xsegbd_dev->name);
702
703         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
704
705         list_for_each(tmp, &xsegbd_dev_list) {
706                 struct xsegbd_device *entry;
707
708                 entry = list_entry(tmp, struct xsegbd_device, node);
709
710                 if (entry->src_portno == xsegbd_dev->src_portno) {
711                         ret = -EINVAL;
712                         goto out_unlock;
713                 }
714
715                 if (entry->id >= new_id)
716                         new_id = entry->id + 1;
717         }
718
719         xsegbd_dev->id = new_id;
720
721         list_add_tail(&xsegbd_dev->node, &xsegbd_dev_list);
722
723         mutex_unlock(&xsegbd_mutex);
724
725         XSEGLOG("registering block device major %d", major);
726         ret = register_blkdev(major, XSEGBD_NAME);
727         if (ret < 0) {
728                 XSEGLOG("cannot register block device!");
729                 ret = -EBUSY;
730                 goto out_delentry;
731         }
732         xsegbd_dev->major = ret;
733         XSEGLOG("registered block device major %d", xsegbd_dev->major);
734
735         ret = xsegbd_bus_add_dev(xsegbd_dev);
736         if (ret)
737                 goto out_blkdev;
738
739         XSEGLOG("binding to source port %u (destination %u)",
740                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
741         xport = xseg_bind_port(xsegbd.xseg, xsegbd_dev->src_portno);
742         if (!xport) {
743                 XSEGLOG("cannot bind to port");
744                 ret = -EFAULT;
745
746                 goto out_bus;
747         }
748         /* make sure we don't get any requests until we're ready to handle them */
749         xport->waitcue = (long) NULL;
750
751         XSEGLOG("allocating %u requests", xsegbd_dev->nr_requests);
752         if (xseg_alloc_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests)) {
753                 XSEGLOG("cannot allocate requests");
754                 ret = -EFAULT;
755
756                 goto out_bus;
757         }
758
759         ret = xsegbd_dev_init(xsegbd_dev);
760         if (ret)
761                 goto out_bus;
762
763         return count;
764
765 out_bus:
766         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
767
768         list_del_init(&xsegbd_dev->node);
769         xsegbd_bus_del_dev(xsegbd_dev);
770
771         mutex_unlock(&xsegbd_mutex);
772
773         return ret;
774
775 out_blkdev:
776         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
777
778 out_delentry:
779         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
780         list_del_init(&xsegbd_dev->node);
781
782 out_unlock:
783         mutex_unlock(&xsegbd_mutex);
784
785 out_dev:
786         kfree(xsegbd_dev);
787
788 out:
789         return ret;
790 }
791
792 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
793 {
794         struct list_head *tmp;
795         struct xsegbd_device *xsegbd_dev;
796
797         list_for_each(tmp, &xsegbd_dev_list) {
798                 xsegbd_dev = list_entry(tmp, struct xsegbd_device, node);
799                 if (xsegbd_dev->id == id)
800                         return xsegbd_dev;
801
802         }
803
804         return NULL;
805 }
806
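/* sysfs 'remove' handler: look up the device by its id (as shown by the 'id'
 * attribute) and unregister it, e.g. echo 0 > /sys/bus/xsegbd/remove */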
807 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
808 {
809         struct xsegbd_device *xsegbd_dev = NULL;
810         int id, ret;
811         unsigned long ul_id;
812
813         ret = kstrtoul(buf, 10, &ul_id);
814         if (ret)
815                 return ret;
816
817         id = (int) ul_id;
818         if (id != ul_id)
819                 return -EINVAL;
820
821         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
822
823         ret = count;
824         xsegbd_dev = __xsegbd_get_dev(id);
825         if (!xsegbd_dev) {
826                 ret = -ENOENT;
827                 goto out_unlock;
828         }
829
830         list_del_init(&xsegbd_dev->node);
831
832         xsegbd_bus_del_dev(xsegbd_dev);
833
834 out_unlock:
835         mutex_unlock(&xsegbd_mutex);
836         return ret;
837 }
838
839 static struct bus_attribute xsegbd_bus_attrs[] = {
840         __ATTR(add, S_IWUSR, NULL, xsegbd_add),
841         __ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
842         __ATTR_NULL
843 };
844
845 static int xsegbd_sysfs_init(void)
846 {
847         int ret;
848
849         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
850
851         ret = bus_register(&xsegbd_bus_type);
852         if (ret < 0)
853                 return ret;
854
855         ret = device_register(&xsegbd_root_dev);
856
857         return ret;
858 }
859
860 static void xsegbd_sysfs_cleanup(void)
861 {
862         device_unregister(&xsegbd_root_dev);
863         bus_unregister(&xsegbd_bus_type);
864 }
865
866 /* *************************** */
867 /* ** Module Initialization ** */
868 /* *************************** */
869
870 static int __init xsegbd_init(void)
871 {
872         int ret = -ENOMEM;
873
874         if (!xq_alloc_seq(&blk_queue_pending, max_nr_pending, max_nr_pending))
875                 goto out;
876
877         blk_req_pending = kzalloc(sizeof(struct pending) * max_nr_pending, GFP_KERNEL);
878         if (!blk_req_pending)
879                 goto out_queue;
880
881         ret = -ENOSYS;
882         ret = xsegbd_xseg_init();
883         if (ret)
884                 goto out_pending;
885
886         ret = xsegbd_sysfs_init();
887         if (ret)
888                 goto out_xseg;
889
890         XSEGLOG("initialization complete");
891
892 out:
893         return ret;
894
895 out_xseg:
896         xsegbd_xseg_quit();
897 out_pending:
898         kfree(blk_req_pending);
899 out_queue:
900         xq_free(&blk_queue_pending);
901         goto out;
902 }
903
904 static void __exit xsegbd_exit(void)
905 {
906         xsegbd_sysfs_cleanup();
907         xsegbd_xseg_quit();
            /* free the global pending-request bookkeeping allocated in init */
            kfree(blk_req_pending);
            xq_free(&blk_queue_pending);
908 }
909
910 module_init(xsegbd_init);
911 module_exit(xsegbd_exit);
912