xsegbd: Make sysfs add/remove more robust
archipelago: xseg/peers/kernel/xsegbd.c
/* xsegbd.c
 *
 * The XSEG block device driver: exposes an XSEG target as a Linux
 * block device, with devices added and removed through sysfs.
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/device.h>
#include <linux/completion.h>

#include <sys/kernel/segdev.h>
#include "xsegbd.h"

#define XSEGBD_MINORS 1

MODULE_DESCRIPTION("xsegbd");
MODULE_AUTHOR("XSEG");
MODULE_LICENSE("GPL");

static long sector_size = 0;
static long blksize = 512;
static int major = 0;
static int max_nr_pending = 1024;
static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
static char spec[256] = "segdev:xsegbd:4:512:64:1024:12";

module_param(sector_size, long, 0644);
module_param(blksize, long, 0644);
module_param(max_nr_pending, int, 0644);
module_param(major, int, 0644);
module_param_string(name, name, sizeof(name), 0644);
module_param_string(spec, spec, sizeof(spec), 0644);
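
/*
 * Illustrative module load (a hypothetical invocation, for reference
 * only); the parameter names match the module_param() declarations
 * above, and the spec string shown is just the built-in default:
 *
 *   insmod xsegbd.ko spec="segdev:xsegbd:4:512:64:1024:12" blksize=512
 *
 * A non-zero sector_size overrides the size reported by the target;
 * see xsegbd_dev_init().
 */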

struct pending {
	struct request *request;
	struct completion *comp;
	struct xsegbd_device *dev;
};
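
/*
 * Each in-flight xseg request borrows a slot in blk_req_pending; the
 * slot index travels in xreq->priv (see xseg_request_fn() and
 * xsegbd_get_size()) and comes back to us in xseg_callback(), which
 * either completes ->comp or ends ->request.
 */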

static struct xq blk_queue_pending;
static struct pending *blk_req_pending;
static unsigned int nr_pending;
static DEFINE_SPINLOCK(__lock);		/* protects nr_pending */
static struct xsegbd xsegbd;
static DEFINE_MUTEX(xsegbd_mutex);
static LIST_HEAD(xsegbd_dev_list);
static DEFINE_SPINLOCK(xsegbd_dev_list_lock);

/* ************************* */
/* ***** sysfs helpers ***** */
/* ************************* */

static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
{
	return container_of(dev, struct xsegbd_device, dev);
}

static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
{
	/* FIXME */
	return get_device(&xsegbd_dev->dev);
}

static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
{
	put_device(&xsegbd_dev->dev);
}

/* ************************* */
/* ** XSEG Initialization ** */
/* ************************* */

static void xseg_callback(struct xseg *xseg, uint32_t portno);

int xsegbd_xseg_init(void)
{
	int r;

	if (!xsegbd.name[0])
		strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);

	r = xseg_initialize();
	if (r) {
		XSEGLOG("cannot initialize 'segdev' peer");
		goto err;
	}

	r = xseg_parse_spec(spec, &xsegbd.config);
	if (r)
		goto err;

	if (strncmp(xsegbd.config.type, "segdev", 16))
		XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
			xsegbd.config.type);

	XSEGLOG("joining segment");
	xsegbd.xseg = xseg_join(xsegbd.config.type, xsegbd.config.name,
				"segdev", xseg_callback);
	if (!xsegbd.xseg) {
		XSEGLOG("cannot find segment");
		r = -ENODEV;
		goto err;
	}

	return 0;
err:
	return r;
}

int xsegbd_xseg_quit(void)
{
	struct segdev *segdev;

	/* make sure to unmap the segment first */
	segdev = segdev_get(0);
	clear_bit(SEGDEV_RESERVED, &segdev->flags);
	xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
	segdev_put(segdev);

	return 0;
}

/* ***************************** */
/* ** Block Device Operations ** */
/* ***************************** */

static int xsegbd_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct xsegbd_device *xsegbd_dev = disk->private_data;

	xsegbd_get_dev(xsegbd_dev);

	return 0;
}

static int xsegbd_release(struct gendisk *gd, fmode_t mode)
{
	struct xsegbd_device *xsegbd_dev = gd->private_data;

	xsegbd_put_dev(xsegbd_dev);

	return 0;
}

static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}

static const struct block_device_operations xsegbd_ops = {
	.owner		= THIS_MODULE,
	.open		= xsegbd_open,
	.release	= xsegbd_release,
	.ioctl		= xsegbd_ioctl
};

/* *************************** */
/* ** Device Initialization ** */
/* *************************** */

static void xseg_request_fn(struct request_queue *rq);
static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);

static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
{
	int ret = -ENOMEM;
	struct gendisk *disk;
	unsigned int max_request_size_bytes;

	spin_lock_init(&xsegbd_dev->lock);

	xsegbd_dev->xsegbd = &xsegbd;

	xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
	if (!xsegbd_dev->blk_queue)
		goto out;

	blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->lock);
	xsegbd_dev->blk_queue->queuedata = xsegbd_dev;

	blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
	blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
	blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
	blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);

	//blk_queue_max_segments(dev->blk_queue, 512);
	/* calculate maximum block request size:
	 * request size in pages * page size,
	 * leaving one page in the buffer for the target name
	 */
	max_request_size_bytes =
		(unsigned int) (xsegbd.config.request_size - 1) *
				(1 << xsegbd.config.page_shift);
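	/*
	 * Worked example, with values chosen purely for illustration:
	 * request_size = 64 pages and page_shift = 12 (4 KiB pages) give
	 * (64 - 1) * 4096 = 258048 bytes, i.e. 504 512-byte sectors for
	 * blk_queue_max_hw_sectors() below.
	 */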
	blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
	blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
	blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
	blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);

	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);

	/* vkoukis says we don't need partitions */
	xsegbd_dev->gd = disk = alloc_disk(1);
	if (!disk)
		goto out_disk;

	disk->major = xsegbd_dev->major;
	disk->first_minor = 0; // id * XSEGBD_MINORS;
	disk->fops = &xsegbd_ops;
	disk->queue = xsegbd_dev->blk_queue;
	disk->private_data = xsegbd_dev;
	disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
	snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);

	ret = 0;
	spin_lock_irq(&__lock);
	if (nr_pending + xsegbd_dev->nr_requests > max_nr_pending)
		ret = -ENOBUFS;
	else
		nr_pending += xsegbd_dev->nr_requests;
	spin_unlock_irq(&__lock);

	if (ret)
		goto out_disk;

	/* allow a non-zero sector_size parameter to override the disk size */
	if (sector_size)
		xsegbd_dev->sectors = sector_size;
	else {
		ret = xsegbd_get_size(xsegbd_dev);
		if (ret)
			goto out_disk;
	}

	set_capacity(disk, xsegbd_dev->sectors);
	XSEGLOG("xsegbd active...");
	add_disk(disk); /* immediately activates the device */

	return 0;

out_disk:
	put_disk(disk);
out:
	return ret;
}

static void xsegbd_dev_release(struct device *dev)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
	struct xseg_port *port;

	/* cleanup gendisk and blk_queue the right way */
	if (xsegbd_dev->gd) {
		if (xsegbd_dev->gd->flags & GENHD_FL_UP)
			del_gendisk(xsegbd_dev->gd);

		blk_cleanup_queue(xsegbd_dev->blk_queue);
		put_disk(xsegbd_dev->gd);
	}

	/* reset the port's waitcue (aka cancel_wait) */
	port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
	port->waitcue = (long) NULL;

	xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests);

	WARN_ON(nr_pending < xsegbd_dev->nr_requests);
	spin_lock_irq(&__lock);
	nr_pending -= xsegbd_dev->nr_requests;
	spin_unlock_irq(&__lock);

	unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);

	spin_lock(&xsegbd_dev_list_lock);
	list_del_init(&xsegbd_dev->node);
	spin_unlock(&xsegbd_dev_list_lock);
	kfree(xsegbd_dev);

	module_put(THIS_MODULE);
}

/* ******************* */
/* ** Critical Path ** */
/* ******************* */

static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
			struct request *blkreq)
{
	struct bio_vec *bvec;
	struct req_iterator iter;
	uint64_t off = 0;
	char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
	rq_for_each_segment(bvec, blkreq, iter) {
		char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
		memcpy(data + off, bdata, bvec->bv_len);
		off += bvec->bv_len;
		kunmap_atomic(bdata);
	}
}

static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
			struct request *blkreq)
{
	struct bio_vec *bvec;
	struct req_iterator iter;
	uint64_t off = 0;
	char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
	rq_for_each_segment(bvec, blkreq, iter) {
		char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
		memcpy(bdata, data + off, bvec->bv_len);
		off += bvec->bv_len;
		kunmap_atomic(bdata);
	}
}
static void xseg_request_fn(struct request_queue *rq)
{
	struct xseg_request *xreq;
	struct xsegbd_device *xsegbd_dev = rq->queuedata;
	struct request *blkreq;
	struct pending *pending;
	xqindex blkreq_idx;
	char *name;
	uint64_t datasize;

	for (;;) {
		xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
		if (!xreq)
			break;

		blkreq = blk_fetch_request(rq);
		if (!blkreq)
			break;

		if (blkreq->cmd_type != REQ_TYPE_FS) {
			XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
			__blk_end_request_all(blkreq, 0);
			/* don't submit an xseg request for it */
			xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
			continue;
		}

		datasize = blk_rq_bytes(blkreq);
		BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
		BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));

		name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
		strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
		blkreq_idx = xq_pop_head(&blk_queue_pending);
		BUG_ON(blkreq_idx == None);
		pending = &blk_req_pending[blkreq_idx];
		pending->dev = xsegbd_dev;
		pending->request = blkreq;
		pending->comp = NULL;
		xreq->priv = (uint64_t)blkreq_idx;
		xreq->size = datasize;
		xreq->offset = blk_rq_pos(blkreq) << 9;
		/*
		if (xreq->offset >= (sector_size << 9))
			XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
				 blk_rq_pos(blkreq), sector_size,
				 blkreq->cmd_flags & REQ_FLUSH,
				 blkreq->cmd_flags & REQ_FUA);
		*/

		if (blkreq->cmd_flags & REQ_FLUSH)
			xreq->flags |= XF_FLUSH;

		if (blkreq->cmd_flags & REQ_FUA)
			xreq->flags |= XF_FUA;

		if (rq_data_dir(blkreq)) {
			/* unlock for data transfers? */
			blk_to_xseg(xsegbd.xseg, xreq, blkreq);
			xreq->op = X_WRITE;
		} else {
			xreq->op = X_READ;
		}

		BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
	}

	/* TODO:
	 * This is going to happen at least once.
	 * Add a WARN_ON when debugging to find out why it happens more than once.
	 */
	xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno);
	if (xreq)
		xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq);
}

int update_dev_sectors_from_request(struct xsegbd_device *xsegbd_dev,
				    struct xseg_request *xreq)
{
	void *data;

	if (xreq->state & XS_FAILED)
		return -ENOENT;

	if (!(xreq->state & XS_SERVED))
		return -EIO;

	data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
	xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
	return 0;
}

static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
{
	struct xseg_request *xreq;
	struct xseg_port *port;
	char *name;
	uint64_t datasize;
	xqindex blkreq_idx;
	struct pending *pending;
	struct completion comp;
	int ret = -EBUSY;

	xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
	if (!xreq)
		return ret;	/* nothing to put back yet */

	datasize = sizeof(uint64_t);
	BUG_ON(xreq->buffersize - xsegbd_dev->namesize < datasize);
	BUG_ON(xseg_prep_request(xreq, xsegbd_dev->namesize, datasize));

	init_completion(&comp);
	blkreq_idx = xq_pop_head(&blk_queue_pending);
	BUG_ON(blkreq_idx == None);
	pending = &blk_req_pending[blkreq_idx];
	pending->dev = xsegbd_dev;
	pending->request = NULL;
	pending->comp = &comp;
	xreq->priv = (uint64_t)blkreq_idx;

	name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
	strncpy(name, xsegbd_dev->name, xsegbd_dev->namesize);
	xreq->size = datasize;
	xreq->offset = 0;

	xreq->op = X_INFO;

	port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
	port->waitcue = (uint64_t)(long)xsegbd_dev;

	BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
	xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);

	/* comp lives on our stack and xseg_callback() will complete() it,
	 * so this wait must not be cut short by a signal */
	wait_for_completion(&comp);
	XSEGLOG("Woken up after wait_for_completion()\n");
	ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
	XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);

	xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
	return ret;
}

static void xseg_callback(struct xseg *xseg, uint32_t portno)
{
	struct xsegbd_device *xsegbd_dev = NULL, *old_dev = NULL;
	struct xseg_request *xreq;
	struct request *blkreq;
	struct pending *pending;
	unsigned long flags;
	uint32_t blkreq_idx;
	int err;

	for (;;) {
		xreq = xseg_receive(xseg, portno);
		if (!xreq)
			break;

		/* we rely upon our peers to not have touched ->priv */
		blkreq_idx = (uint64_t)xreq->priv;
		if (blkreq_idx >= max_nr_pending) {
			WARN_ON(1);
			continue;
		}

		pending = &blk_req_pending[blkreq_idx];
		if (pending->comp) {
			/* someone is blocking on this request
			   and will handle it when we wake them up. */
			complete(pending->comp);
			/* the request is the blocker's responsibility so
			   we will not put_request(); */
			continue;
		}

		/* this is now treated as a block I/O request to end */
		blkreq = pending->request;
		pending->request = NULL;
		xsegbd_dev = pending->dev;
		pending->dev = NULL;
		WARN_ON(!blkreq);

		if ((xsegbd_dev != old_dev) && old_dev) {
			spin_lock_irqsave(&old_dev->lock, flags);
			xseg_request_fn(old_dev->blk_queue);
			spin_unlock_irqrestore(&old_dev->lock, flags);
		}

		old_dev = xsegbd_dev;

		/* fail with -EIO unless the request was fully served */
		err = -EIO;
		if (!(xreq->state & XS_SERVED))
			goto blk_end;

		if (xreq->serviced != blk_rq_bytes(blkreq))
			goto blk_end;

		/* unlock for data transfer? */
		if (!rq_data_dir(blkreq))
			xseg_to_blk(xseg, xreq, blkreq);

		err = 0;
blk_end:
		blk_end_request_all(blkreq, err);
		xq_append_head(&blk_queue_pending, blkreq_idx);
		xseg_put_request(xseg, xreq->portno, xreq);
	}

	if (xsegbd_dev) {
		spin_lock_irqsave(&xsegbd_dev->lock, flags);
		xseg_request_fn(xsegbd_dev->blk_queue);
		spin_unlock_irqrestore(&xsegbd_dev->lock, flags);
	}
}

/* sysfs interface */

static struct bus_type xsegbd_bus_type = {
	.name	= "xsegbd",
};

static ssize_t xsegbd_size_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
}

static ssize_t xsegbd_major_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%d\n", xsegbd_dev->major);
}

static ssize_t xsegbd_srcport_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
}

static ssize_t xsegbd_dstport_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
}

static ssize_t xsegbd_id_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
}

static ssize_t xsegbd_reqs_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
}

static ssize_t xsegbd_name_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%s\n", xsegbd_dev->name);
}

static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
static DEVICE_ATTR(id, S_IRUGO, xsegbd_id_show, NULL);
static DEVICE_ATTR(reqs, S_IRUGO, xsegbd_reqs_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, xsegbd_name_show, NULL);

static struct attribute *xsegbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_major.attr,
	&dev_attr_srcport.attr,
	&dev_attr_dstport.attr,
	&dev_attr_id.attr,
	&dev_attr_reqs.attr,
	&dev_attr_name.attr,
	NULL
};

static struct attribute_group xsegbd_attr_group = {
	.attrs = xsegbd_attrs,
};

static const struct attribute_group *xsegbd_attr_groups[] = {
	&xsegbd_attr_group,
	NULL
};

static void xsegbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type xsegbd_device_type = {
	.name		= "xsegbd",
	.groups		= xsegbd_attr_groups,
	.release	= xsegbd_sysfs_dev_release,
};

static void xsegbd_root_dev_release(struct device *dev)
{
}

static struct device xsegbd_root_dev = {
	.init_name	= "xsegbd",
	.release	= xsegbd_root_dev_release,
};

static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
{
	int ret = -ENOMEM;
	struct device *dev;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
	dev = &xsegbd_dev->dev;

	dev->bus = &xsegbd_bus_type;
	dev->type = &xsegbd_device_type;
	dev->parent = &xsegbd_root_dev;
	dev->release = xsegbd_dev_release;
	dev_set_name(dev, "%d", xsegbd_dev->id);

	ret = device_register(dev);

	mutex_unlock(&xsegbd_mutex);
	return ret;
}

static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
{
	device_unregister(&xsegbd_dev->dev);
}

static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
{
	struct xsegbd_device *xsegbd_dev;
	struct xseg_port *xport;
	ssize_t ret = -ENOMEM;
	int new_id = 0;
	struct list_head *tmp;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
	if (!xsegbd_dev)
		goto out;

	spin_lock_init(&xsegbd_dev->lock);
	INIT_LIST_HEAD(&xsegbd_dev->node);

	/* parse cmd: "<target name> <src port>:<dst port>:<nr requests>" */
	if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
			"%d:%d:%d", xsegbd_dev->name, &xsegbd_dev->src_portno,
			&xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
		ret = -EINVAL;
		goto out_dev;
	}
	xsegbd_dev->namesize = strlen(xsegbd_dev->name);

	spin_lock(&xsegbd_dev_list_lock);

	list_for_each(tmp, &xsegbd_dev_list) {
		struct xsegbd_device *entry;

		entry = list_entry(tmp, struct xsegbd_device, node);

		if (entry->src_portno == xsegbd_dev->src_portno) {
			ret = -EINVAL;
			goto out_unlock;
		}

		if (entry->id >= new_id)
			new_id = entry->id + 1;
	}

	xsegbd_dev->id = new_id;

	list_add_tail(&xsegbd_dev->node, &xsegbd_dev_list);

	spin_unlock(&xsegbd_dev_list_lock);

	XSEGLOG("registering block device major %d", major);
	ret = register_blkdev(major, XSEGBD_NAME);
	if (ret < 0) {
		XSEGLOG("cannot register block device!");
		ret = -EBUSY;
		goto out_delentry;
	}
	xsegbd_dev->major = ret;
	XSEGLOG("registered block device major %d", xsegbd_dev->major);

	ret = xsegbd_bus_add_dev(xsegbd_dev);
	if (ret)
		goto out_blkdev;

	XSEGLOG("binding to source port %u (destination %u)",
		xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
	xport = xseg_bind_port(xsegbd.xseg, xsegbd_dev->src_portno);
	if (!xport) {
		XSEGLOG("cannot bind to port");
		ret = -EFAULT;
		goto out_bus;
	}
	/* make sure we don't get any requests until we're ready to handle them */
	xport->waitcue = (long) NULL;

	XSEGLOG("allocating %u requests", xsegbd_dev->nr_requests);
	if (xseg_alloc_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests)) {
		XSEGLOG("cannot allocate requests");
		ret = -EFAULT;
		goto out_bus;
	}

	ret = xsegbd_dev_init(xsegbd_dev);
	if (ret)
		goto out_bus;

	return count;

out_bus:
	/* device_unregister() ends up in xsegbd_dev_release(), which
	 * performs the rest of the teardown, including module_put() */
	xsegbd_bus_del_dev(xsegbd_dev);
	return ret;

out_blkdev:
	unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
out_delentry:
	spin_lock(&xsegbd_dev_list_lock);
	list_del_init(&xsegbd_dev->node);
out_unlock:
	spin_unlock(&xsegbd_dev_list_lock);
out_dev:
	kfree(xsegbd_dev);
out:
	module_put(THIS_MODULE);
	return ret;
}

static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
{
	struct list_head *tmp;
	struct xsegbd_device *xsegbd_dev;

	spin_lock(&xsegbd_dev_list_lock);
	list_for_each(tmp, &xsegbd_dev_list) {
		xsegbd_dev = list_entry(tmp, struct xsegbd_device, node);
		if (xsegbd_dev->id == id) {
			spin_unlock(&xsegbd_dev_list_lock);
			return xsegbd_dev;
		}
	}
	spin_unlock(&xsegbd_dev_list_lock);
	return NULL;
}
static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
{
	struct xsegbd_device *xsegbd_dev = NULL;
	int id, ret;
	unsigned long ul_id;

	ret = kstrtoul(buf, 10, &ul_id);
	if (ret)
		return ret;

	id = (int) ul_id;
	if (id != ul_id)
		return -EINVAL;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);

	ret = count;
	xsegbd_dev = __xsegbd_get_dev(id);
	if (!xsegbd_dev) {
		ret = -ENOENT;
		goto out_unlock;
	}

	xsegbd_bus_del_dev(xsegbd_dev);

out_unlock:
	mutex_unlock(&xsegbd_mutex);
	return ret;
}

static struct bus_attribute xsegbd_bus_attrs[] = {
	__ATTR(add, S_IWUSR, NULL, xsegbd_add),
	__ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
	__ATTR_NULL
};
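
/*
 * Sketch of how the bus attributes above are driven from userspace, with
 * a hypothetical target name and port numbers; the add format follows the
 * sscanf() in xsegbd_add() ("<target name> <src port>:<dst port>:<nr
 * requests>") and remove takes the device id found under
 * /sys/bus/xsegbd/devices/:
 *
 *   echo "mytarget 1:0:128" > /sys/bus/xsegbd/add
 *   echo 0 > /sys/bus/xsegbd/remove
 */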

static int xsegbd_sysfs_init(void)
{
	int ret;

	ret = device_register(&xsegbd_root_dev);
	if (ret < 0)
		return ret;

	xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
	ret = bus_register(&xsegbd_bus_type);
	if (ret < 0)
		device_unregister(&xsegbd_root_dev);

	return ret;
}

static void xsegbd_sysfs_cleanup(void)
{
	bus_unregister(&xsegbd_bus_type);
	device_unregister(&xsegbd_root_dev);
}
/* *************************** */
/* ** Module Initialization ** */
/* *************************** */

static int __init xsegbd_init(void)
{
	int ret = -ENOMEM;

	if (!xq_alloc_seq(&blk_queue_pending, max_nr_pending, max_nr_pending))
		goto out;

	blk_req_pending = kzalloc(sizeof(struct pending) * max_nr_pending, GFP_KERNEL);
	if (!blk_req_pending)
		goto out_queue;

	ret = xsegbd_xseg_init();
	if (ret)
		goto out_pending;

	ret = xsegbd_sysfs_init();
	if (ret)
		goto out_xseg;

	XSEGLOG("initialization complete");

out:
	return ret;

out_xseg:
	xsegbd_xseg_quit();
out_pending:
	kfree(blk_req_pending);
out_queue:
	xq_free(&blk_queue_pending);
	goto out;
}

static void __exit xsegbd_exit(void)
{
	xsegbd_sysfs_cleanup();
	xsegbd_xseg_quit();
	kfree(blk_req_pending);
	xq_free(&blk_queue_pending);
}

module_init(xsegbd_init);
module_exit(xsegbd_exit);